+++ /dev/null
-// -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*-\r
-//\r
-// internal System.Xml.XmlUtil\r
-//\r
-// Author:\r
-// Daniel Weber (daniel-weber@austin.rr.com)\r
-// Code ported from Open XML 2.3.17 (Delphi/Kylix)\r
-//\r
-// (C) 2001 Daniel Weber\r
-//\r
-\r
-using System;\r
-using System.IO;\r
-\r
-namespace System.Xml\r
-{\r
- /// <summary>\r
- /// Helper class with static utility functions that are not specific to an XML version,
- /// such as character-encoding conversions.
- /// </summary>\r
- internal class XmlUtil\r
- {\r
- /// <summary>
- /// Converts a single ISO-8859-1 (Latin-1) byte to a UTF-16 character.
- /// </summary>
- /// <param name="P">The ISO-8859-1 encoded byte.</param>
- /// <returns>The character whose code unit has the same numeric value as <paramref name="P"/>.</returns>
- public static char Iso8859_1ToUTF16Char(byte P)
- {
-     // Every byte value is used unchanged as the UTF-16 code unit.
-     return System.Convert.ToChar(P);
- }
-\r
- /// <summary>
- /// Converts a single ISO-8859-2 (Latin-2) byte to a UTF-16 character.
- /// </summary>
- /// <param name="P">The ISO-8859-2 encoded byte.</param>
- /// <returns>
- /// The mapped character for the bytes listed in the table below; every other
- /// byte is returned with its numeric value unchanged.
- /// </returns>
- public static char Iso8859_2ToUTF16Char(byte P)
- {
-     switch (P)
-     {
-         case 0xa1: return (char) 0x0104; // LATIN CAPITAL LETTER A WITH OGONEK
-         case 0xa2: return (char) 0x02d8; // BREVE
-         case 0xa3: return (char) 0x0141; // LATIN CAPITAL LETTER L WITH STROKE
-         // Fixed: this used to return U+0132 (LATIN CAPITAL LIGATURE IJ).
-         case 0xa5: return (char) 0x013d; // LATIN CAPITAL LETTER L WITH CARON
-         case 0xa6: return (char) 0x015a; // LATIN CAPITAL LETTER S WITH ACUTE
-         case 0xa9: return (char) 0x0160; // LATIN CAPITAL LETTER S WITH CARON
-         case 0xaa: return (char) 0x015e; // LATIN CAPITAL LETTER S WITH CEDILLA
-         case 0xab: return (char) 0x0164; // LATIN CAPITAL LETTER T WITH CARON
-         case 0xac: return (char) 0x0179; // LATIN CAPITAL LETTER Z WITH ACUTE
-         case 0xae: return (char) 0x017d; // LATIN CAPITAL LETTER Z WITH CARON
-         case 0xaf: return (char) 0x017b; // LATIN CAPITAL LETTER Z WITH DOT ABOVE
-         case 0xb1: return (char) 0x0105; // LATIN SMALL LETTER A WITH OGONEK
-         case 0xb2: return (char) 0x02db; // OGONEK
-         case 0xb3: return (char) 0x0142; // LATIN SMALL LETTER L WITH STROKE
-         case 0xb5: return (char) 0x013e; // LATIN SMALL LETTER L WITH CARON
-         case 0xb6: return (char) 0x015b; // LATIN SMALL LETTER S WITH ACUTE
-         case 0xb7: return (char) 0x02c7; // CARON
-         case 0xb9: return (char) 0x0161; // LATIN SMALL LETTER S WITH CARON
-         case 0xba: return (char) 0x015f; // LATIN SMALL LETTER S WITH CEDILLA
-         case 0xbb: return (char) 0x0165; // LATIN SMALL LETTER T WITH CARON
-         case 0xbc: return (char) 0x017a; // LATIN SMALL LETTER Z WITH ACUTE
-         case 0xbd: return (char) 0x02dd; // DOUBLE ACUTE ACCENT
-         case 0xbe: return (char) 0x017e; // LATIN SMALL LETTER Z WITH CARON
-         case 0xbf: return (char) 0x017c; // LATIN SMALL LETTER Z WITH DOT ABOVE
-         case 0xc0: return (char) 0x0154; // LATIN CAPITAL LETTER R WITH ACUTE
-         case 0xc3: return (char) 0x0102; // LATIN CAPITAL LETTER A WITH BREVE
-         case 0xc5: return (char) 0x0139; // LATIN CAPITAL LETTER L WITH ACUTE
-         case 0xc6: return (char) 0x0106; // LATIN CAPITAL LETTER C WITH ACUTE
-         case 0xc8: return (char) 0x010c; // LATIN CAPITAL LETTER C WITH CARON
-         case 0xca: return (char) 0x0118; // LATIN CAPITAL LETTER E WITH OGONEK
-         case 0xcc: return (char) 0x011a; // LATIN CAPITAL LETTER E WITH CARON
-         case 0xcf: return (char) 0x010e; // LATIN CAPITAL LETTER D WITH CARON
-         case 0xd0: return (char) 0x0110; // LATIN CAPITAL LETTER D WITH STROKE
-         case 0xd1: return (char) 0x0143; // LATIN CAPITAL LETTER N WITH ACUTE
-         case 0xd2: return (char) 0x0147; // LATIN CAPITAL LETTER N WITH CARON
-         case 0xd5: return (char) 0x0150; // LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
-         case 0xd8: return (char) 0x0158; // LATIN CAPITAL LETTER R WITH CARON
-         case 0xd9: return (char) 0x016e; // LATIN CAPITAL LETTER U WITH RING ABOVE
-         case 0xdb: return (char) 0x0170; // LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
-         case 0xde: return (char) 0x0162; // LATIN CAPITAL LETTER T WITH CEDILLA
-         case 0xe0: return (char) 0x0155; // LATIN SMALL LETTER R WITH ACUTE
-         case 0xe3: return (char) 0x0103; // LATIN SMALL LETTER A WITH BREVE
-         case 0xe5: return (char) 0x013a; // LATIN SMALL LETTER L WITH ACUTE
-         case 0xe6: return (char) 0x0107; // LATIN SMALL LETTER C WITH ACUTE
-         case 0xe8: return (char) 0x010d; // LATIN SMALL LETTER C WITH CARON
-         case 0xea: return (char) 0x0119; // LATIN SMALL LETTER E WITH OGONEK
-         case 0xec: return (char) 0x011b; // LATIN SMALL LETTER E WITH CARON
-         case 0xef: return (char) 0x010f; // LATIN SMALL LETTER D WITH CARON
-         case 0xf0: return (char) 0x0111; // LATIN SMALL LETTER D WITH STROKE
-         case 0xf1: return (char) 0x0144; // LATIN SMALL LETTER N WITH ACUTE
-         case 0xf2: return (char) 0x0148; // LATIN SMALL LETTER N WITH CARON
-         case 0xf5: return (char) 0x0151; // LATIN SMALL LETTER O WITH DOUBLE ACUTE
-         case 0xf8: return (char) 0x0159; // LATIN SMALL LETTER R WITH CARON
-         case 0xf9: return (char) 0x016f; // LATIN SMALL LETTER U WITH RING ABOVE
-         case 0xfb: return (char) 0x0171; // LATIN SMALL LETTER U WITH DOUBLE ACUTE
-         case 0xfe: return (char) 0x0163; // LATIN SMALL LETTER T WITH CEDILLA
-         case 0xff: return (char) 0x02d9; // DOT ABOVE
-         default:
-             // All remaining bytes keep their numeric value.
-             return (char) P;
-     }
- }
-\r
- /// <summary>
- /// Converts a single ISO-8859-3 (Latin-3) byte to a UTF-16 character.
- /// </summary>
- /// <param name="P">The ISO-8859-3 encoded byte.</param>
- /// <returns>
- /// The mapped character for the bytes listed below; every other accepted byte
- /// is returned with its numeric value unchanged.
- /// </returns>
- /// <exception cref="InvalidOperationException">
- /// <paramref name="P"/> is one of the bytes this method rejects as invalid
- /// (0xA5, 0xAE, 0xBE, 0xC3, 0xD0, 0xE3, 0xF0).
- /// </exception>
- public static char Iso8859_3ToUTF16Char( byte P)
- {
-     switch (P)
-     {
-         // Bytes rejected as invalid ISO-8859-3 input.
-         case 0xa5:
-         case 0xae:
-         case 0xbe:
-         case 0xc3:
-         case 0xd0:
-         case 0xe3:
-         case 0xf0:
-             throw new InvalidOperationException("Invalid ISO-8859-3 sequence [" + P.ToString() + "]");
-         case 0xa1: return '\u0126'; // LATIN CAPITAL LETTER H WITH STROKE
-         case 0xa2: return '\u02d8'; // BREVE
-         case 0xa6: return '\u0124'; // LATIN CAPITAL LETTER H WITH CIRCUMFLEX
-         case 0xa9: return '\u0130'; // LATIN CAPITAL LETTER I WITH DOT ABOVE
-         case 0xaa: return '\u015e'; // LATIN CAPITAL LETTER S WITH CEDILLA
-         case 0xab: return '\u011e'; // LATIN CAPITAL LETTER G WITH BREVE
-         case 0xac: return '\u0134'; // LATIN CAPITAL LETTER J WITH CIRCUMFLEX
-         case 0xaf: return '\u017b'; // LATIN CAPITAL LETTER Z WITH DOT ABOVE
-         case 0xb1: return '\u0127'; // LATIN SMALL LETTER H WITH STROKE
-         case 0xb6: return '\u0125'; // LATIN SMALL LETTER H WITH CIRCUMFLEX
-         case 0xb9: return '\u0131'; // LATIN SMALL LETTER DOTLESS I
-         case 0xba: return '\u015f'; // LATIN SMALL LETTER S WITH CEDILLA
-         case 0xbb: return '\u011f'; // LATIN SMALL LETTER G WITH BREVE
-         case 0xbc: return '\u0135'; // LATIN SMALL LETTER J WITH CIRCUMFLEX
-         case 0xbf: return '\u017c'; // LATIN SMALL LETTER Z WITH DOT ABOVE
-         case 0xc5: return '\u010a'; // LATIN CAPITAL LETTER C WITH DOT ABOVE
-         case 0xc6: return '\u0108'; // LATIN CAPITAL LETTER C WITH CIRCUMFLEX
-         case 0xd5: return '\u0120'; // LATIN CAPITAL LETTER G WITH DOT ABOVE
-         case 0xd8: return '\u011c'; // LATIN CAPITAL LETTER G WITH CIRCUMFLEX
-         case 0xdd: return '\u016c'; // LATIN CAPITAL LETTER U WITH BREVE
-         case 0xde: return '\u015c'; // LATIN CAPITAL LETTER S WITH CIRCUMFLEX
-         case 0xe5: return '\u010b'; // LATIN SMALL LETTER C WITH DOT ABOVE
-         case 0xe6: return '\u0109'; // LATIN SMALL LETTER C WITH CIRCUMFLEX
-         case 0xf5: return '\u0121'; // LATIN SMALL LETTER G WITH DOT ABOVE
-         case 0xf8: return '\u011d'; // LATIN SMALL LETTER G WITH CIRCUMFLEX
-         case 0xfd: return '\u016d'; // LATIN SMALL LETTER U WITH BREVE
-         case 0xfe: return '\u015d'; // LATIN SMALL LETTER S WITH CIRCUMFLEX
-         case 0xff: return '\u02d9'; // DOT ABOVE
-         default:
-             // Everything else keeps its numeric value.
-             return (char) P;
-     }
- }
-\r
- /// <summary>
- /// Converts a single ISO-8859-4 (Latin-4) byte to a UTF-16 character.
- /// </summary>
- /// <param name="P">The ISO-8859-4 encoded byte.</param>
- /// <returns>
- /// The mapped character for the bytes listed below; every other byte is
- /// returned with its numeric value unchanged.
- /// </returns>
- public static char Iso8859_4ToUTF16Char( byte P)
- {
-     // Code unit to return; selected by the table below.
-     int mapped;
-     switch (P)
-     {
-         case 0xa1: mapped = 0x0104; break; // LATIN CAPITAL LETTER A WITH OGONEK
-         case 0xa2: mapped = 0x0138; break; // LATIN SMALL LETTER KRA
-         case 0xa3: mapped = 0x0156; break; // LATIN CAPITAL LETTER R WITH CEDILLA
-         case 0xa5: mapped = 0x0128; break; // LATIN CAPITAL LETTER I WITH TILDE
-         case 0xa6: mapped = 0x013b; break; // LATIN CAPITAL LETTER L WITH CEDILLA
-         case 0xa9: mapped = 0x0160; break; // LATIN CAPITAL LETTER S WITH CARON
-         case 0xaa: mapped = 0x0112; break; // LATIN CAPITAL LETTER E WITH MACRON
-         case 0xab: mapped = 0x0122; break; // LATIN CAPITAL LETTER G WITH CEDILLA
-         case 0xac: mapped = 0x0166; break; // LATIN CAPITAL LETTER T WITH STROKE
-         case 0xae: mapped = 0x017d; break; // LATIN CAPITAL LETTER Z WITH CARON
-         case 0xb1: mapped = 0x0105; break; // LATIN SMALL LETTER A WITH OGONEK
-         case 0xb2: mapped = 0x02db; break; // OGONEK
-         case 0xb3: mapped = 0x0157; break; // LATIN SMALL LETTER R WITH CEDILLA
-         case 0xb5: mapped = 0x0129; break; // LATIN SMALL LETTER I WITH TILDE
-         case 0xb6: mapped = 0x013c; break; // LATIN SMALL LETTER L WITH CEDILLA
-         case 0xb7: mapped = 0x02c7; break; // CARON
-         case 0xb9: mapped = 0x0161; break; // LATIN SMALL LETTER S WITH CARON
-         case 0xba: mapped = 0x0113; break; // LATIN SMALL LETTER E WITH MACRON
-         case 0xbb: mapped = 0x0123; break; // LATIN SMALL LETTER G WITH CEDILLA
-         case 0xbc: mapped = 0x0167; break; // LATIN SMALL LETTER T WITH STROKE
-         case 0xbd: mapped = 0x014a; break; // LATIN CAPITAL LETTER ENG
-         case 0xbe: mapped = 0x017e; break; // LATIN SMALL LETTER Z WITH CARON
-         case 0xbf: mapped = 0x014b; break; // LATIN SMALL LETTER ENG
-         case 0xc0: mapped = 0x0100; break; // LATIN CAPITAL LETTER A WITH MACRON
-         case 0xc7: mapped = 0x012e; break; // LATIN CAPITAL LETTER I WITH OGONEK
-         case 0xc8: mapped = 0x010c; break; // LATIN CAPITAL LETTER C WITH CARON
-         case 0xca: mapped = 0x0118; break; // LATIN CAPITAL LETTER E WITH OGONEK
-         case 0xcc: mapped = 0x0116; break; // LATIN CAPITAL LETTER E WITH DOT ABOVE
-         case 0xcf: mapped = 0x012a; break; // LATIN CAPITAL LETTER I WITH MACRON
-         case 0xd0: mapped = 0x0110; break; // LATIN CAPITAL LETTER D WITH STROKE
-         case 0xd1: mapped = 0x0145; break; // LATIN CAPITAL LETTER N WITH CEDILLA
-         case 0xd2: mapped = 0x014c; break; // LATIN CAPITAL LETTER O WITH MACRON
-         case 0xd3: mapped = 0x0136; break; // LATIN CAPITAL LETTER K WITH CEDILLA
-         case 0xd9: mapped = 0x0172; break; // LATIN CAPITAL LETTER U WITH OGONEK
-         case 0xdd: mapped = 0x0168; break; // LATIN CAPITAL LETTER U WITH TILDE
-         case 0xde: mapped = 0x016a; break; // LATIN CAPITAL LETTER U WITH MACRON
-         case 0xe0: mapped = 0x0101; break; // LATIN SMALL LETTER A WITH MACRON
-         case 0xe7: mapped = 0x012f; break; // LATIN SMALL LETTER I WITH OGONEK
-         case 0xe8: mapped = 0x010d; break; // LATIN SMALL LETTER C WITH CARON
-         case 0xea: mapped = 0x0119; break; // LATIN SMALL LETTER E WITH OGONEK
-         case 0xec: mapped = 0x0117; break; // LATIN SMALL LETTER E WITH DOT ABOVE
-         case 0xef: mapped = 0x012b; break; // LATIN SMALL LETTER I WITH MACRON
-         case 0xf0: mapped = 0x0111; break; // LATIN SMALL LETTER D WITH STROKE
-         case 0xf1: mapped = 0x0146; break; // LATIN SMALL LETTER N WITH CEDILLA
-         case 0xf2: mapped = 0x014d; break; // LATIN SMALL LETTER O WITH MACRON
-         case 0xf3: mapped = 0x0137; break; // LATIN SMALL LETTER K WITH CEDILLA
-         case 0xf9: mapped = 0x0173; break; // LATIN SMALL LETTER U WITH OGONEK
-         case 0xfd: mapped = 0x0169; break; // LATIN SMALL LETTER U WITH TILDE
-         case 0xfe: mapped = 0x016b; break; // LATIN SMALL LETTER U WITH MACRON
-         case 0xff: mapped = 0x02d9; break; // DOT ABOVE
-         default:
-             // Bytes without a table entry keep their numeric value.
-             mapped = P;
-             break;
-     }
-     return (char) mapped;
- }
-\r
- /// <summary>
- /// Converts a single ISO-8859-5 (Latin/Cyrillic) byte to a UTF-16 character.
- /// </summary>
- /// <param name="P">The ISO-8859-5 encoded byte.</param>
- /// <returns>
- /// The byte's own value for 0x00-0xA0 and 0xAD, U+2116 for 0xF0, U+00A7 for
- /// 0xFD, and the byte value plus 0x0360 for everything else.
- /// </returns>
- public static char Iso8859_5ToUTF16Char(byte P)
- {
-     // Individually mapped bytes are handled before the range rules.
-     switch (P)
-     {
-         case 0xad: return (char) P;
-         case 0xf0: return (char) 0x2116; // NUMERO SIGN
-         case 0xfd: return (char) 0x00a7; // SECTION SIGN
-     }
-     // The lower part of the table keeps its numeric value.
-     if (P <= 0xa0)
-     {
-         return (char) P;
-     }
-     // All remaining bytes are shifted by a fixed offset.
-     int shifted = 0x0360 + P;
-     return System.Convert.ToChar(shifted);
- }
-\r
- /// <summary>
- /// Converts a single ISO-8859-6 (Latin/Arabic) byte to a UTF-16 character.
- /// </summary>
- /// <param name="P">The ISO-8859-6 encoded byte.</param>
- /// <returns>
- /// The byte's own value for 0x00-0xA0, 0xA4 and 0xAD; the byte value plus
- /// 0x0560 for 0xAC, 0xBB, 0xBF, 0xC1-0xDA and 0xE0-0xF2.
- /// </returns>
- /// <exception cref="InvalidOperationException">
- /// <paramref name="P"/> is not one of the bytes listed above.
- /// </exception>
- public static char Iso8859_6ToUTF16Char(byte P)
- {
-     if ( P <= 0xa0 )
-         return (char) P;
-     // CURRENCY SIGN and SOFT HYPHEN keep their Latin-1 values.
-     // Fixed: 0xAD (SOFT HYPHEN) used to be rejected as invalid.
-     else if ( (P == 0xa4) || (P == 0xad) )
-         return (char) P;
-     // Fixed: the Arabic offset is 0x0560, not 0x0580
-     // (0xAC -> U+060C ARABIC COMMA, 0xC1 -> U+0621 HAMZA, 0xE0 -> U+0640 TATWEEL).
-     else if ( (P == 0xac) || (P == 0xbb) || (P == 0xbf) )
-         return System.Convert.ToChar(P + 0x0560);
-     else if ( (P >= 0xc1) && (P <= 0xda) )
-         return System.Convert.ToChar(P + 0x0560);
-     else if ( (P >= 0xe0) && (P <= 0xf2) )
-         return System.Convert.ToChar(P + 0x0560);
-     else
-         throw new InvalidOperationException("Invalid ISO-8859-6 sequence [" + P.ToString() + "]");
- }
-\r
- /// <summary>
- /// Converts a single ISO-8859-7 (Latin/Greek) byte to a UTF-16 character.
- /// </summary>
- /// <param name="P">The ISO-8859-7 encoded byte.</param>
- /// <returns>
- /// The byte's own value for 0x00-0xA0 and the Latin-1 symbols listed in the
- /// switch; the explicitly mapped punctuation and currency characters; and
- /// the byte value plus 0x02D0 for the remaining (Greek) positions.
- /// </returns>
- /// <exception cref="InvalidOperationException">
- /// <paramref name="P"/> is 0xAE, 0xD2 or 0xFF.
- /// </exception>
- public static char Iso8859_7ToUTF16Char(byte P)
- {
-     if ( P <= 0xa0 )
-         return (char) P;
-     switch (P)
-     {
-         case 0xa1: return (char) 0x2018; // LEFT SINGLE QUOTATION MARK
-         case 0xa2: return (char) 0x2019; // RIGHT SINGLE QUOTATION MARK
-         // Fixed: 0xA4 and 0xA5 used to fall into the Greek offset branch and
-         // produced U+0374 / U+0375. These are their ISO/IEC 8859-7:2003 assignments.
-         case 0xa4: return (char) 0x20ac; // EURO SIGN
-         case 0xa5: return (char) 0x20af; // DRACHMA SIGN
-         case 0xaf: return (char) 0x2015; // HORIZONTAL BAR
-         // Symbols that keep their Latin-1 value.
-         // Fixed: 0xA3 (POUND SIGN) was missing here and came out as U+0373.
-         case 0xa3:
-         case 0xa6:
-         case 0xa7:
-         case 0xa8:
-         case 0xa9:
-         case 0xab:
-         case 0xac:
-         case 0xad:
-         case 0xb0:
-         case 0xb1:
-         case 0xb2:
-         case 0xb3:
-         case 0xb7:
-         case 0xbb:
-         case 0xbd:
-             return (char) P;
-         // Unassigned positions.
-         // Fixed: 0xAE used to be shifted to U+037E instead of being rejected.
-         case 0xae:
-         case 0xd2:
-         case 0xff:
-             throw new InvalidOperationException("Invalid ISO-8859-7 sequence [" + P.ToString() + "]");
-         default:
-             // Greek block: 0xB4 -> U+0384 ... 0xFE -> U+03CE (and 0xAA -> U+037A).
-             return System.Convert.ToChar(P + 0x02d0);
-     }
- }
-\r
- /// <summary>
- /// Converts a single ISO-8859-8 (Latin/Hebrew) byte to a UTF-16 character.
- /// </summary>
- /// <param name="P">The ISO-8859-8 encoded byte.</param>
- /// <returns>
- /// The byte's own value for 0x00-0xA0 and the Latin-1 symbol ranges checked
- /// below; the explicitly mapped symbols for 0xAA, 0xAF, 0xBA and 0xDF; and
- /// the byte value plus 0x04F0 for the Hebrew letters 0xE0-0xFA.
- /// </returns>
- /// <exception cref="InvalidOperationException">
- /// <paramref name="P"/> is none of the bytes described above.
- /// </exception>
- public static char Iso8859_8ToUTF16Char(byte P)
- {
-     if ( P <= 0xa0 )
-         return (char) P;
-     else if ( (P >= 0xa2) && (P <= 0xa9) )
-         return (char) P;
-     else if ( (P >= 0xab) && (P <= 0xae) )
-         return (char) P;
-     else if ( (P >= 0xb0) && (P <= 0xb9) )
-         return (char) P;
-     else if ( (P >= 0xbb) && (P <= 0xbe) )
-         return (char) P;
-     else if ( P == 0xaa ) // MULTIPLICATION SIGN
-         return (char) 0x00d7;
-     // NOTE(review): later mapping tables appear to use U+00AF MACRON for
-     // 0xAF; confirm which edition is intended before changing it.
-     else if ( P == 0xaf ) // OVERLINE
-         return (char) 0x203e;
-     else if ( P == 0xba ) // DIVISION SIGN
-         return (char) 0x00f7;
-     else if ( P == 0xdf ) // DOUBLE LOW LINE
-         return (char) 0x2017;
-     // Fixed: the Hebrew offset is 0x04F0, not 0x04E0
-     // (0xE0 -> U+05D0 ALEF ... 0xFA -> U+05EA TAV).
-     else if ( (P >= 0xe0) && (P <= 0xfa) )
-         return System.Convert.ToChar(P + 0x04f0);
-     else
-         throw new InvalidOperationException("Invalid ISO-8859-8 sequence [" + P.ToString() + "]");
- }
-\r
- /// <summary>
- /// Converts a single ISO-8859-9 (Latin-5) byte to a UTF-16 character.
- /// </summary>
- /// <param name="P">The ISO-8859-9 encoded byte.</param>
- /// <returns>
- /// The mapped character for the six bytes handled below; every other byte is
- /// returned with its numeric value unchanged.
- /// </returns>
- public static char Iso8859_9ToUTF16Char(byte P)
- {
-     // Starts as the identity mapping; only six bytes override it.
-     char result = (char) P;
-     if ( P == 0xd0 )
-         result = '\u011e'; // LATIN CAPITAL LETTER G WITH BREVE
-     else if ( P == 0xdd )
-         result = '\u0130'; // LATIN CAPITAL LETTER I WITH DOT ABOVE
-     else if ( P == 0xde )
-         result = '\u015e'; // LATIN CAPITAL LETTER S WITH CEDILLA
-     else if ( P == 0xf0 )
-         result = '\u011f'; // LATIN SMALL LETTER G WITH BREVE
-     else if ( P == 0xfd )
-         result = '\u0131'; // LATIN SMALL LETTER DOTLESS I
-     else if ( P == 0xfe )
-         result = '\u015f'; // LATIN SMALL LETTER S WITH CEDILLA
-     return result;
- }
-\r
- /// <summary>
- /// Converts a single ISO-8859-10 (Latin-6) byte to a UTF-16 character.
- /// </summary>
- /// <param name="P">The ISO-8859-10 encoded byte.</param>
- /// <returns>
- /// The mapped character for the bytes listed below; every other byte is
- /// returned with its numeric value unchanged.
- /// </returns>
- public static char Iso8859_10ToUTF16Char(byte P)
- {
-     // No table entry exists below 0xA1, so those bytes map to themselves.
-     if ( P < 0xa1 )
-         return (char) P;
-     switch (P)
-     {
-         case 0xa1: return '\u0104'; // LATIN CAPITAL LETTER A WITH OGONEK
-         case 0xa2: return '\u0112'; // LATIN CAPITAL LETTER E WITH MACRON
-         case 0xa3: return '\u0122'; // LATIN CAPITAL LETTER G WITH CEDILLA
-         case 0xa4: return '\u012a'; // LATIN CAPITAL LETTER I WITH MACRON
-         case 0xa5: return '\u0128'; // LATIN CAPITAL LETTER I WITH TILDE
-         case 0xa6: return '\u0136'; // LATIN CAPITAL LETTER K WITH CEDILLA
-         case 0xa8: return '\u013b'; // LATIN CAPITAL LETTER L WITH CEDILLA
-         case 0xa9: return '\u0110'; // LATIN CAPITAL LETTER D WITH STROKE
-         case 0xaa: return '\u0160'; // LATIN CAPITAL LETTER S WITH CARON
-         case 0xab: return '\u0166'; // LATIN CAPITAL LETTER T WITH STROKE
-         case 0xac: return '\u017d'; // LATIN CAPITAL LETTER Z WITH CARON
-         case 0xae: return '\u016a'; // LATIN CAPITAL LETTER U WITH MACRON
-         case 0xaf: return '\u014a'; // LATIN CAPITAL LETTER ENG
-         case 0xb1: return '\u0105'; // LATIN SMALL LETTER A WITH OGONEK
-         case 0xb2: return '\u0113'; // LATIN SMALL LETTER E WITH MACRON
-         case 0xb3: return '\u0123'; // LATIN SMALL LETTER G WITH CEDILLA
-         case 0xb4: return '\u012b'; // LATIN SMALL LETTER I WITH MACRON
-         case 0xb5: return '\u0129'; // LATIN SMALL LETTER I WITH TILDE
-         case 0xb6: return '\u0137'; // LATIN SMALL LETTER K WITH CEDILLA
-         case 0xb8: return '\u013c'; // LATIN SMALL LETTER L WITH CEDILLA
-         case 0xb9: return '\u0111'; // LATIN SMALL LETTER D WITH STROKE
-         case 0xba: return '\u0161'; // LATIN SMALL LETTER S WITH CARON
-         case 0xbb: return '\u0167'; // LATIN SMALL LETTER T WITH STROKE
-         case 0xbc: return '\u017e'; // LATIN SMALL LETTER Z WITH CARON
-         case 0xbd: return '\u2015'; // HORIZONTAL BAR
-         case 0xbe: return '\u016b'; // LATIN SMALL LETTER U WITH MACRON
-         case 0xbf: return '\u014b'; // LATIN SMALL LETTER ENG
-         case 0xc0: return '\u0100'; // LATIN CAPITAL LETTER A WITH MACRON
-         case 0xc7: return '\u012e'; // LATIN CAPITAL LETTER I WITH OGONEK
-         case 0xc8: return '\u010c'; // LATIN CAPITAL LETTER C WITH CARON
-         case 0xca: return '\u0118'; // LATIN CAPITAL LETTER E WITH OGONEK
-         case 0xcc: return '\u0116'; // LATIN CAPITAL LETTER E WITH DOT ABOVE
-         case 0xd1: return '\u0145'; // LATIN CAPITAL LETTER N WITH CEDILLA
-         case 0xd2: return '\u014c'; // LATIN CAPITAL LETTER O WITH MACRON
-         case 0xd7: return '\u0168'; // LATIN CAPITAL LETTER U WITH TILDE
-         case 0xd9: return '\u0172'; // LATIN CAPITAL LETTER U WITH OGONEK
-         case 0xe0: return '\u0101'; // LATIN SMALL LETTER A WITH MACRON
-         case 0xe7: return '\u012f'; // LATIN SMALL LETTER I WITH OGONEK
-         case 0xe8: return '\u010d'; // LATIN SMALL LETTER C WITH CARON
-         case 0xea: return '\u0119'; // LATIN SMALL LETTER E WITH OGONEK
-         case 0xec: return '\u0117'; // LATIN SMALL LETTER E WITH DOT ABOVE
-         case 0xf1: return '\u0146'; // LATIN SMALL LETTER N WITH CEDILLA
-         case 0xf2: return '\u014d'; // LATIN SMALL LETTER O WITH MACRON
-         case 0xf7: return '\u0169'; // LATIN SMALL LETTER U WITH TILDE
-         case 0xf9: return '\u0173'; // LATIN SMALL LETTER U WITH OGONEK
-         case 0xff: return '\u0138'; // LATIN SMALL LETTER KRA
-     }
-     // Upper-half bytes without a table entry also keep their numeric value.
-     return (char) P;
- }
-\r
- /// <summary>
- /// Converts a single ISO-8859-13 (Latin-7) byte to a UTF-16 character.
- /// </summary>
- /// <param name="P">The ISO-8859-13 encoded byte.</param>
- /// <returns>
- /// The mapped character for the bytes listed below; every other byte is
- /// returned with its numeric value unchanged.
- /// </returns>
- public static char Iso8859_13ToUTF16Char(byte P)
- {
-     // Code unit to return; selected by the table below.
-     int codeUnit;
-     switch (P)
-     {
-         case 0xa1: codeUnit = 0x201d; break; // RIGHT DOUBLE QUOTATION MARK
-         case 0xa5: codeUnit = 0x201e; break; // DOUBLE LOW-9 QUOTATION MARK
-         case 0xa8: codeUnit = 0x00d8; break; // LATIN CAPITAL LETTER O WITH STROKE
-         case 0xaa: codeUnit = 0x0156; break; // LATIN CAPITAL LETTER R WITH CEDILLA
-         case 0xaf: codeUnit = 0x00c6; break; // LATIN CAPITAL LETTER AE
-         case 0xb4: codeUnit = 0x201c; break; // LEFT DOUBLE QUOTATION MARK
-         case 0xb8: codeUnit = 0x00f8; break; // LATIN SMALL LETTER O WITH STROKE
-         case 0xba: codeUnit = 0x0157; break; // LATIN SMALL LETTER R WITH CEDILLA
-         case 0xbf: codeUnit = 0x00e6; break; // LATIN SMALL LETTER AE
-         case 0xc0: codeUnit = 0x0104; break; // LATIN CAPITAL LETTER A WITH OGONEK
-         case 0xc1: codeUnit = 0x012e; break; // LATIN CAPITAL LETTER I WITH OGONEK
-         case 0xc2: codeUnit = 0x0100; break; // LATIN CAPITAL LETTER A WITH MACRON
-         case 0xc3: codeUnit = 0x0106; break; // LATIN CAPITAL LETTER C WITH ACUTE
-         case 0xc6: codeUnit = 0x0118; break; // LATIN CAPITAL LETTER E WITH OGONEK
-         case 0xc7: codeUnit = 0x0112; break; // LATIN CAPITAL LETTER E WITH MACRON
-         case 0xc8: codeUnit = 0x010c; break; // LATIN CAPITAL LETTER C WITH CARON
-         case 0xca: codeUnit = 0x0179; break; // LATIN CAPITAL LETTER Z WITH ACUTE
-         case 0xcb: codeUnit = 0x0116; break; // LATIN CAPITAL LETTER E WITH DOT ABOVE
-         case 0xcc: codeUnit = 0x0122; break; // LATIN CAPITAL LETTER G WITH CEDILLA
-         case 0xcd: codeUnit = 0x0136; break; // LATIN CAPITAL LETTER K WITH CEDILLA
-         case 0xce: codeUnit = 0x012a; break; // LATIN CAPITAL LETTER I WITH MACRON
-         case 0xcf: codeUnit = 0x013b; break; // LATIN CAPITAL LETTER L WITH CEDILLA
-         case 0xd0: codeUnit = 0x0160; break; // LATIN CAPITAL LETTER S WITH CARON
-         case 0xd1: codeUnit = 0x0143; break; // LATIN CAPITAL LETTER N WITH ACUTE
-         case 0xd2: codeUnit = 0x0145; break; // LATIN CAPITAL LETTER N WITH CEDILLA
-         case 0xd4: codeUnit = 0x014c; break; // LATIN CAPITAL LETTER O WITH MACRON
-         case 0xd8: codeUnit = 0x0172; break; // LATIN CAPITAL LETTER U WITH OGONEK
-         case 0xd9: codeUnit = 0x0141; break; // LATIN CAPITAL LETTER L WITH STROKE
-         case 0xda: codeUnit = 0x015a; break; // LATIN CAPITAL LETTER S WITH ACUTE
-         case 0xdb: codeUnit = 0x016a; break; // LATIN CAPITAL LETTER U WITH MACRON
-         case 0xdd: codeUnit = 0x017b; break; // LATIN CAPITAL LETTER Z WITH DOT ABOVE
-         case 0xde: codeUnit = 0x017d; break; // LATIN CAPITAL LETTER Z WITH CARON
-         case 0xe0: codeUnit = 0x0105; break; // LATIN SMALL LETTER A WITH OGONEK
-         case 0xe1: codeUnit = 0x012f; break; // LATIN SMALL LETTER I WITH OGONEK
-         case 0xe2: codeUnit = 0x0101; break; // LATIN SMALL LETTER A WITH MACRON
-         case 0xe3: codeUnit = 0x0107; break; // LATIN SMALL LETTER C WITH ACUTE
-         case 0xe6: codeUnit = 0x0119; break; // LATIN SMALL LETTER E WITH OGONEK
-         case 0xe7: codeUnit = 0x0113; break; // LATIN SMALL LETTER E WITH MACRON
-         case 0xe8: codeUnit = 0x010d; break; // LATIN SMALL LETTER C WITH CARON
-         case 0xea: codeUnit = 0x017a; break; // LATIN SMALL LETTER Z WITH ACUTE
-         case 0xeb: codeUnit = 0x0117; break; // LATIN SMALL LETTER E WITH DOT ABOVE
-         case 0xec: codeUnit = 0x0123; break; // LATIN SMALL LETTER G WITH CEDILLA
-         case 0xed: codeUnit = 0x0137; break; // LATIN SMALL LETTER K WITH CEDILLA
-         case 0xee: codeUnit = 0x012b; break; // LATIN SMALL LETTER I WITH MACRON
-         case 0xef: codeUnit = 0x013c; break; // LATIN SMALL LETTER L WITH CEDILLA
-         case 0xf0: codeUnit = 0x0161; break; // LATIN SMALL LETTER S WITH CARON
-         case 0xf1: codeUnit = 0x0144; break; // LATIN SMALL LETTER N WITH ACUTE
-         case 0xf2: codeUnit = 0x0146; break; // LATIN SMALL LETTER N WITH CEDILLA
-         case 0xf4: codeUnit = 0x014d; break; // LATIN SMALL LETTER O WITH MACRON
-         case 0xf8: codeUnit = 0x0173; break; // LATIN SMALL LETTER U WITH OGONEK
-         case 0xf9: codeUnit = 0x0142; break; // LATIN SMALL LETTER L WITH STROKE
-         case 0xfa: codeUnit = 0x015b; break; // LATIN SMALL LETTER S WITH ACUTE
-         case 0xfb: codeUnit = 0x016b; break; // LATIN SMALL LETTER U WITH MACRON
-         case 0xfd: codeUnit = 0x017c; break; // LATIN SMALL LETTER Z WITH DOT ABOVE
-         case 0xfe: codeUnit = 0x017e; break; // LATIN SMALL LETTER Z WITH CARON
-         case 0xff: codeUnit = 0x2019; break; // RIGHT SINGLE QUOTATION MARK
-         default:
-             // Bytes without a table entry keep their numeric value.
-             codeUnit = P;
-             break;
-     }
-     return (char) codeUnit;
- }
-\r
- /// <summary>
- /// Converts a single ISO-8859-14 (Latin-8) byte to a UTF-16 character.
- /// </summary>
- /// <param name="P">The ISO-8859-14 encoded byte.</param>
- /// <returns>
- /// The mapped character for the bytes listed below; every other byte is
- /// returned with its numeric value unchanged.
- /// </returns>
- public static char Iso8859_14ToUTF16Char(byte P)
- {
-     // No table entry exists below 0xA1, so those bytes map to themselves.
-     if ( P < 0xa1 )
-         return (char) P;
-     switch (P)
-     {
-         case 0xa1: return '\u1e02'; // LATIN CAPITAL LETTER B WITH DOT ABOVE
-         case 0xa2: return '\u1e03'; // LATIN SMALL LETTER B WITH DOT ABOVE
-         case 0xa4: return '\u010a'; // LATIN CAPITAL LETTER C WITH DOT ABOVE
-         case 0xa5: return '\u010b'; // LATIN SMALL LETTER C WITH DOT ABOVE
-         case 0xa6: return '\u1e0a'; // LATIN CAPITAL LETTER D WITH DOT ABOVE
-         case 0xa8: return '\u1e80'; // LATIN CAPITAL LETTER W WITH GRAVE
-         case 0xaa: return '\u1e82'; // LATIN CAPITAL LETTER W WITH ACUTE
-         case 0xab: return '\u1e0b'; // LATIN SMALL LETTER D WITH DOT ABOVE
-         case 0xac: return '\u1ef2'; // LATIN CAPITAL LETTER Y WITH GRAVE
-         case 0xaf: return '\u0178'; // LATIN CAPITAL LETTER Y WITH DIAERESIS
-         case 0xb0: return '\u1e1e'; // LATIN CAPITAL LETTER F WITH DOT ABOVE
-         case 0xb1: return '\u1e1f'; // LATIN SMALL LETTER F WITH DOT ABOVE
-         case 0xb2: return '\u0120'; // LATIN CAPITAL LETTER G WITH DOT ABOVE
-         case 0xb3: return '\u0121'; // LATIN SMALL LETTER G WITH DOT ABOVE
-         case 0xb4: return '\u1e40'; // LATIN CAPITAL LETTER M WITH DOT ABOVE
-         case 0xb5: return '\u1e41'; // LATIN SMALL LETTER M WITH DOT ABOVE
-         case 0xb7: return '\u1e56'; // LATIN CAPITAL LETTER P WITH DOT ABOVE
-         case 0xb8: return '\u1e81'; // LATIN SMALL LETTER W WITH GRAVE
-         case 0xb9: return '\u1e57'; // LATIN SMALL LETTER P WITH DOT ABOVE
-         case 0xba: return '\u1e83'; // LATIN SMALL LETTER W WITH ACUTE
-         case 0xbb: return '\u1e60'; // LATIN CAPITAL LETTER S WITH DOT ABOVE
-         case 0xbc: return '\u1ef3'; // LATIN SMALL LETTER Y WITH GRAVE
-         case 0xbd: return '\u1e84'; // LATIN CAPITAL LETTER W WITH DIAERESIS
-         case 0xbe: return '\u1e85'; // LATIN SMALL LETTER W WITH DIAERESIS
-         case 0xbf: return '\u1e61'; // LATIN SMALL LETTER S WITH DOT ABOVE
-         case 0xd0: return '\u0174'; // LATIN CAPITAL LETTER W WITH CIRCUMFLEX
-         case 0xd7: return '\u1e6a'; // LATIN CAPITAL LETTER T WITH DOT ABOVE
-         case 0xde: return '\u0176'; // LATIN CAPITAL LETTER Y WITH CIRCUMFLEX
-         case 0xf0: return '\u0175'; // LATIN SMALL LETTER W WITH CIRCUMFLEX
-         case 0xf7: return '\u1e6b'; // LATIN SMALL LETTER T WITH DOT ABOVE
-         case 0xfe: return '\u0177'; // LATIN SMALL LETTER Y WITH CIRCUMFLEX
-     }
-     // Upper-half bytes without a table entry also keep their numeric value.
-     return (char) P;
- }
-\r
- /// <summary>
- /// Converts a single ISO-8859-15 (Latin-9) byte to a UTF-16 character.
- /// </summary>
- /// <param name="P">The ISO-8859-15 encoded byte.</param>
- /// <returns>
- /// The mapped character for the eight bytes handled below; every other byte
- /// is returned with its numeric value unchanged.
- /// </returns>
- public static char Iso8859_15ToUTF16Char(byte P)
- {
-     switch (P)
-     {
-         case 0xa4: return (char) 0x20ac; // EURO SIGN
-         // Fixed: this used to return U+00A6 (BROKEN BAR), i.e. the byte unchanged.
-         case 0xa6: return (char) 0x0160; // LATIN CAPITAL LETTER S WITH CARON
-         case 0xa8: return (char) 0x0161; // LATIN SMALL LETTER S WITH CARON
-         case 0xb4: return (char) 0x017d; // LATIN CAPITAL LETTER Z WITH CARON
-         case 0xb8: return (char) 0x017e; // LATIN SMALL LETTER Z WITH CARON
-         case 0xbc: return (char) 0x0152; // LATIN CAPITAL LIGATURE OE
-         case 0xbd: return (char) 0x0153; // LATIN SMALL LIGATURE OE
-         case 0xbe: return (char) 0x0178; // LATIN CAPITAL LETTER Y WITH DIAERESIS
-         default:
-             // All remaining bytes keep their numeric value.
-             return (char) P;
-     }
- }
-\r
- public static char KOI8_RToUTF16Char(byte P)\r
- {\r
- switch (P)\r
- {\r
- case 0x80: return (char) 0x2500; // BOX DRAWINGS LIGHT HORIZONTAL\r
- case 0x81: return (char) 0x2502; // BOX DRAWINGS LIGHT VERTICAL\r
- case 0x82: return (char) 0x250c; // BOX DRAWINGS LIGHT DOWN AND RIGHT\r
- case 0x83: return (char) 0x2510; // BOX DRAWINGS LIGHT DOWN AND LEFT\r
- case 0x84: return (char) 0x2514; // BOX DRAWINGS LIGHT UP AND RIGHT\r
- case 0x85: return (char) 0x2518; // BOX DRAWINGS LIGHT UP AND LEFT\r
- case 0x86: return (char) 0x251c; // BOX DRAWINGS LIGHT VERTICAL AND RIGHT\r
- case 0x87: return (char) 0x2524; // BOX DRAWINGS LIGHT VERTICAL AND LEFT\r
- case 0x88: return (char) 0x252c; // BOX DRAWINGS LIGHT DOWN AND HORIZONTAL\r
- case 0x89: return (char) 0x2534; // BOX DRAWINGS LIGHT UP AND HORIZONTAL\r
- case 0x8a: return (char) 0x253c; // BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL\r
- case 0x8b: return (char) 0x2580; // UPPER HALF BLOCK\r
- case 0x8c: return (char) 0x2584; // LOWER HALF BLOCK\r
- case 0x8d: return (char) 0x2588; // FULL BLOCK\r
- case 0x8e: return (char) 0x258c; // LEFT HALF BLOCK\r
- case 0x8f: return (char) 0x2590; // RIGHT HALF BLOCK\r
- case 0x90: return (char) 0x2591; // LIGHT SHADE\r
- case 0x91: return (char) 0x2592; // MEDIUM SHADE\r
- case 0x92: return (char) 0x2593; // DARK SHADE\r
- case 0x93: return (char) 0x2320; // TOP HALF INTEGRAL\r
- case 0x94: return (char) 0x25a0; // BLACK SQUARE\r
- case 0x95: return (char) 0x2219; // BULLET OPERATOR\r
- case 0x96: return (char) 0x221a; // SQUARE ROOT\r
- case 0x97: return (char) 0x2248; // ALMOST EQUAL TO\r
- case 0x98: return (char) 0x2264; // LESS-THAN OR EQUAL TO\r
- case 0x99: return (char) 0x2265; // GREATER-THAN OR EQUAL TO\r
- case 0x9a: return (char) 0x00a0; // NO-BREAK SPACE\r
- case 0x9b: return (char) 0x2321; // BOTTOM HALF INTEGRAL\r
- case 0x9c: return (char) 0x00b0; // DEGREE SIGN\r
- case 0x9d: return (char) 0x00b2; // SUPERSCRIPT TWO\r
- case 0x9e: return (char) 0x00b7; // MIDDLE DOT\r
- case 0x9f: return (char) 0x00f7; // DIVISION SIGN\r
- case 0xa0: return (char) 0x2550; // BOX DRAWINGS DOUBLE HORIZONTAL\r
- case 0xa1: return (char) 0x2551; // BOX DRAWINGS DOUBLE VERTICAL\r
- case 0xa2: return (char) 0x2552; // BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE\r
- case 0xa3: return (char) 0x0451; // CYRILLIC SMALL LETTER IO\r
- case 0xa4: return (char) 0x2553; // BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE\r
- case 0xa5: return (char) 0x2554; // BOX DRAWINGS DOUBLE DOWN AND RIGHT\r
- case 0xa6: return (char) 0x2555; // BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE\r
- case 0xa7: return (char) 0x2556; // BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE\r
- case 0xa8: return (char) 0x2557; // BOX DRAWINGS DOUBLE DOWN AND LEFT\r
- case 0xa9: return (char) 0x2558; // BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE\r
- case 0xaa: return (char) 0x2559; // BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE\r
- case 0xab: return (char) 0x255a; // BOX DRAWINGS DOUBLE UP AND RIGHT\r
- case 0xac: return (char) 0x255b; // BOX DRAWINGS UP SINGLE AND LEFT DOUBLE\r
- case 0xad: return (char) 0x255c; // BOX DRAWINGS UP DOUBLE AND LEFT SINGLE\r
- case 0xae: return (char) 0x255d; // BOX DRAWINGS DOUBLE UP AND LEFT\r
- case 0xaf: return (char) 0x255e; // BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE\r
- case 0xb0: return (char) 0x255f; // BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE\r
- case 0xb1: return (char) 0x2560; // BOX DRAWINGS DOUBLE VERTICAL AND RIGHT\r
- case 0xb2: return (char) 0x2561; // BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE\r
- case 0xb3: return (char) 0x0401; // CYRILLIC CAPITAL LETTER IO\r
- case 0xb4: return (char) 0x2562; // BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE\r
- case 0xb5: return (char) 0x2563; // BOX DRAWINGS DOUBLE VERTICAL AND LEFT\r
- case 0xb6: return (char) 0x2564; // BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE\r
- case 0xb7: return (char) 0x2565; // BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE\r
- case 0xb8: return (char) 0x2566; // BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL\r
- case 0xb9: return (char) 0x2567; // BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE\r
- case 0xba: return (char) 0x2568; // BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE\r
- case 0xbb: return (char) 0x2569; // BOX DRAWINGS DOUBLE UP AND HORIZONTAL\r
- case 0xbc: return (char) 0x256a; // BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE\r
- case 0xbd: return (char) 0x256b; // BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE\r
- case 0xbe: return (char) 0x256c; // BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL\r
- case 0xbf: return (char) 0x00a9; // COPYRIGHT SIGN\r
- case 0xc0: return (char) 0x044e; // CYRILLIC SMALL LETTER YU\r
- case 0xc1: return (char) 0x0430; // CYRILLIC SMALL LETTER A\r
- case 0xc2: return (char) 0x0431; // CYRILLIC SMALL LETTER BE\r
- case 0xc3: return (char) 0x0446; // CYRILLIC SMALL LETTER TSE\r
- case 0xc4: return (char) 0x0434; // CYRILLIC SMALL LETTER DE\r
- case 0xc5: return (char) 0x0435; // CYRILLIC SMALL LETTER IE\r
- case 0xc6: return (char) 0x0444; // CYRILLIC SMALL LETTER EF\r
- case 0xc7: return (char) 0x0433; // CYRILLIC SMALL LETTER GHE\r
- case 0xc8: return (char) 0x0445; // CYRILLIC SMALL LETTER HA\r
- case 0xc9: return (char) 0x0438; // CYRILLIC SMALL LETTER I\r
- case 0xca: return (char) 0x0439; // CYRILLIC SMALL LETTER SHORT I\r
- case 0xcb: return (char) 0x043a; // CYRILLIC SMALL LETTER KA\r
- case 0xcc: return (char) 0x043b; // CYRILLIC SMALL LETTER EL\r
- case 0xcd: return (char) 0x043c; // CYRILLIC SMALL LETTER EM\r
- case 0xce: return (char) 0x043d; // CYRILLIC SMALL LETTER EN\r
- case 0xcf: return (char) 0x043e; // CYRILLIC SMALL LETTER O\r
- case 0xd0: return (char) 0x043f; // CYRILLIC SMALL LETTER PE\r
- case 0xd1: return (char) 0x044f; // CYRILLIC SMALL LETTER YA\r
- case 0xd2: return (char) 0x0440; // CYRILLIC SMALL LETTER ER\r
- case 0xd3: return (char) 0x0441; // CYRILLIC SMALL LETTER ES\r
- case 0xd4: return (char) 0x0442; // CYRILLIC SMALL LETTER TE\r
- case 0xd5: return (char) 0x0443; // CYRILLIC SMALL LETTER U\r
- case 0xd6: return (char) 0x0436; // CYRILLIC SMALL LETTER ZHE\r
- case 0xd7: return (char) 0x0432; // CYRILLIC SMALL LETTER VE\r
- case 0xd8: return (char) 0x044c; // CYRILLIC SMALL LETTER SOFT SIGN\r
- case 0xd9: return (char) 0x044b; // CYRILLIC SMALL LETTER YERU\r
- case 0xda: return (char) 0x0437; // CYRILLIC SMALL LETTER ZE\r
- case 0xdb: return (char) 0x0448; // CYRILLIC SMALL LETTER SHA\r
- case 0xdc: return (char) 0x044d; // CYRILLIC SMALL LETTER E\r
- case 0xdd: return (char) 0x0449; // CYRILLIC SMALL LETTER SHCHA\r
- case 0xde: return (char) 0x0447; // CYRILLIC SMALL LETTER CHE\r
- case 0xdf: return (char) 0x044a; // CYRILLIC SMALL LETTER HARD SIGN\r
- case 0xe0: return (char) 0x042e; // CYRILLIC CAPITAL LETTER YU\r
- case 0xe1: return (char) 0x0410; // CYRILLIC CAPITAL LETTER A\r
- case 0xe2: return (char) 0x0411; // CYRILLIC CAPITAL LETTER BE\r
- case 0xe3: return (char) 0x0426; // CYRILLIC CAPITAL LETTER TSE\r
- case 0xe4: return (char) 0x0414; // CYRILLIC CAPITAL LETTER DE\r
- case 0xe5: return (char) 0x0415; // CYRILLIC CAPITAL LETTER IE\r
- case 0xe6: return (char) 0x0424; // CYRILLIC CAPITAL LETTER EF\r
- case 0xe7: return (char) 0x0413; // CYRILLIC CAPITAL LETTER GHE\r
- case 0xe8: return (char) 0x0425; // CYRILLIC CAPITAL LETTER HA\r
- case 0xe9: return (char) 0x0418; // CYRILLIC CAPITAL LETTER I\r
- case 0xea: return (char) 0x0419; // CYRILLIC CAPITAL LETTER SHORT I\r
- case 0xeb: return (char) 0x041a; // CYRILLIC CAPITAL LETTER KA\r
- case 0xec: return (char) 0x041b; // CYRILLIC CAPITAL LETTER EL\r
- case 0xed: return (char) 0x041c; // CYRILLIC CAPITAL LETTER EM\r
- case 0xee: return (char) 0x041d; // CYRILLIC CAPITAL LETTER EN\r
- case 0xef: return (char) 0x041e; // CYRILLIC CAPITAL LETTER O\r
- case 0xf0: return (char) 0x041f; // CYRILLIC CAPITAL LETTER PE\r
- case 0xf1: return (char) 0x042f; // CYRILLIC CAPITAL LETTER YA\r
- case 0xf2: return (char) 0x0420; // CYRILLIC CAPITAL LETTER ER\r
- case 0xf3: return (char) 0x0421; // CYRILLIC CAPITAL LETTER ES\r
- case 0xf4: return (char) 0x0422; // CYRILLIC CAPITAL LETTER TE\r
- case 0xf5: return (char) 0x0423; // CYRILLIC CAPITAL LETTER U\r
- case 0xf6: return (char) 0x0416; // CYRILLIC CAPITAL LETTER ZHE\r
- case 0xf7: return (char) 0x0412; // CYRILLIC CAPITAL LETTER VE\r
- case 0xf8: return (char) 0x042c; // CYRILLIC CAPITAL LETTER SOFT SIGN\r
- case 0xf9: return (char) 0x042b; // CYRILLIC CAPITAL LETTER YERU\r
- case 0xfa: return (char) 0x0417; // CYRILLIC CAPITAL LETTER ZE\r
- case 0xfb: return (char) 0x0428; // CYRILLIC CAPITAL LETTER SHA\r
- case 0xfc: return (char) 0x042d; // CYRILLIC CAPITAL LETTER E\r
- case 0xfd: return (char) 0x0429; // CYRILLIC CAPITAL LETTER SHCHA\r
- case 0xfe: return (char) 0x0427; // CYRILLIC CAPITAL LETTER CHE\r
- case 0xff: return (char) 0x042a; // CYRILLIC CAPITAL LETTER HARD SIGN\r
- default:\r
- return (char) P;\r
- }\r
- }\r
-\r
- public static char cp10000_MacRomanToUTF16Char(byte P)\r
- {\r
- switch (P)\r
- {\r
- case 0x80: return (char) 0x00c4; // LATIN CAPITAL LETTER A WITH DIAERESIS\r
- case 0x81: return (char) 0x00c5; // LATIN CAPITAL LETTER A WITH RING ABOVE\r
- case 0x82: return (char) 0x00c7; // LATIN CAPITAL LETTER C WITH CEDILLA\r
- case 0x83: return (char) 0x00c9; // LATIN CAPITAL LETTER E WITH ACUTE\r
- case 0x84: return (char) 0x00d1; // LATIN CAPITAL LETTER N WITH TILDE\r
- case 0x85: return (char) 0x00d6; // LATIN CAPITAL LETTER O WITH DIAERESIS\r
- case 0x86: return (char) 0x00dc; // LATIN CAPITAL LETTER U WITH DIAERESIS\r
- case 0x87: return (char) 0x00e1; // LATIN SMALL LETTER A WITH ACUTE\r
- case 0x88: return (char) 0x00e0; // LATIN SMALL LETTER A WITH GRAVE\r
- case 0x89: return (char) 0x00e2; // LATIN SMALL LETTER A WITH CIRCUMFLEX\r
- case 0x8a: return (char) 0x00e4; // LATIN SMALL LETTER A WITH DIAERESIS\r
- case 0x8b: return (char) 0x00e3; // LATIN SMALL LETTER A WITH TILDE\r
- case 0x8c: return (char) 0x00e5; // LATIN SMALL LETTER A WITH RING ABOVE\r
- case 0x8d: return (char) 0x00e7; // LATIN SMALL LETTER C WITH CEDILLA\r
- case 0x8e: return (char) 0x00e9; // LATIN SMALL LETTER E WITH ACUTE\r
- case 0x8f: return (char) 0x00e8; // LATIN SMALL LETTER E WITH GRAVE\r
- case 0x90: return (char) 0x00ea; // LATIN SMALL LETTER E WITH CIRCUMFLEX\r
- case 0x91: return (char) 0x00eb; // LATIN SMALL LETTER E WITH DIAERESIS\r
- case 0x92: return (char) 0x00ed; // LATIN SMALL LETTER I WITH ACUTE\r
- case 0x93: return (char) 0x00ec; // LATIN SMALL LETTER I WITH GRAVE\r
- case 0x94: return (char) 0x00ee; // LATIN SMALL LETTER I WITH CIRCUMFLEX\r
- case 0x95: return (char) 0x00ef; // LATIN SMALL LETTER I WITH DIAERESIS\r
- case 0x96: return (char) 0x00f1; // LATIN SMALL LETTER N WITH TILDE\r
- case 0x97: return (char) 0x00f3; // LATIN SMALL LETTER O WITH ACUTE\r
- case 0x98: return (char) 0x00f2; // LATIN SMALL LETTER O WITH GRAVE\r
- case 0x99: return (char) 0x00f4; // LATIN SMALL LETTER O WITH CIRCUMFLEX\r
- case 0x9a: return (char) 0x00f6; // LATIN SMALL LETTER O WITH DIAERESIS\r
- case 0x9b: return (char) 0x00f5; // LATIN SMALL LETTER O WITH TILDE\r
- case 0x9c: return (char) 0x00fa; // LATIN SMALL LETTER U WITH ACUTE\r
- case 0x9d: return (char) 0x00f9; // LATIN SMALL LETTER U WITH GRAVE\r
- case 0x9e: return (char) 0x00fb; // LATIN SMALL LETTER U WITH CIRCUMFLEX\r
- case 0x9f: return (char) 0x00fc; // LATIN SMALL LETTER U WITH DIAERESIS\r
- case 0xa0: return (char) 0x2020; // DAGGER\r
- case 0xa1: return (char) 0x00b0; // DEGREE SIGN\r
- case 0xa4: return (char) 0x00a7; // SECTION SIGN\r
- case 0xa5: return (char) 0x2022; // BULLET\r
- case 0xa6: return (char) 0x00b6; // PILCROW SIGN\r
- case 0xa7: return (char) 0x00df; // LATIN SMALL LETTER SHARP S\r
- case 0xa8: return (char) 0x00ae; // REGISTERED SIGN\r
- case 0xaa: return (char) 0x2122; // TRADE MARK SIGN\r
- case 0xab: return (char) 0x00b4; // ACUTE ACCENT\r
- case 0xac: return (char) 0x00a8; // DIAERESIS\r
- case 0xad: return (char) 0x2260; // NOT EQUAL TO\r
- case 0xae: return (char) 0x00c6; // LATIN CAPITAL LIGATURE AE\r
- case 0xaf: return (char) 0x00d8; // LATIN CAPITAL LETTER O WITH STROKE\r
- case 0xb0: return (char) 0x221e; // INFINITY\r
- case 0xb2: return (char) 0x2264; // LESS-THAN OR EQUAL TO\r
- case 0xb3: return (char) 0x2265; // GREATER-THAN OR EQUAL TO\r
- case 0xb4: return (char) 0x00a5; // YEN SIGN\r
- case 0xb6: return (char) 0x2202; // PARTIAL DIFFERENTIAL\r
- case 0xb7: return (char) 0x2211; // N-ARY SUMMATION\r
- case 0xb8: return (char) 0x220f; // N-ARY PRODUCT\r
- case 0xb9: return (char) 0x03c0; // GREEK SMALL LETTER PI\r
- case 0xba: return (char) 0x222b; // INTEGRAL\r
- case 0xbb: return (char) 0x00aa; // FEMININE ORDINAL INDICATOR\r
- case 0xbc: return (char) 0x00ba; // MASCULINE ORDINAL INDICATOR\r
- case 0xbd: return (char) 0x2126; // OHM SIGN\r
- case 0xbe: return (char) 0x00e6; // LATIN SMALL LIGATURE AE\r
- case 0xbf: return (char) 0x00f8; // LATIN SMALL LETTER O WITH STROKE\r
- case 0xc0: return (char) 0x00bf; // INVERTED QUESTION MARK\r
- case 0xc1: return (char) 0x00a1; // INVERTED EXCLAMATION MARK\r
- case 0xc2: return (char) 0x00ac; // NOT SIGN\r
- case 0xc3: return (char) 0x221a; // SQUARE ROOT\r
- case 0xc4: return (char) 0x0192; // LATIN SMALL LETTER F WITH HOOK\r
- case 0xc5: return (char) 0x2248; // ALMOST EQUAL TO\r
- case 0xc6: return (char) 0x2206; // INCREMENT\r
- case 0xc7: return (char) 0x00ab; // LEFT-POINTING DOUBLE ANGLE QUOTATION MARK\r
- case 0xc8: return (char) 0x00bb; // RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK\r
- case 0xc9: return (char) 0x2026; // HORIZONTAL ELLIPSIS\r
- case 0xca: return (char) 0x00a0; // NO-BREAK SPACE\r
- case 0xcb: return (char) 0x00c0; // LATIN CAPITAL LETTER A WITH GRAVE\r
- case 0xcc: return (char) 0x00c3; // LATIN CAPITAL LETTER A WITH TILDE\r
- case 0xcd: return (char) 0x00d5; // LATIN CAPITAL LETTER O WITH TILDE\r
- case 0xce: return (char) 0x0152; // LATIN CAPITAL LIGATURE OE\r
- case 0xcf: return (char) 0x0153; // LATIN SMALL LIGATURE OE\r
- case 0xd0: return (char) 0x2013; // EN DASH\r
- case 0xd1: return (char) 0x2014; // EM DASH\r
- case 0xd2: return (char) 0x201c; // LEFT DOUBLE QUOTATION MARK\r
- case 0xd3: return (char) 0x201d; // RIGHT DOUBLE QUOTATION MARK\r
- case 0xd4: return (char) 0x2018; // LEFT SINGLE QUOTATION MARK\r
- case 0xd5: return (char) 0x2019; // RIGHT SINGLE QUOTATION MARK\r
- case 0xd6: return (char) 0x00f7; // DIVISION SIGN\r
- case 0xd7: return (char) 0x25ca; // LOZENGE\r
- case 0xd8: return (char) 0x00ff; // LATIN SMALL LETTER Y WITH DIAERESIS\r
- case 0xd9: return (char) 0x0178; // LATIN CAPITAL LETTER Y WITH DIAERESIS\r
- case 0xda: return (char) 0x2044; // FRACTION SLASH\r
- case 0xdb: return (char) 0x00a4; // CURRENCY SIGN\r
- case 0xdc: return (char) 0x2039; // SINGLE LEFT-POINTING ANGLE QUOTATION MARK\r
- case 0xdd: return (char) 0x203a; // SINGLE RIGHT-POINTING ANGLE QUOTATION MARK\r
- case 0xde: return (char) 0xfb01; // LATIN SMALL LIGATURE FI\r
- case 0xdf: return (char) 0xfb02; // LATIN SMALL LIGATURE FL\r
- case 0xe0: return (char) 0x2021; // DOUBLE DAGGER\r
- case 0xe1: return (char) 0x00b7; // MIDDLE DOT\r
- case 0xe2: return (char) 0x201a; // SINGLE LOW-9 QUOTATION MARK\r
- case 0xe3: return (char) 0x201e; // DOUBLE LOW-9 QUOTATION MARK\r
- case 0xe4: return (char) 0x2030; // PER MILLE SIGN\r
- case 0xe5: return (char) 0x00c2; // LATIN CAPITAL LETTER A WITH CIRCUMFLEX\r
- case 0xe6: return (char) 0x00ca; // LATIN CAPITAL LETTER E WITH CIRCUMFLEX\r
- case 0xe7: return (char) 0x00c1; // LATIN CAPITAL LETTER A WITH ACUTE\r
- case 0xe8: return (char) 0x00cb; // LATIN CAPITAL LETTER E WITH DIAERESIS\r
- case 0xe9: return (char) 0x00c8; // LATIN CAPITAL LETTER E WITH GRAVE\r
- case 0xea: return (char) 0x00cd; // LATIN CAPITAL LETTER I WITH ACUTE\r
- case 0xeb: return (char) 0x00ce; // LATIN CAPITAL LETTER I WITH CIRCUMFLEX\r
- case 0xec: return (char) 0x00cf; // LATIN CAPITAL LETTER I WITH DIAERESIS\r
- case 0xed: return (char) 0x00cc; // LATIN CAPITAL LETTER I WITH GRAVE\r
- case 0xee: return (char) 0x00d3; // LATIN CAPITAL LETTER O WITH ACUTE\r
- case 0xef: return (char) 0x00d4; // LATIN CAPITAL LETTER O WITH CIRCUMFLEX\r
- case 0xf0: throw new InvalidOperationException("Invalid cp10000_MacRoman sequence [" + P.ToString() + "]"); \r
- case 0xf1: return (char) 0x00d2; // LATIN CAPITAL LETTER O WITH GRAVE\r
- case 0xf2: return (char) 0x00da; // LATIN CAPITAL LETTER U WITH ACUTE\r
- case 0xf3: return (char) 0x00db; // LATIN CAPITAL LETTER U WITH CIRCUMFLEX\r
- case 0xf4: return (char) 0x00d9; // LATIN CAPITAL LETTER U WITH GRAVE\r
- case 0xf5: return (char) 0x0131; // LATIN SMALL LETTER DOTLESS I\r
- case 0xf6: return (char) 0x02c6; // MODIFIER LETTER CIRCUMFLEX ACCENT\r
- case 0xf7: return (char) 0x02dc; // SMALL TILDE\r
- case 0xf8: return (char) 0x00af; // MACRON\r
- case 0xf9: return (char) 0x02d8; // BREVE\r
- case 0xfa: return (char) 0x02d9; // DOT ABOVE\r
- case 0xfb: return (char) 0x02da; // RING ABOVE\r
- case 0xfc: return (char) 0x00b8; // CEDILLA\r
- case 0xfd: return (char) 0x02dd; // DOUBLE ACUTE ACCENT\r
- case 0xfe: return (char) 0x02db; // OGONEK\r
- case 0xff: return (char) 0x02c7; // CARON\r
- default:\r
- return (char) P;\r
- }\r
- }\r
-\r
- public static char cp1250ToUTF16Char(byte P)\r
- {\r
- // This function was provided by Miloslav Skácel (ported by DrW)\r
- switch (P)\r
- {\r
- case 0x80: throw new InvalidOperationException("Invalid Windows-1250 sequence [" + P.ToString() + "]"); \r
- case 0x81: throw new InvalidOperationException("Invalid Windows-1250 sequence [" + P.ToString() + "]"); \r
- case 0x83: throw new InvalidOperationException("Invalid Windows-1250 sequence [" + P.ToString() + "]"); \r
- case 0x88: throw new InvalidOperationException("Invalid Windows-1250 sequence [" + P.ToString() + "]"); \r
- case 0x90: throw new InvalidOperationException("Invalid Windows-1250 sequence [" + P.ToString() + "]"); \r
- case 0x98: throw new InvalidOperationException("Invalid Windows-1250 sequence [" + P.ToString() + "]"); \r
- case 0x82: return (char) 0x201a; // SINGLE LOW-9 QUOTATION MARK\r
- case 0x84: return (char) 0x201e; // DOUBLE LOW-9 QUOTATION MARK\r
- case 0x85: return (char) 0x2026; // HORIZONTAL ELLIPSIS\r
- case 0x86: return (char) 0x2020; // DAGGER\r
- case 0x87: return (char) 0x2021; // DOUBLE DAGGER\r
- case 0x89: return (char) 0x2030; // PER MILLE SIGN\r
- case 0x8a: return (char) 0x0160; // LATIN CAPITAL LETTER S WITH CARON\r
- case 0x8b: return (char) 0x2039; // SINGLE LEFT-POINTING ANGLE QUOTATION MARK\r
- case 0x8c: return (char) 0x015a; // LATIN CAPITAL LETTER S WITH ACUTE\r
- case 0x8d: return (char) 0x0164; // LATIN CAPITAL LETTER T WITH CARON\r
- case 0x8e: return (char) 0x017d; // LATIN CAPITAL LETTER Z WITH CARON\r
- case 0x8f: return (char) 0x0179; // LATIN CAPITAL LETTER Z WITH ACUTE\r
- case 0x91: return (char) 0x2018; // LEFT SINGLE QUOTATION MARK\r
- case 0x92: return (char) 0x2019; // RIGHT SINGLE QUOTATION MARK\r
- case 0x93: return (char) 0x201c; // LEFT DOUBLE QUOTATION MARK\r
- case 0x94: return (char) 0x201d; // RIGHT DOUBLE QUOTATION MARK\r
- case 0x95: return (char) 0x2022; // BULLET\r
- case 0x96: return (char) 0x2013; // EN-DASH\r
- case 0x97: return (char) 0x2014; // EM-DASH\r
- case 0x99: return (char) 0x2122; // TRADE MARK SIGN\r
- case 0x9a: return (char) 0x0161; // LATIN SMALL LETTER S WITH CARON\r
- case 0x9b: return (char) 0x203a; // SINGLE RIGHT-POINTING ANGLE QUOTATION MARK\r
- case 0x9c: return (char) 0x015b; // LATIN SMALL LETTER S WITH ACUTE\r
- case 0x9d: return (char) 0x0165; // LATIN SMALL LETTER T WITH CARON\r
- case 0x9e: return (char) 0x017e; // LATIN SMALL LETTER Z WITH CARON\r
- case 0x9f: return (char) 0x017a; // LATIN SMALL LETTER Z WITH ACUTE\r
- case 0xa0: return (char) 0x00a0; // NO-BREAK SPACE\r
- case 0xa1: return (char) 0x02c7; // CARON\r
- case 0xa2: return (char) 0x02d8; // BREVE\r
- case 0xa3: return (char) 0x0141; // LATIN CAPITAL LETTER L WITH STROKE\r
- case 0xa4: return (char) 0x00a4; // CURRENCY SIGN\r
- case 0xa5: return (char) 0x0104; // LATIN CAPITAL LETTER A WITH OGONEK\r
- case 0xa6: return (char) 0x00a6; // BROKEN BAR\r
- case 0xa7: return (char) 0x00a7; // SECTION SIGN\r
- case 0xa8: return (char) 0x00a8; // DIAERESIS\r
- case 0xa9: return (char) 0x00a9; // COPYRIGHT SIGN\r
- case 0xaa: return (char) 0x015e; // LATIN CAPITAL LETTER S WITH CEDILLA\r
- case 0xab: return (char) 0x00ab; // LEFT-POINTING DOUBLE ANGLE QUOTATION MARK\r
- case 0xac: return (char) 0x00ac; // NOT SIGN\r
- case 0xad: return (char) 0x00ad; // SOFT HYPHEN\r
- case 0xae: return (char) 0x00ae; // REGISTERED SIGN\r
- case 0xaf: return (char) 0x017b; // LATIN CAPITAL LETTER Z WITH DOT ABOVE\r
- case 0xb0: return (char) 0x00b0; // DEGREE SIGN\r
- case 0xb1: return (char) 0x00b1; // PLUS-MINUS SIGN\r
- case 0xb2: return (char) 0x02db; // OGONEK\r
- case 0xb3: return (char) 0x0142; // LATIN SMALL LETTER L WITH STROKE\r
- case 0xb4: return (char) 0x00b4; // ACUTE ACCENT\r
- case 0xb5: return (char) 0x00b5; // MIKRO SIGN\r
- case 0xb6: return (char) 0x00b6; // PILCROW SIGN\r
- case 0xb7: return (char) 0x00b7; // MIDDLE DOT\r
- case 0xb8: return (char) 0x00b8; // CEDILLA\r
- case 0xb9: return (char) 0x0105; // LATIN SMALL LETTER A WITH OGONEK\r
- case 0xba: return (char) 0x015f; // LATIN SMALL LETTER S WITH CEDILLA\r
- case 0xbb: return (char) 0x00bb; // RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK\r
- case 0xbc: return (char) 0x013d; // LATIN CAPITAL LETTER L WITH CARON\r
- case 0xbd: return (char) 0x02dd; // DOUBLE ACUTE ACCENT\r
- case 0xbe: return (char) 0x013e; // LATIN SMALL LETTER L WITH CARON\r
- case 0xbf: return (char) 0x017c; // LATIN SMALL LETTER Z WITH DOT ABOVE\r
- case 0xc0: return (char) 0x0154; // LATIN CAPITAL LETTER R WITH ACUTE\r
- case 0xc1: return (char) 0x00c1; // LATIN CAPITAL LETTER A WITH ACUTE\r
- case 0xc2: return (char) 0x00c2; // LATIN CAPITAL LETTER A WITH CIRCUMFLEX\r
- case 0xc3: return (char) 0x0102; // LATIN CAPITAL LETTER A WITH BREVE\r
- case 0xc4: return (char) 0x00c4; // LATIN CAPITAL LETTER A WITH DIAERESIS\r
- case 0xc5: return (char) 0x0139; // LATIN CAPITAL LETTER L WITH ACUTE\r
- case 0xc6: return (char) 0x0106; // LATIN CAPITAL LETTER C WITH ACUTE\r
- case 0xc7: return (char) 0x00c7; // LATIN CAPITAL LETTER C WITH CEDILLA\r
- case 0xc8: return (char) 0x010c; // LATIN CAPITAL LETTER C WITH CARON\r
- case 0xc9: return (char) 0x00c9; // LATIN CAPITAL LETTER E WITH ACUTE\r
- case 0xca: return (char) 0x0118; // LATIN CAPITAL LETTER E WITH OGONEK\r
- case 0xcb: return (char) 0x00cb; // LATIN CAPITAL LETTER E WITH DIAERESIS\r
- case 0xcc: return (char) 0x011a; // LATIN CAPITAL LETTER E WITH CARON\r
- case 0xcd: return (char) 0x00cd; // LATIN CAPITAL LETTER I WITH ACUTE\r
- case 0xce: return (char) 0x00ce; // LATIN CAPITAL LETTER I WITH CIRCUMFLEX\r
- case 0xcf: return (char) 0x010e; // LATIN CAPITAL LETTER D WITH CARON\r
- case 0xd0: return (char) 0x0110; // LATIN CAPITAL LETTER D WITH STROKE\r
- case 0xd1: return (char) 0x0143; // LATIN CAPITAL LETTER N WITH ACUTE\r
- case 0xd2: return (char) 0x0147; // LATIN CAPITAL LETTER N WITH CARON\r
- case 0xd3: return (char) 0x00d3; // LATIN CAPITAL LETTER O WITH ACUTE\r
- case 0xd4: return (char) 0x00d4; // LATIN CAPITAL LETTER O WITH CIRCUMFLEX\r
- case 0xd5: return (char) 0x0150; // LATIN CAPITAL LETTER O WITH DOUBLE ACUTE\r
- case 0xd6: return (char) 0x00d6; // LATIN CAPITAL LETTER O WITH DIAERESIS\r
- case 0xd7: return (char) 0x00d7; // MULTIPLICATION SIGN\r
- case 0xd8: return (char) 0x0158; // LATIN CAPITAL LETTER R WITH CARON\r
- case 0xd9: return (char) 0x016e; // LATIN CAPITAL LETTER U WITH RING ABOVE\r
- case 0xda: return (char) 0x00da; // LATIN CAPITAL LETTER U WITH ACUTE\r
- case 0xdb: return (char) 0x0170; // LATIN CAPITAL LETTER U WITH WITH DOUBLE ACUTE\r
- case 0xdc: return (char) 0x00dc; // LATIN CAPITAL LETTER U WITH DIAERESIS\r
- case 0xdd: return (char) 0x00dd; // LATIN CAPITAL LETTER Y WITH ACUTE\r
- case 0xde: return (char) 0x0162; // LATIN CAPITAL LETTER T WITH CEDILLA\r
- case 0xdf: return (char) 0x00df; // LATIN SMALL LETTER SHARP S\r
- case 0xe0: return (char) 0x0155; // LATIN SMALL LETTER R WITH ACUTE\r
- case 0xe1: return (char) 0x00e1; // LATIN SMALL LETTER A WITH ACUTE\r
- case 0xe2: return (char) 0x00e2; // LATIN SMALL LETTER A WITH CIRCUMFLEX\r
- case 0xe3: return (char) 0x0103; // LATIN SMALL LETTER A WITH BREVE\r
- case 0xe4: return (char) 0x00e4; // LATIN SMALL LETTER A WITH DIAERESIS\r
- case 0xe5: return (char) 0x013a; // LATIN SMALL LETTER L WITH ACUTE\r
- case 0xe6: return (char) 0x0107; // LATIN SMALL LETTER C WITH ACUTE\r
- case 0xe7: return (char) 0x00e7; // LATIN SMALL LETTER C WITH CEDILLA\r
- case 0xe8: return (char) 0x010d; // LATIN SMALL LETTER C WITH CARON 100D\r
- case 0xe9: return (char) 0x00e9; // LATIN SMALL LETTER E WITH ACUTE\r
- case 0xea: return (char) 0x0119; // LATIN SMALL LETTER E WITH OGONEK\r
- case 0xeb: return (char) 0x00eb; // LATIN SMALL LETTER E WITH DIAERESIS\r
- case 0xec: return (char) 0x011b; // LATIN SMALL LETTER E WITH CARON\r
- case 0xed: return (char) 0x00ed; // LATIN SMALL LETTER I WITH ACUTE\r
- case 0xee: return (char) 0x00ee; // LATIN SMALL LETTER I WITH CIRCUMFLEX\r
- case 0xef: return (char) 0x010f; // LATIN SMALL LETTER D WITH CARON\r
- case 0xf0: return (char) 0x0111; // LATIN SMALL LETTER D WITH STROKE\r
- case 0xf1: return (char) 0x0144; // LATIN SMALL LETTER N WITH ACUTE\r
- case 0xf2: return (char) 0x0148; // LATIN SMALL LETTER N WITH CARON\r
- case 0xf3: return (char) 0x00f3; // LATIN SMALL LETTER O WITH ACUTE\r
- case 0xf4: return (char) 0x00f4; // LATIN SMALL LETTER O WITH CIRCUMFLEX\r
- case 0xf5: return (char) 0x0151; // LATIN SMALL LETTER O WITH DOUBLE ACUTE\r
- case 0xf6: return (char) 0x00f6; // LATIN SMALL LETTER O WITH DIAERESIS\r
- case 0xf7: return (char) 0x00f7; // DIVISION SIGN\r
- case 0xf8: return (char) 0x0159; // LATIN SMALL LETTER R WITH CARON\r
- case 0xf9: return (char) 0x016f; // LATIN SMALL LETTER U WITH RING ABOVE\r
- case 0xfa: return (char) 0x00fa; // LATIN SMALL LETTER U WITH ACUTE\r
- case 0xfb: return (char) 0x0171; // LATIN SMALL LETTER U WITH WITH DOUBLE ACUTE\r
- case 0xfc: return (char) 0x00fc; // LATIN SMALL LETTER U WITH DIAERESIS\r
- case 0xfd: return (char) 0x00fd; // LATIN SMALL LETTER Y WITH ACUTE\r
- case 0xfe: return (char) 0x0163; // LATIN SMALL LETTER T WITH CEDILLA\r
- case 0xff: return (char) 0x02d9; // DOT ABOVE\r
- default:\r
- return (char) P;\r
- }\r
- }\r
-\r
- public static char cp1251ToUTF16Char(byte P)\r
- {\r
- switch (P)\r
- {\r
- case 0x80: return (char) 0x0402; // CYRILLIC CAPITAL LETTER DJE\r
- case 0x81: return (char) 0x0403; // CYRILLIC CAPITAL LETTER GJE\r
- case 0x82: return (char) 0x201a; // SINGLE LOW-9 QUOTATION MARK\r
- case 0x83: return (char) 0x0453; // CYRILLIC SMALL LETTER GJE\r
- case 0x84: return (char) 0x201e; // DOUBLE LOW-9 QUOTATION MARK\r
- case 0x85: return (char) 0x2026; // HORIZONTAL ELLIPSIS\r
- case 0x86: return (char) 0x2020; // DAGGER\r
- case 0x87: return (char) 0x2021; // DOUBLE DAGGER\r
- case 0x88: return (char) 0x20ac; // EURO SIGN\r
- case 0x89: return (char) 0x2030; // PER MILLE SIGN\r
- case 0x8a: return (char) 0x0409; // CYRILLIC CAPITAL LETTER LJE\r
- case 0x8b: return (char) 0x2039; // SINGLE LEFT-POINTING ANGLE QUOTATION MARK\r
- case 0x8c: return (char) 0x040a; // CYRILLIC CAPITAL LETTER NJE\r
- case 0x8d: return (char) 0x040c; // CYRILLIC CAPITAL LETTER KJE\r
- case 0x8e: return (char) 0x040b; // CYRILLIC CAPITAL LETTER TSHE\r
- case 0x8f: return (char) 0x040f; // CYRILLIC CAPITAL LETTER DZHE\r
- case 0x90: return (char) 0x0452; // CYRILLIC SMALL LETTER DJE\r
- case 0x91: return (char) 0x2018; // LEFT SINGLE QUOTATION MARK\r
- case 0x92: return (char) 0x2019; // RIGHT SINGLE QUOTATION MARK\r
- case 0x93: return (char) 0x201c; // LEFT DOUBLE QUOTATION MARK\r
- case 0x94: return (char) 0x201d; // RIGHT DOUBLE QUOTATION MARK\r
- case 0x95: return (char) 0x2022; // BULLET\r
- case 0x96: return (char) 0x2013; // EN DASH\r
- case 0x97: return (char) 0x2014; // EM DASH\r
- case 0x98: throw new InvalidOperationException("Invalid cp1251 sequence [" + P.ToString() + "]"); \r
- case 0x99: return (char) 0x2122; // TRADE MARK SIGN\r
- case 0x9a: return (char) 0x0459; // CYRILLIC SMALL LETTER LJE\r
- case 0x9b: return (char) 0x203a; // SINGLE RIGHT-POINTING ANGLE QUOTATION MARK\r
- case 0x9c: return (char) 0x045a; // CYRILLIC SMALL LETTER NJE\r
- case 0x9d: return (char) 0x045c; // CYRILLIC SMALL LETTER KJE\r
- case 0x9e: return (char) 0x045b; // CYRILLIC SMALL LETTER TSHE\r
- case 0x9f: return (char) 0x045f; // CYRILLIC SMALL LETTER DZHE\r
- case 0xa0: return (char) 0x00a0; // NO-BREAK SPACE\r
- case 0xa1: return (char) 0x040e; // CYRILLIC CAPITAL LETTER SHORT U\r
- case 0xa2: return (char) 0x045e; // CYRILLIC SMALL LETTER SHORT U\r
- case 0xa3: return (char) 0x0408; // CYRILLIC CAPITAL LETTER JE\r
- case 0xa4: return (char) 0x00a4; // CURRENCY SIGN\r
- case 0xa5: return (char) 0x0490; // CYRILLIC CAPITAL LETTER GHE WITH UPTURN\r
- case 0xa8: return (char) 0x0401; // CYRILLIC CAPITAL LETTER IO\r
- case 0xaa: return (char) 0x0404; // CYRILLIC CAPITAL LETTER UKRAINIAN IE\r
- case 0xaf: return (char) 0x0407; // CYRILLIC CAPITAL LETTER YI\r
- case 0xb2: return (char) 0x0406; // CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I\r
- case 0xb3: return (char) 0x0456; // CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I\r
- case 0xb4: return (char) 0x0491; // CYRILLIC SMALL LETTER GHE WITH UPTURN\r
- case 0xb8: return (char) 0x0451; // CYRILLIC SMALL LETTER IO\r
- case 0xb9: return (char) 0x2116; // NUMERO SIGN\r
- case 0xba: return (char) 0x0454; // CYRILLIC SMALL LETTER UKRAINIAN IE\r
- case 0xbc: return (char) 0x0458; // CYRILLIC SMALL LETTER JE\r
- case 0xbd: return (char) 0x0405; // CYRILLIC CAPITAL LETTER DZE\r
- case 0xbe: return (char) 0x0455; // CYRILLIC SMALL LETTER DZE\r
- case 0xbf: return (char) 0x0457; // CYRILLIC SMALL LETTER YI\r
- }\r
-\r
- if ( (P >= 0xc0) | (P <= 0xff) )\r
- return System.Convert.ToChar( P + 0x0350);\r
- return (char) P;\r
- }\r
-\r
- public static char cp1252ToUTF16Char(byte P)\r
- {\r
- // Provided by Olaf Lösken. (ported by DrW)\r
- // Info taken from\r
- // ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1252.TXT\r
- switch (P)\r
- {\r
- case 0x80 : return (char) 0x20AC; //EUROSIGN\r
- case 0x81 : throw new InvalidOperationException("Invalid Windows-1252 sequence [" + P.ToString() + "]"); \r
- case 0x82 : return (char) 0x201A; //SINGLE LOW-9 QUOTATION MARK\r
- case 0x83 : return (char) 0x0192; //ATIN SMALL LETTER F WITH HOOK\r
- case 0x84 : return (char) 0x201E; //DOUBLE LOW-9 QUOTATION MARK\r
- case 0x85 : return (char) 0x2026; //HORIZONTAL ELLIPSIS\r
- case 0x86 : return (char) 0x2020; //DAGGER\r
- case 0x87 : return (char) 0x2021; //DOUBLE DAGGER\r
- case 0x88 : return (char) 0x02C6; //MODIFIER LETTER CIRCUMFLEX ACCENT\r
- case 0x89 : return (char) 0x2030; //PER MILLE SIGN\r
- case 0x8A : return (char) 0x0160; //LATIN CAPITAL LETTER S WITH CARON\r
- case 0x8B : return (char) 0x2039; //SINGLE LEFT-POINTING ANGLE QUOTATION MARK\r
- case 0x8C : return (char) 0x0152; //LATIN CAPITAL LIGATURE OE\r
- case 0x8D : throw new InvalidOperationException("Invalid Windows-1252 sequence [" + P.ToString() + "]"); \r
- case 0x8E : return (char) 0x017D; //LATIN CAPITAL LETTER Z WITH CARON\r
- case 0x8F : throw new InvalidOperationException("Invalid Windows-1252 sequence [" + P.ToString() + "]"); \r
- case 0x90 : throw new InvalidOperationException("Invalid Windows-1252 sequence [" + P.ToString() + "]"); \r
- case 0x91 : return (char) 0x2018; //LEFT SINGLE QUOTATION MARK\r
- case 0x92 : return (char) 0x2019; //RIGHT SINGLE QUOTATION MARK\r
- case 0x93 : return (char) 0x201C; //LEFT DOUBLE QUOTATION MARK\r
- case 0x94 : return (char) 0x201D; //RIGHT DOUBLE QUOTATION MARK\r
- case 0x95 : return (char) 0x2022; //BULLET\r
- case 0x96 : return (char) 0x2013; //EN DASH\r
- case 0x97 : return (char) 0x2014; //EM DASH\r
- case 0x98 : return (char) 0x02DC; //SMALL TILDE\r
- case 0x99 : return (char) 0x2122; //TRADE MARK SIGN\r
- case 0x9A : return (char) 0x0161; //LATIN SMALL LETTER S WITH CARON\r
- case 0x9B : return (char) 0x203A; //SINGLE RIGHT-POINTING ANGLE QUOTATION MARK\r
- case 0x9C : return (char) 0x0153; //LATIN SMALL LIGATURE OE\r
- case 0x9D : throw new InvalidOperationException("Invalid Windows-1252 sequence [" + P.ToString() + "]"); \r
- case 0x9E : return (char) 0x017E; //LATIN SMALL LETTER Z WITH CARON\r
- case 0x9F : return (char) 0x0178; //LATIN CAPITAL LETTER Y WITH D\r
- default:\r
- return (char) P;\r
- }\r
- }\r
-\r
- /// <summary>\r
- /// Read in a UTF-8 encoded character. If no character is on the stream, throws\r
- /// an ArgumentException.<seealso cref="http://www.ietf.org/rfc/rfc2279.txt"/>\r
- /// </summary>\r
- /// <exception cref="InvalidOperationException">Thrownn if 1) called at EOF, \r
- /// 2) invalid UTF-8 encoding found.</exception>\r
- /// <param name="stream">Stream to read from</param>\r
- /// <returns>Encoded character (could be two characters, upper/lower Surragate pair)</returns>\r
- public static int ReadUTF8Char(Stream stream)\r
- {\r
- byte[] buf = new byte[1];\r
-\r
- if ( stream.Read(buf, 0, 1) != 1)\r
- throw new InvalidOperationException("Unexptected EOF reading stream");\r
- \r
- if (buf[0] >= 0x80) // UTF-8 sequence\r
- {\r
- int numOctets = 1;\r
- byte first = buf[0];\r
- int mask = 0x40; \r
- int ucs4 = buf[0];\r
-\r
- // first octed must be 110x xxxx to 1111 110x if high order bit set\r
- if ( (buf[0] & 0xc0) != 0xc0)\r
- throw new InvalidOperationException("Invalid UTF-8 sequence at position " + stream.Position.ToString());\r
-\r
- // we could mask off the first octet and get the number of octets,\r
- // but it's easier to cycle through. If the bit is set, we have another character to read\r
- while ( (mask & first) != 0 )\r
- {\r
- // read next character of stream\r
- if (stream.Length == stream.Position)\r
- throw new InvalidOperationException("Aborted UTF-8 (unexpected EOF) sequence at position " + stream.Position.ToString());\r
- \r
- if ( stream.Read(buf, 0, 1) != 1)\r
- throw new InvalidOperationException("Aborted UTF-8 sequence (missing characters) at position " + stream.Position.ToString());\r
- \r
- // all octet sequence bytes start with 10nn nnnn, or they are invalid\r
- if ( (buf[0] & 0xc0) != 0x80 )\r
- throw new InvalidOperationException("Invalid UTF-8 sequence at position " + stream.Position.ToString());\r
-\r
- // 6 bits are valid in this item (low order 6)\r
- // mask them off and add them\r
- ucs4 = (ucs4 << 6) | (buf[0] & 0x3F); // add bits to result\r
- numOctets++; \r
- mask = mask >> 1; // adjust mask\r
- }\r
-\r
- // Max 6 octets in sequence\r
- if ( numOctets > 6) \r
- throw new InvalidOperationException("Invalid UTF-8 sequence (no 0-bit in hdr) at position " + stream.Position.ToString());\r
- \r
- // UTF-8 can encode up to the following values, per octet size\r
- int[] MaxCode = {0x7F, 0x7FF, 0xFFFF, 0x1FFFFF, 0x3FFFFFF, 0x7FFFFFFF};\r
-\r
- // mask off the original header bits\r
- ucs4 = ucs4 & MaxCode[numOctets - 1]; // array is zero-based\r
-\r
- // check for invalid sequence as suggested by RFC2279\r
- // (check that proper octet sequence size was used to encode character)\r
- // (if 0x7F was mapped to a 2-octet sequence, this is an improper coding)\r
- if ( (numOctets > 1) && (ucs4 <= MaxCode[numOctets -2]))\r
- throw new InvalidOperationException("Invalid UTF-8 sequence (invalid sequence) at position " + stream.Position.ToString());\r
-\r
- return ucs4;\r
- }\r
- else\r
- // 1-byte value, return it\r
- return buf[0];\r
- }\r
- \r
- public static char Utf16LowSurrogate(int val)\r
- {\r
- int val2 = 0xDC00 ^ (val & 0x03FF); // 0xdc00 xor (val and 0x03ff)\r
- return (char) val2;\r
- }\r
-\r
- public static char Utf16HighSurrogate(int val)\r
- {\r
- int value2 = 0xD7C0 + ( val >> 10 );\r
- return (char) value2;\r
- }\r
-\r
- }\r
-}
\ No newline at end of file