--- /dev/null
+// -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*-\r
+//\r
+// System.Xml.XmlInputSource.cs\r
+// port of Open Xml TXmlInputSource class\r
+//\r
+// Author:\r
+//   Daniel Weber (daniel-weber@austin.rr.com)\r
+//\r
+// (C) 2001 Daniel Weber\r
+// \r
+//\r
+\r
+using System;\r
+using System.IO;\r
+\r
+namespace System.Xml\r
+{\r
+ internal class XmlInputSource\r
+ {\r
+ private DomEncodingType Fencoding; // encoding chosen by setEncodingType(); selects the decoder used by getNextChar()\r
+\r
+ private string FpublicID; // public identifier handed to the constructor\r
+ private string FsystemID; // system identifier handed to the constructor\r
+ public string FrootName; // root name found by getRootName(); string.Empty when none was found\r
+\r
+ private Stream FStream; // underlying input; repositioned with Seek() by setEncodingType() and reset()\r
+\r
+ private bool FLastCharWasCR; // line-break normalization state kept between getNextChar() calls\r
+\r
+ // Last code point decoded from a UTF-8 stream. When it is >= 0x10000, getNextChar()\r
+ // returns the high surrogate first and the low surrogate on the following call,\r
+ // then clears this field.\r
+ // See http://www.ietf.org/rfc/rfc2279.txt for a complete description of UTF-8 encoding.\r
+ private int FLastUcs4; \r
+\r
+\r
+ //FLocator: TdomStandardLocator;\r
+\r
+ // public properties\r
+ //===========================================================================\r
+ /// <summary>Character encoding currently assumed for the stream.</summary>\r
+ public DomEncodingType encoding\r
+ {\r
+     get { return Fencoding; }\r
+ }\r
+\r
+ //property locator: TdomStandardLocator read FLocator;\r
+ /// <summary>Public identifier supplied when the input source was created.</summary>\r
+ public string publicId\r
+ {\r
+     get { return FpublicID; }\r
+ }\r
+\r
+ /// <summary>Root name stored in FrootName; empty when no root name was found.</summary>\r
+ public string rootName\r
+ {\r
+     get { return FrootName; }\r
+ }\r
+\r
+ /// <summary>The underlying input stream.</summary>\r
+ public Stream stream\r
+ {\r
+     get { return FStream; }\r
+ }\r
+\r
+ /// <summary>\r
+ /// Placeholder for the Delphi getStreamAsWideString accessor, which has not been\r
+ /// ported: the getter always returns an empty string.\r
+ /// </summary>\r
+ public string streamAsWideString\r
+ {\r
+     // original declaration: wideString read getStreamAsWideString;\r
+     get { return String.Empty; }\r
+ }\r
+\r
+ /// <summary>System identifier supplied when the input source was created.</summary>\r
+ public string systemId\r
+ {\r
+     get { return FsystemID; }\r
+ }\r
+\r
+ // private methods\r
+ //===========================================================================\r
+ /// <summary>\r
+ /// Sniffs the first bytes of the stream to choose the character encoding and stores\r
+ /// the result in Fencoding.\r
+ /// </summary>\r
+ /// <remarks>\r
+ /// A leading 0xFE 0xFF selects UTF-16 big endian and 0xFF 0xFE selects UTF-16 little endian.\r
+ /// Otherwise UTF-8 is assumed, unless the stream starts with "&lt;?xml" (any letter case)\r
+ /// and the encoding attribute of that declaration names one of the supported single-byte\r
+ /// character sets. Any exception raised while sniffing falls back to UTF-8.\r
+ /// The stream is positioned at its start when the method returns.\r
+ /// </remarks>\r
+ private void setEncodingType()\r
+ {\r
+     try\r
+     {\r
+         // "<?xml" is five bytes long, so five bytes are inspected; bytes that cannot be\r
+         // read (short stream) stay zero and simply fail the comparisons below\r
+         byte[] buf = new byte[5];\r
+         int filled = 0;\r
+\r
+         FStream.Seek(0, SeekOrigin.Begin);\r
+         while (filled < buf.Length)\r
+         {\r
+             // Read may deliver fewer bytes than requested, so keep asking until done\r
+             int count = FStream.Read(buf, filled, buf.Length - filled);\r
+             if (count <= 0)\r
+                 break;\r
+             filled += count;\r
+         }\r
+\r
+         // UTF-16 streams start with a byte order mark: FE FF (big endian) or FF FE (little endian)\r
+         if ( (buf[0] == 0xfe) && (buf[1] == 0xff) )\r
+             Fencoding = DomEncodingType.etUTF16BE;\r
+         else if ( (buf[0] == 0xff) && (buf[1] == 0xfe) )\r
+             Fencoding = DomEncodingType.etUTF16LE;\r
+         else\r
+         {\r
+             // assume UTF-8, then look for an encoding in <?xml version="1.0" encoding="ISO-8859-6"?>\r
+             Fencoding = DomEncodingType.etUTF8;\r
+\r
+             bool startsWithXmlDecl =\r
+                 (buf[0] == 0x3c) && (buf[1] == 0x3f) &&             // "<?"\r
+                 ((buf[2] == 0x78) || (buf[2] == 0x58)) &&           // "x" or "X"\r
+                 ((buf[3] == 0x6d) || (buf[3] == 0x4d)) &&           // "m" or "M"\r
+                 ((buf[4] == 0x6c) || (buf[4] == 0x4c));             // "l" or "L"\r
+\r
+             if (startsWithXmlDecl)\r
+             {\r
+                 // read the remainder of the declaration (decoded as UTF-8) up to and including '>'\r
+                 System.Text.StringBuilder tag = new System.Text.StringBuilder();\r
+                 while (FStream.Position != FStream.Length)\r
+                 {\r
+                     char c = getNextChar();\r
+                     tag.Append(c);\r
+\r
+                     if (c == '>')\r
+                         break;\r
+                 }\r
+\r
+                 // upper-case with the invariant culture so the comparison does not depend\r
+                 // on the current locale (e.g. the Turkish dotted/dotless i)\r
+                 string encodeString = readEncodingAttrFromTag(tag.ToString());\r
+                 encodeString = encodeString.ToUpper(System.Globalization.CultureInfo.InvariantCulture);\r
+\r
+                 // The two-digit ISO-8859 parts are tested first: "ISO-8859-1" is a prefix of\r
+                 // "ISO-8859-10" .. "ISO-8859-15" and would otherwise win the substring test.\r
+                 if ( (encodeString.IndexOf("ISO-8859-10") != -1) ||\r
+                      (encodeString.IndexOf("LATIN-6") != -1) )\r
+                     Fencoding = DomEncodingType.etLatin6;\r
+                 else if ( (encodeString.IndexOf("ISO-8859-13") != -1) ||\r
+                           (encodeString.IndexOf("LATIN-7") != -1) )\r
+                     Fencoding = DomEncodingType.etLatin7;\r
+                 else if ( (encodeString.IndexOf("ISO-8859-14") != -1) ||\r
+                           (encodeString.IndexOf("LATIN-8") != -1) )\r
+                     Fencoding = DomEncodingType.etLatin8;\r
+                 else if ( (encodeString.IndexOf("ISO-8859-15") != -1) ||\r
+                           (encodeString.IndexOf("LATIN-9") != -1) )\r
+                     Fencoding = DomEncodingType.etLatin9;\r
+                 else if ( (encodeString.IndexOf("ISO-8859-1") != -1) ||\r
+                           (encodeString.IndexOf("LATIN-1") != -1) )\r
+                     Fencoding = DomEncodingType.etLatin1;\r
+                 else if ( (encodeString.IndexOf("ISO-8859-2") != -1) ||\r
+                           (encodeString.IndexOf("LATIN-2") != -1) )\r
+                     Fencoding = DomEncodingType.etLatin2;\r
+                 else if ( (encodeString.IndexOf("ISO-8859-3") != -1) ||\r
+                           (encodeString.IndexOf("LATIN-3") != -1) )\r
+                     Fencoding = DomEncodingType.etLatin3;\r
+                 else if ( (encodeString.IndexOf("ISO-8859-4") != -1) ||\r
+                           (encodeString.IndexOf("LATIN-4") != -1) )\r
+                     Fencoding = DomEncodingType.etLatin4;\r
+                 else if ( (encodeString.IndexOf("ISO-8859-5") != -1) ||\r
+                           (encodeString.IndexOf("CYRILLIC") != -1) )\r
+                     Fencoding = DomEncodingType.etCyrillic;\r
+                 else if ( (encodeString.IndexOf("ISO-8859-6") != -1) ||\r
+                           (encodeString.IndexOf("ARABIC") != -1) )\r
+                     Fencoding = DomEncodingType.etArabic;\r
+                 else if ( (encodeString.IndexOf("ISO-8859-7") != -1) ||\r
+                           (encodeString.IndexOf("GREEK") != -1) )\r
+                     Fencoding = DomEncodingType.etGreek;\r
+                 else if ( (encodeString.IndexOf("ISO-8859-8") != -1) ||\r
+                           (encodeString.IndexOf("HEBREW") != -1) )\r
+                     Fencoding = DomEncodingType.etHebrew;\r
+                 else if ( (encodeString.IndexOf("ISO-8859-9") != -1) ||\r
+                           (encodeString.IndexOf("LATIN-5") != -1) )\r
+                     Fencoding = DomEncodingType.etLatin5;\r
+                 else if (encodeString.IndexOf("KOI8-R") != -1)\r
+                     Fencoding = DomEncodingType.etKOI8R;\r
+                 else if (encodeString.IndexOf("CP10000_MACROMAN") != -1)\r
+                     Fencoding = DomEncodingType.etcp10000_MacRoman;\r
+                 // the name was upper-cased above, so the Windows aliases must be upper case too\r
+                 else if ( (encodeString.IndexOf("WINDOWS-1250") != -1) ||\r
+                           (encodeString.IndexOf("CP1250") != -1) )\r
+                     Fencoding = DomEncodingType.etcp1250;\r
+                 else if ( (encodeString.IndexOf("WINDOWS-1251") != -1) ||\r
+                           (encodeString.IndexOf("CP1251") != -1) )\r
+                     Fencoding = DomEncodingType.etcp1251;\r
+                 else if ( (encodeString.IndexOf("WINDOWS-1252") != -1) ||\r
+                           (encodeString.IndexOf("CP1252") != -1) )\r
+                     Fencoding = DomEncodingType.etcp1252;\r
+             }\r
+         }\r
+     }\r
+     catch\r
+     {\r
+         // deliberate best effort: a malformed declaration or a read error means UTF-8\r
+         Fencoding = DomEncodingType.etUTF8;\r
+     }\r
+\r
+     FStream.Seek(0, SeekOrigin.Begin);\r
+ }\r
+\r
+ /// <summary>\r
+ /// Helper that extracts the value of the encoding attribute from a declaration tag.\r
+ /// Does not do well-formedness checks beyond the ones listed below.\r
+ /// </summary>\r
+ /// <param name="tag">string to scan</param>\r
+ /// <exception cref="InvalidOperationException">\r
+ /// If a quote appears before any equals sign, the closing quote differs from the\r
+ /// opening quote, or the value contains a character other than a letter A-Z/a-z,\r
+ /// a digit, '_', '-' or '.'.\r
+ /// </exception>\r
+ /// <returns>\r
+ /// The attribute value, or string.Empty if the tag has no "encoding" or the value is\r
+ /// not terminated by a closing quote.\r
+ /// </returns>\r
+ private string readEncodingAttrFromTag( string tag )\r
+ {\r
+     int encodeIndex = tag.IndexOf("encoding");\r
+     if (encodeIndex == -1)\r
+         return string.Empty;\r
+\r
+     bool equalsFound = false;\r
+     bool insideValue = false;\r
+     char quoteChar = (char) 0xffff; // placeholder until the opening quote is seen\r
+     System.Text.StringBuilder value = new System.Text.StringBuilder();\r
+\r
+     for (int i = encodeIndex + "encoding".Length; i < tag.Length; i++)\r
+     {\r
+         char c = tag[i];\r
+\r
+         if (!insideValue)\r
+         {\r
+             if (c == '=')\r
+                 equalsFound = true;\r
+             else if ( (c == '\"') || (c == '\'') )\r
+             {\r
+                 if (!equalsFound)\r
+                     throw new InvalidOperationException("No equals sign found in encoding attribute");\r
+                 insideValue = true;\r
+                 quoteChar = c;\r
+             }\r
+             // anything else in front of the opening quote (normally white space) is skipped\r
+             continue;\r
+         }\r
+\r
+         if (c == quoteChar)\r
+             return value.ToString();\r
+         if ( (c == '\"') || (c == '\'') )\r
+             throw new InvalidOperationException("non-matching quotes in attribute value");\r
+\r
+         // an '=' inside the value is rejected here like any other illegal character;\r
+         // it used to be dropped silently, turning e.g. "UTF=8" into "UTF8"\r
+         bool allowed = ((c >= 'a') && (c <= 'z')) ||\r
+                        ((c >= 'A') && (c <= 'Z')) ||\r
+                        ((c >= '0') && (c <= '9')) ||\r
+                        (c == '_') || (c == '-') || (c == '.');\r
+         if (!allowed)\r
+             throw new InvalidOperationException("invalid character in encoding attribute");\r
+\r
+         value.Append(c);\r
+     }\r
+\r
+     // ran off the end of the tag without finding a closing quote\r
+     return string.Empty;\r
+ }\r
+ \r
+\r
+ /// <summary>\r
+ /// Tells whether the read position has reached the end of the input stream.\r
+ /// </summary>\r
+ /// <returns>true when the stream position equals the stream length</returns>\r
+ public bool atEOF()\r
+ {\r
+     long total = FStream.Length;\r
+     long current = FStream.Position;\r
+     return total == current;\r
+ }\r
+ \r
+ /// <summary>\r
+ /// Sets the internal root name by analyzing the tags at the beginning of the stream.\r
+ /// root name is:\r
+ ///  - the root name listed in a !DOCTYPE tag, or\r
+ ///  - the element tag of the first element found when no !DOCTYPE precedes it\r
+ ///  - empty if a parse error occurs, or no applicable tags are found.\r
+ /// Does not do well-formedness checks - skips comments and proc. instructions\r
+ /// </summary>\r
+ private void getRootName()\r
+ {\r
+     reset();\r
+     FrootName = string.Empty;\r
+\r
+     while ( !atEOF() )\r
+     {\r
+         char c = getNextChar();\r
+\r
+         // skip whitespace to the next tag\r
+         while ( !atEOF() && XmlNames_1_0.IsXmlWhiteSpace( c ) )\r
+             c = getNextChar();\r
+         if (c != '<')\r
+             break; // character data or end of input before any element: give up\r
+\r
+         // Collect the tag up to its terminating '>'. A comment only ends at "-->" and a\r
+         // processing instruction only at "?>", so a '>' inside them does not end the tag.\r
+         System.Text.StringBuilder tag = new System.Text.StringBuilder("<");\r
+         bool closed = false;\r
+         while ( !closed && !atEOF() )\r
+         {\r
+             c = getNextChar();\r
+             tag.Append(c);\r
+             if (c != '>')\r
+                 continue;\r
+\r
+             int len = tag.Length;\r
+             bool isComment = (len >= 4) && (tag[1] == '!') && (tag[2] == '-') && (tag[3] == '-');\r
+             bool isPI = (len >= 2) && (tag[1] == '?');\r
+\r
+             if (isComment)\r
+                 closed = (len >= 7) && (tag[len - 2] == '-') && (tag[len - 3] == '-');\r
+             else if (isPI)\r
+                 closed = (len >= 4) && (tag[len - 2] == '?');\r
+             else\r
+                 closed = true;\r
+         }\r
+\r
+         // The flag (rather than atEOF) decides, so a tag whose '>' is the very last\r
+         // character of the input is still processed.\r
+         if (!closed)\r
+             break;\r
+\r
+         string text = tag.ToString();\r
+\r
+         // Only comments and processing instructions may precede <!DOCTYPE ...> or the root element\r
+         if ( text.StartsWith("<?") || text.StartsWith("<!--") )\r
+             continue;\r
+\r
+         // Either the DOCTYPE declaration or, without one, the first element tag decides the\r
+         // root name. Stop here in both cases so later elements cannot overwrite it;\r
+         // setRootName leaves FrootName empty when the extracted text is not an XML name.\r
+         setRootName( text );\r
+         break;\r
+     }\r
+ }\r
+\r
+ /// <summary>\r
+ /// Extracts the root name from a tag and stores it in FrootName when it is a valid XML name.\r
+ /// For a "&lt;!DOCTYPE name ..." tag the name after the keyword is taken; for any other\r
+ /// tag the name directly after the opening '&lt;' is taken.\r
+ /// </summary>\r
+ /// <param name="doctypeTag">complete text of the tag, starting with '&lt;'</param>\r
+ private void setRootName( string doctypeTag )\r
+ {\r
+     const string doctypeKeyword = "<!DOCTYPE";\r
+\r
+     int start = doctypeTag.IndexOf(doctypeKeyword);\r
+     if ( start == -1 )\r
+         start = 1; // set from element: the name follows the '<'\r
+     else\r
+         start += doctypeKeyword.Length;\r
+\r
+     // short-circuit && keeps the indexer from being evaluated at the end of the string\r
+     while ( ( start < doctypeTag.Length ) && XmlNames_1_0.IsXmlWhiteSpace( doctypeTag[start] ) )\r
+         start++;\r
+\r
+     // the name runs until white space, '>', '[' (internal subset) or '/' (empty element)\r
+     int end = start;\r
+     while ( ( end < doctypeTag.Length ) &&\r
+             !XmlNames_1_0.IsXmlWhiteSpace( doctypeTag[end] ) &&\r
+             ( doctypeTag[end] != '>' ) &&\r
+             ( doctypeTag[end] != '[' ) &&\r
+             ( doctypeTag[end] != '/' ) )\r
+         end++;\r
+\r
+     string tmp = doctypeTag.Substring(start, end - start);\r
+\r
+     if ( XmlNames_1_0.isXmlName(tmp) ) FrootName = tmp;\r
+ }\r
+\r
+ /// <summary>\r
+ /// Reads the next character from the stream, decoding it according to Fencoding\r
+ /// (one of the single-byte character sets, UTF-8 or UTF-16).\r
+ /// Line breaks are normalized: a CR, a LF and a CR LF pair are each returned as a single CR.\r
+ /// </summary>\r
+ /// <returns>\r
+ /// The decoded UTF-16 code unit. For the single-byte and UTF-16 encodings (char) 0xffff is\r
+ /// returned when the stream could not supply a complete character. A UTF-8 code point of\r
+ /// 0x10000 or above is returned as two consecutive calls: high surrogate, then low surrogate.\r
+ /// </returns>\r
+ public char getNextChar()\r
+ {\r
+     byte[] buf = new byte[2];\r
+     char retval = (char) 0xffff;\r
+\r
+     switch (Fencoding)\r
+     {\r
+         // all single-byte character sets: read one byte, then map it to UTF-16\r
+         case DomEncodingType.etLatin1:\r
+         case DomEncodingType.etLatin2:\r
+         case DomEncodingType.etLatin3:\r
+         case DomEncodingType.etLatin4:\r
+         case DomEncodingType.etCyrillic:\r
+         case DomEncodingType.etArabic:\r
+         case DomEncodingType.etGreek:\r
+         case DomEncodingType.etHebrew:\r
+         case DomEncodingType.etLatin5:\r
+         case DomEncodingType.etLatin6:\r
+         case DomEncodingType.etLatin7:\r
+         case DomEncodingType.etLatin8:\r
+         case DomEncodingType.etLatin9:\r
+         case DomEncodingType.etKOI8R:\r
+         case DomEncodingType.etcp10000_MacRoman:\r
+         case DomEncodingType.etcp1250:\r
+         case DomEncodingType.etcp1251:\r
+         case DomEncodingType.etcp1252:\r
+             if (stream.Read(buf, 0, 1) == 1)\r
+                 retval = singleByteToUTF16Char(buf[0]);\r
+             break;\r
+\r
+         case DomEncodingType.etUTF8:\r
+             if ( FLastUcs4 >= 0x10000 )\r
+             {\r
+                 // the previous call returned the high surrogate; output the low surrogate now\r
+                 retval = XmlUtil.Utf16LowSurrogate(FLastUcs4);\r
+                 FLastUcs4 = 0;\r
+             }\r
+             else\r
+             {\r
+                 FLastUcs4 = XmlUtil.ReadUTF8Char( stream );\r
+                 if ( FLastUcs4 >= 0x10000 )\r
+                     retval = XmlUtil.Utf16HighSurrogate(FLastUcs4);\r
+                 else\r
+                     retval = (char) FLastUcs4;\r
+             }\r
+             break;\r
+\r
+         // UTF-16: two bytes form one 16-bit code unit, so the leading byte is shifted by\r
+         // 8 bits (a 16-bit shift overflows char for every non-zero leading byte)\r
+         case DomEncodingType.etUTF16BE:\r
+             if (stream.Read(buf, 0, 2) == 2)\r
+                 retval = (char) ((buf[0] << 8) | buf[1]);\r
+             break;\r
+         case DomEncodingType.etUTF16LE:\r
+             if (stream.Read(buf, 0, 2) == 2)\r
+                 retval = (char) ((buf[1] << 8) | buf[0]);\r
+             break;\r
+     }\r
+\r
+     // Normalize CR LF, a single CR and a single LF to one CR.\r
+     // NOTE(review): XML 1.0 section 2.11 normalizes line breaks to LF; CR is kept here\r
+     // because that is what this class has always handed to its callers - confirm.\r
+     if ( retval == 0x000D )          // 0x000d = CR\r
+     {\r
+         FLastCharWasCR = true;\r
+         return retval;\r
+     }\r
+     if ( retval == 0x000A )          // 0x000a = LF\r
+     {\r
+         if (FLastCharWasCR)\r
+         {\r
+             // second half of a CR LF pair: the CR was already delivered, skip the LF\r
+             FLastCharWasCR = false;\r
+             return getNextChar();\r
+         }\r
+         return (char) 0x000D;\r
+     }\r
+\r
+     FLastCharWasCR = false;\r
+     return retval;\r
+ }\r
+\r
+ /// <summary>\r
+ /// Maps one byte of the current single-byte encoding (Fencoding) to its UTF-16 char\r
+ /// by calling the matching XmlUtil conversion routine.\r
+ /// </summary>\r
+ /// <param name="b">byte read from the stream</param>\r
+ /// <returns>the converted char, or (char) 0xffff when Fencoding is not a single-byte encoding</returns>\r
+ private char singleByteToUTF16Char(byte b)\r
+ {\r
+     switch (Fencoding)\r
+     {\r
+         case DomEncodingType.etLatin1:           return XmlUtil.Iso8859_1ToUTF16Char(b);\r
+         case DomEncodingType.etLatin2:           return XmlUtil.Iso8859_2ToUTF16Char(b);\r
+         case DomEncodingType.etLatin3:           return XmlUtil.Iso8859_3ToUTF16Char(b);\r
+         case DomEncodingType.etLatin4:           return XmlUtil.Iso8859_4ToUTF16Char(b);\r
+         case DomEncodingType.etCyrillic:         return XmlUtil.Iso8859_5ToUTF16Char(b);\r
+         case DomEncodingType.etArabic:           return XmlUtil.Iso8859_6ToUTF16Char(b);\r
+         case DomEncodingType.etGreek:            return XmlUtil.Iso8859_7ToUTF16Char(b);\r
+         case DomEncodingType.etHebrew:           return XmlUtil.Iso8859_8ToUTF16Char(b);\r
+         case DomEncodingType.etLatin5:           return XmlUtil.Iso8859_9ToUTF16Char(b);\r
+         case DomEncodingType.etLatin6:           return XmlUtil.Iso8859_10ToUTF16Char(b);\r
+         case DomEncodingType.etLatin7:           return XmlUtil.Iso8859_13ToUTF16Char(b);\r
+         case DomEncodingType.etLatin8:           return XmlUtil.Iso8859_14ToUTF16Char(b);\r
+         case DomEncodingType.etLatin9:           return XmlUtil.Iso8859_15ToUTF16Char(b);\r
+         case DomEncodingType.etKOI8R:            return XmlUtil.KOI8_RToUTF16Char(b);\r
+         case DomEncodingType.etcp10000_MacRoman: return XmlUtil.cp10000_MacRomanToUTF16Char(b);\r
+         case DomEncodingType.etcp1250:           return XmlUtil.cp1250ToUTF16Char(b);\r
+         case DomEncodingType.etcp1251:           return XmlUtil.cp1251ToUTF16Char(b);\r
+         case DomEncodingType.etcp1252:           return XmlUtil.cp1252ToUTF16Char(b);\r
+         default:                                 return (char) 0xffff;\r
+     }\r
+ }\r
+\r
+ /// <summary>\r
+ /// Rewinds the input and clears the decoder state (pending UTF-8 code point and\r
+ /// line-break flag). UTF-16 streams are positioned just behind their two-byte\r
+ /// 0xfeff/0xfffe mark, every other encoding at offset 0.\r
+ /// </summary>\r
+ public void reset()\r
+ {\r
+     FLastCharWasCR = false;\r
+     FLastUcs4 = 0;\r
+\r
+     bool isUtf16 = (Fencoding == DomEncodingType.etUTF16BE) ||\r
+                    (Fencoding == DomEncodingType.etUTF16LE);\r
+\r
+     // skip the leading byte order mark on UTF-16 streams\r
+     long origin = isUtf16 ? 2 : 0;\r
+     FStream.Seek(origin, SeekOrigin.Begin);\r
+ }\r
+ \r
+/*\r
+ * private\r
+ \r
+\r
+ protected\r
+ function getStreamAsWideString: wideString; virtual;\r
+ procedure skipTextDecl(const locator: TdomStandardLocator); virtual;\r
+ public\r
+ constructor create(const stream: TStream;\r
+ const publicId,\r
+ systemId: wideString); virtual;\r
+ destructor destroy; override;\r
+\r
+\r
+*/\r
+ // Constructor\r
+ //===========================================================================\r
+ /// <summary>\r
+ /// Creates an input source over a stream, then determines the stream's encoding\r
+ /// (setEncodingType) and its root name (getRootName).\r
+ /// </summary>\r
+ /// <remarks>\r
+ /// The constructor is declared public: without an access modifier it was private,\r
+ /// and since the class has no factory method it could not be instantiated at all.\r
+ /// </remarks>\r
+ /// <param name="inputStream">stream to read from; both setup steps call Seek on it</param>\r
+ /// <param name="publicID">public identifier, returned by publicId</param>\r
+ /// <param name="systemID">system identifier, returned by systemId</param>\r
+ /// <exception cref="ArgumentNullException">If inputStream is null.</exception>\r
+ public XmlInputSource(Stream inputStream, string publicID, string systemID)\r
+ {\r
+     if (inputStream == null)\r
+         throw new ArgumentNullException("inputStream", "Null stream passed to XmlInputSource constructor");\r
+\r
+     FStream = inputStream;\r
+     FLastUcs4 = 0;\r
+     FLastCharWasCR = false;\r
+     FpublicID = publicID;\r
+     FsystemID = systemID;\r
+     setEncodingType();\r
+     //FLocator:= TdomStandardLocator.create(self);\r
+     getRootName();\r
+ }\r
+ }\r
+\r
+}\r
--- /dev/null
+// -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*-\r
+//\r
+// internal System.Xml.XmlUtil\r
+//\r
+// Author:\r
+// Daniel Weber (daniel-weber@austin.rr.com)\r
+// Code ported from Open XML 2.3.17 (Delphi/Kylix)\r
+//\r
+// (C) 2001 Daniel Weber\r
+//\r
+\r
+using System;\r
+using System.IO;\r
+\r
+namespace System.Xml\r
+{\r
+ /// <summary>\r
+ /// Helper class with static utility functions that are not Xml version specific\r
+ /// Such as encoding changes\r
+ /// </summary>\r
+ internal class XmlUtil\r
+ {\r
+ /// <summary>\r
+ /// Converts an ISO-8859-1 byte to a UTF-16 char; the byte value is used as the code point.\r
+ /// </summary>\r
+ public static char Iso8859_1ToUTF16Char(byte P)\r
+ {\r
+     char converted = (char) P;\r
+     return converted;\r
+ }\r
+\r
+ /// <summary>\r
+ /// Converts an ISO-8859-2 (Latin-2) byte to a UTF-16 char. Bytes that are not listed\r
+ /// in the table map to the code point with the same value.\r
+ /// </summary>\r
+ public static char Iso8859_2ToUTF16Char(byte P)\r
+ {\r
+     switch (P)\r
+     {\r
+         case 0xa1: return (char) 0x0104;  // LATIN CAPITAL LETTER A WITH OGONEK\r
+         case 0xa2: return (char) 0x02d8;  // BREVE\r
+         case 0xa3: return (char) 0x0141;  // LATIN CAPITAL LETTER L WITH STROKE\r
+         // 0xa5 used to return U+0132 (the IJ ligature); L WITH CARON is U+013D,\r
+         // the upper-case partner of U+013E returned for 0xb5\r
+         case 0xa5: return (char) 0x013d;  // LATIN CAPITAL LETTER L WITH CARON\r
+         case 0xa6: return (char) 0x015a;  // LATIN CAPITAL LETTER S WITH ACUTE\r
+         case 0xa9: return (char) 0x0160;  // LATIN CAPITAL LETTER S WITH CARON\r
+         case 0xaa: return (char) 0x015e;  // LATIN CAPITAL LETTER S WITH CEDILLA\r
+         case 0xab: return (char) 0x0164;  // LATIN CAPITAL LETTER T WITH CARON\r
+         case 0xac: return (char) 0x0179;  // LATIN CAPITAL LETTER Z WITH ACUTE\r
+         case 0xae: return (char) 0x017d;  // LATIN CAPITAL LETTER Z WITH CARON\r
+         case 0xaf: return (char) 0x017b;  // LATIN CAPITAL LETTER Z WITH DOT ABOVE\r
+         case 0xb1: return (char) 0x0105;  // LATIN SMALL LETTER A WITH OGONEK\r
+         case 0xb2: return (char) 0x02db;  // OGONEK\r
+         case 0xb3: return (char) 0x0142;  // LATIN SMALL LETTER L WITH STROKE\r
+         case 0xb5: return (char) 0x013e;  // LATIN SMALL LETTER L WITH CARON\r
+         case 0xb6: return (char) 0x015b;  // LATIN SMALL LETTER S WITH ACUTE\r
+         case 0xb7: return (char) 0x02c7;  // CARON\r
+         case 0xb9: return (char) 0x0161;  // LATIN SMALL LETTER S WITH CARON\r
+         case 0xba: return (char) 0x015f;  // LATIN SMALL LETTER S WITH CEDILLA\r
+         case 0xbb: return (char) 0x0165;  // LATIN SMALL LETTER T WITH CARON\r
+         case 0xbc: return (char) 0x017a;  // LATIN SMALL LETTER Z WITH ACUTE\r
+         case 0xbd: return (char) 0x02dd;  // DOUBLE ACUTE ACCENT\r
+         case 0xbe: return (char) 0x017e;  // LATIN SMALL LETTER Z WITH CARON\r
+         case 0xbf: return (char) 0x017c;  // LATIN SMALL LETTER Z WITH DOT ABOVE\r
+         case 0xc0: return (char) 0x0154;  // LATIN CAPITAL LETTER R WITH ACUTE\r
+         case 0xc3: return (char) 0x0102;  // LATIN CAPITAL LETTER A WITH BREVE\r
+         case 0xc5: return (char) 0x0139;  // LATIN CAPITAL LETTER L WITH ACUTE\r
+         case 0xc6: return (char) 0x0106;  // LATIN CAPITAL LETTER C WITH ACUTE\r
+         case 0xc8: return (char) 0x010c;  // LATIN CAPITAL LETTER C WITH CARON\r
+         case 0xca: return (char) 0x0118;  // LATIN CAPITAL LETTER E WITH OGONEK\r
+         case 0xcc: return (char) 0x011a;  // LATIN CAPITAL LETTER E WITH CARON\r
+         case 0xcf: return (char) 0x010e;  // LATIN CAPITAL LETTER D WITH CARON\r
+         case 0xd0: return (char) 0x0110;  // LATIN CAPITAL LETTER D WITH STROKE\r
+         case 0xd1: return (char) 0x0143;  // LATIN CAPITAL LETTER N WITH ACUTE\r
+         case 0xd2: return (char) 0x0147;  // LATIN CAPITAL LETTER N WITH CARON\r
+         case 0xd5: return (char) 0x0150;  // LATIN CAPITAL LETTER O WITH DOUBLE ACUTE\r
+         case 0xd8: return (char) 0x0158;  // LATIN CAPITAL LETTER R WITH CARON\r
+         case 0xd9: return (char) 0x016e;  // LATIN CAPITAL LETTER U WITH RING ABOVE\r
+         case 0xdb: return (char) 0x0170;  // LATIN CAPITAL LETTER U WITH DOUBLE ACUTE\r
+         case 0xde: return (char) 0x0162;  // LATIN CAPITAL LETTER T WITH CEDILLA\r
+         case 0xe0: return (char) 0x0155;  // LATIN SMALL LETTER R WITH ACUTE\r
+         case 0xe3: return (char) 0x0103;  // LATIN SMALL LETTER A WITH BREVE\r
+         case 0xe5: return (char) 0x013a;  // LATIN SMALL LETTER L WITH ACUTE\r
+         case 0xe6: return (char) 0x0107;  // LATIN SMALL LETTER C WITH ACUTE\r
+         case 0xe8: return (char) 0x010d;  // LATIN SMALL LETTER C WITH CARON\r
+         case 0xea: return (char) 0x0119;  // LATIN SMALL LETTER E WITH OGONEK\r
+         case 0xec: return (char) 0x011b;  // LATIN SMALL LETTER E WITH CARON\r
+         case 0xef: return (char) 0x010f;  // LATIN SMALL LETTER D WITH CARON\r
+         case 0xf0: return (char) 0x0111;  // LATIN SMALL LETTER D WITH STROKE\r
+         case 0xf1: return (char) 0x0144;  // LATIN SMALL LETTER N WITH ACUTE\r
+         case 0xf2: return (char) 0x0148;  // LATIN SMALL LETTER N WITH CARON\r
+         case 0xf5: return (char) 0x0151;  // LATIN SMALL LETTER O WITH DOUBLE ACUTE\r
+         case 0xf8: return (char) 0x0159;  // LATIN SMALL LETTER R WITH CARON\r
+         case 0xf9: return (char) 0x016f;  // LATIN SMALL LETTER U WITH RING ABOVE\r
+         case 0xfb: return (char) 0x0171;  // LATIN SMALL LETTER U WITH DOUBLE ACUTE\r
+         case 0xfe: return (char) 0x0163;  // LATIN SMALL LETTER T WITH CEDILLA\r
+         case 0xff: return (char) 0x02d9;  // DOT ABOVE\r
+         default:\r
+             return (char) P;\r
+     }\r
+ }\r
+\r
+ /// <summary>\r
+ /// Converts an ISO-8859-3 (Latin-3) byte to a UTF-16 char. Bytes that are not listed\r
+ /// in the table map to the code point with the same value.\r
+ /// </summary>\r
+ /// <exception cref="InvalidOperationException">\r
+ /// For 0xa5, 0xae, 0xbe, 0xc3, 0xd0, 0xe3 and 0xf0, which this table rejects.\r
+ /// </exception>\r
+ public static char Iso8859_3ToUTF16Char( byte P)\r
+ {\r
+     switch (P)\r
+     {\r
+         // bytes rejected as invalid\r
+         case 0xa5:\r
+         case 0xae:\r
+         case 0xbe:\r
+         case 0xc3:\r
+         case 0xd0:\r
+         case 0xe3:\r
+         case 0xf0:\r
+             throw new InvalidOperationException("Invalid ISO-8859-3 sequence [" + P.ToString() + "]");\r
+\r
+         case 0xa1: return '\u0126';  // LATIN CAPITAL LETTER H WITH STROKE\r
+         case 0xa2: return '\u02d8';  // BREVE\r
+         case 0xa6: return '\u0124';  // LATIN CAPITAL LETTER H WITH CIRCUMFLEX\r
+         case 0xa9: return '\u0130';  // LATIN CAPITAL LETTER I WITH DOT ABOVE\r
+         case 0xaa: return '\u015e';  // LATIN CAPITAL LETTER S WITH CEDILLA\r
+         case 0xab: return '\u011e';  // LATIN CAPITAL LETTER G WITH BREVE\r
+         case 0xac: return '\u0134';  // LATIN CAPITAL LETTER J WITH CIRCUMFLEX\r
+         case 0xaf: return '\u017b';  // LATIN CAPITAL LETTER Z WITH DOT\r
+         case 0xb1: return '\u0127';  // LATIN SMALL LETTER H WITH STROKE\r
+         case 0xb6: return '\u0125';  // LATIN SMALL LETTER H WITH CIRCUMFLEX\r
+         case 0xb9: return '\u0131';  // LATIN SMALL LETTER DOTLESS I\r
+         case 0xba: return '\u015f';  // LATIN SMALL LETTER S WITH CEDILLA\r
+         case 0xbb: return '\u011f';  // LATIN SMALL LETTER G WITH BREVE\r
+         case 0xbc: return '\u0135';  // LATIN SMALL LETTER J WITH CIRCUMFLEX\r
+         case 0xbf: return '\u017c';  // LATIN SMALL LETTER Z WITH DOT\r
+         case 0xc5: return '\u010a';  // LATIN CAPITAL LETTER C WITH DOT ABOVE\r
+         case 0xc6: return '\u0108';  // LATIN CAPITAL LETTER C WITH CIRCUMFLEX\r
+         case 0xd5: return '\u0120';  // LATIN CAPITAL LETTER G WITH DOT ABOVE\r
+         case 0xd8: return '\u011c';  // LATIN CAPITAL LETTER G WITH CIRCUMFLEX\r
+         case 0xdd: return '\u016c';  // LATIN CAPITAL LETTER U WITH BREVE\r
+         case 0xde: return '\u015c';  // LATIN CAPITAL LETTER S WITH CIRCUMFLEX\r
+         case 0xe5: return '\u010b';  // LATIN SMALL LETTER C WITH DOT ABOVE\r
+         case 0xe6: return '\u0109';  // LATIN SMALL LETTER C WITH CIRCUMFLEX\r
+         case 0xf5: return '\u0121';  // LATIN SMALL LETTER G WITH DOT ABOVE\r
+         case 0xf8: return '\u011d';  // LATIN SMALL LETTER G WITH CIRCUMFLEX\r
+         case 0xfd: return '\u016d';  // LATIN SMALL LETTER U WITH BREVE\r
+         case 0xfe: return '\u015d';  // LATIN SMALL LETTER S WITH CIRCUMFLEX\r
+         case 0xff: return '\u02d9';  // DOT ABOVE\r
+\r
+         default:\r
+             return (char) P;\r
+     }\r
+ }\r
+\r
+ /// <summary>\r
+ /// Converts an ISO-8859-4 (Latin-4) byte to a UTF-16 char. Bytes that are not listed\r
+ /// in the table map to the code point with the same value.\r
+ /// </summary>\r
+ public static char Iso8859_4ToUTF16Char( byte P)\r
+ {\r
+     switch (P)\r
+     {\r
+         case 0xa1: return '\u0104';  // LATIN CAPITAL LETTER A WITH OGONEK\r
+         case 0xa2: return '\u0138';  // LATIN SMALL LETTER KRA\r
+         case 0xa3: return '\u0156';  // LATIN CAPITAL LETTER R WITH CEDILLA\r
+         case 0xa5: return '\u0128';  // LATIN CAPITAL LETTER I WITH TILDE\r
+         case 0xa6: return '\u013b';  // LATIN CAPITAL LETTER L WITH CEDILLA\r
+         case 0xa9: return '\u0160';  // LATIN CAPITAL LETTER S WITH CARON\r
+         case 0xaa: return '\u0112';  // LATIN CAPITAL LETTER E WITH MACRON\r
+         case 0xab: return '\u0122';  // LATIN CAPITAL LETTER G WITH CEDILLA\r
+         case 0xac: return '\u0166';  // LATIN CAPITAL LETTER T WITH STROKE\r
+         case 0xae: return '\u017d';  // LATIN CAPITAL LETTER Z WITH CARON\r
+         case 0xb1: return '\u0105';  // LATIN SMALL LETTER A WITH OGONEK\r
+         case 0xb2: return '\u02db';  // OGONEK\r
+         case 0xb3: return '\u0157';  // LATIN SMALL LETTER R WITH CEDILLA\r
+         case 0xb5: return '\u0129';  // LATIN SMALL LETTER I WITH TILDE\r
+         case 0xb6: return '\u013c';  // LATIN SMALL LETTER L WITH CEDILLA\r
+         case 0xb7: return '\u02c7';  // CARON\r
+         case 0xb9: return '\u0161';  // LATIN SMALL LETTER S WITH CARON\r
+         case 0xba: return '\u0113';  // LATIN SMALL LETTER E WITH MACRON\r
+         case 0xbb: return '\u0123';  // LATIN SMALL LETTER G WITH CEDILLA\r
+         case 0xbc: return '\u0167';  // LATIN SMALL LETTER T WITH STROKE\r
+         case 0xbd: return '\u014a';  // LATIN CAPITAL LETTER ENG\r
+         case 0xbe: return '\u017e';  // LATIN SMALL LETTER Z WITH CARON\r
+         case 0xbf: return '\u014b';  // LATIN SMALL LETTER ENG\r
+         case 0xc0: return '\u0100';  // LATIN CAPITAL LETTER A WITH MACRON\r
+         case 0xc7: return '\u012e';  // LATIN CAPITAL LETTER I WITH OGONEK\r
+         case 0xc8: return '\u010c';  // LATIN CAPITAL LETTER C WITH CARON\r
+         case 0xca: return '\u0118';  // LATIN CAPITAL LETTER E WITH OGONEK\r
+         case 0xcc: return '\u0116';  // LATIN CAPITAL LETTER E WITH DOT ABOVE\r
+         case 0xcf: return '\u012a';  // LATIN CAPITAL LETTER I WITH MACRON\r
+         case 0xd0: return '\u0110';  // LATIN CAPITAL LETTER D WITH STROKE\r
+         case 0xd1: return '\u0145';  // LATIN CAPITAL LETTER N WITH CEDILLA\r
+         case 0xd2: return '\u014c';  // LATIN CAPITAL LETTER O WITH MACRON\r
+         case 0xd3: return '\u0136';  // LATIN CAPITAL LETTER K WITH CEDILLA\r
+         case 0xd9: return '\u0172';  // LATIN CAPITAL LETTER U WITH OGONEK\r
+         case 0xdd: return '\u0168';  // LATIN CAPITAL LETTER U WITH TILDE\r
+         case 0xde: return '\u016a';  // LATIN CAPITAL LETTER U WITH MACRON\r
+         case 0xe0: return '\u0101';  // LATIN SMALL LETTER A WITH MACRON\r
+         case 0xe7: return '\u012f';  // LATIN SMALL LETTER I WITH OGONEK\r
+         case 0xe8: return '\u010d';  // LATIN SMALL LETTER C WITH CARON\r
+         case 0xea: return '\u0119';  // LATIN SMALL LETTER E WITH OGONEK\r
+         case 0xec: return '\u0117';  // LATIN SMALL LETTER E WITH DOT ABOVE\r
+         case 0xef: return '\u012b';  // LATIN SMALL LETTER I WITH MACRON\r
+         case 0xf0: return '\u0111';  // LATIN SMALL LETTER D WITH STROKE\r
+         case 0xf1: return '\u0146';  // LATIN SMALL LETTER N WITH CEDILLA\r
+         case 0xf2: return '\u014d';  // LATIN SMALL LETTER O WITH MACRON\r
+         case 0xf3: return '\u0137';  // LATIN SMALL LETTER K WITH CEDILLA\r
+         case 0xf9: return '\u0173';  // LATIN SMALL LETTER U WITH OGONEK\r
+         case 0xfd: return '\u0169';  // LATIN SMALL LETTER U WITH TILDE\r
+         case 0xfe: return '\u016b';  // LATIN SMALL LETTER U WITH MACRON\r
+         case 0xff: return '\u02d9';  // DOT ABOVE\r
+\r
+         default:\r
+             return (char) P;\r
+     }\r
+ }\r
+\r
+ /// <summary>\r
+ /// Converts an ISO-8859-5 (Cyrillic) byte to a UTF-16 char.\r
+ /// 0x00..0xa0 and 0xad keep their value, 0xf0 and 0xfd are mapped individually,\r
+ /// and every other byte is shifted by 0x0360.\r
+ /// </summary>\r
+ public static char Iso8859_5ToUTF16Char(byte P)\r
+ {\r
+     switch (P)\r
+     {\r
+         case 0xad: return (char) P;\r
+         case 0xf0: return '\u2116';  // NUMERO SIGN\r
+         case 0xfd: return '\u00a7';  // SECTION SIGN\r
+     }\r
+\r
+     if (P <= 0xa0)\r
+         return (char) P;\r
+\r
+     int shifted = 0x0360 + P;\r
+     return System.Convert.ToChar( shifted );\r
+ }\r
+\r
+ /// <summary>\r
+ /// Converts an ISO-8859-6 (Arabic) byte to a UTF-16 char.\r
+ /// </summary>\r
+ /// <exception cref="InvalidOperationException">If the byte has no character assigned.</exception>\r
+ public static char Iso8859_6ToUTF16Char(byte P)\r
+ {\r
+     // 0x00..0xa0, CURRENCY SIGN (0xa4) and SOFT HYPHEN (0xad) keep their value\r
+     if ( (P <= 0xa0) || (P == 0xa4) || (P == 0xad) )\r
+         return (char) P;\r
+\r
+     // Arabic letters and punctuation sit at a fixed distance of 0x0560 from their byte:\r
+     // 0xac -> U+060C ARABIC COMMA, 0xc1 -> U+0621 HAMZA, 0xf2 -> U+0652 SUKUN.\r
+     // (The previous offset 0x0580 produced characters 0x20 code points too high.)\r
+     if ( (P == 0xac) || (P == 0xbb) || (P == 0xbf) ||\r
+          ((P >= 0xc1) && (P <= 0xda)) ||\r
+          ((P >= 0xe0) && (P <= 0xf2)) )\r
+         return (char) (P + 0x0560);\r
+\r
+     throw new InvalidOperationException("Invalid ISO-8859-6 sequence [" + P.ToString() + "]");\r
+ }\r
+\r
+ /// <summary>\r
+ /// Converts an ISO-8859-7 (Greek) byte to a UTF-16 char.\r
+ /// </summary>\r
+ /// <exception cref="InvalidOperationException">For 0xd2 and 0xff, which have no character assigned.</exception>\r
+ public static char Iso8859_7ToUTF16Char(byte P)\r
+ {\r
+     if ( P <= 0xa0 )\r
+         return (char) P;\r
+     else if ( P == 0xa3 )                   // POUND SIGN keeps its value; it used to be\r
+         return (char) P;                    // shifted into the Greek block (U+0373)\r
+     else if ( (P >= 0xa6) && (P <= 0xa9) )\r
+         return (char) P;\r
+     else if ( (P >= 0xab) && (P <= 0xad) )\r
+         return (char) P;\r
+     else if ( (P >= 0xb0) && (P <= 0xb3) )\r
+         return (char) P;\r
+     else if ( (P == 0xb7) || (P == 0xbb) || (P == 0xbd) )\r
+         return (char) P;\r
+     else if ( P == 0xa1 )                   // LEFT SINGLE QUOTATION MARK\r
+         return (char) 0x2018;\r
+     else if ( P == 0xa2 )                   // RIGHT SINGLE QUOTATION MARK\r
+         return (char) 0x2019;\r
+     else if ( P == 0xaf )                   // HORIZONTAL BAR\r
+         return (char) 0x2015;\r
+     else if ( (P == 0xd2) || (P == 0xff) )\r
+         throw new InvalidOperationException("Invalid ISO-8859-7 sequence [" + P.ToString() + "]");\r
+     else\r
+         // Greek letters and tonos marks: 0xb4 -> U+0384 ... 0xfe -> U+03CE.\r
+         // NOTE(review): 0xa4, 0xa5, 0xaa and 0xae also end up here; ISO-8859-7:1987\r
+         // appears to leave them unassigned - confirm whether they should be rejected.\r
+         return System.Convert.ToChar(P + 0x02d0);\r
+ }\r
+\r
+ /// <summary>\r
+ /// Converts an ISO-8859-8 (Hebrew) byte to a UTF-16 char.\r
+ /// </summary>\r
+ /// <exception cref="InvalidOperationException">If the byte has no character assigned.</exception>\r
+ public static char Iso8859_8ToUTF16Char(byte P)\r
+ {\r
+     if ( P <= 0xa0 )\r
+         return (char) P;\r
+     else if ( (P >= 0xa2) && (P <= 0xa9) )\r
+         return (char) P;\r
+     else if ( (P >= 0xab) && (P <= 0xae) )\r
+         return (char) P;\r
+     else if ( (P >= 0xb0) && (P <= 0xb9) )\r
+         return (char) P;\r
+     else if ( (P >= 0xbb) && (P <= 0xbe) )\r
+         return (char) P;\r
+     else if ( P == 0xaa )                   // MULTIPLICATION SIGN\r
+         return (char) 0x00d7;\r
+     else if ( P == 0xaf )                   // OVERLINE\r
+         return (char) 0x203e;\r
+     else if ( P == 0xba )                   // DIVISION SIGN\r
+         return (char) 0x00f7;\r
+     else if ( P == 0xdf )                   // DOUBLE LOW LINE\r
+         return (char) 0x2017;\r
+     else if ( (P >= 0xe0) && (P <= 0xfa) )\r
+         // Hebrew letters: 0xe0 -> U+05D0 ALEF ... 0xfa -> U+05EA TAV, an offset of 0x04f0.\r
+         // (The previous offset 0x04e0 landed 0x10 code points too low, on U+05C0..U+05DA.)\r
+         return System.Convert.ToChar(P + 0x04f0);\r
+     else\r
+         throw new InvalidOperationException("Invalid ISO-8859-8 sequence [" + P.ToString() + "]");\r
+ }\r
+\r
+ /// <summary>\r
+ /// Converts an ISO-8859-9 (Latin-5) byte to a UTF-16 char. Only six bytes differ from\r
+ /// their own value; every other byte maps to the code point with the same value.\r
+ /// </summary>\r
+ public static char Iso8859_9ToUTF16Char(byte P)\r
+ {\r
+     switch (P)\r
+     {\r
+         case 0xd0: return '\u011e';  // LATIN CAPITAL LETTER G WITH BREVE\r
+         case 0xdd: return '\u0130';  // LATIN CAPITAL LETTER I WITH DOT ABOVE\r
+         case 0xde: return '\u015e';  // LATIN CAPITAL LETTER S WITH CEDILLA\r
+         case 0xf0: return '\u011f';  // LATIN SMALL LETTER G WITH BREVE\r
+         case 0xfd: return '\u0131';  // LATIN SMALL LETTER DOTLESS I\r
+         case 0xfe: return '\u015f';  // LATIN SMALL LETTER S WITH CEDILLA\r
+     }\r
+\r
+     return (char) P;\r
+ }\r
+\r
+ /// <summary>\r
+ /// Converts an ISO-8859-10 (Latin-6) byte to a UTF-16 char. Bytes that are not listed\r
+ /// in the table map to the code point with the same value.\r
+ /// </summary>\r
+ public static char Iso8859_10ToUTF16Char(byte P)\r
+ {\r
+     switch (P)\r
+     {\r
+         case 0xa1: return '\u0104';  // LATIN CAPITAL LETTER A WITH OGONEK\r
+         case 0xa2: return '\u0112';  // LATIN CAPITAL LETTER E WITH MACRON\r
+         case 0xa3: return '\u0122';  // LATIN CAPITAL LETTER G WITH CEDILLA\r
+         case 0xa4: return '\u012a';  // LATIN CAPITAL LETTER I WITH MACRON\r
+         case 0xa5: return '\u0128';  // LATIN CAPITAL LETTER I WITH TILDE\r
+         case 0xa6: return '\u0136';  // LATIN CAPITAL LETTER K WITH CEDILLA\r
+         case 0xa8: return '\u013b';  // LATIN CAPITAL LETTER L WITH CEDILLA\r
+         case 0xa9: return '\u0110';  // LATIN CAPITAL LETTER D WITH STROKE\r
+         case 0xaa: return '\u0160';  // LATIN CAPITAL LETTER S WITH CARON\r
+         case 0xab: return '\u0166';  // LATIN CAPITAL LETTER T WITH STROKE\r
+         case 0xac: return '\u017d';  // LATIN CAPITAL LETTER Z WITH CARON\r
+         case 0xae: return '\u016a';  // LATIN CAPITAL LETTER U WITH MACRON\r
+         case 0xaf: return '\u014a';  // LATIN CAPITAL LETTER ENG\r
+         case 0xb1: return '\u0105';  // LATIN SMALL LETTER A WITH OGONEK\r
+         case 0xb2: return '\u0113';  // LATIN SMALL LETTER E WITH MACRON\r
+         case 0xb3: return '\u0123';  // LATIN SMALL LETTER G WITH CEDILLA\r
+         case 0xb4: return '\u012b';  // LATIN SMALL LETTER I WITH MACRON\r
+         case 0xb5: return '\u0129';  // LATIN SMALL LETTER I WITH TILDE\r
+         case 0xb6: return '\u0137';  // LATIN SMALL LETTER K WITH CEDILLA\r
+         case 0xb8: return '\u013c';  // LATIN SMALL LETTER L WITH CEDILLA\r
+         case 0xb9: return '\u0111';  // LATIN SMALL LETTER D WITH STROKE\r
+         case 0xba: return '\u0161';  // LATIN SMALL LETTER S WITH CARON\r
+         case 0xbb: return '\u0167';  // LATIN SMALL LETTER T WITH STROKE\r
+         case 0xbc: return '\u017e';  // LATIN SMALL LETTER Z WITH CARON\r
+         case 0xbd: return '\u2015';  // HORIZONTAL BAR\r
+         case 0xbe: return '\u016b';  // LATIN SMALL LETTER U WITH MACRON\r
+         case 0xbf: return '\u014b';  // LATIN SMALL LETTER ENG\r
+         case 0xc0: return '\u0100';  // LATIN CAPITAL LETTER A WITH MACRON\r
+         case 0xc7: return '\u012e';  // LATIN CAPITAL LETTER I WITH OGONEK\r
+         case 0xc8: return '\u010c';  // LATIN CAPITAL LETTER C WITH CARON\r
+         case 0xca: return '\u0118';  // LATIN CAPITAL LETTER E WITH OGONEK\r
+         case 0xcc: return '\u0116';  // LATIN CAPITAL LETTER E WITH DOT ABOVE\r
+         case 0xd1: return '\u0145';  // LATIN CAPITAL LETTER N WITH CEDILLA\r
+         case 0xd2: return '\u014c';  // LATIN CAPITAL LETTER O WITH MACRON\r
+         case 0xd7: return '\u0168';  // LATIN CAPITAL LETTER U WITH TILDE\r
+         case 0xd9: return '\u0172';  // LATIN CAPITAL LETTER U WITH OGONEK\r
+         case 0xe0: return '\u0101';  // LATIN SMALL LETTER A WITH MACRON\r
+         case 0xe7: return '\u012f';  // LATIN SMALL LETTER I WITH OGONEK\r
+         case 0xe8: return '\u010d';  // LATIN SMALL LETTER C WITH CARON\r
+         case 0xea: return '\u0119';  // LATIN SMALL LETTER E WITH OGONEK\r
+         case 0xec: return '\u0117';  // LATIN SMALL LETTER E WITH DOT ABOVE\r
+         case 0xf1: return '\u0146';  // LATIN SMALL LETTER N WITH CEDILLA\r
+         case 0xf2: return '\u014d';  // LATIN SMALL LETTER O WITH MACRON\r
+         case 0xf7: return '\u0169';  // LATIN SMALL LETTER U WITH TILDE\r
+         case 0xf9: return '\u0173';  // LATIN SMALL LETTER U WITH OGONEK\r
+         case 0xff: return '\u0138';  // LATIN SMALL LETTER KRA\r
+\r
+         default:\r
+             return (char) P;\r
+     }\r
+ }\r
+\r
+ /// <summary>\r
+ /// Converts a single ISO-8859-13 byte to the matching UTF-16 code unit.\r
+ /// </summary>\r
+ /// <param name="P">Byte taken from ISO-8859-13 encoded input.</param>\r
+ /// <returns>The remapped character, or the byte value itself when no entry exists for it.</returns>\r
+ public static char Iso8859_13ToUTF16Char(byte P)\r
+ {\r
+     // The lowest remapped byte is 0xa1, so everything below it passes straight through.\r
+     if (P < 0xa1)\r
+     {\r
+         return (char) P;\r
+     }\r
+\r
+     switch (P)\r
+     {\r
+         case 0xa1: return '\u201d'; // RIGHT DOUBLE QUOTATION MARK\r
+         case 0xa5: return '\u201e'; // DOUBLE LOW-9 QUOTATION MARK\r
+         case 0xa8: return '\u00d8'; // LATIN CAPITAL LETTER O WITH STROKE\r
+         case 0xaa: return '\u0156'; // LATIN CAPITAL LETTER R WITH CEDILLA\r
+         case 0xaf: return '\u00c6'; // LATIN CAPITAL LETTER AE\r
+         case 0xb4: return '\u201c'; // LEFT DOUBLE QUOTATION MARK\r
+         case 0xb8: return '\u00f8'; // LATIN SMALL LETTER O WITH STROKE\r
+         case 0xba: return '\u0157'; // LATIN SMALL LETTER R WITH CEDILLA\r
+         case 0xbf: return '\u00e6'; // LATIN SMALL LETTER AE\r
+         case 0xc0: return '\u0104'; // LATIN CAPITAL LETTER A WITH OGONEK\r
+         case 0xc1: return '\u012e'; // LATIN CAPITAL LETTER I WITH OGONEK\r
+         case 0xc2: return '\u0100'; // LATIN CAPITAL LETTER A WITH MACRON\r
+         case 0xc3: return '\u0106'; // LATIN CAPITAL LETTER C WITH ACUTE\r
+         case 0xc6: return '\u0118'; // LATIN CAPITAL LETTER E WITH OGONEK\r
+         case 0xc7: return '\u0112'; // LATIN CAPITAL LETTER E WITH MACRON\r
+         case 0xc8: return '\u010c'; // LATIN CAPITAL LETTER C WITH CARON\r
+         case 0xca: return '\u0179'; // LATIN CAPITAL LETTER Z WITH ACUTE\r
+         case 0xcb: return '\u0116'; // LATIN CAPITAL LETTER E WITH DOT ABOVE\r
+         case 0xcc: return '\u0122'; // LATIN CAPITAL LETTER G WITH CEDILLA\r
+         case 0xcd: return '\u0136'; // LATIN CAPITAL LETTER K WITH CEDILLA\r
+         case 0xce: return '\u012a'; // LATIN CAPITAL LETTER I WITH MACRON\r
+         case 0xcf: return '\u013b'; // LATIN CAPITAL LETTER L WITH CEDILLA\r
+         case 0xd0: return '\u0160'; // LATIN CAPITAL LETTER S WITH CARON\r
+         case 0xd1: return '\u0143'; // LATIN CAPITAL LETTER N WITH ACUTE\r
+         case 0xd2: return '\u0145'; // LATIN CAPITAL LETTER N WITH CEDILLA\r
+         case 0xd4: return '\u014c'; // LATIN CAPITAL LETTER O WITH MACRON\r
+         case 0xd8: return '\u0172'; // LATIN CAPITAL LETTER U WITH OGONEK\r
+         case 0xd9: return '\u0141'; // LATIN CAPITAL LETTER L WITH STROKE\r
+         case 0xda: return '\u015a'; // LATIN CAPITAL LETTER S WITH ACUTE\r
+         case 0xdb: return '\u016a'; // LATIN CAPITAL LETTER U WITH MACRON\r
+         case 0xdd: return '\u017b'; // LATIN CAPITAL LETTER Z WITH DOT ABOVE\r
+         case 0xde: return '\u017d'; // LATIN CAPITAL LETTER Z WITH CARON\r
+         case 0xe0: return '\u0105'; // LATIN SMALL LETTER A WITH OGONEK\r
+         case 0xe1: return '\u012f'; // LATIN SMALL LETTER I WITH OGONEK\r
+         case 0xe2: return '\u0101'; // LATIN SMALL LETTER A WITH MACRON\r
+         case 0xe3: return '\u0107'; // LATIN SMALL LETTER C WITH ACUTE\r
+         case 0xe6: return '\u0119'; // LATIN SMALL LETTER E WITH OGONEK\r
+         case 0xe7: return '\u0113'; // LATIN SMALL LETTER E WITH MACRON\r
+         case 0xe8: return '\u010d'; // LATIN SMALL LETTER C WITH CARON\r
+         case 0xea: return '\u017a'; // LATIN SMALL LETTER Z WITH ACUTE\r
+         case 0xeb: return '\u0117'; // LATIN SMALL LETTER E WITH DOT ABOVE\r
+         case 0xec: return '\u0123'; // LATIN SMALL LETTER G WITH CEDILLA\r
+         case 0xed: return '\u0137'; // LATIN SMALL LETTER K WITH CEDILLA\r
+         case 0xee: return '\u012b'; // LATIN SMALL LETTER I WITH MACRON\r
+         case 0xef: return '\u013c'; // LATIN SMALL LETTER L WITH CEDILLA\r
+         case 0xf0: return '\u0161'; // LATIN SMALL LETTER S WITH CARON\r
+         case 0xf1: return '\u0144'; // LATIN SMALL LETTER N WITH ACUTE\r
+         case 0xf2: return '\u0146'; // LATIN SMALL LETTER N WITH CEDILLA\r
+         case 0xf4: return '\u014d'; // LATIN SMALL LETTER O WITH MACRON\r
+         case 0xf8: return '\u0173'; // LATIN SMALL LETTER U WITH OGONEK\r
+         case 0xf9: return '\u0142'; // LATIN SMALL LETTER L WITH STROKE\r
+         case 0xfa: return '\u015b'; // LATIN SMALL LETTER S WITH ACUTE\r
+         case 0xfb: return '\u016b'; // LATIN SMALL LETTER U WITH MACRON\r
+         case 0xfd: return '\u017c'; // LATIN SMALL LETTER Z WITH DOT ABOVE\r
+         case 0xfe: return '\u017e'; // LATIN SMALL LETTER Z WITH CARON\r
+         case 0xff: return '\u2019'; // RIGHT SINGLE QUOTATION MARK\r
+     }\r
+\r
+     // Remaining high bytes have no table entry and keep their numeric value.\r
+     return (char) P;\r
+ }\r
+\r
+ /// <summary>\r
+ /// Converts a single ISO-8859-14 byte to the matching UTF-16 code unit.\r
+ /// </summary>\r
+ /// <param name="P">Byte taken from ISO-8859-14 encoded input.</param>\r
+ /// <returns>The remapped character, or the byte value itself when no entry exists for it.</returns>\r
+ public static char Iso8859_14ToUTF16Char(byte P)\r
+ {\r
+     // Start from the identity mapping; the switch overrides only the remapped bytes.\r
+     char mapped = (char) P;\r
+\r
+     switch (P)\r
+     {\r
+         case 0xa1: mapped = '\u1e02'; break; // LATIN CAPITAL LETTER B WITH DOT ABOVE\r
+         case 0xa2: mapped = '\u1e03'; break; // LATIN SMALL LETTER B WITH DOT ABOVE\r
+         case 0xa4: mapped = '\u010a'; break; // LATIN CAPITAL LETTER C WITH DOT ABOVE\r
+         case 0xa5: mapped = '\u010b'; break; // LATIN SMALL LETTER C WITH DOT ABOVE\r
+         case 0xa6: mapped = '\u1e0a'; break; // LATIN CAPITAL LETTER D WITH DOT ABOVE\r
+         case 0xa8: mapped = '\u1e80'; break; // LATIN CAPITAL LETTER W WITH GRAVE\r
+         case 0xaa: mapped = '\u1e82'; break; // LATIN CAPITAL LETTER W WITH ACUTE\r
+         case 0xab: mapped = '\u1e0b'; break; // LATIN SMALL LETTER D WITH DOT ABOVE\r
+         case 0xac: mapped = '\u1ef2'; break; // LATIN CAPITAL LETTER Y WITH GRAVE\r
+         case 0xaf: mapped = '\u0178'; break; // LATIN CAPITAL LETTER Y WITH DIAERESIS\r
+         case 0xb0: mapped = '\u1e1e'; break; // LATIN CAPITAL LETTER F WITH DOT ABOVE\r
+         case 0xb1: mapped = '\u1e1f'; break; // LATIN SMALL LETTER F WITH DOT ABOVE\r
+         case 0xb2: mapped = '\u0120'; break; // LATIN CAPITAL LETTER G WITH DOT ABOVE\r
+         case 0xb3: mapped = '\u0121'; break; // LATIN SMALL LETTER G WITH DOT ABOVE\r
+         case 0xb4: mapped = '\u1e40'; break; // LATIN CAPITAL LETTER M WITH DOT ABOVE\r
+         case 0xb5: mapped = '\u1e41'; break; // LATIN SMALL LETTER M WITH DOT ABOVE\r
+         case 0xb7: mapped = '\u1e56'; break; // LATIN CAPITAL LETTER P WITH DOT ABOVE\r
+         case 0xb8: mapped = '\u1e81'; break; // LATIN SMALL LETTER W WITH GRAVE\r
+         case 0xb9: mapped = '\u1e57'; break; // LATIN SMALL LETTER P WITH DOT ABOVE\r
+         case 0xba: mapped = '\u1e83'; break; // LATIN SMALL LETTER W WITH ACUTE\r
+         case 0xbb: mapped = '\u1e60'; break; // LATIN CAPITAL LETTER S WITH DOT ABOVE\r
+         case 0xbc: mapped = '\u1ef3'; break; // LATIN SMALL LETTER Y WITH GRAVE\r
+         case 0xbd: mapped = '\u1e84'; break; // LATIN CAPITAL LETTER W WITH DIAERESIS\r
+         case 0xbe: mapped = '\u1e85'; break; // LATIN SMALL LETTER W WITH DIAERESIS\r
+         case 0xbf: mapped = '\u1e61'; break; // LATIN SMALL LETTER S WITH DOT ABOVE\r
+         case 0xd0: mapped = '\u0174'; break; // LATIN CAPITAL LETTER W WITH CIRCUMFLEX\r
+         case 0xd7: mapped = '\u1e6a'; break; // LATIN CAPITAL LETTER T WITH DOT ABOVE\r
+         case 0xde: mapped = '\u0176'; break; // LATIN CAPITAL LETTER Y WITH CIRCUMFLEX\r
+         case 0xf0: mapped = '\u0175'; break; // LATIN SMALL LETTER W WITH CIRCUMFLEX\r
+         case 0xf7: mapped = '\u1e6b'; break; // LATIN SMALL LETTER T WITH DOT ABOVE\r
+         case 0xfe: mapped = '\u0177'; break; // LATIN SMALL LETTER Y WITH CIRCUMFLEX\r
+     }\r
+\r
+     return mapped;\r
+ }\r
+\r
+ /// <summary>\r
+ /// Converts a single ISO-8859-15 byte to the matching UTF-16 code unit.\r
+ /// </summary>\r
+ /// <param name="P">Byte taken from ISO-8859-15 encoded input.</param>\r
+ /// <returns>The remapped character, or the byte value itself when no entry exists for it.</returns>\r
+ public static char Iso8859_15ToUTF16Char(byte P)\r
+ {\r
+     switch (P)\r
+     {\r
+         case 0xa4: return (char) 0x20ac; // EURO SIGN\r
+         // 0xa6 previously returned 0x00a6 (BROKEN BAR), i.e. it was not remapped at all;\r
+         // ISO-8859-15 assigns this byte to S WITH CARON.\r
+         case 0xa6: return (char) 0x0160; // LATIN CAPITAL LETTER S WITH CARON\r
+         case 0xa8: return (char) 0x0161; // LATIN SMALL LETTER S WITH CARON\r
+         case 0xb4: return (char) 0x017d; // LATIN CAPITAL LETTER Z WITH CARON\r
+         case 0xb8: return (char) 0x017e; // LATIN SMALL LETTER Z WITH CARON\r
+         case 0xbc: return (char) 0x0152; // LATIN CAPITAL LIGATURE OE\r
+         case 0xbd: return (char) 0x0153; // LATIN SMALL LIGATURE OE\r
+         case 0xbe: return (char) 0x0178; // LATIN CAPITAL LETTER Y WITH DIAERESIS\r
+         default:\r
+             // Every other byte keeps its numeric value.\r
+             return (char) P;\r
+     }\r
+ }\r
+\r
+ /// <summary>\r
+ /// Converts a single KOI8-R byte to the matching UTF-16 code unit.\r
+ /// </summary>\r
+ /// <param name="P">Byte taken from KOI8-R encoded input.</param>\r
+ /// <returns>\r
+ /// The byte value itself for bytes below 0x80; otherwise the table entry for that byte.\r
+ /// </returns>\r
+ public static char KOI8_RToUTF16Char(byte P)\r
+ {\r
+     // One entry per byte 0x80..0xff, in byte order (128 characters, 8 per row).\r
+     const string highHalf =\r
+         "\u2500\u2502\u250c\u2510\u2514\u2518\u251c\u2524" + // 0x80-0x87: light box drawings\r
+         "\u252c\u2534\u253c\u2580\u2584\u2588\u258c\u2590" + // 0x88-0x8f: light box drawings, half/full blocks\r
+         "\u2591\u2592\u2593\u2320\u25a0\u2219\u221a\u2248" + // 0x90-0x97: shades, top half integral, black square, bullet operator, square root, almost equal\r
+         "\u2264\u2265\u00a0\u2321\u00b0\u00b2\u00b7\u00f7" + // 0x98-0x9f: <=, >=, no-break space, bottom half integral, degree, superscript two, middle dot, division\r
+         "\u2550\u2551\u2552\u0451\u2553\u2554\u2555\u2556" + // 0xa0-0xa7: double box drawings; 0xa3 is CYRILLIC SMALL LETTER IO\r
+         "\u2557\u2558\u2559\u255a\u255b\u255c\u255d\u255e" + // 0xa8-0xaf: double box drawings\r
+         "\u255f\u2560\u2561\u0401\u2562\u2563\u2564\u2565" + // 0xb0-0xb7: double box drawings; 0xb3 is CYRILLIC CAPITAL LETTER IO\r
+         "\u2566\u2567\u2568\u2569\u256a\u256b\u256c\u00a9" + // 0xb8-0xbf: double box drawings; 0xbf is COPYRIGHT SIGN\r
+         "\u044e\u0430\u0431\u0446\u0434\u0435\u0444\u0433" + // 0xc0-0xc7: small YU A BE TSE DE IE EF GHE\r
+         "\u0445\u0438\u0439\u043a\u043b\u043c\u043d\u043e" + // 0xc8-0xcf: small HA I SHORT-I KA EL EM EN O\r
+         "\u043f\u044f\u0440\u0441\u0442\u0443\u0436\u0432" + // 0xd0-0xd7: small PE YA ER ES TE U ZHE VE\r
+         "\u044c\u044b\u0437\u0448\u044d\u0449\u0447\u044a" + // 0xd8-0xdf: small SOFT-SIGN YERU ZE SHA E SHCHA CHE HARD-SIGN\r
+         "\u042e\u0410\u0411\u0426\u0414\u0415\u0424\u0413" + // 0xe0-0xe7: capital YU A BE TSE DE IE EF GHE\r
+         "\u0425\u0418\u0419\u041a\u041b\u041c\u041d\u041e" + // 0xe8-0xef: capital HA I SHORT-I KA EL EM EN O\r
+         "\u041f\u042f\u0420\u0421\u0422\u0423\u0416\u0412" + // 0xf0-0xf7: capital PE YA ER ES TE U ZHE VE\r
+         "\u042c\u042b\u0417\u0428\u042d\u0429\u0427\u042a";  // 0xf8-0xff: capital SOFT-SIGN YERU ZE SHA E SHCHA CHE HARD-SIGN\r
+\r
+     if (P < 0x80)\r
+     {\r
+         // The lower half is not remapped.\r
+         return (char) P;\r
+     }\r
+\r
+     return highHalf[P - 0x80];\r
+ }\r
+\r
+ /// <summary>\r
+ /// Converts a single cp10000 (MacRoman) byte to the matching UTF-16 code unit.\r
+ /// </summary>\r
+ /// <param name="P">Byte taken from cp10000 encoded input.</param>\r
+ /// <returns>The remapped character, or the byte value itself when no entry exists for it.</returns>\r
+ /// <exception cref="InvalidOperationException">Raised for byte 0xf0, which this table rejects.</exception>\r
+ public static char cp10000_MacRomanToUTF16Char(byte P)\r
+ {\r
+     int codeUnit;\r
+\r
+     switch (P)\r
+     {\r
+         case 0x80: codeUnit = 0x00c4; break; // LATIN CAPITAL LETTER A WITH DIAERESIS\r
+         case 0x81: codeUnit = 0x00c5; break; // LATIN CAPITAL LETTER A WITH RING ABOVE\r
+         case 0x82: codeUnit = 0x00c7; break; // LATIN CAPITAL LETTER C WITH CEDILLA\r
+         case 0x83: codeUnit = 0x00c9; break; // LATIN CAPITAL LETTER E WITH ACUTE\r
+         case 0x84: codeUnit = 0x00d1; break; // LATIN CAPITAL LETTER N WITH TILDE\r
+         case 0x85: codeUnit = 0x00d6; break; // LATIN CAPITAL LETTER O WITH DIAERESIS\r
+         case 0x86: codeUnit = 0x00dc; break; // LATIN CAPITAL LETTER U WITH DIAERESIS\r
+         case 0x87: codeUnit = 0x00e1; break; // LATIN SMALL LETTER A WITH ACUTE\r
+         case 0x88: codeUnit = 0x00e0; break; // LATIN SMALL LETTER A WITH GRAVE\r
+         case 0x89: codeUnit = 0x00e2; break; // LATIN SMALL LETTER A WITH CIRCUMFLEX\r
+         case 0x8a: codeUnit = 0x00e4; break; // LATIN SMALL LETTER A WITH DIAERESIS\r
+         case 0x8b: codeUnit = 0x00e3; break; // LATIN SMALL LETTER A WITH TILDE\r
+         case 0x8c: codeUnit = 0x00e5; break; // LATIN SMALL LETTER A WITH RING ABOVE\r
+         case 0x8d: codeUnit = 0x00e7; break; // LATIN SMALL LETTER C WITH CEDILLA\r
+         case 0x8e: codeUnit = 0x00e9; break; // LATIN SMALL LETTER E WITH ACUTE\r
+         case 0x8f: codeUnit = 0x00e8; break; // LATIN SMALL LETTER E WITH GRAVE\r
+         case 0x90: codeUnit = 0x00ea; break; // LATIN SMALL LETTER E WITH CIRCUMFLEX\r
+         case 0x91: codeUnit = 0x00eb; break; // LATIN SMALL LETTER E WITH DIAERESIS\r
+         case 0x92: codeUnit = 0x00ed; break; // LATIN SMALL LETTER I WITH ACUTE\r
+         case 0x93: codeUnit = 0x00ec; break; // LATIN SMALL LETTER I WITH GRAVE\r
+         case 0x94: codeUnit = 0x00ee; break; // LATIN SMALL LETTER I WITH CIRCUMFLEX\r
+         case 0x95: codeUnit = 0x00ef; break; // LATIN SMALL LETTER I WITH DIAERESIS\r
+         case 0x96: codeUnit = 0x00f1; break; // LATIN SMALL LETTER N WITH TILDE\r
+         case 0x97: codeUnit = 0x00f3; break; // LATIN SMALL LETTER O WITH ACUTE\r
+         case 0x98: codeUnit = 0x00f2; break; // LATIN SMALL LETTER O WITH GRAVE\r
+         case 0x99: codeUnit = 0x00f4; break; // LATIN SMALL LETTER O WITH CIRCUMFLEX\r
+         case 0x9a: codeUnit = 0x00f6; break; // LATIN SMALL LETTER O WITH DIAERESIS\r
+         case 0x9b: codeUnit = 0x00f5; break; // LATIN SMALL LETTER O WITH TILDE\r
+         case 0x9c: codeUnit = 0x00fa; break; // LATIN SMALL LETTER U WITH ACUTE\r
+         case 0x9d: codeUnit = 0x00f9; break; // LATIN SMALL LETTER U WITH GRAVE\r
+         case 0x9e: codeUnit = 0x00fb; break; // LATIN SMALL LETTER U WITH CIRCUMFLEX\r
+         case 0x9f: codeUnit = 0x00fc; break; // LATIN SMALL LETTER U WITH DIAERESIS\r
+         case 0xa0: codeUnit = 0x2020; break; // DAGGER\r
+         case 0xa1: codeUnit = 0x00b0; break; // DEGREE SIGN\r
+         case 0xa4: codeUnit = 0x00a7; break; // SECTION SIGN\r
+         case 0xa5: codeUnit = 0x2022; break; // BULLET\r
+         case 0xa6: codeUnit = 0x00b6; break; // PILCROW SIGN\r
+         case 0xa7: codeUnit = 0x00df; break; // LATIN SMALL LETTER SHARP S\r
+         case 0xa8: codeUnit = 0x00ae; break; // REGISTERED SIGN\r
+         case 0xaa: codeUnit = 0x2122; break; // TRADE MARK SIGN\r
+         case 0xab: codeUnit = 0x00b4; break; // ACUTE ACCENT\r
+         case 0xac: codeUnit = 0x00a8; break; // DIAERESIS\r
+         case 0xad: codeUnit = 0x2260; break; // NOT EQUAL TO\r
+         case 0xae: codeUnit = 0x00c6; break; // LATIN CAPITAL LIGATURE AE\r
+         case 0xaf: codeUnit = 0x00d8; break; // LATIN CAPITAL LETTER O WITH STROKE\r
+         case 0xb0: codeUnit = 0x221e; break; // INFINITY\r
+         case 0xb2: codeUnit = 0x2264; break; // LESS-THAN OR EQUAL TO\r
+         case 0xb3: codeUnit = 0x2265; break; // GREATER-THAN OR EQUAL TO\r
+         case 0xb4: codeUnit = 0x00a5; break; // YEN SIGN\r
+         case 0xb6: codeUnit = 0x2202; break; // PARTIAL DIFFERENTIAL\r
+         case 0xb7: codeUnit = 0x2211; break; // N-ARY SUMMATION\r
+         case 0xb8: codeUnit = 0x220f; break; // N-ARY PRODUCT\r
+         case 0xb9: codeUnit = 0x03c0; break; // GREEK SMALL LETTER PI\r
+         case 0xba: codeUnit = 0x222b; break; // INTEGRAL\r
+         case 0xbb: codeUnit = 0x00aa; break; // FEMININE ORDINAL INDICATOR\r
+         case 0xbc: codeUnit = 0x00ba; break; // MASCULINE ORDINAL INDICATOR\r
+         case 0xbd: codeUnit = 0x2126; break; // OHM SIGN\r
+         case 0xbe: codeUnit = 0x00e6; break; // LATIN SMALL LIGATURE AE\r
+         case 0xbf: codeUnit = 0x00f8; break; // LATIN SMALL LETTER O WITH STROKE\r
+         case 0xc0: codeUnit = 0x00bf; break; // INVERTED QUESTION MARK\r
+         case 0xc1: codeUnit = 0x00a1; break; // INVERTED EXCLAMATION MARK\r
+         case 0xc2: codeUnit = 0x00ac; break; // NOT SIGN\r
+         case 0xc3: codeUnit = 0x221a; break; // SQUARE ROOT\r
+         case 0xc4: codeUnit = 0x0192; break; // LATIN SMALL LETTER F WITH HOOK\r
+         case 0xc5: codeUnit = 0x2248; break; // ALMOST EQUAL TO\r
+         case 0xc6: codeUnit = 0x2206; break; // INCREMENT\r
+         case 0xc7: codeUnit = 0x00ab; break; // LEFT-POINTING DOUBLE ANGLE QUOTATION MARK\r
+         case 0xc8: codeUnit = 0x00bb; break; // RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK\r
+         case 0xc9: codeUnit = 0x2026; break; // HORIZONTAL ELLIPSIS\r
+         case 0xca: codeUnit = 0x00a0; break; // NO-BREAK SPACE\r
+         case 0xcb: codeUnit = 0x00c0; break; // LATIN CAPITAL LETTER A WITH GRAVE\r
+         case 0xcc: codeUnit = 0x00c3; break; // LATIN CAPITAL LETTER A WITH TILDE\r
+         case 0xcd: codeUnit = 0x00d5; break; // LATIN CAPITAL LETTER O WITH TILDE\r
+         case 0xce: codeUnit = 0x0152; break; // LATIN CAPITAL LIGATURE OE\r
+         case 0xcf: codeUnit = 0x0153; break; // LATIN SMALL LIGATURE OE\r
+         case 0xd0: codeUnit = 0x2013; break; // EN DASH\r
+         case 0xd1: codeUnit = 0x2014; break; // EM DASH\r
+         case 0xd2: codeUnit = 0x201c; break; // LEFT DOUBLE QUOTATION MARK\r
+         case 0xd3: codeUnit = 0x201d; break; // RIGHT DOUBLE QUOTATION MARK\r
+         case 0xd4: codeUnit = 0x2018; break; // LEFT SINGLE QUOTATION MARK\r
+         case 0xd5: codeUnit = 0x2019; break; // RIGHT SINGLE QUOTATION MARK\r
+         case 0xd6: codeUnit = 0x00f7; break; // DIVISION SIGN\r
+         case 0xd7: codeUnit = 0x25ca; break; // LOZENGE\r
+         case 0xd8: codeUnit = 0x00ff; break; // LATIN SMALL LETTER Y WITH DIAERESIS\r
+         case 0xd9: codeUnit = 0x0178; break; // LATIN CAPITAL LETTER Y WITH DIAERESIS\r
+         case 0xda: codeUnit = 0x2044; break; // FRACTION SLASH\r
+         case 0xdb: codeUnit = 0x00a4; break; // CURRENCY SIGN\r
+         case 0xdc: codeUnit = 0x2039; break; // SINGLE LEFT-POINTING ANGLE QUOTATION MARK\r
+         case 0xdd: codeUnit = 0x203a; break; // SINGLE RIGHT-POINTING ANGLE QUOTATION MARK\r
+         case 0xde: codeUnit = 0xfb01; break; // LATIN SMALL LIGATURE FI\r
+         case 0xdf: codeUnit = 0xfb02; break; // LATIN SMALL LIGATURE FL\r
+         case 0xe0: codeUnit = 0x2021; break; // DOUBLE DAGGER\r
+         case 0xe1: codeUnit = 0x00b7; break; // MIDDLE DOT\r
+         case 0xe2: codeUnit = 0x201a; break; // SINGLE LOW-9 QUOTATION MARK\r
+         case 0xe3: codeUnit = 0x201e; break; // DOUBLE LOW-9 QUOTATION MARK\r
+         case 0xe4: codeUnit = 0x2030; break; // PER MILLE SIGN\r
+         case 0xe5: codeUnit = 0x00c2; break; // LATIN CAPITAL LETTER A WITH CIRCUMFLEX\r
+         case 0xe6: codeUnit = 0x00ca; break; // LATIN CAPITAL LETTER E WITH CIRCUMFLEX\r
+         case 0xe7: codeUnit = 0x00c1; break; // LATIN CAPITAL LETTER A WITH ACUTE\r
+         case 0xe8: codeUnit = 0x00cb; break; // LATIN CAPITAL LETTER E WITH DIAERESIS\r
+         case 0xe9: codeUnit = 0x00c8; break; // LATIN CAPITAL LETTER E WITH GRAVE\r
+         case 0xea: codeUnit = 0x00cd; break; // LATIN CAPITAL LETTER I WITH ACUTE\r
+         case 0xeb: codeUnit = 0x00ce; break; // LATIN CAPITAL LETTER I WITH CIRCUMFLEX\r
+         case 0xec: codeUnit = 0x00cf; break; // LATIN CAPITAL LETTER I WITH DIAERESIS\r
+         case 0xed: codeUnit = 0x00cc; break; // LATIN CAPITAL LETTER I WITH GRAVE\r
+         case 0xee: codeUnit = 0x00d3; break; // LATIN CAPITAL LETTER O WITH ACUTE\r
+         case 0xef: codeUnit = 0x00d4; break; // LATIN CAPITAL LETTER O WITH CIRCUMFLEX\r
+         case 0xf0:\r
+             // The only byte this table refuses to translate.\r
+             throw new InvalidOperationException("Invalid cp10000_MacRoman sequence [" + P.ToString() + "]");\r
+         case 0xf1: codeUnit = 0x00d2; break; // LATIN CAPITAL LETTER O WITH GRAVE\r
+         case 0xf2: codeUnit = 0x00da; break; // LATIN CAPITAL LETTER U WITH ACUTE\r
+         case 0xf3: codeUnit = 0x00db; break; // LATIN CAPITAL LETTER U WITH CIRCUMFLEX\r
+         case 0xf4: codeUnit = 0x00d9; break; // LATIN CAPITAL LETTER U WITH GRAVE\r
+         case 0xf5: codeUnit = 0x0131; break; // LATIN SMALL LETTER DOTLESS I\r
+         case 0xf6: codeUnit = 0x02c6; break; // MODIFIER LETTER CIRCUMFLEX ACCENT\r
+         case 0xf7: codeUnit = 0x02dc; break; // SMALL TILDE\r
+         case 0xf8: codeUnit = 0x00af; break; // MACRON\r
+         case 0xf9: codeUnit = 0x02d8; break; // BREVE\r
+         case 0xfa: codeUnit = 0x02d9; break; // DOT ABOVE\r
+         case 0xfb: codeUnit = 0x02da; break; // RING ABOVE\r
+         case 0xfc: codeUnit = 0x00b8; break; // CEDILLA\r
+         case 0xfd: codeUnit = 0x02dd; break; // DOUBLE ACUTE ACCENT\r
+         case 0xfe: codeUnit = 0x02db; break; // OGONEK\r
+         case 0xff: codeUnit = 0x02c7; break; // CARON\r
+         default:\r
+             // Bytes without a table entry keep their numeric value.\r
+             codeUnit = P;\r
+             break;\r
+     }\r
+\r
+     return (char) codeUnit;\r
+ }\r
+\r
+ public static char cp1250ToUTF16Char(byte P)\r
+ {\r
+ // This function was provided by Miloslav Skácel (ported by DrW)\r
+ switch (P)\r
+ {\r
+ case 0x80: throw new InvalidOperationException("Invalid Windows-1250 sequence [" + P.ToString() + "]"); \r
+ case 0x81: throw new InvalidOperationException("Invalid Windows-1250 sequence [" + P.ToString() + "]"); \r
+ case 0x83: throw new InvalidOperationException("Invalid Windows-1250 sequence [" + P.ToString() + "]"); \r
+ case 0x88: throw new InvalidOperationException("Invalid Windows-1250 sequence [" + P.ToString() + "]"); \r
+ case 0x90: throw new InvalidOperationException("Invalid Windows-1250 sequence [" + P.ToString() + "]"); \r
+ case 0x98: throw new InvalidOperationException("Invalid Windows-1250 sequence [" + P.ToString() + "]"); \r
+ case 0x82: return (char) 0x201a; // SINGLE LOW-9 QUOTATION MARK\r
+ case 0x84: return (char) 0x201e; // DOUBLE LOW-9 QUOTATION MARK\r
+ case 0x85: return (char) 0x2026; // HORIZONTAL ELLIPSIS\r
+ case 0x86: return (char) 0x2020; // DAGGER\r
+ case 0x87: return (char) 0x2021; // DOUBLE DAGGER\r
+ case 0x89: return (char) 0x2030; // PER MILLE SIGN\r
+ case 0x8a: return (char) 0x0160; // LATIN CAPITAL LETTER S WITH CARON\r
+ case 0x8b: return (char) 0x2039; // SINGLE LEFT-POINTING ANGLE QUOTATION MARK\r
+ case 0x8c: return (char) 0x015a; // LATIN CAPITAL LETTER S WITH ACUTE\r
+ case 0x8d: return (char) 0x0164; // LATIN CAPITAL LETTER T WITH CARON\r
+ case 0x8e: return (char) 0x017d; // LATIN CAPITAL LETTER Z WITH CARON\r
+ case 0x8f: return (char) 0x0179; // LATIN CAPITAL LETTER Z WITH ACUTE\r
+ case 0x91: return (char) 0x2018; // LEFT SINGLE QUOTATION MARK\r
+ case 0x92: return (char) 0x2019; // RIGHT SINGLE QUOTATION MARK\r
+ case 0x93: return (char) 0x201c; // LEFT DOUBLE QUOTATION MARK\r
+ case 0x94: return (char) 0x201d; // RIGHT DOUBLE QUOTATION MARK\r
+ case 0x95: return (char) 0x2022; // BULLET\r
+ case 0x96: return (char) 0x2013; // EN-DASH\r
+ case 0x97: return (char) 0x2014; // EM-DASH\r
+ case 0x99: return (char) 0x2122; // TRADE MARK SIGN\r
+ case 0x9a: return (char) 0x0161; // LATIN SMALL LETTER S WITH CARON\r
+ case 0x9b: return (char) 0x203a; // SINGLE RIGHT-POINTING ANGLE QUOTATION MARK\r
+ case 0x9c: return (char) 0x015b; // LATIN SMALL LETTER S WITH ACUTE\r
+ case 0x9d: return (char) 0x0165; // LATIN SMALL LETTER T WITH CARON\r
+ case 0x9e: return (char) 0x017e; // LATIN SMALL LETTER Z WITH CARON\r
+ case 0x9f: return (char) 0x017a; // LATIN SMALL LETTER Z WITH ACUTE\r
+ case 0xa0: return (char) 0x00a0; // NO-BREAK SPACE\r
+ case 0xa1: return (char) 0x02c7; // CARON\r
+ case 0xa2: return (char) 0x02d8; // BREVE\r
+ case 0xa3: return (char) 0x0141; // LATIN CAPITAL LETTER L WITH STROKE\r
+ case 0xa4: return (char) 0x00a4; // CURRENCY SIGN\r
+ case 0xa5: return (char) 0x0104; // LATIN CAPITAL LETTER A WITH OGONEK\r
+ case 0xa6: return (char) 0x00a6; // BROKEN BAR\r
+ case 0xa7: return (char) 0x00a7; // SECTION SIGN\r
+ case 0xa8: return (char) 0x00a8; // DIAERESIS\r
+ case 0xa9: return (char) 0x00a9; // COPYRIGHT SIGN\r
+ case 0xaa: return (char) 0x015e; // LATIN CAPITAL LETTER S WITH CEDILLA\r
+ case 0xab: return (char) 0x00ab; // LEFT-POINTING DOUBLE ANGLE QUOTATION MARK\r
+ case 0xac: return (char) 0x00ac; // NOT SIGN\r
+ case 0xad: return (char) 0x00ad; // SOFT HYPHEN\r
+ case 0xae: return (char) 0x00ae; // REGISTERED SIGN\r
+ case 0xaf: return (char) 0x017b; // LATIN CAPITAL LETTER Z WITH DOT ABOVE\r
+ case 0xb0: return (char) 0x00b0; // DEGREE SIGN\r
+ case 0xb1: return (char) 0x00b1; // PLUS-MINUS SIGN\r
+ case 0xb2: return (char) 0x02db; // OGONEK\r
+ case 0xb3: return (char) 0x0142; // LATIN SMALL LETTER L WITH STROKE\r
+ case 0xb4: return (char) 0x00b4; // ACUTE ACCENT\r
+ case 0xb5: return (char) 0x00b5; // MIKRO SIGN\r
+ case 0xb6: return (char) 0x00b6; // PILCROW SIGN\r
+ case 0xb7: return (char) 0x00b7; // MIDDLE DOT\r
+ case 0xb8: return (char) 0x00b8; // CEDILLA\r
+ case 0xb9: return (char) 0x0105; // LATIN SMALL LETTER A WITH OGONEK\r
+ case 0xba: return (char) 0x015f; // LATIN SMALL LETTER S WITH CEDILLA\r
+ case 0xbb: return (char) 0x00bb; // RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK\r
+ case 0xbc: return (char) 0x013d; // LATIN CAPITAL LETTER L WITH CARON\r
+ case 0xbd: return (char) 0x02dd; // DOUBLE ACUTE ACCENT\r
+ case 0xbe: return (char) 0x013e; // LATIN SMALL LETTER L WITH CARON\r
+ case 0xbf: return (char) 0x017c; // LATIN SMALL LETTER Z WITH DOT ABOVE\r
+ case 0xc0: return (char) 0x0154; // LATIN CAPITAL LETTER R WITH ACUTE\r
+ case 0xc1: return (char) 0x00c1; // LATIN CAPITAL LETTER A WITH ACUTE\r
+ case 0xc2: return (char) 0x00c2; // LATIN CAPITAL LETTER A WITH CIRCUMFLEX\r
+ case 0xc3: return (char) 0x0102; // LATIN CAPITAL LETTER A WITH BREVE\r
+ case 0xc4: return (char) 0x00c4; // LATIN CAPITAL LETTER A WITH DIAERESIS\r
+ case 0xc5: return (char) 0x0139; // LATIN CAPITAL LETTER L WITH ACUTE\r
+ case 0xc6: return (char) 0x0106; // LATIN CAPITAL LETTER C WITH ACUTE\r
+ case 0xc7: return (char) 0x00c7; // LATIN CAPITAL LETTER C WITH CEDILLA\r
+ case 0xc8: return (char) 0x010c; // LATIN CAPITAL LETTER C WITH CARON\r
+ case 0xc9: return (char) 0x00c9; // LATIN CAPITAL LETTER E WITH ACUTE\r
+ case 0xca: return (char) 0x0118; // LATIN CAPITAL LETTER E WITH OGONEK\r
+ case 0xcb: return (char) 0x00cb; // LATIN CAPITAL LETTER E WITH DIAERESIS\r
+ case 0xcc: return (char) 0x011a; // LATIN CAPITAL LETTER E WITH CARON\r
+ case 0xcd: return (char) 0x00cd; // LATIN CAPITAL LETTER I WITH ACUTE\r
+ case 0xce: return (char) 0x00ce; // LATIN CAPITAL LETTER I WITH CIRCUMFLEX\r
+ case 0xcf: return (char) 0x010e; // LATIN CAPITAL LETTER D WITH CARON\r
+ case 0xd0: return (char) 0x0110; // LATIN CAPITAL LETTER D WITH STROKE\r
+ case 0xd1: return (char) 0x0143; // LATIN CAPITAL LETTER N WITH ACUTE\r
+ case 0xd2: return (char) 0x0147; // LATIN CAPITAL LETTER N WITH CARON\r
+ case 0xd3: return (char) 0x00d3; // LATIN CAPITAL LETTER O WITH ACUTE\r
+ case 0xd4: return (char) 0x00d4; // LATIN CAPITAL LETTER O WITH CIRCUMFLEX\r
+ case 0xd5: return (char) 0x0150; // LATIN CAPITAL LETTER O WITH DOUBLE ACUTE\r
+ case 0xd6: return (char) 0x00d6; // LATIN CAPITAL LETTER O WITH DIAERESIS\r
+ case 0xd7: return (char) 0x00d7; // MULTIPLICATION SIGN\r
+ case 0xd8: return (char) 0x0158; // LATIN CAPITAL LETTER R WITH CARON\r
+ case 0xd9: return (char) 0x016e; // LATIN CAPITAL LETTER U WITH RING ABOVE\r
+ case 0xda: return (char) 0x00da; // LATIN CAPITAL LETTER U WITH ACUTE\r
+ case 0xdb: return (char) 0x0170; // LATIN CAPITAL LETTER U WITH WITH DOUBLE ACUTE\r
+ case 0xdc: return (char) 0x00dc; // LATIN CAPITAL LETTER U WITH DIAERESIS\r
+ case 0xdd: return (char) 0x00dd; // LATIN CAPITAL LETTER Y WITH ACUTE\r
+ case 0xde: return (char) 0x0162; // LATIN CAPITAL LETTER T WITH CEDILLA\r
+ case 0xdf: return (char) 0x00df; // LATIN SMALL LETTER SHARP S\r
+ case 0xe0: return (char) 0x0155; // LATIN SMALL LETTER R WITH ACUTE\r
+ case 0xe1: return (char) 0x00e1; // LATIN SMALL LETTER A WITH ACUTE\r
+ case 0xe2: return (char) 0x00e2; // LATIN SMALL LETTER A WITH CIRCUMFLEX\r
+ case 0xe3: return (char) 0x0103; // LATIN SMALL LETTER A WITH BREVE\r
+ case 0xe4: return (char) 0x00e4; // LATIN SMALL LETTER A WITH DIAERESIS\r
+ case 0xe5: return (char) 0x013a; // LATIN SMALL LETTER L WITH ACUTE\r
+ case 0xe6: return (char) 0x0107; // LATIN SMALL LETTER C WITH ACUTE\r
+ case 0xe7: return (char) 0x00e7; // LATIN SMALL LETTER C WITH CEDILLA\r
+ case 0xe8: return (char) 0x010d; // LATIN SMALL LETTER C WITH CARON 100D\r
+ case 0xe9: return (char) 0x00e9; // LATIN SMALL LETTER E WITH ACUTE\r
+ case 0xea: return (char) 0x0119; // LATIN SMALL LETTER E WITH OGONEK\r
+ case 0xeb: return (char) 0x00eb; // LATIN SMALL LETTER E WITH DIAERESIS\r
+ case 0xec: return (char) 0x011b; // LATIN SMALL LETTER E WITH CARON\r
+ case 0xed: return (char) 0x00ed; // LATIN SMALL LETTER I WITH ACUTE\r
+ case 0xee: return (char) 0x00ee; // LATIN SMALL LETTER I WITH CIRCUMFLEX\r
+ case 0xef: return (char) 0x010f; // LATIN SMALL LETTER D WITH CARON\r
+ case 0xf0: return (char) 0x0111; // LATIN SMALL LETTER D WITH STROKE\r
+ case 0xf1: return (char) 0x0144; // LATIN SMALL LETTER N WITH ACUTE\r
+ case 0xf2: return (char) 0x0148; // LATIN SMALL LETTER N WITH CARON\r
+ case 0xf3: return (char) 0x00f3; // LATIN SMALL LETTER O WITH ACUTE\r
+ case 0xf4: return (char) 0x00f4; // LATIN SMALL LETTER O WITH CIRCUMFLEX\r
+ case 0xf5: return (char) 0x0151; // LATIN SMALL LETTER O WITH DOUBLE ACUTE\r
+ case 0xf6: return (char) 0x00f6; // LATIN SMALL LETTER O WITH DIAERESIS\r
+ case 0xf7: return (char) 0x00f7; // DIVISION SIGN\r
+ case 0xf8: return (char) 0x0159; // LATIN SMALL LETTER R WITH CARON\r
+ case 0xf9: return (char) 0x016f; // LATIN SMALL LETTER U WITH RING ABOVE\r
+ case 0xfa: return (char) 0x00fa; // LATIN SMALL LETTER U WITH ACUTE\r
+ case 0xfb: return (char) 0x0171; // LATIN SMALL LETTER U WITH WITH DOUBLE ACUTE\r
+ case 0xfc: return (char) 0x00fc; // LATIN SMALL LETTER U WITH DIAERESIS\r
+ case 0xfd: return (char) 0x00fd; // LATIN SMALL LETTER Y WITH ACUTE\r
+ case 0xfe: return (char) 0x0163; // LATIN SMALL LETTER T WITH CEDILLA\r
+ case 0xff: return (char) 0x02d9; // DOT ABOVE\r
+ default:\r
+ return (char) P;\r
+ }\r
+ }\r
+\r
+ /// <summary>\r
+ /// Map a single Windows-1251 (Cyrillic) byte to its UTF-16 code unit.\r
+ /// </summary>\r
+ /// <remarks>\r
+ /// Bytes in 0x80-0xBF with an irregular mapping are listed explicitly in the switch.\r
+ /// Bytes 0xC0-0xFF form one contiguous run that maps to U+0410-U+044F (byte + 0x0350).\r
+ /// Every remaining byte (0x00-0x7F and the unlisted slots in 0xA0-0xBF) maps to the\r
+ /// code point with the same numeric value.\r
+ /// </remarks>\r
+ /// <param name="P">Windows-1251 encoded byte</param>\r
+ /// <returns>The UTF-16 character for <paramref name="P"/></returns>\r
+ /// <exception cref="InvalidOperationException">Thrown for 0x98, which has no mapping here.</exception>\r
+ public static char cp1251ToUTF16Char(byte P)\r
+ {\r
+     switch (P)\r
+     {\r
+         case 0x80: return (char) 0x0402; // CYRILLIC CAPITAL LETTER DJE\r
+         case 0x81: return (char) 0x0403; // CYRILLIC CAPITAL LETTER GJE\r
+         case 0x82: return (char) 0x201a; // SINGLE LOW-9 QUOTATION MARK\r
+         case 0x83: return (char) 0x0453; // CYRILLIC SMALL LETTER GJE\r
+         case 0x84: return (char) 0x201e; // DOUBLE LOW-9 QUOTATION MARK\r
+         case 0x85: return (char) 0x2026; // HORIZONTAL ELLIPSIS\r
+         case 0x86: return (char) 0x2020; // DAGGER\r
+         case 0x87: return (char) 0x2021; // DOUBLE DAGGER\r
+         case 0x88: return (char) 0x20ac; // EURO SIGN\r
+         case 0x89: return (char) 0x2030; // PER MILLE SIGN\r
+         case 0x8a: return (char) 0x0409; // CYRILLIC CAPITAL LETTER LJE\r
+         case 0x8b: return (char) 0x2039; // SINGLE LEFT-POINTING ANGLE QUOTATION MARK\r
+         case 0x8c: return (char) 0x040a; // CYRILLIC CAPITAL LETTER NJE\r
+         case 0x8d: return (char) 0x040c; // CYRILLIC CAPITAL LETTER KJE\r
+         case 0x8e: return (char) 0x040b; // CYRILLIC CAPITAL LETTER TSHE\r
+         case 0x8f: return (char) 0x040f; // CYRILLIC CAPITAL LETTER DZHE\r
+         case 0x90: return (char) 0x0452; // CYRILLIC SMALL LETTER DJE\r
+         case 0x91: return (char) 0x2018; // LEFT SINGLE QUOTATION MARK\r
+         case 0x92: return (char) 0x2019; // RIGHT SINGLE QUOTATION MARK\r
+         case 0x93: return (char) 0x201c; // LEFT DOUBLE QUOTATION MARK\r
+         case 0x94: return (char) 0x201d; // RIGHT DOUBLE QUOTATION MARK\r
+         case 0x95: return (char) 0x2022; // BULLET\r
+         case 0x96: return (char) 0x2013; // EN DASH\r
+         case 0x97: return (char) 0x2014; // EM DASH\r
+         case 0x98: throw new InvalidOperationException("Invalid cp1251 sequence [" + P.ToString() + "]");\r
+         case 0x99: return (char) 0x2122; // TRADE MARK SIGN\r
+         case 0x9a: return (char) 0x0459; // CYRILLIC SMALL LETTER LJE\r
+         case 0x9b: return (char) 0x203a; // SINGLE RIGHT-POINTING ANGLE QUOTATION MARK\r
+         case 0x9c: return (char) 0x045a; // CYRILLIC SMALL LETTER NJE\r
+         case 0x9d: return (char) 0x045c; // CYRILLIC SMALL LETTER KJE\r
+         case 0x9e: return (char) 0x045b; // CYRILLIC SMALL LETTER TSHE\r
+         case 0x9f: return (char) 0x045f; // CYRILLIC SMALL LETTER DZHE\r
+         case 0xa0: return (char) 0x00a0; // NO-BREAK SPACE\r
+         case 0xa1: return (char) 0x040e; // CYRILLIC CAPITAL LETTER SHORT U\r
+         case 0xa2: return (char) 0x045e; // CYRILLIC SMALL LETTER SHORT U\r
+         case 0xa3: return (char) 0x0408; // CYRILLIC CAPITAL LETTER JE\r
+         case 0xa4: return (char) 0x00a4; // CURRENCY SIGN\r
+         case 0xa5: return (char) 0x0490; // CYRILLIC CAPITAL LETTER GHE WITH UPTURN\r
+         case 0xa8: return (char) 0x0401; // CYRILLIC CAPITAL LETTER IO\r
+         case 0xaa: return (char) 0x0404; // CYRILLIC CAPITAL LETTER UKRAINIAN IE\r
+         case 0xaf: return (char) 0x0407; // CYRILLIC CAPITAL LETTER YI\r
+         case 0xb2: return (char) 0x0406; // CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I\r
+         case 0xb3: return (char) 0x0456; // CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I\r
+         case 0xb4: return (char) 0x0491; // CYRILLIC SMALL LETTER GHE WITH UPTURN\r
+         case 0xb8: return (char) 0x0451; // CYRILLIC SMALL LETTER IO\r
+         case 0xb9: return (char) 0x2116; // NUMERO SIGN\r
+         case 0xba: return (char) 0x0454; // CYRILLIC SMALL LETTER UKRAINIAN IE\r
+         case 0xbc: return (char) 0x0458; // CYRILLIC SMALL LETTER JE\r
+         case 0xbd: return (char) 0x0405; // CYRILLIC CAPITAL LETTER DZE\r
+         case 0xbe: return (char) 0x0455; // CYRILLIC SMALL LETTER DZE\r
+         case 0xbf: return (char) 0x0457; // CYRILLIC SMALL LETTER YI\r
+     }\r
+\r
+     // 0xC0-0xFF is the contiguous alphabet block U+0410-U+044F. Only the lower\r
+     // bound needs testing: a byte can never exceed 0xFF. (The previous test OR-ed\r
+     // both bounds, which was true for every byte and shifted ASCII as well.)\r
+     if (P >= 0xc0)\r
+         return (char) (P + 0x0350);\r
+\r
+     // ASCII and the remaining 0xA0-0xBF slots share their value with Unicode.\r
+     return (char) P;\r
+ }\r
+\r
+ /// <summary>\r
+ /// Map a single Windows-1252 byte to its UTF-16 code unit.\r
+ /// </summary>\r
+ /// <remarks>\r
+ /// Provided by Olaf Lösken (ported by DrW). Table taken from\r
+ /// ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1252.TXT\r
+ /// </remarks>\r
+ /// <param name="P">Windows-1252 encoded byte</param>\r
+ /// <returns>The UTF-16 character for <paramref name="P"/></returns>\r
+ /// <exception cref="InvalidOperationException">Thrown for 0x81, 0x8D, 0x8F, 0x90 and 0x9D,\r
+ /// the slots this table treats as undefined.</exception>\r
+ public static char cp1252ToUTF16Char(byte P)\r
+ {\r
+     // Only 0x80-0x9F deviates; every other byte keeps its numeric value.\r
+     if (P < 0x80 || P > 0x9F)\r
+         return (char) P;\r
+\r
+     switch (P)\r
+     {\r
+         case 0x80: return '\u20AC'; // EURO SIGN\r
+         case 0x82: return '\u201A'; // SINGLE LOW-9 QUOTATION MARK\r
+         case 0x83: return '\u0192'; // LATIN SMALL LETTER F WITH HOOK\r
+         case 0x84: return '\u201E'; // DOUBLE LOW-9 QUOTATION MARK\r
+         case 0x85: return '\u2026'; // HORIZONTAL ELLIPSIS\r
+         case 0x86: return '\u2020'; // DAGGER\r
+         case 0x87: return '\u2021'; // DOUBLE DAGGER\r
+         case 0x88: return '\u02C6'; // MODIFIER LETTER CIRCUMFLEX ACCENT\r
+         case 0x89: return '\u2030'; // PER MILLE SIGN\r
+         case 0x8A: return '\u0160'; // LATIN CAPITAL LETTER S WITH CARON\r
+         case 0x8B: return '\u2039'; // SINGLE LEFT-POINTING ANGLE QUOTATION MARK\r
+         case 0x8C: return '\u0152'; // LATIN CAPITAL LIGATURE OE\r
+         case 0x8E: return '\u017D'; // LATIN CAPITAL LETTER Z WITH CARON\r
+         case 0x91: return '\u2018'; // LEFT SINGLE QUOTATION MARK\r
+         case 0x92: return '\u2019'; // RIGHT SINGLE QUOTATION MARK\r
+         case 0x93: return '\u201C'; // LEFT DOUBLE QUOTATION MARK\r
+         case 0x94: return '\u201D'; // RIGHT DOUBLE QUOTATION MARK\r
+         case 0x95: return '\u2022'; // BULLET\r
+         case 0x96: return '\u2013'; // EN DASH\r
+         case 0x97: return '\u2014'; // EM DASH\r
+         case 0x98: return '\u02DC'; // SMALL TILDE\r
+         case 0x99: return '\u2122'; // TRADE MARK SIGN\r
+         case 0x9A: return '\u0161'; // LATIN SMALL LETTER S WITH CARON\r
+         case 0x9B: return '\u203A'; // SINGLE RIGHT-POINTING ANGLE QUOTATION MARK\r
+         case 0x9C: return '\u0153'; // LATIN SMALL LIGATURE OE\r
+         case 0x9E: return '\u017E'; // LATIN SMALL LETTER Z WITH CARON\r
+         case 0x9F: return '\u0178'; // LATIN CAPITAL LETTER Y WITH DIAERESIS\r
+         default:\r
+             // What is left inside 0x80-0x9F: 0x81, 0x8D, 0x8F, 0x90, 0x9D.\r
+             throw new InvalidOperationException("Invalid Windows-1252 sequence [" + P.ToString() + "]");\r
+     }\r
+ }\r
+\r
+ /// <summary>\r
+ /// Read one UTF-8 encoded character from the stream and return its decoded value.\r
+ /// Sequences of one to six octets are accepted; see\r
+ /// http://www.ietf.org/rfc/rfc2279.txt for the encoding rules.\r
+ /// </summary>\r
+ /// <remarks>\r
+ /// Reads <c>stream.Length</c> and <c>stream.Position</c> while decoding, so the\r
+ /// stream presumably has to support both (NOTE(review): confirm callers never\r
+ /// pass a non-seekable stream).\r
+ /// </remarks>\r
+ /// <exception cref="InvalidOperationException">Thrown if 1) called at EOF,\r
+ /// 2) the stream ends inside a multi-octet sequence, 3) a lead or continuation\r
+ /// octet is malformed, or 4) the value was encoded with more octets than needed.</exception>\r
+ /// <param name="stream">Stream to read from</param>\r
+ /// <returns>Decoded character value; it can exceed 0xFFFF, in which case it does\r
+ /// not fit in a single UTF-16 code unit</returns>\r
+ public static int ReadUTF8Char(Stream stream)\r
+ {\r
+     byte[] octet = new byte[1];\r
+\r
+     if (stream.Read(octet, 0, 1) != 1)\r
+         throw new InvalidOperationException("Unexptected EOF reading stream");\r
+\r
+     byte lead = octet[0];\r
+\r
+     // 7-bit value: a complete character on its own\r
+     if (lead < 0x80)\r
+         return lead;\r
+\r
+     // with the high bit set, a lead octet has to look like 11xx xxxx\r
+     if ((lead & 0xc0) != 0xc0)\r
+         throw new InvalidOperationException("Invalid UTF-8 sequence at position " + stream.Position.ToString());\r
+\r
+     int octetCount = 1;\r
+     int codePoint = lead;\r
+\r
+     // Walk the header bits of the lead octet from 0x40 downwards; every set bit\r
+     // announces one more continuation octet to fetch.\r
+     for (int headerBit = 0x40; (lead & headerBit) != 0; headerBit >>= 1)\r
+     {\r
+         if (stream.Length == stream.Position)\r
+             throw new InvalidOperationException("Aborted UTF-8 (unexpected EOF) sequence at position " + stream.Position.ToString());\r
+\r
+         if (stream.Read(octet, 0, 1) != 1)\r
+             throw new InvalidOperationException("Aborted UTF-8 sequence (missing characters) at position " + stream.Position.ToString());\r
+\r
+         byte trail = octet[0];\r
+\r
+         // continuation octets are always 10nn nnnn\r
+         if ((trail & 0xc0) != 0x80)\r
+             throw new InvalidOperationException("Invalid UTF-8 sequence at position " + stream.Position.ToString());\r
+\r
+         // append the six payload bits\r
+         codePoint = (codePoint << 6) | (trail & 0x3F);\r
+         octetCount++;\r
+     }\r
+\r
+     // a lead octet of 0xFE/0xFF announces more than six octets\r
+     if (octetCount > 6)\r
+         throw new InvalidOperationException("Invalid UTF-8 sequence (no 0-bit in hdr) at position " + stream.Position.ToString());\r
+\r
+     // largest value representable with 1..6 octets (index = octets - 1)\r
+     int[] limits = {0x7F, 0x7FF, 0xFFFF, 0x1FFFFF, 0x3FFFFFF, 0x7FFFFFFF};\r
+\r
+     // strip the header bits that were carried along from the lead octet\r
+     codePoint &= limits[octetCount - 1];\r
+\r
+     // RFC 2279: reject a value that would have fit into a shorter sequence\r
+     // (e.g. 0x7F written with two octets)\r
+     if (octetCount > 1 && codePoint <= limits[octetCount - 2])\r
+         throw new InvalidOperationException("Invalid UTF-8 sequence (invalid sequence) at position " + stream.Position.ToString());\r
+\r
+     return codePoint;\r
+ }\r
+ \r
+ /// <summary>\r
+ /// Build the low (trailing) UTF-16 surrogate from a character value: 0xDC00\r
+ /// combined with the low ten bits of <paramref name="val"/>.\r
+ /// </summary>\r
+ /// <param name="val">Character value to split</param>\r
+ /// <returns>Low surrogate code unit</returns>\r
+ public static char Utf16LowSurrogate(int val)\r
+ {\r
+     // 0xDC00 has its ten low bits clear, so OR-ing the payload in yields the\r
+     // same bits as XOR-ing it.\r
+     int lowTenBits = val & 0x03FF;\r
+     return (char) (0xDC00 | lowTenBits);\r
+ }\r
+\r
+ /// <summary>\r
+ /// Build the high (leading) UTF-16 surrogate from a character value.\r
+ /// </summary>\r
+ /// <param name="val">Character value to split</param>\r
+ /// <returns>High surrogate code unit</returns>\r
+ public static char Utf16HighSurrogate(int val)\r
+ {\r
+     // 0xD7C0 is 0xD800 - (0x10000 >> 10): the 0x10000 offset is folded into\r
+     // the base so no separate subtraction is needed.\r
+     return (char) (0xD7C0 + (val >> 10));\r
+ }\r
+\r
+ }\r
+}
\ No newline at end of file