Updates referencesource to .NET 4.7
[mono.git] / mcs / class / referencesource / System / net / System / Net / WebUtility.cs
1 //------------------------------------------------------------------------------
2 // <copyright file="WebUtility.cs" company="Microsoft">
3 //     Copyright (c) Microsoft Corporation.  All rights reserved.
4 // </copyright>
5 //------------------------------------------------------------------------------
6
7 // Don't entity encode high chars (160 to 256), to fix bugs VSWhidbey 85857/111927
8 // 
9 #define ENTITY_ENCODE_HIGH_ASCII_CHARS
10
11 namespace System.Net {
12     using System;
13     using System.Collections.Generic;
14 #if !FEATURE_NETCORE
15     using System.Configuration;
16 #endif
17     using System.Diagnostics;
18     using System.Diagnostics.CodeAnalysis;
19     using System.Globalization;
20     using System.IO;
21     using System.Net.Configuration;
22     using System.Runtime.Versioning;
23     using System.Text;
24 #if FEATURE_NETCORE
25     using System.Security;
26 #endif
27
28     public static class WebUtility
29     {
        // some consts copied from Char / CharUnicodeInfo since we don't have friend access to those types

        // First code unit of the UTF-16 high (leading) surrogate range.
        private const char HIGH_SURROGATE_START = '\uD800';
        // First code unit of the UTF-16 low (trailing) surrogate range.
        private const char LOW_SURROGATE_START = '\uDC00';
        // Last code unit of the UTF-16 low (trailing) surrogate range.
        private const char LOW_SURROGATE_END = '\uDFFF';
        // Largest value HtmlDecode writes as a single char; anything above is split into a surrogate pair.
        private const int UNICODE_PLANE00_END = 0x00FFFF;
        // Smallest scalar value that HtmlEncode emits as a numeric entity when it combines a surrogate pair.
        private const int UNICODE_PLANE01_START = 0x10000;
        // Largest numeric entity value accepted by HtmlDecode in the Strict and Loose modes.
        private const int UNICODE_PLANE16_END = 0x10FFFF;

        // Value returned by GetNextUnicodeScalarValueFromUtf16Surrogate for a surrogate
        // that cannot be combined into a pair (unmatched, or last char of the input).
        private const int UnicodeReplacementChar = '\uFFFD';

        // Characters that terminate the scan for the end of an entity in HtmlDecode:
        // ';' closes the entity, a second '&' means the first one did not start an entity.
        private static readonly char[] _htmlEntityEndingChars = new char[] { ';', '&' };

        // Cached conformance modes. 'Auto' means "not resolved yet"; the HtmlDecodeConformance /
        // HtmlEncodeConformance properties replace it with a concrete mode on first successful resolution.
        private static volatile UnicodeDecodingConformance _htmlDecodeConformance = UnicodeDecodingConformance.Auto;
        private static volatile UnicodeEncodingConformance _htmlEncodeConformance = UnicodeEncodingConformance.Auto;
44
45         #region HtmlEncode / HtmlDecode methods
46
47         public static string HtmlEncode(string value) {
48             if (String.IsNullOrEmpty(value)) {
49                 return value;
50             }
51
52             // Don't create string writer if we don't have nothing to encode
53             int index = IndexOfHtmlEncodingChars(value, 0);
54             if (index == -1) {
55                 return value;
56             }
57
58             StringWriter writer = new StringWriter(CultureInfo.InvariantCulture);
59             HtmlEncode(value, writer);
60             return writer.ToString();
61         }
62
#if FEATURE_NETCORE
        [SecuritySafeCritical]
#endif
        // HTML-encodes 'value' and writes the result to 'output'.
        //
        // value:  text to encode; null is a no-op (nothing is written).
        // output: destination writer; must not be null.
        // Throws ArgumentNullException when 'value' is non-null and 'output' is null.
        //
        // The characters < > " ' & are replaced by &lt; &gt; &quot; &#39; &amp;.
        // With ENTITY_ENCODE_HIGH_ASCII_CHARS defined, chars >= 160 are written as decimal
        // numeric entities (all non-surrogate chars under MONO, only 160..255 otherwise).
        // In Strict encode conformance, surrogates are resolved through
        // GetNextUnicodeScalarValueFromUtf16Surrogate: a valid pair becomes a single numeric
        // entity, anything else becomes the value that helper returns (U+FFFD).
        public static unsafe void HtmlEncode(string value, TextWriter output) {
            if (value == null) {
                return;
            }
            if (output == null) {
                throw new ArgumentNullException("output");
            }

            // Position of the first char that needs encoding; -1 means the string can be written verbatim.
            int index = IndexOfHtmlEncodingChars(value, 0);
            if (index == -1) {
                output.Write(value);
                return;
            }

            Debug.Assert(0 <= index && index <= value.Length, "0 <= index && index <= value.Length");

            UnicodeEncodingConformance encodeConformance = HtmlEncodeConformance;
            // Number of chars still to be examined once the clean prefix has been copied.
            int cch = value.Length - index;
            fixed (char* str = value) {
                char* pch = str;
                // Copy the prefix that is known to need no encoding.
                while (index-- > 0) {
                    output.Write(*pch++);
                }

                // Encode the remainder one UTF-16 code unit at a time.
                for (; cch > 0; cch--, pch++) {
                    char ch = *pch;
                    // All five specially-encoded ASCII chars are <= '>', so one comparison
                    // routes every other low char to the default (verbatim) case.
                    if (ch <= '>') {
                        switch (ch) {
                            case '<':
                                output.Write("&lt;");
                                break;
                            case '>':
                                output.Write("&gt;");
                                break;
                            case '"':
                                output.Write("&quot;");
                                break;
                            case '\'':
                                output.Write("&#39;");
                                break;
                            case '&':
                                output.Write("&amp;");
                                break;
                            default:
                                output.Write(ch);
                                break;
                        }
                    }
                    else {
                        int valueToEncode = -1; // set to >= 0 if needs to be encoded

#if ENTITY_ENCODE_HIGH_ASCII_CHARS

#if MONO
                        // MS starts encoding with &# from 160 and stops at 255.
                        // We don't do that. One reason is the 65308/65310 unicode
                        // characters that look like '<' and '>'.
                        if (ch >= 160 && !char.IsSurrogate (ch)) {
                            valueToEncode = ch;
#else
                        if (ch >= 160 && ch < 256) {
                            // The seemingly arbitrary 160 comes from RFC
                            valueToEncode = ch;
#endif
                        // The brace below closes whichever 'if' was compiled in above; the
                        // surrogate check that follows the #endif is its else-branch.
                        } else
#endif // ENTITY_ENCODE_HIGH_ASCII_CHARS
                        if (encodeConformance == UnicodeEncodingConformance.Strict && Char.IsSurrogate(ch)) {
                            // May advance pch and decrement cch by one extra position when a
                            // full surrogate pair is consumed (both are passed by ref).
                            int scalarValue = GetNextUnicodeScalarValueFromUtf16Surrogate(ref pch, ref cch);
                            if (scalarValue >= UNICODE_PLANE01_START) {
                                valueToEncode = scalarValue;
                            }
                            else {
                                // Don't encode BMP characters (like U+FFFD) since they wouldn't have
                                // been encoded if explicitly present in the string anyway.
                                ch = (char)scalarValue;
                            }
                        }

                        if (valueToEncode >= 0) {
                            // value needs to be encoded
                            // Emitted as a decimal numeric character reference: &#NNN;
                            output.Write("&#");
                            output.Write(valueToEncode.ToString(NumberFormatInfo.InvariantInfo));
                            output.Write(';');
                        }
                        else {
                            // write out the character directly
                            output.Write(ch);
                        }
                    }
                }
            }
        }
158
159         public static string HtmlDecode(string value) {
160             if (String.IsNullOrEmpty(value)) {
161                 return value;
162             }
163
164             // Don't create string writer if we don't have nothing to encode
165             if (!StringRequiresHtmlDecoding(value)) {
166                 return value;
167             }
168
169             StringWriter writer = new StringWriter(CultureInfo.InvariantCulture);
170             HtmlDecode(value, writer);
171             return writer.ToString();
172         }
173
        // NOTE(review): the MessageId and Justification below name UInt16.TryParse, but the
        // body calls UInt32.TryParse and does check its result - the suppression looks stale.
        [SuppressMessage("Microsoft.Usage", "CA1806:DoNotIgnoreMethodResults", MessageId = "System.UInt16.TryParse(System.String,System.Globalization.NumberStyles,System.IFormatProvider,System.UInt16@)", Justification="UInt16.TryParse guarantees that result is zero if the parse fails.")]
        // HTML-decodes 'value' and writes the result to 'output'.
        //
        // value:  text to decode; null is a no-op (nothing is written).
        // output: destination writer; must not be null.
        // Throws ArgumentNullException when 'value' is non-null and 'output' is null.
        //
        // Numeric entities (&#NNN; / &#xHH;) are decoded subject to the current decode
        // conformance mode; named entities are resolved through HtmlEntities.Lookup.
        // Anything that does not decode is copied to the output unchanged.
        public static void HtmlDecode(string value, TextWriter output) {
            if (value == null) {
                return;
            }
            if (output == null) {
                throw new ArgumentNullException("output");
            }

            if (!StringRequiresHtmlDecoding(value)) {
                output.Write(value);        // good as is
                return;
            }

            UnicodeDecodingConformance decodeConformance = HtmlDecodeConformance;
            int l = value.Length;
            for (int i = 0; i < l; i++) {
                char ch = value[i];

                if (ch == '&') {
                    // We found a '&'. Now look for the next ';' or '&'. The idea is that
                    // if we find another '&' before finding a ';', then this is not an entity,
                    // and the next '&' might start a real entity (VSWhidbey 275184)
                    int index = value.IndexOfAny(_htmlEntityEndingChars, i + 1);
                    if (index > 0 && value[index] == ';') {
                        // Text between '&' and ';', exclusive of both.
                        string entity = value.Substring(i + 1, index - i - 1);

                        if (entity.Length > 1 && entity[0] == '#') {
                            // The # syntax can be in decimal or hex, e.g.
                            //      &#229;  --> decimal
                            //      &#xE5;  --> same char in hex
                            // See http://www.w3.org/TR/REC-html40/charset.html#entities

                            bool parsedSuccessfully;
                            uint parsedValue;
                            if (entity[1] == 'x' || entity[1] == 'X') {
                                // Skip the leading "#x" / "#X".
                                parsedSuccessfully = UInt32.TryParse(entity.Substring(2), NumberStyles.AllowHexSpecifier, NumberFormatInfo.InvariantInfo, out parsedValue);
                            }
                            else {
                                // Skip the leading "#".
                                parsedSuccessfully = UInt32.TryParse(entity.Substring(1), NumberStyles.Integer, NumberFormatInfo.InvariantInfo, out parsedValue);
                            }

                            // A syntactically valid number must still fall in the range the
                            // active conformance mode allows.
                            if (parsedSuccessfully) {
                                switch (decodeConformance) {
                                    case UnicodeDecodingConformance.Strict:
                                        // decoded character must be U+0000 .. U+10FFFF, excluding surrogates
                                        parsedSuccessfully = ((parsedValue < HIGH_SURROGATE_START) || (LOW_SURROGATE_END < parsedValue && parsedValue <= UNICODE_PLANE16_END));
                                        break;

                                    case UnicodeDecodingConformance.Compat:
                                        // decoded character must be U+0001 .. U+FFFF
                                        // null chars disallowed for compat with 4.0
                                        parsedSuccessfully = (0 < parsedValue && parsedValue <= UNICODE_PLANE00_END);
                                        break;

                                    case UnicodeDecodingConformance.Loose:
                                        // decoded character must be U+0000 .. U+10FFFF
                                        parsedSuccessfully = (parsedValue <= UNICODE_PLANE16_END);
                                        break;

                                    default:
                                        Debug.Assert(false, "Should never get here!");
                                        parsedSuccessfully = false;
                                        break;
                                }
                            }

                            if (parsedSuccessfully) {
                                if (parsedValue <= UNICODE_PLANE00_END) {
                                    // single character
                                    output.Write((char)parsedValue);
                                }
                                else {
                                    // multi-character
                                    char leadingSurrogate, trailingSurrogate;
                                    ConvertSmpToUtf16(parsedValue, out leadingSurrogate, out trailingSurrogate);
                                    output.Write(leadingSurrogate);
                                    output.Write(trailingSurrogate);
                                }
                                
                                i = index; // already looked at everything until semicolon
                                continue;
                            }
                            // Rejected numeric entity: 'i' is left untouched, so control falls through
                            // to the final Write below, which emits the '&' itself; the rest of the
                            // entity text is then processed char by char on the following iterations.
                        }
                        else {
                            // Named entity (also reached for "&;" and "&#;", which fail the
                            // Length/'#' test above).
                            i = index; // already looked at everything until semicolon

                            // A (char)0 result is treated as "no such entity".
                            char entityChar = HtmlEntities.Lookup(entity);
                            if (entityChar != (char)0) {
                                // Known entity: the replacement char is written by the final Write below.
                                ch = entityChar;
                            }
                            else {
                                // Unknown entity: reproduce the original text unchanged.
                                output.Write('&');
                                output.Write(entity);
                                output.Write(';');
                                continue;
                            }
                        }

                    }
                }

                // Reached for ordinary chars, for a '&' that did not introduce a decodable
                // entity, and for the replacement char of a resolved named entity.
                output.Write(ch);
            }
        }
279
#if FEATURE_NETCORE
        [SecuritySafeCritical]
#endif
        // Returns the index of the first char in 's', at or after 'startPos', that HtmlEncode
        // would not write verbatim, or -1 when there is none.
        //
        // s:        string to scan; dereferenced without a null check.
        // startPos: index to start scanning from; expected to satisfy 0 <= startPos <= s.Length
        //           (checked only by Debug.Assert).
        //
        // The conditions here mirror those in HtmlEncode(string, TextWriter), except that the
        // MONO high-char test in this method does not exclude surrogates.
        private static unsafe int IndexOfHtmlEncodingChars(string s, int startPos) {
            Debug.Assert(0 <= startPos && startPos <= s.Length, "0 <= startPos && startPos <= s.Length");

            UnicodeEncodingConformance encodeConformance = HtmlEncodeConformance;
            // Chars left to examine; 's.Length - cch' is therefore the index of the current char.
            int cch = s.Length - startPos;
            fixed (char* str = s) {
                for (char* pch = &str[startPos]; cch > 0; pch++, cch--) {
                    char ch = *pch;
                    // All five specially-encoded ASCII chars are <= '>'.
                    if (ch <= '>') {
                        switch (ch) {
                            case '<':
                            case '>':
                            case '"':
                            case '\'':
                            case '&':
                                return s.Length - cch;
                        }
                    }
#if ENTITY_ENCODE_HIGH_ASCII_CHARS
                    // High chars: everything >= 160 under MONO, only 160..255 otherwise.
                    else if (ch >= 160 
#if !MONO
                                                && ch < 256
#endif
                                        ) {
                        return s.Length - cch;
                    }
#endif // ENTITY_ENCODE_HIGH_ASCII_CHARS
                    // In Strict mode surrogates are rewritten by HtmlEncode, so they count as well.
                    else if (encodeConformance == UnicodeEncodingConformance.Strict && Char.IsSurrogate(ch)) {
                        return s.Length - cch;
                    }
                }
            }

            return -1;
        }
318
319         private static UnicodeDecodingConformance HtmlDecodeConformance {
320             get {
321                 if (_htmlDecodeConformance != UnicodeDecodingConformance.Auto) {
322                     return _htmlDecodeConformance;
323                 }
324     
325                 UnicodeDecodingConformance defaultDecodeConformance = (BinaryCompatibility.TargetsAtLeast_Desktop_V4_5) ? UnicodeDecodingConformance.Strict : UnicodeDecodingConformance.Compat;
326                 UnicodeDecodingConformance decodingConformance = defaultDecodeConformance;
327
328 #if !FEATURE_NETCORE && !MOBILE
329                 try {
330                     // Read from config
331                     decodingConformance = SettingsSectionInternal.Section.WebUtilityUnicodeDecodingConformance;
332                     // Normalize conformance settings (turn 'Auto' into the actual setting)
333                     if (decodingConformance <= UnicodeDecodingConformance.Auto || decodingConformance > UnicodeDecodingConformance.Loose) {
334                         decodingConformance = defaultDecodeConformance;
335                     }
336                 }
337                 catch (ConfigurationException) {
338                     // Continue with default values
339                     // HtmlDecode and related methods can still be called and format the error page intended for the client
340                     // No need to retry again to initialize from the config in case of config errors
341                     decodingConformance = defaultDecodeConformance;
342                 }
343                 catch {
344                     // DevDiv: 642025
345                     // ASP.NET uses own ConfigurationManager which can throw in more situations than config errors (i.e. BadRequest)
346                     // It's ok to swallow the exception here and continue using the default value
347                     // Try to initialize again the next time
348                     return defaultDecodeConformance;
349                 }
350 #endif
351                 _htmlDecodeConformance = decodingConformance;
352
353                 return _htmlDecodeConformance;
354             }
355         }
356
357         private static UnicodeEncodingConformance HtmlEncodeConformance {
358             get {
359                 if (_htmlEncodeConformance != UnicodeEncodingConformance.Auto) {
360                     return _htmlEncodeConformance;
361                 }
362     
363                 UnicodeEncodingConformance defaultEncodeConformance = (BinaryCompatibility.TargetsAtLeast_Desktop_V4_5) ? UnicodeEncodingConformance.Strict : UnicodeEncodingConformance.Compat;
364                 UnicodeEncodingConformance encodingConformance = defaultEncodeConformance;
365
366 #if !FEATURE_NETCORE && !MOBILE
367                 try {
368                     // Read from config
369                     encodingConformance = SettingsSectionInternal.Section.WebUtilityUnicodeEncodingConformance;
370
371                     // Normalize conformance settings (turn 'Auto' into the actual setting)
372                     if (encodingConformance <= UnicodeEncodingConformance.Auto || encodingConformance > UnicodeEncodingConformance.Compat) {
373                         encodingConformance = defaultEncodeConformance;
374                     }
375                 }
376                 catch (ConfigurationException) {
377                     // Continue with default values
378                     // HtmlEncode and related methods can still be called and format the error page intended for the client
379                     // No need to retry again to initialize from the config in case of config errors
380                     encodingConformance = defaultEncodeConformance;
381                 }
382                 catch {
383                     // DevDiv: 642025
384                     // ASP.NET uses own ConfigurationManager which can throw in more situations than config errors (i.e. BadRequest)
385                     // It's ok to swallow the exception here and continue using the default value
386                     // Try to initialize again the next time
387                     return defaultEncodeConformance;
388                 }
389 #endif
390                 _htmlEncodeConformance = encodingConformance;
391
392                 return _htmlEncodeConformance;
393             }
394         }
395
396         #endregion
397
398         #region UrlEncode implementation
399
400         // *** Source: alm/tfs_core/Framework/Common/UriUtility/HttpUtility.cs
401         // This specific code was copied from above ASP.NET codebase.
402
403         private static byte[] UrlEncode(byte[] bytes, int offset, int count, bool alwaysCreateNewReturnValue)
404         {
405             byte[] encoded = UrlEncode(bytes, offset, count);
406
407             return (alwaysCreateNewReturnValue && (encoded != null) && (encoded == bytes))
408                 ? (byte[])encoded.Clone()
409                 : encoded;
410         }
411
412         private static byte[] UrlEncode(byte[] bytes, int offset, int count)
413         {
414             if (!ValidateUrlEncodingParameters(bytes, offset, count))
415             {
416                 return null;
417             }
418
419             int cSpaces = 0;
420             int cUnsafe = 0;
421
422             // count them first
423             for (int i = 0; i < count; i++)
424             {
425                 char ch = (char)bytes[offset + i];
426
427                 if (ch == ' ')
428                     cSpaces++;
429                 else if (!IsUrlSafeChar(ch))
430                     cUnsafe++;
431             }
432
433             // nothing to expand?
434             if (cSpaces == 0 && cUnsafe == 0) {
435                 // DevDiv 912606: respect "offset" and "count"
436                 if (0 == offset && bytes.Length == count) {
437                     return bytes;
438                 }
439                 else {
440                     var subarray = new byte[count];
441                     Buffer.BlockCopy(bytes, offset, subarray, 0, count);
442                     return subarray;
443                 }
444             }
445
446             // expand not 'safe' characters into %XX, spaces to +s
447             byte[] expandedBytes = new byte[count + cUnsafe * 2];
448             int pos = 0;
449
450             for (int i = 0; i < count; i++)
451             {
452                 byte b = bytes[offset + i];
453                 char ch = (char)b;
454
455                 if (IsUrlSafeChar(ch))
456                 {
457                     expandedBytes[pos++] = b;
458                 }
459                 else if (ch == ' ')
460                 {
461                     expandedBytes[pos++] = (byte)'+';
462                 }
463                 else
464                 {
465                     expandedBytes[pos++] = (byte)'%';
466                     expandedBytes[pos++] = (byte)IntToHex((b >> 4) & 0xf);
467                     expandedBytes[pos++] = (byte)IntToHex(b & 0x0f);
468                 }
469             }
470
471             return expandedBytes;
472         }
473
474         #endregion
475
476         #region UrlEncode public methods
477
478         [SuppressMessage("Microsoft.Design", "CA1055:UriReturnValuesShouldNotBeStrings", Justification="Already shipped public API; code moved here as part of API consolidation")]
479         public static string UrlEncode(string value)
480         {
481             if (value == null)
482                 return null;
483
484             byte[] bytes = Encoding.UTF8.GetBytes(value);
485             return Encoding.UTF8.GetString(UrlEncode(bytes, 0, bytes.Length, false /* alwaysCreateNewReturnValue */));
486         }
487
488         public static byte[] UrlEncodeToBytes(byte[] value, int offset, int count)
489         {
490             return UrlEncode(value, offset, count, true /* alwaysCreateNewReturnValue */);
491         }
492
493         #endregion
494
495         #region UrlDecode implementation
496
497         // *** Source: alm/tfs_core/Framework/Common/UriUtility/HttpUtility.cs
498         // This specific code was copied from above ASP.NET codebase.
499         // Changes done - Removed the logic to handle %Uxxxx as it is not standards compliant.
500
501         private static string UrlDecodeInternal(string value, Encoding encoding)
502         {
503             if (value == null)
504             {
505                 return null;
506             }
507
508             int count = value.Length;
509             UrlDecoder helper = new UrlDecoder(count, encoding);
510
511             // go through the string's chars collapsing %XX and
512             // appending each char as char, with exception of %XX constructs
513             // that are appended as bytes
514
515             for (int pos = 0; pos < count; pos++)
516             {
517                 char ch = value[pos];
518
519                 if (ch == '+')
520                 {
521                     ch = ' ';
522                 }
523                 else if (ch == '%' && pos < count - 2)
524                 {
525                     int h1 = HexToInt(value[pos + 1]);
526                     int h2 = HexToInt(value[pos + 2]);
527
528                     if (h1 >= 0 && h2 >= 0)
529                     {     // valid 2 hex chars
530                         byte b = (byte)((h1 << 4) | h2);
531                         pos += 2;
532
533                         // don't add as char
534                         helper.AddByte(b);
535                         continue;
536                     }
537                 }
538
539                 if ((ch & 0xFF80) == 0)
540                     helper.AddByte((byte)ch); // 7 bit have to go as bytes because of Unicode
541                 else
542                     helper.AddChar(ch);
543             }
544
545             return helper.GetString();
546         }
547
548         private static byte[] UrlDecodeInternal(byte[] bytes, int offset, int count)
549         {
550             if (!ValidateUrlEncodingParameters(bytes, offset, count))
551             {
552                 return null;
553             }
554
555             int decodedBytesCount = 0;
556             byte[] decodedBytes = new byte[count];
557
558             for (int i = 0; i < count; i++)
559             {
560                 int pos = offset + i;
561                 byte b = bytes[pos];
562
563                 if (b == '+')
564                 {
565                     b = (byte)' ';
566                 }
567                 else if (b == '%' && i < count - 2)
568                 {
569                     int h1 = HexToInt((char)bytes[pos + 1]);
570                     int h2 = HexToInt((char)bytes[pos + 2]);
571
572                     if (h1 >= 0 && h2 >= 0)
573                     {     // valid 2 hex chars
574                         b = (byte)((h1 << 4) | h2);
575                         i += 2;
576                     }
577                 }
578
579                 decodedBytes[decodedBytesCount++] = b;
580             }
581
582             if (decodedBytesCount < decodedBytes.Length)
583             {
584                 byte[] newDecodedBytes = new byte[decodedBytesCount];
585                 Array.Copy(decodedBytes, newDecodedBytes, decodedBytesCount);
586                 decodedBytes = newDecodedBytes;
587             }
588
589             return decodedBytes;
590         }
591
592         #endregion
593
594         #region UrlDecode public methods
595
596
597         [SuppressMessage("Microsoft.Design", "CA1055:UriReturnValuesShouldNotBeStrings", Justification="Already shipped public API; code moved here as part of API consolidation")]
598         public static string UrlDecode(string encodedValue)
599         {
600             if (encodedValue == null)
601                 return null;
602
603             return UrlDecodeInternal(encodedValue, Encoding.UTF8);
604         }
605
606         public static byte[] UrlDecodeToBytes(byte[] encodedValue, int offset, int count)
607         {
608             return UrlDecodeInternal(encodedValue, offset, count);
609         }
610
611         #endregion
612
613         #region Helper methods
614
615         // similar to Char.ConvertFromUtf32, but doesn't check arguments or generate strings
616         // input is assumed to be an SMP character
617         private static void ConvertSmpToUtf16(uint smpChar, out char leadingSurrogate, out char trailingSurrogate) {
618             Debug.Assert(UNICODE_PLANE01_START <= smpChar && smpChar <= UNICODE_PLANE16_END);
619
620             int utf32 = (int)(smpChar - UNICODE_PLANE01_START);
621             leadingSurrogate = (char)((utf32 / 0x400) + HIGH_SURROGATE_START);
622             trailingSurrogate = (char)((utf32 % 0x400) + LOW_SURROGATE_START);
623         }
624
#if FEATURE_NETCORE
        [SecuritySafeCritical]
#endif
        // Reads the scalar value that starts at the surrogate *pch.
        // For a valid surrogate pair the combined code point is returned, pch is advanced
        // by one and charsRemaining is decremented by one, so that the caller's own
        // per-char advance steps over the second half of the pair.
        // For a surrogate that has no partner (last char of the input, or not part of a
        // valid pair) UnicodeReplacementChar is returned and nothing is consumed.
        private static unsafe int GetNextUnicodeScalarValueFromUtf16Surrogate(ref char* pch, ref int charsRemaining) {
            // invariants
            Debug.Assert(charsRemaining >= 1);
            Debug.Assert(Char.IsSurrogate(*pch));

            // A lone surrogate at the very end: there is no second char to look at.
            if (charsRemaining <= 1) {
                return UnicodeReplacementChar;
            }

            char first = pch[0];
            char second = pch[1];

            // Unmatched surrogate.
            if (!Char.IsSurrogatePair(first, second)) {
                return UnicodeReplacementChar;
            }

            // Valid pair: consume the extra char.
            pch++;
            charsRemaining--;

            // Same arithmetic as Char.ConvertToUtf32, minus the checks already done above.
            int leadingPart = (first - HIGH_SURROGATE_START) * 0x400;
            int trailingPart = second - LOW_SURROGATE_START;
            return (leadingPart + trailingPart + UNICODE_PLANE01_START);
        }
654
655         private static int HexToInt(char h)
656         {
657             return (h >= '0' && h <= '9') ? h - '0' :
658             (h >= 'a' && h <= 'f') ? h - 'a' + 10 :
659             (h >= 'A' && h <= 'F') ? h - 'A' + 10 :
660             -1;
661         }
662
663         private static char IntToHex(int n)
664         {
665             Debug.Assert(n < 0x10);
666
667             if (n <= 9)
668                 return (char)(n + (int)'0');
669             else
670                 return (char)(n - 10 + (int)'A');
671         }
672
673         // Set of safe chars, from RFC 1738.4 minus '+'
674         private static bool IsUrlSafeChar(char ch)
675         {
676             if (ch >= 'a' && ch <= 'z' || ch >= 'A' && ch <= 'Z' || ch >= '0' && ch <= '9')
677                 return true;
678
679             switch (ch)
680             {
681                 case '-':
682                 case '_':
683                 case '.':
684                 case '!':
685                 case '*':
686                 case '(':
687                 case ')':
688                     return true;
689             }
690
691             return false;
692         }
693
694         private static bool ValidateUrlEncodingParameters(byte[] bytes, int offset, int count)
695         {
696             if (bytes == null && count == 0)
697                 return false;
698             if (bytes == null)
699             {
700                 throw new ArgumentNullException("bytes");
701             }
702             if (offset < 0 || offset > bytes.Length)
703             {
704                 throw new ArgumentOutOfRangeException("offset");
705             }
706             if (count < 0 || offset + count > bytes.Length)
707             {
708                 throw new ArgumentOutOfRangeException("count");
709             }
710
711             return true;
712         }
713
714         private static bool StringRequiresHtmlDecoding(string s) {
715             if (HtmlDecodeConformance == UnicodeDecodingConformance.Compat) {
716                 // this string requires html decoding only if it contains '&'
717                 return (s.IndexOf('&') >= 0);
718             }
719             else {
720                 // this string requires html decoding if it contains '&' or a surrogate character
721                 for (int i = 0; i < s.Length; i++) {
722                     char c = s[i];
723                     if (c == '&' || Char.IsSurrogate(c)) {
724                         return true;
725                     }
726                 }
727                 return false;
728             }
729         }
730
731         #endregion
732
733         #region UrlDecoder nested class
734
735         // *** Source: alm/tfs_core/Framework/Common/UriUtility/HttpUtility.cs
736         // This specific code was copied from above ASP.NET codebase.
737
// Internal helper that accumulates the output of URL decoding. Literal characters are
// appended straight to a char buffer; raw bytes are collected in a separate byte buffer
// and converted to characters with the supplied encoding as soon as a literal character
// is appended or the final string is requested. Both buffers are allocated with the
// size given to the constructor and never grow.
private class UrlDecoder
{
    // Encoding used to turn the pending bytes into characters
    private Encoding _encoding;

    // Capacity shared by the char buffer and the byte buffer
    private int _bufferSize;

    // Characters produced so far, and how many of them are valid
    private char[] _charBuffer;
    private int _numChars;

    // Bytes still waiting to be decoded, and how many of them are pending
    private byte[] _byteBuffer;
    private int _numBytes;

    internal UrlDecoder(int bufferSize, Encoding encoding)
    {
        _encoding = encoding;
        _bufferSize = bufferSize;

        // only the char buffer is allocated up front; the byte buffer is created
        // by the first AddByte call
        _charBuffer = new char[bufferSize];
    }

    // Queues one raw byte for decoding.
    internal void AddByte(byte b)
    {
        if (_byteBuffer == null)
        {
            _byteBuffer = new byte[_bufferSize];
        }

        _byteBuffer[_numBytes++] = b;
    }

    // Appends one literal character after decoding any bytes queued before it.
    internal void AddChar(char ch)
    {
        FlushBytes();

        _charBuffer[_numChars++] = ch;
    }

    // Returns everything accumulated so far, or String.Empty when nothing was added.
    internal String GetString()
    {
        FlushBytes();

        return _numChars > 0 ? new String(_charBuffer, 0, _numChars) : String.Empty;
    }

    // Decodes the queued bytes onto the end of the char buffer and empties the queue.
    private void FlushBytes()
    {
        if (_numBytes <= 0)
        {
            return;
        }

        int decodedCount = _encoding.GetChars(_byteBuffer, 0, _numBytes, _charBuffer, _numChars);
        _numChars += decodedCount;
        _numBytes = 0;
    }
}
799
800         #endregion
801
802         #region HtmlEntities nested class
803
804         // helper class for lookup of HTML encoding entities
805         private static class HtmlEntities {
806
807 #if MONO
// Returns the character for the given entity name, or '\0' when the name cannot be
// packed into a key or is not present in the entities table.
public static char Lookup (string entity)
{
    long packedName = CalculateKeyValue (entity);
    if (packedName != 0) {
        // entities and entities_values are parallel arrays: the position of the
        // key is also the position of its character
        int position = Array.BinarySearch (entities, packedName);
        if (position >= 0)
            return entities_values [position];
    }

    return '\0';
}
822
// Packs a name of at most 8 characters into a long, one character per byte, with the
// first character in the most significant byte and unused low bytes left at zero.
// Returns 0 when the name is longer than 8 characters or contains a character
// outside the range '0'..'z'; an empty name also yields 0.
static long CalculateKeyValue (string s)
{
    const int MaxNameLength = 8;
    if (s.Length > MaxNameLength)
        return 0;

    long packed = 0;
    int shift = (MaxNameLength - 1) * 8;
    foreach (char c in s) {
        if (c < '0' || c > 'z')
            return 0;

        packed |= (long) c << shift;
        shift -= 8;
    }

    return packed;
}
839
// Packed keys of every entity name that Lookup recognizes. Lookup runs
// Array.BinarySearch over this array and uses the resulting index into
// entities_values, so the names below must stay sorted in ordinal order and must
// keep the same order as entities_values.
static readonly long[] entities = PackEntityNames (new string[] {
    "AElig", "Aacute", "Acirc", "Agrave", "Alpha", "Aring", "Atilde", "Auml",
    "Beta",
    "Ccedil", "Chi",
    "Dagger", "Delta",
    "ETH", "Eacute", "Ecirc", "Egrave", "Epsilon", "Eta", "Euml",
    "Gamma",
    "Iacute", "Icirc", "Igrave", "Iota", "Iuml",
    "Kappa",
    "Lambda",
    "Mu",
    "Ntilde", "Nu",
    "OElig", "Oacute", "Ocirc", "Ograve", "Omega", "Omicron", "Oslash", "Otilde", "Ouml",
    "Phi", "Pi", "Prime", "Psi",
    "Rho",
    "Scaron", "Sigma",
    "THORN", "Tau", "Theta",
    "Uacute", "Ucirc", "Ugrave", "Upsilon", "Uuml",
    "Xi",
    "Yacute", "Yuml",
    "Zeta",
    "aacute", "acirc", "acute", "aelig", "agrave", "alefsym", "alpha", "amp",
    "and", "ang", "apos", "aring", "asymp", "atilde", "auml",
    "bdquo", "beta", "brvbar", "bull",
    "cap", "ccedil", "cedil", "cent", "chi", "circ", "clubs", "cong",
    "copy", "crarr", "cup", "curren",
    "dArr", "dagger", "darr", "deg", "delta", "diams", "divide",
    "eacute", "ecirc", "egrave", "empty", "emsp", "ensp", "epsilon", "equiv",
    "eta", "eth", "euml", "euro", "exist",
    "fnof", "forall", "frac12", "frac14", "frac34", "frasl",
    "gamma", "ge", "gt",
    "hArr", "harr", "hearts", "hellip",
    "iacute", "icirc", "iexcl", "igrave", "image", "infin", "int", "iota",
    "iquest", "isin", "iuml",
    "kappa",
    "lArr", "lambda", "lang", "laquo", "larr", "lceil", "ldquo", "le",
    "lfloor", "lowast", "loz", "lrm", "lsaquo", "lsquo", "lt",
    "macr", "mdash", "micro", "middot", "minus", "mu",
    "nabla", "nbsp", "ndash", "ne", "ni", "not", "notin", "nsub", "ntilde", "nu",
    "oacute", "ocirc", "oelig", "ograve", "oline", "omega", "omicron", "oplus",
    "or", "ordf", "ordm", "oslash", "otilde", "otimes", "ouml",
    "para", "part", "permil", "perp", "phi", "pi", "piv", "plusmn",
    "pound", "prime", "prod", "prop", "psi",
    "quot",
    "rArr", "radic", "rang", "raquo", "rarr", "rceil", "rdquo", "real",
    "reg", "rfloor", "rho", "rlm", "rsaquo", "rsquo",
    "sbquo", "scaron", "sdot", "sect", "shy", "sigma", "sigmaf", "sim",
    "spades", "sub", "sube", "sum", "sup", "sup1", "sup2", "sup3", "supe", "szlig",
    "tau", "there4", "theta", "thetasym", "thinsp", "thorn", "tilde", "times", "trade",
    "uArr", "uacute", "uarr", "ucirc", "ugrave", "uml", "upsih", "upsilon", "uuml",
    "weierp",
    "xi",
    "yacute", "yen", "yuml",
    "zeta", "zwj", "zwnj"
});

// Builds the key array for the given names, in the same order, using the same packing
// function that Lookup applies to the names it searches for.
static long[] PackEntityNames (string[] names)
{
    long[] keys = new long [names.Length];
    for (int i = 0; i < names.Length; ++i)
        keys [i] = CalculateKeyValue (names [i]);

    return keys;
}
1096
1097             static readonly char[] entities_values = new char[] {
1098                 '\u00C6',
1099                 '\u00C1',
1100                 '\u00C2',
1101                 '\u00C0',
1102                 '\u0391',
1103                 '\u00C5',
1104                 '\u00C3',
1105                 '\u00C4',
1106                 '\u0392',
1107                 '\u00C7',
1108                 '\u03A7',
1109                 '\u2021',
1110                 '\u0394',
1111                 '\u00D0',
1112                 '\u00C9',
1113                 '\u00CA',
1114                 '\u00C8',
1115                 '\u0395',
1116                 '\u0397',
1117                 '\u00CB',
1118                 '\u0393',
1119                 '\u00CD',
1120                 '\u00CE',
1121                 '\u00CC',
1122                 '\u0399',
1123                 '\u00CF',
1124                 '\u039A',
1125                 '\u039B',
1126                 '\u039C',
1127                 '\u00D1',
1128                 '\u039D',
1129                 '\u0152',
1130                 '\u00D3',
1131                 '\u00D4',
1132                 '\u00D2',
1133                 '\u03A9',
1134                 '\u039F',
1135                 '\u00D8',
1136                 '\u00D5',
1137                 '\u00D6',
1138                 '\u03A6',
1139                 '\u03A0',
1140                 '\u2033',
1141                 '\u03A8',
1142                 '\u03A1',
1143                 '\u0160',
1144                 '\u03A3',
1145                 '\u00DE',
1146                 '\u03A4',
1147                 '\u0398',
1148                 '\u00DA',
1149                 '\u00DB',
1150                 '\u00D9',
1151                 '\u03A5',
1152                 '\u00DC',
1153                 '\u039E',
1154                 '\u00DD',
1155                 '\u0178',
1156                 '\u0396',
1157                 '\u00E1',
1158                 '\u00E2',
1159                 '\u00B4',
1160                 '\u00E6',
1161                 '\u00E0',
1162                 '\u2135',
1163                 '\u03B1',
1164                 '\u0026',
1165                 '\u2227',
1166                 '\u2220',
1167                 '\u0027',
1168                 '\u00E5',
1169                 '\u2248',
1170                 '\u00E3',
1171                 '\u00E4',
1172                 '\u201E',
1173                 '\u03B2',
1174                 '\u00A6',
1175                 '\u2022',
1176                 '\u2229',
1177                 '\u00E7',
1178                 '\u00B8',
1179                 '\u00A2',
1180                 '\u03C7',
1181                 '\u02C6',
1182                 '\u2663',
1183                 '\u2245',
1184                 '\u00A9',
1185                 '\u21B5',
1186                 '\u222A',
1187                 '\u00A4',
1188                 '\u21D3',
1189                 '\u2020',
1190                 '\u2193',
1191                 '\u00B0',
1192                 '\u03B4',
1193                 '\u2666',
1194                 '\u00F7',
1195                 '\u00E9',
1196                 '\u00EA',
1197                 '\u00E8',
1198                 '\u2205',
1199                 '\u2003',
1200                 '\u2002',
1201                 '\u03B5',
1202                 '\u2261',
1203                 '\u03B7',
1204                 '\u00F0',
1205                 '\u00EB',
1206                 '\u20AC',
1207                 '\u2203',
1208                 '\u0192',
1209                 '\u2200',
1210                 '\u00BD',
1211                 '\u00BC',
1212                 '\u00BE',
1213                 '\u2044',
1214                 '\u03B3',
1215                 '\u2265',
1216                 '\u003E',
1217                 '\u21D4',
1218                 '\u2194',
1219                 '\u2665',
1220                 '\u2026',
1221                 '\u00ED',
1222                 '\u00EE',
1223                 '\u00A1',
1224                 '\u00EC',
1225                 '\u2111',
1226                 '\u221E',
1227                 '\u222B',
1228                 '\u03B9',
1229                 '\u00BF',
1230                 '\u2208',
1231                 '\u00EF',
1232                 '\u03BA',
1233                 '\u21D0',
1234                 '\u03BB',
1235                 '\u2329',
1236                 '\u00AB',
1237                 '\u2190',
1238                 '\u2308',
1239                 '\u201C',
1240                 '\u2264',
1241                 '\u230A',
1242                 '\u2217',
1243                 '\u25CA',
1244                 '\u200E',
1245                 '\u2039',
1246                 '\u2018',
1247                 '\u003C',
1248                 '\u00AF',
1249                 '\u2014',
1250                 '\u00B5',
1251                 '\u00B7',
1252                 '\u2212',
1253                 '\u03BC',
1254                 '\u2207',
1255                 '\u00A0',
1256                 '\u2013',
1257                 '\u2260',
1258                 '\u220B',
1259                 '\u00AC',
1260                 '\u2209',
1261                 '\u2284',
1262                 '\u00F1',
1263                 '\u03BD',
1264                 '\u00F3',
1265                 '\u00F4',
1266                 '\u0153',
1267                 '\u00F2',
1268                 '\u203E',
1269                 '\u03C9',
1270                 '\u03BF',
1271                 '\u2295',
1272                 '\u2228',
1273                 '\u00AA',
1274                 '\u00BA',
1275                 '\u00F8',
1276                 '\u00F5',
1277                 '\u2297',
1278                 '\u00F6',
1279                 '\u00B6',
1280                 '\u2202',
1281                 '\u2030',
1282                 '\u22A5',
1283                 '\u03C6',
1284                 '\u03C0',
1285                 '\u03D6',
1286                 '\u00B1',
1287                 '\u00A3',
1288                 '\u2032',
1289                 '\u220F',
1290                 '\u221D',
1291                 '\u03C8',
1292                 '\u0022',
1293                 '\u21D2',
1294                 '\u221A',
1295                 '\u232A',
1296                 '\u00BB',
1297                 '\u2192',
1298                 '\u2309',
1299                 '\u201D',
1300                 '\u211C',
1301                 '\u00AE',
1302                 '\u230B',
1303                 '\u03C1',
1304                 '\u200F',
1305                 '\u203A',
1306                 '\u2019',
1307                 '\u201A',
1308                 '\u0161',
1309                 '\u22C5',
1310                 '\u00A7',
1311                 '\u00AD',
1312                 '\u03C3',
1313                 '\u03C2',
1314                 '\u223C',
1315                 '\u2660',
1316                 '\u2282',
1317                 '\u2286',
1318                 '\u2211',
1319                 '\u2283',
1320                 '\u00B9',
1321                 '\u00B2',
1322                 '\u00B3',
1323                 '\u2287',
1324                 '\u00DF',
1325                 '\u03C4',
1326                 '\u2234',
1327                 '\u03B8',
1328                 '\u03D1',
1329                 '\u2009',
1330                 '\u00FE',
1331                 '\u02DC',
1332                 '\u00D7',
1333                 '\u2122',
1334                 '\u21D1',
1335                 '\u00FA',
1336                 '\u2191',
1337                 '\u00FB',
1338                 '\u00F9',
1339                 '\u00A8',
1340                 '\u03D2',
1341                 '\u03C5',
1342                 '\u00FC',
1343                 '\u2118',
1344                 '\u03BE',
1345                 '\u00FD',
1346                 '\u00A5',
1347                 '\u00FF',
1348                 '\u03B6',
1349                 '\u200D',
1350                 '\u200C'
1351             };
1352 #else
1353             // The list is from http://www.w3.org/TR/REC-html40/sgml/entities.html, except for &apos;, which
1354             // is defined in http://www.w3.org/TR/2008/REC-xml-20081126/#sec-predefined-ent.
1355
1356             private static String[] _entitiesList = new String[] {
1357                 "\x0022-quot",
1358                 "\x0026-amp",
1359                 "\x0027-apos",
1360                 "\x003c-lt",
1361                 "\x003e-gt",
1362                 "\x00a0-nbsp",
1363                 "\x00a1-iexcl",
1364                 "\x00a2-cent",
1365                 "\x00a3-pound",
1366                 "\x00a4-curren",
1367                 "\x00a5-yen",
1368                 "\x00a6-brvbar",
1369                 "\x00a7-sect",
1370                 "\x00a8-uml",
1371                 "\x00a9-copy",
1372                 "\x00aa-ordf",
1373                 "\x00ab-laquo",
1374                 "\x00ac-not",
1375                 "\x00ad-shy",
1376                 "\x00ae-reg",
1377                 "\x00af-macr",
1378                 "\x00b0-deg",
1379                 "\x00b1-plusmn",
1380                 "\x00b2-sup2",
1381                 "\x00b3-sup3",
1382                 "\x00b4-acute",
1383                 "\x00b5-micro",
1384                 "\x00b6-para",
1385                 "\x00b7-middot",
1386                 "\x00b8-cedil",
1387                 "\x00b9-sup1",
1388                 "\x00ba-ordm",
1389                 "\x00bb-raquo",
1390                 "\x00bc-frac14",
1391                 "\x00bd-frac12",
1392                 "\x00be-frac34",
1393                 "\x00bf-iquest",
1394                 "\x00c0-Agrave",
1395                 "\x00c1-Aacute",
1396                 "\x00c2-Acirc",
1397                 "\x00c3-Atilde",
1398                 "\x00c4-Auml",
1399                 "\x00c5-Aring",
1400                 "\x00c6-AElig",
1401                 "\x00c7-Ccedil",
1402                 "\x00c8-Egrave",
1403                 "\x00c9-Eacute",
1404                 "\x00ca-Ecirc",
1405                 "\x00cb-Euml",
1406                 "\x00cc-Igrave",
1407                 "\x00cd-Iacute",
1408                 "\x00ce-Icirc",
1409                 "\x00cf-Iuml",
1410                 "\x00d0-ETH",
1411                 "\x00d1-Ntilde",
1412                 "\x00d2-Ograve",
1413                 "\x00d3-Oacute",
1414                 "\x00d4-Ocirc",
1415                 "\x00d5-Otilde",
1416                 "\x00d6-Ouml",
1417                 "\x00d7-times",
1418                 "\x00d8-Oslash",
1419                 "\x00d9-Ugrave",
1420                 "\x00da-Uacute",
1421                 "\x00db-Ucirc",
1422                 "\x00dc-Uuml",
1423                 "\x00dd-Yacute",
1424                 "\x00de-THORN",
1425                 "\x00df-szlig",
1426                 "\x00e0-agrave",
1427                 "\x00e1-aacute",
1428                 "\x00e2-acirc",
1429                 "\x00e3-atilde",
1430                 "\x00e4-auml",
1431                 "\x00e5-aring",
1432                 "\x00e6-aelig",
1433                 "\x00e7-ccedil",
1434                 "\x00e8-egrave",
1435                 "\x00e9-eacute",
1436                 "\x00ea-ecirc",
1437                 "\x00eb-euml",
1438                 "\x00ec-igrave",
1439                 "\x00ed-iacute",
1440                 "\x00ee-icirc",
1441                 "\x00ef-iuml",
1442                 "\x00f0-eth",
1443                 "\x00f1-ntilde",
1444                 "\x00f2-ograve",
1445                 "\x00f3-oacute",
1446                 "\x00f4-ocirc",
1447                 "\x00f5-otilde",
1448                 "\x00f6-ouml",
1449                 "\x00f7-divide",
1450                 "\x00f8-oslash",
1451                 "\x00f9-ugrave",
1452                 "\x00fa-uacute",
1453                 "\x00fb-ucirc",
1454                 "\x00fc-uuml",
1455                 "\x00fd-yacute",
1456                 "\x00fe-thorn",
1457                 "\x00ff-yuml",
1458                 "\x0152-OElig",
1459                 "\x0153-oelig",
1460                 "\x0160-Scaron",
1461                 "\x0161-scaron",
1462                 "\x0178-Yuml",
1463                 "\x0192-fnof",
1464                 "\x02c6-circ",
1465                 "\x02dc-tilde",
1466                 "\x0391-Alpha",
1467                 "\x0392-Beta",
1468                 "\x0393-Gamma",
1469                 "\x0394-Delta",
1470                 "\x0395-Epsilon",
1471                 "\x0396-Zeta",
1472                 "\x0397-Eta",
1473                 "\x0398-Theta",
1474                 "\x0399-Iota",
1475                 "\x039a-Kappa",
1476                 "\x039b-Lambda",
1477                 "\x039c-Mu",
1478                 "\x039d-Nu",
1479                 "\x039e-Xi",
1480                 "\x039f-Omicron",
1481                 "\x03a0-Pi",
1482                 "\x03a1-Rho",
1483                 "\x03a3-Sigma",
1484                 "\x03a4-Tau",
1485                 "\x03a5-Upsilon",
1486                 "\x03a6-Phi",
1487                 "\x03a7-Chi",
1488                 "\x03a8-Psi",
1489                 "\x03a9-Omega",
1490                 "\x03b1-alpha",
1491                 "\x03b2-beta",
1492                 "\x03b3-gamma",
1493                 "\x03b4-delta",
1494                 "\x03b5-epsilon",
1495                 "\x03b6-zeta",
1496                 "\x03b7-eta",
1497                 "\x03b8-theta",
1498                 "\x03b9-iota",
1499                 "\x03ba-kappa",
1500                 "\x03bb-lambda",
1501                 "\x03bc-mu",
1502                 "\x03bd-nu",
1503                 "\x03be-xi",
1504                 "\x03bf-omicron",
1505                 "\x03c0-pi",
1506                 "\x03c1-rho",
1507                 "\x03c2-sigmaf",
1508                 "\x03c3-sigma",
1509                 "\x03c4-tau",
1510                 "\x03c5-upsilon",
1511                 "\x03c6-phi",
1512                 "\x03c7-chi",
1513                 "\x03c8-psi",
1514                 "\x03c9-omega",
1515                 "\x03d1-thetasym",
1516                 "\x03d2-upsih",
1517                 "\x03d6-piv",
1518                 "\x2002-ensp",
1519                 "\x2003-emsp",
1520                 "\x2009-thinsp",
1521                 "\x200c-zwnj",
1522                 "\x200d-zwj",
1523                 "\x200e-lrm",
1524                 "\x200f-rlm",
1525                 "\x2013-ndash",
1526                 "\x2014-mdash",
1527                 "\x2018-lsquo",
1528                 "\x2019-rsquo",
1529                 "\x201a-sbquo",
1530                 "\x201c-ldquo",
1531                 "\x201d-rdquo",
1532                 "\x201e-bdquo",
1533                 "\x2020-dagger",
1534                 "\x2021-Dagger",
1535                 "\x2022-bull",
1536                 "\x2026-hellip",
1537                 "\x2030-permil",
1538                 "\x2032-prime",
1539                 "\x2033-Prime",
1540                 "\x2039-lsaquo",
1541                 "\x203a-rsaquo",
1542                 "\x203e-oline",
1543                 "\x2044-frasl",
1544                 "\x20ac-euro",
1545                 "\x2111-image",
1546                 "\x2118-weierp",
1547                 "\x211c-real",
1548                 "\x2122-trade",
1549                 "\x2135-alefsym",
1550                 "\x2190-larr",
1551                 "\x2191-uarr",
1552                 "\x2192-rarr",
1553                 "\x2193-darr",
1554                 "\x2194-harr",
1555                 "\x21b5-crarr",
1556                 "\x21d0-lArr",
1557                 "\x21d1-uArr",
1558                 "\x21d2-rArr",
1559                 "\x21d3-dArr",
1560                 "\x21d4-hArr",
1561                 "\x2200-forall",
1562                 "\x2202-part",
1563                 "\x2203-exist",
1564                 "\x2205-empty",
1565                 "\x2207-nabla",
1566                 "\x2208-isin",
1567                 "\x2209-notin",
1568                 "\x220b-ni",
1569                 "\x220f-prod",
1570                 "\x2211-sum",
1571                 "\x2212-minus",
1572                 "\x2217-lowast",
1573                 "\x221a-radic",
1574                 "\x221d-prop",
1575                 "\x221e-infin",
1576                 "\x2220-ang",
1577                 "\x2227-and",
1578                 "\x2228-or",
1579                 "\x2229-cap",
1580                 "\x222a-cup",
1581                 "\x222b-int",
1582                 "\x2234-there4",
1583                 "\x223c-sim",
1584                 "\x2245-cong",
1585                 "\x2248-asymp",
1586                 "\x2260-ne",
1587                 "\x2261-equiv",
1588                 "\x2264-le",
1589                 "\x2265-ge",
1590                 "\x2282-sub",
1591                 "\x2283-sup",
1592                 "\x2284-nsub",
1593                 "\x2286-sube",
1594                 "\x2287-supe",
1595                 "\x2295-oplus",
1596                 "\x2297-otimes",
1597                 "\x22a5-perp",
1598                 "\x22c5-sdot",
1599                 "\x2308-lceil",
1600                 "\x2309-rceil",
1601                 "\x230a-lfloor",
1602                 "\x230b-rfloor",
1603                 "\x2329-lang",
1604                 "\x232a-rang",
1605                 "\x25ca-loz",
1606                 "\x2660-spades",
1607                 "\x2663-clubs",
1608                 "\x2665-hearts",
1609                 "\x2666-diams",
1610             };
1611
1612             private static Dictionary<string, char> _lookupTable = GenerateLookupTable();
1613
1614             private static Dictionary<string, char> GenerateLookupTable() {
1615                 // e[0] is unicode char, e[1] is '-', e[2+] is entity string
1616
1617                 Dictionary<string, char> lookupTable = new Dictionary<string, char>(StringComparer.Ordinal);
1618                 foreach (string e in _entitiesList) {
1619                     lookupTable.Add(e.Substring(2), e[0]);
1620                 }
1621
1622                 return lookupTable;
1623             }
1624
1625             public static char Lookup(string entity) {
1626                 char theChar;
1627                 _lookupTable.TryGetValue(entity, out theChar);
1628                 return theChar;
1629             }
1630 #endif
1631         }
1632
1633         #endregion
1634     }
1635 }