Alter #if BIGENDIAN code with run-time condition.
[mono.git] / mcs / class / referencesource / mscorlib / system / text / utf8encoding.cs
1 // ==++==
2 //
3 //   Copyright (c) Microsoft Corporation.  All rights reserved.
4 //
5 // ==--==
6
7 // The worker functions in this file were optimized for performance. If you make changes
8 // you should use care to consider all of the interesting cases.
9
10 // The code of all worker functions in this file is written twice: once as a slow loop, and the
11 // second time as a fast loop. The slow loop handles all special cases, throws exceptions, etc.
12 // The fast loop attempts to blaze through as fast as possible with optimistic range checks,
13 // processing multiple characters at a time, and falling back to the slow loop for all special cases.
14
15 // This define can be used to turn off the fast loops. Useful for finding whether
16 // the problem is fastloop-specific.
17 #define FASTLOOP
18
19 namespace System.Text
20 {
21     using System;
22     using System.Globalization;
23     using System.Runtime.Serialization;
24     using System.Security.Permissions;
25     using System.Diagnostics.Contracts;
26
27     // Encodes text into and out of UTF-8.  UTF-8 is a way of writing
28     // Unicode characters with variable numbers of bytes per character,
29     // optimized for the lower 127 ASCII characters.  It's an efficient way
30     // of encoding US English in an internationalizable way.
31     //
32     // Don't override IsAlwaysNormalized because it is just a Unicode Transformation and could be confused.
33     //
34     // The UTF-8 byte order mark is simply the Unicode byte order mark
35     // (0xFEFF) written in UTF-8 (0xEF 0xBB 0xBF).  The byte order mark is
36     // used mostly to distinguish UTF-8 text from other encodings, and doesn't
37     // switch the byte orderings.
38
39     [Serializable]
40 [System.Runtime.InteropServices.ComVisible(true)]
41     public class UTF8Encoding : Encoding
42     {
43         /*
44             bytes   bits    UTF-8 representation
45             -----   ----    -----------------------------------
46             1        7      0vvvvvvv
47             2       11      110vvvvv 10vvvvvv
48             3       16      1110vvvv 10vvvvvv 10vvvvvv
49             4       21      11110vvv 10vvvvvv 10vvvvvv 10vvvvvv
50             -----   ----    -----------------------------------
51
52             Surrogate:
53             Real Unicode value = (HighSurrogate - 0xD800) * 0x400 + (LowSurrogate - 0xDC00) + 0x10000
54          */
55
56         private const int UTF8_CODEPAGE=65001;
57
58         // Yes, the idea of emitting U+FEFF as a UTF-8 identifier has made it into
59         // the standard.
60         private bool emitUTF8Identifier = false;
61
62         private bool isThrowException = false;
63
64
65         public UTF8Encoding(): this(false)
66         {
67         }
68
69
70         public UTF8Encoding(bool encoderShouldEmitUTF8Identifier):
71             this(encoderShouldEmitUTF8Identifier, false)
72         {
73         }
74
75
76         public UTF8Encoding(bool encoderShouldEmitUTF8Identifier, bool throwOnInvalidBytes):
77             base(UTF8_CODEPAGE)
78         {
79             this.emitUTF8Identifier = encoderShouldEmitUTF8Identifier;
80             this.isThrowException = throwOnInvalidBytes;
81
82             // Encoding's constructor already did this, but it'll be wrong if we're throwing exceptions
83             if (this.isThrowException)
84                 SetDefaultFallbacks();
85         }
86
87         internal override void SetDefaultFallbacks()
88         {
89             // For UTF-X encodings, we use a replacement fallback with an empty string
90             if (this.isThrowException)
91             {
92                 this.encoderFallback = EncoderFallback.ExceptionFallback;
93                 this.decoderFallback = DecoderFallback.ExceptionFallback;
94             }
95             else
96             {
97                 this.encoderFallback = new EncoderReplacementFallback("\xFFFD");
98                 this.decoderFallback = new DecoderReplacementFallback("\xFFFD");
99             }
100         }
101
102
103         //
104         // WARNING: GetByteCount(string chars)
105         // WARNING: has different variable names than EncodingNLS.cs, so this can't just be cut & pasted,
106         // WARNING: otherwise it'll break VB's way of declaring these.
107         //
108         // The following methods are copied from EncodingNLS.cs.
109         // Unfortunately EncodingNLS.cs is internal and we're public, so we have to reimplement them here.
110         // These should be kept in sync for the following classes:
111         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
112         //
113
114         // Returns the number of bytes required to encode a range of characters in
115         // a character array.
116         //
117         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
118         // So if you fix this, fix the others.  Currently those include:
119         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
120         // parent method is safe
121
122         [System.Security.SecuritySafeCritical]  // auto-generated
123         public override unsafe int GetByteCount(char[] chars, int index, int count)
124         {
125             // Validate input parameters
126             if (chars == null)
127                 throw new ArgumentNullException("chars",
128                       Environment.GetResourceString("ArgumentNull_Array"));
129
130             if (index < 0 || count < 0)
131                 throw new ArgumentOutOfRangeException((index<0 ? "index" : "count"),
132                       Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
133
134             if (chars.Length - index < count)
135                 throw new ArgumentOutOfRangeException("chars",
136                       Environment.GetResourceString("ArgumentOutOfRange_IndexCountBuffer"));
137             Contract.EndContractBlock();
138
139             // If no input, return 0, avoid fixed empty array problem
140             if (chars.Length == 0)
141                 return 0;
142
143             // Just call the pointer version
144             fixed (char* pChars = chars)
145                 return GetByteCount(pChars + index, count, null);
146         }
147
148         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
149         // So if you fix this, fix the others.  Currently those include:
150         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
151         // parent method is safe
152
153         [System.Security.SecuritySafeCritical]  // auto-generated
154         public override unsafe int GetByteCount(String chars)
155         {
156             // Validate input
157             if (chars==null)
158                 throw new ArgumentNullException("s");
159             Contract.EndContractBlock();
160
161             fixed (char* pChars = chars)
162                 return GetByteCount(pChars, chars.Length, null);
163         }
164
165         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
166         // So if you fix this, fix the others.  Currently those include:
167         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
168
169         [System.Security.SecurityCritical]  // auto-generated
170         [CLSCompliant(false)]
171         [System.Runtime.InteropServices.ComVisible(false)]
172         public override unsafe int GetByteCount(char* chars, int count)
173         {
174             // Validate Parameters
175             if (chars == null)
176                 throw new ArgumentNullException("chars",
177                     Environment.GetResourceString("ArgumentNull_Array"));
178
179             if (count < 0)
180                 throw new ArgumentOutOfRangeException("count",
181                     Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
182             Contract.EndContractBlock();
183
184             // Call it with empty encoder
185             return GetByteCount(chars, count, null);
186         }
187
188         // Parent method is safe.
189         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
190         // So if you fix this, fix the others.  Currently those include:
191         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
192
193         [System.Security.SecuritySafeCritical]  // auto-generated
194         public override unsafe int GetBytes(String s, int charIndex, int charCount,
195                                               byte[] bytes, int byteIndex)
196         {
197             if (s == null || bytes == null)
198                 throw new ArgumentNullException((s == null ? "s" : "bytes"),
199                       Environment.GetResourceString("ArgumentNull_Array"));
200
201             if (charIndex < 0 || charCount < 0)
202                 throw new ArgumentOutOfRangeException((charIndex<0 ? "charIndex" : "charCount"),
203                       Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
204
205             if (s.Length - charIndex < charCount)
206                 throw new ArgumentOutOfRangeException("s",
207                       Environment.GetResourceString("ArgumentOutOfRange_IndexCount"));
208
209             if (byteIndex < 0 || byteIndex > bytes.Length)
210                 throw new ArgumentOutOfRangeException("byteIndex",
211                     Environment.GetResourceString("ArgumentOutOfRange_Index"));
212             Contract.EndContractBlock();
213
214             int byteCount = bytes.Length - byteIndex;
215
216             // Fixed doesn't like 0 length arrays.
217             if (bytes.Length == 0)
218                 bytes = new byte[1];
219
220             fixed (char* pChars = s)
221                 fixed ( byte* pBytes = bytes)
222                     return GetBytes(pChars + charIndex, charCount,
223                                     pBytes + byteIndex, byteCount, null);
224         }
225
226         // Encodes a range of characters in a character array into a range of bytes
227         // in a byte array. An exception occurs if the byte array is not large
228         // enough to hold the complete encoding of the characters. The
229         // GetByteCount method can be used to determine the exact number of
230         // bytes that will be produced for a given range of characters.
231         // Alternatively, the GetMaxByteCount method can be used to
232         // determine the maximum number of bytes that will be produced for a given
233         // number of characters, regardless of the actual character values.
234         //
235         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
236         // So if you fix this, fix the others.  Currently those include:
237         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
238         // parent method is safe
239
240         [System.Security.SecuritySafeCritical]  // auto-generated
241         public override unsafe int GetBytes(char[] chars, int charIndex, int charCount,
242                                                byte[] bytes, int byteIndex)
243         {
244             // Validate parameters
245             if (chars == null || bytes == null)
246                 throw new ArgumentNullException((chars == null ? "chars" : "bytes"),
247                       Environment.GetResourceString("ArgumentNull_Array"));
248
249             if (charIndex < 0 || charCount < 0)
250                 throw new ArgumentOutOfRangeException((charIndex<0 ? "charIndex" : "charCount"),
251                       Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
252
253             if (chars.Length - charIndex < charCount)
254                 throw new ArgumentOutOfRangeException("chars",
255                       Environment.GetResourceString("ArgumentOutOfRange_IndexCountBuffer"));
256
257             if (byteIndex < 0 || byteIndex > bytes.Length)
258                 throw new ArgumentOutOfRangeException("byteIndex",
259                      Environment.GetResourceString("ArgumentOutOfRange_Index"));
260             Contract.EndContractBlock();
261
262             // If nothing to encode return 0, avoid fixed problem
263             if (chars.Length == 0)
264                 return 0;
265
266             // Just call pointer version
267             int byteCount = bytes.Length - byteIndex;
268
269             // Fixed doesn't like 0 length arrays.
270             if (bytes.Length == 0)
271                 bytes = new byte[1];
272
273             fixed (char* pChars = chars)
274                 fixed (byte* pBytes = bytes)
275                     // Remember that byteCount is # to decode, not size of array.
276                     return GetBytes(pChars + charIndex, charCount,
277                                     pBytes + byteIndex, byteCount, null);
278         }
279
280         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
281         // So if you fix this, fix the others.  Currently those include:
282         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
283
284         [System.Security.SecurityCritical]  // auto-generated
285         [CLSCompliant(false)]
286         [System.Runtime.InteropServices.ComVisible(false)]
287         public override unsafe int GetBytes(char* chars, int charCount, byte* bytes, int byteCount)
288         {
289             // Validate Parameters
290             if (bytes == null || chars == null)
291                 throw new ArgumentNullException(bytes == null ? "bytes" : "chars",
292                     Environment.GetResourceString("ArgumentNull_Array"));
293
294             if (charCount < 0 || byteCount < 0)
295                 throw new ArgumentOutOfRangeException((charCount<0 ? "charCount" : "byteCount"),
296                     Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
297             Contract.EndContractBlock();
298
299             return GetBytes(chars, charCount, bytes, byteCount, null);
300         }
301
302         // Returns the number of characters produced by decoding a range of bytes
303         // in a byte array.
304         //
305         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
306         // So if you fix this, fix the others.  Currently those include:
307         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
308         // parent method is safe
309
310         [System.Security.SecuritySafeCritical]  // auto-generated
311         public override unsafe int GetCharCount(byte[] bytes, int index, int count)
312         {
313             // Validate Parameters
314             if (bytes == null)
315                 throw new ArgumentNullException("bytes",
316                     Environment.GetResourceString("ArgumentNull_Array"));
317
318             if (index < 0 || count < 0)
319                 throw new ArgumentOutOfRangeException((index<0 ? "index" : "count"),
320                     Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
321
322             if (bytes.Length - index < count)
323                 throw new ArgumentOutOfRangeException("bytes",
324                     Environment.GetResourceString("ArgumentOutOfRange_IndexCountBuffer"));
325             Contract.EndContractBlock();
326
327             // If no input just return 0, fixed doesn't like 0 length arrays.
328             if (bytes.Length == 0)
329                 return 0;
330
331             // Just call pointer version
332             fixed (byte* pBytes = bytes)
333                 return GetCharCount(pBytes + index, count, null);
334         }
335
336         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
337         // So if you fix this, fix the others.  Currently those include:
338         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
339
340         [System.Security.SecurityCritical]  // auto-generated
341         [CLSCompliant(false)]
342         [System.Runtime.InteropServices.ComVisible(false)]
343         public override unsafe int GetCharCount(byte* bytes, int count)
344         {
345             // Validate Parameters
346             if (bytes == null)
347                 throw new ArgumentNullException("bytes",
348                     Environment.GetResourceString("ArgumentNull_Array"));
349
350             if (count < 0)
351                 throw new ArgumentOutOfRangeException("count",
352                     Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
353             Contract.EndContractBlock();
354
355             return GetCharCount(bytes, count, null);
356         }
357
358         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
359         // So if you fix this, fix the others.  Currently those include:
360         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
361         // parent method is safe
362
363         [System.Security.SecuritySafeCritical]  // auto-generated
364         public override unsafe int GetChars(byte[] bytes, int byteIndex, int byteCount,
365                                               char[] chars, int charIndex)
366         {
367             // Validate Parameters
368             if (bytes == null || chars == null)
369                 throw new ArgumentNullException(bytes == null ? "bytes" : "chars",
370                     Environment.GetResourceString("ArgumentNull_Array"));
371
372             if (byteIndex < 0 || byteCount < 0)
373                 throw new ArgumentOutOfRangeException((byteIndex<0 ? "byteIndex" : "byteCount"),
374                     Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
375
376             if ( bytes.Length - byteIndex < byteCount)
377                 throw new ArgumentOutOfRangeException("bytes",
378                     Environment.GetResourceString("ArgumentOutOfRange_IndexCountBuffer"));
379
380             if (charIndex < 0 || charIndex > chars.Length)
381                 throw new ArgumentOutOfRangeException("charIndex",
382                     Environment.GetResourceString("ArgumentOutOfRange_Index"));
383             Contract.EndContractBlock();
384
385             // If no input, return 0 & avoid fixed problem
386             if (bytes.Length == 0)
387                 return 0;
388
389             // Just call pointer version
390             int charCount = chars.Length - charIndex;
391
392             // Fixed doesn't like 0 length arrays.
393             if (chars.Length == 0)
394                 chars = new char[1];
395
396             fixed (byte* pBytes = bytes)
397                 fixed (char* pChars = chars)
398                     // Remember that charCount is # to decode, not size of array
399                     return GetChars(pBytes + byteIndex, byteCount,
400                                     pChars + charIndex, charCount, null);
401         }
402
403         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
404         // So if you fix this, fix the others.  Currently those include:
405         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
406
407         [System.Security.SecurityCritical]  // auto-generated
408         [CLSCompliant(false)]
409         [System.Runtime.InteropServices.ComVisible(false)]
410         public unsafe override int GetChars(byte* bytes, int byteCount, char* chars, int charCount)
411         {
412             // Validate Parameters
413             if (bytes == null || chars == null)
414                 throw new ArgumentNullException(bytes == null ? "bytes" : "chars",
415                     Environment.GetResourceString("ArgumentNull_Array"));
416
417             if (charCount < 0 || byteCount < 0)
418                 throw new ArgumentOutOfRangeException((charCount<0 ? "charCount" : "byteCount"),
419                     Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
420             Contract.EndContractBlock();
421
422             return GetChars(bytes, byteCount, chars, charCount, null);
423         }
424
425         // Returns a string containing the decoded representation of a range of
426         // bytes in a byte array.
427         //
428         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
429         // So if you fix this, fix the others.  Currently those include:
430         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
431         // parent method is safe
432
433         [System.Security.SecuritySafeCritical]  // auto-generated
434         [System.Runtime.InteropServices.ComVisible(false)]
435         public override unsafe String GetString(byte[] bytes, int index, int count)
436         {
437             // Validate Parameters
438             if (bytes == null)
439                 throw new ArgumentNullException("bytes",
440                     Environment.GetResourceString("ArgumentNull_Array"));
441
442             if (index < 0 || count < 0)
443                 throw new ArgumentOutOfRangeException((index < 0 ? "index" : "count"),
444                     Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
445
446             if (bytes.Length - index < count)
447                 throw new ArgumentOutOfRangeException("bytes",
448                     Environment.GetResourceString("ArgumentOutOfRange_IndexCountBuffer"));
449             Contract.EndContractBlock();
450
451             // Avoid problems with empty input buffer
452             if (bytes.Length == 0) return String.Empty;
453
454             fixed (byte* pBytes = bytes)
455                 return String.CreateStringFromEncoding(
456                     pBytes + index, count, this);
457         }
458
459         //
460         // End of standard methods copied from EncodingNLS.cs
461         //
462
463         // To simplify maintenance, the structure of GetByteCount and GetBytes should be
464         // kept the same as much as possible
465         [System.Security.SecurityCritical]  // auto-generated
466         internal override unsafe int GetByteCount(char *chars, int count, EncoderNLS baseEncoder)
467         {
468             // For fallback we may need a fallback buffer.
469             // We wait to initialize it though in case we don't have any broken input unicode
470             EncoderFallbackBuffer fallbackBuffer = null;
471             char *pSrc = chars;
472             char *pEnd = pSrc+count;
473
474             // Start by assuming we have as many as count
475             int byteCount = count;
476
477             int ch = 0;
478
479             if (baseEncoder != null) {
480                 UTF8Encoder encoder = (UTF8Encoder)baseEncoder;
481                 ch = encoder.surrogateChar;
482
483                 // We mustn't have left over fallback data when counting
484                 if (encoder.InternalHasFallbackBuffer)
485                 {
486                     fallbackBuffer = encoder.FallbackBuffer;
487                     if (fallbackBuffer.Remaining > 0)
488                         throw new ArgumentException(Environment.GetResourceString("Argument_EncoderFallbackNotEmpty",
489                         this.EncodingName, encoder.Fallback.GetType()));
490
491                     // Set our internal fallback interesting things.
492                     fallbackBuffer.InternalInitialize(chars, pEnd, encoder, false);
493                 }
494             }
495
496             for (;;) {
497                 // SLOWLOOP: does all range checks, handles all special cases, but it is slow
498                 if (pSrc >= pEnd) {
499                 
500                     if (ch == 0) {
501                         // Unroll any fallback that happens at the end
502                         ch = fallbackBuffer != null ? fallbackBuffer.InternalGetNextChar() : 0;
503                         if (ch > 0) {
504                             byteCount++;
505                             goto ProcessChar;
506                         }
507                     } else {
508                         // Case of surrogates in the fallback.
509                         if (fallbackBuffer != null && fallbackBuffer.bFallingBack) {
510                             Contract.Assert(ch >= 0xD800 && ch <= 0xDBFF,
511                                 "[UTF8Encoding.GetBytes]expected high surrogate, not 0x" + ((int)ch).ToString("X4", CultureInfo.InvariantCulture));
512                             
513                             ch = fallbackBuffer.InternalGetNextChar();
514                             byteCount++;
515                             
516                             if (InRange(ch, CharUnicodeInfo.LOW_SURROGATE_START, CharUnicodeInfo.LOW_SURROGATE_END)) {
517                                 ch = 0xfffd;
518                                 byteCount++;
519                                 goto EncodeChar;
520                             } else if (ch > 0){
521                                 goto ProcessChar;
522                             } else {
523                                 byteCount--; // ignore last one.
524                                 break;
525                             }
526                         }
527                     }
528                 
529                     if (ch <= 0) {
530                         break;
531                     }
532                     if (baseEncoder != null && !baseEncoder.MustFlush) {
533                         break;
534                     }
535
536                     // attempt to encode the partial surrogate (will fallback or ignore it), it'll also subtract 1.
537                     byteCount++;
538                     goto EncodeChar;
539                 }
540
541                 if (ch > 0) {
542                     Contract.Assert(ch >= 0xD800 && ch <= 0xDBFF,
543                         "[UTF8Encoding.GetBytes]expected high surrogate, not 0x" + ((int)ch).ToString("X4", CultureInfo.InvariantCulture));
544
545                     // use separate helper variables for local contexts so that the jit optimizations
546                     // won't get confused about the variable lifetimes
547                     int cha = *pSrc;
548
549                     // count the pending surrogate
550                     byteCount++;
551
552                     // In previous byte, we encountered a high surrogate, so we are expecting a low surrogate here.
553                     // if (IsLowSurrogate(cha)) {
554                     if (InRange(cha, CharUnicodeInfo.LOW_SURROGATE_START, CharUnicodeInfo.LOW_SURROGATE_END)) {
555                         // Don't need a real # because we're just counting, anything > 0x7ff ('cept surrogate) will do.
556                         ch = 0xfffd;
557 //                        ch = cha + (ch << 10) +
558 //                            (0x10000
559 //                            - CharUnicodeInfo.LOW_SURROGATE_START
560 //                            - (CharUnicodeInfo.HIGH_SURROGATE_START << 10) );
561
562                         // Use this next char
563                         pSrc++;
564                     }
565                     // else ch is still high surrogate and encoding will fail (so don't add count)
566
567                     // attempt to encode the surrogate or partial surrogate
568                     goto EncodeChar;
569                 }
570
571                 // If we've used a fallback, then we have to check for it
572                 if (fallbackBuffer != null)
573                 {
574                     ch = fallbackBuffer.InternalGetNextChar();
575                     if (ch > 0)
576                     {
577                         // We have an extra byte we weren't expecting.
578                         byteCount++;
579                         goto ProcessChar;
580                     }
581                 }
582
583                 // read next char. The JIT optimization seems to be getting confused when
584                 // compiling "ch = *pSrc++;", so rather use "ch = *pSrc; pSrc++;" instead
585                 ch = *pSrc;
586                 pSrc++;
587
588             ProcessChar:
589                 // if (IsHighSurrogate(ch)) {
590                 if (InRange(ch, CharUnicodeInfo.HIGH_SURROGATE_START, CharUnicodeInfo.HIGH_SURROGATE_END)) {
591                     // we will count this surrogate next time around
592                     byteCount--;
593                     continue;
594                 }
595                 // either good char or partial surrogate
596
597             EncodeChar:
598                 // throw exception on partial surrogate if necessary
599                 // if (IsLowSurrogate(ch) || IsHighSurrogate(ch))
600                 if (InRange(ch, CharUnicodeInfo.HIGH_SURROGATE_START, CharUnicodeInfo.LOW_SURROGATE_END))
601                 {
602                     // Lone surrogates aren't allowed
603                     // Have to make a fallback buffer if we don't have one
604                     if (fallbackBuffer == null)
605                     {
606                         // wait on fallbacks if we can
607                         // For fallback we may need a fallback buffer
608                         if (baseEncoder == null)
609                             fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
610                         else
611                             fallbackBuffer = baseEncoder.FallbackBuffer;
612
613                         // Set our internal fallback interesting things.
614                         fallbackBuffer.InternalInitialize(chars, chars + count, baseEncoder, false);
615                     }
616
617                     // Do our fallback.  Actually we already know its a mixed up surrogate,
618                     // so the ref pSrc isn't gonna do anything.
619                     fallbackBuffer.InternalFallback(unchecked((char)ch), ref pSrc);
620
621                     // Ignore it if we don't throw (we had preallocated this ch)
622                     byteCount--;
623                     ch = 0;
624                     continue;
625                 }
626
627                 // Count them
628                 if (ch > 0x7F) {
629                     if (ch > 0x7FF) {
630                         // the extra surrogate byte was compensated by the second surrogate character
631                         // (2 surrogates make 4 bytes.  We've already counted 2 bytes, 1 per char)
632                         byteCount++;
633                     }
634                     byteCount++;
635                 }
636
637 #if WIN64
638                 // check for overflow
639                 if (byteCount < 0) {
640                     break;
641                 }
642 #endif
643
644 #if FASTLOOP
645                 // If still have fallback don't do fast loop
646                 if (fallbackBuffer != null && (ch = fallbackBuffer.InternalGetNextChar()) != 0)
647                 {
648                     // We're reserving 1 byte for each char by default
649                     byteCount++;
650                     goto ProcessChar;
651                 }
652
653                 int availableChars = PtrDiff(pEnd, pSrc);
654
655                 // don't fall into the fast decoding loop if we don't have enough characters
656                 if (availableChars <= 13) {
657                     // try to get over the remainder of the ascii characters fast though
658                     char* pLocalEnd = pEnd; // hint to get pLocalEnd enregistered
659                     while (pSrc < pLocalEnd) {
660                         ch = *pSrc;
661                         pSrc++;
662                         if (ch > 0x7F)
663                             goto ProcessChar;
664                     }
665                     
666                     // we are done
667                     break;
668                 }
669
670 #if WIN64
671                 // make sure that we won't get a silent overflow inside the fast loop
672                 // (Fall out to slow loop if we have this many characters)
673                 availableChars &= 0x0FFFFFFF;
674 #endif
675
676                 // To compute the upper bound, assume that all characters are ASCII characters at this point,
677                 //  the boundary will be decreased for every non-ASCII character we encounter
678                 // Also, we need 3 + 4 chars reserve for the unrolled ansi decoding loop and for decoding of surrogates
679                 char *pStop = pSrc + availableChars - (3 + 4);
680
681                 while (pSrc < pStop) {
682                     ch = *pSrc;
683                     pSrc++;
684
685                     if (ch > 0x7F)                                                  // Not ASCII
686                     {
687                         if (ch > 0x7FF)                                             // Not 2 Byte
688                         {
689                             if ((ch & 0xF800) == 0xD800)                            // See if its a Surrogate
690                                 goto LongCode;
691                             byteCount++;
692                         }
693                         byteCount ++;
694                     }
695
696                     // get pSrc aligned
697                     if ((unchecked((int)pSrc) & 0x2) != 0) {
698                         ch = *pSrc;
699                         pSrc++;
700                         if (ch > 0x7F)                                              // Not ASCII
701                         {
702                             if (ch > 0x7FF)                                         // Not 2 Byte
703                             {
704                                 if ((ch & 0xF800) == 0xD800)                        // See if its a Surrogate
705                                     goto LongCode;
706                                 byteCount++;
707                             }
708                             byteCount ++;
709                         }
710                     }
711
712                     // Run 2 * 4 characters at a time!
713                     while (pSrc < pStop) {
714                         ch = *(int*)pSrc;
715                         int chc = *(int*)(pSrc+2);
716                         if (((ch | chc) & unchecked((int)0xFF80FF80)) != 0)         // See if not ASCII
717                         {
718                             if (((ch | chc) & unchecked((int)0xF800F800)) != 0)     // See if not 2 Byte
719                             {
720                                 goto LongCodeWithMask;
721                             }
722
723
724                             if ((ch & unchecked((int)0xFF800000)) != 0)             // Actually 0x07800780 is all we care about (4 bits)
725                                 byteCount++;
726                             if ((ch & unchecked((int)0xFF80)) != 0)
727                                 byteCount++;
728                             if ((chc & unchecked((int)0xFF800000)) != 0)
729                                 byteCount++;
730                             if ((chc & unchecked((int)0xFF80)) != 0)
731                                 byteCount++;
732                         }
733                         pSrc += 4;
734
735                         ch = *(int*)pSrc;
736                         chc = *(int*)(pSrc+2);
737                         if (((ch | chc) & unchecked((int)0xFF80FF80)) != 0)         // See if not ASCII
738                         {
739                             if (((ch | chc) & unchecked((int)0xF800F800)) != 0)     // See if not 2 Byte
740                             {
741                                 goto LongCodeWithMask;
742                             }
743
744                             if ((ch & unchecked((int)0xFF800000)) != 0)
745                                 byteCount++;
746                             if ((ch & unchecked((int)0xFF80)) != 0)
747                                 byteCount++;
748                             if ((chc & unchecked((int)0xFF800000)) != 0)
749                                 byteCount++;
750                             if ((chc & unchecked((int)0xFF80)) != 0)
751                                 byteCount++;
752                         }
753                         pSrc += 4;
754                     }
755                     break;
756
757                 LongCodeWithMask:
758 if (!BitConverter.IsLittleEndian) {
759                     // be careful about the sign extension
760                     ch = (int)(((uint)ch) >> 16);
761 } else {
762                     ch = (char)ch;
763 }
764                     pSrc++;
765
766                     if (ch <= 0x7F) {
767                         continue;
768                     }
769
770                 LongCode:
771                     // use separate helper variables for slow and fast loop so that the jit optimizations
772                     // won't get confused about the variable lifetimes
773                     if (ch > 0x7FF) {
774                         // if (IsLowSurrogate(ch) || IsHighSurrogate(ch))
775                         if (InRange(ch, CharUnicodeInfo.HIGH_SURROGATE_START, CharUnicodeInfo.LOW_SURROGATE_END)) {
776                             // 4 byte encoding - high surrogate + low surrogate
777
778                             int chd = *pSrc;
779                             if (
780                                 // !IsHighSurrogate(ch) // low without high -> bad
781                                 ch > CharUnicodeInfo.HIGH_SURROGATE_END ||
782                                 // !IsLowSurrogate(chd) // high not followed by low -> bad
783                                 !InRange(chd, CharUnicodeInfo.LOW_SURROGATE_START, CharUnicodeInfo.LOW_SURROGATE_END) )
784                             {
785                                 // Back up and drop out to slow loop to figure out error
786                                 pSrc--;
787                                 break;
788                             }
789                             pSrc++;
790
791                             // byteCount - this byte is compensated by the second surrogate character
792                         }
793                         byteCount++;
794                     }
795                     byteCount++;
796
797                     // byteCount - the last byte is already included
798                 }
799 #endif // FASTLOOP
800
801                 // no pending char at this point
802                 ch = 0;
803             }
804
805 #if WIN64
806             // check for overflow
807             if (byteCount < 0) {
808                 throw new ArgumentException(
809                         Environment.GetResourceString("Argument_ConversionOverflow"));
810             }
811 #endif
812
813             Contract.Assert(fallbackBuffer == null || fallbackBuffer.Remaining == 0,
814                 "[UTF8Encoding.GetByteCount]Expected Empty fallback buffer");
815
816             return byteCount;
817         }
818
819         // diffs two char pointers using unsigned arithmetic. The unsigned arithmetic
820         // is good enough for us, and it tends to generate better code than the signed
821         // arithmetic generated by default
822         [System.Security.SecurityCritical]  // auto-generated
823         unsafe private static int PtrDiff(char *a, char* b)
824         {
825             return (int)(((uint)((byte*)a - (byte*)b)) >> 1);
826         }
827
828         // byte* flavor just for parity
829         [System.Security.SecurityCritical]  // auto-generated
830         unsafe private static int PtrDiff(byte* a, byte* b)
831         {
832             return (int)(a - b);
833         }
834
835         private static bool InRange(int ch, int start, int end)
836         {
837             return (uint)(ch - start) <= (uint)(end - start);
838         }
839
840         // Our workhorse: encodes UTF-16 chars into UTF-8 bytes.
            //
            // Reads charCount chars starting at 'chars' and writes their UTF-8 form to 'bytes',
            // never writing past bytes + byteCount.  Returns the number of bytes written.
            //
            // baseEncoder may be null.  When it is supplied it is cast to UTF8Encoder: a pending
            // high surrogate is resumed from encoder.surrogateChar on entry, and surrogateChar and
            // m_charsUsed are stored back before returning.
            //
            // Throws ArgumentException when the encoder's fallback buffer still holds data and
            // encoder.m_throwOnOverflow is set.  Insufficient output space is reported through
            // ThrowBytesOverflow; NOTE(review): whether that helper throws or simply returns is
            // decided outside this method - confirm against its definition.
            //
            // Layout: the outer for(;;) is the slow loop that handles every special case.  When
            // FASTLOOP is defined, a pass that has just written a char then tries the optimistic
            // fast loop, which drops back to the slow loop for anything it cannot handle.
841         // Note:  We ignore mismatched surrogates, unless the exception flag is set in which case we throw
842         [System.Security.SecurityCritical]  // auto-generated
843         internal override unsafe int GetBytes(char* chars, int charCount,
844                                                 byte* bytes, int byteCount, EncoderNLS baseEncoder)
845         {
846             Contract.Assert(chars!=null, "[UTF8Encoding.GetBytes]chars!=null");
847             Contract.Assert(byteCount >=0, "[UTF8Encoding.GetBytes]byteCount >=0");
848             Contract.Assert(charCount >=0, "[UTF8Encoding.GetBytes]charCount >=0");
849             Contract.Assert(bytes!=null, "[UTF8Encoding.GetBytes]bytes!=null");
850
851             UTF8Encoder encoder = null;
852
853             // For fallback we may need a fallback buffer.
854             // We wait to initialize it though in case we don't have any broken input unicode
855             EncoderFallbackBuffer fallbackBuffer = null;
856             char *pSrc = chars;
857             byte *pTarget = bytes;
858
859             char *pEnd = pSrc+charCount;
860             byte *pAllocatedBufferEnd = pTarget+byteCount;
861
                // ch is the char (or combined code point) being processed.  A non-zero value at the
                // top of the slow loop is a high surrogate still waiting for its low surrogate.
862             int ch = 0;
863
864             // assume that JIT will enregister pSrc, pTarget and ch
865
866             if (baseEncoder != null) {
867                 encoder = (UTF8Encoder)baseEncoder;
868                 ch = encoder.surrogateChar;
869
870                 // Leftover fallback data is an error when the encoder is in throw-on-overflow mode
871                 if (encoder.InternalHasFallbackBuffer)
872                 {
873                     // We always need the fallback buffer in get bytes so we can flush any remaining ones if necessary
874                     fallbackBuffer = encoder.FallbackBuffer;
875                     if (fallbackBuffer.Remaining > 0 && encoder.m_throwOnOverflow)
876                         throw new ArgumentException(Environment.GetResourceString("Argument_EncoderFallbackNotEmpty",
877                         this.EncodingName, encoder.Fallback.GetType()));
878
879                     // Set our internal fallback interesting things.
880                     fallbackBuffer.InternalInitialize(chars, pEnd, encoder, true);
881                 }
882             }
883
884             for (;;) {
885                 // SLOWLOOP: does all range checks, handles all special cases, but it is slow
886
                    // Input exhausted: drain the fallback buffer and deal with any pending surrogate
887                 if (pSrc >= pEnd) {
888                     
889                     if (ch == 0) {
890                         // Check if there's anything left to get out of the fallback buffer
891                         ch = fallbackBuffer != null ? fallbackBuffer.InternalGetNextChar() : 0;
892                         if (ch > 0) {
893                             goto ProcessChar;
894                         }
895                     } else {
896                         // Case of leftover surrogates in the fallback buffer
897                         if (fallbackBuffer != null && fallbackBuffer.bFallingBack) {
898                             Contract.Assert(ch >= 0xD800 && ch <= 0xDBFF,
899                                 "[UTF8Encoding.GetBytes]expected high surrogate, not 0x" + ((int)ch).ToString("X4", CultureInfo.InvariantCulture));
900                             
901                             int cha = ch;
902                             
903                             ch = fallbackBuffer.InternalGetNextChar();
904                             
905                             if (InRange(ch, CharUnicodeInfo.LOW_SURROGATE_START, CharUnicodeInfo.LOW_SURROGATE_END)) {
                                    // Combine high (cha) and low (ch) surrogates into one code point:
                                    // 0x10000 + ((cha - HIGH_SURROGATE_START) << 10) + (ch - LOW_SURROGATE_START)
906                                 ch = ch + (cha << 10) + (0x10000 - CharUnicodeInfo.LOW_SURROGATE_START - (CharUnicodeInfo.HIGH_SURROGATE_START << 10));
907                                 goto EncodeChar;
908                             } else if (ch > 0){
909                                 goto ProcessChar;
910                             } else {
911                                 break;
912                             }
913                         }
914                     }
915                     
916                     // attempt to encode the partial surrogate (will fail or ignore)
917                     if (ch > 0 && (encoder == null || encoder.MustFlush))
918                         goto EncodeChar;
919
920                     // We're done
921                     break;
922                 }
923
924                 if (ch > 0) {
925                     // We have a high surrogate left over from a previous loop.
926                     Contract.Assert(ch >= 0xD800 && ch <= 0xDBFF,
927                         "[UTF8Encoding.GetBytes]expected high surrogate, not 0x" + ((int)ch).ToString("X4", CultureInfo.InvariantCulture));
928
929                     // use separate helper variables for local contexts so that the jit optimizations
930                     // won't get confused about the variable lifetimes
931                     int cha = *pSrc;
932
933                     // In the previous char, we encountered a high surrogate, so we are expecting a low surrogate here.
934                     // if (IsLowSurrogate(cha)) {
935                     if (InRange(cha, CharUnicodeInfo.LOW_SURROGATE_START, CharUnicodeInfo.LOW_SURROGATE_END)) {
                            // Same surrogate-pair-to-code-point formula as above; the low surrogate is consumed
936                         ch = cha + (ch << 10) +
937                             (0x10000
938                             - CharUnicodeInfo.LOW_SURROGATE_START
939                             - (CharUnicodeInfo.HIGH_SURROGATE_START << 10) );
940
941                         pSrc++;
942                     }
943                     // else ch is still high surrogate and encoding will fail
944
945                     // attempt to encode the surrogate or partial surrogate
946                     goto EncodeChar;
947                 }
948
949                 // If we've used a fallback, then we have to check for it
950                 if (fallbackBuffer != null)
951                 {
952                     ch = fallbackBuffer.InternalGetNextChar();
953                     if (ch > 0) goto ProcessChar;
954                 }
955
956                 // read next char. The JIT optimization seems to be getting confused when
957                 // compiling "ch = *pSrc++;", so rather use "ch = *pSrc; pSrc++;" instead
958                 ch = *pSrc;
959                 pSrc++;
960
961             ProcessChar:
                    // A high surrogate is kept in ch and paired with the next char on the next pass
962                 // if (IsHighSurrogate(ch)) {
963                 if (InRange(ch, CharUnicodeInfo.HIGH_SURROGATE_START, CharUnicodeInfo.HIGH_SURROGATE_END)) {
964                     continue;
965                 }
966                 // either good char or partial surrogate
967
968             EncodeChar:
969                 // throw exception on partial surrogate if necessary
970                 // if (IsLowSurrogate(ch) || IsHighSurrogate(ch))
971                 if (InRange(ch, CharUnicodeInfo.HIGH_SURROGATE_START, CharUnicodeInfo.LOW_SURROGATE_END))
972                 {
973                     // Lone surrogates aren't allowed, we have to do fallback for them
974                     // Have to make a fallback buffer if we don't have one
975                     if (fallbackBuffer == null)
976                     {
977                         // wait on fallbacks if we can
978                         // For fallback we may need a fallback buffer
979                         if (baseEncoder == null)
980                             fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
981                         else
982                             fallbackBuffer = baseEncoder.FallbackBuffer;
983
984                         // Set our internal fallback interesting things.
985                         fallbackBuffer.InternalInitialize(chars, pEnd, baseEncoder, true);
986                     }
987
988                     // Do our fallback.  Actually we already know it's a mixed up surrogate,
989                     // so the ref pSrc isn't gonna do anything.
990                     fallbackBuffer.InternalFallback(unchecked((char)ch), ref pSrc);
991
992                     // Ignore it if we don't throw
993                     ch = 0;
994                     continue;
995                 }
996
997                 // Count bytes needed
998                 int bytesNeeded = 1;
999                 if (ch > 0x7F) {
1000                     if (ch > 0x7FF) {
1001                         if (ch > 0xFFFF) {
1002                             bytesNeeded++;  // 4 bytes (surrogate pair)
1003                         }
1004                         bytesNeeded++;      // 3 bytes (800-FFFF)
1005                     }
1006                     bytesNeeded++;          // 2 bytes (80-7FF)
1007                 }
1008
                     // Not enough room left for this char: un-consume it, report the overflow and stop
1009                 if (pTarget > pAllocatedBufferEnd - bytesNeeded) {
1010                     // Left over surrogate from last time will cause pSrc == chars, so we'll throw
1011                     if (fallbackBuffer != null && fallbackBuffer.bFallingBack)
1012                     {
1013                         fallbackBuffer.MovePrevious();              // Didn't use this fallback char
1014                         if (ch > 0xFFFF)
1015                             fallbackBuffer.MovePrevious();          // Was surrogate, didn't use 2nd part either
1016                     }
1017                     else
1018                     {
1019                         pSrc--;                                     // Didn't use this char
1020                         if (ch > 0xFFFF)
1021                             pSrc--;                                 // Was surrogate, didn't use 2nd part either
1022                     }
1023                     Contract.Assert(pSrc >= chars || pTarget == bytes,
1024                         "[UTF8Encoding.GetBytes]Expected pSrc to be within buffer or to throw with insufficient room.");
1025                     ThrowBytesOverflow(encoder, pTarget == bytes);  // Throw if we must
1026                     ch = 0;                                         // Nothing left over (we backed up to start of pair if supplementary)
1027                     break;
1028                 }
1029
1030                 if (ch <= 0x7F) {
1031                     *pTarget = (byte)ch;
1032                 }
1033                 else {
1034                     // use separate helper variables for local contexts so that the jit optimizations
1035                     // won't get confused about the variable lifetimes
1036                     int chb;
1037                     if (ch <= 0x7FF) {
1038                         // 2 byte encoding
                             // (sbyte)0xC0 sign-extends to an int with all upper bits set; the (byte) cast
                             // keeps only the low 8 bits, which gives the lead byte 110xxxxx
1039                         chb = (byte)(unchecked((sbyte)0xC0) | (ch >> 6));
1040                     }
1041                     else
1042                     {
1043                         if (ch <= 0xFFFF) {
                                 // 3 byte encoding: lead byte 1110xxxx
1044                             chb = (byte)(unchecked((sbyte)0xE0) | (ch >> 12));
1045                         }
1046                         else
1047                         {
                                 // 4 byte encoding: lead byte 11110xxx, written immediately
1048                             *pTarget = (byte)(unchecked((sbyte)0xF0) | (ch >> 18));
1049                             pTarget++;
1050
                                 // '&' binds tighter than '|', so this is 0x80 | ((ch >> 12) & 0x3F);
                                 // the sign-extended upper bits are dropped by the (byte) cast when stored
1051                             chb = unchecked((sbyte)0x80) | (ch >> 12) & 0x3F;
1052                         }
1053                         *pTarget = (byte)chb;
1054                         pTarget++;
1055
1056                         chb = unchecked((sbyte)0x80) | (ch >> 6) & 0x3F;
1057                     }
1058                     *pTarget = (byte)chb;
1059                     pTarget++;
1060
                         // final continuation byte: 10xxxxxx carrying the low 6 bits
1061                     *pTarget = (byte)(unchecked((sbyte)0x80) | ch & 0x3F);
1062                 }
1063                 pTarget++;
1064
1065
1066 #if FASTLOOP
1067                 // If still have fallback don't do fast loop
1068                 if (fallbackBuffer != null && (ch = fallbackBuffer.InternalGetNextChar()) != 0)
1069                     goto ProcessChar;
1070
1071                 int availableChars = PtrDiff(pEnd, pSrc);
1072                 int availableBytes = PtrDiff(pAllocatedBufferEnd, pTarget);
1073
1074                 // don't fall into the fast decoding loop if we don't have enough characters
1075                 // Note that if we don't have enough bytes, pStop will prevent us from entering the fast loop.
1076                 if (availableChars <= 13) {
1077                     // we are hoping for 1 byte per char
1078                     if (availableBytes < availableChars) {
1079                         // not enough output room.  no pending bits at this point
1080                         ch = 0;
1081                         continue;
1082                     }
1083                     
1084                     // try to get over the remainder of the ascii characters fast though
1085                     char* pLocalEnd = pEnd; // hint to get pLocalEnd enregistered
1086                     while (pSrc < pLocalEnd) {
1087                         ch = *pSrc;
1088                         pSrc++;
1089
1090                         // Not ASCII, need more than 1 byte per char
1091                         if (ch > 0x7F)
1092                             goto ProcessChar;
1093
1094                         *pTarget = (byte)ch;
1095                         pTarget++;
1096                     }
1097                     // we are done, let ch be 0 to clear encoder
1098                     ch = 0;
1099                     break;
1100                 }
1101
1102                 // we need at least 1 byte per character, but Convert might allow us to convert
1103                 // only part of the input, so try as much as we can.  Reduce charCount if necessary
1104                 if (availableBytes < availableChars)
1105                 {
1106                     availableChars = availableBytes;
1107                 }
1108
1109                 // FASTLOOP:
1110                 // - optimistic range checks
1111                 // - fallbacks to the slow loop for all special cases, exception throwing, etc.
1112
1113                 // To compute the upper bound, assume that all characters are ASCII characters at this point,
1114                 //  the boundary will be decreased for every non-ASCII character we encounter
1115                 // Also, we need 5 chars reserve for the unrolled ansi decoding loop and for decoding of surrogates
1116                 // If there aren't enough bytes for the output, then pStop will be <= pSrc and will bypass the loop.
1117                 char *pStop = pSrc + availableChars - 5;
1118
1119                 while (pSrc < pStop) {
1120                     ch = *pSrc;
1121                     pSrc++;
1122
1123                     if (ch > 0x7F) {
1124                         goto LongCode;
1125                     }
1126                     *pTarget = (byte)ch;
1127                     pTarget++;
1128
1129                     // get pSrc aligned
                         // bit 1 of the address is set, so consume one more char first; the int reads
                         // below then start on a 4-byte boundary (assumes pSrc is at least 2-byte aligned)
1130                     if ((unchecked((int)pSrc) & 0x2) != 0) {
1131                         ch = *pSrc;
1132                         pSrc++;
1133                         if (ch > 0x7F) {
1134                             goto LongCode;
1135                         }
1136                         *pTarget = (byte)ch;
1137                         pTarget++;
1138                     }
1139
1140                     // Run 4 characters at a time!
1141                     while (pSrc < pStop) {
                             // Read four chars as two 32-bit words.  Any bit surviving the 0xFF80FF80
                             // mask means at least one of the four chars is above 0x7F.
1142                         ch = *(int*)pSrc;
1143                         int chc = *(int*)(pSrc+2);
1144                         if (((ch | chc) & unchecked((int)0xFF80FF80)) != 0) {
1145                             goto LongCodeWithMask;
1146                         }
1147
1148                         // Unfortunately, this is endianness sensitive
                             // (which half of each 32-bit word holds the earlier char depends on byte order)
1149 if (!BitConverter.IsLittleEndian) {
1150                         *pTarget = (byte)(ch>>16);
1151                         *(pTarget+1) = (byte)ch;
1152                         pSrc += 4;
1153                         *(pTarget+2) = (byte)(chc>>16);
1154                         *(pTarget+3) = (byte)chc;
1155                         pTarget += 4;
1156 } else {
1157                         *pTarget = (byte)ch;
1158                         *(pTarget+1) = (byte)(ch>>16);
1159                         pSrc += 4;
1160                         *(pTarget+2) = (byte)chc;
1161                         *(pTarget+3) = (byte)(chc>>16);
1162                         pTarget += 4;
1163 }
1164                     }
                         // inner loop hit pStop; re-test the outer fast-loop condition
1165                     continue;
1166
1167                 LongCodeWithMask:
                         // Isolate the first char of the pair held in ch (upper half on big-endian,
                         // lower half on little-endian) and consume just that one char
1168 if (!BitConverter.IsLittleEndian) {
1169                     // be careful about the sign extension
1170                     ch = (int)(((uint)ch) >> 16);
1171 } else {
1172                     ch = (char)ch;
1173 }
1174                     pSrc++;
1175
1176                     if (ch > 0x7F) {
1177                         goto LongCode;
1178                     }
1179                     *pTarget = (byte)ch;
1180                     pTarget++;
1181                     continue;
1182
1183                 LongCode:
                         // ch > 0x7F here: every goto that targets this label is guarded by that test
1184                     // use separate helper variables for slow and fast loop so that the jit optimizations
1185                     // won't get confused about the variable lifetimes
1186                     int chd;
1187                     if (ch <= 0x7FF) {
1188                         // 2 byte encoding
1189                         chd = unchecked((sbyte)0xC0) | (ch >> 6);
1190                     }
1191                     else {
1192                         // if (!IsLowSurrogate(ch) && !IsHighSurrogate(ch))
1193                         if (!InRange(ch, CharUnicodeInfo.HIGH_SURROGATE_START, CharUnicodeInfo.LOW_SURROGATE_END)) {
1194                             // 3 byte encoding
1195                             chd = unchecked((sbyte)0xE0) | (ch >> 12);
1196                         }
1197                         else
1198                         {
1199                             // 4 byte encoding - high surrogate + low surrogate
1200                             // if (!IsHighSurrogate(ch))
1201                             if (ch > CharUnicodeInfo.HIGH_SURROGATE_END) {
1202                                 // low without high -> bad, try again in slow loop
1203                                 pSrc -= 1;
1204                                 break;
1205                             }
1206
1207                             chd = *pSrc;
1208                             pSrc++;
1209
1210                             // if (!IsLowSurrogate(chd)) {
1211                             if (!InRange(chd, CharUnicodeInfo.LOW_SURROGATE_START, CharUnicodeInfo.LOW_SURROGATE_END)) {
1212                                 // high not followed by low -> bad, try again in slow loop
1213                                 pSrc -= 2;
1214                                 break;
1215                             }
1216
                                 // combine the surrogate pair into a single code point (0x10000 and above)
1217                             ch = chd + (ch << 10) +
1218                                 (0x10000
1219                                 - CharUnicodeInfo.LOW_SURROGATE_START
1220                                 - (CharUnicodeInfo.HIGH_SURROGATE_START << 10) );
1221
1222                             *pTarget = (byte)(unchecked((sbyte)0xF0) | (ch >> 18));
1223                             // pStop - this byte is compensated by the second surrogate character
1224                             // 2 input chars require 4 output bytes.  2 have been anticipated already
1225                             // and 2 more will be accounted for by the 2 pStop-- calls below.
1226                             pTarget++;                           
1227
1228                             chd = unchecked((sbyte)0x80) | (ch >> 12) & 0x3F;
1229                         }
1230                         *pTarget = (byte)chd;
1231                         pStop--;                    // 3 byte sequence for 1 char, so need pStop-- and the one below too.
1232                         pTarget++;
1233
1234                         chd = unchecked((sbyte)0x80) | (ch >> 6) & 0x3F;
1235                     }
1236                     *pTarget = (byte)chd;
1237                     pStop--;                        // 2 byte sequence for 1 char so need pStop--.
1238                     pTarget++;
1239
1240                     *pTarget = (byte)(unchecked((sbyte)0x80) | ch & 0x3F);
1241                     // pStop - this byte is already included
1242                     pTarget++;
1243                 }
1244
1245                 Contract.Assert(pTarget <= pAllocatedBufferEnd, "[UTF8Encoding.GetBytes]pTarget <= pAllocatedBufferEnd");
1246
1247 #endif // FASTLOOP
1248
1249                 // no pending char at this point
1250                 ch = 0;
1251             }
1252
1253             // Do we have to set the encoder bytes?
             // (store the pending surrogate, if any, and the number of input chars consumed)
1254             if (encoder != null)
1255             {
1256                 Contract.Assert(!encoder.MustFlush || ch == 0,
1257                     "[UTF8Encoding.GetBytes] Expected no mustflush or 0 leftover ch " + ch.ToString("X2", CultureInfo.InvariantCulture));
1258
1259                 encoder.surrogateChar = ch;
1260                 encoder.m_charsUsed = (int)(pSrc - chars);
1261             }
1262
1263             Contract.Assert(fallbackBuffer == null || fallbackBuffer.Remaining == 0 ||
1264                 baseEncoder == null || !baseEncoder.m_throwOnOverflow,
1265                 "[UTF8Encoding.GetBytes]Expected empty fallback buffer if not converting");
1266
             // number of bytes written to the output buffer
1267             return (int)(pTarget - bytes);
1268         }
1269
1270
1271         // These are bitmasks used to maintain the state in the decoder. They occupy the higher bits
1272         // while the actual character is being built in the lower bits. They are shifted together
1273         // with the actual bits of the character.
1274
1275         // bits 30 & 31 are used for pending bits fixup
1276         private const int FinalByte         = 1 << 29;
1277         private const int SupplimentarySeq  = 1 << 28;
1278         private const int ThreeByteSeq      = 1 << 27;
1279
1280         // Note:  We throw exceptions on individually encoded surrogates and other non-shortest forms.
1281         //        If exceptions aren't turned on, then we drop all non-shortest forms & individual surrogates.
1282         //
1283         // To simplify maintenance, the structure of GetCharCount and GetChars should be
1284         // kept the same as much as possible
             //
             // Counts the chars that decoding 'count' bytes starting at 'bytes' would produce, without
             // writing any output.
             //
             //   bytes       - start of the UTF-8 input (asserted non-null).
             //   count       - number of input bytes (asserted non-negative).
             //   baseDecoder - optional decoder. When non-null, its 'bits' field seeds the pending
             //                 multi-byte state and its FallbackBuffer is used for invalid bytes. When
             //                 null, a fallback buffer is created from this.decoderFallback on demand
             //                 and any incomplete trailing sequence is always sent to the fallback.
             //
             // Returns the running charCount: it starts at 'count' (one char per byte) and is decreased
             // for the extra bytes of multi-byte sequences and increased by whatever the fallback reports
             // for invalid sequences.
1285         [System.Security.SecurityCritical]  // auto-generated
1286         internal override unsafe int GetCharCount(byte* bytes, int count, DecoderNLS baseDecoder)
1287         {
1288             Contract.Assert(count >=0, "[UTF8Encoding.GetCharCount]count >=0");
1289             Contract.Assert(bytes!=null, "[UTF8Encoding.GetCharCount]bytes!=null");
1290
1291             // Initialize stuff
1292             byte *pSrc = bytes;
1293             byte *pEnd = pSrc+count;
1294
1295             // Start by assuming we have as many as count, charCount always includes the adjustment
1296             // for the character being decoded
1297             int charCount = count;
                 // ch holds the pending multi-byte state (flag bits plus the code point bits folded so far);
                 // 0 means no sequence is in progress.
1298             int ch = 0;
1299             DecoderFallbackBuffer fallback = null;
1300
1301             if (baseDecoder != null) {
1302                 UTF8Decoder decoder = (UTF8Decoder)baseDecoder;
1303                 ch = decoder.bits;
1304                 charCount -= (ch >> 30);        // Adjust char count for # of expected bytes and expected output chars.
1305
1306                 // Shouldn't have anything in fallback buffer for GetCharCount
1307                 // (don't have to check m_throwOnOverflow for count)
1308                 Contract.Assert(!decoder.InternalHasFallbackBuffer || decoder.FallbackBuffer.Remaining == 0,
1309                     "[UTF8Encoding.GetCharCount]Expected empty fallback buffer at start");
1310             }
1311
                 // Each pass of this loop either folds one more trail byte into a pending sequence
                 // (ch != 0), or starts on a fresh byte at ReadChar. Once a char is complete (EncodeChar)
                 // the FASTLOOP section scans ahead and drops back here for anything unusual.
1312             for (;;)
1313             {
1314                 // SLOWLOOP: does all range checks, handles all special cases, but it is slow
1315
1316                 if (pSrc >= pEnd) {
1317                     break;
1318                 }
1319
1320                 if (ch == 0) {
1321                     // no pending bits
1322                     goto ReadChar;
1323                 }
1324
1325                 // read next byte. The JIT optimization seems to be getting confused when
1326                 // compiling "ch = *pSrc++;", so rather use "ch = *pSrc; pSrc++;" instead
1327                 int cha = *pSrc;
1328                 pSrc++;
1329
1330                 // we are expecting to see trailing bytes like 10vvvvvv
1331                 if ((cha & unchecked((sbyte)0xC0)) != 0x80) {
1332                     // This can be a valid starting byte for another UTF8 byte sequence, so let's put
1333                     // the current byte back, and try to see if this is a valid byte for another UTF8 byte sequence
1334                     pSrc--;
                         // undo the count adjustment carried in the top bits of the abandoned sequence
                         // (same "unadjust" that is done for a pending sequence at the end of the method)
1335                     charCount += (ch >> 30);
1336                     goto InvalidByteSequence;
1337                 }
1338
1339                 // fold in the new byte
1340                 ch = (ch << 6) | (cha & 0x3F);
1341
1342                 if ((ch & FinalByte) == 0) {
                         // Not at the last byte yet: only 3- and 4-byte sequences can get here
1343                     Contract.Assert( (ch & (SupplimentarySeq | ThreeByteSeq)) != 0,
1344                         "[UTF8Encoding.GetChars]Invariant volation");
1345
1346                     if ((ch & SupplimentarySeq) != 0) {
1347                         if ((ch & (FinalByte >> 6)) != 0) {
1348                             // this is 3rd byte (of 4 byte supplimentary) - nothing to do
1349                             continue;
1350                         }
1351
1352                         // 2nd byte, check for non-shortest form of supplimentary char and the valid
1353                         // supplimentary characters in range 0x010000 - 0x10FFFF at the same time
1354                         if (!InRange(ch & 0x1F0, 0x10, 0x100)) {
1355                             goto InvalidByteSequence;
1356                         }
1357                     }
1358                     else {
1359                         // Must be 2nd byte of a 3-byte sequence
1360                         // check for non-shortest form of 3 byte seq
1361                         if ((ch & (0x1F << 5)) == 0 ||                  // non-shortest form
1362                             (ch & (0xF800 >> 6) ) == (0xD800 >> 6))     // illegal individually encoded surrogate
1363                         {
1364                             goto InvalidByteSequence;
1365                         }
1366                     }
1367                     continue;
1368                 }
1369
1370                 // ready to punch
1371
1372                 // adjust for surrogates in non-shortest form
1373                 if ((ch & (SupplimentarySeq | 0x1F0000)) == SupplimentarySeq) {
1374                     charCount--;
1375                 }
1376                 goto EncodeChar;
1377
1378             InvalidByteSequence:
1379                 // this code fragment should be close to the gotos referencing it
1380                 // Have to do fallback for invalid bytes
1381                 if (fallback == null)
1382                 {
1383                     if (baseDecoder == null)
1384                         fallback = this.decoderFallback.CreateFallbackBuffer();
1385                     else
1386                         fallback = baseDecoder.FallbackBuffer;
1387                     fallback.InternalInitialize(bytes, null);
1388                 }
                     // the fallback reports how many chars it would emit for the bad bytes
1389                 charCount += FallbackInvalidByteSequence(pSrc, ch, fallback);
1390
                     // drop the bad sequence and resume with a fresh byte
1391                 ch = 0;
1392                 continue;
1393
1394             ReadChar:
1395                 ch = *pSrc;
1396                 pSrc++;
1397
1398             ProcessChar:
1399                 if (ch > 0x7F) {
1400                     // If its > 0x7F, its start of a new multi-byte sequence
1401
1402                     // Long sequence, so unreserve our char.
1403                     charCount--;
1404
1405                     // bit 6 has to be non-zero for start of multibyte chars.
1406                     if ((ch & 0x40) == 0) {
1407                         // Unexpected trail byte
1408                         goto InvalidByteSequence;
1409                     }
1410
1411                     // start a new long code
1412                     if ((ch & 0x20) != 0) {
1413                         if ((ch & 0x10) != 0) {
1414                             // 4 byte encoding - supplimentary character (2 surrogates)
1415
1416                             ch &= 0x0F;
1417
1418                             // check that bit 4 is zero and the valid supplimentary character
1419                             // range 0x000000 - 0x10FFFF at the same time
1420                             if (ch > 0x04) {
                                     // put the lead-byte marker bits back before handing ch to the fallback
1421                                 ch |= 0xf0;
1422                                 goto InvalidByteSequence;
1423                             }
1424
1425                             // Add bit flags so that when we check new characters & rotate we'll be flagged correctly.
1426                             // Final byte flag, count fix if we don't make final byte & supplimentary sequence flag.
1427                             ch |= (FinalByte >> 3*6) |  // Final byte is 3 more bytes from now
1428                                   (1 << 30) |           // If it dies on next byte we'll need an extra char
1429                                   (3 << (30-2*6)) |     // If it dies on last byte we'll need to subtract a char
1430                                 (SupplimentarySeq) | (SupplimentarySeq >> 6) |
1431                                 (SupplimentarySeq >> 2*6) | (SupplimentarySeq >> 3*6);
1432
1433                             // Our character count will be 2 characters for these 4 bytes, so subtract another char
1434                             charCount--;
1435                         }
1436                         else {
1437                             // 3 byte encoding
1438                             // Add bit flags so that when we check new characters & rotate we'll be flagged correctly.
1439                             ch = (ch & 0x0F) | ( (FinalByte >> 2*6) | (1 << 30) |
1440                                 (ThreeByteSeq) | (ThreeByteSeq >> 6) | (ThreeByteSeq >> 2*6) );
1441
1442                             // We'll expect 1 character for these 3 bytes, so subtract another char.
1443                             charCount--;
1444                         }
1445                     }
1446                     else {
1447                         // 2 byte encoding
1448
1449                         ch &= 0x1F;
1450
1451                         // check for non-shortest form
1452                         if (ch <= 1) {
                                 // put the lead-byte marker bits back before handing ch to the fallback
1453                             ch |= 0xc0;
1454                             goto InvalidByteSequence;
1455                         }
1456
1457                         // Add bit flags so we'll be flagged correctly
1458                         ch |= (FinalByte >> 6);
1459                     }
1460                     continue;
1461                 }
1462
1463             EncodeChar:
1464
1465 #if FASTLOOP
1466                 int availableBytes = PtrDiff(pEnd, pSrc);
1467
1468                 // don't fall into the fast decoding loop if we don't have enough bytes
1469                 if (availableBytes <= 13) {
1470                     // try to get over the remainder of the ascii characters fast though
1471                     byte* pLocalEnd = pEnd; // hint to get pLocalEnd enregistered
1472                     while (pSrc < pLocalEnd) {
1473                         ch = *pSrc;
1474                         pSrc++;
1475
1476                         if (ch > 0x7F)
1477                             goto ProcessChar;
1478                     }
1479                     // we are done
1480                     ch = 0;
1481                     break;
1482                 }
1483
1484                 // To compute the upper bound, assume that all characters are ASCII characters at this point,
1485                 //  the boundary will be decreased for every non-ASCII character we encounter
1486                 // Also, we need 7 chars reserve for the unrolled ansi decoding loop and for decoding of multibyte sequences
1487                 byte *pStop = pSrc + availableBytes - 7;
1488
1489                 while (pSrc < pStop) {
1490                     ch = *pSrc;
1491                     pSrc++;
1492
1493                     if (ch > 0x7F) {
1494                         goto LongCode;
1495                     }
1496
1497                     // get pSrc 2-byte aligned
1498                     if ((unchecked((int)pSrc) & 0x1) != 0) {
1499                         ch = *pSrc;
1500                         pSrc++;
1501                         if (ch > 0x7F) {
1502                             goto LongCode;
1503                         }
1504                     }
1505
1506                     // get pSrc 4-byte aligned
1507                     if ((unchecked((int)pSrc) & 0x2) != 0) {
1508                         ch = *(ushort*)pSrc;
1509                         if ((ch & 0x8080) != 0) {
1510                             goto LongCodeWithMask16;
1511                         }
1512                         pSrc += 2;
1513                     }
1514
1515                     // Run 8 + 8 characters at a time!
1516                     while (pSrc < pStop) {
1517                         ch = *(int*)pSrc;
1518                         int chb = *(int*)(pSrc+4);
1519                         if (((ch | chb) & unchecked((int)0x80808080)) != 0) {
1520                             goto LongCodeWithMask32;
1521                         }
1522                         pSrc += 8;
1523
1524                         // This is a really small loop - unroll it
1525                         if (pSrc >= pStop)
1526                             break;
1527
1528                         ch = *(int*)pSrc;
1529                         chb = *(int*)(pSrc+4);
1530                         if (((ch | chb) & unchecked((int)0x80808080)) != 0) {
1531                             goto LongCodeWithMask32;
1532                         }
1533                         pSrc += 8;
1534                     }
1535                     break;
1536
1537                 LongCodeWithMask32:
1538                     // be careful about the sign extension
                         // Reduce the multi-byte read in ch to the single byte located at pSrc: on a
                         // big-endian machine that byte sits in the high-order bits (shift by 16 here plus 8
                         // below for the 32-bit read, 8 for the 16-bit read); on little-endian it is the low byte.
1539 if (!BitConverter.IsLittleEndian) {
1540                     ch = (int)(((uint)ch) >> 16);
1541 } else {
1542                     ch &= 0xFF;
1543 }
1544                 LongCodeWithMask16:
1545 if (!BitConverter.IsLittleEndian) {
1546                     ch = (int)(((uint)ch) >> 8);
1547 } else {
1548                     ch &= 0xFF;
1549 }
1550                     pSrc++;
                         // the non-ASCII byte was further along in the word; rescan from the next byte
1551                     if (ch <= 0x7F) {
1552                         continue;
1553                     }
1554
1555                 LongCode:
                         // ch is the lead byte (already consumed); chc is the first trail byte
1556                     int chc = *pSrc;
1557                     pSrc++;
1558
1559                     if (
1560                         // bit 6 of the lead byte has to be non-zero (otherwise it is a stray trail byte)
1561                         (ch & 0x40) == 0 ||
1562                         // we are expecting to see trailing bytes like 10vvvvvv
1563                         (chc & unchecked((sbyte)0xC0)) != 0x80)
1564                     {
1565                         goto BadLongCode;
1566                     }
1567
1568                     chc &= 0x3F;
1569
1570                     // start a new long code
1571                     if ((ch & 0x20) != 0) {
1572
1573                         // fold the first two bytes together
1574                         chc |= (ch & 0x0F) << 6;
1575
1576                         if ((ch & 0x10) != 0) {
1577                             // 4 byte encoding - surrogate
1578                             ch = *pSrc;
1579                             if (
1580                                 // check that bit 4 is zero, the non-shortest form of surrogate
1581                                 // and the valid surrogate range 0x000000 - 0x10FFFF at the same time
1582                                 !InRange(chc >> 4, 0x01, 0x10) ||
1583                                 // we are expecting to see trailing bytes like 10vvvvvv
1584                                 (ch & unchecked((sbyte)0xC0)) != 0x80 )
1585                             {
1586                                 goto BadLongCode;
1587                             }
1588
1589                             chc = (chc << 6) | (ch & 0x3F);
1590
1591                             ch = *(pSrc+1);
1592                             // we are expecting to see trailing bytes like 10vvvvvv
1593                             if ((ch & unchecked((sbyte)0xC0)) != 0x80) {
1594                                 goto BadLongCode;
1595                             }
                                 // both remaining trail bytes validated; only now consume them
1596                             pSrc += 2;
1597
1598                             // extra byte
1599                             charCount--;
1600                         }
1601                         else {
1602                             // 3 byte encoding
1603                             ch = *pSrc;
1604                             if (
1605                                 // check for non-shortest form of 3 byte seq
1606                                 (chc & (0x1F << 5)) == 0 ||
1607                                 // Can't have surrogates here.
1608                                 (chc & (0xF800 >> 6) ) == (0xD800 >> 6) ||
1609                                 // we are expecting to see trailing bytes like 10vvvvvv
1610                                 (ch & unchecked((sbyte)0xC0)) != 0x80 )
1611                             {
1612                                 goto BadLongCode;
1613                             }
1614                             pSrc++;
1615
1616                             // extra byte
1617                             charCount--;
1618                         }
1619                     }
1620                     else {
1621                         // 2 byte encoding
1622
1623                         // check for non-shortest form
1624                         if ((ch & 0x1E) == 0) {
1625                             goto BadLongCode;
1626                         }
1627                     }
1628
1629                     // extra byte
1630                     charCount--;
1631                 }
1632 #endif // FASTLOOP
1633
1634                 // no pending bits at this point
1635                 ch = 0;
1636                 continue;
1637
1638             BadLongCode:
                     // The fast loop never reports errors itself. At every jump here exactly two bytes (the
                     // lead byte and the first trail byte) have been consumed and charCount has not yet been
                     // adjusted for this sequence, so back up to the lead byte and let the slow loop
                     // re-read it and take the fallback path.
1639                 pSrc -= 2;
1640                 ch = 0;
1641                 continue;
1642             }
1643
1644             // May have a problem if we have to flush
1645             if (ch != 0)
1646             {
1647                 // We were already adjusting for these, so need to unadjust
1648                 charCount += (ch >> 30);
                     // an incomplete trailing sequence is only an error when no decoder will carry the
                     // state forward (no decoder at all, or the decoder has been told to flush)
1649                 if (baseDecoder == null || baseDecoder.MustFlush)
1650                 {
1651                     // Have to do fallback for invalid bytes
1652                     if (fallback == null)
1653                     {
1654                         if (baseDecoder == null)
1655                             fallback = this.decoderFallback.CreateFallbackBuffer();
1656                         else
1657                             fallback = baseDecoder.FallbackBuffer;
1658                         fallback.InternalInitialize(bytes, null);
1659                     }
1660                     charCount += FallbackInvalidByteSequence(pSrc, ch, fallback);
1661                 }
1662             }
1663
1664             // Shouldn't have anything in fallback buffer for GetCharCount
1665             // (don't have to check m_throwOnOverflow for count)
1666             Contract.Assert(fallback == null || fallback.Remaining == 0,
1667                 "[UTF8Encoding.GetCharCount]Expected empty fallback buffer at end");
1668
1669             return charCount;
1670         }
1671
1672         // WARNING:  If we throw an error, then System.Resources.ResourceReader calls this method.
1673         //           So if we're really broken, then that could also throw an error... recursively.
1674         //           So try to make sure GetChars can at least process all uses by
1675         //           System.Resources.ResourceReader!
1676         //
1677         // Note:  We throw exceptions on individually encoded surrogates and other non-shortest forms.
1678         //        If exceptions aren't turned on, then we drop all non-shortest forms & individual surrogates.
1679         //
1680         // To simplify maintenance, the structure of GetCharCount and GetChars should be
1681         // kept the same as much as possible
1682         [System.Security.SecurityCritical]  // auto-generated
1683         internal override unsafe int GetChars(byte* bytes, int byteCount,
1684                                                 char* chars, int charCount, DecoderNLS baseDecoder)
1685         {
1686             Contract.Assert(chars!=null, "[UTF8Encoding.GetChars]chars!=null");
1687             Contract.Assert(byteCount >=0, "[UTF8Encoding.GetChars]count >=0");
1688             Contract.Assert(charCount >=0, "[UTF8Encoding.GetChars]charCount >=0");
1689             Contract.Assert(bytes!=null, "[UTF8Encoding.GetChars]bytes!=null");
1690
1691             byte *pSrc = bytes;
1692             char *pTarget = chars;
1693
1694             byte *pEnd = pSrc+byteCount;
1695             char *pAllocatedBufferEnd = pTarget+charCount;
1696
1697             int ch = 0;
1698
1699             DecoderFallbackBuffer fallback = null;
1700             if (baseDecoder != null) {
1701                 UTF8Decoder decoder = (UTF8Decoder)baseDecoder;
1702                 ch = decoder.bits;
1703
1704                 // Shouldn't have anything in fallback buffer for GetChars
1705                 // (don't have to check m_throwOnOverflow for chars, we always use all or none so always should be empty)
1706                 Contract.Assert(!decoder.InternalHasFallbackBuffer || decoder.FallbackBuffer.Remaining == 0,
1707                     "[UTF8Encoding.GetChars]Expected empty fallback buffer at start");
1708             }
1709
1710             for (;;)
1711             {
1712                 // SLOWLOOP: does all range checks, handles all special cases, but it is slow
1713
1714                 if (pSrc >= pEnd) {
1715                     break;
1716                 }
1717
1718                 if (ch == 0) {
1719                     // no pending bits
1720                     goto ReadChar;
1721                 }
1722
1723                 // read next byte. The JIT optimization seems to be getting confused when
1724                 // compiling "ch = *pSrc++;", so rather use "ch = *pSrc; pSrc++;" instead
1725                 int cha = *pSrc;
1726                 pSrc++;
1727
1728                 // we are expecting to see trailing bytes like 10vvvvvv
1729                 if ((cha & unchecked((sbyte)0xC0)) != 0x80) {
1730                     // This can be a valid starting byte for another UTF8 byte sequence, so let's put
1731                     // the current byte back, and try to see if this is a valid byte for another UTF8 byte sequence
1732                     pSrc--;
1733                     goto InvalidByteSequence;
1734                 }
1735
1736                 // fold in the new byte
1737                 ch = (ch << 6) | (cha & 0x3F);
1738
1739                 if ((ch & FinalByte) == 0) {
1740                     // Not at last byte yet
1741                     Contract.Assert( (ch & (SupplimentarySeq | ThreeByteSeq)) != 0,
1742                         "[UTF8Encoding.GetChars]Invariant volation");
1743
1744                     if ((ch & SupplimentarySeq) != 0) {
1745                         // Its a 4-byte supplimentary sequence
1746                         if ((ch & (FinalByte >> 6)) != 0) {
1747                             // this is 3rd byte of 4 byte sequence - nothing to do
1748                             continue;
1749                         }
1750
1751                         // 2nd byte of 4 bytes
1752                         // check for non-shortest form of surrogate and the valid surrogate
1753                         // range 0x000000 - 0x10FFFF at the same time
1754                         if (!InRange(ch & 0x1F0, 0x10, 0x100)) {
1755                             goto InvalidByteSequence;
1756                         }
1757                     }
1758                     else {
1759                         // Must be 2nd byte of a 3-byte sequence
1760                         // check for non-shortest form of 3 byte seq
1761                         if ((ch & (0x1F << 5)) == 0 ||                  // non-shortest form
1762                             (ch & (0xF800 >> 6) ) == (0xD800 >> 6))     // illegal individually encoded surrogate
1763                         {
1764                             goto InvalidByteSequence;
1765                         }
1766                     }
1767                     continue;
1768                 }
1769
1770                 // ready to punch
1771
1772                 // surrogate in shortest form?
1773                 // Might be possible to get rid of this?  Already did non-shortest check for 4-byte sequence when reading 2nd byte?
1774                 if ((ch & (SupplimentarySeq | 0x1F0000)) > SupplimentarySeq) {
1775                     // let the range check for the second char throw the exception
1776                     if (pTarget < pAllocatedBufferEnd) {
1777                         *pTarget = (char)( ((ch >> 10) & 0x7FF) +
1778                             unchecked((short)((CharUnicodeInfo.HIGH_SURROGATE_START - (0x10000 >> 10)))) );
1779                         pTarget++;
1780
1781                         ch = (ch & 0x3FF) +
1782                             unchecked((int)(CharUnicodeInfo.LOW_SURROGATE_START));
1783                     }
1784                 }
1785
1786                 goto EncodeChar;
1787
1788             InvalidByteSequence:
1789                 // this code fragment should be close to the gotos referencing it
1790                 // Have to do fallback for invalid bytes
1791                 if (fallback == null)
1792                 {
1793                     if (baseDecoder == null)
1794                         fallback = this.decoderFallback.CreateFallbackBuffer();
1795                     else
1796                         fallback = baseDecoder.FallbackBuffer;
1797                     fallback.InternalInitialize(bytes, pAllocatedBufferEnd);
1798                 }
1799                 // This'll back us up the appropriate # of bytes if we didn't get anywhere
1800                 if (!FallbackInvalidByteSequence(ref pSrc, ch, fallback, ref pTarget))
1801                 {
1802                     // Ran out of buffer space
1803                     // Need to throw an exception?
1804                     Contract.Assert(pSrc >= bytes || pTarget == chars,
1805                         "[UTF8Encoding.GetChars]Expected to throw or remain in byte buffer after fallback");
1806                     fallback.InternalReset();
1807                     ThrowCharsOverflow(baseDecoder, pTarget == chars);
1808                     ch = 0;
1809                     break;
1810                 }
1811                 Contract.Assert(pSrc >= bytes, 
1812                     "[UTF8Encoding.GetChars]Expected invalid byte sequence to have remained within the byte array");
1813                 ch = 0;
1814                 continue;
1815
1816             ReadChar:
1817                 ch = *pSrc;
1818                 pSrc++;
1819
1820             ProcessChar:
1821                 if (ch > 0x7F) {
1822                     // If its > 0x7F, its start of a new multi-byte sequence
1823
1824                     // bit 6 has to be non-zero
1825                     if ((ch & 0x40) == 0) {
1826                         goto InvalidByteSequence;
1827                     }
1828
1829                     // start a new long code
1830                     if ((ch & 0x20) != 0) {
1831                         if ((ch & 0x10) != 0) {
1832                             // 4 byte encoding - supplimentary character (2 surrogates)
1833
1834                             ch &= 0x0F;
1835
1836                             // check that bit 4 is zero and the valid supplimentary character
1837                             // range 0x000000 - 0x10FFFF at the same time
1838                             if (ch > 0x04) {
1839                                 ch |= 0xf0;
1840                                 goto InvalidByteSequence;
1841                             }
1842
1843                             ch |= (FinalByte >> 3*6) | (1 << 30) | (3 << (30-2*6)) |
1844                                 (SupplimentarySeq) | (SupplimentarySeq >> 6) |
1845                                 (SupplimentarySeq >> 2*6) | (SupplimentarySeq >> 3*6);
1846                         }
1847                         else {
1848                             // 3 byte encoding
1849                             ch = (ch & 0x0F) | ( (FinalByte >> 2*6) | (1 << 30) |
1850                                 (ThreeByteSeq) | (ThreeByteSeq >> 6) | (ThreeByteSeq >> 2*6) );
1851                         }
1852                     }
1853                     else {
1854                         // 2 byte encoding
1855
1856                         ch &= 0x1F;
1857
1858                         // check for non-shortest form
1859                         if (ch <= 1) {
1860                             ch |= 0xc0;
1861                             goto InvalidByteSequence;
1862                         }
1863
1864                         ch |= (FinalByte >> 6);
1865                     }
1866                     continue;
1867                 }
1868
1869             EncodeChar:
1870                 // write the pending character
1871                 if (pTarget >= pAllocatedBufferEnd)
1872                 {
1873                     // Fix chars so we make sure to throw if we didn't output anything
1874                     ch &= 0x1fffff;
1875                     if (ch > 0x7f)
1876                     {
1877                         if (ch > 0x7ff)
1878                         {
1879                             if (ch >= CharUnicodeInfo.LOW_SURROGATE_START &&
1880                                 ch <= CharUnicodeInfo.LOW_SURROGATE_END)
1881                             {
1882                                 pSrc--;     // It was 4 bytes
1883                                 pTarget--;  // 1 was stored already, but we can't remember 1/2, so back up
1884                             }
1885                             else if (ch > 0xffff)
1886                             {
1887                                 pSrc--;     // It was 4 bytes, nothing was stored
1888                             }
1889                             pSrc--;         // It was at least 3 bytes
1890                         }
1891                         pSrc--;             // It was at least 2 bytes
1892                     }
1893                     pSrc--;
1894
1895                     // Throw that we don't have enough room (pSrc could be < bytes if we had started to process
1896                     // a 4 byte sequence already)
1897                     Contract.Assert(pSrc >= bytes || pTarget == chars,
1898                         "[UTF8Encoding.GetChars]Expected pSrc to be within input buffer or throw due to no output]");
1899                     ThrowCharsOverflow(baseDecoder, pTarget == chars);
1900
1901                     // Don't store ch in decoder, we already backed up to its start
1902                     ch = 0;
1903
1904                     // Didn't throw, just use this buffer size.
1905                     break;
1906                 }
1907                 *pTarget = (char)ch;
1908                 pTarget++;
1909
1910 #if FASTLOOP
1911                 int availableChars = PtrDiff(pAllocatedBufferEnd, pTarget);
1912                 int availableBytes = PtrDiff(pEnd, pSrc);
1913
1914                 // don't fall into the fast decoding loop if we don't have enough bytes
1915                 // Test for availableChars is done because pStop would be <= pTarget.
1916                 if (availableBytes <= 13) {
1917                     // we may need as many as 1 character per byte
1918                     if (availableChars < availableBytes) {
1919                         // not enough output room.  no pending bits at this point
1920                         ch = 0;
1921                         continue;
1922                     }
1923
1924                     // try to get over the remainder of the ascii characters fast though
1925                     byte* pLocalEnd = pEnd; // hint to get pLocalEnd enregistered
1926                     while (pSrc < pLocalEnd) {
1927                         ch = *pSrc;
1928                         pSrc++;
1929
1930                         if (ch > 0x7F)
1931                             goto ProcessChar;
1932
1933                         *pTarget = (char)ch;
1934                         pTarget++;
1935                     }
1936                     // we are done
1937                     ch = 0;
1938                     break;
1939                 }
1940
1941                 // we may need as many as 1 character per byte, so reduce the byte count if necessary.
1942                 // If availableChars is too small, pStop will be before pTarget and we won't do fast loop.
1943                 if (availableChars < availableBytes) {
1944                     availableBytes = availableChars;
1945                 }
1946
1947                 // To compute the upper bound, assume that all characters are ASCII characters at this point,
1948                 //  the boundary will be decreased for every non-ASCII character we encounter
1949                 // Also, we need 7 chars reserve for the unrolled ansi decoding loop and for decoding of multibyte sequences
1950                 char *pStop = pTarget + availableBytes - 7;
1951
1952                 while (pTarget < pStop) {
1953                     ch = *pSrc;
1954                     pSrc++;
1955
1956                     if (ch > 0x7F) {
1957                         goto LongCode;
1958                     }
1959                     *pTarget = (char)ch;
1960                     pTarget++;
1961
1962                     // get pSrc to be 2-byte aligned
1963                     if ((unchecked((int)pSrc) & 0x1) != 0) {
1964                         ch = *pSrc;
1965                         pSrc++;
1966                         if (ch > 0x7F) {
1967                             goto LongCode;
1968                         }
1969                         *pTarget = (char)ch;
1970                         pTarget++;
1971                     }
1972
1973                     // get pSrc to be 4-byte aligned
1974                     if ((unchecked((int)pSrc) & 0x2) != 0) {
1975                         ch = *(ushort*)pSrc;
1976                         if ((ch & 0x8080) != 0) {
1977                             goto LongCodeWithMask16;
1978                         }
1979
1980                         // Unfortunately, this is endianess sensitive
1981 if (!BitConverter.IsLittleEndian) {
1982                         *pTarget = (char)((ch >> 8) & 0x7F);
1983                         pSrc += 2;
1984                         *(pTarget+1) = (char)(ch & 0x7F);
1985                         pTarget += 2;
1986 } else {
1987                         *pTarget = (char)(ch & 0x7F);
1988                         pSrc += 2;
1989                         *(pTarget+1) = (char)((ch >> 8) & 0x7F);
1990                         pTarget += 2;
1991 }
1992                     }
1993
1994                     // Run 8 characters at a time!
1995                     while (pTarget < pStop) {
1996                         ch = *(int*)pSrc;
1997                         int chb = *(int*)(pSrc+4);
1998                         if (((ch | chb) & unchecked((int)0x80808080)) != 0) {
1999                             goto LongCodeWithMask32;
2000                         }
2001
2002                         // Unfortunately, this is endianess sensitive
2003 if (!BitConverter.IsLittleEndian) {
2004                         *pTarget = (char)((ch >> 24) & 0x7F);
2005                         *(pTarget+1) = (char)((ch >> 16) & 0x7F);
2006                         *(pTarget+2) = (char)((ch >> 8) & 0x7F);
2007                         *(pTarget+3) = (char)(ch & 0x7F);
2008                         pSrc += 8;
2009                         *(pTarget+4) = (char)((chb >> 24) & 0x7F);
2010                         *(pTarget+5) = (char)((chb >> 16) & 0x7F);
2011                         *(pTarget+6) = (char)((chb >> 8) & 0x7F);
2012                         *(pTarget+7) = (char)(chb & 0x7F);
2013                         pTarget += 8;
2014 } else {
2015                         *pTarget = (char)(ch & 0x7F);
2016                         *(pTarget+1) = (char)((ch >> 8) & 0x7F);
2017                         *(pTarget+2) = (char)((ch >> 16) & 0x7F);
2018                         *(pTarget+3) = (char)((ch >> 24) & 0x7F);
2019                         pSrc += 8;
2020                         *(pTarget+4) = (char)(chb & 0x7F);
2021                         *(pTarget+5) = (char)((chb >> 8) & 0x7F);
2022                         *(pTarget+6) = (char)((chb >> 16) & 0x7F);
2023                         *(pTarget+7) = (char)((chb >> 24) & 0x7F);
2024                         pTarget += 8;
2025 }
2026                     }
2027                     break;
2028
2029                 LongCodeWithMask32:
2030 if (!BitConverter.IsLittleEndian) {
2031                     // be careful about the sign extension
2032                     ch = (int)(((uint)ch) >> 16);
2033 } else {
2034                     ch &= 0xFF;
2035 }
2036                 LongCodeWithMask16:
2037 if (!BitConverter.IsLittleEndian) {
2038                     ch = (int)(((uint)ch) >> 8);
2039 } else {
2040                     ch &= 0xFF;
2041 }
2042                     pSrc++;
2043                     if (ch <= 0x7F) {
2044                         *pTarget = (char)ch;
2045                         pTarget++;
2046                         continue;
2047                     }
2048
2049                 LongCode:
2050                     int chc = *pSrc;
2051                     pSrc++;
2052
2053                     if (
2054                         // bit 6 has to be zero
2055                         (ch & 0x40) == 0 ||
2056                         // we are expecting to see trailing bytes like 10vvvvvv
2057                         (chc & unchecked((sbyte)0xC0)) != 0x80)
2058                     {
2059                         goto BadLongCode;
2060                     }
2061
2062                     chc &= 0x3F;
2063
2064                     // start a new long code
2065                     if ((ch & 0x20) != 0) {
2066
2067                         // fold the first two bytes together
2068                         chc |= (ch & 0x0F) << 6;
2069
2070                         if ((ch & 0x10) != 0) {
2071                             // 4 byte encoding - surrogate
2072                             ch = *pSrc;
2073                             if (
2074                                 // check that bit 4 is zero, the non-shortest form of surrogate
2075                                 // and the valid surrogate range 0x000000 - 0x10FFFF at the same time
2076                                 !InRange(chc >> 4, 0x01, 0x10) ||
2077                                 // we are expecting to see trailing bytes like 10vvvvvv
2078                                 (ch & unchecked((sbyte)0xC0)) != 0x80 )
2079                             {
2080                                 goto BadLongCode;
2081                             }
2082
2083                             chc = (chc << 6) | (ch & 0x3F);
2084
2085                             ch = *(pSrc+1);
2086                             // we are expecting to see trailing bytes like 10vvvvvv
2087                             if ((ch & unchecked((sbyte)0xC0)) != 0x80) {
2088                                 goto BadLongCode;
2089                             }
2090                             pSrc += 2;
2091
2092                             ch = (chc << 6) | (ch & 0x3F);
2093
2094                             *pTarget = (char)( ((ch >> 10) & 0x7FF) +
2095                                 unchecked((short)(CharUnicodeInfo.HIGH_SURROGATE_START - (0x10000 >> 10))) );
2096                             pTarget++;
2097
2098                             ch = (ch & 0x3FF) +
2099                                 unchecked((short)(CharUnicodeInfo.LOW_SURROGATE_START));
2100
2101                             // extra byte, we're already planning 2 chars for 2 of these bytes,
2102                             // but the big loop is testing the target against pStop, so we need
2103                             // to subtract 2 more or we risk overrunning the input.  Subtract 
2104                             // one here and one below.
2105                             pStop--;
2106                         }
2107                         else {
2108                             // 3 byte encoding
2109                             ch = *pSrc;
2110                             if (
2111                                 // check for non-shortest form of 3 byte seq
2112                                 (chc & (0x1F << 5)) == 0 ||
2113                                 // Can't have surrogates here.
2114                                 (chc & (0xF800 >> 6) ) == (0xD800 >> 6) ||
2115                                 // we are expecting to see trailing bytes like 10vvvvvv
2116                                 (ch & unchecked((sbyte)0xC0)) != 0x80 )
2117                             {
2118                                 goto BadLongCode;
2119                             }
2120                             pSrc++;
2121
2122                             ch = (chc << 6) | (ch & 0x3F);
2123
2124                             // extra byte, we're only expecting 1 char for each of these 3 bytes,
2125                             // but the loop is testing the target (not source) against pStop, so
2126                             // we need to subtract 2 more or we risk overrunning the input.
2127                             // Subtract 1 here and one more below
2128                             pStop--;
2129                         }
2130                     }
2131                     else {
2132                         // 2 byte encoding
2133
2134                         ch &= 0x1F;
2135
2136                         // check for non-shortest form
2137                         if (ch <= 1) {
2138                             goto BadLongCode;
2139                         }
2140                         ch = (ch << 6) | chc;
2141                     }
2142
2143                     *pTarget = (char)ch;
2144                     pTarget++;
2145
2146                     // extra byte, we're only expecting 1 char for each of these 2 bytes,
2147                     // but the loop is testing the target (not source) against pStop.
2148                     // subtract an extra count from pStop so that we don't overrun the input.
2149                     pStop--;
2150                 }
2151 #endif // FASTLOOP
2152
2153                 Contract.Assert(pTarget <= pAllocatedBufferEnd, "[UTF8Encoding.GetChars]pTarget <= pAllocatedBufferEnd");
2154
2155                 // no pending bits at this point
2156                 ch = 0;
2157                 continue;
2158
2159             BadLongCode:
2160                 pSrc -= 2;
2161                 ch = 0;
2162                 continue;
2163             }
2164
2165             if (ch != 0 && (baseDecoder == null || baseDecoder.MustFlush))
2166             {
2167                 // Have to do fallback for invalid bytes
2168                 if (fallback == null)
2169                 {
2170                     if (baseDecoder == null)
2171                         fallback = this.decoderFallback.CreateFallbackBuffer();
2172                     else
2173                         fallback = baseDecoder.FallbackBuffer;
2174                     fallback.InternalInitialize(bytes, pAllocatedBufferEnd);
2175                 }
2176
2177                 // This'll back us up the appropriate # of bytes if we didn't get anywhere
2178                 if (!FallbackInvalidByteSequence(ref pSrc, ch, fallback, ref pTarget))
2179                 {
2180                     Contract.Assert(pSrc >= bytes || pTarget == chars,
2181                         "[UTF8Encoding.GetChars]Expected to throw or remain in byte buffer while flushing");
2182
2183                     // Ran out of buffer space
2184                     // Need to throw an exception?
2185                     fallback.InternalReset();
2186                     ThrowCharsOverflow(baseDecoder, pTarget == chars);
2187                 }
2188                 Contract.Assert(pSrc >= bytes, 
2189                     "[UTF8Encoding.GetChars]Expected flushing invalid byte sequence to have remained within the byte array");                
2190                 ch = 0;
2191             }
2192
2193             if (baseDecoder != null)
2194             {
2195                 UTF8Decoder decoder = (UTF8Decoder)baseDecoder;
2196
2197                 // If we're storing flush data we expect all bits to be used or else
2198                 // we're stuck in the middle of a conversion
2199                 Contract.Assert(!baseDecoder.MustFlush || ch == 0 || !baseDecoder.m_throwOnOverflow,
2200                     "[UTF8Encoding.GetChars]Expected no must flush or no left over bits or no throw on overflow.");
2201
2202                 // Remember our leftover bits.
2203                 decoder.bits = ch;
2204
2205                 baseDecoder.m_bytesUsed = (int)(pSrc - bytes);
2206             }
2207
2208             // Shouldn't have anything in fallback buffer for GetChars
2209             // (don't have to check m_throwOnOverflow for chars)
2210             Contract.Assert(fallback == null || fallback.Remaining == 0,
2211                 "[UTF8Encoding.GetChars]Expected empty fallback buffer at end");
2212
2213             return PtrDiff(pTarget, chars);
2214         }
2215
// Runs the decoder fallback for an invalid byte sequence encountered during GetChars.
//   pSrc     - current read position; rewound to the start of the bad sequence only
//              when the fallback fails, otherwise left where it is.
//   ch       - accumulated decoder bits describing the bytes of the bad sequence.
//   fallback - fallback buffer that receives the invalid bytes.
//   pTarget  - current write position, handed to the fallback buffer by reference.
// Returns true when the fallback succeeded, false when it did not (the original note
// attributes failure to not having room to fall the sequence back).
[System.Security.SecurityCritical]  // auto-generated
private unsafe bool FallbackInvalidByteSequence(
    ref byte* pSrc, int ch, DecoderFallbackBuffer fallback, ref char* pTarget)
{
    // GetBytesUnknown rewinds the pointer it is given, so let it work on a copy:
    // pSrc itself must not move unless the fallback fails.
    byte* sequenceStart = pSrc;
    byte[] invalidBytes = GetBytesUnknown(ref sequenceStart, ch);

    bool fellBack = fallback.InternalFallback(invalidBytes, pSrc, ref pTarget);
    if (fellBack)
    {
        return true;
    }

    // The fallback failed: back up to the first byte of the bad sequence.
    pSrc = sequenceStart;
    return false;
}
2238
// Runs the decoder fallback for an invalid byte sequence encountered during GetCharCount.
//   pSrc     - read position, used to locate the invalid bytes; it is a by-value
//              parameter, so the rewind done by GetBytesUnknown is local to this call.
//   ch       - accumulated decoder bits describing the bytes of the bad sequence.
//   fallback - fallback buffer that receives the invalid bytes.
// Returns the number of fallback chars reported by the fallback buffer. Per the original
// note, this is only reached for "long" sequences, after the count pre-reserved for the
// input bytes has already been unreserved.
[System.Security.SecurityCritical]  // auto-generated
private unsafe int FallbackInvalidByteSequence(
    byte* pSrc, int ch, DecoderFallbackBuffer fallback)
{
    // The byte[] carries the actual bytes (in case the index derived from pSrc is -1).
    byte[] invalidBytes = GetBytesUnknown(ref pSrc, ch);

    return fallback.InternalFallback(invalidBytes, pSrc);
}
2257
// Rebuilds the raw bytes of an invalid or unfinished sequence from the decoder state
// accumulated in ch, and moves pSrc back by exactly the number of bytes rebuilt.
// The bytes are regenerated from ch instead of being re-read behind pSrc because some
// of them may have come from a previous fallback, so the values behind the pointer
// cannot be trusted.
//   pSrc - read position; decremented by the length of the returned array.
//   ch   - accumulated bits plus sequence flags (SupplimentarySeq, ThreeByteSeq, FinalByte).
// Returns the 1 to 3 bytes consumed so far for the bad sequence.
[System.Security.SecurityCritical]  // auto-generated
private unsafe byte[] GetBytesUnknown(ref byte* pSrc, int ch)
{
    // Plain single byte. The lower bound has to be tested too, because ch may carry
    // flag bits in addition to data bits.
    if (ch >= 0 && ch < 0x100)
    {
        pSrc -= 1;
        return new byte[] { unchecked((byte)ch) };
    }

    bool isFourByte = (ch & SupplimentarySeq) != 0;
    bool isThreeByte = (ch & ThreeByteSeq) != 0;

    // Neither flag set: an unfinished 2 byte sequence, so only its lead byte
    // (0xC0 | low five bits) is rebuilt.
    if (!isFourByte && !isThreeByte)
    {
        pSrc -= 1;
        return new byte[] { unchecked((byte)((ch & 0x1F) | 0xC0)) };
    }

    // Set when the FinalByte marker sits at its (>> 6) position in ch.
    bool markerShiftedBy6 = (ch & (FinalByte >> 6)) != 0;

    if (isFourByte)
    {
        if (markerShiftedBy6)
        {
            // Three bytes of a 4 byte sequence: lead byte plus two trail bytes.
            pSrc -= 3;
            return new byte[]
            {
                unchecked((byte)(((ch >> 12) & 0x07) | 0xF0)),
                unchecked((byte)(((ch >> 6) & 0x3F) | 0x80)),
                unchecked((byte)((ch & 0x3F) | 0x80))
            };
        }

        if ((ch & (FinalByte >> 12)) != 0)
        {
            // Two bytes of a 4 byte sequence: lead byte plus one trail byte.
            pSrc -= 2;
            return new byte[]
            {
                unchecked((byte)(((ch >> 6) & 0x07) | 0xF0)),
                unchecked((byte)((ch & 0x3F) | 0x80))
            };
        }

        // Only the lead byte of a 4 byte sequence.
        pSrc -= 1;
        return new byte[] { unchecked((byte)((ch & 0x07) | 0xF0)) };
    }

    // Remaining case: a 3 byte sequence.
    if (markerShiftedBy6)
    {
        // Two bytes of a 3 byte sequence: lead byte plus one trail byte.
        pSrc -= 2;
        return new byte[]
        {
            unchecked((byte)(((ch >> 6) & 0x0F) | 0xE0)),
            unchecked((byte)((ch & 0x3F) | 0x80))
        };
    }

    // Only the lead byte of a 3 byte sequence.
    pSrc -= 1;
    return new byte[] { unchecked((byte)((ch & 0x0F) | 0xE0)) };
}
2330
2331
// Returns a new UTF8Decoder constructed over this encoding instance.
public override Decoder GetDecoder()
{
    Decoder decoder = new UTF8Decoder(this);
    return decoder;
}
2335
2336
// Returns a new UTF8Encoder constructed over this encoding instance.
public override Encoder GetEncoder()
{
    Encoder encoder = new UTF8Encoder(this);
    return encoder;
}
2340
2341
// Computes the worst-case number of bytes needed to encode charCount characters.
//   charCount - number of characters to be encoded; must not be negative.
// Returns (charCount + 1) * max(EncoderFallback.MaxCharCount, 1) * 3.
// Throws ArgumentOutOfRangeException when charCount is negative or when the result
// does not fit in an Int32.
public override int GetMaxByteCount(int charCount)
{
    if (charCount < 0)
    {
        throw new ArgumentOutOfRangeException("charCount",
            Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
    }
    Contract.EndContractBlock();

    // One extra character in case a high surrogate is left over; every character may
    // additionally be expanded by the fallback.
    long byteCount = (long)charCount + 1;

    if (EncoderFallback.MaxCharCount > 1)
    {
        byteCount *= EncoderFallback.MaxCharCount;
    }

    // Max 3 bytes per char.  (4 bytes per 2 chars for surrogates)
    // The bound is tested BEFORE scaling: with a very large MaxCharCount the product
    // above can approach 2^62, and multiplying that by 3 would wrap around Int64 to a
    // negative value that slips past a check made after the multiplication.
    const long MaxBytesPerChar = 3;

    if (byteCount > 0x7fffffff / MaxBytesPerChar)
    {
        throw new ArgumentOutOfRangeException("charCount", Environment.GetResourceString("ArgumentOutOfRange_GetByteCountOverflow"));
    }

    return (int)(byteCount * MaxBytesPerChar);
}
2363
2364
// Computes the worst-case number of chars produced by decoding byteCount bytes.
//   byteCount - number of bytes to be decoded; must not be negative.
// Returns (byteCount + 1) * max(DecoderFallback.MaxCharCount, 1).
// Throws ArgumentOutOfRangeException when byteCount is negative or when the result
// does not fit in an Int32.
public override int GetMaxCharCount(int byteCount)
{
    if (byteCount < 0)
    {
        throw new ArgumentOutOfRangeException("byteCount",
            Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
    }
    Contract.EndContractBlock();

    // Non-shortest forms fall back, and so does a lead byte followed by another lead
    // byte, so in the worst case every single input byte goes through the fallback.
    long perByteChars = DecoderFallback.MaxCharCount > 1 ? DecoderFallback.MaxCharCount : 1;

    // 1 char per input byte, + 1 char if the 1st byte is the last byte of a 4 byte
    // surrogate pair.
    long charCount = ((long)byteCount + 1) * perByteChars;

    if (charCount > 0x7fffffff)
    {
        throw new ArgumentOutOfRangeException("byteCount", Environment.GetResourceString("ArgumentOutOfRange_GetCharCountOverflow"));
    }

    return (int)charCount;
}
2387
2388
// Returns the UTF-8 byte order mark (0xEF 0xBB 0xBF) when emitUTF8Identifier is set,
// otherwise the shared empty byte array.
public override byte[] GetPreamble()
{
    if (!emitUTF8Identifier)
    {
        return EmptyArray<Byte>.Value;
    }

    // A fresh array is handed out on every call so that callers cannot modify
    // a shared copy.
    byte[] preamble = new byte[3] { 0xEF, 0xBB, 0xBF };
    return preamble;
}
2398
2399
// Two UTF8Encoding instances are equal when they agree on emitUTF8Identifier and
// their encoder and decoder fallbacks are equal. Anything that is not a
// UTF8Encoding (including null) compares unequal.
public override bool Equals(Object value)
{
    UTF8Encoding other = value as UTF8Encoding;
    if (other == null)
    {
        return false;
    }

    if (emitUTF8Identifier != other.emitUTF8Identifier)
    {
        return false;
    }

    // Throw-on-invalid behaviour needs no separate comparison: it is the same as the
    // encoder/decoder fallbacks being exception fallbacks.
    if (!EncoderFallback.Equals(other.EncoderFallback))
    {
        return false;
    }

    return DecoderFallback.Equals(other.DecoderFallback);
}
2410
2411
// Hash built by summing the two fallback hash codes, the UTF-8 code page constant and
// the emitUTF8Identifier flag. Not a great distribution, but this type is relatively
// unlikely to be used as the key in a hashtable.
public override int GetHashCode()
{
    int hash = this.EncoderFallback.GetHashCode();
    hash += this.DecoderFallback.GetHashCode();
    hash += UTF8_CODEPAGE;
    hash += emitUTF8Identifier ? 1 : 0;
    return hash;
}
2417
// Encoder that keeps UTF-8 encoding state between calls: a high surrogate seen at the
// end of one call is parked in surrogateChar until the next call can look for the
// matching low surrogate.
[Serializable]
internal class UTF8Encoder : EncoderNLS, ISerializable
{
    // Saved high surrogate waiting for its low surrogate; 0 when nothing is pending.
    internal int surrogateChar;

    public UTF8Encoder(UTF8Encoding encoding)
        : base(encoding)
    {
        // Nothing to initialize here: the base constructor calls Reset().
    }

    // Deserialization constructor; also has to handle data serialized by Everett.
    internal UTF8Encoder(SerializationInfo info, StreamingContext context)
    {
        if (info == null)
        {
            throw new ArgumentNullException("info");
        }
        Contract.EndContractBlock();

        // Common info
        m_encoding = (Encoding)info.GetValue("encoding", typeof(Encoding));

        // The stored "surrogateChar" value happens to mean the same thing as our field.
        surrogateChar = (int)info.GetValue("surrogateChar", typeof(int));

        // If "m_fallback" cannot be read from the stream, continue without a fallback.
        try
        {
            m_fallback = (EncoderFallback)info.GetValue("m_fallback", typeof(EncoderFallback));
        }
        catch (SerializationException)
        {
            m_fallback = null;
        }
    }

#if FEATURE_SERIALIZATION
    // ISerializable implementation: writes the Whidbey fields plus the extra entries
    // that Everett expects to find.
    [System.Security.SecurityCritical]  // auto-generated_required
    void ISerializable.GetObjectData(SerializationInfo info, StreamingContext context)
    {
        if (info == null)
        {
            throw new ArgumentNullException("info");
        }
        Contract.EndContractBlock();

        // Whidbey data
        info.AddValue("encoding", m_encoding);
        info.AddValue("surrogateChar", surrogateChar);
        info.AddValue("m_fallback", m_fallback);

        // Extra entries for Everett that Whidbey doesn't use. Everett doesn't actually
        // use "mustFlush" either, but it accidentally serialized it.
        bool storedSurrogate = surrogateChar > 0;
        info.AddValue("storedSurrogate", storedSurrogate);
        info.AddValue("mustFlush", false);
    }
#endif

    // Drops any pending surrogate and resets the fallback buffer if one exists.
    public override void Reset()
    {
        surrogateChar = 0;

        if (m_fallbackBuffer != null)
        {
            m_fallbackBuffer.Reset();
        }
    }

    // True while a high surrogate is still waiting for its partner.
    internal override bool HasState
    {
        get { return surrogateChar != 0; }
    }
}
2492
// Decoder that keeps UTF-8 decoding state between calls in the bits field.
[Serializable]
internal class UTF8Decoder : DecoderNLS, ISerializable
{
    // Information remembered from the previous call (see the comments around the
    // definition of FinalByte for details); 0 when nothing is pending.
    internal int bits;

    public UTF8Decoder(UTF8Encoding encoding)
        : base(encoding)
    {
        // Nothing to initialize here: the base constructor calls Reset().
    }

    // Deserialization constructor; also has to handle data serialized by Everett.
    internal UTF8Decoder(SerializationInfo info, StreamingContext context)
    {
        if (info == null)
        {
            throw new ArgumentNullException("info");
        }
        Contract.EndContractBlock();

        // Common info
        m_encoding = (Encoding)info.GetValue("encoding", typeof(Encoding));

        try
        {
            // Whidbey stores its bits under the special name "wbits".
            bits = (int)info.GetValue("wbits", typeof(int));
            m_fallback = (DecoderFallback)info.GetValue("m_fallback", typeof(DecoderFallback));
        }
        catch (SerializationException)
        {
            // Everett calls its bits "bits" instead of "wbits", so this is Everett data:
            // start with no pending state and no stored fallback.
            bits = 0;
            m_fallback = null;
        }
    }

#if FEATURE_SERIALIZATION
    // ISerializable implementation: writes the Whidbey fields plus zeroed entries for
    // the extra fields Everett has.
    [System.Security.SecurityCritical]  // auto-generated_required
    void ISerializable.GetObjectData(SerializationInfo info, StreamingContext context)
    {
        if (info == null)
        {
            throw new ArgumentNullException("info");
        }
        Contract.EndContractBlock();

        // New Whidbey data; "wbits" is the special Whidbey name for bits.
        info.AddValue("encoding", m_encoding);
        info.AddValue("wbits", bits);
        info.AddValue("m_fallback", m_fallback);

        // Everett has extra stuff; set it all to 0 in case this deserializes in Everett.
        info.AddValue("bits", 0);
        info.AddValue("trailCount", 0);
        info.AddValue("isSurrogate", false);
        info.AddValue("byteSequence", 0);
    }
#endif

    // Clears the pending bits and resets the fallback buffer if one exists.
    public override void Reset()
    {
        bits = 0;

        if (m_fallbackBuffer != null)
        {
            m_fallbackBuffer.Reset();
        }
    }

    // True while bits left over from a previous call are still stored.
    internal override bool HasState
    {
        get { return bits != 0; }
    }
}
2567     }
2568 }