Updates referencesource to .NET 4.7
[mono.git] / mcs / class / referencesource / mscorlib / system / text / utf32encoding.cs
1 // ==++==
2 //
3 //   Copyright (c) Microsoft Corporation.  All rights reserved.
4 //
5 // ==--==
6 //
7 // Don't override IsAlwaysNormalized because it is just a Unicode Transformation and could be confused.
8 //
9
10 #if FEATURE_UTF32
11
12 namespace System.Text
13 {
14
15     using System;
16     using System.Diagnostics.Contracts;
17     using System.Globalization;
18     // Encodes text into and out of UTF-32.  UTF-32 is a way of writing
    // Unicode characters with a single storage unit (32 bits) per character.
20     //
21     // The UTF-32 byte order mark is simply the Unicode byte order mark
22     // (0x00FEFF) written in UTF-32 (0x0000FEFF or 0xFFFE0000).  The byte order
23     // mark is used mostly to distinguish UTF-32 text from other encodings, and doesn't
24     // switch the byte orderings.
25
26     [Serializable]
27     public sealed class UTF32Encoding : Encoding
28     {
29         /*
30             words   bits    UTF-32 representation
31             -----   ----    -----------------------------------
32             1       16      00000000 00000000 xxxxxxxx xxxxxxxx
33             2       21      00000000 000xxxxx hhhhhhll llllllll
34             -----   ----    -----------------------------------
35
36             Surrogate:
37             Real Unicode value = (HighSurrogate - 0xD800) * 0x400 + (LowSurrogate - 0xDC00) + 0x10000
38          */
39
40         //
41         private bool emitUTF32ByteOrderMark = false;
42         private bool isThrowException = false;
43         private bool bigEndian = false;
44
45
46         public UTF32Encoding(): this(false, true, false)
47         {
48         }
49
50
51         public UTF32Encoding(bool bigEndian, bool byteOrderMark):
52             this(bigEndian, byteOrderMark, false)
53         {
54         }
55
56
57         public UTF32Encoding(bool bigEndian, bool byteOrderMark, bool throwOnInvalidCharacters):
58             base(bigEndian ? 12001 : 12000)
59         {
60             this.bigEndian = bigEndian;
61             this.emitUTF32ByteOrderMark = byteOrderMark;
62             this.isThrowException = throwOnInvalidCharacters;
63
64             // Encoding's constructor already did this, but it'll be wrong if we're throwing exceptions
65             if (this.isThrowException)
66                 SetDefaultFallbacks();
67         }
68
69         internal override void SetDefaultFallbacks()
70         {
71             // For UTF-X encodings, we use a replacement fallback with an empty string
72             if (this.isThrowException)
73             {
74                 this.encoderFallback = EncoderFallback.ExceptionFallback;
75                 this.decoderFallback = DecoderFallback.ExceptionFallback;
76             }
77             else
78             {
79                 this.encoderFallback = new EncoderReplacementFallback("\xFFFD");
80                 this.decoderFallback = new DecoderReplacementFallback("\xFFFD");
81             }
82         }
83
84
85         //
86         // The following methods are copied from EncodingNLS.cs.
        // Unfortunately EncodingNLS.cs is internal and we're public, so we have to reimplement them here.
88         // These should be kept in sync for the following classes:
89         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
90         //
91
92         // Returns the number of bytes required to encode a range of characters in
93         // a character array.
94         //
95         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
96         // So if you fix this, fix the others.  Currently those include:
97         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
98         // parent method is safe
99
100         [System.Security.SecuritySafeCritical]  // auto-generated
101         public override unsafe int GetByteCount(char[] chars, int index, int count)
102         {
103             // Validate input parameters
104             if (chars == null)
105                 throw new ArgumentNullException("chars",
106                       Environment.GetResourceString("ArgumentNull_Array"));
107
108             if (index < 0 || count < 0)
109                 throw new ArgumentOutOfRangeException((index<0 ? "index" : "count"),
110                       Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
111
112             if (chars.Length - index < count)
113                 throw new ArgumentOutOfRangeException("chars",
114                       Environment.GetResourceString("ArgumentOutOfRange_IndexCountBuffer"));
115             Contract.EndContractBlock();
116
117             // If no input, return 0, avoid fixed empty array problem
118             if (chars.Length == 0)
119                 return 0;
120
121             // Just call the pointer version
122             fixed (char* pChars = chars)
123                 return GetByteCount(pChars + index, count, null);
124         }
125
126         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
127         // So if you fix this, fix the others.  Currently those include:
128         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
129         // parent method is safe
130
131         [System.Security.SecuritySafeCritical]  // auto-generated
132         public override unsafe int GetByteCount(String s)
133         {
134             // Validate input
135             if (s==null)
136                 throw new ArgumentNullException("s");
137             Contract.EndContractBlock();
138
139             fixed (char* pChars = s)
140                 return GetByteCount(pChars, s.Length, null);
141         }
142
143         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
144         // So if you fix this, fix the others.  Currently those include:
145         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
146
147         [System.Security.SecurityCritical]  // auto-generated
148         [CLSCompliant(false)]
149         public override unsafe int GetByteCount(char* chars, int count)
150         {
151             // Validate Parameters
152             if (chars == null)
153                 throw new ArgumentNullException("chars",
154                     Environment.GetResourceString("ArgumentNull_Array"));
155
156             if (count < 0)
157                 throw new ArgumentOutOfRangeException("count",
158                     Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
159             Contract.EndContractBlock();
160
161             // Call it with empty encoder
162             return GetByteCount(chars, count, null);
163         }
164
165         // Parent method is safe.
166         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
167         // So if you fix this, fix the others.  Currently those include:
168         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
169
170         [System.Security.SecuritySafeCritical]  // auto-generated
171         public override unsafe int GetBytes(String s, int charIndex, int charCount,
172                                               byte[] bytes, int byteIndex)
173         {
174             if (s == null || bytes == null)
175                 throw new ArgumentNullException((s == null ? "s" : "bytes"),
176                       Environment.GetResourceString("ArgumentNull_Array"));
177
178             if (charIndex < 0 || charCount < 0)
179                 throw new ArgumentOutOfRangeException((charIndex<0 ? "charIndex" : "charCount"),
180                       Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
181
182             if (s.Length - charIndex < charCount)
183                 throw new ArgumentOutOfRangeException("s",
184                       Environment.GetResourceString("ArgumentOutOfRange_IndexCount"));
185
186             if (byteIndex < 0 || byteIndex > bytes.Length)
187                 throw new ArgumentOutOfRangeException("byteIndex",
188                     Environment.GetResourceString("ArgumentOutOfRange_Index"));
189             Contract.EndContractBlock();
190
191             int byteCount = bytes.Length - byteIndex;
192
193             // Fix our input array if 0 length because fixed doesn't like 0 length arrays
194             if (bytes.Length == 0)
195                 bytes = new byte[1];
196
197             fixed (char* pChars = s)
198                 fixed ( byte* pBytes = bytes)
199                     return GetBytes(pChars + charIndex, charCount,
200                                     pBytes + byteIndex, byteCount, null);
201         }
202
203         // Encodes a range of characters in a character array into a range of bytes
204         // in a byte array. An exception occurs if the byte array is not large
205         // enough to hold the complete encoding of the characters. The
206         // GetByteCount method can be used to determine the exact number of
207         // bytes that will be produced for a given range of characters.
208         // Alternatively, the GetMaxByteCount method can be used to
209         // determine the maximum number of bytes that will be produced for a given
210         // number of characters, regardless of the actual character values.
211         //
212         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
213         // So if you fix this, fix the others.  Currently those include:
214         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
215         // parent method is safe
216
217         [System.Security.SecuritySafeCritical]  // auto-generated
218         public override unsafe int GetBytes(char[] chars, int charIndex, int charCount,
219                                                byte[] bytes, int byteIndex)
220         {
221             // Validate parameters
222             if (chars == null || bytes == null)
223                 throw new ArgumentNullException((chars == null ? "chars" : "bytes"),
224                       Environment.GetResourceString("ArgumentNull_Array"));
225
226             if (charIndex < 0 || charCount < 0)
227                 throw new ArgumentOutOfRangeException((charIndex<0 ? "charIndex" : "charCount"),
228                       Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
229
230             if (chars.Length - charIndex < charCount)
231                 throw new ArgumentOutOfRangeException("chars",
232                       Environment.GetResourceString("ArgumentOutOfRange_IndexCountBuffer"));
233
234             if (byteIndex < 0 || byteIndex > bytes.Length)
235                 throw new ArgumentOutOfRangeException("byteIndex",
236                      Environment.GetResourceString("ArgumentOutOfRange_Index"));
237             Contract.EndContractBlock();
238
239             // If nothing to encode return 0, avoid fixed problem
240             if (chars.Length == 0)
241                 return 0;
242
243             // Just call pointer version
244             int byteCount = bytes.Length - byteIndex;
245
246             // Fix our input array if 0 length because fixed doesn't like 0 length arrays
247             if (bytes.Length == 0)
248                 bytes = new byte[1];
249
250             fixed (char* pChars = chars)
251                 fixed (byte* pBytes = bytes)
252                     // Remember that byteCount is # to decode, not size of array.
253                     return GetBytes(pChars + charIndex, charCount,
254                                     pBytes + byteIndex, byteCount, null);
255         }
256
257         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
258         // So if you fix this, fix the others.  Currently those include:
259         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
260
261         [System.Security.SecurityCritical]  // auto-generated
262         [CLSCompliant(false)]
263         public override unsafe int GetBytes(char* chars, int charCount, byte* bytes, int byteCount)
264         {
265             // Validate Parameters
266             if (bytes == null || chars == null)
267                 throw new ArgumentNullException(bytes == null ? "bytes" : "chars",
268                     Environment.GetResourceString("ArgumentNull_Array"));
269
270             if (charCount < 0 || byteCount < 0)
271                 throw new ArgumentOutOfRangeException((charCount<0 ? "charCount" : "byteCount"),
272                     Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
273             Contract.EndContractBlock();
274
275             return GetBytes(chars, charCount, bytes, byteCount, null);
276         }
277
278         // Returns the number of characters produced by decoding a range of bytes
279         // in a byte array.
280         //
281         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
282         // So if you fix this, fix the others.  Currently those include:
283         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
284         // parent method is safe
285
286         [System.Security.SecuritySafeCritical]  // auto-generated
287         public override unsafe int GetCharCount(byte[] bytes, int index, int count)
288         {
289             // Validate Parameters
290             if (bytes == null)
291                 throw new ArgumentNullException("bytes",
292                     Environment.GetResourceString("ArgumentNull_Array"));
293
294             if (index < 0 || count < 0)
295                 throw new ArgumentOutOfRangeException((index<0 ? "index" : "count"),
296                     Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
297
298             if (bytes.Length - index < count)
299                 throw new ArgumentOutOfRangeException("bytes",
300                     Environment.GetResourceString("ArgumentOutOfRange_IndexCountBuffer"));
301             Contract.EndContractBlock();
302
303             // If no input just return 0, fixed doesn't like 0 length arrays.
304             if (bytes.Length == 0)
305                 return 0;
306
307             // Just call pointer version
308             fixed (byte* pBytes = bytes)
309                 return GetCharCount(pBytes + index, count, null);
310         }
311
312         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
313         // So if you fix this, fix the others.  Currently those include:
314         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
315
316         [System.Security.SecurityCritical]  // auto-generated
317         [CLSCompliant(false)]
318         public override unsafe int GetCharCount(byte* bytes, int count)
319         {
320             // Validate Parameters
321             if (bytes == null)
322                 throw new ArgumentNullException("bytes",
323                     Environment.GetResourceString("ArgumentNull_Array"));
324
325             if (count < 0)
326                 throw new ArgumentOutOfRangeException("count",
327                     Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
328             Contract.EndContractBlock();
329
330             return GetCharCount(bytes, count, null);
331         }
332
333         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
334         // So if you fix this, fix the others.  Currently those include:
335         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
336         // parent method is safe
337
338         [System.Security.SecuritySafeCritical]  // auto-generated
339         public override unsafe int GetChars(byte[] bytes, int byteIndex, int byteCount,
340                                               char[] chars, int charIndex)
341         {
342             // Validate Parameters
343             if (bytes == null || chars == null)
344                 throw new ArgumentNullException(bytes == null ? "bytes" : "chars",
345                     Environment.GetResourceString("ArgumentNull_Array"));
346
347             if (byteIndex < 0 || byteCount < 0)
348                 throw new ArgumentOutOfRangeException((byteIndex<0 ? "byteIndex" : "byteCount"),
349                     Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
350
351             if ( bytes.Length - byteIndex < byteCount)
352                 throw new ArgumentOutOfRangeException("bytes",
353                     Environment.GetResourceString("ArgumentOutOfRange_IndexCountBuffer"));
354
355             if (charIndex < 0 || charIndex > chars.Length)
356                 throw new ArgumentOutOfRangeException("charIndex",
357                     Environment.GetResourceString("ArgumentOutOfRange_Index"));
358             Contract.EndContractBlock();
359
360             // If no input, return 0 & avoid fixed problem
361             if (bytes.Length == 0)
362                 return 0;
363
364             // Just call pointer version
365             int charCount = chars.Length - charIndex;
366
367             // Fix our input array if 0 length because fixed doesn't like 0 length arrays
368             if (chars.Length == 0)
369                 chars = new char[1];
370
371             fixed (byte* pBytes = bytes)
372                 fixed (char* pChars = chars)
373                     // Remember that charCount is # to decode, not size of array
374                     return GetChars(pBytes + byteIndex, byteCount,
375                                     pChars + charIndex, charCount, null);
376         }
377
378         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
379         // So if you fix this, fix the others.  Currently those include:
380         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
381
382         [System.Security.SecurityCritical]  // auto-generated
383         [CLSCompliant(false)]
384         public unsafe override int GetChars(byte* bytes, int byteCount, char* chars, int charCount)
385         {
386             // Validate Parameters
387             if (bytes == null || chars == null)
388                 throw new ArgumentNullException(bytes == null ? "bytes" : "chars",
389                     Environment.GetResourceString("ArgumentNull_Array"));
390
391             if (charCount < 0 || byteCount < 0)
392                 throw new ArgumentOutOfRangeException((charCount<0 ? "charCount" : "byteCount"),
393                     Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
394             Contract.EndContractBlock();
395
396             return GetChars(bytes, byteCount, chars, charCount, null);
397         }
398
399         // Returns a string containing the decoded representation of a range of
400         // bytes in a byte array.
401         //
402         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
403         // So if you fix this, fix the others.  Currently those include:
404         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
405         // parent method is safe
406
407         [System.Security.SecuritySafeCritical]  // auto-generated
408         public override unsafe String GetString(byte[] bytes, int index, int count)
409         {
410             // Validate Parameters
411             if (bytes == null)
412                 throw new ArgumentNullException("bytes",
413                     Environment.GetResourceString("ArgumentNull_Array"));
414
415             if (index < 0 || count < 0)
416                 throw new ArgumentOutOfRangeException((index < 0 ? "index" : "count"),
417                     Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
418
419             if (bytes.Length - index < count)
420                 throw new ArgumentOutOfRangeException("bytes",
421                     Environment.GetResourceString("ArgumentOutOfRange_IndexCountBuffer"));
422             Contract.EndContractBlock();
423
424             // Avoid problems with empty input buffer
425             if (bytes.Length == 0) return String.Empty;
426
427             fixed (byte* pBytes = bytes)
428                 return String.CreateStringFromEncoding(
429                     pBytes + index, count, this);
430         }
431
432         //
433         // End of standard methods copied from EncodingNLS.cs
434         //
435
        // Worker behind the public GetByteCount overloads: walks count characters starting
        // at chars and returns the number of bytes their UTF-32 encoding needs.
        //
        // Every ordinary character, and every high/low surrogate pair, adds 4 to the total.
        // A high surrogate without a following low surrogate, or a low surrogate without a
        // preceding high surrogate, is handed to the fallback buffer; characters the
        // fallback buffer then yields are run through the same loop.
        //
        // encoder may be null.  When it is non-null, its charLeftOver seeds the pending
        // high surrogate and its FallbackBuffer is used; otherwise a fresh buffer is
        // created from this.encoderFallback.
        //
        // Throws ArgumentException when the encoder's fallback buffer still has data
        // remaining, and ArgumentOutOfRangeException when the running total is negative
        // at the end (overflow check).
        [System.Security.SecurityCritical]  // auto-generated
        internal override unsafe int GetByteCount(char *chars, int count, EncoderNLS encoder)
        {
            Contract.Assert(chars!=null, "[UTF32Encoding.GetByteCount]chars!=null");
            Contract.Assert(count >=0, "[UTF32Encoding.GetByteCount]count >=0");

            // 'chars' is advanced as input is consumed; charStart remembers where it began.
            char* end = chars + count;
            char* charStart = chars;
            int byteCount = 0;

            // Non-zero while a high surrogate is waiting for its low surrogate.
            char highSurrogate = '\0';

            // For fallback we may need a fallback buffer
            EncoderFallbackBuffer fallbackBuffer = null;
            if (encoder != null)
            {
                // Resume from whatever the encoder carried over.
                highSurrogate = encoder.charLeftOver;
                fallbackBuffer = encoder.FallbackBuffer;

                // We mustn't have left over fallback data when counting
                if (fallbackBuffer.Remaining > 0)
                    throw new ArgumentException(Environment.GetResourceString("Argument_EncoderFallbackNotEmpty",
                    this.EncodingName, encoder.Fallback.GetType()));
            }
            else
            {
                fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
            }

            // Set our internal fallback interesting things.
            // NOTE(review): the final argument is 'false' here and 'true' in GetBytes;
            // presumably it tells the buffer whether encoder state may be updated - confirm
            // against EncoderFallbackBuffer.InternalInitialize.
            fallbackBuffer.InternalInitialize(charStart, end, encoder, false);

            char ch;
            // Re-entered from the bottom of the method after a trailing high surrogate
            // has been handed to the fallback buffer.
            TryAgain:

            // A non-zero char from the fallback buffer takes priority over the input;
            // the loop ends once the buffer returns 0 and the input is exhausted.
            while (((ch = fallbackBuffer.InternalGetNextChar()) != 0) || chars < end)
            {
                // First unwind any fallback
                if (ch == 0)
                {
                    // No fallback, just get next char
                    ch = *chars;
                    chars++;
                }

                // Do we need a low surrogate?
                if (highSurrogate != '\0')
                {
                    //
                    // In the previous char we encountered a high surrogate, so we are expecting a low surrogate here.
                    //
                    if (Char.IsLowSurrogate(ch))
                    {
                        // They're all legal
                        highSurrogate = '\0';

                        //
                        // One surrogate pair will be translated into 4 bytes UTF32.
                        //

                        byteCount += 4;
                        continue;
                    }

                    // We are missing our low surrogate, decrement chars and fallback the high surrogate
                    // The high surrogate may have come from the encoder, but nothing else did.
                    Contract.Assert(chars > charStart, 
                        "[UTF32Encoding.GetByteCount]Expected chars to have advanced if no low surrogate");
                    // Step back so the current char is read again on a later iteration.
                    chars--;

                    // Do the fallback
                    fallbackBuffer.InternalFallback(highSurrogate, ref chars);

                    // We're going to fallback the old high surrogate.
                    highSurrogate = '\0';
                    continue;

                }

                // Do we have another high surrogate?
                if (Char.IsHighSurrogate(ch))
                {
                    //
                    // We'll have a high surrogate to check next time.
                    //
                    highSurrogate = ch;
                    continue;
                }

                // Check for illegal characters
                if (Char.IsLowSurrogate(ch))
                {
                    // We have a leading low surrogate, do the fallback
                    fallbackBuffer.InternalFallback(ch, ref chars);

                    // Try again with fallback buffer
                    continue;
                }

                // We get to add the character (4 bytes UTF32)
                byteCount += 4;
            }

            // May have to do our last surrogate
            // (only when there is no encoder, or the encoder reports MustFlush; otherwise
            // the pending high surrogate is left uncounted by this call)
            if ((encoder == null || encoder.MustFlush) && highSurrogate > 0)
            {
                // We have to do the fallback for the lonely high surrogate
                fallbackBuffer.InternalFallback(highSurrogate, ref chars);
                highSurrogate = (char)0;
                // Run the loop again so anything the fallback buffer now yields is processed.
                goto TryAgain;
            }

            // Check for overflows.
            // Only a negative total is detected here.
            if (byteCount < 0)
                throw new ArgumentOutOfRangeException("count", Environment.GetResourceString(
                    "ArgumentOutOfRange_GetByteCountOverflow"));

            // Shouldn't have anything in fallback buffer for GetByteCount
            // (don't have to check m_throwOnOverflow for count)
            Contract.Assert(fallbackBuffer.Remaining == 0,
                "[UTF32Encoding.GetByteCount]Expected empty fallback buffer at end");

            // Return our count
            return byteCount;
        }
561
562         [System.Security.SecurityCritical]  // auto-generated
563         internal override unsafe int GetBytes(char *chars, int charCount,
564                                                  byte* bytes, int byteCount, EncoderNLS encoder)
565         {
566             Contract.Assert(chars!=null, "[UTF32Encoding.GetBytes]chars!=null");
567             Contract.Assert(bytes!=null, "[UTF32Encoding.GetBytes]bytes!=null");
568             Contract.Assert(byteCount >=0, "[UTF32Encoding.GetBytes]byteCount >=0");
569             Contract.Assert(charCount >=0, "[UTF32Encoding.GetBytes]charCount >=0");
570
571             char* charStart = chars;
572             char* charEnd = chars + charCount;
573             byte* byteStart = bytes;
574             byte* byteEnd = bytes + byteCount;
575
576             char highSurrogate = '\0';
577
578             // For fallback we may need a fallback buffer
579             EncoderFallbackBuffer fallbackBuffer = null;
580             if (encoder != null)
581             {
582                 highSurrogate = encoder.charLeftOver;
583                 fallbackBuffer = encoder.FallbackBuffer;
584
585                 // We mustn't have left over fallback data when not converting
586                 if (encoder.m_throwOnOverflow && fallbackBuffer.Remaining > 0)
587                     throw new ArgumentException(Environment.GetResourceString("Argument_EncoderFallbackNotEmpty",
588                     this.EncodingName, encoder.Fallback.GetType()));
589             }
590             else
591             {
592                 fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
593             }
594
595             // Set our internal fallback interesting things.
596             fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, true);
597
598             char ch;
599             TryAgain:
600
601             while (((ch = fallbackBuffer.InternalGetNextChar()) != 0) || chars < charEnd)
602             {
603                 // First unwind any fallback
604                 if (ch == 0)
605                 {
606                     // No fallback, just get next char
607                     ch = *chars;
608                     chars++;
609                 }
610
611                 // Do we need a low surrogate?
612                 if (highSurrogate != '\0')
613                 {
614                     //
615                     // In previous char, we encountered a high surrogate, so we are expecting a low surrogate here.
616                     //
617                     if (Char.IsLowSurrogate(ch))
618                     {
619                         // Is it a legal one?
620                         uint iTemp = GetSurrogate(highSurrogate, ch);
621                         highSurrogate = '\0';
622
623                         //
624                         // One surrogate pair will be translated into 4 bytes UTF32.
625                         //
626                         if (bytes+3 >= byteEnd)
627                         {
628                             // Don't have 4 bytes
629                             if (fallbackBuffer.bFallingBack)
630                             {
631                                 fallbackBuffer.MovePrevious();                  // Aren't using these 2 fallback chars
632                                 fallbackBuffer.MovePrevious();
633                             }
634                             else
635                             {
636                                 // If we don't have enough room, then either we should've advanced a while
637                                 // or we should have bytes==byteStart and throw below
638                                 Contract.Assert(chars > charStart + 1 || bytes == byteStart, 
639                                     "[UnicodeEncoding.GetBytes]Expected chars to have when no room to add surrogate pair");
640                                 chars-=2;                                       // Aren't using those 2 chars
641                             }
642                             ThrowBytesOverflow(encoder, bytes == byteStart);    // Throw maybe (if no bytes written)
643                             highSurrogate = (char)0;                            // Nothing left over (we backed up to start of pair if supplimentary)
644                             break;
645                         }
646
647                         if (bigEndian)
648                         {
649                             *(bytes++) = (byte)(0x00);
650                             *(bytes++) = (byte)(iTemp >> 16);       // Implies & 0xFF, which isn't needed cause high are all 0
651                             *(bytes++) = (byte)(iTemp >> 8);        // Implies & 0xFF
652                             *(bytes++) = (byte)(iTemp);             // Implies & 0xFF
653                         }
654                         else
655                         {
656                             *(bytes++) = (byte)(iTemp);             // Implies & 0xFF
657                             *(bytes++) = (byte)(iTemp >> 8);        // Implies & 0xFF
658                             *(bytes++) = (byte)(iTemp >> 16);       // Implies & 0xFF, which isn't needed cause high are all 0
659                             *(bytes++) = (byte)(0x00);
660                         }
661                         continue;
662                     }
663
664                     // We are missing our low surrogate, decrement chars and fallback the high surrogate
665                     // The high surrogate may have come from the encoder, but nothing else did.
666                     Contract.Assert(chars > charStart, 
667                         "[UTF32Encoding.GetBytes]Expected chars to have advanced if no low surrogate");
668                     chars--;
669
670                     // Do the fallback
671                     fallbackBuffer.InternalFallback(highSurrogate, ref chars);
672
673                     // We're going to fallback the old high surrogate.
674                     highSurrogate = '\0';
675                     continue;
676                 }
677
678                 // Do we have another high surrogate?, if so remember it
679                 if (Char.IsHighSurrogate(ch))
680                 {
681                     //
682                     // We'll have a high surrogate to check next time.
683                     //
684                     highSurrogate = ch;
685                     continue;
686                 }
687
688                 // Check for illegal characters (low surrogate)
689                 if (Char.IsLowSurrogate(ch))
690                 {
691                     // We have a leading low surrogate, do the fallback
692                     fallbackBuffer.InternalFallback(ch, ref chars);
693
694                     // Try again with fallback buffer
695                     continue;
696                 }
697
698                 // We get to add the character, yippee.
699                 if (bytes+3 >= byteEnd)
700                 {
701                     // Don't have 4 bytes
702                     if (fallbackBuffer.bFallingBack)
703                         fallbackBuffer.MovePrevious();                  // Aren't using this fallback char
704                     else
705                     {
706                         // Must've advanced already
707                         Contract.Assert(chars > charStart,
708                             "[UTF32Encoding.GetBytes]Expected chars to have advanced if normal character");
709                         chars--;                                        // Aren't using this char
710                     }
711                     ThrowBytesOverflow(encoder, bytes == byteStart);    // Throw maybe (if no bytes written)
712                     break;                                              // Didn't throw, stop
713                 }
714
715                 if (bigEndian)
716                 {
717                     *(bytes++) = (byte)(0x00);
718                     *(bytes++) = (byte)(0x00);
719                     *(bytes++) = (byte)((uint)ch >> 8); // Implies & 0xFF
720                     *(bytes++) = (byte)(ch);            // Implies & 0xFF
721                 }
722                 else
723                 {
724                     *(bytes++) = (byte)(ch);            // Implies & 0xFF
725                     *(bytes++) = (byte)((uint)ch >> 8); // Implies & 0xFF
726                     *(bytes++) = (byte)(0x00);
727                     *(bytes++) = (byte)(0x00);
728                 }
729             }
730
731             // May have to do our last surrogate
732             if ((encoder == null || encoder.MustFlush) && highSurrogate > 0)
733             {
734                 // We have to do the fallback for the lonely high surrogate
735                 fallbackBuffer.InternalFallback(highSurrogate, ref chars);
736                 highSurrogate = (char)0;
737                 goto TryAgain;
738             }
739
740             // Fix our encoder if we have one
741             Contract.Assert(highSurrogate == 0 || (encoder != null && !encoder.MustFlush),
742                 "[UTF32Encoding.GetBytes]Expected encoder to be flushed.");
743
744             if (encoder != null)
745             {
746                 // Remember our left over surrogate (or 0 if flushing)
747                 encoder.charLeftOver = highSurrogate;
748
749                 // Need # chars used
750                 encoder.m_charsUsed = (int)(chars-charStart);
751             }
752
753             // return the new length
754             return (int)(bytes - byteStart);
755         }
756
757         [System.Security.SecurityCritical]  // auto-generated
758         internal override unsafe int GetCharCount(byte* bytes, int count, DecoderNLS baseDecoder)
759         {
760             Contract.Assert(bytes!=null, "[UTF32Encoding.GetCharCount]bytes!=null");
761             Contract.Assert(count >=0, "[UTF32Encoding.GetCharCount]count >=0");
762
763             UTF32Decoder decoder = (UTF32Decoder)baseDecoder;
764
765             // None so far!
766             int charCount = 0;
767             byte* end = bytes + count;
768             byte* byteStart = bytes;
769
770             // Set up decoder
771             int readCount = 0;
772             uint iChar = 0;
773
774             // For fallback we may need a fallback buffer
775             DecoderFallbackBuffer fallbackBuffer = null;
776
777             // See if there's anything in our decoder
778             if (decoder != null)
779             {
780                 readCount = decoder.readByteCount;
781                 iChar = (uint)decoder.iChar;
782                 fallbackBuffer = decoder.FallbackBuffer;
783
784                 // Shouldn't have anything in fallback buffer for GetCharCount
785                 // (don't have to check m_throwOnOverflow for chars or count)
786                 Contract.Assert(fallbackBuffer.Remaining == 0,
787                     "[UTF32Encoding.GetCharCount]Expected empty fallback buffer at start");
788             }
789             else
790             {
791                 fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
792             }
793
794             // Set our internal fallback interesting things.
795             fallbackBuffer.InternalInitialize(byteStart, null);
796
797             // Loop through our input, 4 characters at a time!
798             while (bytes < end && charCount >= 0)
799             {
800                 // Get our next character
801                 if(bigEndian)
802                 {
803                     // Scoot left and add it to the bottom
804                     iChar <<= 8;
805                     iChar += *(bytes++);
806                 }
807                 else
808                 {
809                     // Scoot right and add it to the top
810                     iChar >>= 8;
811                     iChar += (uint)(*(bytes++)) << 24;
812                 }
813
814                 readCount++;
815
816                 // See if we have all the bytes yet
817                 if (readCount < 4)
818                     continue;
819
820                 // Have the bytes
821                 readCount = 0;
822
823                 // See if its valid to encode
824                 if ( iChar > 0x10FFFF || (iChar >= 0xD800 && iChar <= 0xDFFF))
825                 {
826                     // Need to fall back these 4 bytes
827                     byte[] fallbackBytes;
828                     if (this.bigEndian)
829                     {
830                         fallbackBytes = new byte[] {
831                             unchecked((byte)(iChar>>24)), unchecked((byte)(iChar>>16)),
832                             unchecked((byte)(iChar>>8)), unchecked((byte)(iChar)) };
833                     }
834                     else
835                     {
836                         fallbackBytes = new byte[] {
837                             unchecked((byte)(iChar)), unchecked((byte)(iChar>>8)),
838                             unchecked((byte)(iChar>>16)), unchecked((byte)(iChar>>24)) };
839                     }
840
841                     charCount += fallbackBuffer.InternalFallback(fallbackBytes, bytes);
842
843                     // Ignore the illegal character
844                     iChar = 0;
845                     continue;
846                 }
847
848                 // Ok, we have something we can add to our output
849                 if (iChar >= 0x10000)
850                 {
851                     // Surrogates take 2
852                     charCount++;
853                 }
854
855                 // Add the rest of the surrogate or our normal character
856                 charCount++;
857
858                 // iChar is back to 0
859                 iChar = 0;
860             }
861
862             // See if we have something left over that has to be decoded
863             if (readCount > 0 && (decoder == null || decoder.MustFlush))
864             {
865                 // Oops, there's something left over with no place to go.
866                 byte[] fallbackBytes = new byte[readCount];
867                 if (this.bigEndian)
868                 {
869                     while(readCount > 0)
870                     {
871                         fallbackBytes[--readCount] = unchecked((byte)iChar);
872                         iChar >>= 8;
873                     }
874                 }
875                 else
876                 {
877                     while (readCount > 0)
878                     {
879                         fallbackBytes[--readCount] = unchecked((byte)(iChar>>24));
880                         iChar <<= 8;
881                     }
882                 }
883
884                 charCount += fallbackBuffer.InternalFallback(fallbackBytes, bytes);
885             }
886
887             // Check for overflows.
888             if (charCount < 0)
889                 throw new ArgumentOutOfRangeException("count", Environment.GetResourceString("ArgumentOutOfRange_GetByteCountOverflow"));
890
891             // Shouldn't have anything in fallback buffer for GetCharCount
892             // (don't have to check m_throwOnOverflow for chars or count)
893             Contract.Assert(fallbackBuffer.Remaining == 0,
894                 "[UTF32Encoding.GetCharCount]Expected empty fallback buffer at end");
895
896             // Return our count
897             return charCount;
898         }
899
900         [System.Security.SecurityCritical]  // auto-generated
901         internal override unsafe int GetChars(byte* bytes, int byteCount,
902                                                 char* chars, int charCount, DecoderNLS baseDecoder)
903         {
904             Contract.Assert(chars!=null, "[UTF32Encoding.GetChars]chars!=null");
905             Contract.Assert(bytes!=null, "[UTF32Encoding.GetChars]bytes!=null");
906             Contract.Assert(byteCount >=0, "[UTF32Encoding.GetChars]byteCount >=0");
907             Contract.Assert(charCount >=0, "[UTF32Encoding.GetChars]charCount >=0");
908
909             UTF32Decoder decoder = (UTF32Decoder)baseDecoder;
910
911             // None so far!
912             char* charStart = chars;
913             char* charEnd = chars + charCount;
914
915             byte* byteStart = bytes;
916             byte* byteEnd = bytes + byteCount;
917
918             // See if there's anything in our decoder (but don't clear it yet)
919             int readCount = 0;
920             uint iChar = 0;
921
922             // For fallback we may need a fallback buffer
923             DecoderFallbackBuffer fallbackBuffer = null;
924
925             // See if there's anything in our decoder
926             if (decoder != null)
927             {
928                 readCount = decoder.readByteCount;
929                 iChar = (uint)decoder.iChar;
930                 fallbackBuffer = baseDecoder.FallbackBuffer;
931
932                 // Shouldn't have anything in fallback buffer for GetChars
933                 // (don't have to check m_throwOnOverflow for chars)
934                 Contract.Assert(fallbackBuffer.Remaining == 0,
935                     "[UTF32Encoding.GetChars]Expected empty fallback buffer at start");
936             }
937             else
938             {
939                 fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
940             }
941
942             // Set our internal fallback interesting things.
943             fallbackBuffer.InternalInitialize(bytes, chars + charCount);
944
945             // Loop through our input, 4 characters at a time!
946             while (bytes < byteEnd)
947             {
948                 // Get our next character
949                 if(bigEndian)
950                 {
951                     // Scoot left and add it to the bottom
952                     iChar <<= 8;
953                     iChar += *(bytes++);
954                 }
955                 else
956                 {
957                     // Scoot right and add it to the top
958                     iChar >>= 8;
959                     iChar += (uint)(*(bytes++)) << 24;
960                 }
961
962                 readCount++;
963
964                 // See if we have all the bytes yet
965                 if (readCount < 4)
966                     continue;
967
968                 // Have the bytes
969                 readCount = 0;
970
971                 // See if its valid to encode
972                 if ( iChar > 0x10FFFF || (iChar >= 0xD800 && iChar <= 0xDFFF))
973                 {
974                     // Need to fall back these 4 bytes
975                     byte[] fallbackBytes;
976                     if (this.bigEndian)
977                     {
978                         fallbackBytes = new byte[] {
979                             unchecked((byte)(iChar>>24)), unchecked((byte)(iChar>>16)),
980                             unchecked((byte)(iChar>>8)), unchecked((byte)(iChar)) };
981                     }
982                     else
983                     {
984                         fallbackBytes = new byte[] {
985                             unchecked((byte)(iChar)), unchecked((byte)(iChar>>8)),
986                             unchecked((byte)(iChar>>16)), unchecked((byte)(iChar>>24)) };
987                     }
988
989                     // Chars won't be updated unless this works.
990                     if (!fallbackBuffer.InternalFallback(fallbackBytes, bytes, ref chars))
991                     {
992                         // Couldn't fallback, throw or wait til next time
993                         // We either read enough bytes for bytes-=4 to work, or we're
994                         // going to throw in ThrowCharsOverflow because chars == charStart
995                         Contract.Assert(bytes >= byteStart + 4 || chars == charStart,
996                             "[UTF32Encoding.GetChars]Expected to have consumed bytes or throw (bad surrogate)");
997                         bytes-=4;                                       // get back to where we were
998                         iChar=0;                                        // Remembering nothing
999                         fallbackBuffer.InternalReset();
1000                         ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
1001                         break;                                          // Stop here, didn't throw
1002                     }
1003
1004                     // Ignore the illegal character
1005                     iChar = 0;
1006                     continue;
1007                 }
1008
1009
1010                 // Ok, we have something we can add to our output
1011                 if (iChar >= 0x10000)
1012                 {
1013                     // Surrogates take 2
1014                     if (chars >= charEnd - 1)
1015                     {
1016                         // Throwing or stopping
1017                         // We either read enough bytes for bytes-=4 to work, or we're
1018                         // going to throw in ThrowCharsOverflow because chars == charStart
1019                         Contract.Assert(bytes >= byteStart + 4 || chars == charStart,
1020                             "[UTF32Encoding.GetChars]Expected to have consumed bytes or throw (surrogate)");
1021                         bytes-=4;                                       // get back to where we were
1022                         iChar=0;                                        // Remembering nothing
1023                         ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
1024                         break;                                          // Stop here, didn't throw
1025                     }
1026
1027                     *(chars++) = GetHighSurrogate(iChar);
1028                     iChar = GetLowSurrogate(iChar);
1029                 }
1030                 // Bounds check for normal character
1031                 else if (chars >= charEnd)
1032                 {
1033                     // Throwing or stopping
1034                     // We either read enough bytes for bytes-=4 to work, or we're
1035                     // going to throw in ThrowCharsOverflow because chars == charStart
1036                     Contract.Assert(bytes >= byteStart + 4 || chars == charStart,
1037                         "[UTF32Encoding.GetChars]Expected to have consumed bytes or throw (normal char)");
1038                     bytes-=4;                                       // get back to where we were
1039                     iChar=0;                                        // Remembering nothing                    
1040                     ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
1041                     break;                                          // Stop here, didn't throw
1042                 }
1043
1044                 // Add the rest of the surrogate or our normal character
1045                 *(chars++) = (char)iChar;
1046
1047                 // iChar is back to 0
1048                 iChar = 0;
1049             }
1050
1051             // See if we have something left over that has to be decoded
1052             if (readCount > 0 && (decoder == null || decoder.MustFlush))
1053             {
1054                 // Oops, there's something left over with no place to go.
1055                 byte[] fallbackBytes = new byte[readCount];
1056                 int tempCount = readCount;
1057                 if (this.bigEndian)
1058                 {
1059                     while(tempCount > 0)
1060                     {
1061                         fallbackBytes[--tempCount] = unchecked((byte)iChar);
1062                         iChar >>= 8;
1063                     }
1064                 }
1065                 else
1066                 {
1067                     while (tempCount > 0)
1068                     {
1069                         fallbackBytes[--tempCount] = unchecked((byte)(iChar>>24));
1070                         iChar <<= 8;
1071                     }
1072                 }
1073
1074                 if (!fallbackBuffer.InternalFallback(fallbackBytes, bytes, ref chars))
1075                 {
1076                     // Couldn't fallback.
1077                     fallbackBuffer.InternalReset();
1078                     ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
1079                     // Stop here, didn't throw, backed up, so still nothing in buffer
1080                 }
1081                 else
1082                 {
1083                     // Don't clear our decoder unless we could fall it back.
1084                     // If we caught the if above, then we're a convert() and will catch this next time.
1085                     readCount = 0;
1086                     iChar = 0;
1087                 }
1088             }
1089
1090             // Remember any left over stuff, clearing buffer as well for MustFlush
1091             if (decoder != null)
1092             {
1093                 decoder.iChar = (int)iChar;
1094                 decoder.readByteCount = readCount;
1095                 decoder.m_bytesUsed = (int)(bytes - byteStart);
1096             }
1097
1098             // Shouldn't have anything in fallback buffer for GetChars
1099             // (don't have to check m_throwOnOverflow for chars)
1100             Contract.Assert(fallbackBuffer.Remaining == 0,
1101                 "[UTF32Encoding.GetChars]Expected empty fallback buffer at end");
1102
1103             // Return our count
1104             return (int)(chars - charStart);
1105         }
1106
1107
1108         private uint GetSurrogate(char cHigh, char cLow)
1109         {
1110             return (((uint)cHigh - 0xD800) * 0x400) + ((uint)cLow - 0xDC00) + 0x10000;
1111         }
1112
1113         private char GetHighSurrogate(uint iChar)
1114         {
1115             return (char)((iChar - 0x10000) / 0x400 + 0xD800);
1116         }
1117
1118         private char GetLowSurrogate(uint iChar)
1119         {
1120             return (char)((iChar - 0x10000) % 0x400 + 0xDC00);
1121         }
1122
1123
1124         public override Decoder GetDecoder()
1125         {
1126             return new UTF32Decoder(this);
1127         }
1128
1129
1130         public override Encoder GetEncoder()
1131         {
1132             return new EncoderNLS(this);
1133         }
1134
1135
1136         public override int GetMaxByteCount(int charCount)
1137         {
1138             if (charCount < 0)
1139                throw new ArgumentOutOfRangeException("charCount",
1140                     Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
1141             Contract.EndContractBlock();
1142
1143             // Characters would be # of characters + 1 in case left over high surrogate is ? * max fallback
1144             long byteCount = (long)charCount + 1;
1145
1146             if (EncoderFallback.MaxCharCount > 1)
1147                 byteCount *= EncoderFallback.MaxCharCount;
1148
1149             // 4 bytes per char
1150             byteCount *= 4;
1151
1152             if (byteCount > 0x7fffffff)
1153                 throw new ArgumentOutOfRangeException("charCount", Environment.GetResourceString("ArgumentOutOfRange_GetByteCountOverflow"));
1154
1155             return (int)byteCount;
1156         }
1157
1158
1159         public override int GetMaxCharCount(int byteCount)
1160         {
1161             if (byteCount < 0)
1162                throw new ArgumentOutOfRangeException("byteCount",
1163                     Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
1164             Contract.EndContractBlock();
1165
1166             // A supplementary character becomes 2 surrogate characters, so 4 input bytes becomes 2 chars,
1167             // plus we may have 1 surrogate char left over if the decoder has 3 bytes in it already for a non-bmp char.
1168             // Have to add another one because 1/2 == 0, but 3 bytes left over could be 2 char surrogate pair
1169             int charCount = (byteCount / 2) + 2;
1170
1171             // Also consider fallback because our input bytes could be out of range of unicode.
1172             // Since fallback would fallback 4 bytes at a time, we'll only fall back 1/2 of MaxCharCount.
1173             if (DecoderFallback.MaxCharCount > 2)
1174             {
1175                 // Multiply time fallback size
1176                 charCount *= DecoderFallback.MaxCharCount;
1177
1178                 // We were already figuring 2 chars per 4 bytes, but fallback will be different #
1179                 charCount /= 2;
1180             }
1181
1182             if (charCount > 0x7fffffff)
1183                 throw new ArgumentOutOfRangeException("byteCount", Environment.GetResourceString("ArgumentOutOfRange_GetCharCountOverflow"));
1184
1185             return (int)charCount;
1186         }
1187
1188
1189         public override byte[] GetPreamble()
1190         {
1191             if (emitUTF32ByteOrderMark)
1192             {
1193                 // Allocate new array to prevent users from modifying it.
1194                 if (bigEndian)
1195                 {
1196                     return new byte[4] { 0x00, 0x00, 0xFE, 0xFF };
1197                 }
1198                 else
1199                 {
1200                     return new byte[4] { 0xFF, 0xFE, 0x00, 0x00 }; // 00 00 FE FF
1201                 }
1202             }
1203             else
1204                 return EmptyArray<Byte>.Value;
1205         }
1206
1207
1208         public override bool Equals(Object value)
1209         {
1210             UTF32Encoding that = value as UTF32Encoding;
1211             if (that != null)
1212             {
1213                 return (emitUTF32ByteOrderMark == that.emitUTF32ByteOrderMark) &&
1214                        (bigEndian == that.bigEndian) &&
1215 //                       (isThrowException == that.isThrowException) && // same as encoder/decoderfallback being exceptions
1216                        (EncoderFallback.Equals(that.EncoderFallback)) &&
1217                        (DecoderFallback.Equals(that.DecoderFallback));
1218             }
1219             return (false);
1220         }
1221
1222
1223         public override int GetHashCode()
1224         {
1225             //Not great distribution, but this is relatively unlikely to be used as the key in a hashtable.
1226             return this.EncoderFallback.GetHashCode() + this.DecoderFallback.GetHashCode() +
1227                    CodePage + (emitUTF32ByteOrderMark?4:0) + (bigEndian?8:0);
1228         }
1229
1230         [Serializable]
1231         internal class UTF32Decoder : DecoderNLS
1232         {
1233             // Need a place to store any extra bytes we may have picked up
1234             internal int iChar = 0;
1235             internal int readByteCount = 0;
1236
1237             public UTF32Decoder(UTF32Encoding encoding) : base(encoding)
1238             {
1239                 // base calls reset
1240             }
1241
1242             public override void Reset()
1243             {
1244                 this.iChar = 0;
1245                 this.readByteCount = 0;
1246                 if (m_fallbackBuffer != null)
1247                     m_fallbackBuffer.Reset();
1248             }
1249
1250             // Anything left in our decoder?
1251             internal override bool HasState
1252             {
1253                 get
1254                 {
1255                     // ReadByteCount is our flag.  (iChar==0 doesn't mean much).
1256                     return (this.readByteCount != 0);
1257                 }
1258             }
1259         }
1260     }
1261 }
1262
1263 #endif // FEATURE_UTF32