Update Reference Sources to .NET Framework 4.6.1
[mono.git] / mcs / class / referencesource / System.Runtime.Serialization / System / Xml / EncodingStreamWrapper.cs
1 //-----------------------------------------------------------------------------
2 // Copyright (c) Microsoft Corporation.  All rights reserved.
3 //-----------------------------------------------------------------------------
4 namespace System.Xml
5 {
6     using System;
7     using System.IO;
8     using System.Text;
9     using System.Runtime.Serialization;
10
11     // This wrapper does not support seek.
12     // Constructors consume/emit byte order mark.
13     // Supports: UTF-8, Unicode, BigEndianUnicode
14     // ASSUMPTION ([....]): This class will only be used for EITHER reading OR writing.  It can be done, it would just mean more buffers.
15     // ASSUMPTION ([....]): The byte buffer is large enough to hold the declaration
16     // ASSUMPTION ([....]): The buffer manipulation methods (FillBuffer/Compare/etc.) will only be used to parse the declaration
17     //                      during construction.
18     class EncodingStreamWrapper : Stream
19     {
20         enum SupportedEncoding { UTF8, UTF16LE, UTF16BE, None }
21         static readonly UTF8Encoding SafeUTF8 = new UTF8Encoding(false, false);
22         static readonly UnicodeEncoding SafeUTF16 = new UnicodeEncoding(false, false, false);
23         static readonly UnicodeEncoding SafeBEUTF16 = new UnicodeEncoding(true, false, false);
24         static readonly UTF8Encoding ValidatingUTF8 = new UTF8Encoding(false, true);
25         static readonly UnicodeEncoding ValidatingUTF16 = new UnicodeEncoding(false, false, true);
26         static readonly UnicodeEncoding ValidatingBEUTF16 = new UnicodeEncoding(true, false, true);
27         const int BufferLength = 128;
28
29         // UTF-8 is fastpath, so that's how these are stored
30         // Compare methods adapt to unicodes.
31         static readonly byte[] encodingAttr = new byte[] { (byte)'e', (byte)'n', (byte)'c', (byte)'o', (byte)'d', (byte)'i', (byte)'n', (byte)'g' };
32         static readonly byte[] encodingUTF8 = new byte[] { (byte)'u', (byte)'t', (byte)'f', (byte)'-', (byte)'8' };
33         static readonly byte[] encodingUnicode = new byte[] { (byte)'u', (byte)'t', (byte)'f', (byte)'-', (byte)'1', (byte)'6' };
34         static readonly byte[] encodingUnicodeLE = new byte[] { (byte)'u', (byte)'t', (byte)'f', (byte)'-', (byte)'1', (byte)'6', (byte)'l', (byte)'e' };
35         static readonly byte[] encodingUnicodeBE = new byte[] { (byte)'u', (byte)'t', (byte)'f', (byte)'-', (byte)'1', (byte)'6', (byte)'b', (byte)'e' };
36
37         SupportedEncoding encodingCode;
38         Encoding encoding;
39         Encoder enc;
40         Decoder dec;
41         bool isReading;
42
43         Stream stream;
44         char[] chars;
45         byte[] bytes;
46         int byteOffset;
47         int byteCount;
48
49         byte[] byteBuffer = new byte[1];
50
51         // Reading constructor
52         public EncodingStreamWrapper(Stream stream, Encoding encoding)
53         {
54             try
55             {
56                 this.isReading = true;
57                 this.stream = new BufferedStream(stream);
58
59                 // Decode the expected encoding
60                 SupportedEncoding expectedEnc = GetSupportedEncoding(encoding);
61
62                 // Get the byte order mark so we can determine the encoding
63                 // May want to try to delay allocating everything until we know the BOM
64                 SupportedEncoding declEnc = ReadBOMEncoding(encoding == null);
65
66                 // Check that the expected encoding matches the decl encoding.
67                 if (expectedEnc != SupportedEncoding.None && expectedEnc != declEnc)
68                     ThrowExpectedEncodingMismatch(expectedEnc, declEnc);
69
70                 // Fastpath: UTF-8 BOM
71                 if (declEnc == SupportedEncoding.UTF8)
72                 {
73                     // Fastpath: UTF-8 BOM, No declaration
74                     FillBuffer(2);
75                     if (bytes[byteOffset + 1] != '?' || bytes[byteOffset] != '<')
76                     {
77                         return;
78                     }
79
80                     FillBuffer(BufferLength);
81                     CheckUTF8DeclarationEncoding(bytes, byteOffset, byteCount, declEnc, expectedEnc);
82                 }
83                 else
84                 {
85                     // Convert to UTF-8
86                     EnsureBuffers();
87                     FillBuffer((BufferLength - 1) * 2);
88                     SetReadDocumentEncoding(declEnc);
89                     CleanupCharBreak();
90                     int count = this.encoding.GetChars(bytes, byteOffset, byteCount, chars, 0);
91                     byteOffset = 0;
92                     byteCount = ValidatingUTF8.GetBytes(chars, 0, count, bytes, 0);
93
94                     // Check for declaration
95                     if (bytes[1] == '?' && bytes[0] == '<')
96                     {
97                         CheckUTF8DeclarationEncoding(bytes, 0, byteCount, declEnc, expectedEnc);
98                     }
99                     else
100                     {
101                         // Declaration required if no out-of-band encoding
102                         if (expectedEnc == SupportedEncoding.None)
103                             throw DiagnosticUtility.ExceptionUtility.ThrowHelperError(new XmlException(SR.GetString(SR.XmlDeclarationRequired)));
104                     }
105                 }
106             }
107             catch (DecoderFallbackException ex)
108             {
109                 throw DiagnosticUtility.ExceptionUtility.ThrowHelperError(new XmlException(SR.GetString(SR.XmlInvalidBytes), ex));
110             }
111         }
112
113         void SetReadDocumentEncoding(SupportedEncoding e)
114         {
115             EnsureBuffers();
116             this.encodingCode = e;
117             this.encoding = GetEncoding(e);
118         }
119
120         static Encoding GetEncoding(SupportedEncoding e)
121         {
122             switch (e)
123             {
124                 case SupportedEncoding.UTF8:
125                     return ValidatingUTF8;
126
127                 case SupportedEncoding.UTF16LE:
128                     return ValidatingUTF16;
129
130                 case SupportedEncoding.UTF16BE:
131                     return ValidatingBEUTF16;
132
133                 default:
134                     throw DiagnosticUtility.ExceptionUtility.ThrowHelperError(new XmlException(SR.GetString(SR.XmlEncodingNotSupported)));
135             }
136         }
137
138         static Encoding GetSafeEncoding(SupportedEncoding e)
139         {
140             switch (e)
141             {
142                 case SupportedEncoding.UTF8:
143                     return SafeUTF8;
144
145                 case SupportedEncoding.UTF16LE:
146                     return SafeUTF16;
147
148                 case SupportedEncoding.UTF16BE:
149                     return SafeBEUTF16;
150
151                 default:
152                     throw DiagnosticUtility.ExceptionUtility.ThrowHelperError(new XmlException(SR.GetString(SR.XmlEncodingNotSupported)));
153             }
154         }
155
156         static string GetEncodingName(SupportedEncoding enc)
157         {
158             switch (enc)
159             {
160                 case SupportedEncoding.UTF8:
161                     return "utf-8";
162
163                 case SupportedEncoding.UTF16LE:
164                     return "utf-16LE";
165
166                 case SupportedEncoding.UTF16BE:
167                     return "utf-16BE";
168
169                 default:
170                     throw DiagnosticUtility.ExceptionUtility.ThrowHelperError(new XmlException(SR.GetString(SR.XmlEncodingNotSupported)));
171             }
172         }
173
174         static SupportedEncoding GetSupportedEncoding(Encoding encoding)
175         {
176             if (encoding == null)
177                 return SupportedEncoding.None;
178             else if (encoding.WebName == ValidatingUTF8.WebName)
179                 return SupportedEncoding.UTF8;
180             else if (encoding.WebName == ValidatingUTF16.WebName)
181                 return SupportedEncoding.UTF16LE;
182             else if (encoding.WebName == ValidatingBEUTF16.WebName)
183                 return SupportedEncoding.UTF16BE;
184             else
185                 throw DiagnosticUtility.ExceptionUtility.ThrowHelperError(new XmlException(SR.GetString(SR.XmlEncodingNotSupported)));
186         }
187
188         // Writing constructor
189         public EncodingStreamWrapper(Stream stream, Encoding encoding, bool emitBOM)
190         {
191             this.isReading = false;
192             this.encoding = encoding;
193             this.stream = new BufferedStream(stream);
194
195             // Set the encoding code
196             this.encodingCode = GetSupportedEncoding(encoding);
197
198             if (encodingCode != SupportedEncoding.UTF8)
199             {
200                 EnsureBuffers();
201                 dec = ValidatingUTF8.GetDecoder();
202                 enc = this.encoding.GetEncoder();
203
204                 // Emit BOM
205                 if (emitBOM)
206                 {
207                     byte[] bom = this.encoding.GetPreamble();
208                     if (bom.Length > 0)
209                         this.stream.Write(bom, 0, bom.Length);
210                 }
211             }
212         }
213
214         SupportedEncoding ReadBOMEncoding(bool notOutOfBand)
215         {
216             int b1 = this.stream.ReadByte();
217             int b2 = this.stream.ReadByte();
218             int b3 = this.stream.ReadByte();
219             int b4 = this.stream.ReadByte();
220
221             // Premature end of stream
222             if (b4 == -1)
223                 throw DiagnosticUtility.ExceptionUtility.ThrowHelperError(new XmlException(SR.GetString(SR.UnexpectedEndOfFile)));
224
225             int preserve;
226             SupportedEncoding e = ReadBOMEncoding((byte)b1, (byte)b2, (byte)b3, (byte)b4, notOutOfBand, out preserve);
227
228             EnsureByteBuffer();
229             switch (preserve)
230             {
231                 case 1:
232                     bytes[0] = (byte)b4;
233                     break;
234
235                 case 2:
236                     bytes[0] = (byte)b3;
237                     bytes[1] = (byte)b4;
238                     break;
239
240                 case 4:
241                     bytes[0] = (byte)b1;
242                     bytes[1] = (byte)b2;
243                     bytes[2] = (byte)b3;
244                     bytes[3] = (byte)b4;
245                     break;
246             }
247             byteCount = preserve;
248
249             return e;
250         }
251
252         static SupportedEncoding ReadBOMEncoding(byte b1, byte b2, byte b3, byte b4, bool notOutOfBand, out int preserve)
253         {
254             SupportedEncoding e = SupportedEncoding.UTF8; // Default
255
256             preserve = 0;
257             if (b1 == '<' && b2 != 0x00) // UTF-8, no BOM
258             {
259                 e = SupportedEncoding.UTF8;
260                 preserve = 4;
261             }
262             else if (b1 == 0xFF && b2 == 0xFE) // UTF-16 little endian
263             {
264                 e = SupportedEncoding.UTF16LE;
265                 preserve = 2;
266             }
267             else if (b1 == 0xFE && b2 == 0xFF) // UTF-16 big endian
268             {
269                 e = SupportedEncoding.UTF16BE;
270                 preserve = 2;
271             }
272             else if (b1 == 0x00 && b2 == '<') // UTF-16 big endian, no BOM
273             {
274                 e = SupportedEncoding.UTF16BE;
275
276                 if (notOutOfBand && (b3 != 0x00 || b4 != '?'))
277                     throw DiagnosticUtility.ExceptionUtility.ThrowHelperError(new XmlException(SR.GetString(SR.XmlDeclMissing)));
278                 preserve = 4;
279             }
280             else if (b1 == '<' && b2 == 0x00) // UTF-16 little endian, no BOM
281             {
282                 e = SupportedEncoding.UTF16LE;
283
284                 if (notOutOfBand && (b3 != '?' || b4 != 0x00))
285                     throw DiagnosticUtility.ExceptionUtility.ThrowHelperError(new XmlException(SR.GetString(SR.XmlDeclMissing)));
286                 preserve = 4;
287             }
288             else if (b1 == 0xEF && b2 == 0xBB) // UTF8 with BOM
289             {
290                 // Encoding error
291                 if (notOutOfBand && b3 != 0xBF)
292                     throw DiagnosticUtility.ExceptionUtility.ThrowHelperError(new XmlException(SR.GetString(SR.XmlBadBOM)));
293                 preserve = 1;
294             }
295             else  // Assume UTF8
296             {
297                 preserve = 4;
298             }
299
300             return e;
301         }
302
303         void FillBuffer(int count)
304         {
305             count -= byteCount;
306             while (count > 0)
307             {
308                 int read = stream.Read(bytes, byteOffset + byteCount, count);
309                 if (read == 0)
310                     break;
311
312                 byteCount += read;
313                 count -= read;
314             }
315         }
316
317         void EnsureBuffers()
318         {
319             EnsureByteBuffer();
320             if (chars == null)
321                 chars = new char[BufferLength];
322         }
323
324         void EnsureByteBuffer()
325         {
326             if (bytes != null)
327                 return;
328
329             bytes = new byte[BufferLength * 4];
330             byteOffset = 0;
331             byteCount = 0;
332         }
333
334         static void CheckUTF8DeclarationEncoding(byte[] buffer, int offset, int count, SupportedEncoding e, SupportedEncoding expectedEnc)
335         {
336             byte quot = 0;
337             int encEq = -1;
338             int max = offset + Math.Min(count, BufferLength);
339
340             // Encoding should be second "=", abort at first "?"
341             int i = 0;
342             int eq = 0;
343             for (i = offset + 2; i < max; i++)  // Skip the "<?" so we don't get caught by the first "?"
344             {
345                 if (quot != 0)
346                 {
347                     if (buffer[i] == quot)
348                     {
349                         quot = 0;
350                     }
351                     continue;
352                 }
353
354                 if (buffer[i] == (byte)'\'' || buffer[i] == (byte)'"')
355                 {
356                     quot = buffer[i];
357                 }
358                 else if (buffer[i] == (byte)'=')
359                 {
360                     if (eq == 1)
361                     {
362                         encEq = i;
363                         break;
364                     }
365                     eq++;
366                 }
367                 else if (buffer[i] == (byte)'?')  // Not legal character in a decl before second "="
368                 {
369                     break;
370                 }
371             }
372
373             // No encoding found
374             if (encEq == -1)
375             {
376                 if (e != SupportedEncoding.UTF8 && expectedEnc == SupportedEncoding.None)
377                     throw DiagnosticUtility.ExceptionUtility.ThrowHelperError(new XmlException(SR.GetString(SR.XmlDeclarationRequired)));
378                 return;
379             }
380
381             if (encEq < 28) // Earliest second "=" can appear
382                 throw DiagnosticUtility.ExceptionUtility.ThrowHelperError(new XmlException(SR.GetString(SR.XmlMalformedDecl)));
383
384             // Back off whitespace
385             for (i = encEq - 1; IsWhitespace(buffer[i]); i--);
386
387             // Check for encoding attribute
388             if (!Compare(encodingAttr, buffer, i - encodingAttr.Length + 1))
389             {
390                 if (e != SupportedEncoding.UTF8 && expectedEnc == SupportedEncoding.None)
391                     throw DiagnosticUtility.ExceptionUtility.ThrowHelperError(new XmlException(SR.GetString(SR.XmlDeclarationRequired)));
392                 return;
393             }
394
395             // Move ahead of whitespace
396             for (i = encEq + 1; i < max && IsWhitespace(buffer[i]); i++);
397
398             // Find the quotes
399             if (buffer[i] != '\'' && buffer[i] != '"')
400                 throw DiagnosticUtility.ExceptionUtility.ThrowHelperError(new XmlException(SR.GetString(SR.XmlMalformedDecl)));
401             quot = buffer[i];
402
403             int q = i;
404             for (i = q + 1; buffer[i] != quot && i < max; ++i);
405
406             if (buffer[i] != quot)
407                 throw DiagnosticUtility.ExceptionUtility.ThrowHelperError(new XmlException(SR.GetString(SR.XmlMalformedDecl)));
408
409             int encStart = q + 1;
410             int encCount = i - encStart;
411
412             // lookup the encoding
413             SupportedEncoding declEnc = e;
414             if (encCount == encodingUTF8.Length && CompareCaseInsensitive(encodingUTF8, buffer, encStart))
415             {
416                 declEnc = SupportedEncoding.UTF8;
417             }
418             else if (encCount == encodingUnicodeLE.Length && CompareCaseInsensitive(encodingUnicodeLE, buffer, encStart))
419             {
420                 declEnc = SupportedEncoding.UTF16LE;
421             }
422             else if (encCount == encodingUnicodeBE.Length && CompareCaseInsensitive(encodingUnicodeBE, buffer, encStart))
423             {
424                 declEnc = SupportedEncoding.UTF16BE;
425             }
426             else if (encCount == encodingUnicode.Length && CompareCaseInsensitive(encodingUnicode, buffer, encStart))
427             {
428                 if (e == SupportedEncoding.UTF8)
429                     ThrowEncodingMismatch(SafeUTF8.GetString(buffer, encStart, encCount), SafeUTF8.GetString(encodingUTF8, 0, encodingUTF8.Length));
430             }
431             else
432             {
433                 ThrowEncodingMismatch(SafeUTF8.GetString(buffer, encStart, encCount), e);
434             }
435
436             if (e != declEnc)
437                 ThrowEncodingMismatch(SafeUTF8.GetString(buffer, encStart, encCount), e);
438         }
439
440         static bool CompareCaseInsensitive(byte[] key, byte[] buffer, int offset)
441         {
442             for (int i = 0; i < key.Length; i++)
443             {
444                 if (key[i] == buffer[offset + i])
445                     continue;
446
447                 if (key[i] != Char.ToLower((char)buffer[offset + i], System.Globalization.CultureInfo.InvariantCulture))
448                     return false;
449             }
450             return true;
451         }
452
453         static bool Compare(byte[] key, byte[] buffer, int offset)
454         {
455             for (int i = 0; i < key.Length; i++)
456             {
457                 if (key[i] != buffer[offset + i])
458                     return false;
459             }
460             return true;
461         }
462
463         static bool IsWhitespace(byte ch)
464         {
465             return ch == (byte)' ' || ch == (byte)'\n' || ch == (byte)'\t' || ch == (byte)'\r';
466         }
467
468         internal static ArraySegment<byte> ProcessBuffer(byte[] buffer, int offset, int count, Encoding encoding)
469         {
470             if (count < 4)
471                 throw DiagnosticUtility.ExceptionUtility.ThrowHelperError(new XmlException(SR.GetString(SR.UnexpectedEndOfFile)));
472
473             try
474             {
475                 int preserve;
476                 ArraySegment<byte> seg;
477
478                 SupportedEncoding expectedEnc = GetSupportedEncoding(encoding);
479                 SupportedEncoding declEnc = ReadBOMEncoding(buffer[offset], buffer[offset + 1], buffer[offset + 2], buffer[offset + 3], encoding == null, out preserve);
480                 if (expectedEnc != SupportedEncoding.None && expectedEnc != declEnc)
481                     ThrowExpectedEncodingMismatch(expectedEnc, declEnc);
482
483                 offset += 4 - preserve;
484                 count -= 4 - preserve;
485
486                 // Fastpath: UTF-8
487                 char[] chars;
488                 byte[] bytes;
489                 Encoding localEnc;
490                 if (declEnc == SupportedEncoding.UTF8)
491                 {
492                     // Fastpath: No declaration
493                     if (buffer[offset + 1] != '?' || buffer[offset] != '<')
494                     {
495                         seg = new ArraySegment<byte>(buffer, offset, count);
496                         return seg;
497                     }
498
499                     CheckUTF8DeclarationEncoding(buffer, offset, count, declEnc, expectedEnc);
500                     seg = new ArraySegment<byte>(buffer, offset, count);
501                     return seg;
502                 }
503
504                 // Convert to UTF-8
505                 localEnc = GetSafeEncoding(declEnc);
506                 int inputCount = Math.Min(count, BufferLength * 2);
507                 chars = new char[localEnc.GetMaxCharCount(inputCount)];
508                 int ccount = localEnc.GetChars(buffer, offset, inputCount, chars, 0);
509                 bytes = new byte[ValidatingUTF8.GetMaxByteCount(ccount)];
510                 int bcount = ValidatingUTF8.GetBytes(chars, 0, ccount, bytes, 0);
511
512                 // Check for declaration
513                 if (bytes[1] == '?' && bytes[0] == '<')
514                 {
515                     CheckUTF8DeclarationEncoding(bytes, 0, bcount, declEnc, expectedEnc);
516                 }
517                 else
518                 {
519                     // Declaration required if no out-of-band encoding
520                     if (expectedEnc == SupportedEncoding.None)
521                         throw DiagnosticUtility.ExceptionUtility.ThrowHelperError(new XmlException(SR.GetString(SR.XmlDeclarationRequired)));
522                 }
523
524                 seg = new ArraySegment<byte>(ValidatingUTF8.GetBytes(GetEncoding(declEnc).GetChars(buffer, offset, count)));
525                 return seg;
526             }
527             catch (DecoderFallbackException e)
528             {
529                 throw DiagnosticUtility.ExceptionUtility.ThrowHelperError(new XmlException(SR.GetString(SR.XmlInvalidBytes), e));
530             }
531         }
532
533         static void ThrowExpectedEncodingMismatch(SupportedEncoding expEnc, SupportedEncoding actualEnc)
534         {
535             throw DiagnosticUtility.ExceptionUtility.ThrowHelperError(new XmlException(SR.GetString(SR.XmlExpectedEncoding, GetEncodingName(expEnc), GetEncodingName(actualEnc))));
536         }
537
538         static void ThrowEncodingMismatch(string declEnc, SupportedEncoding enc)
539         {
540             ThrowEncodingMismatch(declEnc, GetEncodingName(enc));
541         }
542
543         static void ThrowEncodingMismatch(string declEnc, string docEnc)
544         {
545             throw DiagnosticUtility.ExceptionUtility.ThrowHelperError(new XmlException(SR.GetString(SR.XmlEncodingMismatch, declEnc, docEnc)));
546         }
547
548         // This stream wrapper does not support duplex
549         public override bool CanRead
550         {
551             get
552             {
553                 if (!isReading)
554                     return false;
555
556                 return this.stream.CanRead;
557             }
558         }
559
560         // The encoding conversion and buffering breaks seeking.
561         public override bool CanSeek
562         {
563             get
564             {
565                 return false;
566             }
567         }
568
569         // This stream wrapper does not support duplex
570         public override bool CanWrite
571         {
572             get
573             {
574                 if (isReading)
575                     return false;
576
577                 return this.stream.CanWrite;
578             }
579         }
580
581
582         // The encoding conversion and buffering breaks seeking.
583         public override long Position
584         {
585             get
586             {
587 #pragma warning suppress 56503 // The contract for non seekable stream is to throw exception
588                 throw DiagnosticUtility.ExceptionUtility.ThrowHelperError(new NotSupportedException());
589             }
590             set
591             {
592                 throw DiagnosticUtility.ExceptionUtility.ThrowHelperError(new NotSupportedException());
593             }
594         }
595
596         public override void Close()
597         {
598             Flush();
599             base.Close();
600             this.stream.Close();
601         }
602
603         public override void Flush()
604         {
605             this.stream.Flush();
606         }
607
608         public override int ReadByte()
609         {
610             if (byteCount == 0 && encodingCode == SupportedEncoding.UTF8)
611                 return this.stream.ReadByte();
612             if (Read(byteBuffer, 0, 1) == 0)
613                 return -1;
614             return byteBuffer[0];
615         }
616
617         public override int Read(byte[] buffer, int offset, int count)
618         {
619             try
620             {
621                 if (byteCount == 0)
622                 {
623                     if (encodingCode == SupportedEncoding.UTF8)
624                         return this.stream.Read(buffer, offset, count);
625
626                     // No more bytes than can be turned into characters
627                     byteOffset = 0;
628                     byteCount = this.stream.Read(bytes, byteCount, (chars.Length - 1) * 2);
629
630                     // Check for end of stream
631                     if (byteCount == 0)
632                         return 0;
633
634                     // Fix up incomplete chars
635                     CleanupCharBreak();
636
637                     // Change encoding
638                     int charCount = this.encoding.GetChars(bytes, 0, byteCount, chars, 0);
639                     byteCount = Encoding.UTF8.GetBytes(chars, 0, charCount, bytes, 0);
640                 }
641
642                 // Give them bytes
643                 if (byteCount < count)
644                     count = byteCount;
645                 Buffer.BlockCopy(bytes, byteOffset, buffer, offset, count);
646                 byteOffset += count;
647                 byteCount -= count;
648                 return count;
649             }
650             catch (DecoderFallbackException ex)
651             {
652                 throw DiagnosticUtility.ExceptionUtility.ThrowHelperError(new XmlException(SR.GetString(SR.XmlInvalidBytes), ex));
653             }
654         }
655
656         void CleanupCharBreak()
657         {
658             int max = byteOffset + byteCount;
659
660             // Read on 2 byte boundaries
661             if ((byteCount % 2) != 0)
662             {
663                 int b = this.stream.ReadByte();
664                 if (b < 0)
665                     throw DiagnosticUtility.ExceptionUtility.ThrowHelperError(new XmlException(SR.GetString(SR.UnexpectedEndOfFile)));
666
667                 bytes[max++] = (byte)b;
668                 byteCount++;
669             }
670
671             // Don't cut off a surrogate character
672             int w;
673             if (encodingCode == SupportedEncoding.UTF16LE)
674             {
675                 w = bytes[max - 2] + (bytes[max - 1] << 8);
676             }
677             else
678             {
679                 w = bytes[max - 1] + (bytes[max - 2] << 8);
680             }
681             if ((w & 0xDC00) != 0xDC00 && w >= 0xD800 && w <= 0xDBFF)  // First 16-bit number of surrogate pair
682             {
683                 int b1 = this.stream.ReadByte();
684                 int b2 = this.stream.ReadByte();
685                 if (b2 < 0)
686                     throw DiagnosticUtility.ExceptionUtility.ThrowHelperError(new XmlException(SR.GetString(SR.UnexpectedEndOfFile)));
687                 bytes[max++] = (byte)b1;
688                 bytes[max++] = (byte)b2;
689                 byteCount += 2;
690             }
691         }
692
693         public override long Seek(long offset, SeekOrigin origin)
694         {
695             throw DiagnosticUtility.ExceptionUtility.ThrowHelperError(new NotSupportedException());
696         }
697
698         public override void WriteByte(byte b)
699         {
700             if (encodingCode == SupportedEncoding.UTF8)
701             {
702                 this.stream.WriteByte(b);
703                 return;
704             }
705             byteBuffer[0] = b;
706             Write(byteBuffer, 0, 1);
707         }
708
709         public override void Write(byte[] buffer, int offset, int count)
710         {
711             // Optimize UTF-8 case
712             if (encodingCode == SupportedEncoding.UTF8)
713             {
714                 this.stream.Write(buffer, offset, count);
715                 return;
716             }
717
718             while (count > 0)
719             {
720                 int size = chars.Length < count ? chars.Length : count;
721                 int charCount = dec.GetChars(buffer, offset, size, chars, 0, false);
722                 byteCount = enc.GetBytes(chars, 0, charCount, bytes, 0, false);
723                 this.stream.Write(bytes, 0, byteCount);
724                 offset += size;
725                 count -= size;
726             }
727         }
728
729         // Delegate properties
730         public override bool CanTimeout { get { return this.stream.CanTimeout; } }
731         public override long Length { get { return this.stream.Length; } }
732         public override int ReadTimeout
733         {
734             get { return this.stream.ReadTimeout; }
735             set { this.stream.ReadTimeout = value; }
736         }
737         public override int WriteTimeout
738         {
739             get { return this.stream.WriteTimeout; }
740             set { this.stream.WriteTimeout = value; }
741         }
742
743         // Delegate methods
744         public override void SetLength(long value)
745         {
746             throw DiagnosticUtility.ExceptionUtility.ThrowHelperError(new NotSupportedException());
747         }
748     }
749
750     // Add format exceptions
751     // Do we need to modify the stream position/Seek to account for the buffer?
752     // ASSUMPTION ([....]): This class will only be used for EITHER reading OR writing.
753 #if NO
754     class UTF16Stream : Stream
755     {
756         const int BufferLength = 128;
757         
758         Stream stream;
759         bool bigEndian;
760         byte[] streamBuffer;
761         int streamOffset;
762         int streamMax;
763         byte[] trailBytes = new byte[4];
764         int trailCount;
765         
766         public UTF16Stream(Stream stream, bool bigEndian)
767         {
768             this.stream = stream;
769             this.bigEndian = bigEndian;
770             this.streamBuffer = byte[BufferLength];
771         }
772
773         public override void Close()
774         {
775             Flush();
776             base.Close();
777             this.stream.Close();
778         }
779
780         public override void Flush()
781         {
782             this.stream.Flush();
783         }
784
785         public override int Read(byte[] buffer, int offset, int count)
786         {
787             // Validate args
788             
789             // Read what we can if we aren't sure we have enough for a single character
790             if (this.streamMax < 4)
791                 this.streamMax += this.stream.Read(this.streamBuffer, streamOffset, streamBuffer.Length - this.streamMax);
792
793             int totalWritten = 0;
794             while (streamOffset < streamMax && count > 0)
795             {
796                 int ch;
797                 int read;
798                 
799                 read = ReadUTF16Char(out ch, streamBuffer, streamOffset, streamBuffer.Length - streamMax);
800                 if (read == 0)
801                     break;
802
803                 int written = WriteUTF8Char(ch, buffer, offset, count);
804                 if (written == 0)
805                     break;
806                 
807                 totalWritten += written;
808                 streamOffset += read;
809                 offset += written;
810                 count -= written;
811             }
812             
813             // Shift down the leftover data
814             if (this.streamOffset > 0 && this.streamOffset < this.streamMax)
815             {
816                 Buffer.BlockCopy(this.streamBuffer, this.streamOffset, this.streamBuffer, 0, this.streamMax - this.streamOffset);
817                 this.streamMax -= this.streamOffset;
818                 this.streamOffset = 0;
819             }
820
821             return totalWritten;
822         }
823
824         int ReadUTF8Char(out int ch, byte[] buffer, int offset, int count)
825         {
826             ch = -1;
827             if (buffer[offset] < 0x80)
828             {
829                 ch = buffer[offset];
830                 return 1;
831             }
832             
833             int mask = buffer[offset] & 0xF0;
834             byte b1, b2, b3, b4;
835             if (mask == 0xC0)
836             {
837                 if (count < 2)
838                     return 0;
839                 
840                 b1 = buffer[offset + 0];
841                 b2 = buffer[offset + 1];
842                 
843                 ch = ((b1 & 0x1F) << 6) + (b2 & 0x3F);
844                 
845                 return 2;
846             }
847             else if (mask == 0xE0)
848             {
849                 if (count < 3)
850                     return 0;
851                 
852                 b1 = buffer[offset + 0];
853                 b2 = buffer[offset + 1];
854                 b3 = buffer[offset + 2];
855
856                 ch = ((((b1 & 0x0F) << 6) + (b2 & 0x3F)) << 6) + (b3 & 0x3F);
857                 
858                 return 3;
859             }
860             else if (mask == 0xF0)
861             {
862                 if (count < 4)
863                     return 0;
864                 
865                 b1 = buffer[offset + 0];
866                 b2 = buffer[offset + 1];
867                 b3 = buffer[offset + 2];
868                 b4 = buffer[offset + 3];
869                 
870                 ch = ((((((b1 & 0x0F) << 6) + (b2 & 0x3F)) << 6) + (b3 & 0x3F)) << 6) + (b4 & 0x3F);
871                 
872                 return 4;
873             }
874              
875             // Invalid
876             return 0;
877         }
878         
879         int ReadUTF16Char(out int ch, byte[] buffer, int offset, int count)
880         {
881             ch = -1;
882             
883             if (count < 2)
884                 return 0;
885     
886             int w1 = ReadEndian(buffer, offset);
887             
888             if (w1 < 0xD800 || w1 > 0xDFFF)
889             {
890                 ch = w1;
891                 return 2;
892             }
893     
894             if (count < 4)
895                 return 0;
896     
897             int w2 = ReadEndian(buffer, offset + 2);
898     
899             ch = ((w1 & 0x03FF) << 10) + (w2 & 0x03FF);
900             return 4;
901         }
902
903         int ReadEndian(byte[] buffer, int offset)
904         {
905             if (bigEndian)
906             {
907                 return (buffer[offset + 0] << 8) + buffer[offset + 1];
908             }
909             else
910             {
911                 return (buffer[offset + 1] << 8) + buffer[offset + 0];
912             }
913         }
914
915         int WriteUTF8Char(int ch, byte[] buffer, int offset, int count)
916         {
917             if (ch < 0x80)
918             {
919                 buffer[offset] = (byte)ch;
920                 return 1;
921             }
922             else if (ch < 0x800)
923             {
924                 if (count < 2)
925                     return 0;
926
927                 buffer[offset + 1] = 0x80 | (ch & 0x3F);
928                 ch >>= 6;
929                 buffer[offset + 0] = 0xC0 | ch;
930                 return 2
931             }
932             else if (ch < 0x10000)
933             {
934                 if (count < 3)
935                     return 0;
936                 
937                 buffer[offset + 2] = 0x80 | (ch & 0x3F);
938                 ch >>= 6;
939                 buffer[offset + 1] = 0x80 | (ch & 0x3F);
940                 ch >>= 6;
941                 buffer[offset + 0] = 0xE0 | ch;
942                 return 3;
943             }
944             else if (ch <= 0x110000)
945             {
946                 if (count < 4)
947                     return 0;
948                 buffer[offset + 3] = 0x80 | (ch & 0x3F);
949                 ch >>= 6;
950                 buffer[offset + 2] = 0x80 | (ch & 0x3F);
951                 ch >>= 6;
952                 buffer[offset + 1] = 0x80 | (ch & 0x3F);
953                 ch >>= 6;
954                 buffer[offset + 0] = 0xF0 | ch;
955                 return 4;
956             }
957             
958             // Invalid?
959             return 0;
960         }
961
962         int WriteUTF16Char(int ch, byte[] buffer, int offset, int count)
963         {
964             if (ch < 0x10000)
965             {
966                 if (count < 2)
967                     return 0;
968
969                 WriteEndian(ch, buffer, offset);
970                 return 2;
971             }
972
973             if (count < 4)
974                 return 0;
975             
976             ch -= 0x10000;
977             int w2 = 0xDC00 | (ch & 0x03FF);
978             int w1 = 0xD800 | ch >> 10;
979             WriteEndian(w1, buffer, offset);
980             WriteEndian(w2, buffer, offset + 2);
981             return 4;
982         }
983
984         void WriteEndian(int ch, byte[] buffer, int offset)
985         {
986             if (bigEndian)
987             {
988                 buffer[offset + 1] = (byte)ch; 
989                 buffer[offset + 0] = ch >> 8;
990             }
991             else
992             {
993                 buffer[offset + 0] = (byte)ch; 
994                 buffer[offset + 1] = ch >> 8;
995             }
996         }
997         
998         public override void Write(byte[] buffer, int offset, int count)
999         {
1000             // Validate args
1001               
1002             // Write the trail bytes
1003             if (trailCount > 0)
1004             {
1005                 int free = 4-trailCount;
1006                 int total = (count < free ? count : free) + trialCount;
1007                 Buffer.BlockCopy(buffer, offset, trailBytes, trailCount, total);
1008                 
1009                 int c;
1010                 int r = ReadUTF8Char(out c, trailBuffer, 0, total);
1011                 if (r == 0 && count < free)
1012                 {
1013                     trailCount = total;
1014                     return;
1015                 }
1016
1017                 int diff = r - trailCount;
1018                 offset += diff;
1019                 count -= diff;
1020                 streamOffset = WriteUTF16Char(c, streamBuffer, 0, streamBuffer.Length - streamOffset);
1021             }
1022             
1023             while (count > 0)
1024             {
1025                 if (streamBuffer.Length - streamOffset < 4)
1026                 {
1027                     this.stream.Write(streamBuffer, 0, streamOffset);
1028                     streamOffset = 0;
1029                 }
1030
1031                 int ch;
1032                 int read = ReadUTF8Char(out ch, buffer, offset, count);
1033                 if (read == 0)
1034                     break;
1035
1036                 int written = WriteUTF16Char(ch, streamBuffer, streamOffset, streamBuffer.Length - streamOffset);
1037                 if (written == 0)
1038                     break;
1039                 
1040                 streamOffset += written;
1041                 offset += read;
1042                 count -= read;
1043             }
1044     
1045             if (streamOffset > 0)
1046             {
1047                 this.stream.Write(streamBuffer, 0, streamOffset);
1048                 streamOffset = 0;
1049             }
1050     
1051             // Save trailing bytes
1052             if (count > 0)
1053             {
1054                 Buffer.BlockCopy(buffer, offset, trailBytes, 0, count);
1055                 trailCount = count;
1056             }
1057         }
1058
1059         // Delegate properties
1060         public override bool CanRead { get { return this.stream.CanRead; } }
1061         public override bool CanSeek { get { return this.stream.CanSeek; } }
1062         public override bool CanTimeout { get { return this.stream.CanTimeout; } }
1063         public override bool CanWrite { get { return this.stream.CanWrite; } }
1064         public override long Length { get { return this.stream.Length; } }
1065         public override long Position 
1066         { 
1067             get { return this.stream.Position; } 
1068             set { this.stream.Position = value; }
1069         }
1070         public override int ReadTimeout 
1071         { 
1072             get { return this.stream.ReadTimeout; } 
1073             set { this.stream.ReadTimeout = value; }
1074         }
1075         public override int WriteTimeout 
1076         { 
1077             get { return this.stream.WriteTimeout; } 
1078             set { this.stream.WriteTimeout = value; }
1079         }
1080     
1081         // Delegate methods
1082         public override long Seek(long offset, SeekOrigin origin)
1083         {
1084             return this.stream.Seek(offset, origin);
1085         }
1086     
1087         public override void SetLength(long value)
1088         {
1089             this.stream.SetLength(value);
1090         }
1091     }
1092 #endif
1093 }