Merge branch 'master' of github.com:mono/mono
[mono.git] / mcs / class / System.XML / System.Xml / XmlInputStream.cs
1 //
2 // System.Xml.XmlInputStream 
3 //      encoding-specification-wise XML input stream and reader
4 //
5 // Author:
6 //      Atsushi Enomoto (ginga@kit.hi-ho.ne.jp)
7 //
8 //      (C)2003 Atsushi Enomoto
9 //
10
11 //
12 // Permission is hereby granted, free of charge, to any person obtaining
13 // a copy of this software and associated documentation files (the
14 // "Software"), to deal in the Software without restriction, including
15 // without limitation the rights to use, copy, modify, merge, publish,
16 // distribute, sublicense, and/or sell copies of the Software, and to
17 // permit persons to whom the Software is furnished to do so, subject to
18 // the following conditions:
19 // 
20 // The above copyright notice and this permission notice shall be
21 // included in all copies or substantial portions of the Software.
22 // 
23 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
27 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
28 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
29 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
30 //
31 using System;
32 using System.IO;
33 using System.Text;
34 using System.Runtime.InteropServices;
35
36 namespace System.Xml
37 {
38         #region XmlStreamReader
39         internal class XmlStreamReader : NonBlockingStreamReader
40         {
41                 XmlInputStream input;
42
43                 XmlStreamReader (XmlInputStream input)
44                         : base (input, input.ActualEncoding != null ? input.ActualEncoding : XmlInputStream.StrictUTF8)
45                 {
46                         this.input = input;
47                 }
48
49                 public XmlStreamReader (Stream input)
50                         : this (new XmlInputStream (input))
51                 {
52                 }
53
54                 static XmlException invalidDataException = new XmlException ("invalid data.");
55
56                 public override void Close ()
57                 {
58                         this.input.Close ();
59                 }
60
61                 public override int Read ([In, Out] char[] dest_buffer, int index, int count)
62                 {
63                         try {
64                                 return base.Read (dest_buffer, index, count);
65                         }
66 #if NET_1_1
67                         catch (System.ArgumentException ex) {
68                                 throw new XmlException ("Invalid data", ex);
69                         }
70 #else
71                         catch (System.Text.DecoderFallbackException) {
72                                 throw invalidDataException;
73                         }
74 #endif
75                 }
76
77                 protected override void Dispose (bool disposing)
78                 {
79                         base.Dispose (disposing);
80                         if (disposing) {
81                                 Close ();
82                         }
83                 }
84
85         }
86         #endregion
87
88         #region NonBlockingStreamReader
89         // mostly copied from StreamReader, removing BOM checks, ctor
90         // parameter checks and some extra public members.
91         internal class NonBlockingStreamReader : TextReader {
92
93                 const int DefaultBufferSize = 1024;
94                 const int DefaultFileBufferSize = 4096;
95                 const int MinimumBufferSize = 128;
96
97                 //
98                 // The input buffer
99                 //
100                 byte [] input_buffer;
101
102                 //
103                 // The decoded buffer from the above input buffer
104                 //
105                 char [] decoded_buffer;
106
107                 //
108                 // Decoded bytes in decoded_buffer.
109                 //
110                 int decoded_count;
111
112                 //
113                 // Current position in the decoded_buffer
114                 //
115                 int pos;
116
117                 //
118                 // The buffer size that we are using
119                 //
120                 int buffer_size;
121
122                 Encoding encoding;
123                 Decoder decoder;
124
125                 Stream base_stream;
126                 bool mayBlock;
127                 StringBuilder line_builder;
128
129                 public NonBlockingStreamReader(Stream stream, Encoding encoding)
130                 {
131                         int buffer_size = DefaultBufferSize;
132                         base_stream = stream;
133                         input_buffer = new byte [buffer_size];
134                         this.buffer_size = buffer_size;
135                         this.encoding = encoding;
136                         decoder = encoding.GetDecoder ();
137
138                         decoded_buffer = new char [encoding.GetMaxCharCount (buffer_size)];
139                         decoded_count = 0;
140                         pos = 0;
141                 }
142
143                 public Encoding Encoding {
144                         get { return encoding; }
145                 }
146
147                 public override void Close ()
148                 {
149                         Dispose (true);
150                 }
151
152                 protected override void Dispose (bool disposing)
153                 {
154                         if (disposing && base_stream != null)
155                                 base_stream.Close ();
156                         
157                         input_buffer = null;
158                         decoded_buffer = null;
159                         encoding = null;
160                         decoder = null;
161                         base_stream = null;
162                         base.Dispose (disposing);
163                 }
164
165                 public void DiscardBufferedData ()
166                 {
167                         pos = decoded_count = 0;
168                         mayBlock = false;
169 #if NET_2_0
170                         decoder.Reset ();
171 #else
172                         decoder = encoding.GetDecoder ();
173 #endif
174                 }
175                 
176                 // the buffer is empty, fill it again
177                 private int ReadBuffer ()
178                 {
179                         pos = 0;
180                         int cbEncoded = 0;
181
182                         // keep looping until the decoder gives us some chars
183                         decoded_count = 0;
184                         int parse_start = 0;
185                         do      
186                         {
187                                 cbEncoded = base_stream.Read (input_buffer, 0, buffer_size);
188                                 
189                                 if (cbEncoded == 0)
190                                         return 0;
191
192                                 mayBlock = (cbEncoded < buffer_size);
193                                 decoded_count += decoder.GetChars (input_buffer, parse_start, cbEncoded, decoded_buffer, 0);
194                                 parse_start = 0;
195                         } while (decoded_count == 0);
196
197                         return decoded_count;
198                 }
199
200                 public override int Peek ()
201                 {
202                         if (base_stream == null)
203                                 throw new ObjectDisposedException ("StreamReader", "Cannot read from a closed StreamReader");
204                         if (pos >= decoded_count && (mayBlock || ReadBuffer () == 0))
205                                 return -1;
206
207                         return decoded_buffer [pos];
208                 }
209
210                 public override int Read ()
211                 {
212                         if (base_stream == null)
213                                 throw new ObjectDisposedException ("StreamReader", "Cannot read from a closed StreamReader");
214                         if (pos >= decoded_count && ReadBuffer () == 0)
215                                 return -1;
216
217                         return decoded_buffer [pos++];
218                 }
219
220                 public override int Read ([In, Out] char[] dest_buffer, int index, int count)
221                 {
222                         if (base_stream == null)
223                                 throw new ObjectDisposedException ("StreamReader", "Cannot read from a closed StreamReader");
224                         if (dest_buffer == null)
225                                 throw new ArgumentNullException ("dest_buffer");
226                         if (index < 0)
227                                 throw new ArgumentOutOfRangeException ("index", "< 0");
228                         if (count < 0)
229                                 throw new ArgumentOutOfRangeException ("count", "< 0");
230                         // re-ordered to avoid possible integer overflow
231                         if (index > dest_buffer.Length - count)
232                                 throw new ArgumentException ("index + count > dest_buffer.Length");
233
234                         int chars_read = 0;
235 //                      while (count > 0)
236                         {
237                                 if (pos >= decoded_count && ReadBuffer () == 0)
238                                         return chars_read > 0 ? chars_read : 0;
239
240                                 int cch = Math.Min (decoded_count - pos, count);
241                                 Array.Copy (decoded_buffer, pos, dest_buffer, index, cch);
242                                 pos += cch;
243                                 index += cch;
244                                 count -= cch;
245                                 chars_read += cch;
246                         }
247                         return chars_read;
248                 }
249
250                 bool foundCR;
251                 int FindNextEOL ()
252                 {
253                         char c = '\0';
254                         for (; pos < decoded_count; pos++) {
255                                 c = decoded_buffer [pos];
256                                 if (c == '\n') {
257                                         pos++;
258                                         int res = (foundCR) ? (pos - 2) : (pos - 1);
259                                         if (res < 0)
260                                                 res = 0; // if a new buffer starts with a \n and there was a \r at
261                                                         // the end of the previous one, we get here.
262                                         foundCR = false;
263                                         return res;
264                                 } else if (foundCR) {
265                                         foundCR = false;
266                                         return pos - 1;
267                                 }
268
269                                 foundCR = (c == '\r');
270                         }
271
272                         return -1;
273                 }
274
275                 public override string ReadLine()
276                 {
277                         if (base_stream == null)
278                                 throw new ObjectDisposedException ("StreamReader", "Cannot read from a closed StreamReader");
279
280                         if (pos >= decoded_count && ReadBuffer () == 0)
281                                 return null;
282
283                         int begin = pos;
284                         int end = FindNextEOL ();
285                         if (end < decoded_count && end >= begin)
286                                 return new string (decoded_buffer, begin, end - begin);
287
288                         if (line_builder == null)
289                                 line_builder = new StringBuilder ();
290                         else
291                                 line_builder.Length = 0;
292
293                         while (true) {
294                                 if (foundCR) // don't include the trailing CR if present
295                                         decoded_count--;
296
297                                 line_builder.Append (new string (decoded_buffer, begin, decoded_count - begin));
298                                 if (ReadBuffer () == 0) {
299                                         if (line_builder.Capacity > 32768) {
300                                                 StringBuilder sb = line_builder;
301                                                 line_builder = null;
302                                                 return sb.ToString (0, sb.Length);
303                                         }
304                                         return line_builder.ToString (0, line_builder.Length);
305                                 }
306
307                                 begin = pos;
308                                 end = FindNextEOL ();
309                                 if (end < decoded_count && end >= begin) {
310                                         line_builder.Append (new string (decoded_buffer, begin, end - begin));
311                                         if (line_builder.Capacity > 32768) {
312                                                 StringBuilder sb = line_builder;
313                                                 line_builder = null;
314                                                 return sb.ToString (0, sb.Length);
315                                         }
316                                         return line_builder.ToString (0, line_builder.Length);
317                                 }
318                         }
319                 }
320
321                 public override string ReadToEnd()
322                 {
323                         if (base_stream == null)
324                                 throw new ObjectDisposedException ("StreamReader", "Cannot read from a closed StreamReader");
325
326                         StringBuilder text = new StringBuilder ();
327
328                         int size = decoded_buffer.Length;
329                         char [] buffer = new char [size];
330                         int len;
331                         
332                         while ((len = Read (buffer, 0, size)) != 0)
333                                 text.Append (buffer, 0, len);
334
335                         return text.ToString ();
336                 }
337         }
338         #endregion
339
340         class XmlInputStream : Stream
341         {
342                 public static readonly Encoding StrictUTF8;
343
344                 static XmlInputStream ()
345                 {
346                         StrictUTF8 = new UTF8Encoding (false, true);
347                 }
348
349                 Encoding enc;
350                 Stream stream;
351                 byte[] buffer;
352                 int bufLength;
353                 int bufPos;
354
355                 static XmlException encodingException = new XmlException ("invalid encoding specification.");
356
357                 public XmlInputStream (Stream stream)
358                 {
359                         Initialize (stream);
360                 }
361
362                 // this returns null, instead of throwing ArgumentOutOfRangeException
363                 string GetStringFromBytes (int index, int count)
364                 {
365                         int posBak = bufPos;
366                         while (bufPos < index + count)
367                                 if (ReadByteSpecial () < 0)
368                                         return null;
369                         bufPos = posBak;
370 #if MOONLIGHT
371                         char [] chars = new char [count];
372                         for (int i = index; i < count; i++)
373                                 chars [i] = (char) buffer [i];
374
375                         return new string (chars);
376 #else
377                         return Encoding.ASCII.GetString (buffer, index, count);
378 #endif
379                 }
380
381                 private void Initialize (Stream stream)
382                 {
383                         buffer = new byte [6];
384                         this.stream = stream;
385                         enc = StrictUTF8; // Default to UTF8 if we can't guess it
386                         bufLength = stream.Read (buffer, 0, buffer.Length);
387                         if (bufLength == -1 || bufLength == 0) {
388                                 return;
389                         }
390
391                         int c = ReadByteSpecial ();
392                         switch (c) {
393                         case 0xFF:
394                                 c = ReadByteSpecial ();
395                                 if (c == 0xFE) {
396                                         // BOM-ed little endian utf-16
397                                         enc = Encoding.Unicode;
398                                 } else {
399                                         // It doesn't start from "<?xml" then its encoding is utf-8
400                                         bufPos = 0;
401                                 }
402                                 break;
403                         case 0xFE:
404                                 c = ReadByteSpecial ();
405                                 if (c == 0xFF) {
406                                         // BOM-ed big endian utf-16
407                                         enc = Encoding.BigEndianUnicode;
408                                         return;
409                                 } else {
410                                         // It doesn't start from "<?xml" then its encoding is utf-8
411                                         bufPos = 0;
412                                 }
413                                 break;
414                         case 0xEF:
415                                 c = ReadByteSpecial ();
416                                 if (c == 0xBB) {
417                                         c = ReadByteSpecial ();
418                                         if (c != 0xBF) {
419                                                 bufPos = 0;
420                                         }
421                                 } else {
422                                         buffer [--bufPos] = 0xEF;
423                                 }
424                                 break;
425                         case '<':
426                                 // try to get encoding name from XMLDecl.
427                                 if (bufLength >= 5 && GetStringFromBytes (1, 4) == "?xml") {
428                                         bufPos += 4;
429                                         c = SkipWhitespace ();
430
431                                         // version. It is optional here.
432                                         if (c == 'v') {
433                                                 while (c >= 0) {
434                                                         c = ReadByteSpecial ();
435                                                         if (c == '0') { // 0 of 1.0
436                                                                 ReadByteSpecial ();
437                                                                 break;
438                                                         }
439                                                 }
440                                                 c = SkipWhitespace ();
441                                         }
442
443                                         if (c == 'e') {
444                                                 if (GetStringFromBytes (bufPos, 7) == "ncoding") {
445                                                         bufPos += 7;
446                                                         c = SkipWhitespace();
447                                                         if (c != '=')
448                                                                 throw encodingException;
449                                                         c = SkipWhitespace ();
450                                                         int quoteChar = c;
451                                                         StringBuilder sb = new StringBuilder ();
452                                                         while (true) {
453                                                                 c = ReadByteSpecial ();
454                                                                 if (c == quoteChar)
455                                                                         break;
456                                                                 else if (c < 0)
457                                                                         throw encodingException;
458
459                                                                 sb.Append ((char) c);
460                                                         }
461                                                         string encodingName = sb.ToString ();
462                                                         if (!XmlChar.IsValidIANAEncoding (encodingName))
463                                                                 throw encodingException;
464                                                         enc = Encoding.GetEncoding (encodingName);
465                                                 }
466                                         }
467                                 }
468 #if TARGET_JVM
469                                 else {
470                                         if (bufLength >= 10 && Encoding.Unicode.GetString (buffer, 2, 8) == "?xml")
471                                                 enc = Encoding.Unicode;
472                                 }
473 #endif
474                                 bufPos = 0;
475                                 break;
476                         default:
477                                 bufPos = 0;
478                                 break;
479                         }
480                 }
481
482                 // Just like readbyte, but grows the buffer too.
483                 int ReadByteSpecial ()
484                 {
485                         if (bufLength > bufPos)
486                                 return buffer [bufPos++];
487
488                         byte [] newbuf = new byte [buffer.Length * 2];
489                         Buffer.BlockCopy (buffer, 0, newbuf, 0, bufLength);
490                         int nbytes = stream.Read (newbuf, bufLength, buffer.Length);
491                         if (nbytes == -1 || nbytes == 0)
492                                 return -1;
493                                 
494                         bufLength += nbytes;
495                         buffer = newbuf;
496                         return buffer [bufPos++];
497                 }
498
499                 // skips whitespace and returns misc char that was read from stream
500                 private int SkipWhitespace ()
501                 {
502                         int c;
503                         while (true) {
504                                 c = ReadByteSpecial ();
505                                 switch ((char) c) {
506                                 case '\r': goto case ' ';
507                                 case '\n': goto case ' ';
508                                 case '\t': goto case ' ';
509                                 case ' ':
510                                         continue;
511                                 default:
512                                         return c;
513                                 }
514                         }
515                 }
516
517                 public Encoding ActualEncoding {
518                         get { return enc; }
519                 }
520
521                 #region Public Overrides
522                 public override bool CanRead {
523                         get {
524                                 if (bufLength > bufPos)
525                                         return true;
526                                 else
527                                         return stream.CanRead; 
528                         }
529                 }
530
531                 // FIXME: It should support base stream's CanSeek.
532                 public override bool CanSeek {
533                         get { return false; } // stream.CanSeek; }
534                 }
535
536                 public override bool CanWrite {
537                         get { return false; }
538                 }
539
540                 public override long Length {
541                         get {
542                                 return stream.Length;
543                         }
544                 }
545
546                 public override long Position {
547                         get {
548                                 return stream.Position - bufLength + bufPos;
549                         }
550                         set {
551                                 if(value < bufLength)
552                                         bufPos = (int)value;
553                                 else
554                                         stream.Position = value - bufLength;
555                         }
556                 }
557
558                 public override void Close ()
559                 {
560                         stream.Close ();
561                 }
562
563                 public override void Flush ()
564                 {
565                         stream.Flush ();
566                 }
567
568                 public override int Read (byte[] buffer, int offset, int count)
569                 {
570                         int ret;
571                         if (count <= bufLength - bufPos)        {       // all from buffer
572                                 Buffer.BlockCopy (this.buffer, bufPos, buffer, offset, count);
573                                 bufPos += count;
574                                 ret = count;
575                         } else {
576                                 int bufRest = bufLength - bufPos;
577                                 if (bufLength > bufPos) {
578                                         Buffer.BlockCopy (this.buffer, bufPos, buffer, offset, bufRest);
579                                         bufPos += bufRest;
580                                 }
581                                 ret = bufRest +
582                                         stream.Read (buffer, offset + bufRest, count - bufRest);
583                         }
584                         return ret;
585                 }
586
587                 public override int ReadByte ()
588                 {
589                         if (bufLength > bufPos) {
590                                 return buffer [bufPos++];
591                         }
592                         return stream.ReadByte ();
593                 }
594
595                 public override long Seek (long offset, System.IO.SeekOrigin origin)
596                 {
597                         int bufRest = bufLength - bufPos;
598                         if (origin == SeekOrigin.Current)
599                                 if (offset < bufRest)
600                                         return buffer [bufPos + offset];
601                                 else
602                                         return stream.Seek (offset - bufRest, origin);
603                         else
604                                 return stream.Seek (offset, origin);
605                 }
606
607                 public override void SetLength (long value)
608                 {
609                         stream.SetLength (value);
610                 }
611
612                 public override void Write (byte[] buffer, int offset, int count)
613                 {
614                         throw new NotSupportedException ();
615                 }
616                 #endregion
617         }
618 }