2007-08-04 Jb Evain <jbevain@novell.com>
[mono.git] / mcs / class / System.XML / System.Xml / XmlInputStream.cs
1 //
2 // System.Xml.XmlInputStream 
3 //      encoding-specification-wise XML input stream and reader
4 //
5 // Author:
6 //      Atsushi Enomoto (ginga@kit.hi-ho.ne.jp)
7 //
8 //      (C)2003 Atsushi Enomoto
9 //
10
11 //
12 // Permission is hereby granted, free of charge, to any person obtaining
13 // a copy of this software and associated documentation files (the
14 // "Software"), to deal in the Software without restriction, including
15 // without limitation the rights to use, copy, modify, merge, publish,
16 // distribute, sublicense, and/or sell copies of the Software, and to
17 // permit persons to whom the Software is furnished to do so, subject to
18 // the following conditions:
19 // 
20 // The above copyright notice and this permission notice shall be
21 // included in all copies or substantial portions of the Software.
22 // 
23 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
27 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
28 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
29 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
30 //
31 using System;
32 using System.IO;
33 using System.Text;
34 using System.Runtime.InteropServices;
35
36 namespace System.Xml
37 {
38         #region XmlStreamReader
// Text reader over an XML byte stream. The bytes are routed through an
// XmlInputStream and decoded with the encoding that stream reports via
// ActualEncoding; XmlInputStream.StrictUTF8 is used when it reports null.
internal class XmlStreamReader : NonBlockingStreamReader
{
	// The encoding-sniffing stream this reader decodes from.
	XmlInputStream input;

	// Wraps a raw stream in an XmlInputStream and reads from that.
	public XmlStreamReader (Stream input)
		: this (new XmlInputStream (input))
	{
	}

	// Private: callers always go through the Stream overload above.
	XmlStreamReader (XmlInputStream input)
		: base (input, input.ActualEncoding == null ? XmlInputStream.StrictUTF8 : input.ActualEncoding)
	{
		this.input = input;
	}

	// Closes the wrapped XmlInputStream. This override does not call the
	// base class Close().
	public override void Close ()
	{
		input.Close ();
	}

	// Runs the base class disposal first and then, for an explicit
	// dispose only, Close().
	protected override void Dispose (bool disposing)
	{
		base.Dispose (disposing);
		if (!disposing)
			return;

		Close ();
	}

}
68         #endregion
69
70         #region NonBlockingStreamReader
71         // mostly copied from StreamReader, removing BOM checks, ctor
72         // parameter checks and some extra public members.
// TextReader that decodes a byte stream one Stream.Read() call at a time.
// Peek() refuses to refill the buffer when the previous refill came back
// short (see mayBlock), so it does not wait on a stream that may have no
// more data available yet.
internal class NonBlockingStreamReader : TextReader {

	// Number of bytes requested from the stream per refill.
	const int DefaultBufferSize = 1024;

	// A cached line builder whose capacity grew beyond this is dropped
	// after use instead of being kept for the next ReadLine().
	const int MaxCachedBuilderCapacity = 32768;

	//
	// The input buffer
	//
	byte [] input_buffer;

	//
	// The decoded buffer from the above input buffer
	//
	char [] decoded_buffer;

	//
	// Number of decoded chars in decoded_buffer.
	//
	int decoded_count;

	//
	// Current position in the decoded_buffer
	//
	int pos;

	//
	// The buffer size that we are using
	//
	int buffer_size;

	Encoding encoding;
	Decoder decoder;

	// Set to null by Dispose(); the read methods use that as the
	// "disposed" marker.
	Stream base_stream;

	// True when the last refill returned fewer bytes than requested.
	bool mayBlock;

	// Reused by ReadLine() for lines spanning more than one refill.
	StringBuilder line_builder;

	// Creates a reader over stream that decodes with encoding. Neither
	// argument is validated here.
	public NonBlockingStreamReader(Stream stream, Encoding encoding)
	{
		int buffer_size = DefaultBufferSize;
		base_stream = stream;
		input_buffer = new byte [buffer_size];
		this.buffer_size = buffer_size;
		this.encoding = encoding;
		decoder = encoding.GetDecoder ();

		decoded_buffer = new char [encoding.GetMaxCharCount (buffer_size)];
		decoded_count = 0;
		pos = 0;
	}

	// The encoding passed to the constructor; null after Dispose().
	public Encoding Encoding {
		get { return encoding; }
	}

	public override void Close ()
	{
		Dispose (true);
	}

	// Closes the underlying stream (explicit dispose only) and drops all
	// buffers. Safe to call more than once.
	protected override void Dispose (bool disposing)
	{
		if (disposing && base_stream != null)
			base_stream.Close ();

		input_buffer = null;
		decoded_buffer = null;
		encoding = null;
		decoder = null;
		base_stream = null;
		base.Dispose (disposing);
	}

	// Forgets everything decoded so far and resets the decoder state.
	public void DiscardBufferedData ()
	{
		pos = decoded_count = 0;
		mayBlock = false;
#if NET_2_0
		decoder.Reset ();
#else
		decoder = encoding.GetDecoder ();
#endif
	}

	// Throws if the reader has been disposed.
	void CheckState ()
	{
		if (base_stream == null)
			throw new ObjectDisposedException ("StreamReader", "Cannot read from a closed StreamReader");
	}

	// The buffer is empty, fill it again. Returns the number of chars now
	// available, or 0 when the stream returned no bytes. Overwrites
	// decoded_buffer and resets pos to 0.
	private int ReadBuffer ()
	{
		pos = 0;
		decoded_count = 0;

		// keep looping until the decoder gives us some chars
		do {
			int cbEncoded = base_stream.Read (input_buffer, 0, buffer_size);
			if (cbEncoded == 0)
				return 0;

			mayBlock = (cbEncoded < buffer_size);
			decoded_count = decoder.GetChars (input_buffer, 0, cbEncoded, decoded_buffer, 0);
		} while (decoded_count == 0);

		return decoded_count;
	}

	// Returns the next char without consuming it, or -1. Also returns -1
	// when the buffer is empty and the last refill was short, rather
	// than asking the stream for more.
	public override int Peek ()
	{
		CheckState ();
		if (pos >= decoded_count && (mayBlock || ReadBuffer () == 0))
			return -1;

		return decoded_buffer [pos];
	}

	// Returns the next char, or -1 when the stream has no more data.
	public override int Read ()
	{
		CheckState ();
		if (pos >= decoded_count && ReadBuffer () == 0)
			return -1;

		return decoded_buffer [pos++];
	}

	// Copies up to count chars into dest_buffer starting at index and
	// returns how many were copied (0 when the stream has no more data).
	// At most one refill is performed per call, so fewer than count chars
	// may be returned even though more data follows.
	public override int Read ([In, Out] char[] dest_buffer, int index, int count)
	{
		CheckState ();
		if (dest_buffer == null)
			throw new ArgumentNullException ("dest_buffer");
		if (index < 0)
			throw new ArgumentOutOfRangeException ("index", "< 0");
		if (count < 0)
			throw new ArgumentOutOfRangeException ("count", "< 0");
		// re-ordered to avoid possible integer overflow
		if (index > dest_buffer.Length - count)
			throw new ArgumentException ("index + count > dest_buffer.Length");

		if (pos >= decoded_count && ReadBuffer () == 0)
			return 0;

		int cch = Math.Min (decoded_count - pos, count);
		Array.Copy (decoded_buffer, pos, dest_buffer, index, cch);
		pos += cch;
		return cch;
	}

	// Returns the text accumulated in line_builder and lets go of the
	// builder when it has grown too large to be worth caching.
	string TakeBuiltLine ()
	{
		string line = line_builder.ToString ();
		if (line_builder.Capacity > MaxCachedBuilderCapacity)
			line_builder = null;
		return line;
	}

	// Reads one line, without its terminator. A line ends at "\n", "\r"
	// or "\r\n", or at the end of the data. Returns null when nothing is
	// left to read.
	public override string ReadLine()
	{
		CheckState ();

		if (pos >= decoded_count && ReadBuffer () == 0)
			return null;

		// Stays false while the line lies entirely inside the current
		// buffer, so that case builds the string without line_builder.
		bool spilled = false;

		while (true) {
			int begin = pos;
			for (; pos < decoded_count; pos++) {
				char c = decoded_buffer [pos];
				if (c != '\r' && c != '\n')
					continue;

				string line;
				if (spilled) {
					line_builder.Append (decoded_buffer, begin, pos - begin);
					line = TakeBuiltLine ();
				} else {
					line = new string (decoded_buffer, begin, pos - begin);
				}
				pos++;

				// "\r\n" is a single terminator even when the two chars
				// arrive in different refills. The line has to be built
				// before this check because a refill overwrites
				// decoded_buffer.
				if (c == '\r' && (pos < decoded_count || ReadBuffer () != 0) && decoded_buffer [pos] == '\n')
					pos++;

				return line;
			}

			// No terminator in this chunk: stash it and fetch more.
			if (!spilled) {
				if (line_builder == null)
					line_builder = new StringBuilder ();
				else
					line_builder.Length = 0;
				spilled = true;
			}
			line_builder.Append (decoded_buffer, begin, decoded_count - begin);

			if (ReadBuffer () == 0)
				return TakeBuiltLine ();
		}
	}

	// Reads until Read(char[], int, int) reports that no data is left and
	// returns everything read as one string.
	public override string ReadToEnd()
	{
		CheckState ();

		StringBuilder text = new StringBuilder ();

		int size = decoded_buffer.Length;
		char [] buffer = new char [size];
		int len;

		while ((len = Read (buffer, 0, size)) != 0)
			text.Append (buffer, 0, len);

		return text.ToString ();
	}
}
320         #endregion
321
// Read-only stream wrapper that inspects the first bytes of an XML byte
// stream (byte order mark, or the encoding pseudo-attribute of the XML
// declaration) to pick a text encoding, and then replays those bytes to
// the consumer ahead of the remaining stream content.
class XmlInputStream : Stream
{
	// UTF-8 without BOM emission that throws on invalid byte sequences.
	public static readonly Encoding StrictUTF8;

	static XmlInputStream ()
	{
		StrictUTF8 = new UTF8Encoding (false, true);
	}

	Encoding enc;
	Stream stream;

	// Bytes read ahead during detection; buffer [bufPos .. bufLength) is
	// still to be handed out before anything is taken from stream again.
	byte[] buffer;
	int bufLength;
	int bufPos;

	// A fresh exception per failure, so that each throw carries its own
	// stack trace instead of sharing one static instance.
	static XmlException CreateEncodingException ()
	{
		return new XmlException ("invalid encoding specification.");
	}

	public XmlInputStream (Stream stream)
	{
		Initialize (stream);
	}

	// Converts count bytes starting at index into a string, one char per
	// byte. Only used for comparisons against ASCII literals.
	static string GetStringFromBytes (byte [] bytes, int index, int count)
	{
#if NET_2_1
		char [] chars = new char [count];
		// Destination starts at 0, source at index.
		for (int i = 0; i < count; i++)
			chars [i] = (char) bytes [index + i];

		return new string (chars);
#else
		return Encoding.ASCII.GetString (bytes, index, count);
#endif
	}

	// Reads the first chunk of the stream and determines enc. On return
	// bufPos points just past a recognized byte order mark, or at 0 so
	// that every sniffed byte is replayed to the consumer.
	private void Initialize (Stream stream)
	{
		buffer = new byte [64];
		this.stream = stream;
		enc = StrictUTF8; // Default to UTF8 if we can't guess it
		bufLength = stream.Read (buffer, 0, buffer.Length);
		if (bufLength == -1 || bufLength == 0) {
			return;
		}

		int c = ReadByteSpecial ();
		switch (c) {
		case 0xFF:
			if (ReadByteSpecial () == 0xFE)
				// BOM-ed little endian utf-16
				enc = Encoding.Unicode;
			else
				// Not a BOM: keep utf-8 and replay the bytes.
				bufPos = 0;
			break;
		case 0xFE:
			if (ReadByteSpecial () == 0xFF)
				// BOM-ed big endian utf-16
				enc = Encoding.BigEndianUnicode;
			else
				// Not a BOM: keep utf-8 and replay the bytes.
				bufPos = 0;
			break;
		case 0xEF:
			// utf-8 BOM is EF BB BF. Anything else is ordinary content,
			// so rewind and replay all of it untouched.
			if (ReadByteSpecial () != 0xBB || ReadByteSpecial () != 0xBF)
				bufPos = 0;
			break;
		case '<':
			// try to get encoding name from XMLDecl.
			DetectDeclaredEncoding ();
			bufPos = 0;
			break;
		default:
			bufPos = 0;
			break;
		}
	}

	// Called with the leading '<' already consumed. If the buffer goes on
	// with "?xml", looks for an encoding="..." pseudo-attribute and sets
	// enc from it. Throws XmlException when the attribute is malformed
	// or names an encoding that XmlChar.IsValidIANAEncoding rejects.
	void DetectDeclaredEncoding ()
	{
		if (bufLength < 5 || GetStringFromBytes (buffer, 1, 4) != "?xml") {
#if TARGET_JVM
			if (bufLength >= 10 && Encoding.Unicode.GetString (buffer, 2, 8) == "?xml")
				enc = Encoding.Unicode;
#endif
			return;
		}

		bufPos += 4;
		int c = SkipWhitespace ();

		// version. It is optional here.
		if (c == 'v') {
			// Skips ahead to the first '0' byte (the 0 of 1.0) and the
			// one byte after it.
			while (c >= 0) {
				c = ReadByteSpecial ();
				if (c == '0') {
					ReadByteSpecial ();
					break;
				}
			}
			c = SkipWhitespace ();
		}

		if (c != 'e')
			return;

		int remaining = bufLength - bufPos;
		if (remaining < 7 || GetStringFromBytes (buffer, bufPos, 7) != "ncoding")
			return;

		bufPos += 7;
		c = SkipWhitespace ();
		if (c != '=')
			throw CreateEncodingException ();

		// Whatever byte follows '=' (after whitespace) is taken as the
		// quote; the name runs up to its next occurrence.
		int quoteChar = SkipWhitespace ();
		StringBuilder sb = new StringBuilder ();
		while (true) {
			c = ReadByteSpecial ();
			if (c == quoteChar)
				break;
			else if (c < 0)
				throw CreateEncodingException ();

			sb.Append ((char) c);
		}

		string encodingName = sb.ToString ();
		if (!XmlChar.IsValidIANAEncoding (encodingName))
			throw CreateEncodingException ();
		enc = Encoding.GetEncoding (encodingName);
	}

	// Just like readbyte, but grows the buffer too: when the look-ahead
	// buffer is used up, more bytes are appended to it rather than
	// replacing it, so that everything can be replayed later. Returns -1
	// when the stream has no more data.
	int ReadByteSpecial ()
	{
		if (bufLength > bufPos)
			return buffer [bufPos++];

		byte [] newbuf = new byte [buffer.Length * 2];
		Buffer.BlockCopy (buffer, 0, newbuf, 0, bufLength);
		int nbytes = stream.Read (newbuf, bufLength, buffer.Length);
		if (nbytes == -1 || nbytes == 0)
			return -1;

		bufLength += nbytes;
		buffer = newbuf;
		return buffer [bufPos++];
	}

	// skips whitespace and returns the first other byte that was read
	// from the stream (-1 at end of data)
	private int SkipWhitespace ()
	{
		int c;
		do {
			c = ReadByteSpecial ();
		} while (c == ' ' || c == '\t' || c == '\r' || c == '\n');

		return c;
	}

	// The encoding chosen by Initialize().
	public Encoding ActualEncoding {
		get { return enc; }
	}

	#region Public Overrides
	public override bool CanRead {
		get {
			if (bufLength > bufPos)
				return true;
			else
				return stream.CanRead;
		}
	}

	// FIXME: It should support base stream's CanSeek.
	public override bool CanSeek {
		get { return false; } // stream.CanSeek; }
	}

	public override bool CanWrite {
		get { return false; }
	}

	public override long Length {
		get {
			return stream.Length;
		}
	}

	// Position of the next byte this stream will hand out, expressed in
	// the underlying stream's coordinates: the underlying position minus
	// the look-ahead bytes not yet consumed.
	public override long Position {
		get {
			return stream.Position - bufLength + bufPos;
		}
		set {
			if (!stream.CanSeek && value >= 0 && value < bufLength) {
				// Forward-only source: the only move possible is
				// inside the look-ahead bytes. As before, this treats
				// the look-ahead as starting at position 0.
				bufPos = (int) value;
				return;
			}

			// Seekable source: move it directly and drop the look-ahead
			// so that the next read comes from the new position.
			stream.Position = value;
			bufPos = bufLength;
		}
	}

	public override void Close ()
	{
		stream.Close ();
	}

	public override void Flush ()
	{
		stream.Flush ();
	}

	// Serves look-ahead bytes first. When they do not cover count, the
	// remainder is requested from the underlying stream in the same call.
	// Returns the total number of bytes stored into buffer.
	public override int Read (byte[] buffer, int offset, int count)
	{
		int bufRest = bufLength - bufPos;
		if (count <= bufRest) {	// all from buffer
			Buffer.BlockCopy (this.buffer, bufPos, buffer, offset, count);
			bufPos += count;
			return count;
		}

		if (bufRest > 0) {
			Buffer.BlockCopy (this.buffer, bufPos, buffer, offset, bufRest);
			bufPos = bufLength;
		}
		return bufRest + stream.Read (buffer, offset + bufRest, count - bufRest);
	}

	public override int ReadByte ()
	{
		if (bufLength > bufPos) {
			return buffer [bufPos++];
		}
		return stream.ReadByte ();
	}

	// Moves the read position and returns the new position. A forward
	// move that stays within the look-ahead bytes only advances bufPos;
	// every other move is delegated to the underlying stream and drops
	// the look-ahead bytes.
	public override long Seek (long offset, System.IO.SeekOrigin origin)
	{
		int bufRest = bufLength - bufPos;
		if (origin == SeekOrigin.Current) {
			if (offset >= 0 && offset <= bufRest) {
				// Computed first so that a failing Position getter
				// leaves bufPos untouched.
				long target = Position + offset;
				bufPos += (int) offset;
				return target;
			}
			// The underlying stream is bufRest bytes ahead of the
			// position the caller sees.
			offset -= bufRest;
		}

		long position = stream.Seek (offset, origin);
		bufPos = bufLength;
		return position;
	}

	public override void SetLength (long value)
	{
		stream.SetLength (value);
	}

	public override void Write (byte[] buffer, int offset, int count)
	{
		throw new NotSupportedException ();
	}
	#endregion
}
596 }