2005-11-24 Chris Toshok <toshok@ximian.com>
[mono.git] / mcs / class / System.XML / System.Xml / XmlInputStream.cs
1 //
2 // System.Xml.XmlInputStream 
3 //      encoding-specification-wise XML input stream and reader
4 //
5 // Author:
6 //      Atsushi Enomoto (ginga@kit.hi-ho.ne.jp)
7 //
8 //      (C)2003 Atsushi Enomoto
9 //
10
11 //
12 // Permission is hereby granted, free of charge, to any person obtaining
13 // a copy of this software and associated documentation files (the
14 // "Software"), to deal in the Software without restriction, including
15 // without limitation the rights to use, copy, modify, merge, publish,
16 // distribute, sublicense, and/or sell copies of the Software, and to
17 // permit persons to whom the Software is furnished to do so, subject to
18 // the following conditions:
19 // 
20 // The above copyright notice and this permission notice shall be
21 // included in all copies or substantial portions of the Software.
22 // 
23 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
27 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
28 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
29 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
30 //
31 using System;
32 using System.IO;
33 using System.Text;
34 using System.Runtime.InteropServices;
35
36 namespace System.Xml
37 {
38         #region XmlStreamReader
39         internal class XmlStreamReader : NonBlockingStreamReader
40         {
41                 XmlInputStream input;
42
43                 XmlStreamReader (XmlInputStream input)
44                         : base (input, input.ActualEncoding != null ? input.ActualEncoding : XmlInputStream.StrictUTF8)
45                 {
46                         this.input = input;
47                 }
48
49                 public XmlStreamReader (Stream input)
50                         : this (new XmlInputStream (input))
51                 {
52                 }
53
54                 public override void Close ()
55                 {
56                         this.input.Close ();
57                 }
58
59                 protected override void Dispose (bool disposing)
60                 {
61                         base.Dispose (disposing);
62                         if (disposing) {
63                                 Close ();
64                         }
65                 }
66
67         }
68         #endregion
69
70         #region NonBlockingStreamReader
71         // mostly copied from StreamReader.
72         internal class NonBlockingStreamReader : TextReader {
73
74                 const int DefaultBufferSize = 1024;
75                 const int DefaultFileBufferSize = 4096;
76                 const int MinimumBufferSize = 128;
77
78                 //
79                 // The input buffer
80                 //
81                 byte [] input_buffer;
82
83                 //
84                 // The decoded buffer from the above input buffer
85                 //
86                 char [] decoded_buffer;
87
88                 //
89                 // Decoded bytes in decoded_buffer.
90                 //
91                 int decoded_count;
92
93                 //
94                 // Current position in the decoded_buffer
95                 //
96                 int pos;
97
98                 //
99                 // The buffer size that we are using
100                 //
101                 int buffer_size;
102
103                 Encoding encoding;
104                 Decoder decoder;
105
106                 Stream base_stream;
107                 bool mayBlock;
108
109                 public NonBlockingStreamReader(Stream stream, Encoding encoding)
110                 {
111                         int buffer_size = DefaultBufferSize;
112                         base_stream = stream;
113                         input_buffer = new byte [buffer_size];
114                         this.buffer_size = buffer_size;
115                         this.encoding = encoding;
116                         decoder = encoding.GetDecoder ();
117
118                         decoded_buffer = new char [encoding.GetMaxCharCount (buffer_size)];
119                         decoded_count = 0;
120                         pos = 0;
121                 }
122
123                 public override void Close ()
124                 {
125                         Dispose (true);
126                 }
127
128                 protected override void Dispose (bool disposing)
129                 {
130                         if (disposing && base_stream != null)
131                                 base_stream.Close ();
132                         
133                         input_buffer = null;
134                         decoded_buffer = null;
135                         encoding = null;
136                         decoder = null;
137                         base_stream = null;
138                         base.Dispose (disposing);
139                 }
140
141                 public void DiscardBufferedData ()
142                 {
143                         pos = decoded_count = 0;
144                         mayBlock = false;
145                 }
146                 
147                 // the buffer is empty, fill it again
148                 private int ReadBuffer ()
149                 {
150                         pos = 0;
151                         int cbEncoded = 0;
152
153                         // keep looping until the decoder gives us some chars
154                         decoded_count = 0;
155                         int parse_start = 0;
156                         do      
157                         {
158                                 cbEncoded = base_stream.Read (input_buffer, 0, buffer_size);
159                                 
160                                 if (cbEncoded == 0)
161                                         return 0;
162
163                                 mayBlock = (cbEncoded < buffer_size);
164                                 decoded_count += decoder.GetChars (input_buffer, parse_start, cbEncoded, decoded_buffer, 0);
165                                 parse_start = 0;
166                         } while (decoded_count == 0);
167
168                         return decoded_count;
169                 }
170
171                 public override int Peek ()
172                 {
173                         if (base_stream == null)
174                                 throw new ObjectDisposedException ("StreamReader", "Cannot read from a closed StreamReader");
175                         if (pos >= decoded_count && (mayBlock || ReadBuffer () == 0))
176                                 return -1;
177
178                         return decoded_buffer [pos];
179                 }
180
181                 public override int Read ()
182                 {
183                         if (base_stream == null)
184                                 throw new ObjectDisposedException ("StreamReader", "Cannot read from a closed StreamReader");
185                         if (pos >= decoded_count && ReadBuffer () == 0)
186                                 return -1;
187
188                         return decoded_buffer [pos++];
189                 }
190
191                 public override int Read ([In, Out] char[] dest_buffer, int index, int count)
192                 {
193                         if (base_stream == null)
194                                 throw new ObjectDisposedException ("StreamReader", "Cannot read from a closed StreamReader");
195                         if (dest_buffer == null)
196                                 throw new ArgumentNullException ("dest_buffer");
197                         if (index < 0)
198                                 throw new ArgumentOutOfRangeException ("index", "< 0");
199                         if (count < 0)
200                                 throw new ArgumentOutOfRangeException ("count", "< 0");
201                         // re-ordered to avoid possible integer overflow
202                         if (index > dest_buffer.Length - count)
203                                 throw new ArgumentException ("index + count > dest_buffer.Length");
204
205                         int chars_read = 0;
206 //                      while (count > 0)
207                         {
208                                 if (pos >= decoded_count && ReadBuffer () == 0)
209                                         return chars_read > 0 ? chars_read : 0;
210
211                                 int cch = Math.Min (decoded_count - pos, count);
212                                 Array.Copy (decoded_buffer, pos, dest_buffer, index, cch);
213                                 pos += cch;
214                                 index += cch;
215                                 count -= cch;
216                                 chars_read += cch;
217                         }
218                         return chars_read;
219                 }
220
221                 public override string ReadLine()
222                 {
223                         if (base_stream == null)
224                                 throw new ObjectDisposedException ("StreamReader", "Cannot read from a closed StreamReader");
225                         
226                         bool foundCR = false;
227                         StringBuilder text = new StringBuilder ();
228
229                         while (true) {
230                                 int c = Read ();
231
232                                 if (c == -1) {                          // end of stream
233                                         if (text.Length == 0)
234                                                 return null;
235
236                                         if (foundCR)
237                                                 text.Length--;
238
239                                         break;
240                                 }
241
242                                 if (c == '\n') {                        // newline
243                                         if ((text.Length > 0) && (text [text.Length - 1] == '\r'))
244                                                 text.Length--;
245
246                                         foundCR = false;
247                                         break;
248                                 } else if (foundCR) {
249                                         pos--;
250                                         text.Length--;
251                                         break;
252                                 }
253
254                                 if (c == '\r')
255                                         foundCR = true;
256                                         
257
258                                 text.Append ((char) c);
259                         }
260
261                         return text.ToString ();
262                 }
263
264                 public override string ReadToEnd()
265                 {
266                         if (base_stream == null)
267                                 throw new ObjectDisposedException ("StreamReader", "Cannot read from a closed StreamReader");
268
269                         StringBuilder text = new StringBuilder ();
270
271                         int size = decoded_buffer.Length;
272                         char [] buffer = new char [size];
273                         int len;
274                         
275                         while ((len = Read (buffer, 0, size)) != 0)
276                                 text.Append (buffer, 0, len);
277
278                         return text.ToString ();
279                 }
280         }
281         #endregion
282
283         class XmlInputStream : Stream
284         {
285                 public static readonly Encoding StrictUTF8;
286
287                 static XmlInputStream ()
288                 {
289                         StrictUTF8 = new UTF8Encoding (false, true);
290                 }
291
292                 Encoding enc;
293                 Stream stream;
294                 byte[] buffer;
295                 int bufLength;
296                 int bufPos;
297
298                 static XmlException encodingException = new XmlException ("invalid encoding specification.");
299
300                 public XmlInputStream (Stream stream)
301                 {
302                         Initialize (stream);
303                 }
304
305                 private void Initialize (Stream stream)
306                 {
307                         buffer = new byte [64];
308                         this.stream = stream;
309                         enc = StrictUTF8; // Default to UTF8 if we can't guess it
310                         bufLength = stream.Read (buffer, 0, buffer.Length);
311                         if (bufLength == -1 || bufLength == 0) {
312                                 return;
313                         }
314
315                         int c = ReadByteSpecial ();
316                         switch (c) {
317                         case 0xFF:
318                                 c = ReadByteSpecial ();
319                                 if (c == 0xFE) {
320                                         // BOM-ed little endian utf-16
321                                         enc = Encoding.Unicode;
322                                 } else {
323                                         // It doesn't start from "<?xml" then its encoding is utf-8
324                                         bufPos = 0;
325                                 }
326                                 break;
327                         case 0xFE:
328                                 c = ReadByteSpecial ();
329                                 if (c == 0xFF) {
330                                         // BOM-ed big endian utf-16
331                                         enc = Encoding.BigEndianUnicode;
332                                         return;
333                                 } else {
334                                         // It doesn't start from "<?xml" then its encoding is utf-8
335                                         bufPos = 0;
336                                 }
337                                 break;
338                         case 0xEF:
339                                 c = ReadByteSpecial ();
340                                 if (c == 0xBB) {
341                                         c = ReadByteSpecial ();
342                                         if (c != 0xBF) {
343                                                 bufPos = 0;
344                                         }
345                                 } else {
346                                         buffer [--bufPos] = 0xEF;
347                                 }
348                                 break;
349                         case '<':
350                                 // try to get encoding name from XMLDecl.
351                                 if (bufLength >= 5 && Encoding.ASCII.GetString (buffer, 1, 4) == "?xml") {
352                                         bufPos += 4;
353                                         c = SkipWhitespace ();
354
355                                         // version. It is optional here.
356                                         if (c == 'v') {
357                                                 while (c >= 0) {
358                                                         c = ReadByteSpecial ();
359                                                         if (c == '0') { // 0 of 1.0
360                                                                 ReadByteSpecial ();
361                                                                 break;
362                                                         }
363                                                 }
364                                                 c = SkipWhitespace ();
365                                         }
366
367                                         if (c == 'e') {
368                                                 int remaining = bufLength - bufPos;
369                                                 if (remaining >= 7 && Encoding.ASCII.GetString(buffer, bufPos, 7) == "ncoding") {
370                                                         bufPos += 7;
371                                                         c = SkipWhitespace();
372                                                         if (c != '=')
373                                                                 throw encodingException;
374                                                         c = SkipWhitespace ();
375                                                         int quoteChar = c;
376                                                         StringBuilder sb = new StringBuilder ();
377                                                         while (true) {
378                                                                 c = ReadByteSpecial ();
379                                                                 if (c == quoteChar)
380                                                                         break;
381                                                                 else if (c < 0)
382                                                                         throw encodingException;
383
384                                                                 sb.Append ((char) c);
385                                                         }
386                                                         string encodingName = sb.ToString ();
387                                                         if (!XmlChar.IsValidIANAEncoding (encodingName))
388                                                                 throw encodingException;
389                                                         enc = Encoding.GetEncoding (encodingName);
390                                                 }
391                                         }
392                                 }
393 #if TARGET_JVM
394                                 else {
395                                         if (bufLength >= 10 && Encoding.Unicode.GetString (buffer, 2, 8) == "?xml")
396                                                 enc = Encoding.Unicode;
397                                 }
398 #endif
399                                 bufPos = 0;
400                                 break;
401                         default:
402                                 bufPos = 0;
403                                 break;
404                         }
405                 }
406
407                 // Just like readbyte, but grows the buffer too.
408                 int ReadByteSpecial ()
409                 {
410                         if (bufLength > bufPos)
411                                 return buffer [bufPos++];
412
413                         byte [] newbuf = new byte [buffer.Length * 2];
414                         Buffer.BlockCopy (buffer, 0, newbuf, 0, bufLength);
415                         int nbytes = stream.Read (newbuf, bufLength, buffer.Length);
416                         if (nbytes == -1 || nbytes == 0)
417                                 return -1;
418                                 
419                         bufLength += nbytes;
420                         buffer = newbuf;
421                         return buffer [bufPos++];
422                 }
423
424                 // skips whitespace and returns misc char that was read from stream
425                 private int SkipWhitespace ()
426                 {
427                         int c;
428                         while (true) {
429                                 c = ReadByteSpecial ();
430                                 switch ((char) c) {
431                                 case '\r': goto case ' ';
432                                 case '\n': goto case ' ';
433                                 case '\t': goto case ' ';
434                                 case ' ':
435                                         continue;
436                                 default:
437                                         return c;
438                                 }
439                         }
440                         throw new InvalidOperationException ();
441                 }
442
443                 public Encoding ActualEncoding {
444                         get { return enc; }
445                 }
446
447                 #region Public Overrides
448                 public override bool CanRead {
449                         get {
450                                 if (bufLength > bufPos)
451                                         return true;
452                                 else
453                                         return stream.CanRead; 
454                         }
455                 }
456
457                 // FIXME: It should support base stream's CanSeek.
458                 public override bool CanSeek {
459                         get { return false; } // stream.CanSeek; }
460                 }
461
462                 public override bool CanWrite {
463                         get { return false; }
464                 }
465
466                 public override long Length {
467                         get {
468                                 return stream.Length;
469                         }
470                 }
471
472                 public override long Position {
473                         get {
474                                 return stream.Position - bufLength + bufPos;
475                         }
476                         set {
477                                 if(value < bufLength)
478                                         bufPos = (int)value;
479                                 else
480                                         stream.Position = value - bufLength;
481                         }
482                 }
483
484                 public override void Close ()
485                 {
486                         stream.Close ();
487                 }
488
489                 public override void Flush ()
490                 {
491                         stream.Flush ();
492                 }
493
494                 public override int Read (byte[] buffer, int offset, int count)
495                 {
496                         int ret;
497                         if (count <= bufLength - bufPos)        {       // all from buffer
498                                 Buffer.BlockCopy (this.buffer, bufPos, buffer, offset, count);
499                                 bufPos += count;
500                                 ret = count;
501                         } else {
502                                 int bufRest = bufLength - bufPos;
503                                 if (bufLength > bufPos) {
504                                         Buffer.BlockCopy (this.buffer, bufPos, buffer, offset, bufRest);
505                                         bufPos += bufRest;
506                                 }
507                                 ret = bufRest +
508                                         stream.Read (buffer, offset + bufRest, count - bufRest);
509                         }
510                         return ret;
511                 }
512
513                 public override int ReadByte ()
514                 {
515                         if (bufLength > bufPos) {
516                                 return buffer [bufPos++];
517                         }
518                         return stream.ReadByte ();
519                 }
520
521                 public override long Seek (long offset, System.IO.SeekOrigin origin)
522                 {
523                         int bufRest = bufLength - bufPos;
524                         if (origin == SeekOrigin.Current)
525                                 if (offset < bufRest)
526                                         return buffer [bufPos + offset];
527                                 else
528                                         return stream.Seek (offset - bufRest, origin);
529                         else
530                                 return stream.Seek (offset, origin);
531                 }
532
533                 public override void SetLength (long value)
534                 {
535                         stream.SetLength (value);
536                 }
537
538                 public override void Write (byte[] buffer, int offset, int count)
539                 {
540                         throw new NotSupportedException ();
541                 }
542                 #endregion
543         }
544 }