2 // System.Xml.XmlInputStream
3 // XML input stream and reader that honor the document's encoding specification
6 // Atsushi Enomoto (ginga@kit.hi-ho.ne.jp)
8 // (C)2003 Atsushi Enomoto
12 // Permission is hereby granted, free of charge, to any person obtaining
13 // a copy of this software and associated documentation files (the
14 // "Software"), to deal in the Software without restriction, including
15 // without limitation the rights to use, copy, modify, merge, publish,
16 // distribute, sublicense, and/or sell copies of the Software, and to
17 // permit persons to whom the Software is furnished to do so, subject to
18 // the following conditions:
20 // The above copyright notice and this permission notice shall be
21 // included in all copies or substantial portions of the Software.
23 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
27 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
28 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
29 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
34 using System.Runtime.InteropServices;
38 #region XmlStreamReader
39 internal class XmlStreamReader : NonBlockingStreamReader
43 XmlStreamReader (XmlInputStream input)
44 : base (input, input.ActualEncoding != null ? input.ActualEncoding : XmlInputStream.StrictUTF8)
49 public XmlStreamReader (Stream input)
50 : this (new XmlInputStream (input))
54 static XmlException invalidDataException = new XmlException ("invalid data.");
56 public override void Close ()
61 public override int Read ([In, Out] char[] dest_buffer, int index, int count)
64 return base.Read (dest_buffer, index, count);
67 catch (System.ArgumentException) {
68 throw invalidDataException;
71 catch (System.Text.DecoderFallbackException) {
72 throw invalidDataException;
77 protected override void Dispose (bool disposing)
79 base.Dispose (disposing);
88 #region NonBlockingStreamReader
89 // mostly copied from StreamReader, removing BOM checks, ctor
90 // parameter checks and some extra public members.
91 internal class NonBlockingStreamReader : TextReader {
93 const int DefaultBufferSize = 1024;
94 const int DefaultFileBufferSize = 4096;
95 const int MinimumBufferSize = 128;
100 byte [] input_buffer;
103 // The decoded buffer from the above input buffer
105 char [] decoded_buffer;
108 // Number of decoded chars in decoded_buffer.
113 // Current position in the decoded_buffer
118 // The buffer size that we are using
127 StringBuilder line_builder;
129 public NonBlockingStreamReader(Stream stream, Encoding encoding)
131 int buffer_size = DefaultBufferSize;
132 base_stream = stream;
133 input_buffer = new byte [buffer_size];
134 this.buffer_size = buffer_size;
135 this.encoding = encoding;
136 decoder = encoding.GetDecoder ();
138 decoded_buffer = new char [encoding.GetMaxCharCount (buffer_size)];
143 public Encoding Encoding {
144 get { return encoding; }
147 public override void Close ()
152 protected override void Dispose (bool disposing)
154 if (disposing && base_stream != null)
155 base_stream.Close ();
158 decoded_buffer = null;
162 base.Dispose (disposing);
165 public void DiscardBufferedData ()
167 pos = decoded_count = 0;
172 decoder = encoding.GetDecoder ();
176 // the buffer is empty, fill it again
177 private int ReadBuffer ()
182 // keep looping until the decoder gives us some chars
187 cbEncoded = base_stream.Read (input_buffer, 0, buffer_size);
192 mayBlock = (cbEncoded < buffer_size);
193 decoded_count += decoder.GetChars (input_buffer, parse_start, cbEncoded, decoded_buffer, 0);
195 } while (decoded_count == 0);
197 return decoded_count;
200 public override int Peek ()
202 if (base_stream == null)
203 throw new ObjectDisposedException ("StreamReader", "Cannot read from a closed StreamReader");
204 if (pos >= decoded_count && (mayBlock || ReadBuffer () == 0))
207 return decoded_buffer [pos];
210 public override int Read ()
212 if (base_stream == null)
213 throw new ObjectDisposedException ("StreamReader", "Cannot read from a closed StreamReader");
214 if (pos >= decoded_count && ReadBuffer () == 0)
217 return decoded_buffer [pos++];
220 public override int Read ([In, Out] char[] dest_buffer, int index, int count)
222 if (base_stream == null)
223 throw new ObjectDisposedException ("StreamReader", "Cannot read from a closed StreamReader");
224 if (dest_buffer == null)
225 throw new ArgumentNullException ("dest_buffer");
227 throw new ArgumentOutOfRangeException ("index", "< 0");
229 throw new ArgumentOutOfRangeException ("count", "< 0");
230 // re-ordered to avoid possible integer overflow
231 if (index > dest_buffer.Length - count)
232 throw new ArgumentException ("index + count > dest_buffer.Length");
237 if (pos >= decoded_count && ReadBuffer () == 0)
238 return chars_read > 0 ? chars_read : 0;
240 int cch = Math.Min (decoded_count - pos, count);
241 Array.Copy (decoded_buffer, pos, dest_buffer, index, cch);
254 for (; pos < decoded_count; pos++) {
255 c = decoded_buffer [pos];
258 int res = (foundCR) ? (pos - 2) : (pos - 1);
260 res = 0; // if a new buffer starts with a \n and there was a \r at
261 // the end of the previous one, we get here.
264 } else if (foundCR) {
269 foundCR = (c == '\r');
275 public override string ReadLine()
277 if (base_stream == null)
278 throw new ObjectDisposedException ("StreamReader", "Cannot read from a closed StreamReader");
280 if (pos >= decoded_count && ReadBuffer () == 0)
284 int end = FindNextEOL ();
285 if (end < decoded_count && end >= begin)
286 return new string (decoded_buffer, begin, end - begin);
288 if (line_builder == null)
289 line_builder = new StringBuilder ();
291 line_builder.Length = 0;
294 if (foundCR) // don't include the trailing CR if present
297 line_builder.Append (new string (decoded_buffer, begin, decoded_count - begin));
298 if (ReadBuffer () == 0) {
299 if (line_builder.Capacity > 32768) {
300 StringBuilder sb = line_builder;
302 return sb.ToString (0, sb.Length);
304 return line_builder.ToString (0, line_builder.Length);
308 end = FindNextEOL ();
309 if (end < decoded_count && end >= begin) {
310 line_builder.Append (new string (decoded_buffer, begin, end - begin));
311 if (line_builder.Capacity > 32768) {
312 StringBuilder sb = line_builder;
314 return sb.ToString (0, sb.Length);
316 return line_builder.ToString (0, line_builder.Length);
321 public override string ReadToEnd()
323 if (base_stream == null)
324 throw new ObjectDisposedException ("StreamReader", "Cannot read from a closed StreamReader");
326 StringBuilder text = new StringBuilder ();
328 int size = decoded_buffer.Length;
329 char [] buffer = new char [size];
332 while ((len = Read (buffer, 0, size)) != 0)
333 text.Append (buffer, 0, len);
335 return text.ToString ();
340 class XmlInputStream : Stream
342 public static readonly Encoding StrictUTF8;
344 static XmlInputStream ()
346 StrictUTF8 = new UTF8Encoding (false, true);
355 static XmlException encodingException = new XmlException ("invalid encoding specification.");
357 public XmlInputStream (Stream stream)
362 static string GetStringFromBytes (byte [] bytes, int index, int count)
365 char [] chars = new char [count];
366 for (int i = index; i < count; i++)
367 chars [i] = (char) bytes [i];
369 return new string (chars);
371 return Encoding.ASCII.GetString (bytes, index, count);
375 private void Initialize (Stream stream)
377 buffer = new byte [6];
378 this.stream = stream;
379 enc = StrictUTF8; // Default to UTF8 if we can't guess it
380 bufLength = stream.Read (buffer, 0, buffer.Length);
381 if (bufLength == -1 || bufLength == 0) {
385 int c = ReadByteSpecial ();
388 c = ReadByteSpecial ();
390 // BOM-ed little endian utf-16
391 enc = Encoding.Unicode;
393 // If it doesn't start with "<?xml", its encoding is UTF-8.
398 c = ReadByteSpecial ();
400 // BOM-ed big endian utf-16
401 enc = Encoding.BigEndianUnicode;
404 // If it doesn't start with "<?xml", its encoding is UTF-8.
409 c = ReadByteSpecial ();
411 c = ReadByteSpecial ();
416 buffer [--bufPos] = 0xEF;
420 // try to get encoding name from XMLDecl.
421 if (bufLength >= 5 && GetStringFromBytes (buffer, 1, 4) == "?xml") {
423 c = SkipWhitespace ();
425 // version. It is optional here.
428 c = ReadByteSpecial ();
429 if (c == '0') { // 0 of 1.0
434 c = SkipWhitespace ();
438 int remaining = bufLength - bufPos;
439 if (remaining >= 7 && GetStringFromBytes (buffer, bufPos, 7) == "ncoding") {
441 c = SkipWhitespace();
443 throw encodingException;
444 c = SkipWhitespace ();
446 StringBuilder sb = new StringBuilder ();
448 c = ReadByteSpecial ();
452 throw encodingException;
454 sb.Append ((char) c);
456 string encodingName = sb.ToString ();
457 if (!XmlChar.IsValidIANAEncoding (encodingName))
458 throw encodingException;
459 enc = Encoding.GetEncoding (encodingName);
465 if (bufLength >= 10 && Encoding.Unicode.GetString (buffer, 2, 8) == "?xml")
466 enc = Encoding.Unicode;
477 // Just like ReadByte, but grows the buffer too.
478 int ReadByteSpecial ()
480 if (bufLength > bufPos)
481 return buffer [bufPos++];
483 byte [] newbuf = new byte [buffer.Length * 2];
484 Buffer.BlockCopy (buffer, 0, newbuf, 0, bufLength);
485 int nbytes = stream.Read (newbuf, bufLength, buffer.Length);
486 if (nbytes == -1 || nbytes == 0)
491 return buffer [bufPos++];
494 // skips whitespace and returns misc char that was read from stream
495 private int SkipWhitespace ()
499 c = ReadByteSpecial ();
501 case '\r': goto case ' ';
502 case '\n': goto case ' ';
503 case '\t': goto case ' ';
512 public Encoding ActualEncoding {
516 #region Public Overrides
517 public override bool CanRead {
519 if (bufLength > bufPos)
522 return stream.CanRead;
526 // FIXME: It should support base stream's CanSeek.
527 public override bool CanSeek {
528 get { return false; } // stream.CanSeek; }
531 public override bool CanWrite {
532 get { return false; }
535 public override long Length {
537 return stream.Length;
541 public override long Position {
543 return stream.Position - bufLength + bufPos;
546 if(value < bufLength)
549 stream.Position = value - bufLength;
553 public override void Close ()
558 public override void Flush ()
563 public override int Read (byte[] buffer, int offset, int count)
566 if (count <= bufLength - bufPos) { // all from buffer
567 Buffer.BlockCopy (this.buffer, bufPos, buffer, offset, count);
571 int bufRest = bufLength - bufPos;
572 if (bufLength > bufPos) {
573 Buffer.BlockCopy (this.buffer, bufPos, buffer, offset, bufRest);
577 stream.Read (buffer, offset + bufRest, count - bufRest);
582 public override int ReadByte ()
584 if (bufLength > bufPos) {
585 return buffer [bufPos++];
587 return stream.ReadByte ();
590 public override long Seek (long offset, System.IO.SeekOrigin origin)
592 int bufRest = bufLength - bufPos;
593 if (origin == SeekOrigin.Current)
594 if (offset < bufRest)
595 return buffer [bufPos + offset];
597 return stream.Seek (offset - bufRest, origin);
599 return stream.Seek (offset, origin);
602 public override void SetLength (long value)
604 stream.SetLength (value);
607 public override void Write (byte[] buffer, int offset, int count)
609 throw new NotSupportedException ();