2 // System.Xml.XmlInputStream
3 // XML input stream and reader that honor the document's encoding specification
6 // Atsushi Enomoto (ginga@kit.hi-ho.ne.jp)
8 // (C)2003 Atsushi Enomoto
12 // Permission is hereby granted, free of charge, to any person obtaining
13 // a copy of this software and associated documentation files (the
14 // "Software"), to deal in the Software without restriction, including
15 // without limitation the rights to use, copy, modify, merge, publish,
16 // distribute, sublicense, and/or sell copies of the Software, and to
17 // permit persons to whom the Software is furnished to do so, subject to
18 // the following conditions:
20 // The above copyright notice and this permission notice shall be
21 // included in all copies or substantial portions of the Software.
23 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
27 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
28 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
29 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
34 using System.Runtime.InteropServices;
38 #region XmlStreamReader
39 internal class XmlStreamReader : NonBlockingStreamReader
43 XmlStreamReader (XmlInputStream input)
44 : base (input, input.ActualEncoding != null ? input.ActualEncoding : XmlInputStream.StrictUTF8)
49 public XmlStreamReader (Stream input)
50 : this (new XmlInputStream (input))
54 static XmlException invalidDataException = new XmlException ("invalid data.");
56 public override void Close ()
61 public override int Read ([In, Out] char[] dest_buffer, int index, int count)
64 return base.Read (dest_buffer, index, count);
67 catch (System.ArgumentException ex) {
68 throw new XmlException ("Invalid data", ex);
71 catch (System.Text.DecoderFallbackException) {
72 throw invalidDataException;
77 protected override void Dispose (bool disposing)
79 base.Dispose (disposing);
88 #region NonBlockingStreamReader
89 // mostly copied from StreamReader, removing BOM checks, ctor
90 // parameter checks and some extra public members.
91 internal class NonBlockingStreamReader : TextReader {
93 const int DefaultBufferSize = 1024;
94 const int DefaultFileBufferSize = 4096;
95 const int MinimumBufferSize = 128;
100 byte [] input_buffer;
103 // The decoded buffer from the above input buffer
105 char [] decoded_buffer;
108 // Number of decoded chars in decoded_buffer.
113 // Current position in the decoded_buffer
118 // The buffer size that we are using
127 StringBuilder line_builder;
129 public NonBlockingStreamReader(Stream stream, Encoding encoding)
131 int buffer_size = DefaultBufferSize;
132 base_stream = stream;
133 input_buffer = new byte [buffer_size];
134 this.buffer_size = buffer_size;
135 this.encoding = encoding;
136 decoder = encoding.GetDecoder ();
138 decoded_buffer = new char [encoding.GetMaxCharCount (buffer_size)];
143 public Encoding Encoding {
144 get { return encoding; }
147 public override void Close ()
152 protected override void Dispose (bool disposing)
154 if (disposing && base_stream != null)
155 base_stream.Close ();
158 decoded_buffer = null;
162 base.Dispose (disposing);
165 public void DiscardBufferedData ()
167 pos = decoded_count = 0;
172 decoder = encoding.GetDecoder ();
176 // the buffer is empty, fill it again
177 private int ReadBuffer ()
182 // keep looping until the decoder gives us some chars
187 cbEncoded = base_stream.Read (input_buffer, 0, buffer_size);
192 mayBlock = (cbEncoded < buffer_size);
193 decoded_count += decoder.GetChars (input_buffer, parse_start, cbEncoded, decoded_buffer, 0);
195 } while (decoded_count == 0);
197 return decoded_count;
200 public override int Peek ()
202 if (base_stream == null)
203 throw new ObjectDisposedException ("StreamReader", "Cannot read from a closed StreamReader");
204 if (pos >= decoded_count && (mayBlock || ReadBuffer () == 0))
207 return decoded_buffer [pos];
210 public override int Read ()
212 if (base_stream == null)
213 throw new ObjectDisposedException ("StreamReader", "Cannot read from a closed StreamReader");
214 if (pos >= decoded_count && ReadBuffer () == 0)
217 return decoded_buffer [pos++];
220 public override int Read ([In, Out] char[] dest_buffer, int index, int count)
222 if (base_stream == null)
223 throw new ObjectDisposedException ("StreamReader", "Cannot read from a closed StreamReader");
224 if (dest_buffer == null)
225 throw new ArgumentNullException ("dest_buffer");
227 throw new ArgumentOutOfRangeException ("index", "< 0");
229 throw new ArgumentOutOfRangeException ("count", "< 0");
230 // re-ordered to avoid possible integer overflow
231 if (index > dest_buffer.Length - count)
232 throw new ArgumentException ("index + count > dest_buffer.Length");
237 if (pos >= decoded_count && ReadBuffer () == 0)
238 return chars_read > 0 ? chars_read : 0;
240 int cch = Math.Min (decoded_count - pos, count);
241 Array.Copy (decoded_buffer, pos, dest_buffer, index, cch);
254 for (; pos < decoded_count; pos++) {
255 c = decoded_buffer [pos];
258 int res = (foundCR) ? (pos - 2) : (pos - 1);
260 res = 0; // if a new buffer starts with a \n and there was a \r at
261 // the end of the previous one, we get here.
264 } else if (foundCR) {
269 foundCR = (c == '\r');
275 public override string ReadLine()
277 if (base_stream == null)
278 throw new ObjectDisposedException ("StreamReader", "Cannot read from a closed StreamReader");
280 if (pos >= decoded_count && ReadBuffer () == 0)
284 int end = FindNextEOL ();
285 if (end < decoded_count && end >= begin)
286 return new string (decoded_buffer, begin, end - begin);
288 if (line_builder == null)
289 line_builder = new StringBuilder ();
291 line_builder.Length = 0;
294 if (foundCR) // don't include the trailing CR if present
297 line_builder.Append (new string (decoded_buffer, begin, decoded_count - begin));
298 if (ReadBuffer () == 0) {
299 if (line_builder.Capacity > 32768) {
300 StringBuilder sb = line_builder;
302 return sb.ToString (0, sb.Length);
304 return line_builder.ToString (0, line_builder.Length);
308 end = FindNextEOL ();
309 if (end < decoded_count && end >= begin) {
310 line_builder.Append (new string (decoded_buffer, begin, end - begin));
311 if (line_builder.Capacity > 32768) {
312 StringBuilder sb = line_builder;
314 return sb.ToString (0, sb.Length);
316 return line_builder.ToString (0, line_builder.Length);
321 public override string ReadToEnd()
323 if (base_stream == null)
324 throw new ObjectDisposedException ("StreamReader", "Cannot read from a closed StreamReader");
326 StringBuilder text = new StringBuilder ();
328 int size = decoded_buffer.Length;
329 char [] buffer = new char [size];
332 while ((len = Read (buffer, 0, size)) != 0)
333 text.Append (buffer, 0, len);
335 return text.ToString ();
340 class XmlInputStream : Stream
342 public static readonly Encoding StrictUTF8;
344 static XmlInputStream ()
346 StrictUTF8 = new UTF8Encoding (false, true);
355 static XmlException encodingException = new XmlException ("invalid encoding specification.");
357 public XmlInputStream (Stream stream)
362 // this returns null, instead of throwing ArgumentOutOfRangeException
363 string GetStringFromBytes (int index, int count)
366 while (bufPos < index + count)
367 if (ReadByteSpecial () < 0)
371 char [] chars = new char [count];
372 for (int i = index; i < count; i++)
373 chars [i] = (char) buffer [i];
375 return new string (chars);
377 return Encoding.ASCII.GetString (buffer, index, count);
381 private void Initialize (Stream stream)
383 buffer = new byte [6];
384 this.stream = stream;
385 enc = StrictUTF8; // Default to UTF8 if we can't guess it
386 bufLength = stream.Read (buffer, 0, buffer.Length);
387 if (bufLength == -1 || bufLength == 0) {
391 int c = ReadByteSpecial ();
394 c = ReadByteSpecial ();
396 // BOM-ed little endian utf-16
397 enc = Encoding.Unicode;
399 // If it doesn't start with "<?xml", its encoding is UTF-8
404 c = ReadByteSpecial ();
406 // BOM-ed big endian utf-16
407 enc = Encoding.BigEndianUnicode;
410 // If it doesn't start with "<?xml", its encoding is UTF-8
415 c = ReadByteSpecial ();
417 c = ReadByteSpecial ();
422 buffer [--bufPos] = 0xEF;
426 // try to get encoding name from XMLDecl.
427 if (bufLength >= 5 && GetStringFromBytes (1, 4) == "?xml") {
429 c = SkipWhitespace ();
431 // version. It is optional here.
434 c = ReadByteSpecial ();
435 if (c == '0') { // 0 of 1.0
440 c = SkipWhitespace ();
444 if (GetStringFromBytes (bufPos, 7) == "ncoding") {
446 c = SkipWhitespace();
448 throw encodingException;
449 c = SkipWhitespace ();
451 StringBuilder sb = new StringBuilder ();
453 c = ReadByteSpecial ();
457 throw encodingException;
459 sb.Append ((char) c);
461 string encodingName = sb.ToString ();
462 if (!XmlChar.IsValidIANAEncoding (encodingName))
463 throw encodingException;
464 enc = Encoding.GetEncoding (encodingName);
470 if (bufLength >= 10 && Encoding.Unicode.GetString (buffer, 2, 8) == "?xml")
471 enc = Encoding.Unicode;
482 // Just like ReadByte, but grows the buffer too.
483 int ReadByteSpecial ()
485 if (bufLength > bufPos)
486 return buffer [bufPos++];
488 byte [] newbuf = new byte [buffer.Length * 2];
489 Buffer.BlockCopy (buffer, 0, newbuf, 0, bufLength);
490 int nbytes = stream.Read (newbuf, bufLength, buffer.Length);
491 if (nbytes == -1 || nbytes == 0)
496 return buffer [bufPos++];
499 // Skips whitespace and returns the next (non-whitespace) byte read from the stream.
500 private int SkipWhitespace ()
504 c = ReadByteSpecial ();
506 case '\r': goto case ' ';
507 case '\n': goto case ' ';
508 case '\t': goto case ' ';
517 public Encoding ActualEncoding {
521 #region Public Overrides
522 public override bool CanRead {
524 if (bufLength > bufPos)
527 return stream.CanRead;
531 // FIXME: It should support base stream's CanSeek.
532 public override bool CanSeek {
533 get { return false; } // stream.CanSeek; }
536 public override bool CanWrite {
537 get { return false; }
540 public override long Length {
542 return stream.Length;
546 public override long Position {
548 return stream.Position - bufLength + bufPos;
551 if(value < bufLength)
554 stream.Position = value - bufLength;
558 public override void Close ()
563 public override void Flush ()
568 public override int Read (byte[] buffer, int offset, int count)
571 if (count <= bufLength - bufPos) { // all from buffer
572 Buffer.BlockCopy (this.buffer, bufPos, buffer, offset, count);
576 int bufRest = bufLength - bufPos;
577 if (bufLength > bufPos) {
578 Buffer.BlockCopy (this.buffer, bufPos, buffer, offset, bufRest);
582 stream.Read (buffer, offset + bufRest, count - bufRest);
587 public override int ReadByte ()
589 if (bufLength > bufPos) {
590 return buffer [bufPos++];
592 return stream.ReadByte ();
595 public override long Seek (long offset, System.IO.SeekOrigin origin)
597 int bufRest = bufLength - bufPos;
598 if (origin == SeekOrigin.Current)
599 if (offset < bufRest)
600 return buffer [bufPos + offset];
602 return stream.Seek (offset - bufRest, origin);
604 return stream.Seek (offset, origin);
607 public override void SetLength (long value)
609 stream.SetLength (value);
612 public override void Write (byte[] buffer, int offset, int count)
614 throw new NotSupportedException ();