2 // System.Xml.XmlInputStream
3 // encoding-specification-wise XML input stream and reader
6 // Atsushi Enomoto (ginga@kit.hi-ho.ne.jp)
8 // (C)2003 Atsushi Enomoto
12 // Permission is hereby granted, free of charge, to any person obtaining
13 // a copy of this software and associated documentation files (the
14 // "Software"), to deal in the Software without restriction, including
15 // without limitation the rights to use, copy, modify, merge, publish,
16 // distribute, sublicense, and/or sell copies of the Software, and to
17 // permit persons to whom the Software is furnished to do so, subject to
18 // the following conditions:
20 // The above copyright notice and this permission notice shall be
21 // included in all copies or substantial portions of the Software.
23 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
27 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
28 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
29 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
34 using System.Runtime.InteropServices;
38 #region XmlStreamReader
// Text reader over an XmlInputStream. Decoding uses the stream's
// ActualEncoding, or XmlInputStream.StrictUTF8 when that is null.
39 internal class XmlStreamReader : NonBlockingStreamReader
// Hands an already-built XmlInputStream, plus the encoding chosen as
// described above, to the base reader.
43 XmlStreamReader (XmlInputStream input)
44 : base (input, input.ActualEncoding != null ? input.ActualEncoding : XmlInputStream.StrictUTF8)
// Wraps a plain stream in a new XmlInputStream and chains to the
// constructor above.
49 public XmlStreamReader (Stream input)
50 : this (new XmlInputStream (input))
54 public override void Close ()
// Runs the base class disposal first.
59 protected override void Dispose (bool disposing)
61 base.Dispose (disposing);
70 #region NonBlockingStreamReader
71 // mostly copied from StreamReader, removing BOM checks, ctor
72 // parameter checks and some extra public members.
73 internal class NonBlockingStreamReader : TextReader {
// Size, in bytes, of the input buffer the constructor allocates.
75 const int DefaultBufferSize = 1024;
// NOTE(review): the next two constants do not appear to be used by this
// class - confirm whether they are still needed.
76 const int DefaultFileBufferSize = 4096;
77 const int MinimumBufferSize = 128;
85 // The decoded buffer from the above input buffer
87 char [] decoded_buffer;
90 // Number of decoded chars currently held in decoded_buffer.
95 // Current position in the decoded_buffer
100 // The buffer size that we are using
// Scratch builder reused by ReadLine for lines that span buffer refills.
109 StringBuilder line_builder;
// Builds a reader over `stream` that decodes with `encoding`.
// The byte input buffer is DefaultBufferSize long; the char buffer is sized
// with encoding.GetMaxCharCount so that a full input buffer always fits
// once decoded. Per the class comment, the parameter checks StreamReader
// performs were deliberately left out.
111 public NonBlockingStreamReader(Stream stream, Encoding encoding)
113 int buffer_size = DefaultBufferSize;
114 base_stream = stream;
115 input_buffer = new byte [buffer_size];
116 this.buffer_size = buffer_size;
117 this.encoding = encoding;
// A stateful decoder is kept so multi-byte sequences split across reads
// can be completed by the next GetChars call.
118 decoder = encoding.GetDecoder ();
120 decoded_buffer = new char [encoding.GetMaxCharCount (buffer_size)];
// The encoding that was passed to the constructor.
125 public Encoding Encoding {
126 get { return encoding; }
// TextReader.Close override for this reader.
129 public override void Close ()
// Releases the reader: on an explicit dispose with a base stream still
// attached, that stream is closed; the decoded buffer reference is dropped
// and the base class disposal runs last.
134 protected override void Dispose (bool disposing)
136 if (disposing && base_stream != null)
137 base_stream.Close ();
140 decoded_buffer = null;
144 base.Dispose (disposing);
// Forgets any decoded-but-unread chars and starts over with a fresh decoder.
147 public void DiscardBufferedData ()
// Cursor and count both go back to zero, so the next read refills.
149 pos = decoded_count = 0;
// A new decoder discards any partially decoded byte sequence.
154 decoder = encoding.GetDecoder ();
158 // the buffer is empty, fill it again
// Reads up to buffer_size bytes from the base stream, decodes them to the
// start of decoded_buffer and returns the number of decoded chars.
159 private int ReadBuffer ()
164 // keep looping until the decoder gives us some chars
169 cbEncoded = base_stream.Read (input_buffer, 0, buffer_size);
// A short read is taken as a hint that the next read could block; Peek
// checks this flag before asking for more data.
174 mayBlock = (cbEncoded < buffer_size);
175 decoded_count += decoder.GetChars (input_buffer, parse_start, cbEncoded, decoded_buffer, 0);
177 } while (decoded_count == 0);
179 return decoded_count;
// Returns the next decoded char without consuming it.
// Throws ObjectDisposedException once the base stream has been released.
182 public override int Peek ()
184 if (base_stream == null)
185 throw new ObjectDisposedException ("StreamReader", "Cannot read from a closed StreamReader");
// With the decoded buffer used up, a refill is only attempted when the
// previous read did not look like it could block (mayBlock is false);
// this is what keeps Peek from blocking.
186 if (pos >= decoded_count && (mayBlock || ReadBuffer () == 0))
189 return decoded_buffer [pos];
// Returns the next decoded char and advances past it, refilling the
// decoded buffer first when it has been fully consumed.
// Throws ObjectDisposedException once the base stream has been released.
192 public override int Read ()
194 if (base_stream == null)
195 throw new ObjectDisposedException ("StreamReader", "Cannot read from a closed StreamReader");
196 if (pos >= decoded_count && ReadBuffer () == 0)
199 return decoded_buffer [pos++];
// Copies decoded chars into dest_buffer, starting at index.
// Throws ObjectDisposedException once the base stream has been released,
// ArgumentNullException for a null dest_buffer, ArgumentOutOfRangeException
// for index or count (the messages indicate negative values), and
// ArgumentException when index + count runs past the end of dest_buffer.
202 public override int Read ([In, Out] char[] dest_buffer, int index, int count)
204 if (base_stream == null)
205 throw new ObjectDisposedException ("StreamReader", "Cannot read from a closed StreamReader");
206 if (dest_buffer == null)
207 throw new ArgumentNullException ("dest_buffer");
209 throw new ArgumentOutOfRangeException ("index", "< 0");
211 throw new ArgumentOutOfRangeException ("count", "< 0");
212 // re-ordered to avoid possible integer overflow
213 if (index > dest_buffer.Length - count)
214 throw new ArgumentException ("index + count > dest_buffer.Length");
// When no more chars can be decoded, report what was copied so far
// (never a negative number).
219 if (pos >= decoded_count && ReadBuffer () == 0)
220 return chars_read > 0 ? chars_read : 0;
// Copy no more than what is buffered and no more than what was asked for.
222 int cch = Math.Min (decoded_count - pos, count);
223 Array.Copy (decoded_buffer, pos, dest_buffer, index, cch);
// NOTE(review): presumably the body of FindNextEOL (), which ReadLine
// calls - confirm. Walks decoded_buffer forward from pos looking for a
// line terminator, using foundCR to remember a CR seen on the previous char.
236 for (; pos < decoded_count; pos++) {
237 c = decoded_buffer [pos];
// Back off one extra position when the terminator was preceded by a CR.
240 int res = (foundCR) ? (pos - 2) : (pos - 1);
242 res = 0; // if a new buffer starts with a \n and there was a \r at
243 // the end of the previous one, we get here.
246 } else if (foundCR) {
// Remember whether this char is a CR for the next iteration.
251 foundCR = (c == '\r');
// Reads one line from the decoded data.
// Fast path: when FindNextEOL reports an end of line inside the current
// buffer, the line is cut straight out of decoded_buffer. Otherwise the
// pieces are accumulated in line_builder across buffer refills.
// Throws ObjectDisposedException once the base stream has been released.
257 public override string ReadLine()
259 if (base_stream == null)
260 throw new ObjectDisposedException ("StreamReader", "Cannot read from a closed StreamReader");
262 if (pos >= decoded_count && ReadBuffer () == 0)
266 int end = FindNextEOL ();
267 if (end < decoded_count && end >= begin)
268 return new string (decoded_buffer, begin, end - begin);
// Slow path: the line continues past this buffer. The builder is created
// lazily and emptied before reuse.
270 if (line_builder == null)
271 line_builder = new StringBuilder ();
273 line_builder.Length = 0;
276 if (foundCR) // don't include the trailing CR if present
279 line_builder.Append (new string (decoded_buffer, begin, decoded_count - begin));
// End of input: whatever was accumulated is the last line.
280 if (ReadBuffer () == 0) {
// NOTE(review): a builder that grew past 32768 chars of capacity gets
// special handling here, presumably so it is not kept around - confirm.
281 if (line_builder.Capacity > 32768) {
282 StringBuilder sb = line_builder;
284 return sb.ToString (0, sb.Length);
286 return line_builder.ToString (0, line_builder.Length);
290 end = FindNextEOL ();
291 if (end < decoded_count && end >= begin) {
292 line_builder.Append (new string (decoded_buffer, begin, end - begin));
293 if (line_builder.Capacity > 32768) {
294 StringBuilder sb = line_builder;
296 return sb.ToString (0, sb.Length);
298 return line_builder.ToString (0, line_builder.Length);
// Drains the reader and returns everything that was left as one string.
// Throws ObjectDisposedException once the base stream has been released.
public override string ReadToEnd()
{
	if (base_stream == null)
		throw new ObjectDisposedException ("StreamReader", "Cannot read from a closed StreamReader");

	StringBuilder collected = new StringBuilder ();

	// Pull data through Read in chunks as large as the decoded buffer.
	int chunkSize = decoded_buffer.Length;
	char [] chunk = new char [chunkSize];

	for (;;) {
		int got = Read (chunk, 0, chunkSize);
		if (got == 0)
			break;
		collected.Append (chunk, 0, got);
	}

	return collected.ToString ();
}
// Stream wrapper that prefetches the head of an XML document in order to
// work out its encoding (see Initialize).
322 class XmlInputStream : Stream
// UTF-8 that emits no BOM and throws on invalid byte sequences.
324 public static readonly Encoding StrictUTF8;
326 static XmlInputStream ()
328 StrictUTF8 = new UTF8Encoding (false, true);
// One shared exception instance, thrown for every bad encoding declaration.
337 static XmlException encodingException = new XmlException ("invalid encoding specification.");
339 public XmlInputStream (Stream stream)
// Widens `count` bytes of `bytes`, starting at `index`, into a string with
// exactly one char per byte. It is used to compare raw input against short
// ASCII literals such as "?xml".
//
// Source and destination are indexed independently: the destination always
// starts at 0 while the source starts at `index`. Sharing one counter for
// both (and stopping at `count`) breaks every call with index > 0, leaving
// leading NUL chars and dropping the tail of the requested range.
static string GetStringFromBytes (byte [] bytes, int index, int count)
{
	char [] chars = new char [count];
	for (int i = 0; i < count; i++)
		chars [i] = (char) bytes [index + i];

	return new string (chars);
}
// Sniffs the document encoding from the first bytes of `stream`.
// Up to 64 bytes are read up front and strict UTF-8 is assumed by default;
// the code then looks for a UTF-16 byte order mark or for an XML
// declaration carrying an encoding name.
// Throws the shared encodingException (an XmlException) at several points
// while parsing the encoding declaration, including when the name is not a
// valid IANA encoding name.
357 private void Initialize (Stream stream)
359 buffer = new byte [64];
360 this.stream = stream;
361 enc = StrictUTF8; // Default to UTF8 if we can't guess it
362 bufLength = stream.Read (buffer, 0, buffer.Length);
// Nothing could be read: there is nothing to inspect.
363 if (bufLength == -1 || bufLength == 0) {
367 int c = ReadByteSpecial ();
370 c = ReadByteSpecial ();
372 // BOM-ed little endian utf-16
373 enc = Encoding.Unicode;
375 // It doesn't start from "<?xml" then its encoding is utf-8
380 c = ReadByteSpecial ();
382 // BOM-ed big endian utf-16
383 enc = Encoding.BigEndianUnicode;
386 // It doesn't start from "<?xml" then its encoding is utf-8
391 c = ReadByteSpecial ();
393 c = ReadByteSpecial ();
// Steps the cursor back by one and stores 0xEF at that slot.
398 buffer [--bufPos] = 0xEF;
402 // try to get encoding name from XMLDecl.
403 if (bufLength >= 5 && GetStringFromBytes (buffer, 1, 4) == "?xml") {
405 c = SkipWhitespace ();
407 // version. It is optional here.
410 c = ReadByteSpecial ();
411 if (c == '0') { // 0 of 1.0
416 c = SkipWhitespace ();
// Only look for "ncoding" when at least that many bytes are still buffered.
420 int remaining = bufLength - bufPos;
421 if (remaining >= 7 && GetStringFromBytes (buffer, bufPos, 7) == "ncoding") {
423 c = SkipWhitespace();
425 throw encodingException;
426 c = SkipWhitespace ();
// Collect the encoding name one byte at a time.
428 StringBuilder sb = new StringBuilder ();
430 c = ReadByteSpecial ();
434 throw encodingException;
436 sb.Append ((char) c);
438 string encodingName = sb.ToString ();
439 if (!XmlChar.IsValidIANAEncoding (encodingName))
440 throw encodingException;
441 enc = Encoding.GetEncoding (encodingName);
// If the 8 bytes at offset 2 decode as UTF-16LE to "?xml", treat the
// document as little endian UTF-16.
447 if (bufLength >= 10 && Encoding.Unicode.GetString (buffer, 2, 8) == "?xml")
448 enc = Encoding.Unicode;
459 // Just like readbyte, but grows the buffer too.
// Returns the next prefetched byte. Once the prefetched bytes run out, a
// buffer of twice the size is allocated, the old contents are copied into
// it, and up to buffer.Length more bytes are read from the stream.
460 int ReadByteSpecial ()
462 if (bufLength > bufPos)
463 return buffer [bufPos++];
465 byte [] newbuf = new byte [buffer.Length * 2];
466 Buffer.BlockCopy (buffer, 0, newbuf, 0, bufLength);
// New bytes are appended right after the ones already held.
467 int nbytes = stream.Read (newbuf, bufLength, buffer.Length);
468 if (nbytes == -1 || nbytes == 0)
473 return buffer [bufPos++];
476 // skips whitespace and returns misc char that was read from stream
477 private int SkipWhitespace ()
// Bytes are pulled through ReadByteSpecial, so the buffer may grow here.
481 c = ReadByteSpecial ();
// CR, LF and TAB share the handling of a plain space.
483 case '\r': goto case ' ';
484 case '\n': goto case ' ';
485 case '\t': goto case ' ';
// NOTE(review): presumably a guard that is never reached - confirm.
492 throw new InvalidOperationException ();
// NOTE(review): presumably exposes the encoding picked by Initialize - confirm.
495 public Encoding ActualEncoding {
499 #region Public Overrides
// Defers to the underlying stream's CanRead once the prefetched bytes
// have been used up.
500 public override bool CanRead {
502 if (bufLength > bufPos)
505 return stream.CanRead;
509 // FIXME: It should support base stream's CanSeek.
// Always reports false, whatever the underlying stream supports.
510 public override bool CanSeek {
511 get { return false; } // stream.CanSeek; }
// This stream is read-only; Write throws NotSupportedException.
514 public override bool CanWrite {
515 get { return false; }
// Length of the underlying stream.
518 public override long Length {
520 return stream.Length;
// Logical position: the underlying stream position, moved back by the
// prefetched bytes that have not been consumed yet.
524 public override long Position {
526 return stream.Position - bufLength + bufPos;
// NOTE(review): for values at or beyond bufLength the setter stores
// value - bufLength in stream.Position, which does not mirror the getter
// formula above - confirm the intended mapping.
529 if(value < bufLength)
532 stream.Position = value - bufLength;
// Stream.Close override for this wrapper.
536 public override void Close ()
// Stream.Flush override for this wrapper.
541 public override void Flush ()
// Fills `buffer` (the caller's array, which hides the field of the same
// name) from the prefetched bytes first and from the underlying stream
// afterwards.
546 public override int Read (byte[] buffer, int offset, int count)
549 if (count <= bufLength - bufPos) { // all from buffer
550 Buffer.BlockCopy (this.buffer, bufPos, buffer, offset, count);
// Not enough prefetched bytes: hand over what is left of them...
554 int bufRest = bufLength - bufPos;
555 if (bufLength > bufPos) {
556 Buffer.BlockCopy (this.buffer, bufPos, buffer, offset, bufRest);
// ...and ask the underlying stream for the remainder of the request.
560 stream.Read (buffer, offset + bufRest, count - bufRest);
// Returns the next prefetched byte while any remain; after that, whatever
// the underlying stream's ReadByte reports.
public override int ReadByte ()
{
	bool prefetched = bufPos < bufLength;
	return prefetched ? buffer [bufPos++] : stream.ReadByte ();
}
// Moves the read cursor and returns the new position.
//
// SeekOrigin.Current is resolved against the logical position, the one
// that accounts for prefetched-but-unread bytes: a target that still lies
// inside the prefetch buffer only moves bufPos; any other target is
// forwarded to the underlying stream with the unread byte count
// compensated. Begin and End are forwarded unchanged.
//
// Whenever the underlying stream is repositioned, the prefetched bytes no
// longer describe what comes next, so they are marked as consumed.
public override long Seek (long offset, System.IO.SeekOrigin origin)
{
	if (origin == SeekOrigin.Current) {
		long target = bufPos + offset;
		if (target >= 0 && target < bufLength) {
			bufPos = (int) target;
			// Same formula as the Position getter.
			return stream.Position - bufLength + bufPos;
		}

		// The underlying stream is already bufLength - bufPos bytes
		// ahead of the logical position.
		offset -= bufLength - bufPos;
	}

	long result = stream.Seek (offset, origin);
	// Drop the stale prefetch only after the seek has succeeded.
	bufPos = bufLength;
	return result;
}
// Forwards the requested length to the underlying stream.
585 public override void SetLength (long value)
587 stream.SetLength (value);
// Writing is not supported (CanWrite reports false); always throws
// NotSupportedException.
590 public override void Write (byte[] buffer, int offset, int count)
592 throw new NotSupportedException ();