2 // System.Xml.XmlInputStream
3 // encoding-specification-wise XML input stream and reader
6 // Atsushi Enomoto (ginga@kit.hi-ho.ne.jp)
8 // (C)2003 Atsushi Enomoto
12 // Permission is hereby granted, free of charge, to any person obtaining
13 // a copy of this software and associated documentation files (the
14 // "Software"), to deal in the Software without restriction, including
15 // without limitation the rights to use, copy, modify, merge, publish,
16 // distribute, sublicense, and/or sell copies of the Software, and to
17 // permit persons to whom the Software is furnished to do so, subject to
18 // the following conditions:
20 // The above copyright notice and this permission notice shall be
21 // included in all copies or substantial portions of the Software.
23 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
27 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
28 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
29 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
34 using System.Runtime.InteropServices;
38 #region XmlStreamReader
// Text reader over an XmlInputStream, built on NonBlockingStreamReader.
39 internal class XmlStreamReader : NonBlockingStreamReader
// Private constructor: hands the XmlInputStream to the base reader together
// with its ActualEncoding, or XmlInputStream.StrictUTF8 when ActualEncoding
// is null.
43 XmlStreamReader (XmlInputStream input)
44 : base (input, input.ActualEncoding != null ? input.ActualEncoding : XmlInputStream.StrictUTF8)
// Public constructor: wraps the given stream in a new XmlInputStream and
// chains to the private constructor.
49 public XmlStreamReader (Stream input)
50 : this (new XmlInputStream (input))
// NOTE(review): the body of this override is not visible in this excerpt.
54 public override void Close ()
// Forwards to the base class Dispose. Any other statements of this override
// are not visible in this excerpt.
59 protected override void Dispose (bool disposing)
61 base.Dispose (disposing);
70 #region NonBlockingStreamReader
71 // mostly copied from StreamReader, removing BOM checks, ctor
72 // parameter checks and some extra public members.
// TextReader implementation that reads bytes from a base stream and decodes
// them through the Decoder of a given Encoding into a char buffer.
73 internal class NonBlockingStreamReader : TextReader {
// Size in bytes of the input buffer allocated by the constructor.
75 const int DefaultBufferSize = 1024;
// NOTE(review): the next two constants are not referenced by any line that is
// visible in this excerpt.
76 const int DefaultFileBufferSize = 4096;
77 const int MinimumBufferSize = 128;
85 // The decoded buffer from the above input buffer
87 char [] decoded_buffer;
90 // Decoded bytes in decoded_buffer.
95 // Current position in the decoded_buffer
100 // The buffer size that we are using
// Scratch builder that ReadLine creates on first use and reuses for lines
// that do not end inside the current decoded buffer.
109 StringBuilder line_builder;
// Creates a reader over the given stream that decodes with the given encoding.
// No argument checks are visible in this excerpt.
111 public NonBlockingStreamReader(Stream stream, Encoding encoding)
// The input buffer always uses the default size; there is no size parameter.
113 int buffer_size = DefaultBufferSize;
114 base_stream = stream;
115 input_buffer = new byte [buffer_size];
116 this.buffer_size = buffer_size;
117 this.encoding = encoding;
// The decoder is stored in a field and reused by ReadBuffer for every refill.
118 decoder = encoding.GetDecoder ();
// Sized for the largest number of chars that buffer_size bytes can decode to.
120 decoded_buffer = new char [encoding.GetMaxCharCount (buffer_size)];
// The encoding stored by the constructor. Read-only as far as this excerpt shows.
125 public Encoding Encoding {
126 get { return encoding; }
// NOTE(review): the body of this override is not visible in this excerpt.
129 public override void Close ()
// Releases the reader. Other statements of this method may exist but are not
// visible in this excerpt.
134 protected override void Dispose (bool disposing)
// The base stream is closed only on an explicit dispose and only while one is
// still attached.
136 if (disposing && base_stream != null)
137 base_stream.Close ();
// Drop the reference to the decoded char buffer.
140 decoded_buffer = null;
144 base.Dispose (disposing);
// Forgets any decoded but unread chars and replaces the decoder with a fresh
// one obtained from the same encoding.
147 public void DiscardBufferedData ()
// With pos equal to decoded_count, the next Read call goes back to ReadBuffer.
149 pos = decoded_count = 0;
154 decoder = encoding.GetDecoder ();
158 // the buffer is empty, fill it again
// Returns decoded_count, the number of decoded chars now available. The Read
// methods treat a return value of 0 as nothing more to read.
// NOTE(review): the exit taken when the stream delivers no bytes is not
// visible in this excerpt.
159 private int ReadBuffer ()
164 // keep looping until the decoder gives us some chars
// Ask the base stream for up to buffer_size bytes.
169 cbEncoded = base_stream.Read (input_buffer, 0, buffer_size);
// A short read is remembered in mayBlock; Peek consults that flag before it
// decides whether to call ReadBuffer again.
174 mayBlock = (cbEncoded < buffer_size);
// Decoded chars are written starting at index 0 of decoded_buffer.
175 decoded_count += decoder.GetChars (input_buffer, parse_start, cbEncoded, decoded_buffer, 0);
177 } while (decoded_count == 0);
179 return decoded_count;
// Returns the next char without consuming it (pos is not advanced).
// Throws ObjectDisposedException when base_stream is null.
182 public override int Peek ()
184 if (base_stream == null)
185 throw new ObjectDisposedException ("StreamReader", "Cannot read from a closed StreamReader");
// When the decoded buffer is exhausted, a refill is attempted only if mayBlock
// is false (the || short-circuits). The statement executed when this condition
// holds is not visible in this excerpt.
186 if (pos >= decoded_count && (mayBlock || ReadBuffer () == 0))
189 return decoded_buffer [pos];
// Returns the next char and advances pos past it.
// Throws ObjectDisposedException when base_stream is null.
192 public override int Read ()
194 if (base_stream == null)
195 throw new ObjectDisposedException ("StreamReader", "Cannot read from a closed StreamReader");
// Refill when the decoded buffer is exhausted. The statement executed when
// ReadBuffer returns 0 is not visible in this excerpt.
196 if (pos >= decoded_count && ReadBuffer () == 0)
199 return decoded_buffer [pos++];
// Copies decoded chars into dest_buffer starting at index.
// Throws ObjectDisposedException when base_stream is null,
// ArgumentNullException when dest_buffer is null, ArgumentOutOfRangeException
// for index or count, and ArgumentException when index + count would run past
// the end of dest_buffer.
202 public override int Read ([In, Out] char[] dest_buffer, int index, int count)
204 if (base_stream == null)
205 throw new ObjectDisposedException ("StreamReader", "Cannot read from a closed StreamReader");
206 if (dest_buffer == null)
207 throw new ArgumentNullException ("dest_buffer");
// NOTE(review): the conditions guarding the next two throws are not visible
// in this excerpt; the messages indicate negative-value checks.
209 throw new ArgumentOutOfRangeException ("index", "< 0");
211 throw new ArgumentOutOfRangeException ("count", "< 0");
212 // re-ordered to avoid possible integer overflow
213 if (index > dest_buffer.Length - count)
214 throw new ArgumentException ("index + count > dest_buffer.Length");
// When nothing is buffered and the refill yields nothing, report the chars
// copied so far (never a negative number).
219 if (pos >= decoded_count && ReadBuffer () == 0)
220 return chars_read > 0 ? chars_read : 0;
// Copy at most what is buffered and at most what was asked for. The
// bookkeeping that follows the copy is not visible in this excerpt.
222 int cch = Math.Min (decoded_count - pos, count);
223 Array.Copy (decoded_buffer, pos, dest_buffer, index, cch);
// NOTE(review): the signature these lines belong to is not visible in this
// excerpt; they look like the body of the FindNextEOL helper that ReadLine
// calls. Confirm against the full file.
// Walks decoded_buffer from pos up to decoded_count.
236 for (; pos < decoded_count; pos++) {
237 c = decoded_buffer [pos];
// res is pos - 2 when foundCR is set, otherwise pos - 1. The condition under
// which it is computed is not visible in this excerpt.
240 int res = (foundCR) ? (pos - 2) : (pos - 1);
242 res = 0; // if a new buffer starts with a \n and there was a \r at
243 // the end of the previous one, we get here.
246 } else if (foundCR) {
// Remember whether the char just examined was a carriage return.
251 foundCR = (c == '\r');
// Reads one line of text. Throws ObjectDisposedException when base_stream is
// null. Several statements of this method (loop header, some returns and
// assignments) are not visible in this excerpt.
257 public override string ReadLine()
259 if (base_stream == null)
260 throw new ObjectDisposedException ("StreamReader", "Cannot read from a closed StreamReader");
// Refill when the decoded buffer is exhausted. The statement executed when
// nothing more can be read is not visible in this excerpt.
262 if (pos >= decoded_count && ReadBuffer () == 0)
// Fast path: the end of line lies inside the current buffer, so the line is
// built directly from decoded_buffer without touching line_builder.
266 int end = FindNextEOL ();
267 if (end < decoded_count && end >= begin)
268 return new string (decoded_buffer, begin, end - begin);
// Slow path: the line continues past the current buffer. The shared builder
// is created on first use and emptied before each use.
270 if (line_builder == null)
271 line_builder = new StringBuilder ();
273 line_builder.Length = 0;
276 if (foundCR) // don't include the trailing CR if present
// Append the rest of the current buffer, then try to get more input.
279 line_builder.Append (new string (decoded_buffer, begin, decoded_count - begin));
280 if (ReadBuffer () == 0) {
// No more input: return what was collected. A builder whose capacity grew
// beyond 32768 is handled through a local copy of the reference.
// NOTE(review): presumably the field is cleared on the hidden line between
// the next two statements so the large builder is not kept; confirm.
281 if (line_builder.Capacity > 32768) {
282 StringBuilder sb = line_builder;
284 return sb.ToString (0, sb.Length);
286 return line_builder.ToString (0, line_builder.Length);
// More input arrived: look for the end of line in the new buffer.
290 end = FindNextEOL ();
291 if (end < decoded_count && end >= begin) {
292 line_builder.Append (new string (decoded_buffer, begin, end - begin));
// Same large-builder handling as in the end-of-input branch.
293 if (line_builder.Capacity > 32768) {
294 StringBuilder sb = line_builder;
296 return sb.ToString (0, sb.Length);
298 return line_builder.ToString (0, line_builder.Length);
// Reads everything that remains and returns it as a single string.
// Throws ObjectDisposedException when base_stream is null.
303 public override string ReadToEnd()
305 if (base_stream == null)
306 throw new ObjectDisposedException ("StreamReader", "Cannot read from a closed StreamReader");
308 StringBuilder text = new StringBuilder ();
// Chunk buffer with the same length as decoded_buffer.
310 int size = decoded_buffer.Length;
311 char [] buffer = new char [size];
// Keep calling Read (char[], int, int) until it reports 0 chars.
314 while ((len = Read (buffer, 0, size)) != 0)
315 text.Append (buffer, 0, len);
317 return text.ToString ();
// Stream wrapper that looks at the first bytes of the wrapped stream to
// determine the text encoding (see Initialize) and then serves those buffered
// bytes before reading from the wrapped stream again.
322 class XmlInputStream : Stream
// UTF-8 encoding used as the default when no other encoding is detected.
324 public static readonly Encoding StrictUTF8;
326 static XmlInputStream ()
// First argument false: no byte order mark is emitted when encoding.
// Second argument true: invalid byte sequences raise an exception.
328 StrictUTF8 = new UTF8Encoding (false, true);
// NOTE(review): one shared exception instance is thrown from several places
// in Initialize.
337 static XmlException encodingException = new XmlException ("invalid encoding specification.");
// NOTE(review): the constructor body is not visible in this excerpt.
339 public XmlInputStream (Stream stream)
// Reads the first bytes of the stream into a 64-byte buffer and tries to
// determine the encoding (stored in enc) from a byte order mark or from the
// XML declaration. Many branch conditions of this method are not visible in
// this excerpt; the notes below describe only the visible statements.
344 private void Initialize (Stream stream)
346 buffer = new byte [64];
347 this.stream = stream;
348 enc = StrictUTF8; // Default to UTF8 if we can't guess it
349 bufLength = stream.Read (buffer, 0, buffer.Length);
// Nothing could be read. The statements of this branch are not visible here.
350 if (bufLength == -1 || bufLength == 0) {
// Inspect the leading bytes one at a time.
354 int c = ReadByteSpecial ();
357 c = ReadByteSpecial ();
359 // BOM-ed little endian utf-16
360 enc = Encoding.Unicode;
362 // It doesn't start from "<?xml" then its encoding is utf-8
367 c = ReadByteSpecial ();
369 // BOM-ed big endian utf-16
370 enc = Encoding.BigEndianUnicode;
373 // It doesn't start from "<?xml" then its encoding is utf-8
378 c = ReadByteSpecial ();
380 c = ReadByteSpecial ();
// Steps bufPos back by one and stores 0xEF at that position.
// NOTE(review): presumably this un-reads the first byte of an incomplete
// UTF-8 byte order mark; confirm against the hidden conditions.
385 buffer [--bufPos] = 0xEF;
389 // try to get encoding name from XMLDecl.
// Bytes 1 to 4 of the buffer are compared as ASCII text, so the byte at
// index 0 is not part of this comparison.
390 if (bufLength >= 5 && Encoding.ASCII.GetString (buffer, 1, 4) == "?xml") {
392 c = SkipWhitespace ();
394 // version. It is optional here.
397 c = ReadByteSpecial ();
398 if (c == '0') { // 0 of 1.0
403 c = SkipWhitespace ();
// Only compare when at least 7 buffered bytes remain. The first letter of the
// attribute name has presumably been consumed into c already; confirm.
407 int remaining = bufLength - bufPos;
408 if (remaining >= 7 && Encoding.ASCII.GetString(buffer, bufPos, 7) == "ncoding") {
410 c = SkipWhitespace();
412 throw encodingException;
413 c = SkipWhitespace ();
// Collect the bytes of the encoding name as chars.
415 StringBuilder sb = new StringBuilder ();
417 c = ReadByteSpecial ();
421 throw encodingException;
423 sb.Append ((char) c);
// The collected name must pass XmlChar.IsValidIANAEncoding before it is
// looked up with Encoding.GetEncoding.
425 string encodingName = sb.ToString ();
426 if (!XmlChar.IsValidIANAEncoding (encodingName))
427 throw encodingException;
428 enc = Encoding.GetEncoding (encodingName);
// Decodes 8 bytes starting at index 2 as little endian UTF-16 and compares
// the result with the same four-char marker used in the ASCII check above.
434 if (bufLength >= 10 && Encoding.Unicode.GetString (buffer, 2, 8) == "?xml")
435 enc = Encoding.Unicode;
446 // Just like readbyte, but grows the buffer too.
447 int ReadByteSpecial ()
// Fast path: an unread byte is still in the buffer.
449 if (bufLength > bufPos)
450 return buffer [bufPos++];
// Otherwise allocate a buffer twice as large, keep the bytes read so far and
// read up to buffer.Length more bytes from the stream behind them.
452 byte [] newbuf = new byte [buffer.Length * 2];
453 Buffer.BlockCopy (buffer, 0, newbuf, 0, bufLength);
454 int nbytes = stream.Read (newbuf, bufLength, buffer.Length);
// NOTE(review): the statement run when no bytes arrive, and the bookkeeping
// after a successful read, are not visible in this excerpt.
455 if (nbytes == -1 || nbytes == 0)
460 return buffer [bufPos++];
463 // skips whitespace and returns misc char that was read from stream
464 private int SkipWhitespace ()
// Bytes are fetched through ReadByteSpecial, so the buffer may grow here.
468 c = ReadByteSpecial ();
// Carriage return, line feed and tab are routed to the same case as space.
470 case '\r': goto case ' ';
471 case '\n': goto case ' ';
472 case '\t': goto case ' ';
// NOTE(review): presumably only reached if the read loop is left without
// returning; the loop itself is not visible in this excerpt. Confirm.
479 throw new InvalidOperationException ();
// NOTE(review): the accessor body is not visible in this excerpt.
// XmlStreamReader reads this property and falls back to StrictUTF8 when it
// is null.
482 public Encoding ActualEncoding {
486 #region Public Overrides
// Checks first whether unread bytes remain in the look-ahead buffer (the
// statement of that branch is not visible in this excerpt); otherwise the
// answer comes from the wrapped stream.
487 public override bool CanRead {
489 if (bufLength > bufPos)
492 return stream.CanRead;
496 // FIXME: It should support base stream's CanSeek.
// Always false, regardless of what the wrapped stream supports.
497 public override bool CanSeek {
498 get { return false; } // stream.CanSeek; }
// Always false; Write throws NotSupportedException.
501 public override bool CanWrite {
502 get { return false; }
// Length of the wrapped stream, returned unchanged.
505 public override long Length {
507 return stream.Length;
// Logical read position of this wrapper.
511 public override long Position {
// Getter: the wrapped stream position minus the bytes that were buffered,
// plus the buffered bytes already consumed.
513 return stream.Position - bufLength + bufPos;
// Setter: values below bufLength take a separate branch whose statement is
// not visible in this excerpt.
516 if(value < bufLength)
// NOTE(review): the getter implies stream.Position == logical position +
// bufLength - bufPos, yet value - bufLength is assigned here. That looks
// inconsistent with the getter; confirm before relying on this setter.
519 stream.Position = value - bufLength;
// NOTE(review): the body of this override is not visible in this excerpt.
523 public override void Close ()
// NOTE(review): the body of this override is not visible in this excerpt.
528 public override void Flush ()
// Serves reads from the look-ahead buffer first and from the wrapped stream
// afterwards. The statements that advance bufPos and compute the return value
// are not visible in this excerpt.
533 public override int Read (byte[] buffer, int offset, int count)
536 if (count <= bufLength - bufPos) { // all from buffer
// this.buffer is the internal look-ahead buffer; the parameter named buffer
// is the destination supplied by the caller.
537 Buffer.BlockCopy (this.buffer, bufPos, buffer, offset, count);
// Otherwise hand out whatever is still buffered and read the remainder of
// the request from the wrapped stream, placed right behind it.
541 int bufRest = bufLength - bufPos;
542 if (bufLength > bufPos) {
543 Buffer.BlockCopy (this.buffer, bufPos, buffer, offset, bufRest);
547 stream.Read (buffer, offset + bufRest, count - bufRest);
// Returns the next buffered byte while any remain, otherwise whatever the
// wrapped stream returns from ReadByte.
552 public override int ReadByte ()
554 if (bufLength > bufPos) {
555 return buffer [bufPos++];
557 return stream.ReadByte ();
// Seeks relative to the given origin. CanSeek reports false for this class.
560 public override long Seek (long offset, System.IO.SeekOrigin origin)
// Number of buffered bytes not yet consumed.
562 int bufRest = bufLength - bufPos;
563 if (origin == SeekOrigin.Current)
564 if (offset < bufRest)
// NOTE(review): this returns the byte value stored in the buffer, not a
// stream position, and bufPos is not advanced on this line. It looks like a
// defect; confirm before relying on this path.
565 return buffer [bufPos + offset];
// Relative seek past the buffered bytes: only the distance beyond them is
// forwarded to the wrapped stream.
567 return stream.Seek (offset - bufRest, origin);
// Any other origin is forwarded unchanged.
569 return stream.Seek (offset, origin);
// Forwards the request to the wrapped stream.
572 public override void SetLength (long value)
574 stream.SetLength (value);
// Writing is not supported; always throws NotSupportedException.
577 public override void Write (byte[] buffer, int offset, int count)
579 throw new NotSupportedException ();