New test.
[mono.git] / mcs / class / System.XML / System.Xml / XmlInputStream.cs
1 //
2 // System.Xml.XmlInputStream 
3 //      encoding-specification-wise XML input stream and reader
4 //
5 // Author:
6 //      Atsushi Enomoto (ginga@kit.hi-ho.ne.jp)
7 //
8 //      (C)2003 Atsushi Enomoto
9 //
10
11 //
12 // Permission is hereby granted, free of charge, to any person obtaining
13 // a copy of this software and associated documentation files (the
14 // "Software"), to deal in the Software without restriction, including
15 // without limitation the rights to use, copy, modify, merge, publish,
16 // distribute, sublicense, and/or sell copies of the Software, and to
17 // permit persons to whom the Software is furnished to do so, subject to
18 // the following conditions:
19 // 
20 // The above copyright notice and this permission notice shall be
21 // included in all copies or substantial portions of the Software.
22 // 
23 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
27 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
28 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
29 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
30 //
31 using System;
32 using System.IO;
33 using System.Text;
34 using System.Runtime.InteropServices;
35
36 namespace System.Xml
37 {
38         #region XmlStreamReader
39         internal class XmlStreamReader : NonBlockingStreamReader
40         {
41                 XmlInputStream input;
42
43                 XmlStreamReader (XmlInputStream input)
44                         : base (input, input.ActualEncoding != null ? input.ActualEncoding : XmlInputStream.StrictUTF8)
45                 {
46                         this.input = input;
47                 }
48
49                 public XmlStreamReader (Stream input)
50                         : this (new XmlInputStream (input))
51                 {
52                 }
53
54                 public override void Close ()
55                 {
56                         this.input.Close ();
57                 }
58
59                 protected override void Dispose (bool disposing)
60                 {
61                         base.Dispose (disposing);
62                         if (disposing) {
63                                 Close ();
64                         }
65                 }
66
67         }
68         #endregion
69
70         #region NonBlockingStreamReader
71         // mostly copied from StreamReader, removing BOM checks, ctor
72         // parameter checks and some extra public members.
73         internal class NonBlockingStreamReader : TextReader {
74
75                 const int DefaultBufferSize = 1024;
76                 const int DefaultFileBufferSize = 4096;
77                 const int MinimumBufferSize = 128;
78
79                 //
80                 // The input buffer
81                 //
82                 byte [] input_buffer;
83
84                 //
85                 // The decoded buffer from the above input buffer
86                 //
87                 char [] decoded_buffer;
88
89                 //
90                 // Decoded bytes in decoded_buffer.
91                 //
92                 int decoded_count;
93
94                 //
95                 // Current position in the decoded_buffer
96                 //
97                 int pos;
98
99                 //
100                 // The buffer size that we are using
101                 //
102                 int buffer_size;
103
104                 Encoding encoding;
105                 Decoder decoder;
106
107                 Stream base_stream;
108                 bool mayBlock;
109                 StringBuilder line_builder;
110
111                 public NonBlockingStreamReader(Stream stream, Encoding encoding)
112                 {
113                         int buffer_size = DefaultBufferSize;
114                         base_stream = stream;
115                         input_buffer = new byte [buffer_size];
116                         this.buffer_size = buffer_size;
117                         this.encoding = encoding;
118                         decoder = encoding.GetDecoder ();
119
120                         decoded_buffer = new char [encoding.GetMaxCharCount (buffer_size)];
121                         decoded_count = 0;
122                         pos = 0;
123                 }
124
125                 public Encoding Encoding {
126                         get { return encoding; }
127                 }
128
129                 public override void Close ()
130                 {
131                         Dispose (true);
132                 }
133
134                 protected override void Dispose (bool disposing)
135                 {
136                         if (disposing && base_stream != null)
137                                 base_stream.Close ();
138                         
139                         input_buffer = null;
140                         decoded_buffer = null;
141                         encoding = null;
142                         decoder = null;
143                         base_stream = null;
144                         base.Dispose (disposing);
145                 }
146
147                 public void DiscardBufferedData ()
148                 {
149                         pos = decoded_count = 0;
150                         mayBlock = false;
151 #if NET_2_0
152                         decoder.Reset ();
153 #else
154                         decoder = encoding.GetDecoder ();
155 #endif
156                 }
157                 
158                 // the buffer is empty, fill it again
159                 private int ReadBuffer ()
160                 {
161                         pos = 0;
162                         int cbEncoded = 0;
163
164                         // keep looping until the decoder gives us some chars
165                         decoded_count = 0;
166                         int parse_start = 0;
167                         do      
168                         {
169                                 cbEncoded = base_stream.Read (input_buffer, 0, buffer_size);
170                                 
171                                 if (cbEncoded == 0)
172                                         return 0;
173
174                                 mayBlock = (cbEncoded < buffer_size);
175                                 decoded_count += decoder.GetChars (input_buffer, parse_start, cbEncoded, decoded_buffer, 0);
176                                 parse_start = 0;
177                         } while (decoded_count == 0);
178
179                         return decoded_count;
180                 }
181
182                 public override int Peek ()
183                 {
184                         if (base_stream == null)
185                                 throw new ObjectDisposedException ("StreamReader", "Cannot read from a closed StreamReader");
186                         if (pos >= decoded_count && (mayBlock || ReadBuffer () == 0))
187                                 return -1;
188
189                         return decoded_buffer [pos];
190                 }
191
192                 public override int Read ()
193                 {
194                         if (base_stream == null)
195                                 throw new ObjectDisposedException ("StreamReader", "Cannot read from a closed StreamReader");
196                         if (pos >= decoded_count && ReadBuffer () == 0)
197                                 return -1;
198
199                         return decoded_buffer [pos++];
200                 }
201
202                 public override int Read ([In, Out] char[] dest_buffer, int index, int count)
203                 {
204                         if (base_stream == null)
205                                 throw new ObjectDisposedException ("StreamReader", "Cannot read from a closed StreamReader");
206                         if (dest_buffer == null)
207                                 throw new ArgumentNullException ("dest_buffer");
208                         if (index < 0)
209                                 throw new ArgumentOutOfRangeException ("index", "< 0");
210                         if (count < 0)
211                                 throw new ArgumentOutOfRangeException ("count", "< 0");
212                         // re-ordered to avoid possible integer overflow
213                         if (index > dest_buffer.Length - count)
214                                 throw new ArgumentException ("index + count > dest_buffer.Length");
215
216                         int chars_read = 0;
217 //                      while (count > 0)
218                         {
219                                 if (pos >= decoded_count && ReadBuffer () == 0)
220                                         return chars_read > 0 ? chars_read : 0;
221
222                                 int cch = Math.Min (decoded_count - pos, count);
223                                 Array.Copy (decoded_buffer, pos, dest_buffer, index, cch);
224                                 pos += cch;
225                                 index += cch;
226                                 count -= cch;
227                                 chars_read += cch;
228                         }
229                         return chars_read;
230                 }
231
232                 bool foundCR;
233                 int FindNextEOL ()
234                 {
235                         char c = '\0';
236                         for (; pos < decoded_count; pos++) {
237                                 c = decoded_buffer [pos];
238                                 if (c == '\n') {
239                                         pos++;
240                                         int res = (foundCR) ? (pos - 2) : (pos - 1);
241                                         if (res < 0)
242                                                 res = 0; // if a new buffer starts with a \n and there was a \r at
243                                                         // the end of the previous one, we get here.
244                                         foundCR = false;
245                                         return res;
246                                 } else if (foundCR) {
247                                         foundCR = false;
248                                         return pos - 1;
249                                 }
250
251                                 foundCR = (c == '\r');
252                         }
253
254                         return -1;
255                 }
256
257                 public override string ReadLine()
258                 {
259                         if (base_stream == null)
260                                 throw new ObjectDisposedException ("StreamReader", "Cannot read from a closed StreamReader");
261
262                         if (pos >= decoded_count && ReadBuffer () == 0)
263                                 return null;
264
265                         int begin = pos;
266                         int end = FindNextEOL ();
267                         if (end < decoded_count && end >= begin)
268                                 return new string (decoded_buffer, begin, end - begin);
269
270                         if (line_builder == null)
271                                 line_builder = new StringBuilder ();
272                         else
273                                 line_builder.Length = 0;
274
275                         while (true) {
276                                 if (foundCR) // don't include the trailing CR if present
277                                         decoded_count--;
278
279                                 line_builder.Append (new string (decoded_buffer, begin, decoded_count - begin));
280                                 if (ReadBuffer () == 0) {
281                                         if (line_builder.Capacity > 32768) {
282                                                 StringBuilder sb = line_builder;
283                                                 line_builder = null;
284                                                 return sb.ToString (0, sb.Length);
285                                         }
286                                         return line_builder.ToString (0, line_builder.Length);
287                                 }
288
289                                 begin = pos;
290                                 end = FindNextEOL ();
291                                 if (end < decoded_count && end >= begin) {
292                                         line_builder.Append (new string (decoded_buffer, begin, end - begin));
293                                         if (line_builder.Capacity > 32768) {
294                                                 StringBuilder sb = line_builder;
295                                                 line_builder = null;
296                                                 return sb.ToString (0, sb.Length);
297                                         }
298                                         return line_builder.ToString (0, line_builder.Length);
299                                 }
300                         }
301                 }
302
303                 public override string ReadToEnd()
304                 {
305                         if (base_stream == null)
306                                 throw new ObjectDisposedException ("StreamReader", "Cannot read from a closed StreamReader");
307
308                         StringBuilder text = new StringBuilder ();
309
310                         int size = decoded_buffer.Length;
311                         char [] buffer = new char [size];
312                         int len;
313                         
314                         while ((len = Read (buffer, 0, size)) != 0)
315                                 text.Append (buffer, 0, len);
316
317                         return text.ToString ();
318                 }
319         }
320         #endregion
321
322         class XmlInputStream : Stream
323         {
324                 public static readonly Encoding StrictUTF8;
325
326                 static XmlInputStream ()
327                 {
328                         StrictUTF8 = new UTF8Encoding (false, true);
329                 }
330
331                 Encoding enc;
332                 Stream stream;
333                 byte[] buffer;
334                 int bufLength;
335                 int bufPos;
336
337                 static XmlException encodingException = new XmlException ("invalid encoding specification.");
338
339                 public XmlInputStream (Stream stream)
340                 {
341                         Initialize (stream);
342                 }
343
344                 private void Initialize (Stream stream)
345                 {
346                         buffer = new byte [64];
347                         this.stream = stream;
348                         enc = StrictUTF8; // Default to UTF8 if we can't guess it
349                         bufLength = stream.Read (buffer, 0, buffer.Length);
350                         if (bufLength == -1 || bufLength == 0) {
351                                 return;
352                         }
353
354                         int c = ReadByteSpecial ();
355                         switch (c) {
356                         case 0xFF:
357                                 c = ReadByteSpecial ();
358                                 if (c == 0xFE) {
359                                         // BOM-ed little endian utf-16
360                                         enc = Encoding.Unicode;
361                                 } else {
362                                         // It doesn't start from "<?xml" then its encoding is utf-8
363                                         bufPos = 0;
364                                 }
365                                 break;
366                         case 0xFE:
367                                 c = ReadByteSpecial ();
368                                 if (c == 0xFF) {
369                                         // BOM-ed big endian utf-16
370                                         enc = Encoding.BigEndianUnicode;
371                                         return;
372                                 } else {
373                                         // It doesn't start from "<?xml" then its encoding is utf-8
374                                         bufPos = 0;
375                                 }
376                                 break;
377                         case 0xEF:
378                                 c = ReadByteSpecial ();
379                                 if (c == 0xBB) {
380                                         c = ReadByteSpecial ();
381                                         if (c != 0xBF) {
382                                                 bufPos = 0;
383                                         }
384                                 } else {
385                                         buffer [--bufPos] = 0xEF;
386                                 }
387                                 break;
388                         case '<':
389                                 // try to get encoding name from XMLDecl.
390                                 if (bufLength >= 5 && Encoding.ASCII.GetString (buffer, 1, 4) == "?xml") {
391                                         bufPos += 4;
392                                         c = SkipWhitespace ();
393
394                                         // version. It is optional here.
395                                         if (c == 'v') {
396                                                 while (c >= 0) {
397                                                         c = ReadByteSpecial ();
398                                                         if (c == '0') { // 0 of 1.0
399                                                                 ReadByteSpecial ();
400                                                                 break;
401                                                         }
402                                                 }
403                                                 c = SkipWhitespace ();
404                                         }
405
406                                         if (c == 'e') {
407                                                 int remaining = bufLength - bufPos;
408                                                 if (remaining >= 7 && Encoding.ASCII.GetString(buffer, bufPos, 7) == "ncoding") {
409                                                         bufPos += 7;
410                                                         c = SkipWhitespace();
411                                                         if (c != '=')
412                                                                 throw encodingException;
413                                                         c = SkipWhitespace ();
414                                                         int quoteChar = c;
415                                                         StringBuilder sb = new StringBuilder ();
416                                                         while (true) {
417                                                                 c = ReadByteSpecial ();
418                                                                 if (c == quoteChar)
419                                                                         break;
420                                                                 else if (c < 0)
421                                                                         throw encodingException;
422
423                                                                 sb.Append ((char) c);
424                                                         }
425                                                         string encodingName = sb.ToString ();
426                                                         if (!XmlChar.IsValidIANAEncoding (encodingName))
427                                                                 throw encodingException;
428                                                         enc = Encoding.GetEncoding (encodingName);
429                                                 }
430                                         }
431                                 }
432 #if TARGET_JVM
433                                 else {
434                                         if (bufLength >= 10 && Encoding.Unicode.GetString (buffer, 2, 8) == "?xml")
435                                                 enc = Encoding.Unicode;
436                                 }
437 #endif
438                                 bufPos = 0;
439                                 break;
440                         default:
441                                 bufPos = 0;
442                                 break;
443                         }
444                 }
445
446                 // Just like readbyte, but grows the buffer too.
447                 int ReadByteSpecial ()
448                 {
449                         if (bufLength > bufPos)
450                                 return buffer [bufPos++];
451
452                         byte [] newbuf = new byte [buffer.Length * 2];
453                         Buffer.BlockCopy (buffer, 0, newbuf, 0, bufLength);
454                         int nbytes = stream.Read (newbuf, bufLength, buffer.Length);
455                         if (nbytes == -1 || nbytes == 0)
456                                 return -1;
457                                 
458                         bufLength += nbytes;
459                         buffer = newbuf;
460                         return buffer [bufPos++];
461                 }
462
463                 // skips whitespace and returns misc char that was read from stream
464                 private int SkipWhitespace ()
465                 {
466                         int c;
467                         while (true) {
468                                 c = ReadByteSpecial ();
469                                 switch ((char) c) {
470                                 case '\r': goto case ' ';
471                                 case '\n': goto case ' ';
472                                 case '\t': goto case ' ';
473                                 case ' ':
474                                         continue;
475                                 default:
476                                         return c;
477                                 }
478                         }
479                         throw new InvalidOperationException ();
480                 }
481
482                 public Encoding ActualEncoding {
483                         get { return enc; }
484                 }
485
486                 #region Public Overrides
487                 public override bool CanRead {
488                         get {
489                                 if (bufLength > bufPos)
490                                         return true;
491                                 else
492                                         return stream.CanRead; 
493                         }
494                 }
495
496                 // FIXME: It should support base stream's CanSeek.
497                 public override bool CanSeek {
498                         get { return false; } // stream.CanSeek; }
499                 }
500
501                 public override bool CanWrite {
502                         get { return false; }
503                 }
504
505                 public override long Length {
506                         get {
507                                 return stream.Length;
508                         }
509                 }
510
511                 public override long Position {
512                         get {
513                                 return stream.Position - bufLength + bufPos;
514                         }
515                         set {
516                                 if(value < bufLength)
517                                         bufPos = (int)value;
518                                 else
519                                         stream.Position = value - bufLength;
520                         }
521                 }
522
523                 public override void Close ()
524                 {
525                         stream.Close ();
526                 }
527
528                 public override void Flush ()
529                 {
530                         stream.Flush ();
531                 }
532
533                 public override int Read (byte[] buffer, int offset, int count)
534                 {
535                         int ret;
536                         if (count <= bufLength - bufPos)        {       // all from buffer
537                                 Buffer.BlockCopy (this.buffer, bufPos, buffer, offset, count);
538                                 bufPos += count;
539                                 ret = count;
540                         } else {
541                                 int bufRest = bufLength - bufPos;
542                                 if (bufLength > bufPos) {
543                                         Buffer.BlockCopy (this.buffer, bufPos, buffer, offset, bufRest);
544                                         bufPos += bufRest;
545                                 }
546                                 ret = bufRest +
547                                         stream.Read (buffer, offset + bufRest, count - bufRest);
548                         }
549                         return ret;
550                 }
551
552                 public override int ReadByte ()
553                 {
554                         if (bufLength > bufPos) {
555                                 return buffer [bufPos++];
556                         }
557                         return stream.ReadByte ();
558                 }
559
560                 public override long Seek (long offset, System.IO.SeekOrigin origin)
561                 {
562                         int bufRest = bufLength - bufPos;
563                         if (origin == SeekOrigin.Current)
564                                 if (offset < bufRest)
565                                         return buffer [bufPos + offset];
566                                 else
567                                         return stream.Seek (offset - bufRest, origin);
568                         else
569                                 return stream.Seek (offset, origin);
570                 }
571
572                 public override void SetLength (long value)
573                 {
574                         stream.SetLength (value);
575                 }
576
577                 public override void Write (byte[] buffer, int offset, int count)
578                 {
579                         throw new NotSupportedException ();
580                 }
581                 #endregion
582         }
583 }