copied mono-api-diff.cs from mono-2-2 branch so new patch can be applied and history...
[mono.git] / mcs / class / System.XML / System.Xml / XmlInputStream.cs
1 //
2 // System.Xml.XmlInputStream 
3 //      encoding-specification-wise XML input stream and reader
4 //
5 // Author:
6 //      Atsushi Enomoto (ginga@kit.hi-ho.ne.jp)
7 //
8 //      (C)2003 Atsushi Enomoto
9 //
10
11 //
12 // Permission is hereby granted, free of charge, to any person obtaining
13 // a copy of this software and associated documentation files (the
14 // "Software"), to deal in the Software without restriction, including
15 // without limitation the rights to use, copy, modify, merge, publish,
16 // distribute, sublicense, and/or sell copies of the Software, and to
17 // permit persons to whom the Software is furnished to do so, subject to
18 // the following conditions:
19 // 
20 // The above copyright notice and this permission notice shall be
21 // included in all copies or substantial portions of the Software.
22 // 
23 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
27 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
28 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
29 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
30 //
31 using System;
32 using System.IO;
33 using System.Text;
34 using System.Runtime.InteropServices;
35
36 namespace System.Xml
37 {
38         #region XmlStreamReader
39         internal class XmlStreamReader : NonBlockingStreamReader
40         {
41                 XmlInputStream input;
42
43                 XmlStreamReader (XmlInputStream input)
44                         : base (input, input.ActualEncoding != null ? input.ActualEncoding : XmlInputStream.StrictUTF8)
45                 {
46                         this.input = input;
47                 }
48
49                 public XmlStreamReader (Stream input)
50                         : this (new XmlInputStream (input))
51                 {
52                 }
53
54                 static XmlException invalidDataException = new XmlException ("invalid data.");
55
56                 public override void Close ()
57                 {
58                         this.input.Close ();
59                 }
60
61                 public override int Read ([In, Out] char[] dest_buffer, int index, int count)
62                 {
63                         try {
64                                 return base.Read (dest_buffer, index, count);
65                         }
66 #if NET_1_1
67                         catch (System.ArgumentException) {
68                                 throw invalidDataException;
69                         }
70 #else
71                         catch (System.Text.DecoderFallbackException) {
72                                 throw invalidDataException;
73                         }
74 #endif
75                 }
76
77                 protected override void Dispose (bool disposing)
78                 {
79                         base.Dispose (disposing);
80                         if (disposing) {
81                                 Close ();
82                         }
83                 }
84
85         }
86         #endregion
87
88         #region NonBlockingStreamReader
89         // mostly copied from StreamReader, removing BOM checks, ctor
90         // parameter checks and some extra public members.
91         internal class NonBlockingStreamReader : TextReader {
92
93                 const int DefaultBufferSize = 1024;
94                 const int DefaultFileBufferSize = 4096;
95                 const int MinimumBufferSize = 128;
96
97                 //
98                 // The input buffer
99                 //
100                 byte [] input_buffer;
101
102                 //
103                 // The decoded buffer from the above input buffer
104                 //
105                 char [] decoded_buffer;
106
107                 //
108                 // Decoded bytes in decoded_buffer.
109                 //
110                 int decoded_count;
111
112                 //
113                 // Current position in the decoded_buffer
114                 //
115                 int pos;
116
117                 //
118                 // The buffer size that we are using
119                 //
120                 int buffer_size;
121
122                 Encoding encoding;
123                 Decoder decoder;
124
125                 Stream base_stream;
126                 bool mayBlock;
127                 StringBuilder line_builder;
128
129                 public NonBlockingStreamReader(Stream stream, Encoding encoding)
130                 {
131                         int buffer_size = DefaultBufferSize;
132                         base_stream = stream;
133                         input_buffer = new byte [buffer_size];
134                         this.buffer_size = buffer_size;
135                         this.encoding = encoding;
136                         decoder = encoding.GetDecoder ();
137
138                         decoded_buffer = new char [encoding.GetMaxCharCount (buffer_size)];
139                         decoded_count = 0;
140                         pos = 0;
141                 }
142
143                 public Encoding Encoding {
144                         get { return encoding; }
145                 }
146
147                 public override void Close ()
148                 {
149                         Dispose (true);
150                 }
151
152                 protected override void Dispose (bool disposing)
153                 {
154                         if (disposing && base_stream != null)
155                                 base_stream.Close ();
156                         
157                         input_buffer = null;
158                         decoded_buffer = null;
159                         encoding = null;
160                         decoder = null;
161                         base_stream = null;
162                         base.Dispose (disposing);
163                 }
164
165                 public void DiscardBufferedData ()
166                 {
167                         pos = decoded_count = 0;
168                         mayBlock = false;
169 #if NET_2_0
170                         decoder.Reset ();
171 #else
172                         decoder = encoding.GetDecoder ();
173 #endif
174                 }
175                 
176                 // the buffer is empty, fill it again
177                 private int ReadBuffer ()
178                 {
179                         pos = 0;
180                         int cbEncoded = 0;
181
182                         // keep looping until the decoder gives us some chars
183                         decoded_count = 0;
184                         int parse_start = 0;
185                         do      
186                         {
187                                 cbEncoded = base_stream.Read (input_buffer, 0, buffer_size);
188                                 
189                                 if (cbEncoded == 0)
190                                         return 0;
191
192                                 mayBlock = (cbEncoded < buffer_size);
193                                 decoded_count += decoder.GetChars (input_buffer, parse_start, cbEncoded, decoded_buffer, 0);
194                                 parse_start = 0;
195                         } while (decoded_count == 0);
196
197                         return decoded_count;
198                 }
199
200                 public override int Peek ()
201                 {
202                         if (base_stream == null)
203                                 throw new ObjectDisposedException ("StreamReader", "Cannot read from a closed StreamReader");
204                         if (pos >= decoded_count && (mayBlock || ReadBuffer () == 0))
205                                 return -1;
206
207                         return decoded_buffer [pos];
208                 }
209
210                 public override int Read ()
211                 {
212                         if (base_stream == null)
213                                 throw new ObjectDisposedException ("StreamReader", "Cannot read from a closed StreamReader");
214                         if (pos >= decoded_count && ReadBuffer () == 0)
215                                 return -1;
216
217                         return decoded_buffer [pos++];
218                 }
219
220                 public override int Read ([In, Out] char[] dest_buffer, int index, int count)
221                 {
222                         if (base_stream == null)
223                                 throw new ObjectDisposedException ("StreamReader", "Cannot read from a closed StreamReader");
224                         if (dest_buffer == null)
225                                 throw new ArgumentNullException ("dest_buffer");
226                         if (index < 0)
227                                 throw new ArgumentOutOfRangeException ("index", "< 0");
228                         if (count < 0)
229                                 throw new ArgumentOutOfRangeException ("count", "< 0");
230                         // re-ordered to avoid possible integer overflow
231                         if (index > dest_buffer.Length - count)
232                                 throw new ArgumentException ("index + count > dest_buffer.Length");
233
234                         int chars_read = 0;
235 //                      while (count > 0)
236                         {
237                                 if (pos >= decoded_count && ReadBuffer () == 0)
238                                         return chars_read > 0 ? chars_read : 0;
239
240                                 int cch = Math.Min (decoded_count - pos, count);
241                                 Array.Copy (decoded_buffer, pos, dest_buffer, index, cch);
242                                 pos += cch;
243                                 index += cch;
244                                 count -= cch;
245                                 chars_read += cch;
246                         }
247                         return chars_read;
248                 }
249
250                 bool foundCR;
251                 int FindNextEOL ()
252                 {
253                         char c = '\0';
254                         for (; pos < decoded_count; pos++) {
255                                 c = decoded_buffer [pos];
256                                 if (c == '\n') {
257                                         pos++;
258                                         int res = (foundCR) ? (pos - 2) : (pos - 1);
259                                         if (res < 0)
260                                                 res = 0; // if a new buffer starts with a \n and there was a \r at
261                                                         // the end of the previous one, we get here.
262                                         foundCR = false;
263                                         return res;
264                                 } else if (foundCR) {
265                                         foundCR = false;
266                                         return pos - 1;
267                                 }
268
269                                 foundCR = (c == '\r');
270                         }
271
272                         return -1;
273                 }
274
275                 public override string ReadLine()
276                 {
277                         if (base_stream == null)
278                                 throw new ObjectDisposedException ("StreamReader", "Cannot read from a closed StreamReader");
279
280                         if (pos >= decoded_count && ReadBuffer () == 0)
281                                 return null;
282
283                         int begin = pos;
284                         int end = FindNextEOL ();
285                         if (end < decoded_count && end >= begin)
286                                 return new string (decoded_buffer, begin, end - begin);
287
288                         if (line_builder == null)
289                                 line_builder = new StringBuilder ();
290                         else
291                                 line_builder.Length = 0;
292
293                         while (true) {
294                                 if (foundCR) // don't include the trailing CR if present
295                                         decoded_count--;
296
297                                 line_builder.Append (new string (decoded_buffer, begin, decoded_count - begin));
298                                 if (ReadBuffer () == 0) {
299                                         if (line_builder.Capacity > 32768) {
300                                                 StringBuilder sb = line_builder;
301                                                 line_builder = null;
302                                                 return sb.ToString (0, sb.Length);
303                                         }
304                                         return line_builder.ToString (0, line_builder.Length);
305                                 }
306
307                                 begin = pos;
308                                 end = FindNextEOL ();
309                                 if (end < decoded_count && end >= begin) {
310                                         line_builder.Append (new string (decoded_buffer, begin, end - begin));
311                                         if (line_builder.Capacity > 32768) {
312                                                 StringBuilder sb = line_builder;
313                                                 line_builder = null;
314                                                 return sb.ToString (0, sb.Length);
315                                         }
316                                         return line_builder.ToString (0, line_builder.Length);
317                                 }
318                         }
319                 }
320
321                 public override string ReadToEnd()
322                 {
323                         if (base_stream == null)
324                                 throw new ObjectDisposedException ("StreamReader", "Cannot read from a closed StreamReader");
325
326                         StringBuilder text = new StringBuilder ();
327
328                         int size = decoded_buffer.Length;
329                         char [] buffer = new char [size];
330                         int len;
331                         
332                         while ((len = Read (buffer, 0, size)) != 0)
333                                 text.Append (buffer, 0, len);
334
335                         return text.ToString ();
336                 }
337         }
338         #endregion
339
340         class XmlInputStream : Stream
341         {
342                 public static readonly Encoding StrictUTF8;
343
344                 static XmlInputStream ()
345                 {
346                         StrictUTF8 = new UTF8Encoding (false, true);
347                 }
348
349                 Encoding enc;
350                 Stream stream;
351                 byte[] buffer;
352                 int bufLength;
353                 int bufPos;
354
355                 static XmlException encodingException = new XmlException ("invalid encoding specification.");
356
357                 public XmlInputStream (Stream stream)
358                 {
359                         Initialize (stream);
360                 }
361
362                 static string GetStringFromBytes (byte [] bytes, int index, int count)
363                 {
364 #if MOONLIGHT
365                         char [] chars = new char [count];
366                         for (int i = index; i < count; i++)
367                                 chars [i] = (char) bytes [i];
368
369                         return new string (chars);
370 #else
371                         return Encoding.ASCII.GetString (bytes, index, count);
372 #endif
373                 }
374
375                 private void Initialize (Stream stream)
376                 {
377                         buffer = new byte [64];
378                         this.stream = stream;
379                         enc = StrictUTF8; // Default to UTF8 if we can't guess it
380                         bufLength = stream.Read (buffer, 0, buffer.Length);
381                         if (bufLength == -1 || bufLength == 0) {
382                                 return;
383                         }
384
385                         int c = ReadByteSpecial ();
386                         switch (c) {
387                         case 0xFF:
388                                 c = ReadByteSpecial ();
389                                 if (c == 0xFE) {
390                                         // BOM-ed little endian utf-16
391                                         enc = Encoding.Unicode;
392                                 } else {
393                                         // It doesn't start from "<?xml" then its encoding is utf-8
394                                         bufPos = 0;
395                                 }
396                                 break;
397                         case 0xFE:
398                                 c = ReadByteSpecial ();
399                                 if (c == 0xFF) {
400                                         // BOM-ed big endian utf-16
401                                         enc = Encoding.BigEndianUnicode;
402                                         return;
403                                 } else {
404                                         // It doesn't start from "<?xml" then its encoding is utf-8
405                                         bufPos = 0;
406                                 }
407                                 break;
408                         case 0xEF:
409                                 c = ReadByteSpecial ();
410                                 if (c == 0xBB) {
411                                         c = ReadByteSpecial ();
412                                         if (c != 0xBF) {
413                                                 bufPos = 0;
414                                         }
415                                 } else {
416                                         buffer [--bufPos] = 0xEF;
417                                 }
418                                 break;
419                         case '<':
420                                 // try to get encoding name from XMLDecl.
421                                 if (bufLength >= 5 && GetStringFromBytes (buffer, 1, 4) == "?xml") {
422                                         bufPos += 4;
423                                         c = SkipWhitespace ();
424
425                                         // version. It is optional here.
426                                         if (c == 'v') {
427                                                 while (c >= 0) {
428                                                         c = ReadByteSpecial ();
429                                                         if (c == '0') { // 0 of 1.0
430                                                                 ReadByteSpecial ();
431                                                                 break;
432                                                         }
433                                                 }
434                                                 c = SkipWhitespace ();
435                                         }
436
437                                         if (c == 'e') {
438                                                 int remaining = bufLength - bufPos;
439                                                 if (remaining >= 7 && GetStringFromBytes (buffer, bufPos, 7) == "ncoding") {
440                                                         bufPos += 7;
441                                                         c = SkipWhitespace();
442                                                         if (c != '=')
443                                                                 throw encodingException;
444                                                         c = SkipWhitespace ();
445                                                         int quoteChar = c;
446                                                         StringBuilder sb = new StringBuilder ();
447                                                         while (true) {
448                                                                 c = ReadByteSpecial ();
449                                                                 if (c == quoteChar)
450                                                                         break;
451                                                                 else if (c < 0)
452                                                                         throw encodingException;
453
454                                                                 sb.Append ((char) c);
455                                                         }
456                                                         string encodingName = sb.ToString ();
457                                                         if (!XmlChar.IsValidIANAEncoding (encodingName))
458                                                                 throw encodingException;
459                                                         enc = Encoding.GetEncoding (encodingName);
460                                                 }
461                                         }
462                                 }
463 #if TARGET_JVM
464                                 else {
465                                         if (bufLength >= 10 && Encoding.Unicode.GetString (buffer, 2, 8) == "?xml")
466                                                 enc = Encoding.Unicode;
467                                 }
468 #endif
469                                 bufPos = 0;
470                                 break;
471                         default:
472                                 bufPos = 0;
473                                 break;
474                         }
475                 }
476
477                 // Just like readbyte, but grows the buffer too.
478                 int ReadByteSpecial ()
479                 {
480                         if (bufLength > bufPos)
481                                 return buffer [bufPos++];
482
483                         byte [] newbuf = new byte [buffer.Length * 2];
484                         Buffer.BlockCopy (buffer, 0, newbuf, 0, bufLength);
485                         int nbytes = stream.Read (newbuf, bufLength, buffer.Length);
486                         if (nbytes == -1 || nbytes == 0)
487                                 return -1;
488                                 
489                         bufLength += nbytes;
490                         buffer = newbuf;
491                         return buffer [bufPos++];
492                 }
493
494                 // skips whitespace and returns misc char that was read from stream
495                 private int SkipWhitespace ()
496                 {
497                         int c;
498                         while (true) {
499                                 c = ReadByteSpecial ();
500                                 switch ((char) c) {
501                                 case '\r': goto case ' ';
502                                 case '\n': goto case ' ';
503                                 case '\t': goto case ' ';
504                                 case ' ':
505                                         continue;
506                                 default:
507                                         return c;
508                                 }
509                         }
510                 }
511
512                 public Encoding ActualEncoding {
513                         get { return enc; }
514                 }
515
516                 #region Public Overrides
517                 public override bool CanRead {
518                         get {
519                                 if (bufLength > bufPos)
520                                         return true;
521                                 else
522                                         return stream.CanRead; 
523                         }
524                 }
525
526                 // FIXME: It should support base stream's CanSeek.
527                 public override bool CanSeek {
528                         get { return false; } // stream.CanSeek; }
529                 }
530
531                 public override bool CanWrite {
532                         get { return false; }
533                 }
534
535                 public override long Length {
536                         get {
537                                 return stream.Length;
538                         }
539                 }
540
541                 public override long Position {
542                         get {
543                                 return stream.Position - bufLength + bufPos;
544                         }
545                         set {
546                                 if(value < bufLength)
547                                         bufPos = (int)value;
548                                 else
549                                         stream.Position = value - bufLength;
550                         }
551                 }
552
553                 public override void Close ()
554                 {
555                         stream.Close ();
556                 }
557
558                 public override void Flush ()
559                 {
560                         stream.Flush ();
561                 }
562
563                 public override int Read (byte[] buffer, int offset, int count)
564                 {
565                         int ret;
566                         if (count <= bufLength - bufPos)        {       // all from buffer
567                                 Buffer.BlockCopy (this.buffer, bufPos, buffer, offset, count);
568                                 bufPos += count;
569                                 ret = count;
570                         } else {
571                                 int bufRest = bufLength - bufPos;
572                                 if (bufLength > bufPos) {
573                                         Buffer.BlockCopy (this.buffer, bufPos, buffer, offset, bufRest);
574                                         bufPos += bufRest;
575                                 }
576                                 ret = bufRest +
577                                         stream.Read (buffer, offset + bufRest, count - bufRest);
578                         }
579                         return ret;
580                 }
581
582                 public override int ReadByte ()
583                 {
584                         if (bufLength > bufPos) {
585                                 return buffer [bufPos++];
586                         }
587                         return stream.ReadByte ();
588                 }
589
590                 public override long Seek (long offset, System.IO.SeekOrigin origin)
591                 {
592                         int bufRest = bufLength - bufPos;
593                         if (origin == SeekOrigin.Current)
594                                 if (offset < bufRest)
595                                         return buffer [bufPos + offset];
596                                 else
597                                         return stream.Seek (offset - bufRest, origin);
598                         else
599                                 return stream.Seek (offset, origin);
600                 }
601
602                 public override void SetLength (long value)
603                 {
604                         stream.SetLength (value);
605                 }
606
607                 public override void Write (byte[] buffer, int offset, int count)
608                 {
609                         throw new NotSupportedException ();
610                 }
611                 #endregion
612         }
613 }