2005-02-22 Atsushi Enomoto <atsushi@ximian.com>
[mono.git] / mcs / class / System.XML / System.Xml / XmlInputStream.cs
1 //
2 // System.Xml.XmlInputStream 
3 //      encoding-specification-wise XML input stream and reader
4 //
5 // Author:
6 //      Atsushi Enomoto (ginga@kit.hi-ho.ne.jp)
7 //
8 //      (C)2003 Atsushi Enomoto
9 //
10
11 //
12 // Permission is hereby granted, free of charge, to any person obtaining
13 // a copy of this software and associated documentation files (the
14 // "Software"), to deal in the Software without restriction, including
15 // without limitation the rights to use, copy, modify, merge, publish,
16 // distribute, sublicense, and/or sell copies of the Software, and to
17 // permit persons to whom the Software is furnished to do so, subject to
18 // the following conditions:
19 // 
20 // The above copyright notice and this permission notice shall be
21 // included in all copies or substantial portions of the Software.
22 // 
23 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
27 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
28 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
29 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
30 //
31 using System;
32 using System.IO;
33 using System.Text;
34 using System.Xml;
35
36 namespace System.Xml
37 {
38         #region XmlStreamReader
// StreamReader that decodes an XML byte stream using the encoding that
// XmlInputStream detected from the BOM or the XML declaration.
internal class XmlStreamReader : StreamReader
{
        // The encoding-detecting wrapper around the caller's stream.
        XmlInputStream input;

        // Wraps the caller's stream in an XmlInputStream and decodes from it.
        public XmlStreamReader (Stream input)
                : this (new XmlInputStream (input))
        {
        }

        XmlStreamReader (XmlInputStream input)
                : base (input, SelectEncoding (input))
        {
                this.input = input;
        }

        // Returns the encoding reported by the input stream, or strict UTF-8
        // when the stream reports none.
        static Encoding SelectEncoding (XmlInputStream source)
        {
                if (source.ActualEncoding == null)
                        return XmlInputStream.StrictUTF8;
                return source.ActualEncoding;
        }

        // Closes the wrapped input stream (which closes the underlying stream).
        public override void Close ()
        {
                input.Close ();
        }

        // Lets the base reader release its resources first, then closes the
        // input stream when called from an explicit dispose.
        protected override void Dispose (bool disposing)
        {
                base.Dispose (disposing);
                if (!disposing)
                        return;
                Close ();
        }
}
68         #endregion
69
// Read-only stream wrapper that peeks at the first bytes of an XML document
// to work out its character encoding (byte order mark first, then the
// encoding pseudo-attribute of the XML declaration) and afterwards replays
// the peeked bytes, followed by the rest of the underlying stream.
class XmlInputStream : Stream
{
        // UTF-8 that emits no BOM and throws on invalid byte sequences.
        public static readonly Encoding StrictUTF8;

        static XmlInputStream ()
        {
                StrictUTF8 = new UTF8Encoding (false, true);
        }

        Encoding enc;
        Stream stream;
        // Look-ahead bytes consumed from 'stream' during detection.
        byte[] buffer;
        // Number of valid bytes in 'buffer'.
        int bufLength;
        // Index of the next byte of 'buffer' to hand out.
        int bufPos;
        // Position of the underlying stream at which buffer [0] was read.
        // Stays 0 when the underlying stream cannot report its position.
        long bufOrigin;

        // A new exception per failure: a shared instance would have its stack
        // trace overwritten by every throw.
        static XmlException CreateEncodingException ()
        {
                return new XmlException ("invalid encoding specification.");
        }

        public XmlInputStream (Stream stream)
        {
                Initialize (stream);
        }

        // Fills the look-ahead buffer and detects the encoding. On return,
        // bufPos points just past a recognized BOM, or at 0 otherwise, so the
        // XML declaration itself is still delivered to the consumer.
        private void Initialize (Stream stream)
        {
                buffer = new byte [64];
                this.stream = stream;
                enc = StrictUTF8; // Default to UTF8 if we can't guess it
                bufOrigin = stream.CanSeek ? stream.Position : 0;
                bufLength = stream.Read (buffer, 0, buffer.Length);
                if (bufLength <= 0) {
                        bufLength = 0;
                        return;
                }

                int c = ReadByteSpecial ();
                switch (c) {
                case 0xFF:
                        if (ReadByteSpecial () == 0xFE) {
                                // BOM-ed little endian utf-16
                                enc = Encoding.Unicode;
                        } else {
                                // Not a BOM: replay everything as utf-8
                                bufPos = 0;
                        }
                        break;
                case 0xFE:
                        if (ReadByteSpecial () == 0xFF) {
                                // BOM-ed big endian utf-16
                                enc = Encoding.BigEndianUnicode;
                        } else {
                                // Not a BOM: replay everything as utf-8
                                bufPos = 0;
                        }
                        break;
                case 0xEF:
                        // The utf-8 BOM is EF BB BF; anything else is content
                        // and must be replayed from the very first byte.
                        if (ReadByteSpecial () != 0xBB || ReadByteSpecial () != 0xBF)
                                bufPos = 0;
                        break;
                case '<':
                        // try to get encoding name from XMLDecl.
                        ReadDeclaredEncoding ();
                        bufPos = 0;
                        break;
                default:
                        bufPos = 0;
                        break;
                }
        }

        // Parses "?xml [version=...] [encoding=...]" following an already
        // consumed '<' and stores the declared encoding in 'enc'. Returns
        // silently when there is no declaration or no encoding
        // pseudo-attribute; throws XmlException when an encoding
        // pseudo-attribute is present but malformed.
        void ReadDeclaredEncoding ()
        {
                if (!ReadLiteral ("?xml"))
                        return;
                int c = SkipWhitespace ();

                // version. It is optional here.
                if (c == 'v') {
                        if (!SkipPseudoAttributeValue ())
                                return;
                        c = SkipWhitespace ();
                }

                if (c != 'e' || !ReadLiteral ("ncoding"))
                        return;
                if (SkipWhitespace () != '=')
                        throw CreateEncodingException ();
                int quoteChar = SkipWhitespace ();
                if (quoteChar != '"' && quoteChar != '\'')
                        throw CreateEncodingException ();

                StringBuilder sb = new StringBuilder ();
                while (true) {
                        c = ReadByteSpecial ();
                        if (c == quoteChar)
                                break;
                        if (c < 0)
                                throw CreateEncodingException ();
                        sb.Append ((char) c);
                }
                string encodingName = sb.ToString ();
                if (!XmlChar.IsValidIANAEncoding (encodingName))
                        throw CreateEncodingException ();
                enc = Encoding.GetEncoding (encodingName);
        }

        // Consumes bytes as long as they match the given ASCII literal.
        // Returns false at the first mismatch or at end of stream.
        bool ReadLiteral (string literal)
        {
                for (int i = 0; i < literal.Length; i++) {
                        if (ReadByteSpecial () != literal [i])
                                return false;
                }
                return true;
        }

        // Skips the remainder of a pseudo-attribute: everything up to the
        // opening quote, then everything up to the matching closing quote.
        // Returns false when '>' or end of stream is met before a complete
        // quoted value has been skipped.
        bool SkipPseudoAttributeValue ()
        {
                int c;
                do {
                        c = ReadByteSpecial ();
                } while (c >= 0 && c != '"' && c != '\'' && c != '>');
                if (c != '"' && c != '\'')
                        return false;

                int quoteChar = c;
                do {
                        c = ReadByteSpecial ();
                } while (c >= 0 && c != quoteChar);
                return c == quoteChar;
        }

        // Just like readbyte, but appends to the look-ahead buffer (growing
        // it when it is full) instead of discarding what was read.
        // Returns -1 at end of stream.
        int ReadByteSpecial ()
        {
                if (bufLength > bufPos)
                        return buffer [bufPos++];

                if (bufLength == buffer.Length) {
                        byte [] newbuf = new byte [buffer.Length * 2];
                        Buffer.BlockCopy (buffer, 0, newbuf, 0, bufLength);
                        buffer = newbuf;
                }
                int nbytes = stream.Read (buffer, bufLength, buffer.Length - bufLength);
                if (nbytes <= 0)
                        return -1;

                bufLength += nbytes;
                return buffer [bufPos++];
        }

        // skips whitespace and returns misc char that was read from stream
        // (-1 at end of stream)
        private int SkipWhitespace ()
        {
                while (true) {
                        int c = ReadByteSpecial ();
                        switch (c) {
                        case '\r':
                        case '\n':
                        case '\t':
                        case ' ':
                                continue;
                        default:
                                return c;
                        }
                }
        }

        // The encoding chosen during initialization (StrictUTF8 by default).
        public Encoding ActualEncoding {
                get { return enc; }
        }

        #region Public Overrides
        public override bool CanRead {
                get {
                        if (bufLength > bufPos)
                                return true;
                        return stream.CanRead;
                }
        }

        // FIXME: It should support base stream's CanSeek.
        public override bool CanSeek {
                get { return false; } // stream.CanSeek; }
        }

        public override bool CanWrite {
                get { return false; }
        }

        public override long Length {
                get { return stream.Length; }
        }

        // Position expressed in the coordinates of the underlying stream.
        public override long Position {
                get {
                        // Bytes still waiting in the look-ahead buffer have been
                        // consumed from the underlying stream but not from us.
                        return stream.Position - bufLength + bufPos;
                }
                set {
                        long offsetInBuffer = value - bufOrigin;
                        if (offsetInBuffer >= 0 && offsetInBuffer <= bufLength) {
                                // Target is covered by the look-ahead buffer: park the
                                // underlying stream right after the buffered range so
                                // that reading continues seamlessly once it is drained.
                                if (stream.CanSeek)
                                        stream.Position = bufOrigin + bufLength;
                                bufPos = (int) offsetInBuffer;
                        } else {
                                stream.Position = value;
                                // Mark the buffer as drained so stale bytes are not replayed.
                                bufPos = bufLength;
                        }
                }
        }

        public override void Close ()
        {
                stream.Close ();
        }

        public override void Flush ()
        {
                stream.Flush ();
        }

        // Serves buffered look-ahead bytes first. The underlying stream is
        // only read once the buffer is drained, so a call never blocks while
        // data is already available; it may therefore return fewer bytes
        // than requested.
        public override int Read (byte[] buffer, int offset, int count)
        {
                int bufRest = bufLength - bufPos;
                if (bufRest <= 0)
                        return stream.Read (buffer, offset, count);

                int n = count < bufRest ? count : bufRest;
                Buffer.BlockCopy (this.buffer, bufPos, buffer, offset, n);
                bufPos += n;
                return n;
        }

        public override int ReadByte ()
        {
                if (bufLength > bufPos)
                        return buffer [bufPos++];
                return stream.ReadByte ();
        }

        // Moves to the requested position, taking the look-ahead buffer into
        // account, and returns the new position. Requires an underlying
        // stream that supports Position/Length.
        public override long Seek (long offset, System.IO.SeekOrigin origin)
        {
                long target;
                switch (origin) {
                case SeekOrigin.Begin:
                        target = offset;
                        break;
                case SeekOrigin.Current:
                        target = Position + offset;
                        break;
                case SeekOrigin.End:
                        target = stream.Length + offset;
                        break;
                default:
                        throw new ArgumentException ("Invalid seek origin.", "origin");
                }
                Position = target;
                return target;
        }

        public override void SetLength (long value)
        {
                stream.SetLength (value);
        }

        public override void Write (byte[] buffer, int offset, int count)
        {
                throw new NotSupportedException ();
        }
        #endregion
}
325 }