2004-06-07 Gonzalo Paniagua Javier <gonzalo@ximian.com>
[mono.git] / mcs / class / System.XML / System.Xml / XmlInputStream.cs
1 //
2 // System.Xml.XmlInputStream 
3 //      encoding-specification-wise XML input stream and reader
4 //
5 // Author:
6 //      Atsushi Enomoto (ginga@kit.hi-ho.ne.jp)
7 //
8 //      (C)2003 Atsushi Enomoto
9 //
10 using System;
11 using System.IO;
12 using System.Text;
13 using System.Xml;
14
15 namespace System.Xml
16 {
17         #region XmlStreamReader
18         internal class XmlStreamReader : StreamReader
19         {
20                 XmlInputStream input;
21
22                 XmlStreamReader (XmlInputStream input)
23                         : base (input, input.ActualEncoding != null ? input.ActualEncoding : Encoding.UTF8)
24                 {
25                         this.input = input;
26                 }
27
28                 public XmlStreamReader (Stream input)
29                         : this (new XmlInputStream (input))
30                 {
31                 }
32
33                 public override void Close ()
34                 {
35                         this.input.Close ();
36                 }
37
38                 protected override void Dispose (bool disposing)
39                 {
40                         base.Dispose (disposing);
41                         if (disposing) {
42                                 Close ();
43                         }
44                 }
45
46         }
47         #endregion
48
49         class XmlInputStream : Stream
50         {
51                 Encoding enc;
52                 Stream stream;
53                 byte[] buffer;
54                 int bufLength;
55                 int bufPos;
56
57                 static XmlException encodingException = new XmlException ("invalid encoding specification.");
58
59                 public XmlInputStream (Stream stream)
60                 {
61                         Initialize (stream);
62                 }
63
64                 private void Initialize (Stream stream)
65                 {
66                         buffer = new byte [1024];
67                         this.stream = stream;
68                         enc = Encoding.UTF8; // Default to UTF8 if we can't guess it
69                         bufLength = stream.Read (buffer, 0, buffer.Length);
70                         if (bufLength == -1 || bufLength == 0) {
71                                 return;
72                         }
73
74                         int c = ReadByteSpecial ();
75                         switch (c) {
76                         case 0xFF:
77                                 c = ReadByteSpecial ();
78                                 if (c == 0xFE) {
79                                         // BOM-ed little endian utf-16
80                                         enc = Encoding.Unicode;
81                                 } else {
82                                         // It doesn't start from "<?xml" then its encoding is utf-8
83                                         bufPos = 0;
84                                 }
85                                 break;
86                         case 0xFE:
87                                 c = ReadByteSpecial ();
88                                 if (c == 0xFF) {
89                                         // BOM-ed big endian utf-16
90                                         enc = Encoding.BigEndianUnicode;
91                                         return;
92                                 } else {
93                                         // It doesn't start from "<?xml" then its encoding is utf-8
94                                         bufPos = 0;
95                                 }
96                                 break;
97                         case 0xEF:
98                                 c = ReadByteSpecial ();
99                                 if (c == 0xBB) {
100                                         c = ReadByteSpecial ();
101                                         if (c != 0xBF) {
102                                                 bufPos = 0;
103                                         }
104                                 } else {
105                                         buffer [--bufPos] = 0xEF;
106                                 }
107                                 break;
108                         case '<':
109                                 // try to get encoding name from XMLDecl.
110                                 if (bufLength >= 5 && Encoding.ASCII.GetString (buffer, 1, 4) == "?xml") {
111                                         bufPos += 4;
112                                         int loop = 0;
113                                         c = SkipWhitespace ();
114
115                                         // version. It is optional here.
116                                         if (c == 'v') {
117                                                 while (loop++ >= 0 && c >= 0) {
118                                                         ReadByteSpecial ();
119                                                         c = ReadByteSpecial ();
120                                                         if (c == '0') { // 0 of 1.0
121                                                                 break;
122                                                         }
123                                                 }
124                                                 c = SkipWhitespace ();
125                                         }
126
127                                         if (c == 'e') {
128                                                 int remaining = bufLength - bufPos;
129                                                 if (remaining >= 7 && Encoding.ASCII.GetString(buffer, 0, 7) == "ncoding") {
130                                                         bufPos += 7;
131                                                         c = SkipWhitespace();
132                                                         if (c != '=')
133                                                                 throw encodingException;
134                                                         c = SkipWhitespace ();
135                                                         int quoteChar = c;
136                                                         StringBuilder sb = new StringBuilder ();
137                                                         while (loop++ >= 0) {
138                                                                 c = ReadByteSpecial ();
139                                                                 if (c == quoteChar)
140                                                                         break;
141                                                                 else if (c < 0)
142                                                                         throw encodingException;
143
144                                                                 sb.Append ((char) c);
145                                                         }
146                                                         string encodingName = sb.ToString ();
147                                                         if (!XmlChar.IsValidIANAEncoding (encodingName))
148                                                                 throw encodingException;
149                                                         enc = Encoding.GetEncoding (encodingName);
150                                                 }
151                                         }
152                                 }
153                                 bufPos = 0;
154                                 break;
155                         default:
156                                 bufPos = 0;
157                                 break;
158                         }
159                 }
160
161                 // Just like readbyte, but grows the buffer too.
162                 int ReadByteSpecial ()
163                 {
164                         if (bufLength > bufPos)
165                                 return buffer [bufPos++];
166
167                         byte [] newbuf = new byte [buffer.Length * 2];
168                         Buffer.BlockCopy (buffer, 0, newbuf, 0, bufLength);
169                         int nbytes = stream.Read (newbuf, bufLength, buffer.Length);
170                         if (nbytes == -1 || nbytes == 0)
171                                 return -1;
172                                 
173                         bufLength += nbytes;
174                         buffer = newbuf;
175                         return buffer [bufPos++];
176                 }
177
178                 // skips whitespace and returns misc char that was read from stream
179                 private int SkipWhitespace ()   // ms may be null
180                 {
181                         int loop = 0;
182                         int c;
183                         while (loop++ >= 0) { // defends infinite loop (expecting overflow)
184                                 c = ReadByteSpecial ();
185                                 switch (c) {
186                                 case '\r': goto case ' ';
187                                 case '\n': goto case ' ';
188                                 case '\t': goto case ' ';
189                                 case ' ':
190                                         continue;
191                                 default:
192                                         return c;
193                                 }
194                         }
195                         throw new InvalidOperationException ();
196                 }
197
198                 public Encoding ActualEncoding {
199                         get { return enc; }
200                 }
201
202                 #region Public Overrides
203                 public override bool CanRead {
204                         get {
205                                 if (bufLength > bufPos)
206                                         return true;
207                                 else
208                                         return stream.CanRead; 
209                         }
210                 }
211
212                 // FIXME: It should support base stream's CanSeek.
213                 public override bool CanSeek {
214                         get { return false; } // stream.CanSeek; }
215                 }
216
217                 public override bool CanWrite {
218                         get { return false; }
219                 }
220
221                 public override long Length {
222                         get {
223                                 return stream.Length;
224                         }
225                 }
226
227                 public override long Position {
228                         get {
229                                 return stream.Position - bufLength + bufPos;
230                         }
231                         set {
232                                 if(value < bufLength)
233                                         bufPos = (int)value;
234                                 else
235                                         stream.Position = value - bufLength;
236                         }
237                 }
238
239                 public override void Close ()
240                 {
241                         stream.Close ();
242                 }
243
244                 public override void Flush ()
245                 {
246                         stream.Flush ();
247                 }
248
249                 public override int Read (byte[] buffer, int offset, int count)
250                 {
251                         int ret;
252                         if (count <= bufLength - bufPos)        {       // all from buffer
253                                 Array.Copy (this.buffer, bufPos, buffer, offset, count);
254                                 bufPos += count;
255                                 ret = count;
256                         } else {
257                                 int bufRest = bufLength - bufPos;
258                                 if (bufLength > bufPos) {
259                                         Array.Copy (this.buffer, bufPos, buffer, offset, bufRest);
260                                         bufPos += bufRest;
261                                 }
262                                 ret = bufRest +
263                                         stream.Read (buffer, offset + bufRest, count - bufRest);
264                         }
265                         return ret;
266                 }
267
268                 public override int ReadByte ()
269                 {
270                         if (bufLength > bufPos) {
271                                 return buffer [bufPos++];
272                         }
273                         return stream.ReadByte ();
274                 }
275
276                 public override long Seek (long offset, System.IO.SeekOrigin origin)
277                 {
278                         int bufRest = bufLength - bufPos;
279                         if (origin == SeekOrigin.Current)
280                                 if (offset < bufRest)
281                                         return buffer [bufPos + offset];
282                                 else
283                                         return stream.Seek (offset - bufRest, origin);
284                         else
285                                 return stream.Seek (offset, origin);
286                 }
287
288                 public override void SetLength (long value)
289                 {
290                         stream.SetLength (value);
291                 }
292
293                 public override void Write (byte[] buffer, int offset, int count)
294                 {
295                         throw new NotSupportedException ();
296                 }
297                 #endregion
298         }
299 }