2003-08-07 Atsushi Enomoto <ginga@kit.hi-ho.ne.jp>
[mono.git] / mcs / class / System.XML / System.Xml / XmlInputStream.cs
1 //
2 // System.Xml.XmlInputStream 
3 //      encoding-specification-wise XML input stream and reader
4 //
5 // Author:
6 //      Atsushi Enomoto (ginga@kit.hi-ho.ne.jp)
7 //
8 //      (C)2003 Atsushi Enomoto
9 //
10 using System;
11 using System.IO;
12 using System.Text;
13 using System.Xml;
14
15 namespace Mono.Xml.Native
16 {
17         #region XmlStreamReader
18         public class XmlStreamReader : StreamReader
19         {
20                 XmlInputStream input;
21
22                 XmlStreamReader (XmlInputStream input)
23                         : base (input, input.ActualEncoding != null ? input.ActualEncoding : Encoding.UTF8)
24                 {
25                         this.input = input;
26                 }
27
28                 public XmlStreamReader (Stream input)
29                         : this (new XmlInputStream (input, true))
30                 {
31                 }
32
33                 public XmlStreamReader (Stream input, bool docent)
34                         : this (new XmlInputStream (input, docent))
35                 {
36                 }
37
38 //              public XmlStreamReader (string url)
39 //                      : this (url, true)
40 //              {
41 //              }
42 //
43 //              public XmlStreamReader (string url, bool docent)
44 //                      : this (new XmlInputStream (url, docent, null, null))
45 //              {
46 //              }
47
48                 public XmlStreamReader (string url, XmlResolver resolver, string baseURI)
49                         : this (url, true, resolver, baseURI)
50                 {
51                 }
52
53                 public XmlStreamReader (string url, bool docent, XmlResolver resolver,
54                         string baseURI)
55                         : this (new XmlInputStream (url, docent, resolver, baseURI))
56                 {
57                 }
58
59                 public override void Close ()
60                 {
61                         this.input.Close ();
62                 }
63
64                 protected override void Dispose (bool disposing)
65                 {
66                         base.Dispose (disposing);
67                         if (disposing) {
68                                 Close ();
69                         }
70                 }
71         }
72         #endregion
73
74         class XmlInputStream : Stream
75         {
76                 Encoding enc;
77                 Stream stream;
78                 byte[] buffer = new byte[256];
79                 int bufLength;
80                 int bufPos;
81                 bool isDocumentEntity;  // allow omitting "version" or not.
82
83                 static XmlException encodingException = new XmlException ("invalid encoding specification.");
84 /*
85                 public XmlInputStream (string url)
86                         : this (url, true)
87                 {
88                 }
89 */
90                 public XmlInputStream (string url, bool docent, XmlResolver resolver, string baseURI)
91                 {
92                         this.isDocumentEntity = docent;
93                         // Use XmlResolver to resolve external entity.
94 #if true // #if REMOVE_IT_AFTER_URI_IMPLEMENTED
95                         if (resolver == null)
96                                 resolver = new XmlUrlResolver ();
97                         Uri uri = resolver.ResolveUri (
98                                 baseURI == null || baseURI == String.Empty ?
99                                 null : new Uri (baseURI), url);
100                         Stream s = resolver.GetEntity (uri, null, typeof (Stream)) as Stream;
101 #else
102                         Stream s = new FileStream (url, FileMode.Open, FileAccess.Read);
103 #endif
104                         Initialize (s);
105                 }
106
107                 public XmlInputStream (Stream stream)
108                         : this (stream, true)
109                 {
110                 }
111
112                 public XmlInputStream (Stream stream, bool docent)
113                 {
114                         this.isDocumentEntity = docent;
115                         Initialize (stream);
116                 }
117
118                 private void Initialize (Stream stream)
119                 {
120                         // FIXME: seems too waste...
121                         MemoryStream ms = new MemoryStream ();
122                         this.stream = stream;
123                         int c = stream.ReadByte ();
124                         switch (c) {
125                         case 0xFF:
126                                 c = stream.ReadByte ();
127                                 if (c == 0xFE) {
128                                         // BOM-ed little endian utf-16
129                                         enc = Encoding.Unicode;
130                                 } else {
131                                         // It doesn't start from "<?xml" then its encoding is utf-8
132                                         enc = Encoding.UTF8;
133                                         ms.WriteByte ((byte)0xFF);
134                                         ms.WriteByte ((byte)c);
135                                 }
136                                 break;
137                         case 0xFE:
138                                 c = stream.ReadByte ();
139                                 if (c == 0xFF) {
140                                         // BOM-ed big endian utf-16
141                                         enc = Encoding.BigEndianUnicode;
142                                         return;
143                                 } else {
144                                         // It doesn't start from "<?xml" then its encoding is utf-8
145                                         enc = Encoding.UTF8;
146                                         ms.WriteByte ((byte)0xFE);
147                                         ms.WriteByte ((byte)c);
148                                 }
149                                 break;
150                         case 0xEF:
151                                 enc = Encoding.UTF8;
152                                 c = ReadByte ();
153                                 if (c == 0xBB) {
154                                         c = ReadByte ();
155                                         if (c != 0xBF) {
156                                                 ms.WriteByte ((byte)0xEF);
157                                                 ms.WriteByte ((byte)0xBB);
158                                                 ms.WriteByte ((byte)c);
159                                         }
160                                 } else {
161                                         ms.WriteByte ((byte)0xEF);
162                                 }
163                                 break;
164                         case '<':
165                                 // try to get encoding name from XMLDecl.
166                                 ms.WriteByte ((byte)'<');
167                                 int size = stream.Read (buffer, 1, 4);
168                                 ms.Write (buffer, 1, 4);
169                                 if (Encoding.ASCII.GetString (buffer, 1, 4) == "?xml") {
170                                         int loop = 0;
171                                         c = SkipWhitespace (ms);
172
173                                         // version. It is optional here.
174                                         if (c != 'v') {
175                                                 // FIXME: temporarily comment out here.
176 //                                              if (isDocumentEntity)
177 //                                                      throw new XmlException ("invalid xml declaration.");
178                                         } else {
179                                                 ms.WriteByte ((byte)'v');
180                                                 while (loop++ >= 0 && c >= 0) {
181                                                         c = stream.ReadByte ();
182                                                         ms.WriteByte ((byte)c);
183                                                         if (c == '0') { // 0 of 1.0
184                                                                 ms.WriteByte ((byte)stream.ReadByte ());
185                                                                 break;
186                                                         }
187                                                 }
188                                                 c = SkipWhitespace (ms);
189                                         }
190
191                                         if (c == 'e') {
192                                                 ms.WriteByte ((byte)'e');
193                                                 size = stream.Read (buffer, 0, 7);
194                                                 ms.Write (buffer, 0, 7);
195                                                 if (Encoding.ASCII.GetString(buffer, 0, 7) == "ncoding") {
196                                                         c = this.SkipWhitespace(ms);
197                                                         if (c != '=')
198                                                                 throw encodingException;
199                                                         ms.WriteByte ((byte)'=');
200                                                         c = this.SkipWhitespace (ms);
201                                                         int quoteChar = c;
202                                                         ms.WriteByte ((byte)c);
203                                                         int start = (int)ms.Position;
204                                                         while (loop++ >= 0) {
205                                                                 c = stream.ReadByte ();
206                                                                 if (c == quoteChar)
207                                                                         break;
208                                                                 else if (c < 0)
209                                                                         throw encodingException;
210                                                                 ms.WriteByte ((byte)c);
211                                                         }
212                                                         string encodingName = Encoding.UTF8.GetString (ms.GetBuffer (), start, (int)ms.Position - start);
213                                                         if (!XmlChar.IsValidIANAEncoding (encodingName))
214                                                                 throw encodingException;
215                                                         ms.WriteByte ((byte)quoteChar);
216                                                         enc = Encoding.GetEncoding (encodingName);
217                                                 }
218                                                 else
219                                                         ms.Write (buffer, 0, size);
220                                         }
221                                         else
222                                                 ms.WriteByte ((byte)c);
223                                 }
224                                 buffer = ms.ToArray ();
225                                 bufLength = buffer.Length;
226                                 bufPos = 0;
227                                 break;
228                         default:
229                                 buffer [0] = (byte)c;
230                                 bufLength = 1;
231                                 enc = Encoding.UTF8;
232                                 break;
233                         }
234                 }
235
236                 // skips whitespace and returns misc char that was read from stream
237                 private int SkipWhitespace (MemoryStream ms)    // ms may be null
238                 {
239                         int loop = 0;
240                         int c;
241                         while (loop++ >= 0) { // defends infinite loop (expecting overflow)
242                                 c = stream.ReadByte ();
243                                 switch (c) {
244                                 case '\r': goto case ' ';
245                                 case '\n': goto case ' ';
246                                 case '\t': goto case ' ';
247                                 case ' ':
248                                         if (ms != null)
249                                                 ms.WriteByte ((byte)c);
250                                         continue;
251                                 default:
252                                         return c;
253                                 }
254                         }
255                         throw new InvalidOperationException ();
256                 }
257
258                 public Encoding ActualEncoding {
259                         get { return enc; }
260                 }
261
262                 #region Public Overrides
263                 public override bool CanRead {
264                         get { return stream.CanRead; }
265                 }
266
267                 public override bool CanSeek {
268                         get { return false; } //stream.CanSeek; }
269                 }
270
271                 public override bool CanWrite {
272                         get { return false; }
273                 }
274
275                 public override long Length {
276                         get {
277                                 return stream.Length;
278                         }
279                 }
280
281                 public override long Position {
282                         get {
283                                 return stream.Position + bufLength;
284                         }
285                         set {
286                                 if(value < bufLength)
287                                         bufPos = (int)value;
288                                 else
289                                         stream.Position = value - bufLength;
290                         }
291                 }
292
293                 public override void Close ()
294                 {
295                         stream.Close ();
296                 }
297
298                 public override void Flush ()
299                 {
300                         stream.Flush ();
301                 }
302
303                 public override int Read (byte[] buffer, int offset, int count)
304                 {
305                         int ret;
306                         if (count <= bufLength - bufPos)        {       // all from buffer
307                                 Array.Copy (this.buffer, bufPos, buffer, offset, count);
308                                 bufPos += count;
309                                 ret = count;
310                         } else {
311                                 int bufRest = bufLength - bufPos;
312                                 if (bufLength > bufPos) {
313                                         Array.Copy (this.buffer, bufPos, buffer, offset, bufRest);
314                                         bufPos += bufRest;
315                                 }
316                                 ret = bufRest +
317                                         stream.Read (buffer, offset + bufRest, count - bufRest);
318                         }
319                         return ret;
320                 }
321
322                 public override int ReadByte ()
323                 {
324                         if (bufLength > bufPos) {
325                                 return buffer [bufPos++];
326                         }
327                         return stream.ReadByte ();
328                 }
329
330                 public override long Seek (long offset, System.IO.SeekOrigin origin)
331                 {
332                         int bufRest = bufLength - bufPos;
333                         if (origin == SeekOrigin.Current)
334                                 if (offset < bufRest)
335                                         return buffer [bufPos + offset];
336                                 else
337                                         return stream.Seek (offset - bufRest, origin);
338                         else
339                                 return stream.Seek (offset, origin);
340                 }
341
342                 public override void SetLength (long value)
343                 {
344                         stream.SetLength (value);
345                 }
346
347                 public override void Write (byte[] buffer, int offset, int count)
348                 {
349                         throw new NotSupportedException ();
350                 }
351                 #endregion
352         }
353 }