2003-04-25 Atsushi Enomoto <ginga@kit.hi-ho.ne.jp>
[mono.git] / mcs / class / System.XML / System.Xml / XmlInputStream.cs
1 //
2 // System.Xml.XmlInputStream 
3 //      encoding-specification-wise XML input stream and reader
4 //
5 // Author:
6 //      Atsushi Enomoto (ginga@kit.hi-ho.ne.jp)
7 //
8 //      (C)2003 Atsushi Enomoto
9 //
10 using System;\r
11 using System.IO;\r
12 using System.Text;\r
13 using System.Xml;\r
14 \r
15 namespace Mono.Xml.Native\r
16 {\r
17         #region XmlStreamReader
/// <summary>
/// StreamReader layered over an XmlInputStream. The reader decodes with
/// the encoding the XmlInputStream reports through ActualEncoding, or
/// with UTF-8 when it reports none.
/// </summary>
public class XmlStreamReader : StreamReader
{
        // The wrapped stream; kept so that Close () can reach it directly.
        XmlInputStream source;

        // Picks the encoding passed to the base reader: the one reported
        // by the XmlInputStream, or UTF-8 when ActualEncoding is null.
        static Encoding SelectEncoding (XmlInputStream xmlInput)
        {
                Encoding detected = xmlInput.ActualEncoding;
                if (detected == null)
                        return Encoding.UTF8;
                return detected;
        }

        // Every public constructor funnels into this one.
        XmlStreamReader (XmlInputStream xmlInput)
                : base (xmlInput, SelectEncoding (xmlInput))
        {
                source = xmlInput;
        }

        /// <summary>
        /// Reads from <paramref name="input"/>, passing true for the
        /// XmlInputStream "docent" flag.
        /// </summary>
        public XmlStreamReader (Stream input)
                : this (input, true)
        {
        }

        /// <summary>
        /// Reads from <paramref name="input"/>; <paramref name="docent"/>
        /// is forwarded unchanged to the XmlInputStream constructor.
        /// </summary>
        public XmlStreamReader (Stream input, bool docent)
                : this (new XmlInputStream (input, docent))
        {
        }

        /// <summary>
        /// Reads from the resource named by <paramref name="url"/>,
        /// passing true for the XmlInputStream "docent" flag.
        /// </summary>
        public XmlStreamReader (string url)
                : this (new XmlInputStream (url, true))
        {
        }

        /// <summary>
        /// Reads from the resource named by <paramref name="url"/>;
        /// <paramref name="docent"/> is forwarded unchanged to the
        /// XmlInputStream constructor.
        /// </summary>
        public XmlStreamReader (string url, bool docent)
                : this (new XmlInputStream (url, docent))
        {
        }

        /// <summary>
        /// Closes the wrapped XmlInputStream. The base implementation is
        /// intentionally not invoked here.
        /// </summary>
        public override void Close ()
        {
                source.Close ();
        }

        /// <summary>
        /// Runs the base disposal first and then, for an explicit
        /// dispose only, closes the wrapped stream through Close ().
        /// </summary>
        protected override void Dispose (bool disposing)
        {
                base.Dispose (disposing);
                if (!disposing)
                        return;
                Close ();
        }
}
61         #endregion
62 \r
/// <summary>
/// Read-only stream wrapper that inspects the first bytes of an XML entity
/// to determine its encoding: either a byte order mark, or the "encoding"
/// pseudo-attribute of a leading "&lt;?xml" declaration. A recognised byte
/// order mark is swallowed; every other byte consumed during detection is
/// kept in a small replay buffer and handed back by Read/ReadByte before
/// the rest of the underlying stream.
/// </summary>
class XmlInputStream : Stream
{
        // Detected encoding. Stays null when the input starts with '<' but
        // declares no encoding; callers are expected to supply a default.
        Encoding enc;
        // The wrapped source stream.
        Stream stream;
        // Replay buffer: bytes already pulled from 'stream' during detection.
        byte[] buffer = new byte [0];
        int bufLength;
        int bufPos;
        bool isDocumentEntity;  // allow omitting "version" or not.

        // A fresh exception per failure: a shared static instance would have
        // its stack trace overwritten by every throw.
        static XmlException CreateEncodingException ()
        {
                return new XmlException ("invalid encoding specification.");
        }

        /// <summary>Opens <paramref name="url"/> as a document entity.</summary>
        public XmlInputStream (string url)
                : this (url, true)
        {
        }

        /// <summary>
        /// Opens <paramref name="url"/>. Without NetworkEnabled it is always
        /// treated as a local file path. With NetworkEnabled it is downloaded
        /// when it parses as a URI and opened as a file otherwise.
        /// </summary>
        /// <param name="docent">
        /// True when the input is a document entity, in which case a leading
        /// XML declaration must carry "version".
        /// </param>
        public XmlInputStream (string url, bool docent)
        {
                this.isDocumentEntity = docent;
#if NetworkEnabled
                try {
                        // Validation only: a string that does not parse as a
                        // URI raises UriFormatException and is then tried as
                        // a file path.
                        new Uri (url);
                        Initialize (new MemoryStream (new System.Net.WebClient ().DownloadData (url)));
                } catch (UriFormatException) {
                        InitializeFromFile (url);
                }
#else
                InitializeFromFile (url);
#endif
        }

        /// <summary>Wraps <paramref name="stream"/> as a document entity.</summary>
        public XmlInputStream (Stream stream)
                : this (stream, true)
        {
        }

        /// <summary>Wraps <paramref name="stream"/>.</summary>
        /// <param name="docent">
        /// True when the input is a document entity, in which case a leading
        /// XML declaration must carry "version".
        /// </param>
        public XmlInputStream (Stream stream, bool docent)
        {
                this.isDocumentEntity = docent;
                Initialize (stream);
        }

        // Opens 'path' for reading and runs detection on it. The file handle
        // is released if detection throws, because no caller would otherwise
        // hold a reference to close it.
        private void InitializeFromFile (string path)
        {
                FileStream fs = new FileStream (path, FileMode.Open, FileAccess.Read);
                try {
                        Initialize (fs);
                } catch {
                        fs.Close ();
                        throw;
                }
        }

        // Detects the encoding from the first bytes of 'stream' and fills the
        // replay buffer with every consumed byte that is not part of a byte
        // order mark.
        private void Initialize (Stream stream)
        {
                if (stream == null)
                        throw new ArgumentNullException ("stream");

                // FIXME: staging the prologue in a MemoryStream is wasteful.
                MemoryStream ms = new MemoryStream ();
                this.stream = stream;
                int c = stream.ReadByte ();
                switch (c) {
                case 0xFF:
                        c = stream.ReadByte ();
                        if (c == 0xFE) {
                                // BOM-ed little endian utf-16
                                enc = Encoding.Unicode;
                        } else {
                                // Not a BOM and not "<?xml": treat as utf-8
                                // and replay what was read.
                                enc = Encoding.UTF8;
                                ms.WriteByte ((byte) 0xFF);
                                WriteUnlessEof (ms, c);
                        }
                        break;
                case 0xFE:
                        c = stream.ReadByte ();
                        if (c == 0xFF) {
                                // BOM-ed big endian utf-16
                                enc = Encoding.BigEndianUnicode;
                        } else {
                                enc = Encoding.UTF8;
                                ms.WriteByte ((byte) 0xFE);
                                WriteUnlessEof (ms, c);
                        }
                        break;
                case 0xEF:
                        // Possibly the utf-8 BOM (EF BB BF). Either way the
                        // encoding is taken to be utf-8.
                        enc = Encoding.UTF8;
                        c = stream.ReadByte ();
                        if (c == 0xBB) {
                                c = stream.ReadByte ();
                                if (c != 0xBF) {
                                        ms.WriteByte ((byte) 0xEF);
                                        ms.WriteByte ((byte) 0xBB);
                                        WriteUnlessEof (ms, c);
                                }
                        } else {
                                ms.WriteByte ((byte) 0xEF);
                                WriteUnlessEof (ms, c);
                        }
                        break;
                case '<':
                        // try to get encoding name from XMLDecl.
                        ms.WriteByte ((byte) '<');
                        ReadXmlDeclaration (ms);
                        break;
                case -1:
                        // Empty input: nothing to replay.
                        enc = Encoding.UTF8;
                        break;
                default:
                        enc = Encoding.UTF8;
                        ms.WriteByte ((byte) c);
                        break;
                }

                // Every branch replays through the same buffer so that no
                // consumed byte is lost.
                buffer = ms.ToArray ();
                bufLength = buffer.Length;
                bufPos = 0;
        }

        // Called after a leading '<' has been consumed and written to 'ms'.
        // Copies as much of an XML declaration as it needs to read into 'ms'
        // and sets 'enc' when an encoding pseudo-attribute is present.
        // Throws XmlException when a document entity's declaration lacks
        // "version", or when the encoding specification is malformed.
        private void ReadXmlDeclaration (MemoryStream ms)
        {
                byte[] scratch = new byte [7];

                int size = ReadFully (scratch, 0, 4);
                ms.Write (scratch, 0, size);
                if (size != 4 || Encoding.ASCII.GetString (scratch, 0, 4) != "?xml")
                        return;

                // A declaration needs whitespace after "<?xml". Anything
                // else, such as "<?xml-stylesheet", is an ordinary
                // processing instruction and carries no encoding.
                int c = stream.ReadByte ();
                if (!IsWhitespace (c)) {
                        WriteUnlessEof (ms, c);
                        return;
                }
                ms.WriteByte ((byte) c);
                c = SkipWhitespace (ms);

                // version. It is optional unless this is the document entity.
                if (c == 'v') {
                        ms.WriteByte ((byte) 'v');
                        // Copies: ersion = "x.y"
                        if (!CopyThroughQuotedValue (ms))
                                return;
                        c = SkipWhitespace (ms);
                } else if (isDocumentEntity) {
                        throw new XmlException ("invalid xml declaration.");
                }

                if (c != 'e') {
                        WriteUnlessEof (ms, c);
                        return;
                }

                ms.WriteByte ((byte) 'e');
                size = ReadFully (scratch, 0, 7);
                ms.Write (scratch, 0, size);
                if (size != 7 || Encoding.ASCII.GetString (scratch, 0, 7) != "ncoding")
                        return;

                c = SkipWhitespace (ms);
                if (c != '=')
                        throw CreateEncodingException ();
                ms.WriteByte ((byte) '=');

                c = SkipWhitespace (ms);
                if (c != '"' && c != '\'')
                        throw CreateEncodingException ();
                int quoteChar = c;
                ms.WriteByte ((byte) c);

                int start = (int) ms.Position;
                for (;;) {
                        c = stream.ReadByte ();
                        if (c == quoteChar)
                                break;
                        if (c < 0)
                                throw CreateEncodingException ();
                        ms.WriteByte ((byte) c);
                }
                string encodingName = Encoding.UTF8.GetString (ms.GetBuffer (), start, (int) ms.Position - start);
                if (!XmlConstructs.IsValidIANAEncoding (encodingName))
                        throw CreateEncodingException ();
                ms.WriteByte ((byte) quoteChar);
                enc = Encoding.GetEncoding (encodingName);
        }

        // Copies bytes from the stream into 'ms' up to and including the
        // closing quote of the next quoted literal. Returns false when the
        // input ends, or a '<' or '>' turns up, before the literal is
        // complete. The bytes read so far are still in 'ms' in that case.
        private bool CopyThroughQuotedValue (MemoryStream ms)
        {
                int quote = -1;
                for (;;) {
                        int c = stream.ReadByte ();
                        if (c < 0)
                                return false;
                        ms.WriteByte ((byte) c);
                        if (c == '<' || c == '>')
                                return false;
                        if (quote < 0) {
                                if (c == '"' || c == '\'')
                                        quote = c;
                        } else if (c == quote) {
                                return true;
                        }
                }
        }

        // Reads until 'count' bytes have arrived or the stream ends, since a
        // single Stream.Read call may legitimately return fewer bytes.
        // Returns the number of bytes actually stored.
        private int ReadFully (byte[] dest, int offset, int count)
        {
                int total = 0;
                while (total < count) {
                        int n = stream.Read (dest, offset + total, count - total);
                        if (n <= 0)
                                break;
                        total += n;
                }
                return total;
        }

        // Appends 'c' to 'ms' unless it is the end-of-stream marker (-1),
        // which must never be replayed as a 0xFF data byte.
        private static void WriteUnlessEof (MemoryStream ms, int c)
        {
                if (c >= 0)
                        ms.WriteByte ((byte) c);
        }

        private static bool IsWhitespace (int c)
        {
                return c == ' ' || c == '\t' || c == '\r' || c == '\n';
        }

        // skips whitespace and returns misc char that was read from stream
        // (-1 at end of stream). Skipped whitespace is copied to 'ms'.
        private int SkipWhitespace (MemoryStream ms)    // ms may be null
        {
                for (;;) {
                        int c = stream.ReadByte ();
                        if (!IsWhitespace (c))
                                return c;
                        if (ms != null)
                                ms.WriteByte ((byte) c);
                }
        }

        /// <summary>
        /// The encoding found during construction, or null when the input
        /// began with '&lt;' and declared no encoding.
        /// </summary>
        public Encoding ActualEncoding {
                get { return enc; }
        }

        #region Public Overrides
        public override bool CanRead {
                get { return stream.CanRead; }
        }

        public override bool CanSeek {
                get { return false; } //stream.CanSeek; }
        }

        public override bool CanWrite {
                get { return false; }
        }

        public override long Length {
                get {
                        return stream.Length;
                }
        }

        /// <summary>
        /// Position of the next byte this stream will return, expressed as
        /// an offset in the underlying stream. Setting it repositions the
        /// underlying stream and discards the replay buffer.
        /// </summary>
        public override long Position {
                get {
                        // The underlying stream is ahead by the buffered
                        // bytes that have not been handed out yet.
                        return stream.Position - (bufLength - bufPos);
                }
                set {
                        // Reposition first: if the underlying stream
                        // refuses, the replay buffer is left intact.
                        stream.Position = value;
                        bufPos = bufLength;
                }
        }

        public override void Close ()
        {
                stream.Close ();
        }

        public override void Flush ()
        {
                stream.Flush ();
        }

        /// <summary>
        /// Serves the request from the replay buffer first and takes any
        /// remainder from the underlying stream.
        /// </summary>
        /// <returns>The number of bytes stored in <paramref name="buffer"/>.</returns>
        public override int Read (byte[] buffer, int offset, int count)
        {
                int available = bufLength - bufPos;
                if (count <= available) {       // all from buffer
                        Array.Copy (this.buffer, bufPos, buffer, offset, count);
                        bufPos += count;
                        return count;
                }
                if (available > 0) {
                        Array.Copy (this.buffer, bufPos, buffer, offset, available);
                        bufPos = bufLength;
                }
                return available +
                        stream.Read (buffer, offset + available, count - available);
        }

        public override int ReadByte ()
        {
                if (bufLength > bufPos) {
                        return buffer [bufPos++];
                }
                return stream.ReadByte ();
        }

        /// <summary>
        /// Seeks the underlying stream and discards the replay buffer. A
        /// relative seek is corrected for buffered bytes not yet handed out.
        /// </summary>
        /// <returns>The new position, as reported by the underlying stream.</returns>
        public override long Seek (long offset, System.IO.SeekOrigin origin)
        {
                long target = offset;
                if (origin == SeekOrigin.Current)
                        target -= bufLength - bufPos;
                long result = stream.Seek (target, origin);
                bufPos = bufLength;
                return result;
        }

        public override void SetLength (long value)
        {
                stream.SetLength (value);
        }

        public override void Write (byte[] buffer, int offset, int count)
        {
                throw new NotSupportedException ();
        }
        #endregion
}
339 }\r