2003-03-19 Atsushi Enomoto <ginga@kit.hi-ho.ne.jp>
[mono.git] / mcs / class / System.XML / System.Xml / XmlInputStream.cs
1 //
2 // System.Xml.XmlInputStream 
3 //      encoding-specification-wise XML input stream and reader
4 //
5 // Author:
6 //      Atsushi Enomoto (ginga@kit.hi-ho.ne.jp)
7 //
8 //      (C)2003 Atsushi Enomoto
9 //
10 using System;\r
11 using System.IO;\r
12 using System.Text;\r
13 using System.Xml;\r
14 \r
15 namespace Mono.Xml.Native\r
16 {\r
17         #region XmlStreamReader
18         public class XmlStreamReader : StreamReader
19         {
20                 XmlStreamReader (XmlInputStream input)
21                         : base (input, input.ActualEncoding != null ? input.ActualEncoding : Encoding.UTF8)
22                 {
23                 }
24
25                 public XmlStreamReader (Stream input)
26                         : this (new XmlInputStream (input, true))
27                 {
28                 }
29
30                 public XmlStreamReader (Stream input, bool docent)
31                         : this (new XmlInputStream (input, docent))
32                 {
33                 }
34
35                 public XmlStreamReader (string url)
36                         : this (url, true)
37                 {
38                 }
39
40                 public XmlStreamReader (string url, bool docent)
41                         : this (new XmlInputStream (url, docent))
42                 {
43                 }
44         }
45         #endregion
46 \r
47         class XmlInputStream : Stream\r
48         {\r
49                 Encoding enc;\r
50                 Stream stream;\r
51                 byte[] buffer = new byte[256];\r
52                 int bufLength;\r
53                 int bufPos;\r
54                 bool isDocumentEntity;  // allow omitting "version" or not.\r
55 \r
56                 static XmlException encodingException = new XmlException ("invalid encoding specification.");\r
57 \r
58                 public XmlInputStream (string url)\r
59                         : this (url, true)\r
60                 {\r
61                 }\r
62 \r
63                 public XmlInputStream (string url, bool docent)\r
64                 {\r
65                         this.isDocumentEntity = docent;\r
66 #if NetworkEnabled\r
67                         try {\r
68                                 Uri uri = new Uri (url);\r
69                                 Initialize (new MemoryStream (new System.Net.WebClient ().DownloadData (url)));\r
70                         } catch (UriFormatException ex) {\r
71                                 Initialize (new FileStream (url, FileMode.Open));\r
72                         }\r
73 #else\r
74                         Initialize (new FileStream (url, FileMode.Open, FileAccess.Read));\r
75 #endif\r
76                 }\r
77 \r
78                 public XmlInputStream (Stream stream)\r
79                         : this (stream, true)\r
80                 {\r
81                 }\r
82 \r
83                 public XmlInputStream (Stream stream, bool docent)\r
84                 {\r
85                         this.isDocumentEntity = docent;\r
86                         Initialize (stream);\r
87                 }\r
88 \r
89                 private void Initialize (Stream stream)\r
90                 {\r
91                         // FIXME: seems too waste...\r
92                         MemoryStream ms = new MemoryStream ();\r
93                         this.stream = stream;\r
94                         int c = stream.ReadByte ();\r
95                         switch (c) {\r
96                         case 0xFF:\r
97                                 c = stream.ReadByte ();\r
98                                 if (c == 0xFE) {\r
99                                         // BOM-ed little endian utf-16\r
100                                         enc = Encoding.Unicode;\r
101                                 } else {\r
102                                         // It doesn't start from "<?xml" then its encoding is utf-8\r
103                                         enc = Encoding.UTF8;\r
104                                         ms.WriteByte ((byte)0xFF);\r
105                                         ms.WriteByte ((byte)c);\r
106                                 }\r
107                                 break;\r
108                         case 0xFE:\r
109                                 c = stream.ReadByte ();\r
110                                 if (c == 0xFF) {\r
111                                         // BOM-ed big endian utf-16\r
112                                         enc = Encoding.BigEndianUnicode;\r
113                                         return;\r
114                                 } else {\r
115                                         // It doesn't start from "<?xml" then its encoding is utf-8\r
116                                         enc = Encoding.UTF8;\r
117                                         ms.WriteByte ((byte)0xFE);\r
118                                         ms.WriteByte ((byte)c);\r
119                                 }\r
120                                 break;\r
121                         case 0xEF:\r
122                                 enc = Encoding.UTF8;\r
123                                 c = ReadByte ();\r
124                                 if (c == 0xBB) {\r
125                                         c = ReadByte ();\r
126                                         if (c != 0xBF) {\r
127                                                 ms.WriteByte ((byte)0xEF);\r
128                                                 ms.WriteByte ((byte)0xBB);\r
129                                                 ms.WriteByte ((byte)c);\r
130                                         }\r
131                                 } else {\r
132                                         ms.WriteByte ((byte)0xEF);\r
133                                 }\r
134                                 break;\r
135                         case '<':\r
136                                 // try to get encoding name from XMLDecl.\r
137                                 ms.WriteByte ((byte)'<');\r
138                                 int size = stream.Read (buffer, 1, 4);\r
139                                 ms.Write (buffer, 1, 4);\r
140                                 if (Encoding.ASCII.GetString (buffer, 1, 4) == "?xml") {\r
141                                         int loop = 0;\r
142                                         c = SkipWhitespace (ms);\r
143 \r
144                                         // version. It is optional here.\r
145                                         if (c != 'v') {\r
146                                                 if (isDocumentEntity)\r
147                                                         throw new XmlException ("invalid xml declaration.");\r
148                                         } else {\r
149                                                 ms.WriteByte ((byte)'v');\r
150                                                 while (loop++ >= 0 && c >= 0) {\r
151                                                         c = stream.ReadByte ();\r
152                                                         ms.WriteByte ((byte)c);\r
153                                                         if (c == '0') { // 0 of 1.0\r
154                                                                 ms.WriteByte ((byte)stream.ReadByte ());\r
155                                                                 break;\r
156                                                         }\r
157                                                 }\r
158                                                 c = SkipWhitespace (ms);\r
159                                         }\r
160 \r
161                                         if (c == 'e') {\r
162                                                 ms.WriteByte ((byte)'e');\r
163                                                 size = stream.Read (buffer, 0, 7);\r
164                                                 ms.Write (buffer, 0, 7);\r
165                                                 if (Encoding.ASCII.GetString(buffer, 0, 7) == "ncoding") {\r
166                                                         c = this.SkipWhitespace(ms);\r
167                                                         if (c != '=')\r
168                                                                 throw encodingException;\r
169                                                         ms.WriteByte ((byte)'=');\r
170                                                         c = this.SkipWhitespace (ms);\r
171                                                         int quoteChar = c;\r
172                                                         ms.WriteByte ((byte)c);\r
173                                                         int start = (int)ms.Position;\r
174                                                         while (loop++ >= 0) {\r
175                                                                 c = stream.ReadByte ();\r
176                                                                 if (c == quoteChar)\r
177                                                                         break;\r
178                                                                 else if (c < 0)\r
179                                                                         throw encodingException;\r
180                                                                 ms.WriteByte ((byte)c);\r
181                                                         }\r
182                                                         string encodingName = Encoding.UTF8.GetString (ms.GetBuffer (), start, (int)ms.Position - start);\r
183                                                         if (!XmlConstructs.IsValidIANAEncoding (encodingName))\r
184                                                                 throw encodingException;\r
185                                                         ms.WriteByte ((byte)quoteChar);\r
186                                                         enc = Encoding.GetEncoding (encodingName);\r
187                                                 }\r
188                                                 else\r
189                                                         ms.Write (buffer, 0, size);\r
190                                         }\r
191                                         else\r
192                                                 ms.WriteByte ((byte)c);\r
193                                 }\r
194                                 buffer = ms.ToArray ();\r
195                                 bufLength = buffer.Length;\r
196                                 bufPos = 0;\r
197                                 break;\r
198                         default:\r
199                                 buffer [0] = (byte)c;\r
200                                 bufLength = 1;\r
201                                 enc = Encoding.UTF8;\r
202                                 break;\r
203                         }\r
204                 }\r
205 \r
206                 // skips whitespace and returns misc char that was read from stream\r
207                 private int SkipWhitespace (MemoryStream ms)    // ms may be null\r
208                 {\r
209                         int loop = 0;\r
210                         int c;\r
211                         while (loop++ >= 0) { // defends infinite loop (expecting overflow)\r
212                                 c = stream.ReadByte ();\r
213                                 switch (c) {\r
214                                 case '\r': goto case ' ';\r
215                                 case '\n': goto case ' ';\r
216                                 case '\t': goto case ' ';\r
217                                 case ' ':\r
218                                         if (ms != null)\r
219                                                 ms.WriteByte ((byte)c);\r
220                                         continue;\r
221                                 default:\r
222                                         return c;\r
223                                 }\r
224                         }\r
225                         throw new InvalidOperationException ();\r
226                 }\r
227 \r
228                 public Encoding ActualEncoding {\r
229                         get { return enc; }\r
230                 }\r
231 \r
232                 #region Public Overrides\r
233                 public override bool CanRead {\r
234                         get { return stream.CanRead; }\r
235                 }\r
236 \r
237                 public override bool CanSeek {\r
238                         get { return false; } //stream.CanSeek; }\r
239                 }\r
240 \r
241                 public override bool CanWrite {\r
242                         get { return false; }\r
243                 }\r
244 \r
245                 public override long Length {\r
246                         get {\r
247                                 return stream.Length;\r
248                         }\r
249                 }\r
250 \r
251                 public override long Position {\r
252                         get {\r
253                                 return stream.Position + bufLength;\r
254                         }\r
255                         set {\r
256                                 if(value < bufLength)\r
257                                         bufPos = (int)value;\r
258                                 else\r
259                                         stream.Position = value - bufLength;\r
260                         }\r
261                 }\r
262 \r
263                 public override void Flush()\r
264                 {\r
265                         stream.Flush ();\r
266                 }\r
267 \r
268                 public override int Read (byte[] buffer, int offset, int count)\r
269                 {\r
270                         int ret;\r
271                         if (count <= bufLength - bufPos)        {       // all from buffer\r
272                                 Array.Copy (this.buffer, bufPos, buffer, offset, count);\r
273                                 bufPos += count;\r
274                                 ret = count;\r
275                         } else {\r
276                                 int bufRest = bufLength - bufPos;\r
277                                 if (bufLength > bufPos) {\r
278                                         Array.Copy (this.buffer, bufPos, buffer, offset, bufRest);\r
279                                         bufPos += bufRest;\r
280                                 }\r
281                                 ret = bufRest +\r
282                                         stream.Read (buffer, offset + bufRest, count - bufRest);\r
283                         }\r
284                         return ret;\r
285                 }\r
286 \r
287                 public override int ReadByte ()\r
288                 {\r
289                         if (bufLength > bufPos) {\r
290                                 return buffer [bufPos++];\r
291                         }\r
292                         return stream.ReadByte ();\r
293                 }\r
294 \r
295                 public override long Seek (long offset, System.IO.SeekOrigin origin)\r
296                 {\r
297                         int bufRest = bufLength - bufPos;\r
298                         if (origin == SeekOrigin.Current)\r
299                                 if (offset < bufRest)\r
300                                         return buffer [bufPos + offset];\r
301                                 else\r
302                                         return stream.Seek (offset - bufRest, origin);\r
303                         else\r
304                                 return stream.Seek (offset, origin);\r
305                 }\r
306 \r
307                 public override void SetLength (long value)\r
308                 {\r
309                         stream.SetLength (value);\r
310                 }\r
311 \r
312                 public override void Write (byte[] buffer, int offset, int count)\r
313                 {\r
314                         throw new NotSupportedException ();\r
315                 }\r
316                 #endregion\r
317         }\r
318 }\r