2003-03-18 Atsushi Enomoto <ginga@kit.hi-ho.ne.jp>
[mono.git] / mcs / class / System.XML / System.Xml / XmlInputStream.cs
1 //
2 // System.Xml.XmlInputStream 
3 //      encoding-specification-wise XML input stream and reader
4 //
5 // Author:
6 //      Atsushi Enomoto (ginga@kit.hi-ho.ne.jp)
7 //
8 //      (C)2003 Atsushi Enomoto
9 //
10 using System;\r
11 using System.IO;\r
12 using System.Text;\r
13 using System.Xml;\r
14 \r
15 namespace Mono.Xml.Native\r
16 {\r
17         #region XmlStreamReader
18         public class XmlStreamReader : StreamReader
19         {
20                 XmlStreamReader (XmlInputStream input)
21                         : base (input, input.ActualEncoding != null ? input.ActualEncoding : Encoding.UTF8)
22                 {
23                 }
24
25                 public XmlStreamReader (Stream input)
26                         : this (new XmlInputStream (input))
27                 {
28                 }
29
30                 public XmlStreamReader (string url)
31                         : this (new XmlInputStream (url))
32                 {
33                 }
34         }
35         #endregion
36 \r
37         class XmlInputStream : Stream\r
38         {\r
39                 Encoding enc;\r
40                 Stream stream;\r
41                 byte[] buffer = new byte[256];\r
42                 int bufLength;\r
43                 int bufPos;\r
44 \r
45                 static XmlException encodingException = new XmlException ("invalid encoding specification.");\r
46 \r
47                 public XmlInputStream (string url)\r
48                 {\r
49 #if NetworkEnabled\r
50                         try {\r
51                                 Uri uri = new Uri (url);\r
52                                 Initialize (new MemoryStream (new System.Net.WebClient ().DownloadData (url)));\r
53                         } catch (UriFormatException ex) {\r
54                                 Initialize (new FileStream (url, FileMode.Open));\r
55                         }\r
56 #else\r
57                         Initialize (new FileStream (url, FileMode.Open, FileAccess.Read));\r
58 #endif\r
59                 }\r
60 \r
61                 public XmlInputStream (Stream stream)\r
62                 {\r
63                         Initialize (stream);\r
64                 }\r
65 \r
66                 private void Initialize (Stream stream)\r
67                 {\r
68                         // FIXME: seems too waste...\r
69                         MemoryStream ms = new MemoryStream ();\r
70                         this.stream = stream;\r
71                         int c = stream.ReadByte ();\r
72                         switch (c) {\r
73                         case 0xFF:\r
74                                 c = stream.ReadByte ();\r
75                                 if (c == 0xFE) {\r
76                                         // BOM-ed little endian utf-16\r
77                                         enc = Encoding.Unicode;\r
78                                 } else {\r
79                                         // It doesn't start from "<?xml" then its encoding is utf-8\r
80                                         enc = Encoding.UTF8;\r
81                                         ms.WriteByte ((byte)0xFF);\r
82                                         ms.WriteByte ((byte)c);\r
83                                 }\r
84                                 break;\r
85                         case 0xFE:\r
86                                 c = stream.ReadByte ();\r
87                                 if (c == 0xFF) {\r
88                                         // BOM-ed big endian utf-16\r
89                                         enc = Encoding.BigEndianUnicode;\r
90                                         return;\r
91                                 } else {\r
92                                         // It doesn't start from "<?xml" then its encoding is utf-8\r
93                                         enc = Encoding.UTF8;\r
94                                         ms.WriteByte ((byte)0xFE);\r
95                                         ms.WriteByte ((byte)c);\r
96                                 }\r
97                                 break;\r
98                         case 0xEF:\r
99                                 enc = Encoding.UTF8;\r
100                                 c = ReadByte ();\r
101                                 if (c == 0xBB) {\r
102                                         c = ReadByte ();\r
103                                         if (c != 0xBF) {\r
104                                                 ms.WriteByte ((byte)0xEF);\r
105                                                 ms.WriteByte ((byte)0xBB);\r
106                                                 ms.WriteByte ((byte)c);\r
107                                         }\r
108                                 } else {\r
109                                         ms.WriteByte ((byte)0xEF);\r
110                                 }\r
111                                 break;\r
112                         case '<':\r
113                                 // try to get encoding name from XMLDecl.\r
114                                 ms.WriteByte ((byte)'<');\r
115                                 int size = stream.Read (buffer, 1, 4);\r
116                                 ms.Write (buffer, 1, 4);\r
117                                 if (Encoding.ASCII.GetString (buffer, 1, 4) == "?xml") {\r
118                                         int loop = 0;\r
119                                         c = SkipWhitespace (ms);\r
120                                         // version\r
121                                         if (c != 'v' || stream.ReadByte () != 'e')\r
122                                                 throw new XmlException ("invalid xml declaration.");\r
123                                         ms.WriteByte ((byte)'v');\r
124                                         ms.WriteByte ((byte)'e');\r
125                                         while (loop++ >= 0) {\r
126                                                 c = stream.ReadByte ();\r
127                                                 ms.WriteByte ((byte)c);\r
128                                                 if (c == '0') {\r
129                                                         ms.WriteByte ((byte)stream.ReadByte ());\r
130                                                         break;\r
131                                                 }\r
132                                         }\r
133                                         c = SkipWhitespace (ms);\r
134                                         if (c == 'e') {\r
135                                                 ms.WriteByte ((byte)'e');\r
136                                                 size = stream.Read (buffer, 0, 7);\r
137                                                 ms.Write (buffer, 0, 7);\r
138                                                 if (Encoding.ASCII.GetString(buffer, 0, 7) == "ncoding") {\r
139                                                         c = this.SkipWhitespace(ms);\r
140                                                         if (c != '=')\r
141                                                                 throw encodingException;\r
142                                                         ms.WriteByte ((byte)'=');\r
143                                                         c = this.SkipWhitespace (ms);\r
144                                                         int quoteChar = c;\r
145                                                         ms.WriteByte ((byte)c);\r
146                                                         int start = (int)ms.Position;\r
147                                                         while (loop++ >= 0) {\r
148                                                                 c = stream.ReadByte ();\r
149                                                                 if (c == quoteChar)\r
150                                                                         break;\r
151                                                                 else if (c < 0)\r
152                                                                         throw encodingException;\r
153                                                                 ms.WriteByte ((byte)c);\r
154                                                         }\r
155                                                         string encodingName = Encoding.UTF8.GetString (ms.GetBuffer (), start, (int)ms.Position - start);\r
156                                                         if (!XmlConstructs.IsValidIANAEncoding (encodingName))\r
157                                                                 throw encodingException;\r
158                                                         ms.WriteByte ((byte)quoteChar);\r
159                                                         enc = Encoding.GetEncoding (encodingName);\r
160                                                 }\r
161                                                 else\r
162                                                         ms.Write (buffer, 0, size);\r
163                                         }\r
164                                         else\r
165                                                 ms.WriteByte ((byte)c);\r
166                                 }\r
167                                 buffer = ms.ToArray ();\r
168                                 bufLength = buffer.Length;\r
169                                 bufPos = 0;\r
170                                 break;\r
171                         default:\r
172                                 buffer [0] = (byte)c;\r
173                                 bufLength = 1;\r
174                                 enc = Encoding.UTF8;\r
175                                 break;\r
176                         }\r
177                 }\r
178 \r
179                 // skips whitespace and returns misc char that was read from stream\r
180                 private int SkipWhitespace (MemoryStream ms)    // ms may be null\r
181                 {\r
182                         int loop = 0;\r
183                         int c;\r
184                         while (loop++ >= 0) { // defends infinite loop (expecting overflow)\r
185                                 c = stream.ReadByte ();\r
186                                 switch (c) {\r
187                                 case '\r': goto case ' ';\r
188                                 case '\n': goto case ' ';\r
189                                 case '\t': goto case ' ';\r
190                                 case ' ':\r
191                                         if (ms != null)\r
192                                                 ms.WriteByte ((byte)c);\r
193                                         continue;\r
194                                 default:\r
195                                         return c;\r
196                                 }\r
197                         }\r
198                         throw new InvalidOperationException ();\r
199                 }\r
200 \r
201                 public Encoding ActualEncoding {\r
202                         get { return enc; }\r
203                 }\r
204 \r
205                 #region Public Overrides\r
206                 public override bool CanRead {\r
207                         get { return stream.CanRead; }\r
208                 }\r
209 \r
210                 public override bool CanSeek {\r
211                         get { return false; } //stream.CanSeek; }\r
212                 }\r
213 \r
214                 public override bool CanWrite {\r
215                         get { return false; }\r
216                 }\r
217 \r
218                 public override long Length {\r
219                         get {\r
220                                 return stream.Length;\r
221                         }\r
222                 }\r
223 \r
224                 public override long Position {\r
225                         get {\r
226                                 return stream.Position + bufLength;\r
227                         }\r
228                         set {\r
229                                 if(value < bufLength)\r
230                                         bufPos = (int)value;\r
231                                 else\r
232                                         stream.Position = value - bufLength;\r
233                         }\r
234                 }\r
235 \r
236                 public override void Flush()\r
237                 {\r
238                         stream.Flush ();\r
239                 }\r
240 \r
241                 public override int Read (byte[] buffer, int offset, int count)\r
242                 {\r
243                         int ret;\r
244                         if (count <= bufLength - bufPos)        {       // all from buffer\r
245                                 Array.Copy (this.buffer, bufPos, buffer, offset, count);\r
246                                 bufPos += count;\r
247                                 ret = count;\r
248                         } else {\r
249                                 int bufRest = bufLength - bufPos;\r
250                                 if (bufLength > bufPos) {\r
251                                         Array.Copy (this.buffer, bufPos, buffer, offset, bufRest);\r
252                                         bufPos += bufRest;\r
253                                 }\r
254                                 ret = bufRest +\r
255                                         stream.Read (buffer, offset + bufRest, count - bufRest);\r
256                         }\r
257                         return ret;\r
258                 }\r
259 \r
260                 public override int ReadByte ()\r
261                 {\r
262                         if (bufLength > bufPos) {\r
263                                 return buffer [bufPos++];\r
264                         }\r
265                         return stream.ReadByte ();\r
266                 }\r
267 \r
268                 public override long Seek (long offset, System.IO.SeekOrigin origin)\r
269                 {\r
270                         int bufRest = bufLength - bufPos;\r
271                         if (origin == SeekOrigin.Current)\r
272                                 if (offset < bufRest)\r
273                                         return buffer [bufPos + offset];\r
274                                 else\r
275                                         return stream.Seek (offset - bufRest, origin);\r
276                         else\r
277                                 return stream.Seek (offset, origin);\r
278                 }\r
279 \r
280                 public override void SetLength (long value)\r
281                 {\r
282                         stream.SetLength (value);\r
283                 }\r
284 \r
285                 public override void Write (byte[] buffer, int offset, int count)\r
286                 {\r
287                         throw new NotSupportedException ();\r
288                 }\r
289                 #endregion\r
290         }\r
291 }\r