2003-02-16 Atsushi Enomoto <ginga@kit.hi-ho.ne.jp>
author Atsushi Eno <atsushieno@gmail.com>
Sun, 16 Feb 2003 07:25:43 +0000 (07:25 -0000)
committer Atsushi Eno <atsushieno@gmail.com>
Sun, 16 Feb 2003 07:25:43 +0000 (07:25 -0000)
* XmlInputStream.cs : added (also contains internal XmlStreamReader).
* XmlDocument.cs : Load () now can specify URL using XmlUrlResolver,
and can read non-UTF-8 stream.
* XmlTextReader.cs : related to above stream fix.
* XmlUrlResolver.cs : implemented GetEntity ().

svn path=/trunk/mcs/; revision=11611

mcs/class/System.XML/System.Xml/ChangeLog
mcs/class/System.XML/System.Xml/XmlDocument.cs
mcs/class/System.XML/System.Xml/XmlInputStream.cs [new file with mode: 0644]
mcs/class/System.XML/System.Xml/XmlTextReader.cs
mcs/class/System.XML/System.Xml/XmlUrlResolver.cs

index e431351f6ece18b273d7b73eac9580a6f5dc9ccc..4472f83e82ee0825391026b2c2eca362f1d03df1 100644 (file)
@@ -1,3 +1,11 @@
+2003-02-16  Atsushi Enomoto <ginga@kit.hi-ho.ne.jp>
+
+       * XmlInputStream.cs : added (also contains internal XmlStreamReader).
+       * XmlDocument.cs : Load () now can specify URL using XmlUrlResolver,
+               and can read non-UTF-8 stream.
+       * XmlTextReader.cs : related to above stream fix.
+       * XmlUrlResolver.cs : implemented GetEntity ().
+
 2003-02-03  Gonzalo Paniagua Javier <gonzalo@ximian.com>
 
        * XmlTextWriter.cs: implemented WriteRaw (char[], int, int).
index e54a1ed4dcc9111d7104a74683a9c7453010063a..49741c7f1f5750a6d566d02e3c23a4e3fcb26b15 100644 (file)
@@ -552,14 +552,16 @@ namespace System.Xml
 
                public virtual void Load (Stream inStream)
                {
-                       XmlReader xmlReader = new XmlTextReader (inStream);
+                       // XmlInputStream inspects the BOM / XML declaration of the stream so
+                       // that input in encodings other than UTF-8 can be loaded.
+                       // NOTE(review): Load (string) additionally wraps the stream in
+                       // XmlStreamReader to apply the detected encoding; confirm that
+                       // XmlTextReader (Stream) honors XmlInputStream.ActualEncoding too.
+                       XmlReader xmlReader = new XmlTextReader (new XmlInputStream (inStream));
                        Load (xmlReader);
                }
 
+               // Loads the document from a file path or URL.
+               // The location is opened through XmlUrlResolver and decoded with the
+               // encoding detected by XmlInputStream (BOM or XML declaration).
+               // Throws ArgumentNullException when filename is null and XmlException
+               // when the resolver cannot supply a stream.
                public virtual void Load (string filename)
                {
-                       baseURI = filename;
-                       XmlReader xmlReader = new XmlTextReader (new StreamReader (filename));
+                       if (filename == null)
+                               throw new ArgumentNullException ("filename");
+                       // A plain (possibly relative) file path is not a valid absolute URI,
+                       // so expand it to a full path first; the previous StreamReader-based
+                       // code accepted such paths.
+                       Uri uri;
+                       if (filename.IndexOf ("://") < 0)
+                               uri = new Uri (Path.GetFullPath (filename));
+                       else
+                               uri = new Uri (filename);
+                       baseURI = filename;     // FIXME: resolve base
+                       Stream stream = new XmlUrlResolver ().GetEntity (uri, null, typeof(Stream)) as Stream;
+                       if (stream == null)
+                               throw new XmlException ("Cannot open '" + filename + "'.");
+                       // GetEntity () already returns an XmlInputStream. Wrapping it a second
+                       // time would discard the encoding it detected from a consumed BOM,
+                       // so reuse it and only wrap other stream types.
+                       XmlInputStream input = stream as XmlInputStream;
+                       if (input == null)
+                               input = new XmlInputStream (stream);
+                       XmlReader xmlReader = new XmlTextReader (new XmlStreamReader (input));
-                       Load (xmlReader);
+                       try {
+                               Load (xmlReader);
+                       } finally {
+                               // This method opened the stream, so it is responsible for closing it.
+                               xmlReader.Close ();
+                       }
                }
 
@@ -697,10 +699,16 @@ namespace System.Xml
                        int startDepth = reader.Depth;
                        bool atStart = true;
                        bool ignoredWhitespace;
+                       bool reachedEOF = false;
 
                        do {
                                ignoredWhitespace = false;
                                reader.Read ();
+                               if (reader.NodeType == XmlNodeType.None)
+                                       if (reachedEOF)
+                                               throw new Exception ("XML Reader reached to end while reading node.");
+                                       else
+                                               reachedEOF = true;
                                // This complicated check is because we shouldn't make
                                // improper additional XmlReader.Read() by this method itself.
                                if(atStart && (reader.NodeType == XmlNodeType.EndElement || 
@@ -813,7 +821,7 @@ namespace System.Xml
                                                ignoredWhitespace = true;
                                        break;
                                }
-                       } while(ignoredWhitespace ||
+                       } while ((!reader.EOF && ignoredWhitespace) ||
                                reader.Depth > startDepth || 
                                // This complicated condition is because reader.Depth was set
                                // before XmlTextReader.depth increments ;-)
diff --git a/mcs/class/System.XML/System.Xml/XmlInputStream.cs b/mcs/class/System.XML/System.Xml/XmlInputStream.cs
new file mode 100644 (file)
index 0000000..f5b75fd
--- /dev/null
@@ -0,0 +1,271 @@
+//
+// System.Xml.XmlInputStream 
+//     XML input stream and reader that detect the document's encoding
+//
+// Author:
+//     Atsushi Enomoto (ginga@kit.hi-ho.ne.jp)
+//
+//     (C)2003 Atsushi Enomoto
+//
+using System;\r
+using System.IO;\r
+using System.Text;\r
+\r
+namespace System.Xml\r
+{\r
+       #region XmlStreamReader
+       // StreamReader over an XmlInputStream that decodes with the encoding the
+       // stream detected, or with UTF-8 when the stream reports none.
+       internal class XmlStreamReader : StreamReader
+       {
+               public XmlStreamReader (XmlInputStream input)
+                       : base (input, ChooseEncoding (input))
+               {
+               }
+
+               // ActualEncoding may be null; UTF-8 is the fallback in that case.
+               private static Encoding ChooseEncoding (XmlInputStream input)
+               {
+                       Encoding detected = input.ActualEncoding;
+                       if (detected == null)
+                               return Encoding.UTF8;
+                       return detected;
+               }
+       }
+       #endregion
+\r
+       public class XmlInputStream : Stream\r
+       {\r
+               Encoding enc;\r
+               Stream stream;\r
+               byte[] buffer = new byte[256];\r
+               int bufLength;\r
+               int bufPos;\r
+\r
+               static XmlException encodingException = new XmlException ("invalid encoding specification.");\r
+\r
+               // Opens the resource named by uri through a new WebClient and\r
+               // inspects it for its encoding.\r
+               public XmlInputStream (string uri)\r
+               {\r
+                       System.Net.WebClient client = new System.Net.WebClient ();\r
+                       Stream source = client.OpenRead (uri);\r
+                       Initialize (source);\r
+               }\r
+\r
+               // Wraps an already opened stream and inspects it for its encoding.\r
+               // Throws ArgumentNullException when stream is null (previously this\r
+               // surfaced as a NullReferenceException from inside Initialize).\r
+               public XmlInputStream (Stream stream)\r
+               {\r
+                       if (stream == null)\r
+                               throw new ArgumentNullException ("stream");\r
+                       Initialize (stream);\r
+               }\r
+\r
+               // Sniffs the first bytes of the stream to choose the text encoding:\r
+               //   FF FE     : UTF-16 little endian (the BOM is consumed)\r
+               //   FE FF     : UTF-16 big endian (the BOM is consumed)\r
+               //   EF BB BF  : UTF-8 (the BOM is consumed)\r
+               //   '<'       : an XML declaration, if present, is scanned for encoding="..."\r
+               //   otherwise : UTF-8 (this includes an empty stream)\r
+               // Bytes that were read ahead but are not part of a BOM are stored in\r
+               // `buffer` so that Read ()/ReadByte () return them before the rest of the\r
+               // underlying stream. `enc` is left null when the input starts with '<'\r
+               // but declares no encoding.\r
+               // Throws XmlException when the XML declaration or its encoding name is malformed.\r
+               private void Initialize (Stream stream)\r
+               {\r
+                       this.stream = stream;\r
+                       // Collects every look-ahead byte that has to be handed back to readers.\r
+                       MemoryStream ms = new MemoryStream ();\r
+                       int c = stream.ReadByte ();\r
+                       switch (c) {\r
+                       case 0xFF:\r
+                               c = stream.ReadByte ();\r
+                               if (c == 0xFE) {\r
+                                       // BOM-ed little endian utf-16\r
+                                       enc = Encoding.Unicode;\r
+                               } else {\r
+                                       // Not a BOM: assume utf-8 and give both bytes back.\r
+                                       enc = Encoding.UTF8;\r
+                                       ms.WriteByte ((byte)0xFF);\r
+                                       WriteUnlessEof (ms, c);\r
+                               }\r
+                               break;\r
+                       case 0xFE:\r
+                               c = stream.ReadByte ();\r
+                               if (c == 0xFF) {\r
+                                       // BOM-ed big endian utf-16\r
+                                       enc = Encoding.BigEndianUnicode;\r
+                               } else {\r
+                                       // Not a BOM: assume utf-8 and give both bytes back.\r
+                                       enc = Encoding.UTF8;\r
+                                       ms.WriteByte ((byte)0xFE);\r
+                                       WriteUnlessEof (ms, c);\r
+                               }\r
+                               break;\r
+                       case 0xEF:\r
+                               enc = Encoding.UTF8;\r
+                               c = stream.ReadByte ();\r
+                               if (c != 0xBB) {\r
+                                       ms.WriteByte ((byte)0xEF);\r
+                                       WriteUnlessEof (ms, c);\r
+                                       break;\r
+                               }\r
+                               c = stream.ReadByte ();\r
+                               if (c != 0xBF) {\r
+                                       ms.WriteByte ((byte)0xEF);\r
+                                       ms.WriteByte ((byte)0xBB);\r
+                                       WriteUnlessEof (ms, c);\r
+                               }\r
+                               break;\r
+                       case '<':\r
+                               // try to get encoding name from XMLDecl.\r
+                               ms.WriteByte ((byte)'<');\r
+                               ReadXmlDeclaration (ms);\r
+                               break;\r
+                       default:\r
+                               // Any other first byte, or an empty stream (c == -1).\r
+                               enc = Encoding.UTF8;\r
+                               WriteUnlessEof (ms, c);\r
+                               break;\r
+                       }\r
+                       // Publish the look-ahead bytes for every path, not only for '<'.\r
+                       buffer = ms.ToArray ();\r
+                       bufLength = buffer.Length;\r
+                       bufPos = 0;\r
+               }\r
+\r
+               // Scans an XML declaration whose leading '<' has already been consumed,\r
+               // copying everything it reads into ms. Sets `enc` when an encoding\r
+               // pseudo-attribute is found; returns quietly when the input is not an\r
+               // XML declaration (for example "<root>" or "<?xml-stylesheet ...?>").\r
+               private void ReadXmlDeclaration (MemoryStream ms)\r
+               {\r
+                       byte[] scratch = new byte [7];\r
+                       int size = ReadFully (scratch, 4);\r
+                       ms.Write (scratch, 0, size);\r
+                       if (size < 4 || Encoding.ASCII.GetString (scratch, 0, 4) != "?xml")\r
+                               return;\r
+                       // "<?xml" only starts a declaration when whitespace follows it.\r
+                       int c = stream.ReadByte ();\r
+                       if (c != ' ' && c != '\t' && c != '\r' && c != '\n') {\r
+                               WriteUnlessEof (ms, c);\r
+                               return;\r
+                       }\r
+                       ms.WriteByte ((byte)c);\r
+                       c = SkipWhitespace (ms);\r
+                       // version\r
+                       if (c != 'v' || stream.ReadByte () != 'e')\r
+                               throw new XmlException ("invalid xml declaration.");\r
+                       ms.WriteByte ((byte)'v');\r
+                       ms.WriteByte ((byte)'e');\r
+                       // Copy the rest of the version pseudo-attribute through its closing\r
+                       // quote; hitting the end of the stream means a truncated declaration.\r
+                       if (!CopyThrough (ms, '='))\r
+                               throw new XmlException ("invalid xml declaration.");\r
+                       c = SkipWhitespace (ms);\r
+                       if (c != '"' && c != '\'')\r
+                               throw new XmlException ("invalid xml declaration.");\r
+                       ms.WriteByte ((byte)c);\r
+                       if (!CopyThrough (ms, c))\r
+                               throw new XmlException ("invalid xml declaration.");\r
+                       c = SkipWhitespace (ms);\r
+                       if (c != 'e') {\r
+                               // No encoding pseudo-attribute follows the version.\r
+                               WriteUnlessEof (ms, c);\r
+                               return;\r
+                       }\r
+                       ms.WriteByte ((byte)'e');\r
+                       size = ReadFully (scratch, 7);\r
+                       ms.Write (scratch, 0, size);\r
+                       if (size < 7 || Encoding.ASCII.GetString (scratch, 0, 7) != "ncoding")\r
+                               return;\r
+                       c = SkipWhitespace (ms);\r
+                       if (c != '=')\r
+                               throw encodingException;\r
+                       ms.WriteByte ((byte)'=');\r
+                       c = SkipWhitespace (ms);\r
+                       if (c != '"' && c != '\'')\r
+                               throw encodingException;\r
+                       int quoteChar = c;\r
+                       ms.WriteByte ((byte)c);\r
+                       int start = (int)ms.Position;\r
+                       for (;;) {\r
+                               c = stream.ReadByte ();\r
+                               if (c == quoteChar)\r
+                                       break;\r
+                               if (c < 0)\r
+                                       throw encodingException;\r
+                               ms.WriteByte ((byte)c);\r
+                       }\r
+                       string encodingName = Encoding.UTF8.GetString (ms.GetBuffer (), start, (int)ms.Position - start);\r
+                       if (!XmlConstructs.IsValidIANAEncoding (encodingName))\r
+                               throw encodingException;\r
+                       ms.WriteByte ((byte)quoteChar);\r
+                       enc = Encoding.GetEncoding (encodingName);\r
+               }\r
+\r
+               // Appends c to ms unless it is the end-of-stream marker (-1).\r
+               private static void WriteUnlessEof (MemoryStream ms, int c)\r
+               {\r
+                       if (c >= 0)\r
+                               ms.WriteByte ((byte)c);\r
+               }\r
+\r
+               // Reads up to count bytes from the underlying stream into dest and\r
+               // returns how many were read; fewer than count means end of stream.\r
+               private int ReadFully (byte[] dest, int count)\r
+               {\r
+                       int total = 0;\r
+                       while (total < count) {\r
+                               int n = stream.Read (dest, total, count - total);\r
+                               if (n <= 0)\r
+                                       break;\r
+                               total += n;\r
+                       }\r
+                       return total;\r
+               }\r
+\r
+               // Copies bytes from the underlying stream to ms up to and including\r
+               // terminator. Returns false when the stream ends first.\r
+               private bool CopyThrough (MemoryStream ms, int terminator)\r
+               {\r
+                       for (;;) {\r
+                               int c = stream.ReadByte ();\r
+                               if (c < 0)\r
+                                       return false;\r
+                               ms.WriteByte ((byte)c);\r
+                               if (c == terminator)\r
+                                       return true;\r
+                       }\r
+               }\r
+\r
+               // Consumes whitespace (space, tab, CR, LF) from the underlying stream,\r
+               // copying it to ms when ms is not null, and returns the first other\r
+               // value read (the stream's end-of-stream value included).\r
+               private int SkipWhitespace (MemoryStream ms)    // ms may be null\r
+               {\r
+                       // The counter bounds the loop: it ends once the int wraps negative.\r
+                       for (int guard = 0; guard >= 0; guard++) {\r
+                               int b = stream.ReadByte ();\r
+                               bool isSpace = b == ' ' || b == '\t' || b == '\r' || b == '\n';\r
+                               if (!isSpace)\r
+                                       return b;\r
+                               if (ms != null)\r
+                                       ms.WriteByte ((byte)b);\r
+                       }\r
+                       throw new InvalidOperationException ();\r
+               }\r
+\r
+               // The encoding chosen by Initialize (); null when none was chosen.\r
+               public Encoding ActualEncoding {\r
+                       get {\r
+                               return this.enc;\r
+                       }\r
+               }\r
+\r
+               #region Public Overrides\r
+               // Readable exactly when the wrapped stream is readable.\r
+               public override bool CanRead {\r
+                       get {\r
+                               bool readable = stream.CanRead;\r
+                               return readable;\r
+                       }\r
+               }\r
+\r
+               // Always reports false; delegating to stream.CanSeek is deliberately disabled.\r
+               public override bool CanSeek {\r
+                       get {\r
+                               return false;   //stream.CanSeek;\r
+                       }\r
+               }\r
+\r
+               // This wrapper is read-only.\r
+               public override bool CanWrite {\r
+                       get {\r
+                               return false;\r
+                       }\r
+               }\r
+\r
+               // Length of the wrapped stream, passed through unchanged.\r
+               public override long Length {\r
+                       get {\r
+                               long total = stream.Length;\r
+                               return total;\r
+                       }\r
+               }\r
+\r
+               // Position of the next byte to be returned, expressed as an offset in\r
+               // the wrapped stream. Requires the wrapped stream to support Position.\r
+               public override long Position {\r
+                       get {\r
+                               // The wrapped stream has already advanced past the look-ahead\r
+                               // bytes, so subtract the ones that have not been handed out yet.\r
+                               return stream.Position - (bufLength - bufPos);\r
+                       }\r
+                       set {\r
+                               if (bufPos < bufLength) {\r
+                                       // Read ()/ReadByte () only touch the wrapped stream once the\r
+                                       // look-ahead bytes are used up, so while some remain the\r
+                                       // wrapped stream sits directly after the buffered range.\r
+                                       long bufStart = stream.Position - bufLength;\r
+                                       if (value >= bufStart && value - bufStart <= bufLength) {\r
+                                               bufPos = (int)(value - bufStart);\r
+                                               return;\r
+                                       }\r
+                               }\r
+                               // Outside the buffered range: drop the look-ahead bytes and\r
+                               // reposition the wrapped stream directly.\r
+                               bufPos = bufLength;\r
+                               stream.Position = value;\r
+                       }\r
+               }\r
+\r
+               // Forwards the flush request to the wrapped stream.\r
+               public override void Flush()\r
+               {\r
+                       this.stream.Flush ();\r
+               }\r
+\r
+               // Fills the caller's array first from the look-ahead bytes and, when\r
+               // those do not cover the request, continues with the wrapped stream.\r
+               // Returns the number of bytes placed into the array.\r
+               public override int Read (byte[] buffer, int offset, int count)\r
+               {\r
+                       int available = bufLength - bufPos;\r
+                       if (count <= available) {\r
+                               // The look-ahead bytes satisfy the whole request.\r
+                               Array.Copy (this.buffer, bufPos, buffer, offset, count);\r
+                               bufPos += count;\r
+                               return count;\r
+                       }\r
+                       if (available > 0) {\r
+                               // Hand out what is left of the look-ahead bytes first.\r
+                               Array.Copy (this.buffer, bufPos, buffer, offset, available);\r
+                               bufPos += available;\r
+                       }\r
+                       int fromStream = stream.Read (buffer, offset + available, count - available);\r
+                       return available + fromStream;\r
+               }\r
+\r
+               // Returns the next look-ahead byte, or reads from the wrapped stream\r
+               // once the look-ahead bytes are used up.\r
+               public override int ReadByte ()\r
+               {\r
+                       if (bufPos >= bufLength)\r
+                               return stream.ReadByte ();\r
+                       return buffer [bufPos++];\r
+               }\r
+\r
+               // Moves the read position and returns the new position, expressed as\r
+               // an offset in the wrapped stream. Requires the wrapped stream to\r
+               // support Position/Length/Seek. (The previous code returned a data byte\r
+               // from the look-ahead buffer instead of a position and never moved.)\r
+               public override long Seek (long offset, System.IO.SeekOrigin origin)\r
+               {\r
+                       long target;\r
+                       switch (origin) {\r
+                       case SeekOrigin.Begin:\r
+                               target = offset;\r
+                               break;\r
+                       case SeekOrigin.Current:\r
+                               // Current position = wrapped position minus unread look-ahead bytes.\r
+                               target = stream.Position - (bufLength - bufPos) + offset;\r
+                               break;\r
+                       default:\r
+                               target = stream.Length + offset;\r
+                               break;\r
+                       }\r
+                       if (bufPos < bufLength) {\r
+                               // Read ()/ReadByte () only touch the wrapped stream once the\r
+                               // look-ahead bytes are used up, so while some remain the wrapped\r
+                               // stream sits directly after the buffered range.\r
+                               long bufStart = stream.Position - bufLength;\r
+                               if (target >= bufStart && target - bufStart <= bufLength) {\r
+                                       bufPos = (int)(target - bufStart);\r
+                                       return target;\r
+                               }\r
+                       }\r
+                       // Outside the buffered range: drop the look-ahead bytes and seek\r
+                       // the wrapped stream to the absolute target.\r
+                       bufPos = bufLength;\r
+                       return stream.Seek (target, SeekOrigin.Begin);\r
+               }\r
+\r
+               // Forwards the request to the wrapped stream.\r
+               public override void SetLength (long value)\r
+               {\r
+                       this.stream.SetLength (value);\r
+               }\r
+\r
+               // Writing is not supported (CanWrite is false); always throws\r
+               // NotSupportedException.\r
+               public override void Write (byte[] buffer, int offset, int count)\r
+               {\r
+                       throw new NotSupportedException ();\r
+               }\r
+               #endregion\r
+       }\r
+}\r
index 21162130b1893dbfb48b3ebc95d8cfb421fac7c4..d0e0e03440526df5286a0bb49943d8de53e22ad8 100644 (file)
@@ -656,7 +656,11 @@ namespace System.Xml
                internal void SetReaderFragment(TextReader fragment, XmlNodeType fragType)
                {
                        this.reader = fragment;
-                       can_seek = fragment != null && fragment.Peek () != -1;
+                       StreamReader sr = fragment as StreamReader;
+                       if (sr != null)
+                               can_seek = sr.BaseStream.CanSeek;
+                       else
+                               can_seek = fragment != null && fragment.Peek () != -1;
 
                        if (fragType == XmlNodeType.Attribute)
                                value = "''";
index 54750bb6f602848e5f2487a2af04228599e63f9e..aa171bd7c23193897ad10bafc3bc1df81b7cce5b 100755 (executable)
@@ -1,11 +1,13 @@
 // System.Xml.XmlUrlResolver.cs
 //
 // Author: Duncan Mak (duncan@ximian.com)
+//        Atsushi Enomoto (ginga@kit.hi-ho.ne.jp)
 //
 // (C) Ximian, Inc.
 //
 
 using System.Net;
+using System.IO;
 
 namespace System.Xml
 {
@@ -13,7 +15,15 @@ namespace System.Xml
        {
                // Field
                ICredentials credential;
-               
+               // Backing field for the lazily created WebClient.
+               WebClient webClientInternal;
+               // The resolver's WebClient, created on first use and then reused.
+               WebClient webClient {
+                       get {
+                               if (webClientInternal != null)
+                                       return webClientInternal;
+                               webClientInternal = new WebClient ();
+                               return webClientInternal;
+                       }
+               }
+
                // Constructor
                public XmlUrlResolver ()
                        : base ()
@@ -27,9 +37,16 @@ namespace System.Xml
                }
                
                // Methods
+               // Opens absoluteUri with the resolver's WebClient (using the stored
+               // credentials) and returns it wrapped in an XmlInputStream.
+               // ofObjectToReturn may be null or any type an XmlInputStream can be
+               // assigned to (for example Stream); for any other type null is returned
+               // and no connection is opened.
-               [MonoTODO]
+               [MonoTODO("This implementation is bad because the spec explicitly forbids parameter Uri representing non-absolute.")]
                public override object GetEntity (Uri absoluteUri, string role, Type ofObjectToReturn)
                {
+                       // (MS documentation says) parameter role isn't used yet.
+                       // IsAssignableFrom also accepts the exact type and interfaces,
+                       // which IsSubclassOf rejected, and the null check avoids an
+                       // exception when no type is requested.
+                       if (ofObjectToReturn == null || ofObjectToReturn.IsAssignableFrom (typeof (XmlInputStream))) {
+                               webClient.Credentials = credential;
+                               Stream raw = webClient.OpenRead (absoluteUri.ToString ());
+                               try {
+                                       return new XmlInputStream (raw);
+                               } catch {
+                                       // The wrapper could not be built (e.g. a malformed XML
+                                       // declaration); do not leak the opened stream.
+                                       raw.Close ();
+                                       throw;
+                               }
+                       }
                        return null;
                }
 
@@ -37,5 +54,5 @@ namespace System.Xml
                {
                        return new Uri (baseUri, relativeUri);
                }
-       }       
+       }
 }