X-Git-Url: http://wien.tomnetworks.com/gitweb/?a=blobdiff_plain;f=mcs%2Fclass%2FSystem.XML%2FSystem.Xml%2FXmlTextReader.cs;h=5895b4b6572c36603d6de2a8d8f9c304b8dd7903;hb=ea63c7d9e2a5f5808af6679a0ee2ae1af124a77a;hp=91611af2ee9d2ac6254437660226abf26b9ee56b;hpb=86709d74f26dbf25062a83caabfcaa39159f5bee;p=mono.git diff --git a/mcs/class/System.XML/System.Xml/XmlTextReader.cs b/mcs/class/System.XML/System.Xml/XmlTextReader.cs index 91611af2ee9..5895b4b6572 100644 --- a/mcs/class/System.XML/System.Xml/XmlTextReader.cs +++ b/mcs/class/System.XML/System.Xml/XmlTextReader.cs @@ -13,6 +13,8 @@ // instructions, and comments are OK. // // It barfs on DOCTYPE declarations. +// => No barfing, but parsing is incomplete. +// DTD nodes are not still created. // // There's also no checking being done for either well-formedness // or validity. @@ -38,106 +40,87 @@ namespace System.Xml { public class XmlTextReader : XmlReader, IXmlLineInfo { + WhitespaceHandling whitespaceHandling = WhitespaceHandling.All; #region Constructors - [MonoTODO] protected XmlTextReader () { - throw new NotImplementedException (); } - [MonoTODO] public XmlTextReader (Stream input) + : this (new StreamReader (input)) { - // We can share some code in the constructors (at least for this one and next 2) - XmlNameTable nt = new NameTable (); - XmlNamespaceManager nsMgr = new XmlNamespaceManager (nt); - parserContext = new XmlParserContext (null, nsMgr, null, XmlSpace.None); - Init (); - reader = new StreamReader (input); } - [MonoTODO] public XmlTextReader (string url) + : this(url, new NameTable ()) { - XmlNameTable nt = new NameTable (); - XmlNamespaceManager nsMgr = new XmlNamespaceManager (nt); - parserContext = new XmlParserContext (null, nsMgr, null, XmlSpace.None); - Init (); - reader = new StreamReader(url); } - [MonoTODO] public XmlTextReader (TextReader input) + : this (input, new NameTable ()) { - XmlNameTable nt = new NameTable (); - XmlNamespaceManager nsMgr = new XmlNamespaceManager (nt); - parserContext = new XmlParserContext (null, nsMgr, null, XmlSpace.None); - Init (); - reader = input; } - [MonoTODO] protected XmlTextReader (XmlNameTable nt) + : this (String.Empty, null, XmlNodeType.None, null) { - throw new NotImplementedException (); } - [MonoTODO] public XmlTextReader (Stream input, XmlNameTable nt) + : this(new StreamReader (input), nt) { - throw new NotImplementedException (); } - [MonoTODO] public XmlTextReader (string url, Stream input) + : this (url, new StreamReader (input)) { - throw new NotImplementedException (); } - [MonoTODO] public XmlTextReader (string url, TextReader input) + : this (url, input, new NameTable ()) { - throw new NotImplementedException (); } - [MonoTODO] + [MonoTODO("Non-filename-url must be supported. Waiting for WebClient")] public XmlTextReader (string url, XmlNameTable nt) + // : this(url, new StreamReader ((Stream)new XmlUrlResolver ().GetEntity (new Uri (url), null, typeof(Stream))), nt) + : this (url, new StreamReader (url), nt) { - throw new NotImplementedException (); } - [MonoTODO] public XmlTextReader (TextReader input, XmlNameTable nt) + : this(String.Empty, input, nt) { - throw new NotImplementedException (); } - [MonoTODO] public XmlTextReader (Stream xmlFragment, XmlNodeType fragType, XmlParserContext context) + : this (String.Empty, new StreamReader (xmlFragment), fragType, context) { - throw new NotImplementedException (); } - [MonoTODO] public XmlTextReader (string url, Stream input, XmlNameTable nt) + : this (url, new StreamReader (input), nt) { - throw new NotImplementedException (); } - [MonoTODO] public XmlTextReader (string url, TextReader input, XmlNameTable nt) + : this (url, input, XmlNodeType.Document, new XmlParserContext (nt, new XmlNamespaceManager (nt), null, XmlSpace.None)) { - throw new NotImplementedException (); } - [MonoTODO] + [MonoTODO("TODO as same as private XmlTextReader(TextReader, XmlNodeType, XmlParserContext)")] public XmlTextReader (string xmlFragment, XmlNodeType fragType, XmlParserContext context) + : this (String.Empty, new StringReader (xmlFragment), fragType, context) { - //Waiting for Validating reader for fragType rules. - parserContext = context; - Init (); - reader = new StringReader(xmlFragment); + } + + // TODO still remains as described at head of this file, + // but it might not be TODO of the constructors... + XmlTextReader (string url, TextReader fragment, XmlNodeType fragType, XmlParserContext context) + { + this.SetReaderContext(url, context); + this.SetReaderFragment(fragment, fragType); } #endregion @@ -149,21 +132,21 @@ namespace System.Xml get { return attributes.Count; } } - [MonoTODO] public override string BaseURI { - get { throw new NotImplementedException (); } + get { return parserContext.BaseURI; } } public override int Depth { - get { return depth > 0 ? depth : 0; } + get { + return elementDepth; + } } - [MonoTODO] public Encoding Encoding { - get { throw new NotImplementedException (); } + get { return parserContext.Encoding; } } public override bool EOF @@ -210,16 +193,14 @@ namespace System.Xml get { return GetAttribute (localName, namespaceName); } } - [MonoTODO] public int LineNumber { - get { throw new NotImplementedException (); } + get { return line; } } - [MonoTODO] public int LinePosition { - get { throw new NotImplementedException (); } + get { return column; } } public override string LocalName @@ -282,11 +263,10 @@ namespace System.Xml get { return value; } } - [MonoTODO] public WhitespaceHandling WhitespaceHandling { - get { throw new NotImplementedException (); } - set { throw new NotImplementedException (); } + get { return whitespaceHandling; } + set { whitespaceHandling = value; } } [MonoTODO] @@ -320,12 +300,16 @@ namespace System.Xml [MonoTODO] public override string GetAttribute (int i) { - throw new NotImplementedException (); + if (i > attributes.Count) + throw new ArgumentOutOfRangeException ("i is smaller than AttributeCount"); + else + throw new NotImplementedException (); } public override string GetAttribute (string name) { - return attributes [name] as string; + return attributes.ContainsKey (name) ? + attributes [name] as string : String.Empty; } public override string GetAttribute (string localName, string namespaceURI) @@ -344,10 +328,12 @@ namespace System.Xml string thisNamespaceURI = LookupNamespace (thisPrefix); if (namespaceURI == thisNamespaceURI) - return attributes [thisName] as string; + return attributes.ContainsKey (thisName) ? + attributes [thisName] as string : String.Empty; } } else if (localName == "xmlns" && namespaceURI == "http://www.w3.org/2000/xmlns/" && thisName == "xmlns") - return attributes [thisName] as string; + return attributes.ContainsKey (thisName) ? + attributes [thisName] as string : String.Empty; } return String.Empty; @@ -471,10 +457,80 @@ namespace System.Xml return more; } - [MonoTODO] public override bool ReadAttributeValue () { - throw new NotImplementedException (); + // reading attribute value phase now stopped + if(attributeStringCurrentPosition < 0 || + attributeString.Length < attributeStringCurrentPosition) { + attributeStringCurrentPosition = 0; + attributeString = String.Empty; + return false; + } + + // If not started, then initialize attributeString when parsing is at start. + if(attributeStringCurrentPosition == 0) + attributeString = value; + + bool returnEntity = false; + value = String.Empty; + int nextPosition = attributeString.IndexOf ('&', + attributeStringCurrentPosition); + + // if attribute string starts from '&' then it may be (unparsable) entity reference. + if(nextPosition == 0) { + string parsed = ReadAttributeValueEntityReference (); + if(parsed == null) { + // return entity (It is only this case to return entity reference.) + int endEntityPosition = attributeString.IndexOf (';', + attributeStringCurrentPosition); + SetProperties (XmlNodeType.EntityReference, + attributeString.Substring (attributeStringCurrentPosition + 1, + endEntityPosition - attributeStringCurrentPosition - 1), + false, + String.Empty, + false); + attributeStringCurrentPosition = endEntityPosition + 1; + + return true; + } + else + value += parsed; + } + + // Other case always set text node. + while(!returnEntity) { + nextPosition = attributeString.IndexOf ('&', attributeStringCurrentPosition); + if(nextPosition < 0) { + // Reached to the end of value string. + value += attributeString.Substring (attributeStringCurrentPosition); + attributeStringCurrentPosition = -1; + break; + } else if(nextPosition == attributeStringCurrentPosition) { + string parsed = ReadAttributeValueEntityReference (); + if(parsed != null) + value += parsed; + else { + // Found that an entity reference starts from this point. + // Then once stop to parse attribute value and then return text. + value += attributeString.Substring (attributeStringCurrentPosition, + nextPosition - attributeStringCurrentPosition); + break; + } + } else { + value += attributeString.Substring (attributeStringCurrentPosition, + nextPosition - attributeStringCurrentPosition); + attributeStringCurrentPosition = nextPosition; + break; + } + } + + SetProperties(XmlNodeType.Text, + "#text", + false, + value, + false); + + return true; } [MonoTODO] @@ -510,10 +566,10 @@ namespace System.Xml while (startname != endname) { ReadContent (); - endname = this.Name; + endname = this.Name; } - xmlBuffer.Replace(currentTag.ToString (), ""); + xmlBuffer.Replace (currentTag.ToString (), ""); saveToXmlBuffer = false; string InnerXml = xmlBuffer.ToString (); xmlBuffer.Length = 0; @@ -524,20 +580,18 @@ namespace System.Xml [MonoTODO] public override string ReadOuterXml () { - // Still need a Well Formedness check. - // Will wait for Validating reader ;-) if (NodeType == XmlNodeType.Attribute) { - return Name+"=\""+Value+"\""; + return Name + "=\"" + Value + "\""; } else { saveToXmlBuffer = true; - xmlBuffer.Append(currentTag.ToString ()); + xmlBuffer.Append (currentTag.ToString ()); string startname = this.Name; string endname = string.Empty; readState = ReadState.Interactive; while (startname != endname) { ReadContent (); - endname = this.Name; + endname = this.Name; } saveToXmlBuffer = false; string OuterXml = xmlBuffer.ToString (); @@ -566,7 +620,44 @@ namespace System.Xml #endregion - // privates + #region Internals + internal string publicId; + internal string systemId; + + internal void SetReaderContext (string url, XmlParserContext context) + { + parserContext = context; + parserContext.BaseURI = url; + Init (); + } + + internal void SetReaderFragment(TextReader fragment, XmlNodeType fragType) + { + this.reader = fragment; + can_seek = fragment != null && fragment.Peek () != -1; +/* for future use + switch(fragType) + { + case XmlNodeType.Attribute: // attribute content + parserContext.InputState = XmlParserInputState.AttributeValue; + break; + case XmlNodeType.DocumentFragment: // element content + parserContext.InputState = XmlParserInputState.Content; + break; + case XmlNodeType.Element: // one element + parserContext.InputState = XmlParserInputState.StartTag; + break; + case XmlNodeType.Document: // document content + parserContext.InputState = XmlParserInputState.Start; + break; + default: + throw new InvalidOperationException("setting this xml node type not allowed."); + } +*/ + } + #endregion + + #region Privates private XmlParserContext parserContext; @@ -574,6 +665,7 @@ namespace System.Xml private ReadState readState; private int depth; + private int elementDepth; private bool depthDown; private bool popScope; @@ -613,12 +705,20 @@ namespace System.Xml private StringBuilder xmlBuffer; // This is for Read(Inner|Outer)Xml private StringBuilder currentTag; // A buffer for ReadContent for ReadOuterXml private bool saveToXmlBuffer; + private int line = 1; + private int column = 1; + private bool has_peek; + private bool can_seek; + private int peek_char; + + private string attributeString = String.Empty; + private int attributeStringCurrentPosition; private void Init () { readState = ReadState.Initial; - depth = -1; + depth = 0; depthDown = false; popScope = false; @@ -665,6 +765,7 @@ namespace System.Xml this.name = name; this.isEmptyElement = isEmptyElement; this.value = value; + this.elementDepth = depth; if (clearAttributes) ClearAttributes (); @@ -722,12 +823,33 @@ namespace System.Xml private int PeekChar () { - return reader.Peek (); + if (can_seek) + return reader.Peek (); + + if (has_peek) + return peek_char; + + peek_char = reader.Read (); + has_peek = true; + return peek_char; } private int ReadChar () { - int ch = reader.Read (); + int ch; + if (has_peek) { + ch = peek_char; + has_peek = false; + } else { + ch = reader.Read (); + } + + if (ch == '\n') { + line++; + column = 1; + } else { + column++; + } if (saveToXmlBuffer) { xmlBuffer.Append ((char) ch); } @@ -740,28 +862,42 @@ namespace System.Xml // element or text outside of the document element. private bool ReadContent () { - bool more = false; currentTag.Length = 0; if (popScope) { parserContext.NamespaceManager.PopScope (); popScope = false; } - if (depthDown) - --depth; - if (returnEntityReference) { - ++depth; SetEntityReferenceProperties (); - more = true; } else { switch (PeekChar ()) { case '<': ReadChar (); ReadTag (); - more = true; break; + case '\r': + if (whitespaceHandling == WhitespaceHandling.All || + whitespaceHandling == WhitespaceHandling.Significant) + return ReadWhitespace (); + + ReadChar (); + return ReadContent (); + case '\n': + if (whitespaceHandling == WhitespaceHandling.All || + whitespaceHandling == WhitespaceHandling.Significant) + return ReadWhitespace (); + + ReadChar (); + return ReadContent (); + case ' ': + if (whitespaceHandling == WhitespaceHandling.All || + whitespaceHandling == WhitespaceHandling.Significant) + return ReadWhitespace (); + + SkipWhitespace (); + return ReadContent (); case -1: readState = ReadState.EndOfFile; SetProperties ( @@ -771,16 +907,13 @@ namespace System.Xml String.Empty, // value true // clearAttributes ); - more = false; break; default: - ReadText (); - more = true; + ReadText (true); break; } } - - return more; + return this.ReadState != ReadState.EndOfFile; } private void SetEntityReferenceProperties () @@ -844,8 +977,6 @@ namespace System.Xml Expect ('>'); - ++depth; - SetProperties ( XmlNodeType.Element, // nodeType name, // name @@ -853,6 +984,12 @@ namespace System.Xml String.Empty, // value false // clearAttributes ); + + if (!depthDown) + ++depth; + else + depthDown = false; + } // The reader is positioned on the first character @@ -920,9 +1057,10 @@ namespace System.Xml // The reader is positioned on the first character // of the text. - private void ReadText () + private void ReadText (bool cleanValue) { - valueLength = 0; + if (cleanValue) + valueLength = 0; int ch = PeekChar (); @@ -938,14 +1076,8 @@ namespace System.Xml } if (returnEntityReference && valueLength == 0) { - ++depth; SetEntityReferenceProperties (); } else { - if (depth >= 0) { - ++depth; - depthDown = true; - } - SetProperties ( XmlNodeType.Text, // nodeType String.Empty, // name @@ -1120,6 +1252,8 @@ namespace System.Xml // The reader is positioned on the first character // of the target. + // + // Now it also reads XmlDeclaration, this method name became improper... private void ReadProcessingInstruction () { string target = ReadName (); @@ -1138,7 +1272,16 @@ namespace System.Xml AppendValueChar ((char)ch); } +/* for future use + if(target == "xml") && parserContext.InputState != XmlParserInputState.Start) + throw new XmlException("Xml declaration is not allowed here."); + else { + parserContext.InputState = XmlParserInputState.DTD; //for future use + } +*/ SetProperties ( + target == "xml" ? + XmlNodeType.XmlDeclaration : XmlNodeType.ProcessingInstruction, // nodeType target, // name false, // isEmptyElement @@ -1156,20 +1299,18 @@ namespace System.Xml switch (ch) { case '-': - Expect ('-'); - Expect ('-'); + Expect ("--"); ReadComment (); break; case '[': ReadChar (); - Expect ('C'); - Expect ('D'); - Expect ('A'); - Expect ('T'); - Expect ('A'); - Expect ('['); + Expect ("CDATA["); ReadCDATA (); break; + case 'D': + Expect ("DOCTYPE"); + ReadDoctypeDecl (); + break; } } @@ -1229,8 +1370,6 @@ namespace System.Xml AppendValueChar ((char)ch); } - ++depth; - SetProperties ( XmlNodeType.CDATA, // nodeType String.Empty, // name @@ -1240,6 +1379,189 @@ namespace System.Xml ); } + // The reader is positioned on the first character after + // the leading ''); + + parserContext.InternalSubset = xmlBuffer.ToString (); + + // set properties for node + SetProperties ( + XmlNodeType.DocumentType, // nodeType + doctypeName, // name + false, // isEmptyElement + parserContext.InternalSubset, // value + true // clearAttributes + ); + } + + // Read any one of following: + // elementdecl, AttlistDecl, EntityDecl, NotationDecl, + // PI, Comment, Parameter Entity, or doctype termination char(']') + // + // returns a node of some nodeType or null, setting nodeType. + // (if None then ']' was found.) + private void ReadDTDInternalSubset() + { + SkipWhitespace (); + switch(ReadChar ()) + { + case ']': + nodeType = XmlNodeType.None; + break; + case '%': + string peName = ReadName (); + Expect (';'); + nodeType = XmlNodeType.EntityReference; // It's chating a bit;-) + break; + case '<': + switch(ReadChar ()) + { + case '?': + ReadProcessingInstruction (); + break; + case '!': + switch(ReadChar ()) + { + case '-': + Expect ('-'); + ReadComment (); + break; + case 'E': + switch(ReadChar ()) + { + case 'N': + Expect ("TITY"); + ReadEntityDecl (); + break; + case 'L': + Expect ("EMENT"); + ReadElementDecl (); + break; + default: + throw new XmlException ("Syntax Error after ''); + } + + private void ReadEntityDecl() + { + while(ReadChar () != '>'); + } + + private void ReadAttListDecl() + { + while(ReadChar () != '>'); + } + + private void ReadNotationDecl() + { + while(ReadChar () != '>'); + } + + // The reader is positioned on the first 'S' of "SYSTEM". + private string ReadSystemLiteral (bool expectSYSTEM) + { + if(expectSYSTEM) + Expect ("SYSTEM"); + SkipWhitespace (); + int quoteChar = ReadChar (); // apos or quot + xmlBuffer.Length = 0; + saveToXmlBuffer = true; + int c = 0; + while(c != quoteChar) { + c = ReadChar (); + if(c < 0) throw new XmlException ("Unexpected end of stream in ExternalID."); + } + saveToXmlBuffer = false; + xmlBuffer.Remove (xmlBuffer.Length-1, 1); // cut quoteChar + return xmlBuffer.ToString (); + } + + private string ReadPubidLiteral() + { + Expect ("PUBLIC"); + SkipWhitespace (); + int quoteChar = ReadChar (); + xmlBuffer.Length = 0; + saveToXmlBuffer = true; + int c = 0; + while(c != quoteChar) + { + c = ReadChar (); + if(c < 0) throw new XmlException ("Unexpected end of stream in ExternalID."); + if(c != quoteChar && !XmlChar.IsPubidChar (c)) + throw new XmlException("character '" + (char)c + "' not allowed for PUBLIC ID"); + } + ReadChar(); // skips quoteChar + xmlBuffer.Remove (xmlBuffer.Length-1, 1); // cut quoteChar + saveToXmlBuffer = false; + return xmlBuffer.ToString (); + } + // The reader is positioned on the first character // of the name. private string ReadName () @@ -1275,11 +1597,128 @@ namespace System.Xml } } + private void Expect (string expected) + { + int len = expected.Length; + for(int i=0; i< len; i++) + Expect (expected[i]); + } + // Does not consume the first non-whitespace character. private void SkipWhitespace () { + //FIXME: Should not skip if whitespaceHandling == WhiteSpaceHandling.None while (XmlChar.IsWhitespace (PeekChar ())) ReadChar (); } + + private bool ReadWhitespace () + { + valueLength = 0; + int ch = PeekChar (); + do { + AppendValueChar (ReadChar ()); + } while ((ch = PeekChar ()) != -1 && XmlChar.IsWhitespace (ch)); + + if (ch != -1 && ch != '<') + ReadText (false); + else + SetProperties (XmlNodeType.Whitespace, + String.Empty, + false, + CreateValueString (), + true); + + return (PeekChar () != -1); + } + + // read entity reference from attribute string and if parsable then return the value. + private string ReadAttributeValueEntityReference () + { + int endEntityPosition = attributeString.IndexOf(';', + attributeStringCurrentPosition); + string entityName = attributeString.Substring (attributeStringCurrentPosition + 1, + endEntityPosition - attributeStringCurrentPosition - 1); + + attributeStringCurrentPosition = endEntityPosition + 1; + + if(entityName [0] == '#') { + char c; + // character entity + if(entityName [1] == 'x') { + // hexadecimal + c = (char) int.Parse ("0" + entityName.Substring (2), + System.Globalization.NumberStyles.HexNumber); + } else { + // decimal + c = (char) int.Parse (entityName.Substring (1)); + } + return c.ToString(); + } + else { + switch(entityName) + { + case "lt": return "<"; + case "gt": return ">"; + case "amp": return "&"; + case "quot": return "\""; + case "apos": return "'"; + default: return null; + } + } + } + + private string ResolveAttributeValue (string unresolved) + { + if(unresolved == null) return null; + StringBuilder resolved = new StringBuilder(); + int pos = 0; + + int next = unresolved.IndexOf ('&'); + if(next < 0) + return unresolved; + + while(next >= 0) { + if(pos < next) + resolved.Append (unresolved.Substring (pos, next - pos));// - 1); + int endPos = unresolved.IndexOf (';', next+1); + string entityName = + unresolved.Substring (next + 1, endPos - next - 1); + if(entityName [0] == '#') { + char c; + // character entity + if(entityName [1] == 'x') { + // hexadecimal + c = (char) int.Parse ("0" + entityName.Substring (2), + System.Globalization.NumberStyles.HexNumber); + } else { + // decimal + c = (char) int.Parse (entityName.Substring (1)); + } + resolved.Append (c); + } else { + switch(entityName) { + case "lt": resolved.Append ("<"); break; + case "gt": resolved.Append (">"); break; + case "amp": resolved.Append ("&"); break; + case "quot": resolved.Append ("\""); break; + case "apos": resolved.Append ("'"); break; + // With respect to "Value", MS document is helpless + // and the implemention returns inconsistent value + // (e.g. XML: "&ent; &ent;" ---> Value: "&ent; &ent;".) + default: resolved.Append ("&" + entityName + ";"); break; + } + } + pos = endPos + 1; + if(pos > unresolved.Length) + break; + next = unresolved.IndexOf('&', pos); + } + resolved.Append (unresolved.Substring(pos)); + + return resolved.ToString(); + } + + #endregion } }