X-Git-Url: http://wien.tomnetworks.com/gitweb/?a=blobdiff_plain;f=mcs%2Fclass%2FSystem.XML%2FSystem.Xml%2FXmlTextReader.cs;h=f1d5b59e8b04e5f2364e7c63c64d26bd4fe4522d;hb=073481b554c5a73f6954392c6f86a701db817d04;hp=f76f1bba3336a7d2845c27ffb1ad3760edfbb08c;hpb=a085bb96357fc502f5701b1c5557352b6c55bb67;p=mono.git diff --git a/mcs/class/System.XML/System.Xml/XmlTextReader.cs b/mcs/class/System.XML/System.Xml/XmlTextReader.cs index f76f1bba333..f1d5b59e8b0 100644 --- a/mcs/class/System.XML/System.Xml/XmlTextReader.cs +++ b/mcs/class/System.XML/System.Xml/XmlTextReader.cs @@ -9,34 +9,53 @@ // (C) 2001, 2002 Jason Diamond http://injektilo.org/ // -// FIXME: -// -// I haven't checked whether DTD parser runs correct. // -// More strict well-formedness checking should be done. +// Permission is hereby granted, free of charge, to any person obtaining +// a copy of this software and associated documentation files (the +// "Software"), to deal in the Software without restriction, including +// without limitation the rights to use, copy, modify, merge, publish, +// distribute, sublicense, and/or sell copies of the Software, and to +// permit persons to whom the Software is furnished to do so, subject to +// the following conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. // -// NameTables aren't being used completely yet. -// -// Some thought needs to be given to performance. 
There's too many -// strings being allocated. + +// FIXME: // -// Some of the MoveTo methods haven't been implemented yet. +// Some thought needs to be given to performance. // -// xml:space, xml:lang aren't being tracked. +// If current node is on an Attribute, Prefix might be null, and +// in several fields which uses XmlReader, it should be considered. // using System; using System.Collections; +using System.Globalization; using System.IO; +using System.Security.Policy; using System.Text; +using System.Xml.Schema; using Mono.Xml; -using Mono.Xml.Native; namespace System.Xml { +#if NET_2_0 + public class XmlTextReader : XmlReader, + IXmlLineInfo, IXmlNamespaceResolver +#else public class XmlTextReader : XmlReader, IXmlLineInfo +#endif { - WhitespaceHandling whitespaceHandling = WhitespaceHandling.All; #region Constructors protected XmlTextReader () @@ -78,10 +97,15 @@ namespace System.Xml { } - [MonoTODO("Non-filename-url must be supported. Waiting for WebClient")] public XmlTextReader (string url, XmlNameTable nt) - : this (url, new XmlStreamReader (url), nt) { + Uri uri = resolver.ResolveUri (null, url); + Stream s = resolver.GetEntity (uri, null, typeof (Stream)) as Stream; + XmlParserContext ctx = new XmlParserContext (nt, + new XmlNamespaceManager (nt), + String.Empty, + XmlSpace.None); + this.InitializeContext (uri.ToString(), ctx, new XmlStreamReader (s), XmlNodeType.Document); } public XmlTextReader (TextReader input, XmlNameTable nt) @@ -90,7 +114,10 @@ namespace System.Xml } public XmlTextReader (Stream xmlFragment, XmlNodeType fragType, XmlParserContext context) - : this (context.BaseURI, new XmlStreamReader (xmlFragment), fragType, context) + : this (context != null ? 
context.BaseURI : String.Empty, + new XmlStreamReader (xmlFragment), + fragType, + context) { } @@ -104,7 +131,6 @@ namespace System.Xml { } - [MonoTODO("TODO as same as private XmlTextReader(TextReader, XmlNodeType, XmlParserContext)")] public XmlTextReader (string xmlFragment, XmlNodeType fragType, XmlParserContext context) : this (context != null ? context.BaseURI : String.Empty, new StringReader (xmlFragment), @@ -113,11 +139,9 @@ namespace System.Xml { } - // TODO still remains as described at head of this file, - // but it might not be TODO of the constructors... XmlTextReader (string url, TextReader fragment, XmlNodeType fragType, XmlParserContext context) { - this.Initialize (url, context, fragment, fragType); + InitializeContext (url, context, fragment, fragType); } #endregion @@ -126,7 +150,7 @@ namespace System.Xml public override int AttributeCount { - get { return attributes.Count; } + get { return attributeCount; } } public override string BaseURI @@ -134,9 +158,32 @@ namespace System.Xml get { return parserContext.BaseURI; } } +#if NET_2_0 + public override bool CanResolveEntity { + get { return true; } + } + +#endif + + internal bool CharacterChecking { + get { return checkCharacters && normalization; } + set { checkCharacters = value; } + } + + // for XmlReaderSettings.CloseInput support + internal bool CloseInput { + get { return closeInput; } + set { closeInput = value; } + } + public override int Depth { get { + int nodeTypeMod = currentToken.NodeType == XmlNodeType.Element ? 0 : -1; + if (currentAttributeValue >= 0) + return nodeTypeMod + elementDepth + 2; // inside attribute value. 
+ else if (currentAttribute >= 0) + return nodeTypeMod + elementDepth + 1; return elementDepth; } } @@ -145,73 +192,77 @@ namespace System.Xml { get { return parserContext.Encoding; } } +#if NET_2_0 + [MonoTODO] + public EntityHandling EntityHandling { + get { return entityHandling; } + set { entityHandling = value; } + } +#endif - public override bool EOF - { - get - { - return - readState == ReadState.EndOfFile || - readState == ReadState.Closed; - } + public override bool EOF { + get { return readState == ReadState.EndOfFile; } } - public override bool HasValue - { - get { return value != String.Empty; } +#if NET_2_0 + [MonoTODO] + public override Evidence Evidence { + get { return base.Evidence; } } +#endif - public override bool IsDefault - { - get - { - // XmlTextReader does not expand default attributes. - return false; - } + public override bool HasValue { + get { return cursorToken.Value != null; } } - public override bool IsEmptyElement - { - get { return isEmptyElement; } + public override bool IsDefault { + // XmlTextReader does not expand default attributes. 
+ get { return false; } } - public override string this [int i] - { + public override bool IsEmptyElement { + get { return cursorToken.IsEmptyElement; } + } + + public override string this [int i] { get { return GetAttribute (i); } } - public override string this [string name] - { + public override string this [string name] { get { return GetAttribute (name); } } - public override string this [string localName, string namespaceName] - { + public override string this [string localName, string namespaceName] { get { return GetAttribute (localName, namespaceName); } } - public int LineNumber - { - get { return currentInput.LineNumber; } + public int LineNumber { + get { + if (useProceedingLineInfo) + return line; + else + return cursorToken.LineNumber; + } } - public int LinePosition - { - get { return currentInput.LinePosition; } + public int LinePosition { + get { + if (useProceedingLineInfo) + return column; + else + return cursorToken.LinePosition; + } } - public override string LocalName - { - get { return localName; } + public override string LocalName { + get { return cursorToken.LocalName; } } - public override string Name - { - get { return name; } + public override string Name { + get { return cursorToken.Name; } } - public bool Namespaces - { + public bool Namespaces { get { return namespaces; } set { if (readState != ReadState.Initial) @@ -220,559 +271,802 @@ namespace System.Xml } } - public override string NamespaceURI - { - get { return namespaceURI; } + public override string NamespaceURI { + get { return cursorToken.NamespaceURI; } } - public override XmlNameTable NameTable - { + public override XmlNameTable NameTable { get { return parserContext.NameTable; } } - public override XmlNodeType NodeType - { - get { return nodeType; } + public override XmlNodeType NodeType { + get { return cursorToken.NodeType; } } - [MonoTODO] - public bool Normalization - { - get { throw new NotImplementedException (); } - set { throw new NotImplementedException (); } + 
public bool Normalization { + get { return normalization; } + set { normalization = value; } } - public override string Prefix - { - get { return prefix; } + public override string Prefix { + get { return cursorToken.Prefix; } } - public override char QuoteChar - { - get { - // value string holds attribute quotation char. - if (NodeType == XmlNodeType.Attribute) - return value [0]; - else - return '"'; - } +#if NET_2_0 + public bool ProhibitDtd { + get { return prohibitDtd; } + set { prohibitDtd = value; } } +#endif - public override ReadState ReadState - { + public override char QuoteChar { + get { return cursorToken.QuoteChar; } + } + + public override ReadState ReadState { get { return readState; } } - public override string Value - { - get { - if(NodeType == XmlNodeType.Attribute) - return UnescapeAttributeValue(value); - else - return value; - } +#if NET_2_0 + public override XmlReaderSettings Settings { + get { return base.Settings; } } +#endif - public WhitespaceHandling WhitespaceHandling - { + public override string Value { + get { return cursorToken.Value != null ? 
cursorToken.Value : String.Empty; } + } + + public WhitespaceHandling WhitespaceHandling { get { return whitespaceHandling; } set { whitespaceHandling = value; } } - [MonoTODO] - public override string XmlLang - { - get { throw new NotImplementedException (); } + public override string XmlLang { + get { return parserContext.XmlLang; } } - public XmlResolver XmlResolver - { + public XmlResolver XmlResolver { set { resolver = value; } } - [MonoTODO] - public override XmlSpace XmlSpace - { - get { throw new NotImplementedException (); } + public override XmlSpace XmlSpace { + get { return parserContext.XmlSpace; } } #endregion #region Methods - [MonoTODO] public override void Close () { readState = ReadState.Closed; + + cursorToken.Clear (); + currentToken.Clear (); + attributeCount = 0; + if (closeInput && reader != null) + reader.Close (); } public override string GetAttribute (int i) { - if (i > attributes.Count) + if (i >= attributeCount) throw new ArgumentOutOfRangeException ("i is smaller than AttributeCount"); - else - return UnescapeAttributeValue (attributes [orderedAttributes [i]] as string); + else { + return attributeTokens [i].Value; + } } + // MS.NET 1.0 msdn says that this method returns String.Empty + // for absent attribute, but in fact it returns null. + // This description is corrected in MS.NET 1.1 msdn. public override string GetAttribute (string name) { - return attributes.ContainsKey (name) ? 
- UnescapeAttributeValue (attributes [name] as string) : String.Empty; + for (int i = 0; i < attributeCount; i++) + if (attributeTokens [i].Name == name) + return attributeTokens [i].Value; + return null; } - public override string GetAttribute (string localName, string namespaceURI) + private int GetIndexOfQualifiedAttribute (string localName, string namespaceURI) { - foreach (DictionaryEntry entry in attributes) - { - string thisName = entry.Key as string; - - int indexOfColon = thisName.IndexOf (':'); - - if (indexOfColon != -1) { - string thisLocalName = thisName.Substring (indexOfColon + 1); + for (int i = 0; i < attributeCount; i++) { + XmlAttributeTokenInfo ti = attributeTokens [i]; + if (ti.LocalName == localName && ti.NamespaceURI == namespaceURI) + return i; + } + return -1; + } - if (localName == thisLocalName) { - string thisPrefix = thisName.Substring (0, indexOfColon); - string thisNamespaceURI = LookupNamespace (thisPrefix); + internal XmlParserContext GetInternalParserContext () + { + return parserContext; + } - if (namespaceURI == thisNamespaceURI) - return attributes.ContainsKey (thisName) ? - UnescapeAttributeValue (attributes [thisName] as string) : String.Empty; - } - } else if (localName == "xmlns" && namespaceURI == "http://www.w3.org/2000/xmlns/" && thisName == "xmlns") - return attributes.ContainsKey (thisName) ? 
- UnescapeAttributeValue (attributes [thisName] as string) : String.Empty; - } + public override string GetAttribute (string localName, string namespaceURI) + { + int idx = this.GetIndexOfQualifiedAttribute (localName, namespaceURI); + if (idx < 0) + return null; + return attributeTokens [idx].Value; + } - return String.Empty; +#if NET_2_0 + public IDictionary GetNamespacesInScope (XmlNamespaceScope scope) + { + return parserContext.NamespaceManager.GetNamespacesInScope (scope); } +#endif - [MonoTODO] public TextReader GetRemainder () { - throw new NotImplementedException (); + if (peekCharsIndex == peekCharsLength) + return reader; + return new StringReader (new string (peekChars, peekCharsIndex, peekCharsLength - peekCharsIndex) + reader.ReadToEnd ()); } +#if NET_2_0 + public bool HasLineInfo () +#else bool IXmlLineInfo.HasLineInfo () +#endif { return true; } public override string LookupNamespace (string prefix) { - return parserContext.NamespaceManager.LookupNamespace (prefix); + return LookupNamespace (prefix, false); } - public override void MoveToAttribute (int i) +#if NET_2_0 + public override string LookupNamespace (string prefix, bool atomizedName) +#else + internal override string LookupNamespace (string prefix, bool atomizedName) +#endif { - MoveToElement (); - - if (attributes == null || orderedAttributes.Count < i || i < 0) - throw new ArgumentOutOfRangeException ("attribute index out of range."); + return parserContext.NamespaceManager.LookupNamespace (prefix, atomizedName); + } - string name = orderedAttributes [i] as string; - string value = attributes [name] as string; - SetProperties ( - XmlNodeType.Attribute, // nodeType - name, // name - false, // isEmptyElement - value, // value - false // clearAttributes - ); - attributeValuePos = 0; +#if NET_2_0 + string IXmlNamespaceResolver.LookupPrefix (string ns) + { + return LookupPrefix (ns, false); } - public override bool MoveToAttribute (string name) + public string LookupPrefix (string ns, bool 
atomizedName) { - MoveToElement (); - bool match = false; + return parserContext.NamespaceManager.LookupPrefix (ns, atomizedName); + } +#endif - if (attributes == null) - return false; + public override void MoveToAttribute (int i) + { + if (i >= attributeCount) + throw new ArgumentOutOfRangeException ("attribute index out of range."); - if (orderedAttributesEnumerator == null) { - SaveProperties (); - orderedAttributesEnumerator = orderedAttributes.GetEnumerator (); - } + currentAttribute = i; + currentAttributeValue = -1; + cursorToken = attributeTokens [i]; + } - while (orderedAttributesEnumerator.MoveNext ()) { - if(name == orderedAttributesEnumerator.Current as string) { - match = true; - break; + public override bool MoveToAttribute (string name) + { + for (int i = 0; i < attributeCount; i++) { + XmlAttributeTokenInfo ti = attributeTokens [i]; + if (ti.Name == name) { + MoveToAttribute (i); + return true; } } - - if (match) { - string value = attributes [name] as string; - SetProperties ( - XmlNodeType.Attribute, // nodeType - name, // name - false, // isEmptyElement - value, // value - false // clearAttributes - ); - attributeValuePos = 0; - } - - return match; + return false; } - [MonoTODO] public override bool MoveToAttribute (string localName, string namespaceName) { - throw new NotImplementedException (); + int idx = GetIndexOfQualifiedAttribute (localName, namespaceName); + if (idx < 0) + return false; + MoveToAttribute (idx); + return true; } public override bool MoveToElement () { - if (orderedAttributesEnumerator != null) { - orderedAttributesEnumerator = null; - RestoreProperties (); + if (currentToken == null) // for attribute .ctor() + return false; + + if (cursorToken == currentToken) + return false; + + if (currentAttribute >= 0) { + currentAttribute = -1; + currentAttributeValue = -1; + cursorToken = currentToken; return true; } - - return false; + else + return false; } public override bool MoveToFirstAttribute () { + if (attributeCount == 0) 
+ return false; MoveToElement (); return MoveToNextAttribute (); } public override bool MoveToNextAttribute () { - if (attributes == null) + if (currentAttribute == 0 && attributeCount == 0) return false; - - if (orderedAttributesEnumerator == null) { - SaveProperties (); - orderedAttributesEnumerator = orderedAttributes.GetEnumerator (); - } - - if (orderedAttributesEnumerator.MoveNext ()) { - string name = orderedAttributesEnumerator.Current as string; - string value = attributes [name] as string; - SetProperties ( - XmlNodeType.Attribute, // nodeType - name, // name - false, // isEmptyElement - value, // value - false // clearAttributes - ); - attributeValuePos = 0; + if (currentAttribute + 1 < attributeCount) { + currentAttribute++; + currentAttributeValue = -1; + cursorToken = attributeTokens [currentAttribute]; return true; } - - return false; + else + return false; } public override bool Read () { - bool more = false; + if (startNodeType == XmlNodeType.Attribute) { + if (currentAttribute == 0) + return false; // already read. + ClearAttributes (); + IncrementAttributeToken (); + ReadAttributeValueTokens ('"'); + cursorToken = attributeTokens [0]; + currentAttributeValue = -1; + readState = ReadState.Interactive; + return true; + } + bool more = false; readState = ReadState.Interactive; + currentLinkedNodeLineNumber = line; + currentLinkedNodeLinePosition = column; + useProceedingLineInfo = true; + + cursorToken = currentToken; + attributeCount = 0; + currentAttribute = currentAttributeValue = -1; + currentToken.Clear (); + + // It was moved from end of ReadStartTag (). 
+ if (depthUp) { + ++depth; + depthUp = false; + } + + if (shouldSkipUntilEndTag) { + shouldSkipUntilEndTag = false; + return ReadUntilEndTag (); + } + + base64CacheStartsAt = -1; more = ReadContent (); + if (!more && startNodeType == XmlNodeType.Document && currentState != XmlNodeType.EndElement) + throw new XmlException ("Document element did not appear."); + + useProceedingLineInfo = false; return more; } public override bool ReadAttributeValue () { - // 'attributeString' holds real string value (without their - // quotation characters). - // - // 'attributeValuePos' holds current position - // of 'attributeString' while iterating ReadAttribute(). - // It may be: - // -1 if ReadAttributeValue() has already finished. - // 0 if ReadAttributeValue() ready to start reading. - // >0 if ReadAttributeValue() already got 1 or more values - // - // local 'refPosition' holds the position on the - // attributeString which may be used next time. - - if (attributeValuePos < 0) { - SetProperties (XmlNodeType.None, - String.Empty, - false, - String.Empty, - false); - return false; + if (readState == ReadState.Initial && startNodeType == XmlNodeType.Attribute) { + Read (); } - // If not started, then initialize attributeString when parsing is at start. - if (attributeValuePos == 0) - attributeString = - value.Substring (1, value.Length - 2); - - returnEntityReference = false; - value = String.Empty; - int refPosition; - int loop = 0; - - do { - refPosition = attributeString.IndexOf ('&', attributeValuePos); - if (refPosition < 0) { - // Reached to the end of value string. - value += attributeString.Substring (attributeValuePos); - attributeValuePos = -1; - break; - } else if (refPosition == attributeValuePos) { - string parsed = ReadAttributeValueReference (); - if (parsed != null) - value += parsed; - else { - // Found that an entity reference starts from this point. - // reset position to after '&'. 
- attributeValuePos = refPosition; - if (value.Length <= 0) { - int endNamePos = attributeString.IndexOf (";", attributeValuePos); - value = attributeString.Substring (attributeValuePos+1, endNamePos - attributeValuePos - 1); - attributeValuePos += value.Length + 2; - returnEntityReference = true; - } - break; - } - } else { - value += attributeString.Substring (attributeValuePos, - refPosition - attributeValuePos); - attributeValuePos = refPosition; - continue; - } - } while (++loop > 0); + if (currentAttribute < 0) + return false; + XmlAttributeTokenInfo ti = attributeTokens [currentAttribute]; + if (currentAttributeValue < 0) + currentAttributeValue = ti.ValueTokenStartIndex - 1; - if (returnEntityReference) - SetProperties (XmlNodeType.EntityReference, - value, - false, - String.Empty, - false); + if (currentAttributeValue < ti.ValueTokenEndIndex) { + currentAttributeValue++; + cursorToken = attributeValueTokens [currentAttributeValue]; + return true; + } else - SetProperties (XmlNodeType.Text, - "#text", - false, - value, - false); + return false; + } - return true; + private int SkipIgnorableBase64Chars (char [] chars, int charsLength, int i) + { + while (chars [i] == '=' || XmlChar.IsWhitespace (chars [i])) + if (charsLength == ++i) + break; + return i; } - [MonoTODO] public int ReadBase64 (byte [] buffer, int offset, int length) { - throw new NotImplementedException (); + if (offset < 0) + throw new ArgumentOutOfRangeException ("offset", offset, "Offset must be non-negative integer."); + else if (length < 0) + throw new ArgumentOutOfRangeException ("length", length, "Length must be non-negative integer."); + else if (buffer.Length < offset + length) + throw new ArgumentOutOfRangeException ("buffer length is smaller than the sum of offset and length."); + + if (length == 0) // It does not raise an error. 
+ return 0; + + int bufIndex = offset; + int bufLast = offset + length; + + if (base64CacheStartsAt >= 0) { + for (int i = base64CacheStartsAt; i < 3; i++) { + buffer [bufIndex++] = base64Cache [base64CacheStartsAt++]; + if (bufIndex == bufLast) + return bufLast - offset; + } + } + + for (int i = 0; i < 3; i++) + base64Cache [i] = 0; + base64CacheStartsAt = -1; + + int max = (int) System.Math.Ceiling (4.0 / 3 * length); + int additional = max % 4; + if (additional > 0) + max += 4 - additional; + char [] chars = new char [max]; + int charsLength = ReadChars (chars, 0, max); + + byte b = 0; + byte work = 0; + bool loop = true; + for (int i = 0; i < charsLength - 3; i++) { + if ((i = SkipIgnorableBase64Chars (chars, charsLength, i)) == charsLength) + break; + b = (byte) (GetBase64Byte (chars [i]) << 2); + if (bufIndex < bufLast) + buffer [bufIndex] = b; + else { + if (base64CacheStartsAt < 0) + base64CacheStartsAt = 0; + base64Cache [0] = b; + } + // charsLength mod 4 might not equals to 0. 
+ if (++i == charsLength) + break; + if ((i = SkipIgnorableBase64Chars (chars, charsLength, i)) == charsLength) + break; + b = GetBase64Byte (chars [i]); + work = (byte) (b >> 4); + if (bufIndex < bufLast) { + buffer [bufIndex] += work; + bufIndex++; + } + else + base64Cache [0] += work; + + work = (byte) ((b & 0xf) << 4); + if (bufIndex < bufLast) { + buffer [bufIndex] = work; + } + else { + if (base64CacheStartsAt < 0) + base64CacheStartsAt = 1; + base64Cache [1] = work; + } + + if (++i == charsLength) + break; + if ((i = SkipIgnorableBase64Chars (chars, charsLength, i)) == charsLength) + break; + b = GetBase64Byte (chars [i]); + work = (byte) (b >> 2); + if (bufIndex < bufLast) { + buffer [bufIndex] += work; + bufIndex++; + } + else + base64Cache [1] += work; + + work = (byte) ((b & 3) << 6); + if (bufIndex < bufLast) + buffer [bufIndex] = work; + else { + if (base64CacheStartsAt < 0) + base64CacheStartsAt = 2; + base64Cache [2] = work; + } + if (++i == charsLength) + break; + if ((i = SkipIgnorableBase64Chars (chars, charsLength, i)) == charsLength) + break; + work = GetBase64Byte (chars [i]); + if (bufIndex < bufLast) { + buffer [bufIndex] += work; + bufIndex++; + } + else + base64Cache [2] += work; + } + return System.Math.Min (bufLast - offset, bufIndex - offset); } - [MonoTODO] public int ReadBinHex (byte [] buffer, int offset, int length) { - throw new NotImplementedException (); + if (offset < 0) + throw new ArgumentOutOfRangeException ("offset", offset, "Offset must be non-negative integer."); + else if (length < 0) + throw new ArgumentOutOfRangeException ("length", length, "Length must be non-negative integer."); + else if (buffer.Length < offset + length) + throw new ArgumentOutOfRangeException ("buffer length is smaller than the sum of offset and length."); + + if (length == 0) + return 0; + + char [] chars = new char [length * 2]; + int charsLength = ReadChars (chars, 0, length * 2); + return XmlConvert.FromBinHexString (chars, offset, charsLength, 
buffer); } - [MonoTODO] public int ReadChars (char [] buffer, int offset, int length) { - throw new NotImplementedException (); + return ReadCharsInternal (buffer, offset, length); } +#if NET_2_0 + public override string ReadString () + { + return ReadStringInternal (); + } +#elif NET_1_1 +#else public override string ReadInnerXml () { - if (readState != ReadState.Interactive) - return String.Empty; - - switch (NodeType) { - case XmlNodeType.Attribute: - return value.Substring (1, value.Length - 2); - case XmlNodeType.Element: - if (IsEmptyElement) - return String.Empty; - - int startDepth = depth; - - if (innerXmlBuilder == null) - innerXmlBuilder = new StringBuilder (); - innerXmlBuilder.Length = 0; - do { - ReadContent (); - if (NodeType != XmlNodeType.EndElement || depth + 1 > startDepth) - innerXmlBuilder.Append (currentTag); - } while (depth >= startDepth); - - string xml = innerXmlBuilder.ToString (); - innerXmlBuilder.Length = 0; - return xml; - case XmlNodeType.None: - // MS document is incorrect. Seems not to progress. - return String.Empty; - default: - Read (); - return String.Empty; - } + return ReadInnerXmlInternal (); } public override string ReadOuterXml () { - if (readState != ReadState.Interactive) - return String.Empty; + return ReadOuterXmlInternal (); + } - switch (NodeType) { - case XmlNodeType.Attribute: - // strictly incompatible with MS... (it holds spaces attribute between name, value and "=" char (very trivial). - return String.Format ("{0}={1}{2}{1}", Name, QuoteChar, ReadInnerXml ()); - case XmlNodeType.Element: - bool isEmpty = IsEmptyElement; - string startTag = currentTag.ToString (); - string name = Name; + public override string ReadString () + { + return ReadStringInternal (); + } +#endif - if (NodeType == XmlNodeType.Element && !isEmpty) - return String.Format ("{0}{1}", startTag, ReadInnerXml (), name); - else - return currentTag.ToString (); - case XmlNodeType.None: - // MS document is incorrect. Seems not to progress. 
- return String.Empty; - default: - Read (); - return String.Empty; - } + public void ResetState () + { + throw new InvalidOperationException ("Cannot call ResetState when parsing an XML fragment."); + Init (); } - public override string ReadString () +#if NET_2_0 + [MonoTODO] + public override bool ReadValueAsBoolean () { - if (readStringBuffer == null) - readStringBuffer = new StringBuilder (); - readStringBuffer.Length = 0; + return base.ReadValueAsBoolean (); + } - switch (NodeType) { - default: - return String.Empty; - case XmlNodeType.Element: - if (IsEmptyElement) - return String.Empty; - do { - Read (); - switch (NodeType) { - case XmlNodeType.Text: - case XmlNodeType.CDATA: - case XmlNodeType.Whitespace: - case XmlNodeType.SignificantWhitespace: - readStringBuffer.Append (Value); - continue; - } - break; - } while (true); - break; - case XmlNodeType.Text: - case XmlNodeType.CDATA: - case XmlNodeType.Whitespace: - case XmlNodeType.SignificantWhitespace: - do { - switch (NodeType) { - case XmlNodeType.Text: - case XmlNodeType.CDATA: - case XmlNodeType.Whitespace: - case XmlNodeType.SignificantWhitespace: - readStringBuffer.Append (Value); - Read (); - continue; - } - break; - } while (true); - break; - } - string ret = readStringBuffer.ToString (); - readStringBuffer.Length = 0; - return ret; + [MonoTODO] + public override DateTime ReadValueAsDateTime () + { + return base.ReadValueAsDateTime (); } [MonoTODO] - public void ResetState () + public override decimal ReadValueAsDecimal () + { + return base.ReadValueAsDecimal (); + } + + [MonoTODO] + public override double ReadValueAsDouble () + { + return base.ReadValueAsDouble (); + } + + [MonoTODO] + public override int ReadValueAsInt32 () + { + return base.ReadValueAsInt32 (); + } + + [MonoTODO] + public override long ReadValueAsInt64 () + { + return base.ReadValueAsInt64 (); + } + + [MonoTODO] + public override ICollection ReadValueAsList () + { + return base.ReadValueAsList (); + } + + [MonoTODO] + public 
override float ReadValueAsSingle () + { + return base.ReadValueAsSingle (); + } + + [MonoTODO] + public override string ReadValueAsString () + { + return ReadString (); + } + + [MonoTODO] + public override object ReadValueAs (Type type) + { + return base.ReadValueAs (type); + } + + [MonoTODO] + public override object ReadValueAs (Type type, IXmlNamespaceResolver resolver) { - throw new NotImplementedException (); + return base.ReadValueAs (type, resolver); } +#endif public override void ResolveEntity () { - // XmlTextReaders don't resolve entities. + // XmlTextReader does not resolve entities. throw new InvalidOperationException ("XmlTextReader cannot resolve external entities."); } +#if NET_2_0 + [MonoTODO ("Implement for performance reason")] + public override void Skip () + { + base.Skip (); + } +#endif #endregion #region Internals // Parsed DTD Objects - internal DTDObjectModel currentSubset; +#if DTD_HANDLE_EVENTS + internal event ValidationEventHandler ValidationEventHandler; +#endif - internal void Initialize (string url, XmlParserContext context, TextReader fragment, XmlNodeType fragType) + internal DTDObjectModel DTD { + get { return parserContext.Dtd; } + } + + internal XmlResolver Resolver { + get { return resolver; } + } + #endregion + + #region Privates + internal class XmlTokenInfo { - parserContext = context; - if (context == null) { - XmlNameTable nt = new NameTable (); - parserContext = new XmlParserContext (nt, - new XmlNamespaceManager (nt), - String.Empty, - XmlSpace.None); + public XmlTokenInfo (XmlTextReader xtr, bool isPrimaryToken) + { + this.isPrimaryToken = isPrimaryToken; + Reader = xtr; + Clear (); } - if (url != null && url != String.Empty) - parserContext.BaseURI = url; - Init (); - switch (fragType) { - case XmlNodeType.Attribute: - value = "''"; - break; - case XmlNodeType.Element: - allowMultipleRoot = true; - break; - case XmlNodeType.Document: - break; - default: - throw new XmlException (String.Format ("NodeType {0} is not 
allowed to create XmlTextReader.", fragType)); + bool isPrimaryToken; + string valueCache; + + protected XmlTextReader Reader; + + public string Name; + public string LocalName; + public string Prefix; + public string NamespaceURI; + public bool IsEmptyElement; + public char QuoteChar; + public int LineNumber; + public int LinePosition; + + public XmlNodeType NodeType; + + public virtual string Value { + get { + if (valueCache != null) + return valueCache; + switch (NodeType) { + case XmlNodeType.Text: + case XmlNodeType.SignificantWhitespace: + case XmlNodeType.Whitespace: + case XmlNodeType.Comment: + case XmlNodeType.CDATA: + case XmlNodeType.ProcessingInstruction: + valueCache = Reader.CreateValueString (); + return valueCache; + } + return null; + } + set { valueCache = value; } + } + + public virtual void Clear () + { + valueCache = null; + NodeType = XmlNodeType.None; + Name = LocalName = Prefix = NamespaceURI = String.Empty; + IsEmptyElement = false; + QuoteChar = '"'; + LineNumber = LinePosition = 0; + } + + internal virtual void FillNames () + { + if (Reader.Namespaces) { + int indexOfColon = -1; + switch (NodeType) { + case XmlNodeType.Attribute: + case XmlNodeType.Element: + case XmlNodeType.EndElement: + indexOfColon = Name.IndexOf (':'); + break; + } + + if (indexOfColon == -1) { + Prefix = String.Empty; + LocalName = Name; + } else { + // This improves speed by at least nearly 5%, but eats more memory at least nearly 0.3% + // However, this might be reverted if NameTable is got improved. 
+ char [] nameArr = Name.ToCharArray (); + Prefix = Reader.NameTable.Add (nameArr, 0, indexOfColon); + LocalName = Reader.NameTable.Add (nameArr, indexOfColon + 1, nameArr.Length - indexOfColon - 1); +// Prefix = Reader.NameTable.Add (Name.Substring (0, indexOfColon)); +// LocalName = Reader.NameTable.Add (Name.Substring (indexOfColon + 1)); + } + + // NamespaceURI + switch (NodeType) { + case XmlNodeType.Attribute: + if (Prefix.Length == 0) + NamespaceURI = string.Empty; + else + NamespaceURI = Reader.LookupNamespace (Prefix, true); + break; + + case XmlNodeType.Element: + case XmlNodeType.EndElement: + NamespaceURI = Reader.LookupNamespace (Prefix, true); + break; + default: + NamespaceURI = ""; + break; + } + } else { + Prefix = String.Empty; + LocalName = Name; + } + } + } + + internal class XmlAttributeTokenInfo : XmlTokenInfo + { + public XmlAttributeTokenInfo (XmlTextReader reader) + : base (reader, false) + { + NodeType = XmlNodeType.Attribute; } - this.currentInput = new XmlParserInput (fragment, url); - StreamReader sr = fragment as StreamReader; + public int ValueTokenStartIndex; + public int ValueTokenEndIndex; + string valueCache; + bool cachedNormalization; + StringBuilder tmpBuilder = new StringBuilder (); + + public override string Value { + get { + if (cachedNormalization != Reader.Normalization) + valueCache = null; + if (valueCache != null) + return valueCache; + + cachedNormalization = Reader.Normalization; + + // An empty value should return String.Empty. 
+ if (ValueTokenStartIndex == ValueTokenEndIndex) { + XmlTokenInfo ti = Reader.attributeValueTokens [ValueTokenStartIndex]; + if (ti.NodeType == XmlNodeType.EntityReference) + valueCache = String.Concat ("&", ti.Name, ";"); + else + valueCache = ti.Value; + if (cachedNormalization) + NormalizeSpaces (); + return valueCache; + } + + tmpBuilder.Length = 0; + for (int i = ValueTokenStartIndex; i <= ValueTokenEndIndex; i++) { + XmlTokenInfo ti = Reader.attributeValueTokens [i]; + if (ti.NodeType == XmlNodeType.Text) + tmpBuilder.Append (ti.Value); + else { + tmpBuilder.Append ('&'); + tmpBuilder.Append (ti.Name); + tmpBuilder.Append (';'); + } + } + + valueCache = tmpBuilder.ToString (); + if (cachedNormalization) + NormalizeSpaces (); + return valueCache; + } + + set { valueCache = value; } + } + + public override void Clear () + { + base.Clear (); + valueCache = null; + NodeType = XmlNodeType.Attribute; + ValueTokenStartIndex = ValueTokenEndIndex = 0; + } + + internal override void FillNames () + { + base.FillNames (); + if (Prefix == "xmlns" || Name == "xmlns") + NamespaceURI = XmlNamespaceManager.XmlnsXmlns; + } + + private void NormalizeSpaces () + { + tmpBuilder.Length = 0; + for (int i = 0; i < valueCache.Length; i++) + switch (valueCache [i]) { + case '\r': + if (i + 1 < valueCache.Length && valueCache [i + 1] == '\n') + i++; + goto case '\n'; + case '\t': + case '\n': + tmpBuilder.Append (' '); + break; + default: + tmpBuilder.Append (valueCache [i]); + break; + } + valueCache = tmpBuilder.ToString (); + } } - #endregion - #region Privates + private XmlTokenInfo cursorToken; + private XmlTokenInfo currentToken; + private XmlAttributeTokenInfo currentAttributeToken; + private XmlTokenInfo currentAttributeValueToken; + private XmlAttributeTokenInfo [] attributeTokens = new XmlAttributeTokenInfo [10]; + private XmlTokenInfo [] attributeValueTokens = new XmlTokenInfo [10]; + private int currentAttribute; + private int currentAttributeValue; + private int 
attributeCount; private XmlParserContext parserContext; - private XmlParserInput currentInput; - private Stack parserInputStack = new Stack (); private ReadState readState; private int depth; private int elementDepth; - private bool depthDown; + private bool depthUp; private bool popScope; - private Stack elementStack; - private Stack baseURIStack; - private bool haveEnteredDocument; - private bool allowMultipleRoot = false; - - private XmlNodeType nodeType; - private string name; - private string prefix; - private string localName; - private string namespaceURI; - private bool isEmptyElement; - private string value; - - private XmlNodeType saveNodeType; - private string saveName; - private string savePrefix; - private string saveLocalName; - private string saveNamespaceURI; - private bool saveIsEmptyElement; - - private Hashtable attributes; - private ArrayList orderedAttributes; - private IEnumerator orderedAttributesEnumerator; + + private string [] elementNames; + int elementNameStackPos; + + private bool allowMultipleRoot; + + private bool isStandalone; private bool returnEntityReference; private string entityReferenceName; @@ -780,62 +1074,75 @@ namespace System.Xml private char [] nameBuffer; private int nameLength; private int nameCapacity; - private const int initialNameCapacity = 256; + private const int initialNameCapacity = 32; private char [] valueBuffer; private int valueLength; private int valueCapacity; - private const int initialValueCapacity = 8192; - - // A buffer for ReadContent for ReadOuterXml - private StringBuilder currentTag { - get { - return currentInput.CurrentMarkup; - } - } - - private string attributeString = String.Empty; - private int attributeValuePos; - // This should be only referenced(used) by ReadInnerXml(). Kind of flyweight pattern. 
- private StringBuilder innerXmlBuilder; - private StringBuilder readStringBuffer; - - // Parameter entity placeholder - private Hashtable parameterEntities = new Hashtable (); - int dtdIncludeSect; - - private XmlResolver resolver = new XmlUrlResolver (); - + private const int initialValueCapacity = 256; + + private char [] currentTagBuffer; + private int currentTagLength; + private int currentTagCapacity; + private const int initialCurrentTagCapacity = 256; + + private TextReader reader; + private char [] peekChars; + private int peekCharsIndex; + private int peekCharsLength; + private const int peekCharCapacity = 1024; + + private int line; + private int column; + + private int currentLinkedNodeLineNumber; + private int currentLinkedNodeLinePosition; + private bool useProceedingLineInfo; + + private XmlNodeType startNodeType; + // State machine attribute. + // XmlDeclaration: after the first node. + // DocumentType: after doctypedecl + // Element: inside document element + // EndElement: after document element + private XmlNodeType currentState; + + // For ReadChars()/ReadBase64()/ReadBinHex() + private bool shouldSkipUntilEndTag; + private byte [] base64Cache = new byte [3]; + private int base64CacheStartsAt; + + // These values are never re-initialized. 
private bool namespaces = true; + private WhitespaceHandling whitespaceHandling = WhitespaceHandling.All; + private XmlResolver resolver = new XmlUrlResolver (); + private bool normalization = false; - private XmlException ReaderError (string message) - { - return new XmlException (message, LineNumber, LinePosition); - } + private bool checkCharacters; + private bool prohibitDtd = false; + private bool closeInput = true; + private EntityHandling entityHandling; // 2.0 private void Init () { + currentToken = new XmlTokenInfo (this, true); + cursorToken = currentToken; + currentAttribute = -1; + currentAttributeValue = -1; + attributeCount = 0; + readState = ReadState.Initial; + allowMultipleRoot = false; depth = 0; - depthDown = false; - - popScope = false; - elementStack = new Stack(); - baseURIStack = new Stack(); - haveEnteredDocument = false; + elementDepth = 0; + depthUp = false; - nodeType = XmlNodeType.None; - name = String.Empty; - prefix = String.Empty; - localName = string.Empty; - isEmptyElement = false; - value = String.Empty; - - attributes = new Hashtable (); - orderedAttributes = new ArrayList (); - orderedAttributesEnumerator = null; + popScope = allowMultipleRoot = false; + elementNames = new string [10]; + elementNameStackPos = 0; + isStandalone = false; returnEntityReference = false; entityReferenceName = String.Empty; @@ -846,7 +1153,102 @@ namespace System.Xml valueBuffer = new char [initialValueCapacity]; valueLength = 0; valueCapacity = initialValueCapacity; + + currentTagBuffer = new char [initialCurrentTagCapacity]; + currentTagLength = 0; + currentTagCapacity = initialCurrentTagCapacity; + + peekCharsIndex = 0; + peekCharsLength = 0; + if (peekChars == null) + peekChars = new char [peekCharCapacity]; + + line = 1; + column = 1; + currentTagLength = 0; + + currentLinkedNodeLineNumber = currentLinkedNodeLinePosition = 0; + useProceedingLineInfo = false; + + currentState = XmlNodeType.None; + + shouldSkipUntilEndTag = false; + 
base64CacheStartsAt = -1; + + checkCharacters = true; +#if NET_2_0 + if (Settings != null) + checkCharacters = Settings.CheckCharacters; +#endif + prohibitDtd = false; + closeInput = true; + entityHandling = EntityHandling.ExpandCharEntities; + } + + private void InitializeContext (string url, XmlParserContext context, TextReader fragment, XmlNodeType fragType) + { + startNodeType = fragType; + parserContext = context; + if (context == null) { + XmlNameTable nt = new NameTable (); + parserContext = new XmlParserContext (nt, + new XmlNamespaceManager (nt), + String.Empty, + XmlSpace.None); + } + + if (url != null && url.Length > 0) { + Uri uri = null; + try { + uri = new Uri (url); + } catch (Exception) { + string path = Path.GetFullPath ("./a"); + uri = new Uri (new Uri (path), url); + } + parserContext.BaseURI = uri.ToString (); + } + + Init (); + + reader = fragment; + + switch (fragType) { + case XmlNodeType.Attribute: + reader = new StringReader (fragment.ReadToEnd ().Replace ("\"", """)); + break; + case XmlNodeType.Element: + currentState = XmlNodeType.Element; + allowMultipleRoot = true; + break; + case XmlNodeType.Document: + break; + default: + throw new XmlException (String.Format ("NodeType {0} is not allowed to create XmlTextReader.", fragType)); + } + } + +#if NET_2_0 + [MonoTODO ("Test")] + internal ConformanceLevel Conformance { + set { + if (value == ConformanceLevel.Fragment) { + currentState = XmlNodeType.Element; + allowMultipleRoot = true; + } + } + } + + internal void AdjustLineInfoOffset (int lineNumberOffset, int linePositionOffset) + { + line += lineNumberOffset; + column += linePositionOffset; + } + + internal void SetNameTable (XmlNameTable nameTable) + { + parserContext.NameTable = nameTable; } +#endif // Use this method rather than setting the properties // directly so that all the necessary properties can @@ -860,79 +1262,96 @@ namespace System.Xml string value, bool clearAttributes) { - this.nodeType = nodeType; - this.name = name; - 
this.isEmptyElement = isEmptyElement; - this.value = value; + SetProperties (currentToken, nodeType, name, isEmptyElement, value, clearAttributes); + currentToken.LineNumber = this.currentLinkedNodeLineNumber; + currentToken.LinePosition = this.currentLinkedNodeLinePosition; + } + + private void SetProperties ( + XmlTokenInfo token, + XmlNodeType nodeType, + string name, + bool isEmptyElement, + string value, + bool clearAttributes) + { + token.Clear (); + token.NodeType = nodeType; + token.Name = name; + token.IsEmptyElement = isEmptyElement; + token.Value = value; this.elementDepth = depth; if (clearAttributes) ClearAttributes (); - if (namespaces) { - int indexOfColon = name.IndexOf (':'); - - if (indexOfColon == -1) { - prefix = String.Empty; - localName = name; - } else { - prefix = name.Substring (0, indexOfColon); - localName = name.Substring (indexOfColon + 1); - } - } else { - prefix = String.Empty; - localName = name; - } - - namespaceURI = LookupNamespace (prefix); + token.FillNames (); } - private void SaveProperties () + private void ClearAttributes () { - saveNodeType = nodeType; - saveName = name; - savePrefix = prefix; - saveLocalName = localName; - saveNamespaceURI = namespaceURI; - saveIsEmptyElement = isEmptyElement; - // An element's value is always String.Empty. 
+ for (int i = 0; i < attributeCount; i++) + attributeTokens [i].Clear (); + attributeCount = 0; + currentAttribute = -1; + currentAttributeValue = -1; } - private void RestoreProperties () + private int PeekChar () { - nodeType = saveNodeType; - name = saveName; - prefix = savePrefix; - localName = saveLocalName; - namespaceURI = saveNamespaceURI; - isEmptyElement = saveIsEmptyElement; - value = String.Empty; + if (peekCharsLength == peekCharsIndex) { + if (!ReadTextReader ()) + return -1; + return PeekChar (); + } + else { + char c = peekChars [peekCharsIndex]; + if (c != 0) return c; + else return -1; + } } - private void AddAttribute (string name, string value) + private int ReadChar () { - attributes.Add (name, value); - orderedAttributes.Add (name); - } + int ch; - private void ClearAttributes () - { - if (attributes.Count > 0) { - attributes.Clear (); - orderedAttributes.Clear (); + if (peekCharsLength == peekCharsIndex) { + if (!ReadTextReader ()) + return -1; + return ReadChar (); } + ch = peekChars [peekCharsIndex++]; - orderedAttributesEnumerator = null; + if (ch == '\n') { + line++; + column = 1; + } else if (ch == 0) { + return -1; + } else { + column++; + } + if (currentState != XmlNodeType.Element) + AppendCurrentTagChar (ch); + return ch; } - private int PeekChar () + private bool ReadTextReader () { - return currentInput.PeekChar (); + peekCharsIndex = 0; + peekCharsLength = reader.Read (peekChars, 0, peekCharCapacity); + if (peekCharsLength == 0) + return false; + return true; } - private int ReadChar () + private string ExpandSurrogateChar (int ch) { - return currentInput.ReadChar (); + if (ch < Char.MaxValue) + return ((char) ch).ToString (); + else { + char [] tmp = new char [] {(char) (ch / 0x10000 + 0xD800 - 1), (char) (ch % 0x10000 + 0xDC00)}; + return new string (tmp); + } } // This should really keep track of some state so @@ -940,45 +1359,52 @@ namespace System.Xml // element or text outside of the document element. 
private bool ReadContent () { - currentTag.Length = 0; + currentTagLength = 0; if (popScope) { parserContext.NamespaceManager.PopScope (); popScope = false; } - if (returnEntityReference) { + if (returnEntityReference) SetEntityReferenceProperties (); - } else { - switch (PeekChar ()) { - case '<': - ReadChar (); - ReadTag (); - break; - case '\r': goto case ' '; - case '\n': goto case ' '; - case '\t': goto case ' '; - case ' ': - if (whitespaceHandling == WhitespaceHandling.All || - whitespaceHandling == WhitespaceHandling.Significant) - return ReadWhitespace (); - - SkipWhitespace (); - return ReadContent (); - case -1: - if (depth > 0) - throw new XmlException ("unexpected end of file. Current depth is " + depth); + else { + int c = PeekChar (); + if (c == -1) { readState = ReadState.EndOfFile; + ClearValueBuffer (); SetProperties ( XmlNodeType.None, // nodeType String.Empty, // name false, // isEmptyElement - String.Empty, // value + null, // value true // clearAttributes ); - break; - default: - ReadText (true); - break; + if (depth > 0) + throw new XmlException ("unexpected end of file. Current depth is " + depth); + + return false; + } else { + switch ((char) c) { + case '<': + ReadChar (); + ReadTag (); + break; + case '\r': goto case ' '; + case '\n': goto case ' '; + case '\t': goto case ' '; + case ' ': + if (whitespaceHandling == WhitespaceHandling.All || + whitespaceHandling == WhitespaceHandling.Significant) + ReadWhitespace (); + else { + SkipWhitespace (); + return ReadContent (); + } + break; + default: + ReadText (true); + break; + } } } return this.ReadState != ReadState.EndOfFile; @@ -986,11 +1412,21 @@ namespace System.Xml private void SetEntityReferenceProperties () { + DTDEntityDeclaration decl = DTD != null ? 
DTD.EntityDecls [entityReferenceName] : null; + if (this.isStandalone) + if (DTD == null || decl == null || !decl.IsInternalSubset) + throw new XmlException (this as IXmlLineInfo, + "Standalone document must not contain any references to an non-internally declared entity."); + if (decl != null && decl.NotationName != null) + throw new XmlException (this as IXmlLineInfo, + "Reference to any unparsed entities is not allowed here."); + + ClearValueBuffer (); SetProperties ( XmlNodeType.EntityReference, // nodeType entityReferenceName, // name false, // isEmptyElement - String.Empty, // value + null, // value true // clearAttributes ); @@ -1024,64 +1460,128 @@ namespace System.Xml // The leading '<' has already been consumed. private void ReadStartTag () { + if (currentState == XmlNodeType.EndElement) + throw new XmlException (this as IXmlLineInfo, + "Multiple document element was detected."); + currentState = XmlNodeType.Element; + parserContext.NamespaceManager.PushScope (); - string name = ReadName (); - if (haveEnteredDocument && elementStack.Count == 0 && !allowMultipleRoot) - throw ReaderError("document has terminated, cannot open new element"); + currentLinkedNodeLineNumber = line; + currentLinkedNodeLinePosition = column; - haveEnteredDocument = true; - SkipWhitespace (); + string name = ReadName (); + if (currentState == XmlNodeType.EndElement) + throw new XmlException (this as IXmlLineInfo,"document has terminated, cannot open new element"); bool isEmptyElement = false; ClearAttributes (); - if (XmlConstructs.IsNameStart (PeekChar ())) - ReadAttributes (); + SkipWhitespace (); + if (XmlChar.IsFirstNameChar (PeekChar ())) + ReadAttributes (false); + cursorToken = this.currentToken; + + // fill namespaces + for (int i = 0; i < attributeCount; i++) + attributeTokens [i].FillNames (); + + // quick name check + for (int i = 0; i < attributeCount; i++) { + for (int j = i + 1; j < attributeCount; j++) + if (Object.ReferenceEquals (attributeTokens [i].Name, 
attributeTokens [j].Name) || + (Object.ReferenceEquals (attributeTokens [i].LocalName, attributeTokens [j].LocalName) && + Object.ReferenceEquals (attributeTokens [i].NamespaceURI, attributeTokens [j].NamespaceURI))) + throw new XmlException (this as IXmlLineInfo, + "Attribute name and qualified name must be identical."); + } + string baseUri = GetAttribute ("xml:base"); + if (baseUri != null) { + if (this.resolver != null) + parserContext.BaseURI = resolver.ResolveUri (new Uri (BaseURI), baseUri).ToString (); + else + parserContext.BaseURI = baseUri; + } + string xmlLang = GetAttribute ("xml:lang"); + if (xmlLang != null) + parserContext.XmlLang = xmlLang; + string xmlSpaceAttr = GetAttribute ("xml:space"); + if (xmlSpaceAttr != null) { + if (xmlSpaceAttr == "preserve") + parserContext.XmlSpace = XmlSpace.Preserve; + else if (xmlSpaceAttr == "default") + parserContext.XmlSpace = XmlSpace.Default; + else + throw new XmlException (this as IXmlLineInfo,String.Format ("Invalid xml:space value: {0}", xmlSpaceAttr)); + } if (PeekChar () == '/') { ReadChar (); isEmptyElement = true; - depthDown = true; popScope = true; } else { - elementStack.Push (name); - baseURIStack.Push (attributes ["xml:base"] != null ? 
- attributes ["xml:base"] : BaseURI); + depthUp = true; + PushElementName (name); + parserContext.PushScope (); } Expect ('>'); - SetProperties ( XmlNodeType.Element, // nodeType name, // name isEmptyElement, // isEmptyElement - String.Empty, // value + null, // value false // clearAttributes ); - if (!depthDown) - ++depth; - else - depthDown = false; + if (LookupNamespace (Prefix) == null) + throw new XmlException (String.Format ("'{0}' is undeclared namespace.", Prefix)); + try { + for (int i = 0; i < attributeCount; i++) { + MoveToAttribute (i); + if (LookupNamespace (Prefix) == null) + throw new XmlException (String.Format ("'{0}' is undeclared namespace.", Prefix)); + } + } finally { + MoveToElement (); + } + + if (IsEmptyElement) + CheckCurrentStateUpdate (); + } + private void PushElementName (string name) + { + if (elementNames.Length == elementNameStackPos) { + string [] newArray = new string [elementNames.Length * 2]; + Array.Copy (elementNames, 0, newArray, 0, elementNameStackPos); + elementNames = newArray; + } + elementNames [elementNameStackPos++] = name; } // The reader is positioned on the first character // of the element's name. 
private void ReadEndTag () { + if (currentState != XmlNodeType.Element) + throw new XmlException (this as IXmlLineInfo, + "End tag cannot appear in this state."); + + currentLinkedNodeLineNumber = line; + currentLinkedNodeLinePosition = column; + string name = ReadName (); - if (elementStack.Count == 0) - throw ReaderError("closing element without matching opening element"); - if ((string)elementStack.Pop() != name) - throw ReaderError("unmatched closing element"); - baseURIStack.Pop (); + if (elementNameStackPos == 0) + throw new XmlException (this as IXmlLineInfo,"closing element without matching opening element"); + string expected = elementNames [--elementNameStackPos]; + if (expected != name) + throw new XmlException (this as IXmlLineInfo,String.Format ("unmatched closing element: expected {0} but found {1}", expected, name)); + parserContext.PopScope (); - SkipWhitespace (); - Expect ('>'); + ExpectAfterWhitespace ('>'); --depth; @@ -1089,29 +1589,43 @@ namespace System.Xml XmlNodeType.EndElement, // nodeType name, // name false, // isEmptyElement - String.Empty, // value + null, // value true // clearAttributes ); popScope = true; + + CheckCurrentStateUpdate (); } - private void AppendNameChar (int ch) + private void CheckCurrentStateUpdate () { - CheckNameCapacity (); - nameBuffer [nameLength++] = (char)ch; + if (depth == 0 && !allowMultipleRoot && (IsEmptyElement || NodeType == XmlNodeType.EndElement)) + currentState = XmlNodeType.EndElement; } - private void CheckNameCapacity () + private void AppendNameChar (int ch) { - if (nameLength == nameCapacity) { - nameCapacity = nameCapacity * 2; - char [] oldNameBuffer = nameBuffer; - nameBuffer = new char [nameCapacity]; - Array.Copy (oldNameBuffer, nameBuffer, nameLength); + if (nameLength == nameCapacity) + ExpandNameCapacity (); + if (ch < Char.MaxValue) + nameBuffer [nameLength++] = (char) ch; + else { + nameBuffer [nameLength++] = (char) (ch / 0x10000 + 0xD800 - 1); + if (nameLength == nameCapacity) + 
ExpandNameCapacity (); + nameBuffer [nameLength++] = (char) (ch % 0x10000 + 0xDC00); } } + private void ExpandNameCapacity () + { + nameCapacity = nameCapacity * 2; + char [] oldNameBuffer = nameBuffer; + nameBuffer = new char [nameCapacity]; + Array.Copy (oldNameBuffer, nameBuffer, nameLength); + } + private string CreateNameString () { return parserContext.NameTable.Add (nameBuffer, 0, nameLength); @@ -1119,53 +1633,127 @@ namespace System.Xml private void AppendValueChar (int ch) { - CheckValueCapacity (); - valueBuffer [valueLength++] = (char)ch; + if (valueLength == valueCapacity) + ExpandValueCapacity (); + if (ch < Char.MaxValue) + valueBuffer [valueLength++] = (char) ch; + else { + valueBuffer [valueLength++] = (char) (ch / 0x10000 + 0xD800 - 1); + if (valueLength == valueCapacity) + ExpandValueCapacity (); + valueBuffer [valueLength++] = (char) (ch % 0x10000 + 0xDC00); + } } - private void CheckValueCapacity () + private void ExpandValueCapacity () { - if (valueLength == valueCapacity) { - valueCapacity = valueCapacity * 2; - char [] oldValueBuffer = valueBuffer; - valueBuffer = new char [valueCapacity]; - Array.Copy (oldValueBuffer, valueBuffer, valueLength); - } + valueCapacity = valueCapacity * 2; + char [] oldValueBuffer = valueBuffer; + valueBuffer = new char [valueCapacity]; + Array.Copy (oldValueBuffer, valueBuffer, valueLength); } private string CreateValueString () { - return new String (valueBuffer, 0, valueLength); + return new string (valueBuffer, 0, valueLength); + } + + private void ClearValueBuffer () + { + valueLength = 0; + } + + private void AppendCurrentTagChar (int ch) + { + if (currentTagLength == currentTagCapacity) + ExpandCurrentTagCapacity (); + if (ch < Char.MaxValue) + currentTagBuffer [currentTagLength++] = (char) ch; + else { + currentTagBuffer [currentTagLength++] = (char) (ch / 0x10000 + 0xD800 - 1); + if (currentTagLength == currentTagCapacity) + ExpandCurrentTagCapacity (); + currentTagBuffer [currentTagLength++] = (char) 
(ch % 0x10000 + 0xDC00); + } + } + + private void ExpandCurrentTagCapacity () + { + currentTagCapacity = currentTagCapacity * 2; + char [] oldCurrentTagBuffer = currentTagBuffer; + currentTagBuffer = new char [currentTagCapacity]; + Array.Copy (oldCurrentTagBuffer, currentTagBuffer, currentTagLength); + } + + private string CreateCurrentTagString () + { + return new string (currentTagBuffer, 0, currentTagLength); + } + + private void ClearCurrentTagBuffer () + { + currentTagLength = 0; } // The reader is positioned on the first character // of the text. - private void ReadText (bool cleanValue) + private void ReadText (bool notWhitespace) { - if (cleanValue) - valueLength = 0; + if (currentState != XmlNodeType.Element) + throw new XmlException (this as IXmlLineInfo, + "Text node cannot appear in this state."); + + if (notWhitespace) + ClearValueBuffer (); int ch = PeekChar (); + bool previousWasCloseBracket = false; while (ch != '<' && ch != -1) { if (ch == '&') { ReadChar (); - if (ReadReference (false)) + ch = ReadReference (false); + if (returnEntityReference) // Returns -1 if char validation should not be done break; - } else - AppendValueChar (ReadChar ()); + } else if (normalization && ch == '\r') { + ReadChar (); + ch = ReadChar (); + if (ch != '\n') + // append '\n' instead of '\r'. + AppendValueChar ('\n'); + // and in case of "\r\n", discard '\r'. 
+ } else { + if (CharacterChecking && XmlChar.IsInvalid (ch)) + throw new XmlException (this, "Not allowed character was found."); + ch = ReadChar (); + } + + AppendValueChar (ch); + // Block "]]>" + if (ch == ']') { + if (previousWasCloseBracket) + if (PeekChar () == '>') + throw new XmlException (this as IXmlLineInfo, + "Inside text content, character sequence ']]>' is not allowed."); + previousWasCloseBracket = true; + } + else if (previousWasCloseBracket) + previousWasCloseBracket = false; ch = PeekChar (); + notWhitespace = true; } if (returnEntityReference && valueLength == 0) { SetEntityReferenceProperties (); } else { + XmlNodeType nodeType = notWhitespace ? XmlNodeType.Text : + this.XmlSpace == XmlSpace.Preserve ? XmlNodeType.SignificantWhitespace : XmlNodeType.Whitespace; SetProperties ( - XmlNodeType.Text, // nodeType + nodeType, // nodeType String.Empty, // name false, // isEmptyElement - CreateValueString (), // value + null, // value: create only when required true // clearAttributes ); } @@ -1176,18 +1764,16 @@ namespace System.Xml // character reference or one of the predefined entities. // This allows the ReadText method to break so that the // next call to Read will return the EntityReference node. 
- private bool ReadReference (bool ignoreEntityReferences) + private int ReadReference (bool ignoreEntityReferences) { if (PeekChar () == '#') { ReadChar (); - ReadCharacterReference (); + return ReadCharacterReference (); } else - ReadEntityReference (ignoreEntityReferences); - - return returnEntityReference; + return ReadEntityReference (ignoreEntityReferences); } - private void ReadCharacterReference () + private int ReadCharacterReference () { int value = 0; @@ -1204,10 +1790,10 @@ namespace System.Xml else if (ch >= 'a' && ch <= 'f') value = (value << 4) + ch - 'a' + 10; else - throw ReaderError ( - String.Format ( + throw new XmlException (this as IXmlLineInfo, + String.Format (CultureInfo.InvariantCulture, "invalid hexadecimal digit: {0} (#x{1:X})", - (char)ch, + (char) ch, ch)); } } else { @@ -1217,133 +1803,264 @@ namespace System.Xml if (ch >= '0' && ch <= '9') value = value * 10 + ch - '0'; else - throw ReaderError ( - String.Format ( + throw new XmlException (this as IXmlLineInfo, + String.Format (CultureInfo.InvariantCulture, "invalid decimal digit: {0} (#x{1:X})", - (char)ch, + (char) ch, ch)); } } ReadChar (); // ';' - AppendValueChar (value); + // There is no way to save surrogate pairs... + if (CharacterChecking && XmlChar.IsInvalid (value)) + throw new XmlException (this as IXmlLineInfo, + "Referenced character was not allowed in XML. Normalization is " + normalization + ", checkCharacters = " + checkCharacters); + return value; } - private void ReadEntityReference (bool ignoreEntityReferences) + // Returns -1 if it should not be validated. + // Real EOF must not be detected here. 
+ private int ReadEntityReference (bool ignoreEntityReferences) { - nameLength = 0; - - int ch = PeekChar (); - - while (ch != ';' && ch != -1) { - AppendNameChar (ReadChar ()); - ch = PeekChar (); - } - + string name = ReadName (); Expect (';'); - string name = CreateNameString (); - - switch (name) - { - case "lt": - AppendValueChar ('<'); - break; - case "gt": - AppendValueChar ('>'); - break; - case "amp": + int predefined = XmlChar.GetPredefinedEntity (name); + if (predefined >= 0) + return predefined; + else { + if (ignoreEntityReferences) { AppendValueChar ('&'); - break; - case "apos": - AppendValueChar ('\''); - break; - case "quot": - AppendValueChar ('"'); - break; - default: - if (ignoreEntityReferences) { - AppendValueChar ('&'); - - foreach (char ch2 in name) { - AppendValueChar (ch2); - } - - AppendValueChar (';'); - } else { - returnEntityReference = true; - entityReferenceName = name; - } - break; + for (int i = 0; i < name.Length; i++) + AppendValueChar (name [i]); + AppendValueChar (';'); + } else { + returnEntityReference = true; + entityReferenceName = name; + } } + return -1; } // The reader is positioned on the first character of // the attribute name. - private void ReadAttributes () + private void ReadAttributes (bool isXmlDecl) { + int peekChar = -1; + bool requireWhitespace = false; + currentAttribute = -1; + currentAttributeValue = -1; + do { - string name = ReadName (); - SkipWhitespace (); - Expect ('='); - SkipWhitespace (); - string value = ReadAttribute (); + if (!SkipWhitespace () && requireWhitespace) + throw new XmlException ("Unexpected token. 
Name is required here."); + + IncrementAttributeToken (); + currentAttributeToken.LineNumber = line; + currentAttributeToken.LinePosition = column; + + currentAttributeToken.LocalName = + currentAttributeToken.Name = ReadName (); + ExpectAfterWhitespace ('='); SkipWhitespace (); + ReadAttributeValueTokens (-1); + attributeCount++; + + if (currentAttributeToken.Name == "xmlns") + parserContext.NamespaceManager.AddNamespace (String.Empty, GetAttribute (currentAttribute)); + else if (currentAttributeToken.Name.StartsWith ("xmlns:")) { + string nsPrefix = currentAttributeToken.Name.Substring (6); + parserContext.NamespaceManager.AddNamespace (nsPrefix, GetAttribute (currentAttribute)); + } - if (name == "xmlns") - parserContext.NamespaceManager.AddNamespace (String.Empty, UnescapeAttributeValue (value)); - else if (name.StartsWith ("xmlns:")) - parserContext.NamespaceManager.AddNamespace (name.Substring (6), UnescapeAttributeValue (value)); + if (!SkipWhitespace ()) + requireWhitespace = true; + peekChar = PeekChar (); + if (isXmlDecl) { + if (peekChar == '?') + break; + } + else if (peekChar == '/' || peekChar == '>') + break; + } while (peekChar != -1); - AddAttribute (name, value); - } while (PeekChar () != '/' && PeekChar () != '>' && PeekChar () != -1); + currentAttribute = -1; + currentAttributeValue = -1; } - // The reader is positioned on the quote character. - // *Keeps quote char* to value to get_QuoteChar() correctly. 
// Appends a synthetic attribute, made of one attribute token and a
// single Text value token, to the current node's attribute list.
// The DOCTYPE reader uses this to expose the "PUBLIC" and "SYSTEM"
// identifiers as attributes.
//
// name:  attribute name to expose.
// value: attribute value; stored as the only value token.
private void AddAttribute (string name, string value)
{
	IncrementAttributeToken ();
	XmlAttributeTokenInfo ati = attributeTokens [currentAttribute];
	// Use the caller-supplied name. It used to be hard-coded to
	// "SYSTEM", so the "PUBLIC" attribute was also reported as "SYSTEM".
	ati.Name = name;
	ati.FillNames ();
	IncrementAttributeValueToken ();
	XmlTokenInfo vti = attributeValueTokens [currentAttributeValue];
	vti.Value = value;
	SetProperties (vti, XmlNodeType.Text, String.Empty, false, value, false);
	attributeCount++;
}

// Advances currentAttribute to the next slot of attributeTokens,
// doubling the array when it is full and allocating the token object
// on first use. The slot is cleared and becomes currentAttributeToken.
private void IncrementAttributeToken ()
{
	currentAttribute++;
	if (attributeTokens.Length == currentAttribute) {
		XmlAttributeTokenInfo [] newArray =
			new XmlAttributeTokenInfo [attributeTokens.Length * 2];
		attributeTokens.CopyTo (newArray, 0);
		attributeTokens = newArray;
	}
	// Token objects are created once per slot and then reused.
	if (attributeTokens [currentAttribute] == null)
		attributeTokens [currentAttribute] = new XmlAttributeTokenInfo (this);
	currentAttributeToken = attributeTokens [currentAttribute];
	currentAttributeToken.Clear ();
}

// Same as IncrementAttributeToken, but for attributeValueTokens /
// currentAttributeValueToken. It also clears the value buffer so
// that the next token's text is accumulated from scratch.
private void IncrementAttributeValueToken ()
{
	ClearValueBuffer ();
	currentAttributeValue++;
	if (attributeValueTokens.Length == currentAttributeValue) {
		XmlTokenInfo [] newArray = new XmlTokenInfo [attributeValueTokens.Length * 2];
		attributeValueTokens.CopyTo (newArray, 0);
		attributeValueTokens = newArray;
	}
	if (attributeValueTokens [currentAttributeValue] == null)
		attributeValueTokens [currentAttributeValue] = new XmlTokenInfo (this, false);
	currentAttributeValueToken = attributeValueTokens [currentAttributeValue];
	currentAttributeValueToken.Clear ();
}

// LAMESPEC: Orthodox XML reader should normalize attribute values
//
// Reads one attribute value and splits it into value tokens: runs of
// character data become Text tokens, and each reference to an entity
// that is not predefined becomes an EntityReference token. Character
// references and predefined entities are appended to the surrounding
// text as the referenced character.
//
// dummyQuoteChar: when negative, the opening quote is read from the
//   input and end of input inside the value is an error. Otherwise it
//   is used as the quote character and end of input simply ends the
//   value.
//
// Throws XmlException when the value is not quoted, contains '<',
// contains a disallowed character (only when CharacterChecking is on),
// or hits end of input while dummyQuoteChar is negative.
private void ReadAttributeValueTokens (int dummyQuoteChar)
{
	int quoteChar = (dummyQuoteChar < 0) ? ReadChar () : dummyQuoteChar;

	if (quoteChar != '\'' && quoteChar != '\"')
		throw new XmlException (this as IXmlLineInfo, "an attribute value was not quoted");
	currentAttributeToken.QuoteChar = (char) quoteChar;

	IncrementAttributeValueToken ();
	currentAttributeToken.ValueTokenStartIndex = currentAttributeValue;
	currentAttributeValueToken.LineNumber = line;
	currentAttributeValueToken.LinePosition = column;

	// incrementToken: the previous token was an entity reference, so
	//   the next character must start a fresh value token.
	// isNewToken: no character has been consumed for the current
	//   token yet.
	bool incrementToken = false;
	bool isNewToken = true;
	bool loop = true;
	int ch = 0;
	while (loop) {
		ch = ReadChar ();
		if (ch == quoteChar)
			break;

		if (incrementToken) {
			IncrementAttributeValueToken ();
			currentAttributeValueToken.LineNumber = line;
			currentAttributeValueToken.LinePosition = column;
			incrementToken = false;
			isNewToken = true;
		}

		switch (ch) {
		case '<':
			throw new XmlException (this as IXmlLineInfo, "attribute values cannot contain '<'");
		case -1:
			if (dummyQuoteChar < 0)
				throw new XmlException (this as IXmlLineInfo, "unexpected end of file in an attribute value");
			else	// Attribute value constructor.
				loop = false;
			break;
		case '&':
			if (PeekChar () == '#') {
				// Character reference: append the referenced character.
				ReadChar ();
				ch = ReadCharacterReference ();
				if (CharacterChecking && XmlChar.IsInvalid (ch))
					throw new XmlException (this as IXmlLineInfo,
						"Not allowed character was found.");
				AppendValueChar (ch);
				break;
			}
			// Check XML 1.0 section 3.1 WFC.
			string entName = ReadName ();
			Expect (';');
			int predefined = XmlChar.GetPredefinedEntity (entName);
			if (predefined < 0) {
				CheckAttributeEntityReferenceWFC (entName);
				// Close the text collected so far as a Text token...
				currentAttributeValueToken.Value = CreateValueString ();
				currentAttributeValueToken.NodeType = XmlNodeType.Text;
				// ...and, unless that token is still untouched, put
				// the reference into a token of its own.
				if (!isNewToken)
					IncrementAttributeValueToken ();
				currentAttributeValueToken.Name = entName;
				currentAttributeValueToken.Value = String.Empty;
				currentAttributeValueToken.NodeType = XmlNodeType.EntityReference;
				incrementToken = true;
			}
			else
				AppendValueChar (predefined);
			break;
		default:
			if (CharacterChecking && XmlChar.IsInvalid (ch))
				throw new XmlException (this, "Invalid character was found.");
			AppendValueChar (ch);
			break;
		}

		isNewToken = false;
	}
	// Flush trailing text, unless the value ended right after an
	// entity reference (that token is already complete).
	if (!incrementToken) {
		currentAttributeValueToken.Value = CreateValueString ();
		currentAttributeValueToken.NodeType = XmlNodeType.Text;
	}
	currentAttributeToken.ValueTokenEndIndex = currentAttributeValue;
}

// Checks an entity reference found inside an attribute value against
// the DTD, if there is one.
//
// entName: name of the referenced entity.
//
// Throws XmlException when a DTD and a resolver are present but the
// entity is not declared, when the entity has an external reference,
// when the document is standalone and the entity is not declared in
// the internal subset, or when the entity value contains '<'.
// Returns silently when no declaration is available.
private void CheckAttributeEntityReferenceWFC (string entName)
{
	DTDEntityDeclaration entDecl =
		DTD == null ? null : DTD.EntityDecls [entName];
	if (DTD != null && resolver != null && entDecl == null)
		throw new XmlException (this, "Referenced entity does not exist.");

	if (entDecl == null)
		return;

	if (entDecl.HasExternalReference)
		throw new XmlException (this, "Reference to external entities is not allowed in the value of an attribute.");
	if (isStandalone && !entDecl.IsInternalSubset)
		throw new XmlException (this, "Reference to external entities is not allowed in the internal subset.");
	if (entDecl.EntityValue.IndexOf ('<') >= 0)
		throw new XmlException (this, "Attribute must not contain character '<' either directly or indirectly by way of entity references.");
}

// The reader is positioned on the first character
// of the target.
// - // Now it also reads XmlDeclaration, this method name became improper... + // It may be xml declaration or processing instruction. private void ReadProcessingInstruction () { string target = ReadName (); - SkipWhitespace (); + if (target == "xml") { + ReadXmlDeclaration (); + return; + } else if (target.ToLower (CultureInfo.InvariantCulture) == "xml") + throw new XmlException (this as IXmlLineInfo, + "Not allowed processing instruction name which starts with 'X', 'M', 'L' was found."); - valueLength = 0; + if (currentState == XmlNodeType.None) + currentState = XmlNodeType.XmlDeclaration; + + if (!SkipWhitespace ()) + if (PeekChar () != '?') + throw new XmlException (this as IXmlLineInfo, + "Invalid processing instruction name was found."); + + ClearValueBuffer (); while (PeekChar () != -1) { int ch = ReadChar (); @@ -1353,20 +2070,159 @@ namespace System.Xml break; } - AppendValueChar ((char)ch); + if (CharacterChecking && XmlChar.IsInvalid (ch)) + throw new XmlException (this, "Invalid character was found."); + AppendValueChar (ch); } SetProperties ( - target == "xml" ? 
- XmlNodeType.XmlDeclaration : XmlNodeType.ProcessingInstruction, // nodeType target, // name false, // isEmptyElement - CreateValueString (), // value + null, // value: create only when required true // clearAttributes ); } + // The reader is positioned after " 1 && + (attributeTokens [1].Name != "encoding" && + attributeTokens [1].Name != "standalone")) + message = "Invalid Xml Declaration markup was found."; + else if (attributeCount > 2 && attributeTokens [2].Name != "standalone") + message = "Invalid Xml Declaration markup was found."; + string sa = GetAttribute ("standalone"); + if (sa != null && sa != "yes" && sa != "no") + message = "Only 'yes' or 'no' is allowed for standalone."; + + this.isStandalone = (sa == "yes"); + + if (message != null) + throw new XmlException (this as IXmlLineInfo, message); + + SetProperties ( + XmlNodeType.XmlDeclaration, // nodeType + "xml", // name + false, // isEmptyElement + new string (currentTagBuffer, 6, currentTagLength - 6), // value + false // clearAttributes + ); + + Expect ("?>"); + } + + internal void SkipTextDeclaration () + { + this.currentState = XmlNodeType.Element; + + if (PeekChar () != '<') + return; + + ReadChar (); + + if (PeekChar () != '?') { + peekCharsIndex = 0; + return; + } + ReadChar (); + + while (peekCharsIndex < 6) { + if (PeekChar () < 0) + break; + else + ReadChar (); + } + if (new string (peekChars, 2, 4) != "xml ") { + if (new string (peekChars, 2, 3).ToLower (CultureInfo.InvariantCulture) == "xml") { + throw new XmlException (this as IXmlLineInfo, + "Processing instruction name must not be character sequence 'X' 'M' 'L' with case insensitivity."); + } + peekCharsIndex = 0; + return; + } + + SkipWhitespace (); + + // version decl + if (PeekChar () == 'v') { + Expect ("version"); + ExpectAfterWhitespace ('='); + SkipWhitespace (); + int quoteChar = ReadChar (); + char [] expect1_0 = new char [3]; + int versionLength = 0; + switch (quoteChar) { + case '\'': + case '"': + while (PeekChar () != 
quoteChar) { + if (PeekChar () == -1) + throw new XmlException (this as IXmlLineInfo, + "Invalid version declaration inside text declaration."); + else if (versionLength == 3) + throw new XmlException (this as IXmlLineInfo, + "Invalid version number inside text declaration."); + else { + expect1_0 [versionLength] = (char) ReadChar (); + versionLength++; + if (versionLength == 3 && new String (expect1_0) != "1.0") + throw new XmlException (this as IXmlLineInfo, + "Invalid version number inside text declaration."); + } + } + ReadChar (); + SkipWhitespace (); + break; + default: + throw new XmlException (this as IXmlLineInfo, + "Invalid version declaration inside text declaration."); + } + } + + if (PeekChar () == 'e') { + Expect ("encoding"); + ExpectAfterWhitespace ('='); + SkipWhitespace (); + int quoteChar = ReadChar (); + switch (quoteChar) { + case '\'': + case '"': + while (PeekChar () != quoteChar) + if (ReadChar () == -1) + throw new XmlException (this as IXmlLineInfo, + "Invalid encoding declaration inside text declaration."); + ReadChar (); + SkipWhitespace (); + break; + default: + throw new XmlException (this as IXmlLineInfo, + "Invalid encoding declaration inside text declaration."); + } + // Encoding value should be checked inside XmlInputStream. 
+ } + else + throw new XmlException (this as IXmlLineInfo, + "Encoding declaration is mandatory in text declaration."); + + Expect ("?>"); + } + // The reader is positioned on the first character after // the leading '') - throw ReaderError ("comments cannot contain '--'"); + throw new XmlException (this as IXmlLineInfo,"comments cannot contain '--'"); ReadChar (); break; } - AppendValueChar ((char)ch); + if (XmlChar.IsInvalid (ch)) + throw new XmlException (this as IXmlLineInfo, + "Not allowed character was found."); + + AppendValueChar (ch); } SetProperties ( XmlNodeType.Comment, // nodeType String.Empty, // name false, // isEmptyElement - CreateValueString (), // value + null, // value: create only when required true // clearAttributes ); } @@ -1426,7 +2292,11 @@ namespace System.Xml // the leading ''); + ExpectAfterWhitespace ('>'); - // now compile DTD - currentSubset = new DTDObjectModel (); // merges both internal and external subsets in the meantime, - int originalParserDepth = parserInputStack.Count; - if (intSubsetStartLine > 0) { - XmlParserInput original = currentInput; - currentInput = new XmlParserInput (new StringReader (parserContext.InternalSubset), BaseURI, intSubsetStartLine, intSubsetStartColumn); - do { - CompileDTDSubset (); - if (PeekChar () == -1 && parserInputStack.Count > 0) - popParserInput (); - } while (nodeType != XmlNodeType.None || parserInputStack.Count > originalParserDepth); - if (dtdIncludeSect != 0) - this.ReaderError ("INCLUDE section is not ended correctly."); - currentInput = original; - } - if (systemId != String.Empty) { - pushParserInput (systemId); - do { - this.CompileDTDSubset (); - if (PeekChar () == -1 && parserInputStack.Count > 1) - popParserInput (); - } while (nodeType != XmlNodeType.None || parserInputStack.Count > originalParserDepth + 1); - popParserInput (); - } + GenerateDTDObjectModel (doctypeName, publicId, + systemId, parserContext.InternalSubset, + intSubsetStartLine, intSubsetStartColumn); // set 
properties for node SetProperties ( @@ -1538,987 +2405,277 @@ namespace System.Xml parserContext.InternalSubset, // value true // clearAttributes ); + + if (publicId != null) + AddAttribute ("PUBLIC", publicId); + if (systemId != null) + AddAttribute ("SYSTEM", systemId); + currentAttribute = currentAttributeValue = -1; } - private void pushParserInput (string url) + internal DTDObjectModel GenerateDTDObjectModel (string name, string publicId, + string systemId, string internalSubset) { - string absPath = null; -#if NetworkEnabled - try { - Uri baseUrl = new Uri (BaseURI); - absPath = resolver.ResolveUri (baseUrl, url).ToString (); - } catch (UriFormatException) { - if (Path.IsPathRooted (url)) - absPath = url; - else if (BaseURI != String.Empty) - absPath = new FileInfo (BaseURI).DirectoryName + Path.DirectorySeparatorChar + url; - else - absPath = url; - } -#else - if (Path.IsPathRooted (url)) - absPath = url; - else if (BaseURI != String.Empty) - absPath = new FileInfo (BaseURI).DirectoryName + Path.DirectorySeparatorChar + url; - else - absPath = url; + return GenerateDTDObjectModel (name, publicId, systemId, internalSubset, 0, 0); + } + + internal DTDObjectModel GenerateDTDObjectModel (string name, string publicId, + string systemId, string internalSubset, int intSubsetStartLine, int intSubsetStartColumn) + { + // now compile DTD + parserContext.Dtd = new DTDObjectModel (this.NameTable); // merges both internal and external subsets in the meantime, + DTD.BaseURI = BaseURI; + DTD.Name = name; + DTD.PublicId = publicId; + DTD.SystemId = systemId; + DTD.InternalSubset = internalSubset; + DTD.XmlResolver = resolver; + DTD.IsStandalone = isStandalone; + DTD.LineNumber = line; + DTD.LinePosition = column; + + DTDReader dr = new DTDReader (DTD, intSubsetStartLine, intSubsetStartColumn); + dr.Normalization = this.normalization; +#if DTD_HANDLE_EVENTS + dr.ValidationEventHandler += new ValidationEventHandler (OnValidationEvent); #endif - foreach (XmlParserInput i in 
parserInputStack.ToArray ()) { - if (i.BaseURI == url) - this.ReaderError ("Nested inclusion is not allowed: " + url); - } - parserInputStack.Push (currentInput); - currentInput = new XmlParserInput (new XmlStreamReader (absPath, false), absPath); - baseURIStack.Push (BaseURI); - parserContext.BaseURI = absPath; - } - - private void popParserInput () - { - currentInput = parserInputStack.Pop () as XmlParserInput; - parserContext.BaseURI = this.baseURIStack.Pop () as string; - } - - private enum DtdInputState - { - Free = 1, - ElementDecl, - AttlistDecl, - EntityDecl, - NotationDecl, - PI, - Comment, - InsideSingleQuoted, - InsideDoubleQuoted, - } - - private class DtdInputStateStack - { - Stack intern = new Stack (); - public DtdInputStateStack () - { - Push (DtdInputState.Free); - } - - public DtdInputState Peek () - { - return (DtdInputState) intern.Peek (); - } - - public DtdInputState Pop () - { - return (DtdInputState) intern.Pop (); - } - - public void Push (DtdInputState val) - { - intern.Push (val); - } - } - - - DtdInputStateStack stateStack = new DtdInputStateStack (); - DtdInputState State { - get { return stateStack.Peek (); } - } - - // Simply read but not generate any result. 
- private void ReadInternalSubset () - { - bool continueParse = true; - - while (continueParse) { - switch (ReadChar ()) { - case ']': - switch (State) { - case DtdInputState.Free: - continueParse = false; - break; - case DtdInputState.InsideDoubleQuoted: - continue; - case DtdInputState.InsideSingleQuoted: - continue; - default: - throw ReaderError ("unexpected end of file at DTD."); - } - break; - case -1: - throw ReaderError ("unexpected end of file at DTD."); - case '<': - if (State == DtdInputState.InsideDoubleQuoted || - State == DtdInputState.InsideSingleQuoted) - continue; // well-formed - switch (ReadChar ()) { - case '?': - stateStack.Push (DtdInputState.PI); - break; - case '!': - switch (ReadChar ()) { - case 'E': - switch (ReadChar ()) { - case 'L': - Expect ("EMENT"); - stateStack.Push (DtdInputState.ElementDecl); - break; - case 'N': - Expect ("TITY"); - stateStack.Push (DtdInputState.EntityDecl); - break; - default: - throw ReaderError ("unexpected token ''."); - } - break; - case '\'': - if (State == DtdInputState.InsideSingleQuoted) - stateStack.Pop (); - else if (State != DtdInputState.InsideDoubleQuoted && State != DtdInputState.Comment) - stateStack.Push (DtdInputState.InsideSingleQuoted); - break; - case '"': - if (State == DtdInputState.InsideDoubleQuoted) - stateStack.Pop (); - else if (State != DtdInputState.InsideSingleQuoted && State != DtdInputState.Comment) - stateStack.Push (DtdInputState.InsideDoubleQuoted); - break; - case '>': - switch (State) { - case DtdInputState.ElementDecl: - goto case DtdInputState.NotationDecl; - case DtdInputState.AttlistDecl: - goto case DtdInputState.NotationDecl; - case DtdInputState.EntityDecl: - goto case DtdInputState.NotationDecl; - case DtdInputState.NotationDecl: - stateStack.Pop (); - break; - case DtdInputState.InsideDoubleQuoted: - continue; - case DtdInputState.InsideSingleQuoted: - continue; // well-formed - case DtdInputState.Comment: - continue; - default: - throw ReaderError ("unexpected 
token '>'"); - } - break; - case '?': - if (State == DtdInputState.PI) { - if (ReadChar () == '>') - stateStack.Pop (); - } - break; - case '-': - if (State == DtdInputState.Comment) { - if (PeekChar () == '-') { - ReadChar (); - Expect ('>'); - stateStack.Pop (); - } - } - break; - case '%': - if (State != DtdInputState.Free && State != DtdInputState.EntityDecl && State != DtdInputState.Comment && State != DtdInputState.InsideDoubleQuoted && State != DtdInputState.InsideSingleQuoted) - throw ReaderError ("Parameter Entity Reference cannot appear as a part of markupdecl (see XML spec 2.8)."); - break; - } - } - } - - // Read any one of following: - // elementdecl, AttlistDecl, EntityDecl, NotationDecl, - // PI, Comment, Parameter Entity, or doctype termination char(']') - // - // returns a node of some nodeType or null, setting nodeType. - // (if None then ']' was found.) - private void CompileDTDSubset() + return dr.GenerateDTDObjectModel (); + } + + private void OnValidationEvent (object o, ValidationEventArgs e) { - SkipWhitespace (); - switch(PeekChar ()) +#if DTD_HANDLE_EVENTS + if (ValidationEventHandler != null) + // Override object as this. + ValidationEventHandler (this, e); +#endif + } + + private enum DtdInputState + { + Free = 1, + ElementDecl, + AttlistDecl, + EntityDecl, + NotationDecl, + PI, + Comment, + InsideSingleQuoted, + InsideDoubleQuoted, + } + + private class DtdInputStateStack + { + Stack intern = new Stack (); + public DtdInputStateStack () { - case -1: - nodeType = XmlNodeType.None; - break; - case '%': - TryExpandPERef (); - break; - case '<': - ReadChar (); - switch(ReadChar ()) - { - case '?': - // Only read, no store. 
- ReadProcessingInstruction (); - break; - case '!': - CompileDeclaration (); - break; - default: - throw ReaderError ("Syntax Error after '<' character."); - } - break; - case ']': - // End of inclusion - Expect ("]]>"); - dtdIncludeSect--; - SkipWhitespace (); - break; - default: - throw ReaderError (String.Format ("Syntax Error inside doctypedecl markup : {0}({1})", PeekChar (), (char) PeekChar ())); + Push (DtdInputState.Free); + } + + public DtdInputState Peek () + { + return (DtdInputState) intern.Peek (); + } + + public DtdInputState Pop () + { + return (DtdInputState) intern.Pop (); } + + public void Push (DtdInputState val) + { + intern.Push (val); + } + } + + + DtdInputStateStack stateStack = new DtdInputStateStack (); + DtdInputState State { + get { return stateStack.Peek (); } } - private void CompileDeclaration () + // Simply read but not generate any result. + private void ReadInternalSubset () { - nodeType = XmlNodeType.DocumentType; // Hack!! - switch(ReadChar ()) - { - case '-': - Expect ('-'); - // Only read, no store. 
- ReadComment (); - break; - case 'E': - switch(ReadChar ()) - { - case 'N': - Expect ("TITY"); - SkipWhitespace (); - LOOPBACK: - if (PeekChar () == '%') { - ReadChar (); - if (!XmlConstructs.IsSpace (PeekChar ())) { - ExpandPERef (); - goto LOOPBACK; -// throw ReaderError ("expected whitespace between '%' and name."); - } else { - SkipWhitespace (); - TryExpandPERef (); - if (XmlConstructs.IsName (PeekChar ())) - ReadParameterEntityDecl (); - else - throw ReaderError ("expected name character"); - } + bool continueParse = true; + + while (continueParse) { + switch (ReadChar ()) { + case ']': + switch (State) { + case DtdInputState.Free: + continueParse = false; break; + case DtdInputState.InsideDoubleQuoted: + continue; + case DtdInputState.InsideSingleQuoted: + continue; + default: + throw new XmlException (this as IXmlLineInfo,"unexpected end of file at DTD."); } - DTDEntityDeclaration ent = ReadEntityDecl (); - if (currentSubset.EntityDecls [ent.Name] == null) - currentSubset.EntityDecls.Add (ent.Name, ent); - break; - case 'L': - Expect ("EMENT"); - DTDElementDeclaration el = ReadElementDecl (); - currentSubset.ElementDecls.Add (el.Name, el); break; - default: - throw ReaderError ("Syntax Error after ' 0) { - switch (skip ? PeekChar () : ReadChar ()) { - case -1: - throw ReaderError ("Unexpected IGNORE section end."); + case '"': + if (State == DtdInputState.InsideDoubleQuoted) + stateStack.Pop (); + else if (State != DtdInputState.InsideSingleQuoted && State != DtdInputState.Comment) + stateStack.Push (DtdInputState.InsideDoubleQuoted); break; - case '<': - if (ReadChar () == '!' 
&& ReadChar () == '[') - dtdIgnoreSect++; + case '>': + switch (State) { + case DtdInputState.ElementDecl: + goto case DtdInputState.NotationDecl; + case DtdInputState.AttlistDecl: + goto case DtdInputState.NotationDecl; + case DtdInputState.EntityDecl: + goto case DtdInputState.NotationDecl; + case DtdInputState.NotationDecl: + stateStack.Pop (); + break; + case DtdInputState.InsideDoubleQuoted: + case DtdInputState.InsideSingleQuoted: + case DtdInputState.Comment: + continue; + default: + throw new XmlException (this as IXmlLineInfo,"unexpected token '>'"); + } break; - case ']': - if (ReadChar () == ']') { + case '?': + if (State == DtdInputState.PI) { if (ReadChar () == '>') - dtdIgnoreSect--; - else - skip = true; + stateStack.Pop (); } break; + case '-': + if (State == DtdInputState.Comment) { + if (PeekChar () == '-') { + ReadChar (); + Expect ('>'); + stateStack.Pop (); + } + } + break; + case '%': + if (State != DtdInputState.Free && State != DtdInputState.EntityDecl && State != DtdInputState.Comment && State != DtdInputState.InsideDoubleQuoted && State != DtdInputState.InsideSingleQuoted) + throw new XmlException (this as IXmlLineInfo,"Parameter Entity Reference cannot appear as a part of markupdecl (see XML spec 2.8)."); + break; } - skip = false; } } - // The reader is positioned on the head of the name. 
- private DTDElementDeclaration ReadElementDecl () - { - DTDElementDeclaration decl = new DTDElementDeclaration (); - SkipWhitespace (); - TryExpandPERef (); - decl.Name = ReadName (); - SkipWhitespace (); - TryExpandPERef (); - ReadContentSpec (decl); - SkipWhitespace (); - Expect ('>'); - return decl; - } - - // read 'children'(BNF) of contentspec - private void ReadContentSpec (DTDElementDeclaration decl) - { - switch(PeekChar ()) - { - case 'E': - decl.IsEmpty = true; - Expect ("EMPTY"); - break; - case 'A': - decl.IsAny = true; - Expect ("ANY"); - break; - case '(': - DTDContentModel model = decl.ContentModel; - ReadChar (); - SkipWhitespace (); - TryExpandPERef (); - if(PeekChar () == '#') { - // Mixed Contents - decl.IsMixedContent = true; - Expect ("#PCDATA"); - SkipWhitespace (); - TryExpandPERef (); - SkipWhitespace (); - while(PeekChar () != ')') { - Expect('|'); - SkipWhitespace (); - TryExpandPERef (); - SkipWhitespace (); - model.ChildModels.Add (ReadName ()); - SkipWhitespace (); - TryExpandPERef (); - } - Expect (')'); - if(PeekChar () == '*') - ReadChar (); // ZeroOrMore - } else { - // Non-Mixed Contents - model.ChildModels.Add (ReadCP ()); - SkipWhitespace (); - - do { // copied from ReadCP() ...;-) - TryExpandPERef (); - SkipWhitespace (); - if(PeekChar ()=='|') { - // CPType=Or - model.OrderType = DTDContentOrderType.Or; - ReadChar (); - SkipWhitespace (); - model.ChildModels.Add (ReadCP ()); - SkipWhitespace (); - } - else if(PeekChar () == ',') - { - // CPType=Seq - model.OrderType = DTDContentOrderType.Seq; - ReadChar (); - SkipWhitespace (); - model.ChildModels.Add (ReadCP ()); - SkipWhitespace (); - } - else - break; - } - while(true); - - Expect (')'); - switch(PeekChar ()) - { - case '?': - model.MinOccurs = 0; - ReadChar (); - break; - case '*': - model.MinOccurs = 0; - model.MaxOccurs = decimal.MaxValue; - ReadChar (); - break; - case '+': - model.MaxOccurs = decimal.MaxValue; - ReadChar (); - break; - } - SkipWhitespace (); - } - 
SkipWhitespace (); - break; - } - } - - // Read 'cp' (BNF) of contentdecl (BNF) - private DTDContentModel ReadCP () - { - DTDContentModel model = new DTDContentModel (); - TryExpandPERef (); - if(PeekChar () == '(') { - ReadChar (); - SkipWhitespace (); - model.ChildModels.Add (ReadCP ()); - SkipWhitespace (); - do { - TryExpandPERef (); - SkipWhitespace (); - if(PeekChar ()=='|') { - // CPType=Or - model.OrderType = DTDContentOrderType.Or; - ReadChar (); - SkipWhitespace (); - model.ChildModels.Add (ReadCP ()); - SkipWhitespace (); - } - else if(PeekChar () == ',') { - // CPType=Seq - model.OrderType = DTDContentOrderType.Seq; - ReadChar (); - SkipWhitespace (); - model.ChildModels.Add (ReadCP ()); - SkipWhitespace (); - } - else - break; - } - while(true); - SkipWhitespace (); - Expect (')'); - } - else { - TryExpandPERef (); - model.ElementName = ReadName (); - } - - switch(PeekChar ()) { - case '?': - model.MinOccurs = 0; - ReadChar (); - break; - case '*': - model.MinOccurs = 0; - model.MaxOccurs = decimal.MaxValue; - ReadChar (); - break; - case '+': - model.MaxOccurs = decimal.MaxValue; - ReadChar (); - break; - } - return model; - } - - // The reader is positioned on the first name char. 
- private void ReadParameterEntityDecl () - { - DTDParameterEntityDeclaration decl = - new DTDParameterEntityDeclaration(); - decl.BaseURI = BaseURI; - - decl.Name = ReadName (); - SkipWhitespace (); - - if (PeekChar () == 'S' || PeekChar () == 'P') { -// throw new NotImplementedException ("External parameter entity reference is not implemented yet."); - // read publicId/systemId - ReadExternalID (); - decl.PublicId = attributes ["PUBLIC"] as string; - decl.SystemId = attributes ["SYSTEM"] as string; - SkipWhitespace (); - } - else { - TryExpandPERef (); - int quoteChar = ReadChar (); - int start = currentTag.Length; - while (true) { - SkipWhitespace (); - int c = PeekChar (); - if ((int) c == -1) - throw new XmlException ("unexpected end of stream in entity value definition."); - switch (c) { - case '"': - ReadChar (); - if (quoteChar == '"') goto SKIP; - break; - case '\'': - ReadChar (); - if (quoteChar == '\'') goto SKIP; - break; - case '%': - ImportAsPERef (); - break; - default: - ReadChar (); - break; - } - } - SKIP: - decl.Value = currentTag.ToString (start, currentTag.Length - start - 1); - } - SkipWhitespace (); - Expect ('>'); - if (parameterEntities [decl.Name] == null) { - parameterEntities.Add (decl.Name, decl); - } - } - - // reader is positioned on '%' - private void ImportAsPERef () - { - StringBuilder sb = null; - int peRefStart = currentTag.Length; - string appendStr = ""; - ReadChar (); - string peName = ReadName (); - Expect (';'); - DTDParameterEntityDeclaration peDecl = - this.parameterEntities [peName] as DTDParameterEntityDeclaration; - if (peDecl == null) - throw ReaderError ("Parameter entity " + peName + " not found."); - if (peDecl.SystemId != null) { - pushParserInput (peDecl.SystemId); - if (sb == null) - sb = new StringBuilder (); - else - sb.Length = 0; - while (PeekChar () != -1) - sb.Append (ReadChar ()); - popParserInput (); - appendStr = sb.ToString (); - } else { - appendStr = peDecl.Value; - } - currentTag.Remove (peRefStart, 
- currentTag.Length - peRefStart); - currentTag.Append (Dereference (appendStr)); - } - - // The reader is positioned on the head of the name. - private DTDEntityDeclaration ReadEntityDecl () - { - DTDEntityDeclaration decl = new DTDEntityDeclaration (); - decl.Name = ReadName (); - SkipWhitespace (); - TryExpandPERef (); - SkipWhitespace (); - - if (PeekChar () == 'S' || PeekChar () == 'P') { - // external entity - ReadExternalID (); - decl.PublicId = attributes ["PUBLIC"] as string; - decl.SystemId = attributes ["SYSTEM"] as string; - SkipWhitespace (); - if (PeekChar () == 'N') - { - // NDataDecl - Expect ("NDATA"); - SkipWhitespace (); - decl.NotationName = ReadName (); // ndata_name - } - } - else { - // general entity - decl.EntityValue = ReadEntityValueDecl (); - } - SkipWhitespace (); - Expect ('>'); - return decl; - } - - private string ReadEntityValueDecl () - { - SkipWhitespace (); - // quotation char will be finally removed on unescaping - int quoteChar = ReadChar (); - int start = currentTag.Length; - if (quoteChar != '\'' && quoteChar != '"') - throw new XmlException ("quotation char was expected."); - - while (PeekChar () != quoteChar) { - switch (PeekChar ()) { - case '%': - this.ImportAsPERef (); - continue; - case '&': - ReadChar (); -// Expect ('#'); -// ReadCharacterReference (); - ReadReference (true); - break; - case -1: - throw new XmlException ("unexpected end of stream."); - default: - ReadChar (); - break; - } - } - string value = Dereference (currentTag.ToString (start, currentTag.Length - start)); - Expect (quoteChar); - return value; - } - - private DTDAttListDeclaration ReadAttListDecl () - { - SkipWhitespace (); - TryExpandPERef (); - string name = ReadName (); // target element name - DTDAttListDeclaration decl = - currentSubset.AttListDecls [name] as DTDAttListDeclaration; - if (decl == null) - decl = new DTDAttListDeclaration (); - decl.Name = name; - - SkipWhitespace (); - TryExpandPERef (); - SkipWhitespace (); - - while 
(XmlConstructs.IsName ((char) PeekChar ())) { - DTDAttributeDefinition def = ReadAttributeDefinition (); - if (decl.AttributeDefinitions [def.Name] == null) - decl.AttributeDefinitions.Add (def.Name, def); - SkipWhitespace (); - TryExpandPERef (); - SkipWhitespace (); - } - SkipWhitespace (); - Expect ('>'); - return decl; - } - - private DTDAttributeDefinition ReadAttributeDefinition () - { - DTDAttributeDefinition def = new DTDAttributeDefinition (); - - // attr_name - TryExpandPERef (); - def.Name = ReadName (); - SkipWhitespace (); - - // attr_value - TryExpandPERef (); - switch(PeekChar ()) { - case 'C': // CDATA - Expect ("CDATA"); - def.AttributeType = DTDAttributeType.CData; - break; - case 'I': // ID, IDREF, IDREFS - Expect ("ID"); - if(PeekChar () == 'R') { - Expect ("REF"); - if(PeekChar () == 'S') { - // IDREFS - ReadChar (); - def.AttributeType = DTDAttributeType.IdRefs; - } - else // IDREF - def.AttributeType = DTDAttributeType.IdRef; - } - else // ID - def.AttributeType = DTDAttributeType.Id; - break; - case 'E': // ENTITY, ENTITIES - Expect ("ENTIT"); - switch(ReadChar ()) { - case 'Y': // ENTITY - def.AttributeType = DTDAttributeType.Entity; - break; - case 'I': // ENTITIES - Expect ("ES"); - def.AttributeType = DTDAttributeType.Entities; - break; - } - break; - case 'N': // NMTOKEN, NMTOKENS, NOTATION - ReadChar (); - switch(PeekChar ()) { - case 'M': - Expect ("MTOKEN"); - if(PeekChar ()=='S') { // NMTOKENS - ReadChar (); - def.AttributeType = DTDAttributeType.NmTokens; - } - else // NMTOKEN - def.AttributeType = DTDAttributeType.NmToken; - break; - case 'O': - Expect ("OTATION"); - def.AttributeType = DTDAttributeType.Notation; - SkipWhitespace (); - Expect ('('); - SkipWhitespace (); - def.EnumeratedNotations.Add (ReadName ()); // notation name - SkipWhitespace (); - while(PeekChar () == '|') { - ReadChar (); - SkipWhitespace (); - def.EnumeratedNotations.Add (ReadName ()); // notation name - SkipWhitespace (); - } - Expect (')'); - break; - 
default: - throw new XmlException ("attribute declaration syntax error."); - } - break; - default: // Enumerated Values - TryExpandPERef (); - Expect ('('); - SkipWhitespace (); - def.EnumeratedAttributeDeclaration.Add (ReadNmToken ()); // enum value - SkipWhitespace (); - while(PeekChar () == '|') { - ReadChar (); - SkipWhitespace (); - def.EnumeratedAttributeDeclaration.Add (ReadNmToken ()); // enum value - SkipWhitespace (); - } - Expect (')'); - break; - } - SkipWhitespace (); - - TryExpandPERef (); - - // def_value - if(PeekChar () == '#') - { - ReadChar (); - switch(PeekChar ()) - { - case 'R': - Expect ("REQUIRED"); - def.OccurenceType = DTDAttributeOccurenceType.Required; - break; - case 'I': - Expect ("IMPLIED"); - def.OccurenceType = DTDAttributeOccurenceType.Optional; - break; - case 'F': - Expect ("FIXED"); - def.OccurenceType = DTDAttributeOccurenceType.Fixed; - SkipWhitespace (); - def.UnresolvedDefaultValue = ReadAttribute (); - break; - } - } else { - // one of the enumerated value - if (PeekChar () == -1) { - popParserInput (); - } - SkipWhitespace (); - def.UnresolvedDefaultValue = ReadAttribute (); - } - - return def; - } - - private DTDNotationDeclaration ReadNotationDecl() - { - DTDNotationDeclaration decl = new DTDNotationDeclaration (); - SkipWhitespace (); - decl.Name = ReadName (); // notation name - if (namespaces) { // copy from SetProperties ;-) - int indexOfColon = decl.Name.IndexOf (':'); - - if (indexOfColon == -1) { - decl.Prefix = String.Empty; - decl.LocalName = decl.Name; - } else { - decl.Prefix = decl.Name.Substring (0, indexOfColon); - decl.LocalName = decl.Name.Substring (indexOfColon + 1); - } - } else { - decl.Prefix = String.Empty; - decl.LocalName = decl.Name; - } - - SkipWhitespace (); - if(PeekChar () == 'P') { - decl.PublicId = ReadPubidLiteral (); - SkipWhitespace (); - if (PeekChar () == '\'' || PeekChar () == '"') { - decl.SystemId = ReadSystemLiteral (false); - SkipWhitespace (); - } - } else if(PeekChar () == 'S') 
{ - decl.SystemId = ReadSystemLiteral (true); - SkipWhitespace (); - } - if(decl.PublicId == null && decl.SystemId == null) - throw new XmlException ("public or system declaration required for \"NOTATION\" declaration."); - Expect ('>'); - return decl; - } - - private void TryExpandPERef () - { - if (PeekChar () == '%') { - ReadChar (); - if (!XmlConstructs.IsName (PeekChar ())) - return; - ExpandPERef (); - } - } - - // reader is positioned on the first letter of the name. - private void ExpandPERef () - { - ExpandPERef (true); - } - - private void ExpandPERef (bool attachSpace) - { - string peName = ReadName (); - Expect (";"); - ExpandNamedPERef (peName, attachSpace); - } - - private void ExpandNamedPERef (string peName, bool attachSpace) - { - DTDParameterEntityDeclaration decl = - parameterEntities [peName] as DTDParameterEntityDeclaration; - if (decl == null) - throw new XmlException ("undeclared parameter entity: '" + peName + "'"); - if (decl.SystemId != null) { - pushParserInput (decl.SystemId); - } - // add buffer - else - currentInput.InsertParameterEntityBuffer (attachSpace ? " " + Dereference (decl.Value) + " " : decl.Value); - SkipWhitespace (); // is it ok? -// while (PeekChar () == '%') -// TryExpandPERef (); // recursive - } - - private void ReadExternalID() { - switch(PeekChar ()) { - case 'S': - attributes ["PUBLIC"] = null; - attributes ["SYSTEM"] = ReadSystemLiteral (true); - break; - case 'P': - attributes ["PUBLIC"] = ReadPubidLiteral (); - SkipWhitespace (); - attributes ["SYSTEM"] = ReadSystemLiteral (false); - break; - } - } - // The reader is positioned on the first 'S' of "SYSTEM". 
private string ReadSystemLiteral (bool expectSYSTEM) { - if(expectSYSTEM) + if(expectSYSTEM) { Expect ("SYSTEM"); - SkipWhitespace (); + if (!SkipWhitespace ()) + throw new XmlException (this as IXmlLineInfo, + "Whitespace is required after 'SYSTEM'."); + } + else + SkipWhitespace (); int quoteChar = ReadChar (); // apos or quot - int startPos = currentTag.Length; + int startPos = currentTagLength; int c = 0; - while(c != quoteChar) { + ClearValueBuffer (); + while (c != quoteChar) { c = ReadChar (); - if(c < 0) throw ReaderError ("Unexpected end of stream in ExternalID."); + if (c < 0) + throw new XmlException (this as IXmlLineInfo,"Unexpected end of stream in ExternalID."); + if (c != quoteChar) + AppendValueChar (c); } - return currentTag.ToString (startPos, currentTag.Length - 1 - startPos); + return CreateValueString (); } private string ReadPubidLiteral() { Expect ("PUBLIC"); - SkipWhitespace (); + if (!SkipWhitespace ()) + throw new XmlException (this as IXmlLineInfo, + "Whitespace is required after 'PUBLIC'."); int quoteChar = ReadChar (); - int startPos = currentTag.Length; + int startPos = currentTagLength; int c = 0; + ClearValueBuffer (); while(c != quoteChar) { c = ReadChar (); - if(c < 0) throw ReaderError ("Unexpected end of stream in ExternalID."); - if(c != quoteChar && !XmlConstructs.IsPubid (c)) - throw ReaderError("character '" + (char)c + "' not allowed for PUBLIC ID"); - } - return currentTag.ToString (startPos, currentTag.Length - 1 - startPos); - } - - // The reader is positioned on the first character - // of the name. - internal string ReadName () - { - return ReadNameOrNmToken(false); - } - - // The reader is positioned on the first character - // of the name. 
- private string ReadNmToken () - { - return ReadNameOrNmToken(true); - } - - private string ReadNameOrNmToken(bool isNameToken) - { - int ch = PeekChar (); - if(isNameToken) { - if (!XmlConstructs.IsName ((char) ch)) - throw ReaderError (String.Format ("a name did not start with a legal character {0} ({1})", ch, (char)ch)); - } - else { - if (!XmlConstructs.IsNameStart ((char) PeekChar ())) - throw ReaderError (String.Format ("a name did not start with a legal character {0} ({1})", ch, (char)ch)); - } + if(c < 0) throw new XmlException (this as IXmlLineInfo,"Unexpected end of stream in ExternalID."); + if(c != quoteChar && !XmlChar.IsPubidChar (c)) + throw new XmlException (this as IXmlLineInfo,"character '" + (char) c + "' not allowed for PUBLIC ID"); + if (c != quoteChar) + AppendValueChar (c); + } + return CreateValueString (); + } + + // The reader is positioned on the first character + // of the name. + private string ReadName () + { + int ch = PeekChar (); + if (!XmlChar.IsFirstNameChar (ch)) + throw new XmlException (this as IXmlLineInfo,String.Format (CultureInfo.InvariantCulture, "a name did not start with a legal character {0} ({1})", ch, (char) ch)); nameLength = 0; AppendNameChar (ReadChar ()); - while (XmlConstructs.IsName (PeekChar ())) { + while (XmlChar.IsNameChar (PeekChar ())) { AppendNameChar (ReadChar ()); } @@ -2532,12 +2689,12 @@ namespace System.Xml int ch = ReadChar (); if (ch != expected) { - throw ReaderError ( - String.Format ( + throw new XmlException (this as IXmlLineInfo, + String.Format (CultureInfo.InvariantCulture, "expected '{0}' ({1:X}) but found '{2}' ({3:X})", - (char)expected, + (char) expected, expected, - (char)ch, + (char) ch, ch)); } } @@ -2549,127 +2706,155 @@ namespace System.Xml Expect (expected[i]); } + private void ExpectAfterWhitespace (char c) + { + while (true) { + int i = ReadChar (); + if (i < 0x21 && XmlChar.IsWhitespace (i)) + continue; + if (c != i) + throw new XmlException (this, String.Format 
(CultureInfo.InvariantCulture, "Expected {0}, but found {1} [{2}]", c, (char) i, i)); + break; + } + } + // Does not consume the first non-whitespace character. - private void SkipWhitespace () + private bool SkipWhitespace () { - //FIXME: Should not skip if whitespaceHandling == WhiteSpaceHandling.None - while (XmlConstructs.IsSpace (PeekChar ())) + bool skipped = XmlChar.IsWhitespace (PeekChar ()); + if (!skipped) + return false; + while (XmlChar.IsWhitespace (PeekChar ())) ReadChar (); + return skipped; } - private bool ReadWhitespace () + private void ReadWhitespace () { - valueLength = 0; + if (currentState == XmlNodeType.None) + currentState = XmlNodeType.XmlDeclaration; + + ClearValueBuffer (); int ch = PeekChar (); do { AppendValueChar (ReadChar ()); - } while ((ch = PeekChar ()) != -1 && XmlConstructs.IsSpace (ch)); + } while ((ch = PeekChar ()) != -1 && XmlChar.IsWhitespace (ch)); - if (ch != -1 && ch != '<') + if (currentState == XmlNodeType.Element && ch != -1 && ch != '<') ReadText (false); - else - SetProperties (XmlNodeType.Whitespace, + else { + XmlNodeType nodeType = (this.XmlSpace == XmlSpace.Preserve) ? + XmlNodeType.SignificantWhitespace : XmlNodeType.Whitespace; + SetProperties (nodeType, String.Empty, false, - CreateValueString (), + null, // value: create only when required true); + } - return (PeekChar () != -1); + return; } - // read entity reference from attribute string and if parsable then return the value. - private string ReadAttributeValueReference () + // Since ReadBase64() is processed for every 4 chars, it does + // not handle '=' here. 
+ private byte GetBase64Byte (char ch) { - int endEntityPosition = attributeString.IndexOf(';', - attributeValuePos); - string entityName = attributeString.Substring (attributeValuePos + 1, - endEntityPosition - attributeValuePos - 1); - - attributeValuePos = endEntityPosition + 1; - - if(entityName [0] == '#') { - char c; - // character entity - if(entityName [1] == 'x') { - // hexadecimal - c = (char) int.Parse ("0" + entityName.Substring (2), - System.Globalization.NumberStyles.HexNumber); - } else { - // decimal - c = (char) int.Parse (entityName.Substring (1)); - } - return c.ToString(); - } - else { - switch(entityName) - { - case "lt": return "<"; - case "gt": return ">"; - case "amp": return "&"; - case "quot": return "\""; - case "apos": return "'"; - default: return null; - } + switch (ch) { + case '+': + return 62; + case '/': + return 63; + default: + if (ch >= 'A' && ch <= 'Z') + return (byte) (ch - 'A'); + else if (ch >= 'a' && ch <= 'z') + return (byte) (ch - 'a' + 26); + else if (ch >= '0' && ch <= '9') + return (byte) (ch - '0' + 52); + else + throw new XmlException ("Invalid Base64 character was found."); } } - private string UnescapeAttributeValue (string unresolved) + // Returns -1 if it should throw an error. + private int ReadCharsInternal (char [] buffer, int offset, int length) { - if(unresolved == null) return null; + if (IsEmptyElement) { + Read (); + return 0; + } - // trim start/end edge of quotation character. 
- return Dereference (unresolved.Substring (1, unresolved.Length - 2)); - } + if (offset < 0) + throw new ArgumentOutOfRangeException ("offset", offset, "Offset must be non-negative integer."); + else if (length < 0) + throw new ArgumentOutOfRangeException ("length", length, "Length must be non-negative integer."); + else if (buffer.Length < offset + length) + throw new ArgumentOutOfRangeException ("buffer length is smaller than the sum of offset and length."); - private string Dereference (string unresolved) - { - StringBuilder resolved = new StringBuilder(); - int pos = 0; - int next = unresolved.IndexOf ('&'); - if(next < 0) - return unresolved; + if (NodeType != XmlNodeType.Element) + return 0; - while(next >= 0) { - if(pos < next) - resolved.Append (unresolved.Substring (pos, next - pos));// - 1); - int endPos = unresolved.IndexOf (';', next+1); - string entityName = - unresolved.Substring (next + 1, endPos - next - 1); - if(entityName [0] == '#') { - char c; - // character entity - if(entityName [1] == 'x') { - // hexadecimal - c = (char) int.Parse ("0" + entityName.Substring (2), - System.Globalization.NumberStyles.HexNumber); - } else { - // decimal - c = (char) int.Parse (entityName.Substring (1)); + shouldSkipUntilEndTag = true; + + int bufIndex = offset; + for (int i = 0; i < length; i++) { + int c = PeekChar (); + switch (c) { + case -1: + throw new XmlException (this as IXmlLineInfo, "Unexpected end of xml."); + case '<': + ReadChar (); + if (PeekChar () != '/') { + buffer [bufIndex++] = '<'; + continue; } - resolved.Append (c); - } else { - switch(entityName) { - case "lt": resolved.Append ("<"); break; - case "gt": resolved.Append (">"); break; - case "amp": resolved.Append ("&"); break; - case "quot": resolved.Append ("\""); break; - case "apos": resolved.Append ("'"); break; - // With respect to "Value", MS document is helpless - // and the implemention returns inconsistent value - // (e.g. XML: "&ent; &ent;" ---> Value: "&ent; &ent;".) 
- default: resolved.Append ("&" + entityName + ";"); break; + // Seems to skip immediate EndElement + Expect ('/'); + if (depthUp) { + depth++; + depthUp = false; + } + ReadEndTag (); + shouldSkipUntilEndTag = false; + Read (); // move to the next node + return i; + default: + ReadChar (); + if (c < Char.MaxValue) + buffer [bufIndex++] = (char) c; + else { + buffer [bufIndex++] = (char) (c / 0x10000 + 0xD800 - 1); + buffer [bufIndex++] = (char) (c % 0x10000 + 0xDC00); } - } - pos = endPos + 1; - if(pos > unresolved.Length) break; - next = unresolved.IndexOf('&', pos); + } } - resolved.Append (unresolved.Substring(pos)); - - return resolved.ToString(); + return length; } + private bool ReadUntilEndTag () + { + int ch; + do { + ch = ReadChar (); + switch (ch) { + case -1: + throw new XmlException (this as IXmlLineInfo, + "Unexpected end of xml."); + case '<': + if (PeekChar () != '/') + continue; + ReadChar (); + string name = ReadName (); + if (name != elementNames [elementNameStackPos - 1]) + continue; + Expect ('>'); + depth--; + elementNames [--elementNameStackPos] = null; + return Read (); + } + } while (true); + } #endregion } }