2 // System.Xml.XmlTextReader
5 // Jason Diamond (jason@injektilo.org)
6 // Adam Treat (manyoso@yahoo.com)
7 // Atsushi Enomoto (ginga@kit.hi-ho.ne.jp)
9 // (C) 2001, 2002 Jason Diamond http://injektilo.org/
10 // Copyright (C) 2005-2006 Novell, Inc (http://www.novell.com)
12 // Permission is hereby granted, free of charge, to any person obtaining
13 // a copy of this software and associated documentation files (the
14 // "Software"), to deal in the Software without restriction, including
15 // without limitation the rights to use, copy, modify, merge, publish,
16 // distribute, sublicense, and/or sell copies of the Software, and to
17 // permit persons to whom the Software is furnished to do so, subject to
18 // the following conditions:
20 // The above copyright notice and this permission notice shall be
21 // included in all copies or substantial portions of the Software.
23 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
27 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
28 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
29 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
31 //#define USE_NAME_BUFFER
34 // Optimization TODOs:
36 // - support PushbackChar() which reverts one character read.
37 // - ReadTextReader() should always keep one pushback buffer
38 // as pushback safety net.
39 // - Replace (peek,read) * n -> read * n + pushback
43 using System.Collections;
45 using System.Collections.Generic;
47 using System.Globalization;
49 using System.Security.Permissions;
51 using System.Xml.Schema;
64 internal class XmlTextReader : XmlReader,
65 IXmlLineInfo, IXmlNamespaceResolver, IHasXmlParserContext
67 [PermissionSet (SecurityAction.InheritanceDemand, Unrestricted = true)]
68 public class XmlTextReader : XmlReader, IXmlLineInfo, IHasXmlParserContext
73 protected XmlTextReader ()
77 public XmlTextReader (Stream input)
78 : this (new XmlStreamReader (input))
82 public XmlTextReader (string url)
83 : this(url, new NameTable ())
87 public XmlTextReader (TextReader input)
88 : this (input, new NameTable ())
92 protected XmlTextReader (XmlNameTable nt)
93 : this (String.Empty, null, XmlNodeType.None, null)
97 public XmlTextReader (Stream input, XmlNameTable nt)
98 : this(new XmlStreamReader (input), nt)
102 public XmlTextReader (string url, Stream input)
103 : this (url, new XmlStreamReader (input))
107 public XmlTextReader (string url, TextReader input)
108 : this (url, input, new NameTable ())
112 public XmlTextReader (string url, XmlNameTable nt)
115 Stream stream = GetStreamFromUrl (url, out uriString);
116 XmlParserContext ctx = new XmlParserContext (nt,
117 new XmlNamespaceManager (nt),
120 this.InitializeContext (uriString, ctx, new XmlStreamReader (stream), XmlNodeType.Document);
123 public XmlTextReader (TextReader input, XmlNameTable nt)
124 : this (String.Empty, input, nt)
128 // This is used in XmlReader.Create() to indicate that string
129 // argument is uri, not an xml fragment.
130 internal XmlTextReader (bool dummy, XmlResolver resolver, string url, XmlNodeType fragType, XmlParserContext context)
132 if (resolver == null) {
134 resolver = new XmlXapResolver ();
136 resolver = new XmlUrlResolver ();
139 this.XmlResolver = resolver;
141 Stream stream = GetStreamFromUrl (url, out uriString);
142 this.InitializeContext (uriString, context, new XmlStreamReader (stream), fragType);
145 public XmlTextReader (Stream xmlFragment, XmlNodeType fragType, XmlParserContext context)
146 : this (context != null ? context.BaseURI : String.Empty,
147 new XmlStreamReader (xmlFragment),
151 disallowReset = true;
154 internal XmlTextReader (string baseURI, TextReader xmlFragment, XmlNodeType fragType)
155 : this (baseURI, xmlFragment, fragType, null)
159 public XmlTextReader (string url, Stream input, XmlNameTable nt)
160 : this (url, new XmlStreamReader (input), nt)
164 public XmlTextReader (string url, TextReader input, XmlNameTable nt)
165 : this (url, input, XmlNodeType.Document, null)
169 public XmlTextReader (string xmlFragment, XmlNodeType fragType, XmlParserContext context)
170 : this (context != null ? context.BaseURI : String.Empty,
171 new StringReader (xmlFragment),
175 disallowReset = true;
178 internal XmlTextReader (string url, TextReader fragment, XmlNodeType fragType, XmlParserContext context)
180 InitializeContext (url, context, fragment, fragType);
// Resolves 'url' through the current resolver and opens it as a Stream.
// absoluteUriString receives the resolved URI as text, or String.Empty
// when the resolver returned no URI.
// Returns whatever the resolver's GetEntity yields, cast with 'as', so the
// result is null when the entity is not a Stream.
private Stream GetStreamFromUrl (string url, out string absoluteUriString)
{
	Uri resolved = resolver.ResolveUri (null, url);
	if (resolved == null)
		absoluteUriString = String.Empty;
	else
		absoluteUriString = resolved.ToString ();

	object entity = resolver.GetEntity (resolved, null, typeof (Stream));
	return entity as Stream;
}
194 public override int AttributeCount
196 get { return attributeCount; }
199 public override string BaseURI
201 get { return parserContext.BaseURI; }
205 public override bool CanReadBinaryContent {
209 public override bool CanReadValueChunk {
213 internal override bool CanReadBinaryContent {
217 internal override bool CanReadValueChunk {
222 internal bool CharacterChecking {
223 get { return checkCharacters; }
224 set { checkCharacters = value; }
227 // for XmlReaderSettings.CloseInput support
228 internal bool CloseInput {
229 get { return closeInput; }
230 set { closeInput = value; }
233 public override int Depth
236 int nodeTypeMod = currentToken.NodeType == XmlNodeType.Element ? 0 : -1;
237 if (currentAttributeValue >= 0)
238 return nodeTypeMod + elementDepth + 2; // inside attribute value.
239 else if (currentAttribute >= 0)
240 return nodeTypeMod + elementDepth + 1;
245 public Encoding Encoding
247 get { return parserContext.Encoding; }
250 public EntityHandling EntityHandling {
251 get { return entityHandling; }
252 set { entityHandling = value; }
256 public override bool EOF {
257 get { return readState == ReadState.EndOfFile; }
260 public override bool HasValue {
261 get { return cursorToken.Value != null; }
264 public override bool IsDefault {
265 // XmlTextReader does not expand default attributes.
266 get { return false; }
269 public override bool IsEmptyElement {
270 get { return cursorToken.IsEmptyElement; }
275 public override string this [int i] {
276 get { return GetAttribute (i); }
279 public override string this [string name] {
280 get { return GetAttribute (name); }
283 public override string this [string localName, string namespaceName] {
284 get { return GetAttribute (localName, namespaceName); }
288 public int LineNumber {
290 if (useProceedingLineInfo)
293 return cursorToken.LineNumber;
297 public int LinePosition {
299 if (useProceedingLineInfo)
302 return cursorToken.LinePosition;
306 public override string LocalName {
307 get { return cursorToken.LocalName; }
310 public override string Name {
311 get { return cursorToken.Name; }
314 public bool Namespaces {
315 get { return namespaces; }
317 if (readState != ReadState.Initial)
318 throw new InvalidOperationException ("Namespaces have to be set before reading.");
323 public override string NamespaceURI {
324 get { return cursorToken.NamespaceURI; }
327 public override XmlNameTable NameTable {
328 get { return nameTable; }
331 public override XmlNodeType NodeType {
332 get { return cursorToken.NodeType; }
335 public bool Normalization {
336 get { return normalization; }
337 set { normalization = value; }
340 public override string Prefix {
341 get { return cursorToken.Prefix; }
344 public bool ProhibitDtd {
345 get { return prohibitDtd; }
346 set { prohibitDtd = value; }
349 public override char QuoteChar {
350 get { return cursorToken.QuoteChar; }
353 public override ReadState ReadState {
354 get { return readState; }
358 public override XmlReaderSettings Settings {
359 get { return base.Settings; }
363 public override string Value {
364 get { return cursorToken.Value != null ? cursorToken.Value : String.Empty; }
367 public WhitespaceHandling WhitespaceHandling {
368 get { return whitespaceHandling; }
369 set { whitespaceHandling = value; }
372 public override string XmlLang {
373 get { return parserContext.XmlLang; }
376 public XmlResolver XmlResolver {
377 set { resolver = value; }
380 public override XmlSpace XmlSpace {
381 get { return parserContext.XmlSpace; }
388 public override void Close ()
390 readState = ReadState.Closed;
392 cursorToken.Clear ();
393 currentToken.Clear ();
395 if (closeInput && reader != null)
// Returns the value of the attribute at index 'i' of the current node.
//   i: zero-based attribute index; must satisfy 0 <= i < AttributeCount.
// Throws ArgumentOutOfRangeException when 'i' is negative or not less
// than AttributeCount. Negative indexes are rejected here explicitly so
// they no longer surface as an IndexOutOfRangeException from the array.
public override string GetAttribute (int i)
{
	if (i < 0 || i >= attributeCount)
		// Two-argument form: the first argument is the parameter name,
		// the second is the message.
		throw new ArgumentOutOfRangeException ("i", "Attribute index must be non-negative and less than AttributeCount.");

	return attributeTokens [i].Value;
}
408 // MS.NET 1.0 msdn says that this method returns String.Empty
409 // for absent attribute, but in fact it returns null.
410 // This description is corrected in MS.NET 1.1 msdn.
411 public override string GetAttribute (string name)
413 for (int i = 0; i < attributeCount; i++)
414 if (attributeTokens [i].Name == name)
415 return attributeTokens [i].Value;
419 private int GetIndexOfQualifiedAttribute (string localName, string namespaceURI)
421 for (int i = 0; i < attributeCount; i++) {
422 XmlAttributeTokenInfo ti = attributeTokens [i];
423 if (ti.LocalName == localName && ti.NamespaceURI == namespaceURI)
429 XmlParserContext IHasXmlParserContext.ParserContext {
430 get { return parserContext; }
433 public override string GetAttribute (string localName, string namespaceURI)
435 int idx = this.GetIndexOfQualifiedAttribute (localName, namespaceURI);
438 return attributeTokens [idx].Value;
442 public IDictionary<string, string> GetNamespacesInScope (XmlNamespaceScope scope)
444 return nsmgr.GetNamespacesInScope (scope);
447 IDictionary<string, string> IXmlNamespaceResolver.GetNamespacesInScope (XmlNamespaceScope scope)
449 return GetNamespacesInScope (scope);
453 public TextReader GetRemainder ()
455 if (peekCharsLength < 0)
457 return new StringReader (new string (peekChars, peekCharsIndex, peekCharsLength - peekCharsIndex) + reader.ReadToEnd ());
461 public bool HasLineInfo ()
463 bool IXmlLineInfo.HasLineInfo ()
469 public override string LookupNamespace (string prefix)
471 return LookupNamespace (prefix, false);
// Looks up the namespace bound to 'prefix' in the namespace manager,
// forwarding 'atomizedNames' unchanged. An empty-string result from the
// manager is reported to the caller as null.
private string LookupNamespace (string prefix, bool atomizedNames)
{
	string ns = nsmgr.LookupNamespace (prefix, atomizedNames);
	if (ns == String.Empty)
		return null;
	return ns;
}
482 string IXmlNamespaceResolver.LookupPrefix (string ns)
484 return LookupPrefix (ns, false);
487 public string LookupPrefix (string ns, bool atomizedName)
489 return nsmgr.LookupPrefix (ns, atomizedName);
// Moves the cursor to the attribute at index 'i' of the current node.
//   i: zero-based attribute index; must satisfy 0 <= i < AttributeCount.
// Throws ArgumentOutOfRangeException when 'i' is negative or not less
// than AttributeCount. The check runs before any state is modified, so a
// bad index leaves the cursor where it was.
public override void MoveToAttribute (int i)
{
	if (i < 0 || i >= attributeCount)
		// Two-argument form: the first argument is the parameter name,
		// the second is the message.
		throw new ArgumentOutOfRangeException ("i", "attribute index out of range.");

	currentAttribute = i;
	// Not positioned inside the attribute's value tokens.
	currentAttributeValue = -1;
	cursorToken = attributeTokens [i];
}
503 public override bool MoveToAttribute (string name)
505 for (int i = 0; i < attributeCount; i++) {
506 XmlAttributeTokenInfo ti = attributeTokens [i];
507 if (ti.Name == name) {
515 public override bool MoveToAttribute (string localName, string namespaceName)
517 int idx = GetIndexOfQualifiedAttribute (localName, namespaceName);
520 MoveToAttribute (idx);
524 public override bool MoveToElement ()
526 if (currentToken == null) // for attribute .ctor()
529 if (cursorToken == currentToken)
532 if (currentAttribute >= 0) {
533 currentAttribute = -1;
534 currentAttributeValue = -1;
535 cursorToken = currentToken;
542 public override bool MoveToFirstAttribute ()
544 if (attributeCount == 0)
547 return MoveToNextAttribute ();
550 public override bool MoveToNextAttribute ()
552 if (currentAttribute == 0 && attributeCount == 0)
554 if (currentAttribute + 1 < attributeCount) {
556 currentAttributeValue = -1;
557 cursorToken = attributeTokens [currentAttribute];
564 public override bool Read ()
566 if (readState == ReadState.Closed)
568 curNodePeekIndex = peekCharsIndex;
569 preserveCurrentTag = true;
573 if (startNodeType == XmlNodeType.Attribute) {
574 if (currentAttribute == 0)
575 return false; // already read.
576 SkipTextDeclaration ();
578 IncrementAttributeToken ();
579 ReadAttributeValueTokens ('"');
580 cursorToken = attributeTokens [0];
581 currentAttributeValue = -1;
582 readState = ReadState.Interactive;
585 if (readState == ReadState.Initial && currentState == XmlNodeType.Element)
586 SkipTextDeclaration ();
592 readState = ReadState.Interactive;
593 currentLinkedNodeLineNumber = line;
594 currentLinkedNodeLinePosition = column;
595 useProceedingLineInfo = true;
597 cursorToken = currentToken;
599 currentAttribute = currentAttributeValue = -1;
600 currentToken.Clear ();
602 // It was moved from end of ReadStartTag ().
608 if (readCharsInProgress) {
609 readCharsInProgress = false;
610 return ReadUntilEndTag ();
613 more = ReadContent ();
615 if (!more && startNodeType == XmlNodeType.Document && currentState != XmlNodeType.EndElement)
616 throw NotWFError ("Document element did not appear.");
618 useProceedingLineInfo = false;
622 public override bool ReadAttributeValue ()
624 if (readState == ReadState.Initial && startNodeType == XmlNodeType.Attribute) {
628 if (currentAttribute < 0)
630 XmlAttributeTokenInfo ti = attributeTokens [currentAttribute];
631 if (currentAttributeValue < 0)
632 currentAttributeValue = ti.ValueTokenStartIndex - 1;
634 if (currentAttributeValue < ti.ValueTokenEndIndex) {
635 currentAttributeValue++;
636 cursorToken = attributeValueTokens [currentAttributeValue];
643 public int ReadBase64 (byte [] buffer, int offset, int length)
645 BinaryCharGetter = binaryCharGetter;
647 return Binary.ReadBase64 (buffer, offset, length);
649 BinaryCharGetter = null;
653 public int ReadBinHex (byte [] buffer, int offset, int length)
655 BinaryCharGetter = binaryCharGetter;
657 return Binary.ReadBinHex (buffer, offset, length);
659 BinaryCharGetter = null;
663 public int ReadChars (char [] buffer, int offset, int length)
666 throw new ArgumentOutOfRangeException (
670 "Offset must be non-negative integer.");
672 } else if (length < 0) {
673 throw new ArgumentOutOfRangeException (
677 "Length must be non-negative integer.");
679 } else if (buffer.Length < offset + length)
680 throw new ArgumentOutOfRangeException ("buffer length is smaller than the sum of offset and length.");
682 if (IsEmptyElement) {
687 if (!readCharsInProgress && NodeType != XmlNodeType.Element)
690 preserveCurrentTag = false;
691 readCharsInProgress = true;
692 useProceedingLineInfo = true;
694 return ReadCharsInternal (buffer, offset, length);
697 public void ResetState ()
700 throw new InvalidOperationException ("Cannot call ResetState when parsing an XML fragment.");
704 public override void ResolveEntity ()
706 // XmlTextReader does not resolve entities.
707 throw new InvalidOperationException ("XmlTextReader cannot resolve external entities.");
711 [MonoTODO] // FIXME: Implement, for performance improvement
712 public override void Skip ()
720 // Parsed DTD Objects
721 // Note that this property must be kept since dtd2xsd uses it.
722 internal DTDObjectModel DTD {
723 get { return parserContext.Dtd; }
726 internal XmlResolver Resolver {
727 get { return resolver; }
732 internal class XmlTokenInfo
734 public XmlTokenInfo (XmlTextReader xtr)
742 protected XmlTextReader Reader;
745 public string LocalName;
746 public string Prefix;
747 public string NamespaceURI;
748 public bool IsEmptyElement;
749 public char QuoteChar;
750 public int LineNumber;
751 public int LinePosition;
752 public int ValueBufferStart;
753 public int ValueBufferEnd;
755 public XmlNodeType NodeType;
757 public virtual string Value {
759 if (valueCache != null)
761 if (ValueBufferStart >= 0) {
762 //Console.WriteLine (NodeType + " / " + ValueBuffer.Length + " / " + ValueBufferStart + " / " + ValueBufferEnd);
763 valueCache = Reader.valueBuffer.ToString (ValueBufferStart, ValueBufferEnd - ValueBufferStart);
767 case XmlNodeType.Text:
768 case XmlNodeType.SignificantWhitespace:
769 case XmlNodeType.Whitespace:
770 case XmlNodeType.Comment:
771 case XmlNodeType.CDATA:
772 case XmlNodeType.ProcessingInstruction:
773 valueCache = Reader.CreateValueString ();
778 set { valueCache = value; }
781 public virtual void Clear ()
783 ValueBufferStart = -1;
785 NodeType = XmlNodeType.None;
786 Name = LocalName = Prefix = NamespaceURI = String.Empty;
787 IsEmptyElement = false;
789 LineNumber = LinePosition = 0;
793 internal class XmlAttributeTokenInfo : XmlTokenInfo
795 public XmlAttributeTokenInfo (XmlTextReader reader)
798 NodeType = XmlNodeType.Attribute;
801 public int ValueTokenStartIndex;
802 public int ValueTokenEndIndex;
804 StringBuilder tmpBuilder = new StringBuilder ();
806 public override string Value {
808 if (valueCache != null)
811 // An empty value should return String.Empty.
812 if (ValueTokenStartIndex == ValueTokenEndIndex) {
813 XmlTokenInfo ti = Reader.attributeValueTokens [ValueTokenStartIndex];
814 if (ti.NodeType == XmlNodeType.EntityReference)
815 valueCache = String.Concat ("&", ti.Name, ";");
817 valueCache = ti.Value;
821 tmpBuilder.Length = 0;
822 for (int i = ValueTokenStartIndex; i <= ValueTokenEndIndex; i++) {
823 XmlTokenInfo ti = Reader.attributeValueTokens [i];
824 if (ti.NodeType == XmlNodeType.Text)
825 tmpBuilder.Append (ti.Value);
827 tmpBuilder.Append ('&');
828 tmpBuilder.Append (ti.Name);
829 tmpBuilder.Append (';');
833 valueCache = tmpBuilder.ToString (0, tmpBuilder.Length);
837 set { valueCache = value; }
840 public override void Clear ()
844 NodeType = XmlNodeType.Attribute;
845 ValueTokenStartIndex = ValueTokenEndIndex = 0;
// Registers the namespace declared by this attribute, if it is one, with
// the owning reader's namespace manager:
//  - prefix is the atomized "xmlns" string -> binds LocalName to Value;
//  - name is the atomized "xmlns" string   -> binds the empty prefix to Value.
// Both checks use reference equality against XmlNamespaceManager.PrefixXmlns.
// Any other attribute is ignored.
internal void FillXmlns ()
{
	string declaredPrefix;
	if (Object.ReferenceEquals (Prefix, XmlNamespaceManager.PrefixXmlns))
		declaredPrefix = LocalName;
	else if (Object.ReferenceEquals (Name, XmlNamespaceManager.PrefixXmlns))
		declaredPrefix = String.Empty;
	else
		return;

	Reader.nsmgr.AddNamespace (declaredPrefix, Value);
}
// Assigns NamespaceURI for this attribute token based on its prefix.
//  - If Prefix or Name is the same string instance as
//    XmlNamespaceManager.PrefixXmlns, NamespaceURI becomes
//    XmlNamespaceManager.XmlnsXmlns.
//  - Otherwise, if Prefix is empty, NamespaceURI becomes the empty string.
//  - The final assignment stores Reader.LookupNamespace (Prefix, true).
// NOTE(review): this listing omits the line between the last two
// assignments; presumably it is an 'else', making the lookup apply only to
// non-empty prefixes. Confirm against the full source.
856 internal void FillNamespace ()
858 if (Object.ReferenceEquals (Prefix, XmlNamespaceManager.PrefixXmlns) ||
859 Object.ReferenceEquals (Name, XmlNamespaceManager.PrefixXmlns))
860 NamespaceURI = XmlNamespaceManager.XmlnsXmlns;
861 else if (Prefix.Length == 0)
862 NamespaceURI = string.Empty;
864 NamespaceURI = Reader.LookupNamespace (Prefix, true);
868 private XmlTokenInfo cursorToken;
869 private XmlTokenInfo currentToken;
870 private XmlAttributeTokenInfo currentAttributeToken;
871 private XmlTokenInfo currentAttributeValueToken;
872 private XmlAttributeTokenInfo [] attributeTokens = new XmlAttributeTokenInfo [10];
873 private XmlTokenInfo [] attributeValueTokens = new XmlTokenInfo [10];
874 private int currentAttribute;
875 private int currentAttributeValue;
876 private int attributeCount;
878 private XmlParserContext parserContext;
879 private XmlNameTable nameTable;
880 private XmlNamespaceManager nsmgr;
882 private ReadState readState;
883 private bool disallowReset;
886 private int elementDepth;
887 private bool depthUp;
889 private bool popScope;
893 public TagName (string n, string l, string p)
900 public readonly string Name;
901 public readonly string LocalName;
902 public readonly string Prefix;
905 private TagName [] elementNames;
906 int elementNameStackPos;
908 private bool allowMultipleRoot;
910 private bool isStandalone;
912 private bool returnEntityReference;
913 private string entityReferenceName;
916 private char [] nameBuffer;
917 private int nameLength;
918 private int nameCapacity;
919 private const int initialNameCapacity = 32;
922 private StringBuilder valueBuffer;
924 private TextReader reader;
925 private char [] peekChars;
926 private int peekCharsIndex;
927 private int peekCharsLength;
928 private int curNodePeekIndex;
929 private bool preserveCurrentTag;
930 private const int peekCharCapacity = 1024;
935 private int currentLinkedNodeLineNumber;
936 private int currentLinkedNodeLinePosition;
937 private bool useProceedingLineInfo;
939 private XmlNodeType startNodeType;
940 // State machine attribute.
941 // XmlDeclaration: after the first node.
942 // DocumentType: after doctypedecl
943 // Element: inside document element
944 // EndElement: after document element
945 private XmlNodeType currentState;
947 // For ReadChars()/ReadBase64()/ReadBinHex()
948 private int nestLevel;
949 private bool readCharsInProgress;
950 XmlReaderBinarySupport.CharGetter binaryCharGetter;
952 // These values are never re-initialized.
953 private bool namespaces = true;
954 private WhitespaceHandling whitespaceHandling = WhitespaceHandling.All;
956 private XmlResolver resolver = new XmlXapResolver ();
958 private XmlResolver resolver = new XmlUrlResolver ();
960 private bool normalization = false;
962 private bool checkCharacters;
963 private bool prohibitDtd = false;
964 private bool closeInput = true;
965 private EntityHandling entityHandling; // 2.0
967 private NameTable whitespacePool;
968 private char [] whitespaceCache;
// Builds (does not throw) an XmlException for a well-formedness error,
// passing this reader as the line-info source along with the current
// BaseURI and the given message.
private XmlException NotWFError (string message)
{
	IXmlLineInfo lineInfo = this as IXmlLineInfo;
	return new XmlException (lineInfo, BaseURI, message);
}
977 allowMultipleRoot = false;
978 elementNames = new TagName [10];
979 valueBuffer = new StringBuilder ();
980 binaryCharGetter = new XmlReaderBinarySupport.CharGetter (ReadChars);
982 nameBuffer = new char [initialNameCapacity];
985 checkCharacters = true;
987 if (Settings != null)
988 checkCharacters = Settings.CheckCharacters;
992 entityHandling = EntityHandling.ExpandCharEntities;
995 if (peekChars == null)
996 peekChars = new char [peekCharCapacity];
997 peekCharsLength = -1;
998 curNodePeekIndex = -1; // read from start
1003 currentLinkedNodeLineNumber = currentLinkedNodeLinePosition = 0;
1008 private void Clear ()
1010 currentToken = new XmlTokenInfo (this);
1011 cursorToken = currentToken;
1012 currentAttribute = -1;
1013 currentAttributeValue = -1;
1016 readState = ReadState.Initial;
1022 popScope = allowMultipleRoot = false;
1023 elementNameStackPos = 0;
1025 isStandalone = false;
1026 returnEntityReference = false;
1027 entityReferenceName = String.Empty;
1031 nameCapacity = initialNameCapacity;
1033 useProceedingLineInfo = false;
1035 currentState = XmlNodeType.None;
1037 readCharsInProgress = false;
1040 private void InitializeContext (string url, XmlParserContext context, TextReader fragment, XmlNodeType fragType)
1042 startNodeType = fragType;
1043 parserContext = context;
1044 if (context == null) {
1045 XmlNameTable nt = new NameTable ();
1046 parserContext = new XmlParserContext (nt,
1047 new XmlNamespaceManager (nt),
1051 nameTable = parserContext.NameTable;
1052 nameTable = nameTable != null ? nameTable : new NameTable ();
1053 nsmgr = parserContext.NamespaceManager;
1054 nsmgr = nsmgr != null ? nsmgr : new XmlNamespaceManager (nameTable);
1056 if (url != null && url.Length > 0) {
1060 uri = new Uri (url, UriKind.RelativeOrAbsolute);
1062 uri = new Uri (url);
1064 } catch (Exception) {
1065 string path = Path.GetFullPath ("./a");
1066 uri = new Uri (new Uri (path), url);
1068 parserContext.BaseURI = uri.ToString ();
1076 case XmlNodeType.Attribute:
1077 reader = new StringReader (fragment.ReadToEnd ().Replace ("\"", """));
1079 case XmlNodeType.Element:
1080 currentState = XmlNodeType.Element;
1081 allowMultipleRoot = true;
1083 case XmlNodeType.Document:
1086 throw new XmlException (String.Format ("NodeType {0} is not allowed to create XmlTextReader.", fragType));
1091 internal ConformanceLevel Conformance {
1092 get { return allowMultipleRoot ? ConformanceLevel.Fragment : ConformanceLevel.Document; }
1094 if (value == ConformanceLevel.Fragment) {
1095 currentState = XmlNodeType.Element;
1096 allowMultipleRoot = true;
1101 internal void AdjustLineInfoOffset (int lineNumberOffset, int linePositionOffset)
1103 line += lineNumberOffset;
1104 column += linePositionOffset;
1107 internal void SetNameTable (XmlNameTable nameTable)
1109 parserContext.NameTable = nameTable;
1113 // Use this method rather than setting the properties
1114 // directly so that all the necessary properties can
1115 // be changed in harmony with each other. Maybe the
1116 // fields should be in a separate class to help enforce
1119 // Namespace URI could not be provided here.
1120 private void SetProperties (
1121 XmlNodeType nodeType,
1125 bool isEmptyElement,
1127 bool clearAttributes)
1129 SetTokenProperties (currentToken, nodeType, name, prefix, localName, isEmptyElement, value, clearAttributes);
1130 currentToken.LineNumber = this.currentLinkedNodeLineNumber;
1131 currentToken.LinePosition = this.currentLinkedNodeLinePosition;
1134 private void SetTokenProperties (
1136 XmlNodeType nodeType,
1140 bool isEmptyElement,
1142 bool clearAttributes)
1144 token.NodeType = nodeType;
1146 token.Prefix = prefix;
1147 token.LocalName = localName;
1148 token.IsEmptyElement = isEmptyElement;
1149 token.Value = value;
1150 this.elementDepth = depth;
1152 if (clearAttributes)
1156 private void ClearAttributes ()
1158 //for (int i = 0; i < attributeCount; i++)
1159 // attributeTokens [i].Clear ();
1161 currentAttribute = -1;
1162 currentAttributeValue = -1;
1165 private int PeekSurrogate (int c)
1167 if (peekCharsLength <= peekCharsIndex + 1) {
1168 if (!ReadTextReader (c))
1169 //FIXME: copy MS.NET behaviour when unpaired surrogate found
1173 int highhalfChar = peekChars [peekCharsIndex];
1174 int lowhalfChar = peekChars [peekCharsIndex+1];
1176 if (((highhalfChar & 0xFC00) != 0xD800) || ((lowhalfChar & 0xFC00) != 0xDC00))
1177 //FIXME: copy MS.NET behaviour when unpaired surrogate found
1178 return highhalfChar;
1179 return 0x10000 + (highhalfChar-0xD800)*0x400 + (lowhalfChar-0xDC00);
1182 private int PeekChar ()
1184 if (peekCharsIndex < peekCharsLength) {
1185 int c = peekChars [peekCharsIndex];
1188 if (c < 0xD800 || c >= 0xDFFF)
1190 return PeekSurrogate (c);
1192 if (!ReadTextReader (-1))
1198 private int ReadChar ()
1200 int ch = PeekChar ();
1204 peekCharsIndex++; //Increment by 2 when a compound UCS-4 character was found
1209 } else if (ch != -1) {
1215 private void Advance (int ch) {
1219 peekCharsIndex++; //Increment by 2 when a compound UCS-4 character was found
1224 } else if (ch != -1) {
1229 private bool ReadTextReader (int remained)
1231 if (peekCharsLength < 0) { // initialized buffer
1232 peekCharsLength = reader.Read (peekChars, 0, peekChars.Length);
1233 return peekCharsLength > 0;
1235 int offset = remained >= 0 ? 1 : 0;
1236 int copysize = peekCharsLength - curNodePeekIndex;
1238 // It must assure that current tag content always exists
1240 if (!preserveCurrentTag) {
1241 curNodePeekIndex = 0;
1244 } else if (peekCharsLength < peekChars.Length) {
1245 // NonBlockingStreamReader returned less bytes
1246 // than the size of the buffer. In that case,
1247 // just refill the buffer.
1248 } else if (curNodePeekIndex <= (peekCharsLength >> 1)) {
1249 // extend the buffer
1250 char [] tmp = new char [peekChars.Length * 2];
1251 Array.Copy (peekChars, curNodePeekIndex,
1254 curNodePeekIndex = 0;
1255 peekCharsIndex = copysize;
1257 Array.Copy (peekChars, curNodePeekIndex,
1258 peekChars, 0, copysize);
1259 curNodePeekIndex = 0;
1260 peekCharsIndex = copysize;
1263 peekChars [peekCharsIndex] = (char) remained;
1264 int count = peekChars.Length - peekCharsIndex - offset;
1265 if (count > peekCharCapacity)
1266 count = peekCharCapacity;
1267 int read = reader.Read (
1268 peekChars, peekCharsIndex + offset, count);
1269 int remainingSize = offset + read;
1270 peekCharsLength = peekCharsIndex + remainingSize;
1272 return (remainingSize != 0);
1275 private bool ReadContent ()
1279 parserContext.PopScope ();
1283 if (returnEntityReference)
1284 SetEntityReferenceProperties ();
1286 int c = PeekChar ();
1288 readState = ReadState.EndOfFile;
1289 ClearValueBuffer ();
1291 XmlNodeType.None, // nodeType
1292 String.Empty, // name
1293 String.Empty, // prefix
1294 String.Empty, // localName
1295 false, // isEmptyElement
1297 true // clearAttributes
1300 throw NotWFError ("unexpected end of file. Current depth is " + depth);
1307 switch (PeekChar ())
1315 ReadProcessingInstruction ();
1330 if (!ReadWhitespace ())
1332 return ReadContent ();
1340 return this.ReadState != ReadState.EndOfFile;
1343 private void SetEntityReferenceProperties ()
1345 DTDEntityDeclaration decl = DTD != null ? DTD.EntityDecls [entityReferenceName] : null;
1346 if (this.isStandalone)
1347 if (DTD == null || decl == null || !decl.IsInternalSubset)
1348 throw NotWFError ("Standalone document must not contain any references to an non-internally declared entity.");
1349 if (decl != null && decl.NotationName != null)
1350 throw NotWFError ("Reference to any unparsed entities is not allowed here.");
1352 ClearValueBuffer ();
1354 XmlNodeType.EntityReference, // nodeType
1355 entityReferenceName, // name
1356 String.Empty, // prefix
1357 entityReferenceName, // localName
1358 false, // isEmptyElement
1360 true // clearAttributes
1363 returnEntityReference = false;
1364 entityReferenceName = String.Empty;
1367 // The leading '<' has already been consumed.
1368 private void ReadStartTag ()
1370 if (currentState == XmlNodeType.EndElement)
1371 throw NotWFError ("Multiple document element was detected.");
1372 currentState = XmlNodeType.Element;
1376 currentLinkedNodeLineNumber = line;
1377 currentLinkedNodeLinePosition = column;
1379 string prefix, localName;
1380 string name = ReadName (out prefix, out localName);
1381 if (currentState == XmlNodeType.EndElement)
1382 throw NotWFError ("document has terminated, cannot open new element");
1384 bool isEmptyElement = false;
1389 if (XmlChar.IsFirstNameChar (PeekChar ()))
1390 ReadAttributes (false);
1391 cursorToken = this.currentToken;
1394 for (int i = 0; i < attributeCount; i++)
1395 attributeTokens [i].FillXmlns ();
1396 for (int i = 0; i < attributeCount; i++)
1397 attributeTokens [i].FillNamespace ();
1401 for (int i = 0; i < attributeCount; i++)
1402 if (attributeTokens [i].Prefix == "xmlns" &&
1403 attributeTokens [i].Value == String.Empty)
1404 throw NotWFError ("Empty namespace URI cannot be mapped to non-empty prefix.");
1406 for (int i = 0; i < attributeCount; i++) {
1407 for (int j = i + 1; j < attributeCount; j++)
1408 if (Object.ReferenceEquals (attributeTokens [i].Name, attributeTokens [j].Name) ||
1409 (Object.ReferenceEquals (attributeTokens [i].LocalName, attributeTokens [j].LocalName) &&
1410 Object.ReferenceEquals (attributeTokens [i].NamespaceURI, attributeTokens [j].NamespaceURI)))
1411 throw NotWFError ("Attribute name and qualified name must be identical.");
1414 if (PeekChar () == '/') {
1416 isEmptyElement = true;
1421 PushElementName (name, localName, prefix);
1423 parserContext.PushScope ();
1428 XmlNodeType.Element, // nodeType
1432 isEmptyElement, // isEmptyElement
1434 false // clearAttributes
1436 if (prefix.Length > 0)
1437 currentToken.NamespaceURI = LookupNamespace (prefix, true);
1438 else if (namespaces)
1439 currentToken.NamespaceURI = nsmgr.DefaultNamespace;
1442 if (NamespaceURI == null)
1443 throw NotWFError (String.Format ("'{0}' is undeclared namespace.", Prefix));
1445 for (int i = 0; i < attributeCount; i++) {
1446 MoveToAttribute (i);
1447 if (NamespaceURI == null)
1448 throw NotWFError (String.Format ("'{0}' is undeclared namespace.", Prefix));
1455 for (int i = 0; i < attributeCount; i++) {
1456 if (!Object.ReferenceEquals (attributeTokens [i].Prefix, XmlNamespaceManager.PrefixXml))
1458 string aname = attributeTokens [i].LocalName;
1459 string value = attributeTokens [i].Value;
1462 if (this.resolver != null) {
1464 BaseURI != String.Empty ?
1465 new Uri (BaseURI) : null;
1466 Uri uri = resolver.ResolveUri (
1468 parserContext.BaseURI =
1474 parserContext.BaseURI = value;
1477 parserContext.XmlLang = value;
1482 parserContext.XmlSpace = XmlSpace.Preserve;
1485 parserContext.XmlSpace = XmlSpace.Default;
1488 throw NotWFError (String.Format ("Invalid xml:space value: {0}", value));
1495 CheckCurrentStateUpdate ();
// Pushes an open element's names onto the element-name stack, doubling the
// backing array first when it is already full.
//   name:   value passed as the first TagName argument
//   local:  value passed as the second TagName argument
//   prefix: value passed as the third TagName argument
private void PushElementName (string name, string local, string prefix)
{
	if (elementNameStackPos == elementNames.Length) {
		// Stack is full: grow it, keeping the entries pushed so far.
		TagName [] grown = new TagName [elementNames.Length * 2];
		Array.Copy (elementNames, 0, grown, 0, elementNameStackPos);
		elementNames = grown;
	}

	int slot = elementNameStackPos++;
	elementNames [slot] = new TagName (name, local, prefix);
}
// Parses an end tag. The tag must close the innermost open element: the
// expected name is popped from elementNames and matched literally with
// Expect, then the node is exposed as an EndElement.
// Raises a not-well-formed error when no element is open or when the reader
// is not in the Element state.
// NOTE(review): the embedded line numbers have gaps, so some statements of
// this method are not visible in this listing.
1509 // The reader is positioned on the first character
1510 // of the element's name.
1511 private void ReadEndTag ()
1513 if (currentState != XmlNodeType.Element)
1514 throw NotWFError ("End tag cannot appear in this state.");
// Remember the position at which this node starts.
1516 currentLinkedNodeLineNumber = line;
1517 currentLinkedNodeLinePosition = column;
1519 if (elementNameStackPos == 0)
1520 throw NotWFError ("closing element without matching opening element");
// Pop the innermost open element; the input must spell exactly its name.
1521 TagName expected = elementNames [--elementNameStackPos];
1522 Expect (expected.Name);
1524 ExpectAfterWhitespace ('>');
1529 XmlNodeType.EndElement, // nodeType
1530 expected.Name, // name
1531 expected.Prefix, // prefix
1532 expected.LocalName, // localName
1533 false, // isEmptyElement
1535 true // clearAttributes
// Resolve the namespace URI the same way as for the start tag: by prefix
// when there is one, otherwise the default namespace when namespace
// processing is on.
1537 if (expected.Prefix.Length > 0)
1538 currentToken.NamespaceURI = LookupNamespace (expected.Prefix, true);
1539 else if (namespaces)
1540 currentToken.NamespaceURI = nsmgr.DefaultNamespace;
1544 CheckCurrentStateUpdate ();
// Moves the reader into the EndElement state once the root element has been
// closed (depth 0, on an empty element or an end tag), unless multiple
// roots are allowed.
private void CheckCurrentStateUpdate ()
{
	if (depth != 0 || allowMultipleRoot)
		return;

	if (IsEmptyElement || NodeType == XmlNodeType.EndElement)
		currentState = XmlNodeType.EndElement;
}
// Appends a supplementary-plane code point to nameBuffer as a UTF-16
// surrogate pair. Room for the first unit is the caller's responsibility;
// the buffer is grown here if the first unit fills it.
private void AppendSurrogatePairNameChar (int ch)
{
	int offset = ch - 0x10000;
	char high = (char) (offset / 0x400 + 0xD800);
	char low = (char) (offset % 0x400 + 0xDC00);

	nameBuffer [nameLength++] = high;
	if (nameLength == nameCapacity)
		ExpandNameCapacity ();
	nameBuffer [nameLength++] = low;
}
// Doubles nameCapacity and moves the nameLength characters collected so far
// into a freshly allocated nameBuffer of the new size.
private void ExpandNameCapacity ()
{
	char [] previous = nameBuffer;
	nameCapacity *= 2;
	nameBuffer = new char [nameCapacity];
	Array.Copy (previous, nameBuffer, nameLength);
}
// Appends one code point to valueBuffer.
// Anything that fits in a single UTF-16 code unit (up to and including
// U+FFFF) is appended directly; larger code points are split into a
// surrogate pair.
// The comparison is inclusive on purpose: with "<" the value 0xFFFF was
// routed to the surrogate-pair path, where (ch - 0x10000) is negative and
// yields a bogus pair.
private void AppendValueChar (int ch)
{
	if (ch <= Char.MaxValue)
		valueBuffer.Append ((char) ch);
	else
		AppendSurrogatePairValueChar (ch);
}
// Appends a supplementary-plane code point to valueBuffer as a UTF-16
// surrogate pair (high unit first, then low unit).
private void AppendSurrogatePairValueChar (int ch)
{
	int offset = ch - 0x10000;
	char high = (char) (offset / 0x400 + 0xD800);
	char low = (char) (offset % 0x400 + 0xDC00);
	valueBuffer.Append (high).Append (low);
}
// Materializes the text collected in valueBuffer as a string.
// For the Whitespace / SignificantWhitespace cases, text shorter than the
// 32-char whitespaceCache is copied into that cache and atomized through a
// private NameTable (whitespacePool), so repeated indentation strings share
// one instance. Everything else is returned via StringBuilder.ToString.
// NOTE(review): the embedded line numbers have gaps; the switch header and
// the statement guarded by the length check are not visible here.
1585 private string CreateValueString ()
1587 // Since whitespace strings are mostly identical
1588 // depending on the Depth, we make use of NameTable
1589 // to atomize whitespace strings.
1591 case XmlNodeType.Whitespace:
1592 case XmlNodeType.SignificantWhitespace:
1593 int len = valueBuffer.Length;
// The cache and the pool are created lazily on first use.
1594 if (whitespaceCache == null)
1595 whitespaceCache = new char [32];
1596 if (len >= whitespaceCache.Length)
1598 if (whitespacePool == null)
1599 whitespacePool = new NameTable ();
// Two ways to copy the buffer into the cache, selected at compile time.
1600 #if NET_2_0 && !NET_2_1
1601 valueBuffer.CopyTo (0, whitespaceCache, 0, len);
1603 for (int i = 0; i < len; i++)
1604 whitespaceCache [i] = valueBuffer [i];
1606 return whitespacePool.Add (whitespaceCache, 0, valueBuffer.Length);
// Two ToString overloads are chosen by the buffer's capacity.
1608 return (valueBuffer.Capacity < 100) ?
1609 valueBuffer.ToString (0, valueBuffer.Length) :
1610 valueBuffer.ToString ();
// Empties valueBuffer so that the next node's text starts from scratch.
// Only the length is reset; the same buffer instance keeps being reused.
private void ClearValueBuffer ()
{
	valueBuffer.Length = 0;
}
// Reads character data into valueBuffer until the next '<' or end of input
// and exposes it as a Text, Whitespace or SignificantWhitespace node.
// notWhitespace: true when the caller already knows the content is not
//   whitespace-only; it is also set inside the loop below.
// Raises a not-well-formed error outside the Element state, on characters
// rejected by XmlChar.IsInvalid (when CharacterChecking is on) and on the
// sequence ']]>'.
// NOTE(review): the embedded line numbers have gaps, so several statements
// (reads, else branches, loop exits) are not visible in this listing.
1618 // The reader is positioned on the first character
1620 private void ReadText (bool notWhitespace)
1622 if (currentState != XmlNodeType.Element)
1623 throw NotWFError ("Text node cannot appear in this state.");
1624 preserveCurrentTag = false;
1627 ClearValueBuffer ();
1629 int ch = PeekChar ();
// Tracks a preceding ']' so that ']]>' can be detected.
1630 bool previousWasCloseBracket = false;
1632 while (ch != '<' && ch != -1) {
1635 ch = ReadReference (false);
1636 if (returnEntityReference) // Returns -1 if char validation should not be done
1638 } else if (normalization && ch == '\r') {
1642 // append '\n' instead of '\r'.
1643 AppendValueChar ('\n');
1644 // and in case of "\r\n", discard '\r'.
1647 if (CharacterChecking && XmlChar.IsInvalid (ch))
1648 throw NotWFError ("Not allowed character was found.");
1652 // FIXME: it might be optimized by the JIT later,
1653 // AppendValueChar (ch);
// NOTE(review): '<' excludes 0xFFFF itself, which therefore reaches the
// surrogate-pair call below although it fits in one char; confirm intent.
1655 if (ch < Char.MaxValue)
1656 valueBuffer.Append ((char) ch);
1658 AppendSurrogatePairValueChar (ch);
1663 if (previousWasCloseBracket)
1664 if (PeekChar () == '>')
1665 throw NotWFError ("Inside text content, character sequence ']]>' is not allowed.");
1666 previousWasCloseBracket = true;
1668 else if (previousWasCloseBracket)
1669 previousWasCloseBracket = false;
1671 notWhitespace = true;
// An entity reference with no text before it is reported as its own node.
1674 if (returnEntityReference && valueBuffer.Length == 0) {
1675 SetEntityReferenceProperties ();
// Whitespace-only content is Significant only under xml:space="preserve".
1677 XmlNodeType nodeType = notWhitespace ? XmlNodeType.Text :
1678 this.XmlSpace == XmlSpace.Preserve ? XmlNodeType.SignificantWhitespace : XmlNodeType.Whitespace;
1680 nodeType, // nodeType
1681 String.Empty, // name
1682 String.Empty, // prefix
1683 String.Empty, // localName
1684 false, // isEmptyElement
1685 null, // value: create only when required
1686 true // clearAttributes
// Dispatches a reference whose '&' was already consumed: a '#' next means a
// character reference, anything else an entity reference. The int result of
// the chosen reader is returned unchanged.
// NOTE(review): the method returns int, so the "Returns true" wording in the
// older comment below looks stale.
1691 // The leading '&' has already been consumed.
1692 // Returns true if the entity reference isn't a simple
1693 // character reference or one of the predefined entities.
1694 // This allows the ReadText method to break so that the
1695 // next call to Read will return the EntityReference node.
1696 private int ReadReference (bool ignoreEntityReferences)
1698 if (PeekChar () == '#') {
1700 return ReadCharacterReference ();
1702 return ReadEntityReference (ignoreEntityReferences);
// Parses a numeric character reference, hexadecimal when an 'x' follows and
// decimal otherwise, accumulating digits into value until ';' or end of
// input. Non-digit characters raise a not-well-formed error.
// NOTE(review): no overflow guard on value is visible in this listing; a
// very long digit run presumably wraps around. Confirm against the full
// source.
// NOTE(review): the embedded line numbers have gaps; the reads, the
// declarations of ch/value and the return statement are not visible here.
1705 private int ReadCharacterReference ()
1710 if (PeekChar () == 'x') {
1713 while ((ch = PeekChar ()) != ';' && ch != -1) {
// Each hex digit shifts the accumulated value left by four bits.
1716 if (ch >= '0' && ch <= '9')
1717 value = (value << 4) + ch - '0';
1718 else if (ch >= 'A' && ch <= 'F')
1719 value = (value << 4) + ch - 'A' + 10;
1720 else if (ch >= 'a' && ch <= 'f')
1721 value = (value << 4) + ch - 'a' + 10;
1723 throw NotWFError (String.Format (CultureInfo.InvariantCulture,
1724 "invalid hexadecimal digit: {0} (#x{1:X})",
1729 while ((ch = PeekChar ()) != ';' && ch != -1) {
1732 if (ch >= '0' && ch <= '9')
1733 value = value * 10 + ch - '0';
1735 throw NotWFError (String.Format (CultureInfo.InvariantCulture,
1736 "invalid decimal digit: {0} (#x{1:X})",
// The referenced character is validated only when both CharacterChecking
// and Normalization are on.
1744 // There is no way to save surrogate pairs...
1745 if (CharacterChecking && Normalization &&
1746 XmlChar.IsInvalid (value))
1747 throw NotWFError ("Referenced character was not allowed in XML. Normalization is " + normalization + ", checkCharacters = " + checkCharacters);
// Reads the entity name after '&' and handles it in one of three ways:
// a predefined entity is looked up via XmlChar.GetPredefinedEntity; with
// ignoreEntityReferences the literal "&name;" text is appended to the value
// buffer; otherwise the reference is recorded (returnEntityReference and
// entityReferenceName) so it can be reported as its own node.
// NOTE(review): the embedded line numbers have gaps; the return statements
// and the else branch are not visible in this listing.
1751 // Returns -1 if it should not be validated.
1752 // Real EOF must not be detected here.
1753 private int ReadEntityReference (bool ignoreEntityReferences)
1755 string name = ReadName ();
1758 int predefined = XmlChar.GetPredefinedEntity (name);
1759 if (predefined >= 0)
1762 if (ignoreEntityReferences) {
// Keep the reference verbatim in the value.
1763 AppendValueChar ('&');
1764 for (int i = 0; i < name.Length; i++)
1765 AppendValueChar (name [i]);
1766 AppendValueChar (';');
1768 returnEntityReference = true;
1769 entityReferenceName = name;
// Reads a run of name="value" attributes into the attribute token arrays.
// Each iteration allocates a token, records its line/column, reads the
// qualified name (split into prefix and local name), expects '=' and then
// reads the quoted value. Whitespace is required between attributes.
// isXmlDecl: set when parsing the pseudo-attributes of an XML declaration
//   (see the "hack" comment in the body).
// Both attribute cursors are reset to -1 before and after the loop.
// NOTE(review): the embedded line numbers have gaps; the loop header and
// the statements guarded by the '?', '/' and '>' checks are not visible.
1775 // The reader is positioned on the first character of
1776 // the attribute name.
1777 private void ReadAttributes (bool isXmlDecl)
1780 bool requireWhitespace = false;
1781 currentAttribute = -1;
1782 currentAttributeValue = -1;
1785 if (!SkipWhitespace () && requireWhitespace)
1786 throw NotWFError ("Unexpected token. Name is required here.");
1788 IncrementAttributeToken ();
1789 currentAttributeToken.LineNumber = line;
1790 currentAttributeToken.LinePosition = column;
1792 string prefix, localName;
1793 currentAttributeToken.Name = ReadName (out prefix, out localName);
1794 currentAttributeToken.Prefix = prefix;
1795 currentAttributeToken.LocalName = localName;
1796 ExpectAfterWhitespace ('=');
// -1: the quote character is taken from the input.
1798 ReadAttributeValueTokens (-1);
1799 // This hack is required for xmldecl which has
1800 // both effective attributes and Value.
1803 dummyValue = currentAttributeToken.Value;
// No whitespace after this value: the next iteration must not find a name.
1807 if (!SkipWhitespace ())
1808 requireWhitespace = true;
1809 peekChar = PeekChar ();
1811 if (peekChar == '?')
1814 else if (peekChar == '/' || peekChar == '>')
1816 } while (peekChar != -1);
1818 currentAttribute = -1;
1819 currentAttributeValue = -1;
// Adds a synthetic attribute (no prefix, empty namespace URI) with the given
// name and value: one attribute token plus one attribute-value token.
// The name is atomized through the parser context's NameTable.
// NOTE(review): the arguments of the SetTokenProperties call and any
// statements after it are not visible in this listing.
1822 private void AddAttributeWithValue (string name, string value)
1824 IncrementAttributeToken ();
1825 XmlAttributeTokenInfo ati = attributeTokens [currentAttribute];
1826 ati.Name = parserContext.NameTable.Add (name);
1827 ati.Prefix = String.Empty;
1828 ati.NamespaceURI = String.Empty;
1829 IncrementAttributeValueToken ();
1830 XmlTokenInfo vti = attributeValueTokens [currentAttributeValue];
1831 SetTokenProperties (vti,
// Makes attributeTokens [currentAttribute] the current attribute token:
// doubles the array when the index has reached its length, creates the
// token object on first use of a slot, and clears it for reuse.
// NOTE(review): the statement that advances currentAttribute is not visible
// in this listing (gap in the embedded line numbers); presumably it mirrors
// IncrementAttributeValueToken. Confirm.
1843 private void IncrementAttributeToken ()
1846 if (attributeTokens.Length == currentAttribute) {
1847 XmlAttributeTokenInfo [] newArray =
1848 new XmlAttributeTokenInfo [attributeTokens.Length * 2];
1849 attributeTokens.CopyTo (newArray, 0);
1850 attributeTokens = newArray;
// Token objects are allocated once per slot and recycled afterwards.
1852 if (attributeTokens [currentAttribute] == null)
1853 attributeTokens [currentAttribute] = new XmlAttributeTokenInfo (this);
1854 currentAttributeToken = attributeTokens [currentAttribute];
1855 currentAttributeToken.Clear ();
// Advances to the next attribute-value token slot and makes it current.
// The slot array is doubled when exhausted; token objects are created on
// first use of a slot and cleared for reuse afterwards.
private void IncrementAttributeValueToken ()
{
	currentAttributeValue++;

	int slotCount = attributeValueTokens.Length;
	if (currentAttributeValue == slotCount) {
		XmlTokenInfo [] grown = new XmlTokenInfo [slotCount * 2];
		attributeValueTokens.CopyTo (grown, 0);
		attributeValueTokens = grown;
	}

	if (attributeValueTokens [currentAttributeValue] == null) {
		attributeValueTokens [currentAttributeValue] = new XmlTokenInfo (this);
	}

	currentAttributeValueToken = attributeValueTokens [currentAttributeValue];
	currentAttributeValueToken.Clear ();
}
// Reads one quoted attribute value into valueBuffer and splits it into
// value tokens: runs of text become Text tokens, and non-predefined entity
// references become EntityReference tokens (unless entities are expanded).
// dummyQuoteChar: when negative, the opening quote is read from the input;
//   otherwise it is used as the quote character (the "Attribute value
//   constructor" case below).
// Raises a not-well-formed error for an unquoted value, a literal '<',
// end of input inside a value read from the stream, or an invalid character
// when CharacterChecking is on.
// NOTE(review): the embedded line numbers have gaps; the main loop header,
// the switch labels and several statements are not visible in this listing.
1872 // LAMESPEC: Orthodox XML reader should normalize attribute values
1873 private void ReadAttributeValueTokens (int dummyQuoteChar)
1875 int quoteChar = (dummyQuoteChar < 0) ? ReadChar () : dummyQuoteChar;
1877 if (quoteChar != '\'' && quoteChar != '\"')
1878 throw NotWFError ("an attribute value was not quoted");
1879 currentAttributeToken.QuoteChar = (char) quoteChar;
// The first value token of this attribute starts here.
1881 IncrementAttributeValueToken ();
1882 currentAttributeToken.ValueTokenStartIndex = currentAttributeValue;
1883 currentAttributeValueToken.LineNumber = line;
1884 currentAttributeValueToken.LinePosition = column;
// incrementToken: set after an EntityReference token was emitted, so the
// next character opens a fresh token.
1886 bool incrementToken = false;
1887 bool isNewToken = true;
1890 currentAttributeValueToken.ValueBufferStart = valueBuffer.Length;
1893 if (ch == quoteChar)
1896 if (incrementToken) {
1897 IncrementAttributeValueToken ();
1898 currentAttributeValueToken.ValueBufferStart = valueBuffer.Length;
1899 currentAttributeValueToken.LineNumber = line;
1900 currentAttributeValueToken.LinePosition = column;
1901 incrementToken = false;
1908 throw NotWFError ("attribute values cannot contain '<'");
1910 if (dummyQuoteChar < 0)
1911 throw NotWFError ("unexpected end of file in an attribute value");
1912 else // Attribute value constructor.
1918 if (PeekChar () == '\n')
1919 continue; // skip '\r'.
1921 // The csc in MS.NET 2.0 beta 1 barfs on this goto, so work around that
1930 // When Normalize = true, then replace
1931 // all spaces to ' '
1937 if (PeekChar () == '#') {
1939 ch = ReadCharacterReference ();
1940 AppendValueChar (ch);
1943 // Check XML 1.0 section 3.1 WFC.
1944 string entName = ReadName ();
1946 int predefined = XmlChar.GetPredefinedEntity (entName);
1947 if (predefined < 0) {
1948 CheckAttributeEntityReferenceWFC (entName);
1950 if (entityHandling == EntityHandling.ExpandEntities) {
// Expansion: the entity's replacement text is spliced into the value.
1951 string value = DTD.GenerateEntityAttributeText (entName);
1952 foreach (char c in (IEnumerable<char>) value)
1953 AppendValueChar (c);
// No expansion: close the current Text token, then emit an EntityReference
// token carrying the entity name.
1957 currentAttributeValueToken.ValueBufferEnd = valueBuffer.Length;
1958 currentAttributeValueToken.NodeType = XmlNodeType.Text;
1960 IncrementAttributeValueToken ();
1961 currentAttributeValueToken.Name = entName;
1962 currentAttributeValueToken.Value = String.Empty;
1963 currentAttributeValueToken.NodeType = XmlNodeType.EntityReference;
1964 incrementToken = true;
1968 AppendValueChar (predefined);
1971 if (CharacterChecking && XmlChar.IsInvalid (ch))
1972 throw NotWFError ("Invalid character was found.");
1973 // FIXME: it might be optimized by the JIT later,
1974 // AppendValueChar (ch);
// NOTE(review): '<' excludes 0xFFFF itself, which therefore reaches the
// surrogate-pair call below although it fits in one char; confirm intent.
1976 if (ch < Char.MaxValue)
1977 valueBuffer.Append ((char) ch);
1979 AppendSurrogatePairValueChar (ch);
// Close the trailing Text token unless the value ended on an entity
// reference token.
1986 if (!incrementToken) {
1987 currentAttributeValueToken.ValueBufferEnd = valueBuffer.Length;
1988 currentAttributeValueToken.NodeType = XmlNodeType.Text;
1990 currentAttributeToken.ValueTokenEndIndex = currentAttributeValue;
// Applies well-formedness checks to a general entity referenced from an
// attribute value: the entity must be declared (when entities are expanded,
// or when both a DTD and a resolver are present), must not be external, must
// come from the internal subset in a standalone document, and its value
// must not contain '<'.
// NOTE(review): the statements between the "does not exist" throw and the
// HasExternalReference check are not visible in this listing; presumably an
// undeclared entity that passes the first check returns early. Confirm.
1994 private void CheckAttributeEntityReferenceWFC (string entName)
1996 DTDEntityDeclaration entDecl =
1997 DTD == null ? null : DTD.EntityDecls [entName];
1998 if (entDecl == null) {
// NOTE(review): the trailing "entDecl == null" is redundant inside this branch.
1999 if (entityHandling == EntityHandling.ExpandEntities
2000 || (DTD != null && resolver != null && entDecl == null))
2001 throw NotWFError (String.Format ("Referenced entity '{0}' does not exist.", entName));
2006 if (entDecl.HasExternalReference)
2007 throw NotWFError ("Reference to external entities is not allowed in the value of an attribute.");
2008 if (isStandalone && !entDecl.IsInternalSubset)
2009 throw NotWFError ("Reference to external entities is not allowed in the internal subset.");
2010 if (entDecl.EntityValue.IndexOf ('<') >= 0)
2011 throw NotWFError ("Attribute must not contain character '<' either directly or indirectly by way of entity references.");
// Reads a processing instruction or an XML declaration.
// The target name is read first; any case variant of "xml" other than the
// exact lowercase form is rejected. The content up to "?>" is collected in
// valueBuffer. A target that is the atomized "xml" string is handed to
// VerifyXmlDeclaration; otherwise the node is exposed as a
// ProcessingInstruction.
// NOTE(review): the embedded line numbers have gaps; the reads inside the
// loop, its exit and the return after VerifyXmlDeclaration are not visible.
2014 // The reader is positioned on the first character
2017 // It may be xml declaration or processing instruction.
2018 private void ReadProcessingInstruction ()
2020 string target = ReadName ();
2021 if (target != "xml" && target.ToLower (CultureInfo.InvariantCulture) == "xml")
2022 throw NotWFError ("Not allowed processing instruction name which starts with 'X', 'M', 'L' was found.");
// The target must be followed by whitespace, or directly by '?'.
2024 if (!SkipWhitespace ())
2025 if (PeekChar () != '?')
2026 throw NotWFError ("Invalid processing instruction name was found.");
2028 ClearValueBuffer ();
2031 while ((ch = PeekChar ()) != -1) {
2034 if (ch == '?' && PeekChar () == '>') {
2039 if (CharacterChecking && XmlChar.IsInvalid (ch))
2040 throw NotWFError ("Invalid character was found.");
2041 AppendValueChar (ch);
// Reference comparison: relies on target being the same atomized instance
// as XmlNamespaceManager.PrefixXml.
2044 if (Object.ReferenceEquals (target, XmlNamespaceManager.PrefixXml))
2045 VerifyXmlDeclaration ();
2047 if (currentState == XmlNodeType.None)
2048 currentState = XmlNodeType.XmlDeclaration;
2051 XmlNodeType.ProcessingInstruction, // nodeType
2053 String.Empty, // prefix
2054 target, // localName
2055 false, // isEmptyElement
2056 null, // value: create only when required
2057 true // clearAttributes
// Validates the content of an XML declaration (already collected in the
// value buffer) and exposes it as an XmlDeclaration node with version,
// encoding and standalone attributes.
// The pseudo-attributes are parsed from the text in order: version (must be
// "1.0"), then optionally encoding (must be a valid IANA name), then
// optionally standalone ("yes" or "no"). Any other name, or trailing text,
// is an error.
// NOTE(review): the embedded line numbers have gaps; the declarations of
// idx/name/value and the assignments to the encoding and standalone locals
// are not visible in this listing.
2062 void VerifyXmlDeclaration ()
2064 if (!allowMultipleRoot && currentState != XmlNodeType.None)
2065 throw NotWFError ("XML declaration cannot appear in this state.");
2067 currentState = XmlNodeType.XmlDeclaration;
2069 string text = CreateValueString ();
2075 string encoding = null, standalone = null;
2077 ParseAttributeFromString (text, ref idx, out name, out value);
2078 if (name != "version" || value != "1.0")
2079 throw NotWFError ("'version' is expected.");
// name is reset so a stale value cannot match the checks that follow.
2080 name = String.Empty;
2081 if (SkipWhitespaceInString (text, ref idx) && idx < text.Length)
2082 ParseAttributeFromString (text, ref idx, out name, out value);
2083 if (name == "encoding") {
2084 if (!XmlChar.IsValidIANAEncoding (value))
2085 throw NotWFError ("'encoding' must be a valid IANA encoding name.");
// The context encoding comes from the stream reader when there is one;
// the other visible assignment uses Encoding.Unicode.
2086 if (reader is XmlStreamReader)
2087 parserContext.Encoding = ((XmlStreamReader) reader).Encoding;
2089 parserContext.Encoding = Encoding.Unicode;
2091 name = String.Empty;
2092 if (SkipWhitespaceInString (text, ref idx) && idx < text.Length)
2093 ParseAttributeFromString (text, ref idx, out name, out value);
2095 if (name == "standalone") {
2096 this.isStandalone = value == "yes";
2097 if (value != "yes" && value != "no")
2098 throw NotWFError ("Only 'yes' or 'no' is allow for 'standalone'");
2100 SkipWhitespaceInString (text, ref idx);
2102 else if (name.Length != 0)
2103 throw NotWFError (String.Format ("Unexpected token: '{0}'", name));
// Anything left over after the recognized pseudo-attributes is an error.
2105 if (idx < text.Length)
2106 throw NotWFError ("'?' is expected.");
2108 AddAttributeWithValue ("version", "1.0");
2109 if (encoding != null)
2110 AddAttributeWithValue ("encoding", encoding);
2111 if (standalone != null)
2112 AddAttributeWithValue ("standalone", standalone);
// Reset both attribute cursors after the synthetic attributes were added.
2113 currentAttribute = currentAttributeValue = -1;
2116 XmlNodeType.XmlDeclaration, // nodeType
2118 String.Empty, // prefix
2120 false, // isEmptyElement
2122 false // clearAttributes
// Advances idx over XML whitespace in text and returns true when at least
// one character was skipped (idx moved past its starting value).
// NOTE(review): the declaration of start and the loop body are not visible
// in this listing (gaps in the embedded line numbers).
2126 bool SkipWhitespaceInString (string text, ref int idx)
2129 while (idx < text.Length && XmlChar.IsWhitespace (text [idx]))
2131 return idx - start > 0;
// Parses one name="value" (or name='value') pseudo-attribute out of src,
// starting at idx.
//   src   - text being scanned (the content of an XML declaration).
//   idx   - scan position; on return it points just past the closing quote.
//   name  - receives the run of name characters found at idx (may be empty).
//   value - receives the text between the quotes.
// Raises a not-well-formed error when '=' or the opening quote is missing,
// or when the value is not terminated by a matching quote.
private void ParseAttributeFromString (string src,
	ref int idx, out string name, out string value)
{
	while (idx < src.Length && XmlChar.IsWhitespace (src [idx]))
		idx++;

	int start = idx;
	while (idx < src.Length && XmlChar.IsNameChar (src [idx]))
		idx++;
	name = src.Substring (start, idx - start);

	while (idx < src.Length && XmlChar.IsWhitespace (src [idx]))
		idx++;
	if (idx == src.Length || src [idx] != '=')
		throw NotWFError (String.Format ("'=' is expected after {0}", name));
	idx++;

	while (idx < src.Length && XmlChar.IsWhitespace (src [idx]))
		idx++;

	if (idx == src.Length || src [idx] != '"' && src [idx] != '\'')
		throw NotWFError ("'\"' or '\'' is expected.");

	char quote = src [idx];
	idx++;
	start = idx;

	while (idx < src.Length && src [idx] != quote)
		idx++;

	// Running off the end means the literal was never closed. Previously this
	// case was accepted silently and idx was left one past src.Length.
	if (idx == src.Length)
		throw NotWFError (String.Format ("closing {0} is expected for the value of '{1}'.", quote, name));

	value = src.Substring (start, idx - start);
	idx++; // step over the closing quote
}
// Consumes a text declaration ("<?xml version=... encoding=... ?>") at the
// start of an external entity, without producing a node.
// Six characters are peeked to test for "<?xml "; a case variant of "xml "
// is rejected. An optional version (exactly "1.0" when three characters
// long) and an encoding pseudo-attribute are then read. A missing encoding
// is an error only under ConformanceLevel.Auto.
// NOTE(review): the embedded line numbers have gaps; early exits, case
// labels and several reads are not visible in this listing.
2168 internal void SkipTextDeclaration ()
2170 if (PeekChar () != '<')
2175 if (PeekChar () != '?') {
// Fill the peek buffer up to six characters so the target can be inspected.
2181 while (peekCharsIndex < 6) {
2182 if (PeekChar () < 0)
2187 if (new string (peekChars, 2, 4) != "xml ") {
2188 if (new string (peekChars, 2, 4).ToLower (CultureInfo.InvariantCulture) == "xml ") {
2189 throw NotWFError ("Processing instruction name must not be character sequence 'X' 'M' 'L' with case insensitivity.");
2198 if (PeekChar () == 'v') {
2200 ExpectAfterWhitespace ('=');
2202 int quoteChar = ReadChar ();
// Holds at most three characters of the version literal.
2203 char [] expect1_0 = new char [3];
2204 int versionLength = 0;
2205 switch (quoteChar) {
2208 while (PeekChar () != quoteChar) {
2209 if (PeekChar () == -1)
2210 throw NotWFError ("Invalid version declaration inside text declaration.");
2211 else if (versionLength == 3)
2212 throw NotWFError ("Invalid version number inside text declaration.");
2214 expect1_0 [versionLength] = (char) ReadChar ();
2216 if (versionLength == 3 && new String (expect1_0) != "1.0")
2217 throw NotWFError ("Invalid version number inside text declaration.");
2224 throw NotWFError ("Invalid version declaration inside text declaration.");
2228 if (PeekChar () == 'e') {
2229 Expect ("encoding");
2230 ExpectAfterWhitespace ('=');
2232 int quoteChar = ReadChar ();
2233 switch (quoteChar) {
// The encoding name itself is skipped here, not validated (see the
// comment below).
2236 while (PeekChar () != quoteChar)
2237 if (ReadChar () == -1)
2238 throw NotWFError ("Invalid encoding declaration inside text declaration.");
2243 throw NotWFError ("Invalid encoding declaration inside text declaration.");
2245 // Encoding value should be checked inside XmlInputStream.
2248 // this condition is to check if this instance is
2249 // not created by XmlReader.Create() (which just
2250 // omits strict text declaration check).
2251 else if (Conformance == ConformanceLevel.Auto)
2252 throw NotWFError ("Encoding declaration is mandatory in text declaration.");
2257 curNodePeekIndex = peekCharsIndex; // without this it causes incorrect value start indication.
// Dispatches on the character following "<!" and raises a not-well-formed
// error for markup it does not recognize.
// NOTE(review): the dispatch itself (the lines between the peek and the
// throw) is not visible in this listing.
2260 // The reader is positioned on the first character after
2261 // the leading '<!'.
2262 private void ReadDeclaration ()
2264 int ch = PeekChar ();
2282 throw NotWFError ("Unexpected declaration markup was found.");
// Reads a comment body into valueBuffer up to the closing "-->" and exposes
// it as a Comment node. A "--" that is not followed by '>' is rejected.
// A comment seen before anything else moves the reader out of the initial
// (None) state.
// NOTE(review): unlike the other visible text readers, the IsInvalid check
// here is not gated by CharacterChecking. Confirm this is intended.
// NOTE(review): the embedded line numbers have gaps; the reads inside the
// loop and its exit are not visible in this listing.
2286 // The reader is positioned on the first character after
2287 // the leading '<!--'.
2288 private void ReadComment ()
2290 if (currentState == XmlNodeType.None)
2291 currentState = XmlNodeType.XmlDeclaration;
2293 preserveCurrentTag = false;
2295 ClearValueBuffer ();
2298 while ((ch = PeekChar ()) != -1) {
2301 if (ch == '-' && PeekChar () == '-') {
2304 if (PeekChar () != '>')
2305 throw NotWFError ("comments cannot contain '--'");
2311 if (XmlChar.IsInvalid (ch))
2312 throw NotWFError ("Not allowed character was found.");
2314 AppendValueChar (ch);
2318 XmlNodeType.Comment, // nodeType
2319 String.Empty, // name
2320 String.Empty, // prefix
2321 String.Empty, // localName
2322 false, // isEmptyElement
2323 null, // value: create only when required
2324 true // clearAttributes
// Reads a CDATA section body into valueBuffer up to the closing "]]>" and
// exposes it as a CDATA node. Only allowed in the Element state.
// With normalization on, '\r' is handled as described by the comments in
// the body.
// NOTE(review): the embedded line numbers have gaps; the reads inside the
// loop, its exit and the else branches are not visible in this listing.
2328 // The reader is positioned on the first character after
2329 // the leading '<![CDATA['.
2330 private void ReadCDATA ()
2332 if (currentState != XmlNodeType.Element)
2333 throw NotWFError ("CDATA section cannot appear in this state.");
2334 preserveCurrentTag = false;
2336 ClearValueBuffer ();
2340 while (PeekChar () != -1) {
// Look for the "]]>" terminator.
2345 if (ch == ']' && PeekChar () == ']') {
2346 ch = ReadChar (); // ']'
2348 if (PeekChar () == '>') {
2355 if (normalization && ch == '\r') {
2358 // append '\n' instead of '\r'.
2359 AppendValueChar ('\n');
2360 // otherwise, discard '\r'.
2363 if (CharacterChecking && XmlChar.IsInvalid (ch))
2364 throw NotWFError ("Invalid character was found.");
2366 // FIXME: it might be optimized by the JIT later,
2367 // AppendValueChar (ch);
// NOTE(review): '<' excludes 0xFFFF itself, which therefore reaches the
// surrogate-pair call below although it fits in one char; confirm intent.
2369 if (ch < Char.MaxValue)
2370 valueBuffer.Append ((char) ch);
2372 AppendSurrogatePairValueChar (ch);
2377 XmlNodeType.CDATA, // nodeType
2378 String.Empty, // name
2379 String.Empty, // prefix
2380 String.Empty, // localName
2381 false, // isEmptyElement
2382 null, // value: create only when required
2383 true // clearAttributes
// Parses a document type declaration: the doctype name, an optional
// SYSTEM / PUBLIC external ID and an optional internal subset in brackets.
// It then builds the DTD object model and exposes a DocumentType node whose
// value is the internal subset text, with PUBLIC and SYSTEM attributes for
// whichever identifiers were present.
// Raises a not-well-formed error when DTDs are prohibited or when the
// declaration appears after another DOCTYPE or after element content.
// NOTE(review): the embedded line numbers have gaps; the prohibition test,
// the SYSTEM/PUBLIC dispatch and several reads are not visible here.
2387 // The reader is positioned on the first character after
2388 // the leading '<!DOCTYPE'.
2389 private void ReadDoctypeDecl ()
2392 throw NotWFError ("Document Type Declaration (DTD) is prohibited in this XML.");
2393 switch (currentState) {
2394 case XmlNodeType.DocumentType:
2395 case XmlNodeType.Element:
2396 case XmlNodeType.EndElement:
2397 throw NotWFError ("Document type cannot appear in this state.");
2399 currentState = XmlNodeType.DocumentType;
2401 string doctypeName = null;
2402 string publicId = null;
2403 string systemId = null;
2404 int intSubsetStartLine = 0;
2405 int intSubsetStartColumn = 0;
2408 doctypeName = ReadName ();
// SYSTEM form: the keyword itself is still expected (true).
2413 systemId = ReadSystemLiteral (true);
// PUBLIC form: public id, mandatory whitespace, then a bare system literal.
2416 publicId = ReadPubidLiteral ();
2417 if (!SkipWhitespace ())
2418 throw NotWFError ("Whitespace is required between PUBLIC id and SYSTEM id.");
2419 systemId = ReadSystemLiteral (false);
2425 if(PeekChar () == '[')
2427 // read markupdecl etc. or end of decl
// Remember where the subset starts; these are passed on to
// GenerateDTDObjectModel.
2429 intSubsetStartLine = this.LineNumber;
2430 intSubsetStartColumn = this.LinePosition;
2431 ClearValueBuffer ();
2432 ReadInternalSubset ();
2433 parserContext.InternalSubset = CreateValueString ();
2435 // end of DOCTYPE decl.
2436 ExpectAfterWhitespace ('>');
2438 GenerateDTDObjectModel (doctypeName, publicId,
2439 systemId, parserContext.InternalSubset,
2440 intSubsetStartLine, intSubsetStartColumn);
2442 // set properties for <!DOCTYPE> node
2444 XmlNodeType.DocumentType, // nodeType
2445 doctypeName, // name
2446 String.Empty, // prefix
2447 doctypeName, // localName
2448 false, // isEmptyElement
2449 parserContext.InternalSubset, // value
2450 true // clearAttributes
2453 if (publicId != null)
2454 AddAttributeWithValue ("PUBLIC", publicId);
2455 if (systemId != null)
2456 AddAttributeWithValue ("SYSTEM", systemId);
// Reset both attribute cursors after the synthetic attributes were added.
2457 currentAttribute = currentAttributeValue = -1;
// Convenience overload for callers that have no position for the internal
// subset: forwards to the six-argument overload with 0 for both the start
// line and the start column.
internal DTDObjectModel GenerateDTDObjectModel (string name, string publicId,
	string systemId, string internalSubset)
{
	const int noStartLine = 0;
	const int noStartColumn = 0;

	return GenerateDTDObjectModel (name, publicId, systemId, internalSubset,
		noStartLine, noStartColumn);
}
// Creates a new DTDObjectModel on the parser context, fills in its
// identifying properties from the arguments and from this reader's state
// (base URI, resolver, standalone flag, current line/column), and runs a
// DTDReader over it.
// intSubsetStartLine / intSubsetStartColumn: position of the internal
//   subset, passed through to the DTDReader.
// Returns whatever DTDReader.GenerateDTDObjectModel returns.
// NOTE(review): the name parameter is not used on any visible line; the
// assignment that consumes it is presumably on a line missing from this
// listing. Confirm.
2466 internal DTDObjectModel GenerateDTDObjectModel (string name, string publicId,
2467 string systemId, string internalSubset, int intSubsetStartLine, int intSubsetStartColumn)
2470 parserContext.Dtd = new DTDObjectModel (this.NameTable); // merges both internal and external subsets in the meantime,
2471 DTD.BaseURI = BaseURI;
2473 DTD.PublicId = publicId;
2474 DTD.SystemId = systemId;
2475 DTD.InternalSubset = internalSubset;
2476 DTD.XmlResolver = resolver;
2477 DTD.IsStandalone = isStandalone;
2478 DTD.LineNumber = line;
2479 DTD.LinePosition = column;
// The DTD reader uses the same normalization setting as this reader.
2481 DTDReader dr = new DTDReader (DTD, intSubsetStartLine, intSubsetStartColumn);
2482 dr.Normalization = this.normalization;
2483 return dr.GenerateDTDObjectModel ();
// Lexical states tracked while scanning a DTD internal subset.
// The member list is not visible in this listing; code elsewhere in this
// file refers to Free, PI, ElementDecl, EntityDecl, AttlistDecl,
// NotationDecl, Comment, InsideDoubleQuoted and InsideSingleQuoted.
2486 private enum DtdInputState
// Typed wrapper around a non-generic Stack holding DtdInputState values.
// A new stack starts with DtdInputState.Free already pushed.
// NOTE(review): the body of Push is not visible in this listing.
2499 private class DtdInputStateStack
2501 Stack intern = new Stack ();
2502 public DtdInputStateStack ()
2504 Push (DtdInputState.Free);
// Returns the current state without removing it.
2507 public DtdInputState Peek ()
2509 return (DtdInputState) intern.Peek ();
// Removes and returns the current state.
2512 public DtdInputState Pop ()
2514 return (DtdInputState) intern.Pop ();
2517 public void Push (DtdInputState val)
// State stack used while scanning a DTD internal subset.
DtdInputStateStack stateStack = new DtdInputStateStack ();

// The state currently on top of stateStack (not removed by reading it).
DtdInputState State {
	get {
		DtdInputState top = stateStack.Peek ();
		return top;
	}
}
// Reads one character from the input and also appends it to the value
// buffer, so the raw text is retained while it is being scanned.
// NOTE(review): the return statement is not visible in this listing;
// presumably ret is returned. Confirm.
2529 private int ReadValueChar ()
2531 int ret = ReadChar ();
2532 AppendValueChar (ret);
// Appends the literal s to the value buffer.
// NOTE(review): a line is missing before the Append (gap in the embedded
// line numbers); judging by the method name it presumably matches s against
// the input first. Confirm against the full source.
2536 private void ExpectAndAppend (string s)
2539 valueBuffer.Append (s);
// Scans a DTD internal subset without interpreting it. Every character is
// read through ReadValueChar, so the raw subset text accumulates in the
// value buffer. A state stack (State / stateStack) tracks whether the
// scanner is inside a declaration, a comment, a processing instruction or a
// quoted literal, so that delimiters inside those are not misread.
// Raises a not-well-formed error on end of input, on unknown "<!" markup,
// on a stray '>' and on a parameter-entity reference inside a declaration
// other than an entity declaration.
// NOTE(review): the embedded line numbers have gaps; the outer case labels
// and the Pop / break statements are not visible in this listing.
2542 // Simply read but not generate any result.
2543 private void ReadInternalSubset ()
2545 bool continueParse = true;
2547 while (continueParse) {
2548 switch (ReadValueChar ()) {
2551 case DtdInputState.Free:
// The character that ends the subset is dropped from the collected text.
2553 valueBuffer.Remove (valueBuffer.Length - 1, 1);
2554 continueParse = false;
2556 case DtdInputState.InsideDoubleQuoted:
2557 case DtdInputState.InsideSingleQuoted:
2558 case DtdInputState.Comment:
2561 throw NotWFError ("unexpected end of file at DTD.");
2565 throw NotWFError ("unexpected end of file at DTD.");
2568 case DtdInputState.InsideDoubleQuoted:
2569 case DtdInputState.InsideSingleQuoted:
2570 case DtdInputState.Comment:
2571 continue; // well-formed
2573 int c = ReadValueChar ();
2576 stateStack.Push (DtdInputState.PI);
// The keyword after "<!" is recognized from its first letters; the rest is
// matched literally by ExpectAndAppend.
2579 switch (ReadValueChar ()) {
2581 switch (ReadValueChar ()) {
2583 ExpectAndAppend ("EMENT");
2584 stateStack.Push (DtdInputState.ElementDecl);
2587 ExpectAndAppend ("TITY");
2588 stateStack.Push (DtdInputState.EntityDecl);
2591 throw NotWFError ("unexpected token '<!E'.");
2595 ExpectAndAppend ("TTLIST");
2596 stateStack.Push (DtdInputState.AttlistDecl);
2599 ExpectAndAppend ("OTATION");
2600 stateStack.Push (DtdInputState.NotationDecl);
2603 ExpectAndAppend ("-");
2604 stateStack.Push (DtdInputState.Comment);
2609 throw NotWFError (String.Format ("unexpected '<{0}'.", (char) c));
// Quote handling: a quote opens a literal unless the scanner is already
// inside the other quote kind or a comment.
2613 if (State == DtdInputState.InsideSingleQuoted)
2615 else if (State != DtdInputState.InsideDoubleQuoted && State != DtdInputState.Comment)
2616 stateStack.Push (DtdInputState.InsideSingleQuoted);
2619 if (State == DtdInputState.InsideDoubleQuoted)
2621 else if (State != DtdInputState.InsideSingleQuoted && State != DtdInputState.Comment)
2622 stateStack.Push (DtdInputState.InsideDoubleQuoted);
// All four declaration states share the NotationDecl handling.
2626 case DtdInputState.ElementDecl:
2627 goto case DtdInputState.NotationDecl;
2628 case DtdInputState.AttlistDecl:
2629 goto case DtdInputState.NotationDecl;
2630 case DtdInputState.EntityDecl:
2631 goto case DtdInputState.NotationDecl;
2632 case DtdInputState.NotationDecl:
2635 case DtdInputState.InsideDoubleQuoted:
2636 case DtdInputState.InsideSingleQuoted:
2637 case DtdInputState.Comment:
2640 throw NotWFError ("unexpected token '>'");
2644 if (State == DtdInputState.PI) {
2645 if (ReadValueChar () == '>')
2650 if (State == DtdInputState.Comment) {
2651 if (PeekChar () == '-') {
2653 ExpectAndAppend (">");
2659 if (State != DtdInputState.Free && State != DtdInputState.EntityDecl && State != DtdInputState.Comment && State != DtdInputState.InsideDoubleQuoted && State != DtdInputState.InsideSingleQuoted)
2660 throw NotWFError ("Parameter Entity Reference cannot appear as a part of markupdecl (see XML spec 2.8).");
// Reads a quoted system literal and returns its content (without the
// quotes) as a string built from the value buffer.
// expectSYSTEM: whether the "SYSTEM" keyword is still in front of the
//   literal (see the call sites in ReadDoctypeDecl).
// Raises a not-well-formed error on missing whitespace after the keyword or
// on end of input inside the literal.
// NOTE(review): the embedded line numbers have gaps; the keyword handling,
// the declaration of c and the read inside the loop are not visible here.
2666 // The reader is positioned on the first 'S' of "SYSTEM".
2667 private string ReadSystemLiteral (bool expectSYSTEM)
2671 if (!SkipWhitespace ())
2672 throw NotWFError ("Whitespace is required after 'SYSTEM'.");
2676 int quoteChar = ReadChar (); // apos or quot
2678 ClearValueBuffer ();
2679 while (c != quoteChar) {
2682 throw NotWFError ("Unexpected end of stream in ExternalID.");
2684 AppendValueChar (c);
2686 return CreateValueString ();
// Reads a quoted public identifier literal and returns its content as a
// string built from the value buffer. Every character other than the
// closing quote must satisfy XmlChar.IsPubidChar.
// Raises a not-well-formed error on missing whitespace after 'PUBLIC', on a
// disallowed character or on end of input inside the literal.
// NOTE(review): the embedded line numbers have gaps; the keyword handling,
// the declaration of c and the read inside the loop are not visible here.
2689 private string ReadPubidLiteral()
2692 if (!SkipWhitespace ())
2693 throw NotWFError ("Whitespace is required after 'PUBLIC'.");
2694 int quoteChar = ReadChar ();
2696 ClearValueBuffer ();
2697 while(c != quoteChar)
2700 if(c < 0) throw NotWFError ("Unexpected end of stream in ExternalID.");
2701 if(c != quoteChar && !XmlChar.IsPubidChar (c))
2702 throw NotWFError (String.Format ("character '{0}' not allowed for PUBLIC ID", (char)c ));
2704 AppendValueChar (c);
2706 return CreateValueString ();
// Reads an XML name starting at the current input position and returns the
// full (qualified) name. The prefix / local-name split computed by the
// two-out-parameter overload is discarded.
private string ReadName ()
{
	string unusedPrefix;
	string unusedLocalName;
	string qualifiedName = ReadName (out unusedPrefix, out unusedLocalName);
	return qualifiedName;
}
// Reads an XML name at the current input position, atomizes it through the
// NameTable and returns it.
// prefix / localName: receive the parts before and after the first ':' when
//   namespace processing is on and a colon was seen; the visible fallback
//   sets prefix to String.Empty.
// Raises a not-well-formed error when the first character cannot start a
// name.
// Two implementations are selected at compile time by USE_NAME_BUFFER: one
// slices the name directly out of peekChars, the other copies characters
// into nameBuffer.
// NOTE(review): the embedded line numbers have gaps; the #else / #endif
// directives, the reads inside the loops, the colonAt bookkeeping and the
// return statements are not visible. The second half below (from the second
// "int ch = PeekChar ()") appears to be the USE_NAME_BUFFER variant.
2717 private string ReadName (out string prefix, out string localName)
2719 #if !USE_NAME_BUFFER
// Keep the peek buffer contents alive while the name is being scanned; the
// previous setting is restored afterwards.
2720 bool savePreserve = preserveCurrentTag;
2721 preserveCurrentTag = true;
// The start is kept relative to curNodePeekIndex and re-based after the scan.
2723 int startOffset = peekCharsIndex - curNodePeekIndex;
2724 int ch = PeekChar ();
2725 if (!XmlChar.IsFirstNameChar (ch))
2726 throw NotWFError (String.Format (CultureInfo.InvariantCulture, "a name did not start with a legal character {0} ({1})", ch, (char) ch));
2731 while (XmlChar.IsNameChar ((ch = PeekChar ()))) {
// Only the first ':' is treated as the prefix separator.
2733 if (ch == ':' && namespaces && colonAt < 0)
2738 int start = curNodePeekIndex + startOffset;
2740 string name = NameTable.Add (
2741 peekChars, start, length);
2744 prefix = NameTable.Add (
2745 peekChars, start, colonAt);
2746 localName = NameTable.Add (
2747 peekChars, start + colonAt + 1, length - colonAt - 1);
2749 prefix = String.Empty;
2753 preserveCurrentTag = savePreserve;
2757 int ch = PeekChar ();
2758 if (!XmlChar.IsFirstNameChar (ch))
2759 throw NotWFError (String.Format (CultureInfo.InvariantCulture, "a name did not start with a legal character {0} ({1})", ch, (char) ch));
2764 // AppendNameChar (ch);
2766 // nameBuffer.Length is always non-0 so no need to ExpandNameCapacity () here
// NOTE(review): '<' excludes 0xFFFF itself, which therefore reaches the
// surrogate-pair call below although it fits in one char; confirm intent.
2767 if (ch < Char.MaxValue)
2768 nameBuffer [nameLength++] = (char) ch;
2770 AppendSurrogatePairNameChar (ch);
2775 while (XmlChar.IsNameChar ((ch = PeekChar ()))) {
2778 if (ch == ':' && namespaces && colonAt < 0)
2779 colonAt = nameLength;
2780 // AppendNameChar (ch);
2782 if (nameLength == nameCapacity)
2783 ExpandNameCapacity ();
2784 if (ch < Char.MaxValue)
2785 nameBuffer [nameLength++] = (char) ch;
2787 AppendSurrogatePairNameChar (ch);
2791 string name = NameTable.Add (nameBuffer, 0, nameLength);
2794 prefix = NameTable.Add (nameBuffer, 0, colonAt);
2795 localName = NameTable.Add (nameBuffer, colonAt + 1, nameLength - colonAt - 1);
2797 prefix = String.Empty;
// Consumes one character and raises a not-well-formed error when it differs
// from expected. In the message, end of input is reported as "EOF".
// NOTE(review): some of the String.Format arguments are on lines missing
// from this listing.
2805 // Read the next character and compare it against the
2806 // specified character.
2807 private void Expect (int expected)
2809 int ch = ReadChar ();
2811 if (ch != expected) {
2812 throw NotWFError (String.Format (CultureInfo.InvariantCulture,
2813 "expected '{0}' ({1:X}) but found '{2}' ({3:X})",
2816 ch < 0 ? (object) "EOF" : (char) ch,
// Consumes expected.Length characters from the input and requires them to
// spell exactly the given string. Raises a not-well-formed error at the
// first character that differs.
private void Expect (string expected)
{
	foreach (char wanted in expected) {
		if (ReadChar () == wanted)
			continue;

		throw NotWFError (String.Format (CultureInfo.InvariantCulture,
			"'{0}' is expected", expected));
	}
}
// Reads characters, passing over XML whitespace, and raises a
// not-well-formed error naming the expected character c and the character
// actually found (or "EOF").
// NOTE(review): the loop header, the comparison against c and the exit are
// not visible in this listing; presumably the first non-whitespace
// character must equal c. Confirm.
2829 private void ExpectAfterWhitespace (char c)
2832 int i = ReadChar ();
// The "< 0x21" test is a cheap pre-filter before calling XmlChar.IsWhitespace.
2833 if (i < 0x21 && XmlChar.IsWhitespace (i))
2836 throw NotWFError (String.Format (CultureInfo.InvariantCulture, "Expected {0}, but found {1} [{2}]", c, i < 0 ? (object) "EOF" : (char) i, i));
// Skips over space, tab, LF and CR at the current position. The whitespace
// test is written out inline rather than calling XmlChar.IsWhitespace (see
// the FIXME comments).
// skipped records whether the first peeked character was whitespace.
// NOTE(review): the reads and the return statement are not visible in this
// listing; presumably skipped is the return value. Confirm.
2841 // Does not consume the first non-whitespace character.
2842 private bool SkipWhitespace ()
2844 // FIXME: It should be inlined by the JIT.
2845 // bool skipped = XmlChar.IsWhitespace (PeekChar ());
2846 int ch = PeekChar ();
2847 bool skipped = (ch == 0x20 || ch == 0x9 || ch == 0xA || ch == 0xD);
2851 // FIXME: It should be inlined by the JIT.
2852 // while (XmlChar.IsWhitespace (PeekChar ()))
2854 while ((ch = PeekChar ()) == 0x20 || ch == 0x9 || ch == 0xA || ch == 0xD)
// Reads a run of whitespace. The run counts as the beginning of a text node
// (isText) when the reader is inside an element and the run is followed by
// something other than '<' or end of input. Otherwise, depending on
// whitespaceHandling and xml:space, the run is either dropped or exposed as
// a Whitespace / SignificantWhitespace node.
// NOTE(review): the embedded line numbers have gaps; the do-loop header,
// the branch bodies and the return statements are not visible, so the
// meaning of the bool result cannot be confirmed from this listing.
2859 private bool ReadWhitespace ()
2861 if (currentState == XmlNodeType.None)
2862 currentState = XmlNodeType.XmlDeclaration;
// Keep the peek buffer contents alive so the run can be copied out of
// peekChars afterwards.
2864 bool savePreserve = preserveCurrentTag;
2865 preserveCurrentTag = true;
2866 int startOffset = peekCharsIndex - curNodePeekIndex; // it should be 0 for now though.
2868 int ch = PeekChar ();
2872 // FIXME: It should be inlined by the JIT.
2873 // } while ((ch = PeekChar ()) != -1 && XmlChar.IsWhitespace (ch));
2874 } while (ch == 0x20 || ch == 0x9 || ch == 0xA || ch == 0xD);
2876 bool isText = currentState == XmlNodeType.Element && ch != -1 && ch != '<';
2878 if (!isText && (whitespaceHandling == WhitespaceHandling.None ||
2879 whitespaceHandling == WhitespaceHandling.Significant && XmlSpace != XmlSpace.Preserve))
// Copy the whitespace run from the peek buffer into the value buffer.
2882 ClearValueBuffer ();
2883 valueBuffer.Append (peekChars, curNodePeekIndex, peekCharsIndex - curNodePeekIndex - startOffset);
2884 preserveCurrentTag = savePreserve;
2889 XmlNodeType nodeType = (this.XmlSpace == XmlSpace.Preserve) ?
2890 XmlNodeType.SignificantWhitespace : XmlNodeType.Whitespace;
2891 SetProperties (nodeType,
2896 null, // value: create only when required
2903 // Returns -1 if it should throw an error.
// Copies characters into `buffer` starting at index `offset`, looping at
// most `length` times, with special handling around '<' and '/'.
// NOTE(review): the return statements and several branch headers are on
// lines not shown in this excerpt - confirm the non-error return value.
2904 private int ReadCharsInternal (char [] buffer, int offset, int length)
// Next write position in `buffer`; it advances independently of the
// loop counter `i`.
2906 int bufIndex = offset;
2907 for (int i = 0; i < length; i++) {
2908 int c = PeekChar ();
2911 throw NotWFError ("Unexpected end of xml.");
// True on the last permitted iteration of the loop.
2913 if (i + 1 == length)
2914 // if it does not end here,
2915 // it cannot store another
2916 // character, so stop here.
// The character after the one being handled is not '/': a literal '<'
// is stored in the output.
2919 if (PeekChar () != '/') {
2921 buffer [bufIndex++] = '<';
// Post-decrement: nestLevel is compared with 0 first and then reduced by
// one; when it was above zero a literal '<' is stored as well.
2924 else if (nestLevel-- > 0) {
2925 buffer [bufIndex++] = '<';
2928 // Seems to skip immediate EndElement
2935 readCharsInProgress = false;
2936 Read (); // move to the next node
// Values below Char.MaxValue (0xFFFF) are stored as a single char.
2940 if (c < Char.MaxValue)
2941 buffer [bufIndex++] = (char) c;
// Presumably the else branch (the `else` line is not shown): the value is
// split into a UTF-16 surrogate pair, high = 0xD800 + (c - 0x10000) / 0x400
// and low = 0xDC00 + (c - 0x10000) % 0x400.
// NOTE(review): c == 0xFFFF fails the `<` test above and would be split
// with a negative (c - 0x10000) - confirm PeekChar () cannot return it.
// NOTE(review): two slots are written here while `i` advances by one -
// confirm callers leave room in `buffer` for the extra char.
2943 buffer [bufIndex++] = (char) ((c - 0x10000) / 0x400 + 0xD800);
2944 buffer [bufIndex++] = (char) ((c - 0x10000) % 0x400 + 0xDC00);
2952 private bool ReadUntilEndTag ()
2955 currentState = XmlNodeType.EndElement;
2961 throw NotWFError ("Unexpected end of xml.");
2963 if (PeekChar () != '/') {
2967 else if (--nestLevel > 0)
2970 string name = ReadName ();
2971 if (name != elementNames [elementNameStackPos - 1].Name)