2 // System.Xml.XmlTextReader
5 // Jason Diamond (jason@injektilo.org)
6 // Adam Treat (manyoso@yahoo.com)
7 // Atsushi Enomoto (ginga@kit.hi-ho.ne.jp)
9 // (C) 2001, 2002 Jason Diamond http://injektilo.org/
10 // Copyright (C) 2005-2006 Novell, Inc (http://www.novell.com)
12 // Permission is hereby granted, free of charge, to any person obtaining
13 // a copy of this software and associated documentation files (the
14 // "Software"), to deal in the Software without restriction, including
15 // without limitation the rights to use, copy, modify, merge, publish,
16 // distribute, sublicense, and/or sell copies of the Software, and to
17 // permit persons to whom the Software is furnished to do so, subject to
18 // the following conditions:
20 // The above copyright notice and this permission notice shall be
21 // included in all copies or substantial portions of the Software.
23 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
27 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
28 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
29 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
31 //#define USE_NAME_BUFFER
34 // Optimization TODOs:
36 // - support PushbackChar() which reverts one character read.
37 // - ReadTextReader() should always keep one pushback buffer
38 // as pushback safety net.
39 // - Replace (peek,read) * n -> read * n + pushback
43 using System.Collections;
44 using System.Collections.Generic;
45 using System.Globalization;
47 using System.Security.Permissions;
49 using System.Xml.Schema;
55 class XmlTextReader : XmlReader,
56 IXmlLineInfo, IXmlNamespaceResolver, IHasXmlParserContext
60 protected XmlTextReader ()
64 public XmlTextReader (Stream input)
65 : this (new XmlStreamReader (input))
69 public XmlTextReader (string url)
70 : this(url, new NameTable ())
74 public XmlTextReader (TextReader input)
75 : this (input, new NameTable ())
79 protected XmlTextReader (XmlNameTable nt)
80 : this (String.Empty, null, XmlNodeType.None, null)
84 public XmlTextReader (Stream input, XmlNameTable nt)
85 : this(new XmlStreamReader (input), nt)
89 public XmlTextReader (string url, Stream input)
90 : this (url, new XmlStreamReader (input))
94 public XmlTextReader (string url, TextReader input)
95 : this (url, input, new NameTable ())
99 public XmlTextReader (string url, XmlNameTable nt)
101 reader_uri = resolver.ResolveUri (null, url);
102 string uriString = (reader_uri == null) ? String.Empty : reader_uri.ToString ();
103 XmlParserContext ctx = new XmlParserContext (nt,
104 new XmlNamespaceManager (nt),
107 this.InitializeContext (uriString, ctx, null, XmlNodeType.Document);
110 public XmlTextReader (TextReader input, XmlNameTable nt)
111 : this (String.Empty, input, nt)
115 // This is used in XmlReader.Create() to indicate that string
116 // argument is uri, not an xml fragment.
117 internal XmlTextReader (bool dummy, XmlResolver resolver, string url, XmlNodeType fragType, XmlParserContext context)
119 if (resolver == null) {
121 resolver = new XmlXapResolver ();
123 resolver = new XmlUrlResolver ();
126 this.XmlResolver = resolver;
129 Stream stream = GetStreamFromUrl (url, out uriString);
130 this.InitializeContext (uriString, context, new XmlStreamReader (stream), fragType);
133 public XmlTextReader (Stream xmlFragment, XmlNodeType fragType, XmlParserContext context)
134 : this (context != null ? context.BaseURI : String.Empty,
135 new XmlStreamReader (xmlFragment),
139 disallowReset = true;
142 internal XmlTextReader (string baseURI, TextReader xmlFragment, XmlNodeType fragType)
143 : this (baseURI, xmlFragment, fragType, null)
147 public XmlTextReader (string url, Stream input, XmlNameTable nt)
148 : this (url, new XmlStreamReader (input), nt)
// Creates a document-level reader over `input`, using `url` as the base URI
// and `nt` as the name table.
//
// Previously `nt` was dropped: a null parser context was forwarded, which
// makes InitializeContext allocate a brand-new NameTable. The supplied table
// is now wrapped in a parser context so that it is actually used. A null
// `nt` keeps the old behaviour (InitializeContext builds its own context).
public XmlTextReader (string url, TextReader input, XmlNameTable nt)
	: this (url, input, XmlNodeType.Document,
		nt != null
			? new XmlParserContext (nt, new XmlNamespaceManager (nt), String.Empty, XmlSpace.None)
			: null)
{
}
157 public XmlTextReader (string xmlFragment, XmlNodeType fragType, XmlParserContext context)
158 : this (context != null ? context.BaseURI : String.Empty,
159 new StringReader (xmlFragment),
163 disallowReset = true;
166 internal XmlTextReader (string url, TextReader fragment, XmlNodeType fragType, XmlParserContext context)
168 InitializeContext (url, context, fragment, fragType);
171 Uri ResolveUri (string url)
173 return resolver.ResolveUri (null, url);
176 Stream GetStreamFromUrl (string url, out string absoluteUriString)
180 throw new ArgumentNullException ("url");
182 throw new ArgumentException ("url");
184 Uri uri = ResolveUri (url);
185 absoluteUriString = uri != null ? uri.ToString () : String.Empty;
186 return resolver.GetEntity (uri, null, typeof (Stream)) as Stream;
193 public override int AttributeCount
195 get { return attributeCount; }
198 public override string BaseURI
200 get { return parserContext.BaseURI; }
203 public override bool CanReadBinaryContent {
207 public override bool CanReadValueChunk {
211 internal bool CharacterChecking {
212 get { return checkCharacters; }
213 set { checkCharacters = value; }
216 // for XmlReaderSettings.CloseInput support
217 internal bool CloseInput {
218 get { return closeInput; }
219 set { closeInput = value; }
222 public override int Depth
225 int nodeTypeMod = currentToken.NodeType == XmlNodeType.Element ? 0 : -1;
226 if (currentAttributeValue >= 0)
227 return nodeTypeMod + elementDepth + 2; // inside attribute value.
228 else if (currentAttribute >= 0)
229 return nodeTypeMod + elementDepth + 1;
234 public Encoding Encoding
236 get { return parserContext.Encoding; }
239 public EntityHandling EntityHandling {
240 get { return entityHandling; }
241 set { entityHandling = value; }
244 public override bool EOF {
245 get { return readState == ReadState.EndOfFile; }
248 public override bool HasValue {
249 get { return cursorToken.Value != null; }
252 public override bool IsDefault {
253 // XmlTextReader does not expand default attributes.
254 get { return false; }
257 public override bool IsEmptyElement {
258 get { return cursorToken.IsEmptyElement; }
261 public int LineNumber {
263 if (useProceedingLineInfo)
266 return cursorToken.LineNumber;
270 public int LinePosition {
272 if (useProceedingLineInfo)
275 return cursorToken.LinePosition;
279 public override string LocalName {
280 get { return cursorToken.LocalName; }
283 public override string Name {
284 get { return cursorToken.Name; }
287 public bool Namespaces {
288 get { return namespaces; }
290 if (readState != ReadState.Initial)
291 throw new InvalidOperationException ("Namespaces have to be set before reading.");
296 public override string NamespaceURI {
297 get { return cursorToken.NamespaceURI; }
300 public override XmlNameTable NameTable {
301 get { return nameTable; }
304 public override XmlNodeType NodeType {
305 get { return cursorToken.NodeType; }
308 public bool Normalization {
309 get { return normalization; }
310 set { normalization = value; }
313 public override string Prefix {
314 get { return cursorToken.Prefix; }
317 public bool ProhibitDtd {
318 get { return prohibitDtd; }
319 set { prohibitDtd = value; }
322 public override char QuoteChar {
323 get { return cursorToken.QuoteChar; }
326 public override ReadState ReadState {
327 get { return readState; }
330 public override XmlReaderSettings Settings {
331 get { return base.Settings; }
// Text value of the node under the cursor; never null (String.Empty is
// substituted when the cursor token has no value).
public override string Value {
	get {
		string text = cursorToken.Value;
		return text ?? String.Empty;
	}
}
338 public WhitespaceHandling WhitespaceHandling {
339 get { return whitespaceHandling; }
340 set { whitespaceHandling = value; }
343 public override string XmlLang {
344 get { return parserContext.XmlLang; }
347 public XmlResolver XmlResolver {
348 set { resolver = value; }
351 public override XmlSpace XmlSpace {
352 get { return parserContext.XmlSpace; }
359 public override void Close ()
361 readState = ReadState.Closed;
363 cursorToken.Clear ();
364 currentToken.Clear ();
366 if (closeInput && reader != null)
// Returns the value of the attribute at index `i` of the current node.
//
// Throws ArgumentOutOfRangeException when `i` is negative or is not smaller
// than attributeCount. (The negative case used to fall through to the array
// access, and the old message described the opposite of the failure.)
public override string GetAttribute (int i)
{
	if (i < 0 || i >= attributeCount)
		throw new ArgumentOutOfRangeException ("i", "i must be non-negative and smaller than AttributeCount.");

	return attributeTokens [i].Value;
}
379 // MS.NET 1.0 msdn says that this method returns String.Empty
380 // for absent attribute, but in fact it returns null.
381 // This description is corrected in MS.NET 1.1 msdn.
382 public override string GetAttribute (string name)
384 for (int i = 0; i < attributeCount; i++)
385 if (attributeTokens [i].Name == name)
386 return attributeTokens [i].Value;
390 private int GetIndexOfQualifiedAttribute (string localName, string namespaceURI)
392 namespaceURI = namespaceURI ?? String.Empty;
393 for (int i = 0; i < attributeCount; i++) {
394 XmlAttributeTokenInfo ti = attributeTokens [i];
395 if (ti.LocalName == localName && ti.NamespaceURI == namespaceURI)
401 XmlParserContext IHasXmlParserContext.ParserContext {
402 get { return parserContext; }
405 public override string GetAttribute (string localName, string namespaceURI)
407 int idx = this.GetIndexOfQualifiedAttribute (localName, namespaceURI);
410 return attributeTokens [idx].Value;
413 public IDictionary<string, string> GetNamespacesInScope (XmlNamespaceScope scope)
415 return nsmgr.GetNamespacesInScope (scope);
418 IDictionary<string, string> IXmlNamespaceResolver.GetNamespacesInScope (XmlNamespaceScope scope)
420 return GetNamespacesInScope (scope);
423 public TextReader GetRemainder ()
425 if (peekCharsLength < 0)
427 return new StringReader (new string (peekChars, peekCharsIndex, peekCharsLength - peekCharsIndex) + reader.ReadToEnd ());
430 public bool HasLineInfo ()
435 public override string LookupNamespace (string prefix)
437 return LookupNamespace (prefix, false);
// Resolves `prefix` through the namespace manager. An empty-string result
// is reported as null; any other result (including null) is passed through.
private string LookupNamespace (string prefix, bool atomizedNames)
{
	string uri = nsmgr.LookupNamespace (prefix, atomizedNames);
	if (uri == String.Empty)
		return null;
	return uri;
}
447 string IXmlNamespaceResolver.LookupPrefix (string ns)
449 return LookupPrefix (ns, false);
452 public string LookupPrefix (string ns, bool atomizedName)
454 return nsmgr.LookupPrefix (ns, atomizedName);
// Moves the cursor to the attribute at index `i` of the current node.
//
// Throws ArgumentOutOfRangeException when `i` is negative or is not smaller
// than attributeCount.
public override void MoveToAttribute (int i)
{
	// Validate before touching any state: a negative index used to be
	// stored in currentAttribute first and only then fail on the array
	// access, leaving the cursor bookkeeping pointing at a bogus index.
	if (i < 0 || i >= attributeCount)
		throw new ArgumentOutOfRangeException ("i", "attribute index out of range.");

	currentAttribute = i;
	currentAttributeValue = -1;
	cursorToken = attributeTokens [i];
}
467 public override bool MoveToAttribute (string name)
469 for (int i = 0; i < attributeCount; i++) {
470 XmlAttributeTokenInfo ti = attributeTokens [i];
471 if (ti.Name == name) {
479 public override bool MoveToAttribute (string localName, string namespaceName)
481 int idx = GetIndexOfQualifiedAttribute (localName, namespaceName);
484 MoveToAttribute (idx);
488 public override bool MoveToElement ()
490 if (currentToken == null) // for attribute .ctor()
493 if (cursorToken == currentToken)
496 if (currentAttribute >= 0) {
497 currentAttribute = -1;
498 currentAttributeValue = -1;
499 cursorToken = currentToken;
506 public override bool MoveToFirstAttribute ()
508 if (attributeCount == 0)
511 return MoveToNextAttribute ();
514 public override bool MoveToNextAttribute ()
516 if (currentAttribute == 0 && attributeCount == 0)
518 if (currentAttribute + 1 < attributeCount) {
520 currentAttributeValue = -1;
521 cursorToken = attributeTokens [currentAttribute];
528 public override bool Read ()
530 if (readState == ReadState.Closed)
532 curNodePeekIndex = peekCharsIndex;
533 preserveCurrentTag = true;
537 if (startNodeType == XmlNodeType.Attribute) {
538 if (currentAttribute == 0)
539 return false; // already read.
540 SkipTextDeclaration ();
542 IncrementAttributeToken ();
543 ReadAttributeValueTokens ('"');
544 cursorToken = attributeTokens [0];
545 currentAttributeValue = -1;
546 readState = ReadState.Interactive;
549 if (readState == ReadState.Initial && currentState == XmlNodeType.Element)
550 SkipTextDeclaration ();
556 readState = ReadState.Interactive;
557 currentLinkedNodeLineNumber = line;
558 currentLinkedNodeLinePosition = column;
559 useProceedingLineInfo = true;
561 cursorToken = currentToken;
563 currentAttribute = currentAttributeValue = -1;
564 currentToken.Clear ();
566 // It was moved from end of ReadStartTag ().
572 if (readCharsInProgress) {
573 readCharsInProgress = false;
574 return ReadUntilEndTag ();
577 more = ReadContent ();
579 if (!more && startNodeType == XmlNodeType.Document && currentState != XmlNodeType.EndElement)
580 throw NotWFError ("Document element did not appear.");
582 useProceedingLineInfo = false;
586 public override bool ReadAttributeValue ()
588 if (readState == ReadState.Initial && startNodeType == XmlNodeType.Attribute) {
592 if (currentAttribute < 0)
594 XmlAttributeTokenInfo ti = attributeTokens [currentAttribute];
595 if (currentAttributeValue < 0)
596 currentAttributeValue = ti.ValueTokenStartIndex - 1;
598 if (currentAttributeValue < ti.ValueTokenEndIndex) {
599 currentAttributeValue++;
600 cursorToken = attributeValueTokens [currentAttributeValue];
// Delegates to Binary.ReadBase64 for (buffer, offset, length) and returns
// its result. This reader's char getter is installed for the duration of
// the call and cleared again afterwards, even if the call throws.
public int ReadBase64 (byte [] buffer, int offset, int length)
{
	int decoded;
	BinaryCharGetter = binaryCharGetter;
	try {
		decoded = Binary.ReadBase64 (buffer, offset, length);
	} finally {
		BinaryCharGetter = null;
	}
	return decoded;
}
// Delegates to Binary.ReadBinHex for (buffer, offset, length) and returns
// its result. This reader's char getter is installed for the duration of
// the call and cleared again afterwards, even if the call throws.
public int ReadBinHex (byte [] buffer, int offset, int length)
{
	int decoded;
	BinaryCharGetter = binaryCharGetter;
	try {
		decoded = Binary.ReadBinHex (buffer, offset, length);
	} finally {
		BinaryCharGetter = null;
	}
	return decoded;
}
627 public int ReadChars (char [] buffer, int offset, int length)
630 throw new ArgumentOutOfRangeException (
634 "Offset must be non-negative integer.");
636 } else if (length < 0) {
637 throw new ArgumentOutOfRangeException (
641 "Length must be non-negative integer.");
643 } else if (buffer.Length < offset + length)
644 throw new ArgumentOutOfRangeException ("buffer length is smaller than the sum of offset and length.");
646 if (IsEmptyElement) {
651 if (!readCharsInProgress && NodeType != XmlNodeType.Element)
654 preserveCurrentTag = false;
655 readCharsInProgress = true;
656 useProceedingLineInfo = true;
658 return ReadCharsInternal (buffer, offset, length);
661 public void ResetState ()
664 throw new InvalidOperationException ("Cannot call ResetState when parsing an XML fragment.");
668 public override void ResolveEntity ()
670 // XmlTextReader does not resolve entities.
671 throw new InvalidOperationException ("XmlTextReader cannot resolve external entities.");
674 [MonoTODO] // FIXME: Implement, for performance improvement
675 public override void Skip ()
682 // Parsed DTD Objects
683 // Note that this property must be kept since dtd2xsd uses it.
684 internal DTDObjectModel DTD {
685 get { return parserContext.Dtd; }
688 internal XmlResolver Resolver {
689 get { return resolver; }
694 internal class XmlTokenInfo
696 public XmlTokenInfo (XmlTextReader xtr)
704 protected XmlTextReader Reader;
707 public string LocalName;
708 public string Prefix;
709 public string NamespaceURI;
710 public bool IsEmptyElement;
711 public char QuoteChar;
712 public int LineNumber;
713 public int LinePosition;
714 public int ValueBufferStart;
715 public int ValueBufferEnd;
717 public XmlNodeType NodeType;
719 public virtual string Value {
721 if (valueCache != null)
723 if (ValueBufferStart >= 0) {
724 //Console.WriteLine (NodeType + " / " + ValueBuffer.Length + " / " + ValueBufferStart + " / " + ValueBufferEnd);
725 valueCache = Reader.valueBuffer.ToString (ValueBufferStart, ValueBufferEnd - ValueBufferStart);
729 case XmlNodeType.Text:
730 case XmlNodeType.SignificantWhitespace:
731 case XmlNodeType.Whitespace:
732 case XmlNodeType.Comment:
733 case XmlNodeType.CDATA:
734 case XmlNodeType.ProcessingInstruction:
735 valueCache = Reader.CreateValueString ();
740 set { valueCache = value; }
743 public virtual void Clear ()
745 ValueBufferStart = -1;
747 NodeType = XmlNodeType.None;
748 Name = LocalName = Prefix = NamespaceURI = String.Empty;
749 IsEmptyElement = false;
751 LineNumber = LinePosition = 0;
755 internal class XmlAttributeTokenInfo : XmlTokenInfo
757 public XmlAttributeTokenInfo (XmlTextReader reader)
760 NodeType = XmlNodeType.Attribute;
763 public int ValueTokenStartIndex;
764 public int ValueTokenEndIndex;
766 StringBuilder tmpBuilder = new StringBuilder ();
768 public override string Value {
770 if (valueCache != null)
773 // An empty value should return String.Empty.
774 if (ValueTokenStartIndex == ValueTokenEndIndex) {
775 XmlTokenInfo ti = Reader.attributeValueTokens [ValueTokenStartIndex];
776 if (ti.NodeType == XmlNodeType.EntityReference)
777 valueCache = String.Concat ("&", ti.Name, ";");
779 valueCache = ti.Value;
783 tmpBuilder.Length = 0;
784 for (int i = ValueTokenStartIndex; i <= ValueTokenEndIndex; i++) {
785 XmlTokenInfo ti = Reader.attributeValueTokens [i];
786 if (ti.NodeType == XmlNodeType.Text)
787 tmpBuilder.Append (ti.Value);
789 tmpBuilder.Append ('&');
790 tmpBuilder.Append (ti.Name);
791 tmpBuilder.Append (';');
795 valueCache = tmpBuilder.ToString (0, tmpBuilder.Length);
799 set { valueCache = value; }
802 public override void Clear ()
806 NodeType = XmlNodeType.Attribute;
807 ValueTokenStartIndex = ValueTokenEndIndex = 0;
// Registers this attribute with the reader's namespace manager when it is
// a namespace declaration: prefix `xmlns` maps LocalName to Value, and an
// attribute whose whole name is `xmlns` maps the empty prefix to Value.
// Any other attribute is ignored. The comparisons are by reference.
internal void FillXmlns ()
{
	string declaredPrefix;
	if (Object.ReferenceEquals (Prefix, XmlNamespaceManager.PrefixXmlns))
		declaredPrefix = LocalName;
	else if (Object.ReferenceEquals (Name, XmlNamespaceManager.PrefixXmlns))
		declaredPrefix = String.Empty;
	else
		return;

	Reader.nsmgr.AddNamespace (declaredPrefix, Value);
}
818 internal void FillNamespace ()
820 if (Object.ReferenceEquals (Prefix, XmlNamespaceManager.PrefixXmlns) ||
821 Object.ReferenceEquals (Name, XmlNamespaceManager.PrefixXmlns))
822 NamespaceURI = XmlNamespaceManager.XmlnsXmlns;
823 else if (Prefix.Length == 0)
824 NamespaceURI = string.Empty;
826 NamespaceURI = Reader.LookupNamespace (Prefix, true);
830 private XmlTokenInfo cursorToken;
831 private XmlTokenInfo currentToken;
832 private XmlAttributeTokenInfo currentAttributeToken;
833 private XmlTokenInfo currentAttributeValueToken;
834 private XmlAttributeTokenInfo [] attributeTokens = new XmlAttributeTokenInfo [10];
835 private XmlTokenInfo [] attributeValueTokens = new XmlTokenInfo [10];
836 private int currentAttribute;
837 private int currentAttributeValue;
838 private int attributeCount;
840 private XmlParserContext parserContext;
841 private XmlNameTable nameTable;
842 private XmlNamespaceManager nsmgr;
844 private ReadState readState;
845 private bool disallowReset;
848 private int elementDepth;
849 private bool depthUp;
851 private bool popScope;
855 public TagName (string n, string l, string p)
862 public readonly string Name;
863 public readonly string LocalName;
864 public readonly string Prefix;
867 private TagName [] elementNames;
868 int elementNameStackPos;
870 private bool allowMultipleRoot;
872 private bool isStandalone;
874 private bool returnEntityReference;
875 private string entityReferenceName;
878 private char [] nameBuffer;
879 private int nameLength;
880 private int nameCapacity;
881 private const int initialNameCapacity = 32;
884 private StringBuilder valueBuffer;
887 private TextReader reader;
888 private char [] peekChars;
889 private int peekCharsIndex;
890 private int peekCharsLength;
891 private int curNodePeekIndex;
892 private bool preserveCurrentTag;
893 private const int peekCharCapacity = 1024;
898 private int currentLinkedNodeLineNumber;
899 private int currentLinkedNodeLinePosition;
900 private bool useProceedingLineInfo;
902 private XmlNodeType startNodeType;
903 // State machine attribute.
904 // XmlDeclaration: after the first node.
905 // DocumentType: after doctypedecl
906 // Element: inside document element
907 // EndElement: after document element
908 private XmlNodeType currentState;
910 // For ReadChars()/ReadBase64()/ReadBinHex()
911 private int nestLevel;
912 private bool readCharsInProgress;
913 XmlReaderBinarySupport.CharGetter binaryCharGetter;
915 // These values are never re-initialized.
916 private bool namespaces = true;
917 private WhitespaceHandling whitespaceHandling = WhitespaceHandling.All;
919 private XmlResolver resolver = new XmlXapResolver ();
921 private XmlResolver resolver = new XmlUrlResolver ();
923 private bool normalization = false;
925 private bool checkCharacters;
926 private bool prohibitDtd = false;
927 private bool closeInput = true;
928 private EntityHandling entityHandling; // 2.0
930 private NameTable whitespacePool;
931 private char [] whitespaceCache;
// Builds (but does not throw) an XmlException carrying this reader's line
// information, its BaseURI and the given message.
private XmlException NotWFError (string message)
{
	IXmlLineInfo position = this as IXmlLineInfo;
	string baseUri = BaseURI;
	return new XmlException (position, baseUri, message);
}
940 allowMultipleRoot = false;
941 elementNames = new TagName [10];
942 valueBuffer = new StringBuilder ();
943 binaryCharGetter = new XmlReaderBinarySupport.CharGetter (ReadChars);
945 nameBuffer = new char [initialNameCapacity];
948 checkCharacters = true;
949 if (Settings != null)
950 checkCharacters = Settings.CheckCharacters;
953 entityHandling = EntityHandling.ExpandCharEntities;
956 if (peekChars == null)
957 peekChars = new char [peekCharCapacity];
958 peekCharsLength = -1;
959 curNodePeekIndex = -1; // read from start
964 currentLinkedNodeLineNumber = currentLinkedNodeLinePosition = 0;
969 private void Clear ()
971 currentToken = new XmlTokenInfo (this);
972 cursorToken = currentToken;
973 currentAttribute = -1;
974 currentAttributeValue = -1;
977 readState = ReadState.Initial;
983 popScope = allowMultipleRoot = false;
984 elementNameStackPos = 0;
986 isStandalone = false;
987 returnEntityReference = false;
988 entityReferenceName = String.Empty;
992 nameCapacity = initialNameCapacity;
994 useProceedingLineInfo = false;
996 currentState = XmlNodeType.None;
998 readCharsInProgress = false;
1001 private void InitializeContext (string url, XmlParserContext context, TextReader fragment, XmlNodeType fragType)
1003 startNodeType = fragType;
1004 parserContext = context;
1005 if (context == null) {
1006 XmlNameTable nt = new NameTable ();
1007 parserContext = new XmlParserContext (nt,
1008 new XmlNamespaceManager (nt),
1012 nameTable = parserContext.NameTable;
1013 nameTable = nameTable != null ? nameTable : new NameTable ();
1014 nsmgr = parserContext.NamespaceManager;
1015 nsmgr = nsmgr != null ? nsmgr : new XmlNamespaceManager (nameTable);
1017 if (url != null && url.Length > 0) {
1019 Uri uri = new Uri (url, UriKind.RelativeOrAbsolute);
1023 uri = new Uri (url, UriKind.RelativeOrAbsolute);
1024 } catch (Exception) {
1025 string path = Path.GetFullPath ("./a");
1026 uri = new Uri (new Uri (path), url);
1029 parserContext.BaseURI = uri.ToString ();
1037 case XmlNodeType.Attribute:
1038 reader = new StringReader (fragment.ReadToEnd ().Replace ("\"", """));
1040 case XmlNodeType.Element:
1041 currentState = XmlNodeType.Element;
1042 allowMultipleRoot = true;
1044 case XmlNodeType.Document:
1047 throw new XmlException (String.Format ("NodeType {0} is not allowed to create XmlTextReader.", fragType));
1051 internal ConformanceLevel Conformance {
1052 get { return allowMultipleRoot ? ConformanceLevel.Fragment : ConformanceLevel.Document; }
1054 if (value == ConformanceLevel.Fragment) {
1055 currentState = XmlNodeType.Element;
1056 allowMultipleRoot = true;
1061 internal void AdjustLineInfoOffset (int lineNumberOffset, int linePositionOffset)
1063 line += lineNumberOffset;
1064 column += linePositionOffset;
1067 internal void SetNameTable (XmlNameTable nameTable)
1069 parserContext.NameTable = nameTable;
1072 // Use this method rather than setting the properties
1073 // directly so that all the necessary properties can
1074 // be changed in harmony with each other. Maybe the
1075 // fields should be in a separate class to help enforce
1078 // Namespace URI could not be provided here.
1079 private void SetProperties (
1080 XmlNodeType nodeType,
1084 bool isEmptyElement,
1086 bool clearAttributes)
1088 SetTokenProperties (currentToken, nodeType, name, prefix, localName, isEmptyElement, value, clearAttributes);
1089 currentToken.LineNumber = this.currentLinkedNodeLineNumber;
1090 currentToken.LinePosition = this.currentLinkedNodeLinePosition;
1093 private void SetTokenProperties (
1095 XmlNodeType nodeType,
1099 bool isEmptyElement,
1101 bool clearAttributes)
1103 token.NodeType = nodeType;
1105 token.Prefix = prefix;
1106 token.LocalName = localName;
1107 token.IsEmptyElement = isEmptyElement;
1108 token.Value = value;
1109 this.elementDepth = depth;
1111 if (clearAttributes)
1115 private void ClearAttributes ()
1117 //for (int i = 0; i < attributeCount; i++)
1118 // attributeTokens [i].Clear ();
1120 currentAttribute = -1;
1121 currentAttributeValue = -1;
1124 private int PeekSurrogate (int c)
1126 if (peekCharsLength <= peekCharsIndex + 1) {
1127 if (!ReadTextReader (c))
1128 //FIXME: copy MS.NET behaviour when unpaired surrogate found
1132 int highhalfChar = peekChars [peekCharsIndex];
1133 int lowhalfChar = peekChars [peekCharsIndex+1];
1135 if (((highhalfChar & 0xFC00) != 0xD800) || ((lowhalfChar & 0xFC00) != 0xDC00))
1136 //FIXME: copy MS.NET behaviour when unpaired surrogate found
1137 return highhalfChar;
1138 return 0x10000 + (highhalfChar-0xD800)*0x400 + (lowhalfChar-0xDC00);
1141 private int PeekChar ()
1143 if (peekCharsIndex < peekCharsLength) {
1144 int c = peekChars [peekCharsIndex];
1147 if (c < 0xD800 || c >= 0xDFFF)
1149 return PeekSurrogate (c);
1151 if (!ReadTextReader (-1))
1157 private int ReadChar ()
1159 int ch = PeekChar ();
1163 peekCharsIndex++; //Increment by 2 when a compound UCS-4 character was found
1168 } else if (ch != -1) {
1174 private void Advance (int ch) {
1178 peekCharsIndex++; //Increment by 2 when a compound UCS-4 character was found
1183 } else if (ch != -1) {
1188 private bool ReadTextReader (int remained)
1190 if (reader == null && reader_uri != null) {
1191 Uri uri = reader_uri;
1194 reader = new XmlStreamReader (GetStreamFromUrl (uri.ToString (), out uriString));
1196 if (peekCharsLength < 0) { // initialized buffer
1197 peekCharsLength = reader.Read (peekChars, 0, peekChars.Length);
1198 return peekCharsLength > 0;
1200 int offset = remained >= 0 ? 1 : 0;
1201 int copysize = peekCharsLength - curNodePeekIndex;
1203 // It must assure that current tag content always exists
1205 if (!preserveCurrentTag) {
1206 curNodePeekIndex = 0;
1209 } else if (peekCharsLength < peekChars.Length) {
1210 // NonBlockingStreamReader returned less bytes
1211 // than the size of the buffer. In that case,
1212 // just refill the buffer.
1213 } else if (curNodePeekIndex <= (peekCharsLength >> 1)) {
1214 // extend the buffer
1215 char [] tmp = new char [peekChars.Length * 2];
1216 Array.Copy (peekChars, curNodePeekIndex,
1219 curNodePeekIndex = 0;
1220 peekCharsIndex = copysize;
1222 Array.Copy (peekChars, curNodePeekIndex,
1223 peekChars, 0, copysize);
1224 curNodePeekIndex = 0;
1225 peekCharsIndex = copysize;
1228 peekChars [peekCharsIndex] = (char) remained;
1229 int count = peekChars.Length - peekCharsIndex - offset;
1230 if (count > peekCharCapacity)
1231 count = peekCharCapacity;
1232 int read = reader.Read (
1233 peekChars, peekCharsIndex + offset, count);
1234 int remainingSize = offset + read;
1235 peekCharsLength = peekCharsIndex + remainingSize;
1237 return (remainingSize != 0);
1240 private bool ReadContent ()
1244 parserContext.PopScope ();
1248 if (returnEntityReference)
1249 SetEntityReferenceProperties ();
1251 int c = PeekChar ();
1253 readState = ReadState.EndOfFile;
1254 ClearValueBuffer ();
1256 XmlNodeType.None, // nodeType
1257 String.Empty, // name
1258 String.Empty, // prefix
1259 String.Empty, // localName
1260 false, // isEmptyElement
1262 true // clearAttributes
1265 throw NotWFError ("unexpected end of file. Current depth is " + depth);
1272 switch (PeekChar ())
1280 ReadProcessingInstruction ();
1295 if (!ReadWhitespace ())
1297 return ReadContent ();
1305 return this.ReadState != ReadState.EndOfFile;
1308 private void SetEntityReferenceProperties ()
1310 DTDEntityDeclaration decl = DTD != null ? DTD.EntityDecls [entityReferenceName] : null;
1311 if (this.isStandalone)
1312 if (DTD == null || decl == null || !decl.IsInternalSubset)
1313 throw NotWFError ("Standalone document must not contain any references to an non-internally declared entity.");
1314 if (decl != null && decl.NotationName != null)
1315 throw NotWFError ("Reference to any unparsed entities is not allowed here.");
1317 ClearValueBuffer ();
1319 XmlNodeType.EntityReference, // nodeType
1320 entityReferenceName, // name
1321 String.Empty, // prefix
1322 entityReferenceName, // localName
1323 false, // isEmptyElement
1325 true // clearAttributes
1328 returnEntityReference = false;
1329 entityReferenceName = String.Empty;
1332 // The leading '<' has already been consumed.
1333 private void ReadStartTag ()
1335 if (currentState == XmlNodeType.EndElement)
1336 throw NotWFError ("Multiple document element was detected.");
1337 currentState = XmlNodeType.Element;
1341 currentLinkedNodeLineNumber = line;
1342 currentLinkedNodeLinePosition = column;
1344 string prefix, localName;
1345 string name = ReadName (out prefix, out localName);
1346 if (currentState == XmlNodeType.EndElement)
1347 throw NotWFError ("document has terminated, cannot open new element");
1349 bool isEmptyElement = false;
1354 if (XmlChar.IsFirstNameChar (PeekChar ()))
1355 ReadAttributes (false);
1356 cursorToken = this.currentToken;
1359 for (int i = 0; i < attributeCount; i++)
1360 attributeTokens [i].FillXmlns ();
1361 for (int i = 0; i < attributeCount; i++)
1362 attributeTokens [i].FillNamespace ();
1366 for (int i = 0; i < attributeCount; i++)
1367 if (attributeTokens [i].Prefix == "xmlns" &&
1368 attributeTokens [i].Value == String.Empty)
1369 throw NotWFError ("Empty namespace URI cannot be mapped to non-empty prefix.");
1371 for (int i = 0; i < attributeCount; i++) {
1372 for (int j = i + 1; j < attributeCount; j++)
1373 if (Object.ReferenceEquals (attributeTokens [i].Name, attributeTokens [j].Name) ||
1374 (Object.ReferenceEquals (attributeTokens [i].LocalName, attributeTokens [j].LocalName) &&
1375 Object.ReferenceEquals (attributeTokens [i].NamespaceURI, attributeTokens [j].NamespaceURI)))
1376 throw NotWFError ("Attribute name and qualified name must be identical.");
1379 if (PeekChar () == '/') {
1381 isEmptyElement = true;
1386 PushElementName (name, localName, prefix);
1388 parserContext.PushScope ();
1393 XmlNodeType.Element, // nodeType
1397 isEmptyElement, // isEmptyElement
1399 false // clearAttributes
1401 if (prefix.Length > 0)
1402 currentToken.NamespaceURI = LookupNamespace (prefix, true);
1403 else if (namespaces)
1404 currentToken.NamespaceURI = nsmgr.DefaultNamespace;
1407 if (NamespaceURI == null)
1408 throw NotWFError (String.Format ("'{0}' is undeclared namespace.", Prefix));
1410 for (int i = 0; i < attributeCount; i++) {
1411 MoveToAttribute (i);
1412 if (NamespaceURI == null)
1413 throw NotWFError (String.Format ("'{0}' is undeclared namespace.", Prefix));
1420 for (int i = 0; i < attributeCount; i++) {
1421 if (!Object.ReferenceEquals (attributeTokens [i].Prefix, XmlNamespaceManager.PrefixXml))
1423 string aname = attributeTokens [i].LocalName;
1424 string value = attributeTokens [i].Value;
1427 if (this.resolver != null) {
1429 BaseURI != String.Empty ?
1430 new Uri (BaseURI) : null;
1431 // xml:base="" without any base URI -> pointless. However there are
1432 // some people who use such xml:base. See also bug #608391.
1433 if (buri == null && String.IsNullOrEmpty (value))
1435 Uri uri = resolver.ResolveUri (
1437 parserContext.BaseURI =
1443 parserContext.BaseURI = value;
1446 parserContext.XmlLang = value;
1451 parserContext.XmlSpace = XmlSpace.Preserve;
1454 parserContext.XmlSpace = XmlSpace.Default;
1457 throw NotWFError (String.Format ("Invalid xml:space value: {0}", value));
1464 CheckCurrentStateUpdate ();
// Pushes a (name, local, prefix) entry onto the open-element stack,
// doubling the backing array first when every slot is already in use.
private void PushElementName (string name, string local, string prefix)
{
	if (elementNameStackPos == elementNames.Length) {
		TagName [] grown = new TagName [elementNames.Length * 2];
		Array.Copy (elementNames, 0, grown, 0, elementNameStackPos);
		elementNames = grown;
	}

	TagName entry = new TagName (name, local, prefix);
	elementNames [elementNameStackPos] = entry;
	elementNameStackPos++;
}
1478 // The reader is positioned on the first character
1479 // of the element's name.
// Parses an end tag and sets the current node up as an EndElement.
// Raises a not-well-formed error when the current state is not Element,
// when no element is open, or when the input does not spell the name of
// the innermost open element.
1480 private void ReadEndTag ()
1482 if (currentState != XmlNodeType.Element)
1483 throw NotWFError ("End tag cannot appear in this state.");
// Record the current line/column as the position of this node.
1485 currentLinkedNodeLineNumber = line;
1486 currentLinkedNodeLinePosition = column;
1488 if (elementNameStackPos == 0)
1489 throw NotWFError ("closing element without matching opening element");
// Pop the innermost open element; the input must spell exactly its name.
1490 TagName expected = elementNames [--elementNameStackPos];
1491 Expect (expected.Name);
1493 ExpectAfterWhitespace ('>');
// Arguments describing the EndElement node (the call they belong to is
// not visible in this excerpt).
1498 XmlNodeType.EndElement, // nodeType
1499 expected.Name, // name
1500 expected.Prefix, // prefix
1501 expected.LocalName, // localName
1502 false, // isEmptyElement
1504 true // clearAttributes
// A prefixed name is resolved through LookupNamespace; an unprefixed one
// takes the default namespace when namespace support is enabled.
1506 if (expected.Prefix.Length > 0)
1507 currentToken.NamespaceURI = LookupNamespace (expected.Prefix, true);
1508 else if (namespaces)
1509 currentToken.NamespaceURI = nsmgr.DefaultNamespace;
// Closing the root element may move the reader into the EndElement state.
1513 CheckCurrentStateUpdate ();
// Switches the reader state to EndElement once the root element has been
// finished: depth is zero, multiple roots are not allowed, and the current
// node is either an empty element or an end element.
private void CheckCurrentStateUpdate ()
{
	if (depth != 0 || allowMultipleRoot)
		return;

	if (IsEmptyElement || NodeType == XmlNodeType.EndElement)
		currentState = XmlNodeType.EndElement;
}
// Appends a code point above U+FFFF to nameBuffer as a UTF-16 surrogate
// pair. Capacity is checked only between the two writes, so the caller
// must guarantee room for the first one.
private void AppendSurrogatePairNameChar (int ch)
{
	int offset = ch - 0x10000;
	char high = (char) (offset / 0x400 + 0xD800);
	char low = (char) (offset % 0x400 + 0xDC00);

	nameBuffer [nameLength++] = high;
	if (nameLength == nameCapacity)
		ExpandNameCapacity ();
	nameBuffer [nameLength++] = low;
}
// Doubles nameCapacity and moves the first nameLength characters of the
// name buffer into a freshly allocated array of the new size.
private void ExpandNameCapacity ()
{
	nameCapacity *= 2;
	char [] previous = nameBuffer;
	char [] grown = new char [nameCapacity];
	nameBuffer = grown;
	Array.Copy (previous, grown, nameLength);
}
// Appends one code point to valueBuffer. Values that do not fit in a
// single char are written as a surrogate pair.
private void AppendValueChar (int ch)
{
	if (ch > Char.MaxValue) {
		AppendSurrogatePairValueChar (ch);
		return;
	}
	valueBuffer.Append ((char) ch);
}
// Appends a code point above U+FFFF to valueBuffer as a UTF-16
// surrogate pair (high surrogate first, then low surrogate).
private void AppendSurrogatePairValueChar (int ch)
{
	int offset = ch - 0x10000;
	char high = (char) (offset / 0x400 + 0xD800);
	char low = (char) (offset % 0x400 + 0xDC00);
	valueBuffer.Append (high).Append (low);
}
// Builds the string value of the current node from valueBuffer.
// Whitespace and SignificantWhitespace values are atomized through a
// private NameTable; the final return converts the buffer directly.
1554 private string CreateValueString ()
1556 // Since whitespace strings are mostly identical
1557 // depending on the Depth, we make use of NameTable
1558 // to atomize whitespace strings.
1560 case XmlNodeType.Whitespace:
1561 case XmlNodeType.SignificantWhitespace:
1562 int len = valueBuffer.Length;
// Lazily allocate the scratch array handed to the pool.
1563 if (whitespaceCache == null)
1564 whitespaceCache = new char [32];
// NOTE(review): the statement guarded by this check is not visible here;
// presumably runs too long for the cache skip atomization - confirm.
1565 if (len >= whitespaceCache.Length)
1567 if (whitespacePool == null)
1568 whitespacePool = new NameTable ();
// Two alternative ways of filling the cache follow.
// NOTE(review): likely selected by a preprocessor condition that is not
// visible in this excerpt - confirm.
1570 valueBuffer.CopyTo (0, whitespaceCache, 0, len);
1572 for (int i = 0; i < len; i++)
1573 whitespaceCache [i] = valueBuffer [i];
1575 return whitespacePool.Add (whitespaceCache, 0, valueBuffer.Length);
// Both branches of the conditional return the whole buffer content; only
// the ToString overload differs, chosen by the buffer capacity.
1577 return (valueBuffer.Capacity < 100) ?
1578 valueBuffer.ToString (0, valueBuffer.Length) :
1579 valueBuffer.ToString ();
// Empties valueBuffer so that the next node value starts from scratch.
private void ClearValueBuffer ()
{
	valueBuffer.Remove (0, valueBuffer.Length);
}
1587 // The reader is positioned on the first character of the text.
// Collects character data into valueBuffer until '<' or end of input and
// then describes the node as Text, SignificantWhitespace or Whitespace.
// When an entity reference is flagged before any text was collected, the
// node is set up as an entity reference instead.
// notWhitespace: when true the node is reported as Text; the loop also
// sets it to true itself (see the assignment near the end of the loop).
// Raises a not-well-formed error outside the Element state, on invalid
// characters (when CharacterChecking is on) and on "]]>" inside text.
1589 private void ReadText (bool notWhitespace)
1591 if (currentState != XmlNodeType.Element)
1592 throw NotWFError ("Text node cannot appear in this state.");
1593 preserveCurrentTag = false;
1596 ClearValueBuffer ();
1598 int ch = PeekChar ();
// Tracks a preceding ']' so that the forbidden "]]>" can be detected.
1599 bool previousWasCloseBracket = false;
1601 while (ch != '<' && ch != -1) {
// Reference branch: ReadReference returns the character to append.
1604 ch = ReadReference (false);
1605 if (returnEntityReference) // Returns -1 if char validation should not be done
1607 } else if (normalization && ch == '\r') {
1611 // append '\n' instead of '\r'.
1612 AppendValueChar ('\n');
1613 // and in case of "\r\n", discard '\r'.
1616 if (CharacterChecking && XmlChar.IsInvalid (ch))
1617 throw NotWFError ("Not allowed character was found.");
1621 // FIXME: it might be optimized by the JIT later,
1622 // AppendValueChar (ch);
// Hand-inlined equivalent of AppendValueChar (ch).
1624 if (ch <= Char.MaxValue)
1625 valueBuffer.Append ((char) ch);
1627 AppendSurrogatePairValueChar (ch);
// "]]" followed by '>' is rejected inside text content.
1632 if (previousWasCloseBracket)
1633 if (PeekChar () == '>')
1634 throw NotWFError ("Inside text content, character sequence ']]>' is not allowed.");
1635 previousWasCloseBracket = true;
1637 else if (previousWasCloseBracket)
1638 previousWasCloseBracket = false;
1640 notWhitespace = true;
// An entity reference hit before any text becomes the node itself.
1643 if (returnEntityReference && valueBuffer.Length == 0) {
1644 SetEntityReferenceProperties ();
// Whitespace-only content is SignificantWhitespace under
// xml:space="preserve" and plain Whitespace otherwise.
1646 XmlNodeType nodeType = notWhitespace ? XmlNodeType.Text :
1647 this.XmlSpace == XmlSpace.Preserve ? XmlNodeType.SignificantWhitespace : XmlNodeType.Whitespace;
1649 nodeType, // nodeType
1650 String.Empty, // name
1651 String.Empty, // prefix
1652 String.Empty, // localName
1653 false, // isEmptyElement
1654 null, // value: create only when required
1655 true // clearAttributes
1660 // The leading '&' has already been consumed.
1661 // Dispatches on the next character: '#' introduces a character
1662 // reference, anything else is read as a named entity reference.
1663 // Returns the int produced by ReadCharacterReference () or
1664 // ReadEntityReference (); the latter may set returnEntityReference.
// ignoreEntityReferences is passed through to ReadEntityReference ().
1665 private int ReadReference (bool ignoreEntityReferences)
1667 if (PeekChar () == '#') {
1669 return ReadCharacterReference ();
1671 return ReadEntityReference (ignoreEntityReferences);
// Parses a numeric character reference and computes the referenced code
// point. A leading 'x' selects hexadecimal digits, otherwise decimal
// digits are read. Digits are taken until ';' or end of input; any other
// character raises a not-well-formed error.
// NOTE(review): value is accumulated in an int and no range check is
// visible here, so a very long digit run could overflow - confirm.
1674 private int ReadCharacterReference ()
1679 if (PeekChar () == 'x') {
1682 while ((ch = PeekChar ()) != ';' && ch != -1) {
// Shift in one hexadecimal digit.
1685 if (ch >= '0' && ch <= '9')
1686 value = (value << 4) + ch - '0';
1687 else if (ch >= 'A' && ch <= 'F')
1688 value = (value << 4) + ch - 'A' + 10;
1689 else if (ch >= 'a' && ch <= 'f')
1690 value = (value << 4) + ch - 'a' + 10;
1692 throw NotWFError (String.Format (CultureInfo.InvariantCulture,
1693 "invalid hexadecimal digit: {0} (#x{1:X})",
1698 while ((ch = PeekChar ()) != ';' && ch != -1) {
// Shift in one decimal digit.
1701 if (ch >= '0' && ch <= '9')
1702 value = value * 10 + ch - '0';
1704 throw NotWFError (String.Format (CultureInfo.InvariantCulture,
1705 "invalid decimal digit: {0} (#x{1:X})",
1713 // There is no way to save surrogate pairs...
// The validity check only runs when both CharacterChecking and
// Normalization are enabled.
1714 if (CharacterChecking && Normalization &&
1715 XmlChar.IsInvalid (value))
1716 throw NotWFError ("Referenced character was not allowed in XML. Normalization is " + normalization + ", checkCharacters = " + checkCharacters);
1720 // Returns -1 if it should not be validated.
1721 // Real EOF must not be detected here.
// Reads the entity name that follows '&' and handles the reference.
// ignoreEntityReferences: when true, a non-predefined reference is copied
// into valueBuffer as literal "&name;" text; when false it is flagged via
// returnEntityReference / entityReferenceName instead.
1722 private int ReadEntityReference (bool ignoreEntityReferences)
1724 string name = ReadName ();
// A non-negative result is treated as a predefined entity.
1727 int predefined = XmlChar.GetPredefinedEntity (name);
1728 if (predefined >= 0)
1731 if (ignoreEntityReferences) {
1732 AppendValueChar ('&');
1733 for (int i = 0; i < name.Length; i++)
1734 AppendValueChar (name [i]);
1735 AppendValueChar (';');
// ReadText inspects returnEntityReference right after ReadReference ().
1737 returnEntityReference = true;
1738 entityReferenceName = name;
1744 // The reader is positioned on the first character of
1745 // the attribute name.
// Reads attributes one after another, filling one attribute token (name,
// prefix, local name, position, value tokens) per attribute, until the
// next non-whitespace character is '?', '/', '>' or end of input.
// isXmlDecl: NOTE(review) its use is not visible in this excerpt;
// presumably it guards the xmldecl "dummy value" hack below - confirm.
1746 private void ReadAttributes (bool isXmlDecl)
// Set once an attribute was not followed by whitespace; another name in
// that position is then an error.
1749 bool requireWhitespace = false;
1750 currentAttribute = -1;
1751 currentAttributeValue = -1;
1754 if (!SkipWhitespace () && requireWhitespace)
1755 throw NotWFError ("Unexpected token. Name is required here.");
// Start a fresh attribute token and record where it begins.
1757 IncrementAttributeToken ();
1758 currentAttributeToken.LineNumber = line;
1759 currentAttributeToken.LinePosition = column;
1761 string prefix, localName;
1762 currentAttributeToken.Name = ReadName (out prefix, out localName);
1763 currentAttributeToken.Prefix = prefix;
1764 currentAttributeToken.LocalName = localName;
1765 ExpectAfterWhitespace ('=');
// -1: the opening quote is read from the input.
1767 ReadAttributeValueTokens (-1);
1768 // This hack is required for xmldecl which has
1769 // both effective attributes and Value.
1772 dummyValue = currentAttributeToken.Value;
1776 if (!SkipWhitespace ())
1777 requireWhitespace = true;
1778 peekChar = PeekChar ();
1780 if (peekChar == '?')
1783 else if (peekChar == '/' || peekChar == '>')
1785 } while (peekChar != -1);
// Leave the reader positioned on the owning node, not on an attribute.
1787 currentAttribute = -1;
1788 currentAttributeValue = -1;
// Adds a synthetic attribute made of a single value token.
// The name is atomized through NameTable; prefix and namespace URI are
// set to the empty string. The arguments of the SetTokenProperties call
// are not visible in this excerpt.
1791 private void AddAttributeWithValue (string name, string value)
1793 IncrementAttributeToken ();
1794 XmlAttributeTokenInfo ati = attributeTokens [currentAttribute];
1795 ati.Name = NameTable.Add (name);
1796 ati.Prefix = String.Empty;
1797 ati.NamespaceURI = String.Empty;
1798 IncrementAttributeValueToken ();
1799 XmlTokenInfo vti = attributeValueTokens [currentAttributeValue];
1800 SetTokenProperties (vti,
// Advances to the next attribute token slot, doubling the token array
// when it is full, creating the token object on first use, and leaving
// a cleared token in currentAttributeToken.
private void IncrementAttributeToken ()
{
	int index = ++currentAttribute;
	if (index == attributeTokens.Length) {
		XmlAttributeTokenInfo [] grown = new XmlAttributeTokenInfo [index * 2];
		attributeTokens.CopyTo (grown, 0);
		attributeTokens = grown;
	}

	XmlAttributeTokenInfo slot = attributeTokens [index];
	if (slot == null) {
		slot = new XmlAttributeTokenInfo (this);
		attributeTokens [index] = slot;
	}
	currentAttributeToken = slot;
	slot.Clear ();
}
// Advances to the next attribute value token slot, doubling the token
// array when it is full, creating the token object on first use, and
// leaving a cleared token in currentAttributeValueToken.
private void IncrementAttributeValueToken ()
{
	int index = ++currentAttributeValue;
	if (index == attributeValueTokens.Length) {
		XmlTokenInfo [] grown = new XmlTokenInfo [index * 2];
		attributeValueTokens.CopyTo (grown, 0);
		attributeValueTokens = grown;
	}

	XmlTokenInfo slot = attributeValueTokens [index];
	if (slot == null) {
		slot = new XmlTokenInfo (this);
		attributeValueTokens [index] = slot;
	}
	currentAttributeValueToken = slot;
	slot.Clear ();
}
1841 // LAMESPEC: Orthodox XML reader should normalize attribute values
// Reads one quoted attribute value and splits it into value tokens: text
// runs recorded as ranges of valueBuffer, plus one EntityReference token
// per non-predefined entity that is not expanded.
// dummyQuoteChar: a negative value means the opening quote is read from
// the input; otherwise the given character is used as the quote.
// Raises a not-well-formed error for an unquoted value, for '<' inside
// the value, for end of file (when the quote came from the input) and for
// invalid characters when CharacterChecking is on.
1842 private void ReadAttributeValueTokens (int dummyQuoteChar)
1844 int quoteChar = (dummyQuoteChar < 0) ? ReadChar () : dummyQuoteChar;
1846 if (quoteChar != '\'' && quoteChar != '\"')
1847 throw NotWFError ("an attribute value was not quoted");
1848 currentAttributeToken.QuoteChar = (char) quoteChar;
// Open the first value token and remember where it starts.
1850 IncrementAttributeValueToken ();
1851 currentAttributeToken.ValueTokenStartIndex = currentAttributeValue;
1852 currentAttributeValueToken.LineNumber = line;
1853 currentAttributeValueToken.LinePosition = column;
// incrementToken: set after an EntityReference token was emitted, so the
// next character opens a new text token.
1855 bool incrementToken = false;
1856 bool isNewToken = true;
1859 currentAttributeValueToken.ValueBufferStart = valueBuffer.Length;
1862 if (ch == quoteChar)
1865 if (incrementToken) {
1866 IncrementAttributeValueToken ();
1867 currentAttributeValueToken.ValueBufferStart = valueBuffer.Length;
1868 currentAttributeValueToken.LineNumber = line;
1869 currentAttributeValueToken.LinePosition = column;
1870 incrementToken = false;
1877 throw NotWFError ("attribute values cannot contain '<'");
1879 if (dummyQuoteChar < 0)
1880 throw NotWFError ("unexpected end of file in an attribute value");
1881 else // Attribute value constructor.
1887 if (PeekChar () == '\n')
1888 continue; // skip '\r'.
1890 // The csc in MS.NET 2.0 beta 1 barfs on this goto, so work around that
1899 // When Normalize = true, then replace
1900 // all spaces to ' '
// Character reference: its value is appended to the current text token.
1906 if (PeekChar () == '#') {
1908 ch = ReadCharacterReference ();
1909 AppendValueChar (ch);
1912 // Check XML 1.0 section 3.1 WFC.
1913 string entName = ReadName ();
1915 int predefined = XmlChar.GetPredefinedEntity (entName);
1916 if (predefined < 0) {
1917 CheckAttributeEntityReferenceWFC (entName);
// With ExpandEntities the DTD-generated replacement text is appended in
// place of the reference.
1918 if (entityHandling == EntityHandling.ExpandEntities) {
1919 string value = DTD.GenerateEntityAttributeText (entName);
1920 foreach (char c in (IEnumerable<char>) value)
1921 AppendValueChar (c);
// Otherwise close the running text token and emit a separate
// EntityReference token named after the entity.
1923 currentAttributeValueToken.ValueBufferEnd = valueBuffer.Length;
1924 currentAttributeValueToken.NodeType = XmlNodeType.Text;
1926 IncrementAttributeValueToken ();
1927 currentAttributeValueToken.Name = entName;
1928 currentAttributeValueToken.Value = String.Empty;
1929 currentAttributeValueToken.NodeType = XmlNodeType.EntityReference;
1930 incrementToken = true;
1934 AppendValueChar (predefined);
1937 if (CharacterChecking && XmlChar.IsInvalid (ch))
1938 throw NotWFError ("Invalid character was found.");
1939 // FIXME: it might be optimized by the JIT later,
1940 // AppendValueChar (ch);
// Hand-inlined equivalent of AppendValueChar (ch).
1942 if (ch <= Char.MaxValue)
1943 valueBuffer.Append ((char) ch);
1945 AppendSurrogatePairValueChar (ch);
// Close the trailing text token unless the value ended on an entity
// reference token.
1952 if (!incrementToken) {
1953 currentAttributeValueToken.ValueBufferEnd = valueBuffer.Length;
1954 currentAttributeValueToken.NodeType = XmlNodeType.Text;
1956 currentAttributeToken.ValueTokenEndIndex = currentAttributeValue;
// Applies the well-formedness constraints for an entity reference that
// appears inside an attribute value.
// An undeclared entity is an error when entities are expanded, or when
// both a DTD and a resolver are present; otherwise it is tolerated.
// A declared entity must not have an external reference, must come from
// the internal subset in a standalone document, and must not contain '<'.
private void CheckAttributeEntityReferenceWFC (string entName)
{
	DTDEntityDeclaration declaration = null;
	if (DTD != null)
		declaration = DTD.EntityDecls [entName];

	if (declaration == null) {
		bool mustBeDeclared = entityHandling == EntityHandling.ExpandEntities
			|| (DTD != null && resolver != null);
		if (mustBeDeclared)
			throw NotWFError (String.Format ("Referenced entity '{0}' does not exist.", entName));
		return;
	}

	if (declaration.HasExternalReference)
		throw NotWFError ("Reference to external entities is not allowed in the value of an attribute.");

	bool outsideInternalSubset = !declaration.IsInternalSubset;
	if (isStandalone && outsideInternalSubset)
		throw NotWFError ("Reference to external entities is not allowed in the internal subset.");

	if (declaration.EntityValue.IndexOf ('<') >= 0)
		throw NotWFError ("Attribute must not contain character '<' either directly or indirectly by way of entity references.");
}
1980 // The reader is positioned on the first character
1983 // It may be xml declaration or processing instruction.
// Reads the target name and then the content, which is collected into
// valueBuffer. A target that is exactly the atomized "xml" string is
// handed to VerifyXmlDeclaration (); other targets are described as a
// ProcessingInstruction node.
1984 private void ReadProcessingInstruction ()
1986 string target = ReadName ();
// "xml" in any other letter case is rejected as a target.
1987 if (target != "xml" && target.ToLower (CultureInfo.InvariantCulture) == "xml")
1988 throw NotWFError ("Not allowed processing instruction name which starts with 'X', 'M', 'L' was found.");
// The target must be followed by whitespace or directly by '?'.
1990 if (!SkipWhitespace ())
1991 if (PeekChar () != '?')
1992 throw NotWFError ("Invalid processing instruction name was found.");
1994 ClearValueBuffer ();
1997 while ((ch = PeekChar ()) != -1) {
// "?>" terminates the processing instruction.
2000 if (ch == '?' && PeekChar () == '>') {
2005 if (CharacterChecking && XmlChar.IsInvalid (ch))
2006 throw NotWFError ("Invalid character was found.");
2007 AppendValueChar (ch);
// Reference comparison: relies on ReadName () returning an atomized
// string that is the same instance as XmlNamespaceManager.PrefixXml.
2010 if (Object.ReferenceEquals (target, XmlNamespaceManager.PrefixXml))
2011 VerifyXmlDeclaration ();
2013 if (currentState == XmlNodeType.None)
2014 currentState = XmlNodeType.XmlDeclaration;
2017 XmlNodeType.ProcessingInstruction, // nodeType
2019 String.Empty, // prefix
2020 target, // localName
2021 false, // isEmptyElement
2022 null, // value: create only when required
2023 true // clearAttributes
// Validates the content of an XML declaration, taken from valueBuffer,
// and exposes its pseudo-attributes as attributes of the node.
// The expected order is version="1.0", then optional encoding, then
// optional standalone ("yes" or "no"); anything else raises a
// not-well-formed error.
2028 void VerifyXmlDeclaration ()
// Unless multiple roots are allowed, the declaration must come first.
2030 if (!allowMultipleRoot && currentState != XmlNodeType.None)
2031 throw NotWFError ("XML declaration cannot appear in this state.");
2033 currentState = XmlNodeType.XmlDeclaration;
2035 string text = CreateValueString ();
// NOTE(review): the assignments to encoding and standalone are not
// visible in this excerpt; presumably they receive the parsed values.
2041 string encoding = null, standalone = null;
2043 ParseAttributeFromString (text, ref idx, out name, out value);
2044 if (name != "version" || value != "1.0")
2045 throw NotWFError ("'version' is expected.");
// Reset name so that a missing next pseudo-attribute is not mistaken for
// the previous one.
2046 name = String.Empty;
2047 if (SkipWhitespaceInString (text, ref idx) && idx < text.Length)
2048 ParseAttributeFromString (text, ref idx, out name, out value);
2049 if (name == "encoding") {
2050 if (!XmlChar.IsValidIANAEncoding (value))
2051 throw NotWFError ("'encoding' must be a valid IANA encoding name.");
// The context encoding comes from the stream reader when there is one,
// not from the declared name.
2052 if (reader is XmlStreamReader)
2053 parserContext.Encoding = ((XmlStreamReader) reader).Encoding;
2055 parserContext.Encoding = Encoding.Unicode;
2057 name = String.Empty;
2058 if (SkipWhitespaceInString (text, ref idx) && idx < text.Length)
2059 ParseAttributeFromString (text, ref idx, out name, out value);
2061 if (name == "standalone") {
2062 this.isStandalone = value == "yes";
2063 if (value != "yes" && value != "no")
2064 throw NotWFError ("Only 'yes' or 'no' is allow for 'standalone'");
2066 SkipWhitespaceInString (text, ref idx);
2068 else if (name.Length != 0)
2069 throw NotWFError (String.Format ("Unexpected token: '{0}'", name));
// Nothing may follow the last pseudo-attribute.
2071 if (idx < text.Length)
2072 throw NotWFError ("'?' is expected.");
// Expose the pseudo-attributes as attributes of the declaration node.
2074 AddAttributeWithValue ("version", "1.0");
2075 if (encoding != null)
2076 AddAttributeWithValue ("encoding", encoding);
2077 if (standalone != null)
2078 AddAttributeWithValue ("standalone", standalone);
2079 currentAttribute = currentAttributeValue = -1;
2082 XmlNodeType.XmlDeclaration, // nodeType
2084 String.Empty, // prefix
2086 false, // isEmptyElement
// Attributes were just added above, so they must not be cleared.
2088 false // clearAttributes
// Advances idx past any XML whitespace in text starting at idx.
// Returns true when at least one whitespace character was skipped.
bool SkipWhitespaceInString (string text, ref int idx)
{
	int skipped = 0;
	for (; idx < text.Length && XmlChar.IsWhitespace (text [idx]); idx++)
		skipped++;
	return skipped > 0;
}
// Parses one pseudo-attribute of the form  name = "value"  (single or
// double quotes) out of src, starting at idx.
//   src   - text to parse (the content of an XML declaration).
//   idx   - in: position to start from; out: position just after the
//           closing quote.
//   name  - receives the attribute name (may be empty when no name
//           character is found; the '=' check below then fails).
//   value - receives the text between the quotes.
// Raises a not-well-formed error when '=' or the opening quote is
// missing, and also when the closing quote is missing. Without that last
// check an unterminated value was silently accepted and idx was left
// beyond the end of src.
private void ParseAttributeFromString (string src,
	ref int idx, out string name, out string value)
{
	while (idx < src.Length && XmlChar.IsWhitespace (src [idx]))
		idx++;

	int start = idx;
	while (idx < src.Length && XmlChar.IsNameChar (src [idx]))
		idx++;
	name = src.Substring (start, idx - start);

	while (idx < src.Length && XmlChar.IsWhitespace (src [idx]))
		idx++;
	if (idx == src.Length || src [idx] != '=')
		throw NotWFError (String.Format ("'=' is expected after {0}", name));
	idx++;

	while (idx < src.Length && XmlChar.IsWhitespace (src [idx]))
		idx++;

	if (idx == src.Length || src [idx] != '"' && src [idx] != '\'')
		throw NotWFError ("'\"' or '\'' is expected.");

	char quote = src [idx];
	idx++;
	start = idx;

	while (idx < src.Length && src [idx] != quote)
		idx++;
	// Running off the end means the value was never closed.
	if (idx == src.Length)
		throw NotWFError (String.Format ("Closing {0} is expected for the value of '{1}'.", quote, name));

	value = src.Substring (start, idx - start);
	idx++; // step over the closing quote
}
// Skips a text declaration (version and encoding pseudo-attributes) at
// the current position when the input starts with one, validating it on
// the way. The early checks on '<' and '?' have bodies that are not
// visible in this excerpt.
2134 internal void SkipTextDeclaration ()
2136 if (PeekChar () != '<')
2141 if (PeekChar () != '?') {
// Make sure six characters are buffered so the target can be inspected.
2147 while (peekCharsIndex < 6) {
2148 if (PeekChar () < 0)
// Characters 2..5 of the buffer hold the would-be "xml " target; other
// letter cases of it are rejected.
2153 if (new string (peekChars, 2, 4) != "xml ") {
2154 if (new string (peekChars, 2, 4).ToLower (CultureInfo.InvariantCulture) == "xml ") {
2155 throw NotWFError ("Processing instruction name must not be character sequence 'X' 'M' 'L' with case insensitivity.");
// Optional version pseudo-attribute.
2164 if (PeekChar () == 'v') {
2166 ExpectAfterWhitespace ('=');
2168 int quoteChar = ReadChar ();
2169 char [] expect1_0 = new char [3];
2170 int versionLength = 0;
2171 switch (quoteChar) {
2174 while (PeekChar () != quoteChar) {
2175 if (PeekChar () == -1)
2176 throw NotWFError ("Invalid version declaration inside text declaration.");
2177 else if (versionLength == 3)
2178 throw NotWFError ("Invalid version number inside text declaration.");
2180 expect1_0 [versionLength] = (char) ReadChar ();
// NOTE(review): the comparison with "1.0" only runs once three
// characters have been read; whether shorter values are rejected
// elsewhere is not visible here - confirm.
2182 if (versionLength == 3 && new String (expect1_0) != "1.0")
2183 throw NotWFError ("Invalid version number inside text declaration.");
2190 throw NotWFError ("Invalid version declaration inside text declaration.");
// Encoding pseudo-attribute: the quoted value is skipped, not checked.
2194 if (PeekChar () == 'e') {
2195 Expect ("encoding");
2196 ExpectAfterWhitespace ('=');
2198 int quoteChar = ReadChar ();
2199 switch (quoteChar) {
2202 while (PeekChar () != quoteChar)
2203 if (ReadChar () == -1)
2204 throw NotWFError ("Invalid encoding declaration inside text declaration.");
2209 throw NotWFError ("Invalid encoding declaration inside text declaration.");
2211 // Encoding value should be checked inside XmlInputStream.
2213 // this condition is to check if this instance is
2214 // not created by XmlReader.Create() (which just
2215 // omits strict text declaration check).
2216 else if (Conformance == ConformanceLevel.Auto)
2217 throw NotWFError ("Encoding declaration is mandatory in text declaration.");
2221 curNodePeekIndex = peekCharsIndex; // without this it causes incorrect value start indication.
2224 // The reader is positioned on the first character after
2225 // the leading '<!'.
// Looks at the next character to decide which kind of declaration
// follows; the dispatch itself is not visible in this excerpt. Markup
// that matches none of the handled kinds raises a not-well-formed error.
2226 private void ReadDeclaration ()
2228 int ch = PeekChar ();
2246 throw NotWFError ("Unexpected declaration markup was found.");
2250 // The reader is positioned on the first character after
2251 // the leading '<!--'.
// Collects the comment text into valueBuffer and describes the node as a
// Comment. "--" inside the comment must be followed by '>'; otherwise a
// not-well-formed error is raised.
2252 private void ReadComment ()
// A comment seen before anything else moves the reader out of the
// initial state.
2254 if (currentState == XmlNodeType.None)
2255 currentState = XmlNodeType.XmlDeclaration;
2257 preserveCurrentTag = false;
2259 ClearValueBuffer ();
2262 while ((ch = PeekChar ()) != -1) {
2265 if (ch == '-' && PeekChar () == '-') {
2268 if (PeekChar () != '>')
2269 throw NotWFError ("comments cannot contain '--'");
// NOTE(review): unlike the text, CDATA and processing-instruction paths,
// this check is not guarded by CharacterChecking - confirm that is
// intended.
2275 if (XmlChar.IsInvalid (ch))
2276 throw NotWFError ("Not allowed character was found.");
2278 AppendValueChar (ch);
2282 XmlNodeType.Comment, // nodeType
2283 String.Empty, // name
2284 String.Empty, // prefix
2285 String.Empty, // localName
2286 false, // isEmptyElement
2287 null, // value: create only when required
2288 true // clearAttributes
2292 // The reader is positioned on the first character after
2293 // the leading '<![CDATA['.
// Collects the section content into valueBuffer up to the terminating
// "]]>" and describes the node as CDATA. Only allowed in the Element
// state; invalid characters raise a not-well-formed error when
// CharacterChecking is on.
2294 private void ReadCDATA ()
2296 if (currentState != XmlNodeType.Element)
2297 throw NotWFError ("CDATA section cannot appear in this state.");
2298 preserveCurrentTag = false;
2300 ClearValueBuffer ();
2304 while (PeekChar () != -1) {
// Look for the "]]>" terminator.
2309 if (ch == ']' && PeekChar () == ']') {
2310 ch = ReadChar (); // ']'
2312 if (PeekChar () == '>') {
// Line-end normalization, applied only when normalization is enabled.
2319 if (normalization && ch == '\r') {
2322 // append '\n' instead of '\r'.
2323 AppendValueChar ('\n');
2324 // otherwise, discard '\r'.
2327 if (CharacterChecking && XmlChar.IsInvalid (ch))
2328 throw NotWFError ("Invalid character was found.");
2330 // FIXME: it might be optimized by the JIT later,
2331 // AppendValueChar (ch);
// Hand-inlined equivalent of AppendValueChar (ch).
2333 if (ch <= Char.MaxValue)
2334 valueBuffer.Append ((char) ch);
2336 AppendSurrogatePairValueChar (ch);
2341 XmlNodeType.CDATA, // nodeType
2342 String.Empty, // name
2343 String.Empty, // prefix
2344 String.Empty, // localName
2345 false, // isEmptyElement
2346 null, // value: create only when required
2347 true // clearAttributes
2351 // The reader is positioned on the first character after
2352 // the leading '<!DOCTYPE'.
// Reads the document type declaration: name, optional external id
// (SYSTEM or PUBLIC), optional internal subset, closing '>'. It then
// builds the DTD object model and describes the node as DocumentType
// with PUBLIC / SYSTEM attributes when those ids are present.
2353 private void ReadDoctypeDecl ()
// The condition guarding this throw is not visible in this excerpt.
2356 throw NotWFError ("Document Type Declaration (DTD) is prohibited in this XML.");
// A doctype may not follow another doctype or any element content.
2357 switch (currentState) {
2358 case XmlNodeType.DocumentType:
2359 case XmlNodeType.Element:
2360 case XmlNodeType.EndElement:
2361 throw NotWFError ("Document type cannot appear in this state.");
2363 currentState = XmlNodeType.DocumentType;
2365 string doctypeName = null;
2366 string publicId = null;
2367 string systemId = null;
2368 int intSubsetStartLine = 0;
2369 int intSubsetStartColumn = 0;
2372 doctypeName = ReadName ();
// SYSTEM form: the keyword is still to be read by ReadSystemLiteral.
2377 systemId = ReadSystemLiteral (true);
// PUBLIC form: public id, mandatory whitespace, then a bare system literal.
2380 publicId = ReadPubidLiteral ();
2381 if (!SkipWhitespace ())
2382 throw NotWFError ("Whitespace is required between PUBLIC id and SYSTEM id.");
2383 systemId = ReadSystemLiteral (false);
2389 if(PeekChar () == '[')
2391 // read markupdecl etc. or end of decl
// Remember where the internal subset starts; the position is passed on
// to GenerateDTDObjectModel below.
2393 intSubsetStartLine = this.LineNumber;
2394 intSubsetStartColumn = this.LinePosition;
2395 ClearValueBuffer ();
2396 ReadInternalSubset ();
2397 parserContext.InternalSubset = CreateValueString ();
2399 // end of DOCTYPE decl.
2400 ExpectAfterWhitespace ('>');
2402 GenerateDTDObjectModel (doctypeName, publicId,
2403 systemId, parserContext.InternalSubset,
2404 intSubsetStartLine, intSubsetStartColumn);
2406 // set properties for <!DOCTYPE> node
2408 XmlNodeType.DocumentType, // nodeType
2409 doctypeName, // name
2410 String.Empty, // prefix
2411 doctypeName, // localName
2412 false, // isEmptyElement
2413 parserContext.InternalSubset, // value
2414 true // clearAttributes
2417 if (publicId != null)
2418 AddAttributeWithValue ("PUBLIC", publicId);
2419 if (systemId != null)
2420 AddAttributeWithValue ("SYSTEM", systemId);
2421 currentAttribute = currentAttributeValue = -1;
// Convenience overload: builds the DTD object model with 0 passed for
// both the internal subset start line and start column.
internal DTDObjectModel GenerateDTDObjectModel (string name, string publicId,
	string systemId, string internalSubset)
{
	DTDObjectModel model = GenerateDTDObjectModel (
		name, publicId, systemId, internalSubset, 0, 0);
	return model;
}
// Creates a new DTDObjectModel in the parser context, copies the
// declaration data and reader settings into it, and lets a DTDReader
// produce the final model.
// intSubsetStartLine / intSubsetStartColumn are handed to the DTDReader
// as the position where the internal subset starts.
// NOTE(review): the name parameter is not used in the visible lines;
// presumably it is assigned to the DTD on a line missing from this
// excerpt - confirm.
2430 internal DTDObjectModel GenerateDTDObjectModel (string name, string publicId,
2431 string systemId, string internalSubset, int intSubsetStartLine, int intSubsetStartColumn)
2434 parserContext.Dtd = new DTDObjectModel (this.NameTable); // merges both internal and external subsets in the meantime,
2435 DTD.BaseURI = BaseURI;
2437 DTD.PublicId = publicId;
2438 DTD.SystemId = systemId;
2439 DTD.InternalSubset = internalSubset;
2440 DTD.XmlResolver = resolver;
2441 DTD.IsStandalone = isStandalone;
// The DTD is stamped with the reader's current position.
2442 DTD.LineNumber = line;
2443 DTD.LinePosition = column;
2445 DTDReader dr = new DTDReader (DTD, intSubsetStartLine, intSubsetStartColumn);
2446 dr.Normalization = this.normalization;
2447 return dr.GenerateDTDObjectModel ();
// Lexical states tracked while scanning the internal DTD subset.
// Members referenced elsewhere in this file: Free, ElementDecl,
// AttlistDecl, EntityDecl, NotationDecl, PI, Comment,
// InsideSingleQuoted and InsideDoubleQuoted (the member list itself is
// not visible in this excerpt).
2450 private enum DtdInputState
// Typed wrapper around a non-generic Stack holding DtdInputState values.
// A new stack starts with DtdInputState.Free on it.
2463 private class DtdInputStateStack
2465 Stack intern = new Stack ();
2466 public DtdInputStateStack ()
2468 Push (DtdInputState.Free);
// Returns the state on top of the stack without removing it.
2471 public DtdInputState Peek ()
2473 return (DtdInputState) intern.Peek ();
// Removes and returns the state on top of the stack.
2476 public DtdInputState Pop ()
2478 return (DtdInputState) intern.Pop ();
// Pushes a state (body not visible in this excerpt).
2481 public void Push (DtdInputState val)
// State stack used while scanning the internal subset.
2488 DtdInputStateStack stateStack = new DtdInputStateStack ();
// The current scanning state, i.e. the top of stateStack.
2489 DtdInputState State {
2490 get { return stateStack.Peek (); }
// Consumes one character from the input, appends it to valueBuffer and
// returns it to the caller.
private int ReadValueChar ()
{
	int consumed = ReadChar ();
	AppendValueChar (consumed);
	return consumed;
}
// Appends the literal s to valueBuffer.
// NOTE(review): going by the name, s is presumably matched against the
// input first; that statement is not visible in this excerpt - confirm.
2500 private void ExpectAndAppend (string s)
2503 valueBuffer.Append (s);
2506 // Simply read but not generate any result.
// Scans the internal DTD subset character by character, copying it into
// valueBuffer through ReadValueChar () and tracking the lexical state on
// stateStack. No declarations are built here. The loop ends once
// continueParse is cleared in the Free state.
// The case labels of the outer switch are not visible in this excerpt.
2507 private void ReadInternalSubset ()
2509 bool continueParse = true;
2511 while (continueParse) {
2512 switch (ReadValueChar ()) {
2515 case DtdInputState.Free:
// The terminating character is not part of the subset text: drop it
// from the buffer and stop.
2517 valueBuffer.Remove (valueBuffer.Length - 1, 1);
2518 continueParse = false;
2520 case DtdInputState.InsideDoubleQuoted:
2521 case DtdInputState.InsideSingleQuoted:
2522 case DtdInputState.Comment:
2525 throw NotWFError ("unexpected end of file at DTD.");
2529 throw NotWFError ("unexpected end of file at DTD.");
2532 case DtdInputState.InsideDoubleQuoted:
2533 case DtdInputState.InsideSingleQuoted:
2534 case DtdInputState.Comment:
2535 continue; // well-formed
// Markup start: the following characters select the declaration kind
// and the matching state is pushed.
2537 int c = ReadValueChar ();
2540 stateStack.Push (DtdInputState.PI);
2543 switch (ReadValueChar ()) {
2545 switch (ReadValueChar ()) {
2547 ExpectAndAppend ("EMENT");
2548 stateStack.Push (DtdInputState.ElementDecl);
2551 ExpectAndAppend ("TITY");
2552 stateStack.Push (DtdInputState.EntityDecl);
2555 throw NotWFError ("unexpected token '<!E'.");
2559 ExpectAndAppend ("TTLIST");
2560 stateStack.Push (DtdInputState.AttlistDecl);
2563 ExpectAndAppend ("OTATION");
2564 stateStack.Push (DtdInputState.NotationDecl);
2567 ExpectAndAppend ("-");
2568 stateStack.Push (DtdInputState.Comment);
2573 throw NotWFError (String.Format ("unexpected '<{0}'.", (char) c));
// Quote handling: a quote opens a quoted state unless the scanner is
// inside the other kind of quote or inside a comment.
2577 if (State == DtdInputState.InsideSingleQuoted)
2579 else if (State != DtdInputState.InsideDoubleQuoted && State != DtdInputState.Comment)
2580 stateStack.Push (DtdInputState.InsideSingleQuoted);
2583 if (State == DtdInputState.InsideDoubleQuoted)
2585 else if (State != DtdInputState.InsideSingleQuoted && State != DtdInputState.Comment)
2586 stateStack.Push (DtdInputState.InsideDoubleQuoted);
// All four declaration states share the NotationDecl handling.
2590 case DtdInputState.ElementDecl:
2591 goto case DtdInputState.NotationDecl;
2592 case DtdInputState.AttlistDecl:
2593 goto case DtdInputState.NotationDecl;
2594 case DtdInputState.EntityDecl:
2595 goto case DtdInputState.NotationDecl;
2596 case DtdInputState.NotationDecl:
2599 case DtdInputState.InsideDoubleQuoted:
2600 case DtdInputState.InsideSingleQuoted:
2601 case DtdInputState.Comment:
2604 throw NotWFError ("unexpected token '>'");
2608 if (State == DtdInputState.PI) {
2609 if (ReadValueChar () == '>')
2614 if (State == DtdInputState.Comment) {
2615 if (PeekChar () == '-') {
2617 ExpectAndAppend (">");
// Parameter entity references are accepted only in the states listed in
// this condition.
2623 if (State != DtdInputState.Free && State != DtdInputState.EntityDecl && State != DtdInputState.Comment && State != DtdInputState.InsideDoubleQuoted && State != DtdInputState.InsideSingleQuoted)
2624 throw NotWFError ("Parameter Entity Reference cannot appear as a part of markupdecl (see XML spec 2.8).");
2630 // The reader is positioned on the first 'S' of "SYSTEM".
// Reads a quoted system literal and returns its content without the
// quotes. Reaching end of input inside the literal raises a
// not-well-formed error.
// expectSYSTEM: NOTE(review) its use is not visible in this excerpt;
// presumably it makes the method consume the "SYSTEM" keyword first.
2631 private string ReadSystemLiteral (bool expectSYSTEM)
2635 if (!SkipWhitespace ())
2636 throw NotWFError ("Whitespace is required after 'SYSTEM'.");
2640 int quoteChar = ReadChar (); // apos or quot
2642 ClearValueBuffer ();
// Collect characters until the matching quote.
2643 while (c != quoteChar) {
2646 throw NotWFError ("Unexpected end of stream in ExternalID.");
2648 AppendValueChar (c);
2650 return CreateValueString ();
// Reads a quoted public identifier literal and returns its content
// without the quotes. Whitespace must precede the literal. A character
// rejected by XmlChar.IsPubidChar, or end of input inside the literal,
// raises a not-well-formed error.
2653 private string ReadPubidLiteral()
2656 if (!SkipWhitespace ())
2657 throw NotWFError ("Whitespace is required after 'PUBLIC'.");
2658 int quoteChar = ReadChar ();
2660 ClearValueBuffer ();
// Collect characters until the matching quote.
2661 while(c != quoteChar)
2664 if(c < 0) throw NotWFError ("Unexpected end of stream in ExternalID.");
2665 if(c != quoteChar && !XmlChar.IsPubidChar (c))
2666 throw NotWFError (String.Format ("character '{0}' not allowed for PUBLIC ID", (char)c ));
2668 AppendValueChar (c);
2670 return CreateValueString ();
// The reader is positioned on the first character of the name.
// Reads a name and returns it; the prefix and local name produced by
// the two-output overload are discarded.
private string ReadName ()
{
	string ignoredPrefix;
	string ignoredLocalName;
	string qualifiedName = ReadName (out ignoredPrefix, out ignoredLocalName);
	return qualifiedName;
}
// Reads an XML name at the current position and returns it atomized
// through NameTable. prefix and localName receive the parts around the
// first ':' when namespaces are enabled; prefix is String.Empty in the
// branch without a colon. A first character that is not a legal
// name-start character raises a not-well-formed error.
// Two implementations follow, selected by USE_NAME_BUFFER: one slices
// the name out of peekChars, the other copies it into nameBuffer.
2681 private string ReadName (out string prefix, out string localName)
2683 #if !USE_NAME_BUFFER
// Keep the current tag buffered while the name is scanned; the previous
// setting is restored at the end of this variant.
2684 bool savePreserve = preserveCurrentTag;
2685 preserveCurrentTag = true;
// Offset of the name relative to the start of the current node.
2687 int startOffset = peekCharsIndex - curNodePeekIndex;
2688 int ch = PeekChar ();
2689 if (!XmlChar.IsFirstNameChar (ch))
2690 throw NotWFError (String.Format (CultureInfo.InvariantCulture, "a name did not start with a legal character {0} ({1})", ch, (char) ch));
2695 while (XmlChar.IsNameChar ((ch = PeekChar ()))) {
// Only the first ':' is considered, and only with namespaces enabled.
2697 if (ch == ':' && namespaces && colonAt < 0)
// Recomputed after scanning, from the node start plus the saved offset.
2702 int start = curNodePeekIndex + startOffset;
2704 string name = NameTable.Add (
2705 peekChars, start, length);
2708 prefix = NameTable.Add (
2709 peekChars, start, colonAt);
2710 localName = NameTable.Add (
2711 peekChars, start + colonAt + 1, length - colonAt - 1);
2713 prefix = String.Empty;
2717 preserveCurrentTag = savePreserve;
2721 int ch = PeekChar ();
2722 if (!XmlChar.IsFirstNameChar (ch))
2723 throw NotWFError (String.Format (CultureInfo.InvariantCulture, "a name did not start with a legal character {0} ({1})", ch, (char) ch));
2728 // AppendNameChar (ch);
2730 // nameBuffer.Length is always non-0 so no need to ExpandNameCapacity () here
2731 if (ch <= Char.MaxValue)
2732 nameBuffer [nameLength++] = (char) ch;
2734 AppendSurrogatePairNameChar (ch);
2739 while (XmlChar.IsNameChar ((ch = PeekChar ()))) {
2742 if (ch == ':' && namespaces && colonAt < 0)
2743 colonAt = nameLength;
2744 // AppendNameChar (ch);
// Make room before the write; AppendSurrogatePairNameChar relies on it.
2746 if (nameLength == nameCapacity)
2747 ExpandNameCapacity ();
2748 if (ch <= Char.MaxValue)
2749 nameBuffer [nameLength++] = (char) ch;
2751 AppendSurrogatePairNameChar (ch);
2755 string name = NameTable.Add (nameBuffer, 0, nameLength);
2758 prefix = NameTable.Add (nameBuffer, 0, colonAt);
2759 localName = NameTable.Add (nameBuffer, colonAt + 1, nameLength - colonAt - 1);
2761 prefix = String.Empty;
2769 // Read the next character and compare it against the
2770 // specified character.
// Raises a not-well-formed error naming both the expected and the found
// character when they differ; end of input is reported as "EOF".
2771 private void Expect (int expected)
2773 int ch = ReadChar ();
2775 if (ch != expected) {
2776 throw NotWFError (String.Format (CultureInfo.InvariantCulture,
2777 "expected '{0}' ({1:X}) but found '{2}' ({3:X})",
2780 ch < 0 ? (object) "EOF" : (char) ch,
// Consumes the input character by character and checks that it spells
// exactly the given string. The first mismatch (including end of input)
// raises a not-well-formed error naming the expected string.
private void Expect (string expected)
{
	foreach (char wanted in expected) {
		if (ReadChar () == wanted)
			continue;
		throw NotWFError (String.Format (CultureInfo.InvariantCulture,
			"'{0}' is expected", expected));
	}
}
// Reads characters, treating whitespace specially, and raises a
// not-well-formed error that reports the expected character c next to
// the character actually found (or "EOF").
// NOTE(review): the loop and the comparison with c are not visible in
// this excerpt; presumably whitespace is skipped and the first
// non-whitespace character must equal c - confirm.
2793 private void ExpectAfterWhitespace (char c)
2796 int i = ReadChar ();
// The range test is a cheap pre-filter before the XmlChar call.
2797 if (i < 0x21 && XmlChar.IsWhitespace (i))
2800 throw NotWFError (String.Format (CultureInfo.InvariantCulture, "Expected {0}, but found {1} [{2}]", c, i < 0 ? (object) "EOF" : (char) i, i));
2805 // Does not consume the first non-whitespace character.
// Skips over space, tab, line feed and carriage return characters.
// NOTE(review): the return statements are not visible in this excerpt;
// presumably the result is whether any whitespace was skipped - confirm.
2806 private bool SkipWhitespace ()
2808 // FIXME: It should be inlined by the JIT.
2809 // bool skipped = XmlChar.IsWhitespace (PeekChar ());
2810 int ch = PeekChar ();
// Hand-inlined whitespace test on the first character.
2811 bool skipped = (ch == 0x20 || ch == 0x9 || ch == 0xA || ch == 0xD);
2815 // FIXME: It should be inlined by the JIT.
2816 // while (XmlChar.IsWhitespace (PeekChar ()))
2818 while ((ch = PeekChar ()) == 0x20 || ch == 0x9 || ch == 0xA || ch == 0xD)
// Reads a run of whitespace and decides how to report it: as the start
// of a text node, as a Whitespace / SignificantWhitespace node, or not
// at all, depending on the reader state and whitespaceHandling.
// NOTE(review): the return statements and the bodies of the branches
// below are not visible in this excerpt.
2823 private bool ReadWhitespace ()
// Whitespace seen before anything else moves the reader out of the
// initial state.
2825 if (currentState == XmlNodeType.None)
2826 currentState = XmlNodeType.XmlDeclaration;
// Keep the scanned characters buffered so they can be copied out of
// peekChars afterwards; the previous setting is restored below.
2828 bool savePreserve = preserveCurrentTag;
2829 preserveCurrentTag = true;
2830 int startOffset = peekCharsIndex - curNodePeekIndex; // it should be 0 for now though.
2832 int ch = PeekChar ();
2836 // FIXME: It should be inlined by the JIT.
2837 // } while ((ch = PeekChar ()) != -1 && XmlChar.IsWhitespace (ch));
2838 } while (ch == 0x20 || ch == 0x9 || ch == 0xA || ch == 0xD);
// Inside an element, whitespace followed by anything other than '<' or
// end of input is the beginning of a text node.
2840 bool isText = currentState == XmlNodeType.Element && ch != -1 && ch != '<';
2842 if (!isText && (whitespaceHandling == WhitespaceHandling.None ||
2843 whitespaceHandling == WhitespaceHandling.Significant && XmlSpace != XmlSpace.Preserve))
// Copy the scanned whitespace from the peek buffer into valueBuffer.
2846 ClearValueBuffer ();
2847 valueBuffer.Append (peekChars, curNodePeekIndex, peekCharsIndex - curNodePeekIndex - startOffset);
2848 preserveCurrentTag = savePreserve;
2853 XmlNodeType nodeType = (this.XmlSpace == XmlSpace.Preserve) ?
2854 XmlNodeType.SignificantWhitespace : XmlNodeType.Whitespace;
2855 SetProperties (nodeType,
2860 null, // value: create only when required
2867 // Returns -1 if it should throw an error.
// Copies characters obtained from PeekChar () into `buffer`, writing
// from index `offset` onwards, for at most `length` loop iterations.
// '<' is handled specially so that nested markup is copied literally
// while nestLevel is tracked.
2868 private int ReadCharsInternal (char [] buffer, int offset, int length)
2870 int bufIndex = offset;
2871 for (int i = 0; i < length; i++) {
2872 int c = PeekChar ();
// NOTE(review): this error path throws instead of returning -1;
// confirm the "Returns -1" remark in the header comment is still accurate.
2875 throw NotWFError ("Unexpected end of xml.");
// A '<' that is not followed by '/' is stored in the buffer as-is.
2878 if (PeekChar () != '/') {
2880 buffer [bufIndex++] = '<';
// Post-decrement: the test uses the value of nestLevel before it is
// decremented. While that value is positive the '<' is stored as well.
2883 else if (nestLevel-- > 0) {
2884 buffer [bufIndex++] = '<';
2887 // Seems to skip immediate EndElement
// The ReadChars operation is finished: clear the in-progress flag and
// let Read () advance the reader.
2894 readCharsInProgress = false;
2895 Read (); // move to the next node
// Values that fit in a single UTF-16 code unit are stored directly.
2899 if (c <= Char.MaxValue)
2900 buffer [bufIndex++] = (char) c;
// Larger code points are split into a surrogate pair: the high
// surrogate is 0xD800 plus the upper bits of (c - 0x10000), the low
// surrogate is 0xDC00 plus the lower 10 bits.
// NOTE(review): this stores two buffer entries during one loop
// iteration, so bufIndex can advance faster than i - confirm that the
// write cannot go past offset + length.
2902 buffer [bufIndex++] = (char) ((c - 0x10000) / 0x400 + 0xD800);
2903 buffer [bufIndex++] = (char) ((c - 0x10000) % 0x400 + 0xDC00);
2911 private bool ReadUntilEndTag ()
2914 currentState = XmlNodeType.EndElement;
2920 throw NotWFError ("Unexpected end of xml.");
2922 if (PeekChar () != '/') {
2926 else if (--nestLevel > 0)
2929 string name = ReadName ();
2930 if (name != elementNames [elementNameStackPos - 1].Name)