2 // System.Xml.XmlTextReader
5 // Jason Diamond (jason@injektilo.org)
6 // Adam Treat (manyoso@yahoo.com)
7 // Atsushi Enomoto (ginga@kit.hi-ho.ne.jp)
9 // (C) 2001, 2002 Jason Diamond http://injektilo.org/
10 // Copyright (C) 2005-2006 Novell, Inc (http://www.novell.com)
12 // Permission is hereby granted, free of charge, to any person obtaining
13 // a copy of this software and associated documentation files (the
14 // "Software"), to deal in the Software without restriction, including
15 // without limitation the rights to use, copy, modify, merge, publish,
16 // distribute, sublicense, and/or sell copies of the Software, and to
17 // permit persons to whom the Software is furnished to do so, subject to
18 // the following conditions:
20 // The above copyright notice and this permission notice shall be
21 // included in all copies or substantial portions of the Software.
23 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
27 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
28 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
29 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
31 //#define USE_NAME_BUFFER
34 // Optimization TODOs:
36 // - support PushbackChar() which reverts one character read.
37 // - ReadTextReader() should always keep one pushback buffer
38 // as pushback safety net.
39 // - Replace (peek,read) * n -> read * n + pushback
43 using System.Collections;
44 using System.Collections.Generic;
45 using System.Globalization;
47 using System.Security.Permissions;
49 using System.Xml.Schema;
55 class XmlTextReader : XmlReader,
56 IXmlLineInfo, IXmlNamespaceResolver, IHasXmlParserContext
60 protected XmlTextReader ()
64 public XmlTextReader (Stream input)
65 : this (new XmlStreamReader (input))
69 public XmlTextReader (string url)
70 : this(url, new NameTable ())
74 public XmlTextReader (TextReader input)
75 : this (input, new NameTable ())
79 protected XmlTextReader (XmlNameTable nt)
80 : this (String.Empty, null, XmlNodeType.None, null)
84 public XmlTextReader (Stream input, XmlNameTable nt)
85 : this(new XmlStreamReader (input), nt)
89 public XmlTextReader (string url, Stream input)
90 : this (url, new XmlStreamReader (input))
94 public XmlTextReader (string url, TextReader input)
95 : this (url, input, new NameTable ())
99 public XmlTextReader (string url, XmlNameTable nt)
101 reader_uri = resolver.ResolveUri (null, url);
102 string uriString = (reader_uri == null) ? String.Empty : reader_uri.ToString ();
103 XmlParserContext ctx = new XmlParserContext (nt,
104 new XmlNamespaceManager (nt),
107 this.InitializeContext (uriString, ctx, null, XmlNodeType.Document);
110 public XmlTextReader (TextReader input, XmlNameTable nt)
111 : this (String.Empty, input, nt)
115 // This is used in XmlReader.Create() to indicate that string
116 // argument is uri, not an xml fragment.
117 internal XmlTextReader (bool dummy, XmlResolver resolver, string url, XmlNodeType fragType, XmlParserContext context)
119 if (resolver == null) {
120 resolver = new XmlUrlResolver ();
122 this.XmlResolver = resolver;
125 Stream stream = GetStreamFromUrl (url, out uriString);
126 this.InitializeContext (uriString, context, new XmlStreamReader (stream), fragType);
129 public XmlTextReader (Stream xmlFragment, XmlNodeType fragType, XmlParserContext context)
130 : this (context != null ? context.BaseURI : String.Empty,
131 new XmlStreamReader (xmlFragment),
135 disallowReset = true;
138 internal XmlTextReader (string baseURI, TextReader xmlFragment, XmlNodeType fragType)
139 : this (baseURI, xmlFragment, fragType, null)
143 public XmlTextReader (string url, Stream input, XmlNameTable nt)
144 : this (url, new XmlStreamReader (input), nt)
148 public XmlTextReader (string url, TextReader input, XmlNameTable nt)
149 : this (url, input, XmlNodeType.Document, null)
153 public XmlTextReader (string xmlFragment, XmlNodeType fragType, XmlParserContext context)
154 : this (context != null ? context.BaseURI : String.Empty,
155 new StringReader (xmlFragment),
159 disallowReset = true;
162 internal XmlTextReader (string url, TextReader fragment, XmlNodeType fragType, XmlParserContext context)
164 InitializeContext (url, context, fragment, fragType);
167 Stream GetStreamFromUrl (string url, out string absoluteUriString)
171 throw new ArgumentNullException ("url");
173 throw new ArgumentException ("url");
176 // This needs to work even if resolver is explicitly set to null
178 var res = resolver ?? new XmlUrlResolver ();
179 var uri = res.ResolveUri (null, url);
180 absoluteUriString = uri != null ? uri.ToString () : String.Empty;
181 return res.GetEntity (uri, null, typeof (Stream)) as Stream;
188 public override int AttributeCount
190 get { return attributeCount; }
193 public override string BaseURI
195 get { return parserContext.BaseURI; }
198 public override bool CanReadBinaryContent {
202 public override bool CanReadValueChunk {
206 internal bool CharacterChecking {
207 get { return checkCharacters; }
208 set { checkCharacters = value; }
211 // for XmlReaderSettings.CloseInput support
212 internal bool CloseInput {
213 get { return closeInput; }
214 set { closeInput = value; }
217 public override int Depth
220 int nodeTypeMod = currentToken.NodeType == XmlNodeType.Element ? 0 : -1;
221 if (currentAttributeValue >= 0)
222 return nodeTypeMod + elementDepth + 2; // inside attribute value.
223 else if (currentAttribute >= 0)
224 return nodeTypeMod + elementDepth + 1;
229 public Encoding Encoding
231 get { return parserContext.Encoding; }
234 public EntityHandling EntityHandling {
235 get { return entityHandling; }
236 set { entityHandling = value; }
239 public override bool EOF {
240 get { return readState == ReadState.EndOfFile; }
243 public override bool HasValue {
244 get { return cursorToken.Value != null; }
247 public override bool IsDefault {
248 // XmlTextReader does not expand default attributes.
249 get { return false; }
252 public override bool IsEmptyElement {
253 get { return cursorToken.IsEmptyElement; }
256 public int LineNumber {
258 if (useProceedingLineInfo)
261 return cursorToken.LineNumber;
265 public int LinePosition {
267 if (useProceedingLineInfo)
270 return cursorToken.LinePosition;
274 public override string LocalName {
275 get { return cursorToken.LocalName; }
278 public override string Name {
279 get { return cursorToken.Name; }
282 public bool Namespaces {
283 get { return namespaces; }
285 if (readState != ReadState.Initial)
286 throw new InvalidOperationException ("Namespaces have to be set before reading.");
291 public override string NamespaceURI {
292 get { return cursorToken.NamespaceURI; }
295 public override XmlNameTable NameTable {
296 get { return nameTable; }
299 public override XmlNodeType NodeType {
300 get { return cursorToken.NodeType; }
303 public bool Normalization {
304 get { return normalization; }
305 set { normalization = value; }
308 public override string Prefix {
309 get { return cursorToken.Prefix; }
312 public bool ProhibitDtd {
313 get { return prohibitDtd; }
314 set { prohibitDtd = value; }
317 public override char QuoteChar {
318 get { return cursorToken.QuoteChar; }
321 public override ReadState ReadState {
322 get { return readState; }
325 public override XmlReaderSettings Settings {
326 get { return base.Settings; }
329 public override string Value {
330 get { return cursorToken.Value != null ? cursorToken.Value : String.Empty; }
333 public WhitespaceHandling WhitespaceHandling {
334 get { return whitespaceHandling; }
335 set { whitespaceHandling = value; }
338 public override string XmlLang {
339 get { return parserContext.XmlLang; }
342 public XmlResolver XmlResolver {
343 set { resolver = value; }
346 public override XmlSpace XmlSpace {
347 get { return parserContext.XmlSpace; }
354 public override void Close ()
356 readState = ReadState.Closed;
358 cursorToken.Clear ();
359 currentToken.Clear ();
361 if (closeInput && reader != null)
// Returns the value of the attribute stored at index i of the current node.
//
// i: zero-based attribute index; must satisfy 0 <= i < AttributeCount.
// Throws ArgumentOutOfRangeException when i lies outside that range.
public override string GetAttribute (int i)
{
	// Negative indices are rejected here as well, so callers get the
	// documented argument exception instead of a raw array-index failure.
	if (i < 0 || i >= attributeCount)
		throw new ArgumentOutOfRangeException ("i", "i must be non-negative and smaller than AttributeCount.");

	return attributeTokens [i].Value;
}
374 // MS.NET 1.0 msdn says that this method returns String.Empty
375 // for absent attribute, but in fact it returns null.
376 // This description is corrected in MS.NET 1.1 msdn.
377 public override string GetAttribute (string name)
379 for (int i = 0; i < attributeCount; i++)
380 if (attributeTokens [i].Name == name)
381 return attributeTokens [i].Value;
385 private int GetIndexOfQualifiedAttribute (string localName, string namespaceURI)
387 namespaceURI = namespaceURI ?? String.Empty;
388 for (int i = 0; i < attributeCount; i++) {
389 XmlAttributeTokenInfo ti = attributeTokens [i];
390 if (ti.LocalName == localName && ti.NamespaceURI == namespaceURI)
396 XmlParserContext IHasXmlParserContext.ParserContext {
397 get { return parserContext; }
400 public override string GetAttribute (string localName, string namespaceURI)
402 int idx = this.GetIndexOfQualifiedAttribute (localName, namespaceURI);
405 return attributeTokens [idx].Value;
// Returns the prefix-to-namespace mappings for the requested scope,
// as reported by the reader's namespace manager.
public IDictionary<string, string> GetNamespacesInScope (XmlNamespaceScope scope)
{
	IDictionary<string, string> inScope = nsmgr.GetNamespacesInScope (scope);
	return inScope;
}
413 IDictionary<string, string> IXmlNamespaceResolver.GetNamespacesInScope (XmlNamespaceScope scope)
415 return GetNamespacesInScope (scope);
418 public TextReader GetRemainder ()
420 if (peekCharsLength < 0)
422 return new StringReader (new string (peekChars, peekCharsIndex, peekCharsLength - peekCharsIndex) + reader.ReadToEnd ());
425 public bool HasLineInfo ()
// Public namespace lookup: delegates to the private overload with
// atomizedNames set to false.
public override string LookupNamespace (string prefix)
{
	const bool atomizedNames = false;
	return LookupNamespace (prefix, atomizedNames);
}
// Asks the namespace manager for the URI bound to prefix. An empty-string
// result is reported to the caller as null; any other result (including
// null) is passed through unchanged.
private string LookupNamespace (string prefix, bool atomizedNames)
{
	string uri = nsmgr.LookupNamespace (prefix, atomizedNames);
	if (uri == String.Empty)
		return null;
	return uri;
}
442 string IXmlNamespaceResolver.LookupPrefix (string ns)
444 return LookupPrefix (ns, false);
447 public string LookupPrefix (string ns, bool atomizedName)
449 return nsmgr.LookupPrefix (ns, atomizedName);
// Positions the cursor on the attribute at index i of the current node.
//
// i: zero-based attribute index; must satisfy 0 <= i < AttributeCount.
// Throws ArgumentOutOfRangeException when i lies outside that range.
public override void MoveToAttribute (int i)
{
	// Validate both bounds up front so no cursor state is modified
	// before a bad index is detected.
	if (i < 0 || i >= attributeCount)
		throw new ArgumentOutOfRangeException ("i", "attribute index out of range.");

	currentAttribute = i;
	// -1 means the cursor is on the attribute itself, not on one of its value tokens.
	currentAttributeValue = -1;
	cursorToken = attributeTokens [i];
}
462 public override bool MoveToAttribute (string name)
464 for (int i = 0; i < attributeCount; i++) {
465 XmlAttributeTokenInfo ti = attributeTokens [i];
466 if (ti.Name == name) {
474 public override bool MoveToAttribute (string localName, string namespaceName)
476 int idx = GetIndexOfQualifiedAttribute (localName, namespaceName);
479 MoveToAttribute (idx);
483 public override bool MoveToElement ()
485 if (currentToken == null) // for attribute .ctor()
488 if (cursorToken == currentToken)
491 if (currentAttribute >= 0) {
492 currentAttribute = -1;
493 currentAttributeValue = -1;
494 cursorToken = currentToken;
501 public override bool MoveToFirstAttribute ()
503 if (attributeCount == 0)
506 return MoveToNextAttribute ();
509 public override bool MoveToNextAttribute ()
511 if (currentAttribute == 0 && attributeCount == 0)
513 if (currentAttribute + 1 < attributeCount) {
515 currentAttributeValue = -1;
516 cursorToken = attributeTokens [currentAttribute];
523 public override bool Read ()
525 if (readState == ReadState.Closed)
527 curNodePeekIndex = peekCharsIndex;
528 preserveCurrentTag = true;
532 if (startNodeType == XmlNodeType.Attribute) {
533 if (currentAttribute == 0)
534 return false; // already read.
535 SkipTextDeclaration ();
537 IncrementAttributeToken ();
538 ReadAttributeValueTokens ('"');
539 cursorToken = attributeTokens [0];
540 currentAttributeValue = -1;
541 readState = ReadState.Interactive;
544 if (readState == ReadState.Initial && currentState == XmlNodeType.Element)
545 SkipTextDeclaration ();
551 readState = ReadState.Interactive;
552 currentLinkedNodeLineNumber = line;
553 currentLinkedNodeLinePosition = column;
554 useProceedingLineInfo = true;
556 cursorToken = currentToken;
558 currentAttribute = currentAttributeValue = -1;
559 currentToken.Clear ();
561 // It was moved from end of ReadStartTag ().
567 if (readCharsInProgress) {
568 readCharsInProgress = false;
569 return ReadUntilEndTag ();
572 more = ReadContent ();
574 if (!more && startNodeType == XmlNodeType.Document && currentState != XmlNodeType.EndElement)
575 throw NotWFError ("Document element did not appear.");
577 useProceedingLineInfo = false;
581 public override bool ReadAttributeValue ()
583 if (readState == ReadState.Initial && startNodeType == XmlNodeType.Attribute) {
587 if (currentAttribute < 0)
589 XmlAttributeTokenInfo ti = attributeTokens [currentAttribute];
590 if (currentAttributeValue < 0)
591 currentAttributeValue = ti.ValueTokenStartIndex - 1;
593 if (currentAttributeValue < ti.ValueTokenEndIndex) {
594 currentAttributeValue++;
595 cursorToken = attributeValueTokens [currentAttributeValue];
602 public int ReadBase64 (byte [] buffer, int offset, int length)
604 BinaryCharGetter = binaryCharGetter;
606 return Binary.ReadBase64 (buffer, offset, length);
608 BinaryCharGetter = null;
612 public int ReadBinHex (byte [] buffer, int offset, int length)
614 BinaryCharGetter = binaryCharGetter;
616 return Binary.ReadBinHex (buffer, offset, length);
618 BinaryCharGetter = null;
622 public int ReadChars (char [] buffer, int offset, int length)
625 throw new ArgumentOutOfRangeException (
629 "Offset must be non-negative integer.");
631 } else if (length < 0) {
632 throw new ArgumentOutOfRangeException (
636 "Length must be non-negative integer.");
638 } else if (buffer.Length < offset + length)
639 throw new ArgumentOutOfRangeException ("buffer length is smaller than the sum of offset and length.");
641 if (IsEmptyElement) {
646 if (!readCharsInProgress && NodeType != XmlNodeType.Element)
649 preserveCurrentTag = false;
650 readCharsInProgress = true;
651 useProceedingLineInfo = true;
653 return ReadCharsInternal (buffer, offset, length);
656 public void ResetState ()
659 throw new InvalidOperationException ("Cannot call ResetState when parsing an XML fragment.");
663 public override void ResolveEntity ()
665 // XmlTextReader does not resolve entities.
666 throw new InvalidOperationException ("XmlTextReader cannot resolve external entities.");
669 [MonoTODO] // FIXME: Implement, for performance improvement
670 public override void Skip ()
677 // Parsed DTD Objects
678 // Note that this property must be kept since dtd2xsd uses it.
679 internal DTDObjectModel DTD {
680 get { return parserContext.Dtd; }
683 internal XmlResolver Resolver {
684 get { return resolver; }
689 internal class XmlTokenInfo
691 public XmlTokenInfo (XmlTextReader xtr)
699 protected XmlTextReader Reader;
702 public string LocalName;
703 public string Prefix;
704 public string NamespaceURI;
705 public bool IsEmptyElement;
706 public char QuoteChar;
707 public int LineNumber;
708 public int LinePosition;
709 public int ValueBufferStart;
710 public int ValueBufferEnd;
712 public XmlNodeType NodeType;
714 public virtual string Value {
716 if (valueCache != null)
718 if (ValueBufferStart >= 0) {
719 valueCache = Reader.valueBuffer.ToString (ValueBufferStart, ValueBufferEnd - ValueBufferStart);
723 case XmlNodeType.Text:
724 case XmlNodeType.SignificantWhitespace:
725 case XmlNodeType.Whitespace:
726 case XmlNodeType.Comment:
727 case XmlNodeType.CDATA:
728 case XmlNodeType.ProcessingInstruction:
729 valueCache = Reader.CreateValueString ();
734 set { valueCache = value; }
737 public virtual void Clear ()
739 ValueBufferStart = -1;
741 NodeType = XmlNodeType.None;
742 Name = LocalName = Prefix = NamespaceURI = String.Empty;
743 IsEmptyElement = false;
745 LineNumber = LinePosition = 0;
749 internal class XmlAttributeTokenInfo : XmlTokenInfo
751 public XmlAttributeTokenInfo (XmlTextReader reader)
754 NodeType = XmlNodeType.Attribute;
757 public int ValueTokenStartIndex;
758 public int ValueTokenEndIndex;
760 StringBuilder tmpBuilder = new StringBuilder ();
762 public override string Value {
764 if (valueCache != null)
767 // An empty value should return String.Empty.
768 if (ValueTokenStartIndex == ValueTokenEndIndex) {
769 XmlTokenInfo ti = Reader.attributeValueTokens [ValueTokenStartIndex];
770 if (ti.NodeType == XmlNodeType.EntityReference)
771 valueCache = String.Concat ("&", ti.Name, ";");
773 valueCache = ti.Value;
777 tmpBuilder.Length = 0;
778 for (int i = ValueTokenStartIndex; i <= ValueTokenEndIndex; i++) {
779 XmlTokenInfo ti = Reader.attributeValueTokens [i];
780 if (ti.NodeType == XmlNodeType.Text)
781 tmpBuilder.Append (ti.Value);
783 tmpBuilder.Append ('&');
784 tmpBuilder.Append (ti.Name);
785 tmpBuilder.Append (';');
789 valueCache = tmpBuilder.ToString (0, tmpBuilder.Length);
793 set { valueCache = value; }
796 public override void Clear ()
800 NodeType = XmlNodeType.Attribute;
801 ValueTokenStartIndex = ValueTokenEndIndex = 0;
// Registers this attribute with the reader's namespace manager when it is a
// namespace declaration: "xmlns:foo" binds LocalName, a bare "xmlns" binds
// the empty prefix. Comparison is by reference against the xmlns prefix string.
internal void FillXmlns ()
{
	if (Object.ReferenceEquals (Prefix, XmlNamespaceManager.PrefixXmlns)) {
		Reader.nsmgr.AddNamespace (LocalName, Value);
		return;
	}

	if (Object.ReferenceEquals (Name, XmlNamespaceManager.PrefixXmlns))
		Reader.nsmgr.AddNamespace (String.Empty, Value);
}
// Computes NamespaceURI for this attribute:
//  - namespace declarations (prefix or whole name is "xmlns") get the xmlns namespace,
//  - unprefixed attributes get the empty namespace,
//  - everything else is resolved through the reader using the attribute's prefix.
internal void FillNamespace ()
{
	bool isNamespaceDeclaration =
		Object.ReferenceEquals (Prefix, XmlNamespaceManager.PrefixXmlns) ||
		Object.ReferenceEquals (Name, XmlNamespaceManager.PrefixXmlns);

	if (isNamespaceDeclaration) {
		NamespaceURI = XmlNamespaceManager.XmlnsXmlns;
		return;
	}

	if (Prefix.Length == 0) {
		NamespaceURI = string.Empty;
		return;
	}

	NamespaceURI = Reader.LookupNamespace (Prefix, true);
}
824 private XmlTokenInfo cursorToken;
825 private XmlTokenInfo currentToken;
826 private XmlAttributeTokenInfo currentAttributeToken;
827 private XmlTokenInfo currentAttributeValueToken;
828 private XmlAttributeTokenInfo [] attributeTokens = new XmlAttributeTokenInfo [10];
829 private XmlTokenInfo [] attributeValueTokens = new XmlTokenInfo [10];
830 private int currentAttribute;
831 private int currentAttributeValue;
832 private int attributeCount;
834 private XmlParserContext parserContext;
835 private XmlNameTable nameTable;
836 private XmlNamespaceManager nsmgr;
838 private ReadState readState;
839 private bool disallowReset;
842 private int elementDepth;
843 private bool depthUp;
845 private bool popScope;
849 public TagName (string n, string l, string p)
856 public readonly string Name;
857 public readonly string LocalName;
858 public readonly string Prefix;
861 private TagName [] elementNames;
862 int elementNameStackPos;
864 private bool allowMultipleRoot;
866 private bool isStandalone;
868 private bool returnEntityReference;
869 private string entityReferenceName;
872 private char [] nameBuffer;
873 private int nameLength;
874 private int nameCapacity;
875 private const int initialNameCapacity = 32;
878 private StringBuilder valueBuffer;
881 private TextReader reader;
882 private char [] peekChars;
883 private int peekCharsIndex;
884 private int peekCharsLength;
885 private int curNodePeekIndex;
886 private bool preserveCurrentTag;
887 private const int peekCharCapacity = 1024;
892 private int currentLinkedNodeLineNumber;
893 private int currentLinkedNodeLinePosition;
894 private bool useProceedingLineInfo;
896 private XmlNodeType startNodeType;
897 // State machine attribute.
898 // XmlDeclaration: after the first node.
899 // DocumentType: after doctypedecl
900 // Element: inside document element
901 // EndElement: after document element
902 private XmlNodeType currentState;
904 // For ReadChars()/ReadBase64()/ReadBinHex()
905 private int nestLevel;
906 private bool readCharsInProgress;
907 XmlReaderBinarySupport.CharGetter binaryCharGetter;
909 // These values are never re-initialized.
910 private bool namespaces = true;
911 private WhitespaceHandling whitespaceHandling = WhitespaceHandling.All;
912 private XmlResolver resolver = new XmlUrlResolver ();
913 private bool normalization = false;
915 private bool checkCharacters;
916 private bool prohibitDtd = false;
917 private bool closeInput = true;
918 private EntityHandling entityHandling; // 2.0
920 private NameTable whitespacePool;
921 private char [] whitespaceCache;
// Builds (does not throw) an XmlException carrying this reader's line
// information and base URI together with the supplied message.
private XmlException NotWFError (string message)
{
	IXmlLineInfo lineInfo = this as IXmlLineInfo;
	string baseUri = BaseURI;
	return new XmlException (lineInfo, baseUri, message);
}
930 allowMultipleRoot = false;
931 elementNames = new TagName [10];
932 valueBuffer = new StringBuilder ();
933 binaryCharGetter = new XmlReaderBinarySupport.CharGetter (ReadChars);
935 nameBuffer = new char [initialNameCapacity];
938 checkCharacters = true;
939 if (Settings != null)
940 checkCharacters = Settings.CheckCharacters;
943 entityHandling = EntityHandling.ExpandCharEntities;
946 if (peekChars == null)
947 peekChars = new char [peekCharCapacity];
948 peekCharsLength = -1;
949 curNodePeekIndex = -1; // read from start
954 currentLinkedNodeLineNumber = currentLinkedNodeLinePosition = 0;
959 private void Clear ()
961 currentToken = new XmlTokenInfo (this);
962 cursorToken = currentToken;
963 currentAttribute = -1;
964 currentAttributeValue = -1;
967 readState = ReadState.Initial;
973 popScope = allowMultipleRoot = false;
974 elementNameStackPos = 0;
976 isStandalone = false;
977 returnEntityReference = false;
978 entityReferenceName = String.Empty;
982 nameCapacity = initialNameCapacity;
984 useProceedingLineInfo = false;
986 currentState = XmlNodeType.None;
988 readCharsInProgress = false;
991 private void InitializeContext (string url, XmlParserContext context, TextReader fragment, XmlNodeType fragType)
993 startNodeType = fragType;
994 parserContext = context;
995 if (context == null) {
996 XmlNameTable nt = new NameTable ();
997 parserContext = new XmlParserContext (nt,
998 new XmlNamespaceManager (nt),
1002 nameTable = parserContext.NameTable;
1003 nameTable = nameTable != null ? nameTable : new NameTable ();
1004 nsmgr = parserContext.NamespaceManager;
1005 nsmgr = nsmgr != null ? nsmgr : new XmlNamespaceManager (nameTable);
1007 if (url != null && url.Length > 0) {
1009 Uri uri = new Uri (url, UriKind.RelativeOrAbsolute);
1013 uri = new Uri (url, UriKind.RelativeOrAbsolute);
1014 } catch (Exception) {
1015 string path = Path.GetFullPath ("./a");
1016 uri = new Uri (new Uri (path), url);
1019 parserContext.BaseURI = uri.ToString ();
1027 case XmlNodeType.Attribute:
1028 reader = new StringReader (fragment.ReadToEnd ().Replace ("\"", """));
1030 case XmlNodeType.Element:
1031 currentState = XmlNodeType.Element;
1032 allowMultipleRoot = true;
1034 case XmlNodeType.Document:
1037 throw new XmlException (String.Format ("NodeType {0} is not allowed to create XmlTextReader.", fragType));
1041 internal ConformanceLevel Conformance {
1042 get { return allowMultipleRoot ? ConformanceLevel.Fragment : ConformanceLevel.Document; }
1044 if (value == ConformanceLevel.Fragment) {
1045 currentState = XmlNodeType.Element;
1046 allowMultipleRoot = true;
1051 internal void AdjustLineInfoOffset (int lineNumberOffset, int linePositionOffset)
1053 line += lineNumberOffset;
1054 column += linePositionOffset;
1057 internal void SetNameTable (XmlNameTable nameTable)
1059 parserContext.NameTable = nameTable;
1062 // Use this method rather than setting the properties
1063 // directly so that all the necessary properties can
1064 // be changed in harmony with each other. Maybe the
1065 // fields should be in a separate class to help enforce
1068 // Namespace URI could not be provided here.
1069 private void SetProperties (
1070 XmlNodeType nodeType,
1074 bool isEmptyElement,
1076 bool clearAttributes)
1078 SetTokenProperties (currentToken, nodeType, name, prefix, localName, isEmptyElement, value, clearAttributes);
1079 currentToken.LineNumber = this.currentLinkedNodeLineNumber;
1080 currentToken.LinePosition = this.currentLinkedNodeLinePosition;
1083 private void SetTokenProperties (
1085 XmlNodeType nodeType,
1089 bool isEmptyElement,
1091 bool clearAttributes)
1093 token.NodeType = nodeType;
1095 token.Prefix = prefix;
1096 token.LocalName = localName;
1097 token.IsEmptyElement = isEmptyElement;
1098 token.Value = value;
1099 this.elementDepth = depth;
1101 if (clearAttributes)
1105 private void ClearAttributes ()
1107 //for (int i = 0; i < attributeCount; i++)
1108 // attributeTokens [i].Clear ();
1110 currentAttribute = -1;
1111 currentAttributeValue = -1;
// Combines the two UTF-16 code units at peekCharsIndex and peekCharsIndex+1
// into a single code point. The argument c is handed to ReadTextReader when
// more input must be buffered before the pair can be inspected.
1114 private int PeekSurrogate (int c)
// Fewer than two buffered chars remain at the read position: request more input.
1116 if (peekCharsLength <= peekCharsIndex + 1) {
1117 if (!ReadTextReader (c))
1118 //FIXME: copy MS.NET behaviour when unpaired surrogate found
1122 int highhalfChar = peekChars [peekCharsIndex];
1123 int lowhalfChar = peekChars [peekCharsIndex+1];
// A well-formed pair has its first unit in 0xD800-0xDBFF and its second in 0xDC00-0xDFFF.
1125 if (((highhalfChar & 0xFC00) != 0xD800) || ((lowhalfChar & 0xFC00) != 0xDC00))
1126 //FIXME: copy MS.NET behaviour when unpaired surrogate found
// Not a well-formed pair: the first unit is returned as-is.
1127 return highhalfChar;
// Well-formed pair: 0x10000 + (high - 0xD800) * 0x400 + (low - 0xDC00).
1128 return 0x10000 + (highhalfChar-0xD800)*0x400 + (lowhalfChar-0xDC00);
1131 private int PeekChar ()
1133 if (peekCharsIndex < peekCharsLength) {
1134 int c = peekChars [peekCharsIndex];
1137 if (c < 0xD800 || c >= 0xDFFF)
1139 return PeekSurrogate (c);
1141 if (!ReadTextReader (-1))
1147 private int ReadChar ()
1149 int ch = PeekChar ();
1153 peekCharsIndex++; //Increment by 2 when a compound UCS-4 character was found
1158 } else if (ch != -1) {
1164 private void Advance (int ch) {
1168 peekCharsIndex++; //Increment by 2 when a compound UCS-4 character was found
1173 } else if (ch != -1) {
// Refills the peekChars buffer from the underlying TextReader.
// remained: when >= 0, one extra slot is reserved at peekCharsIndex ahead of
// the newly read data (see offset below); callers pass -1 when nothing is carried over.
// Returns true when the buffer holds at least one more char after the refill.
1178 private bool ReadTextReader (int remained)
// Lazily opens the input when the reader was constructed from a URL only.
1180 if (reader == null && reader_uri != null) {
1181 Uri uri = reader_uri;
1184 Stream stream = GetStreamFromUrl (uri.ToString (), out uriString);
1187 reader = new XmlStreamReader (stream);
// A negative peekCharsLength marks a buffer that has never been filled:
// read from index 0 and report whether anything arrived.
1189 if (peekCharsLength < 0) { // initialized buffer
1190 peekCharsLength = reader.Read (peekChars, 0, peekChars.Length);
1191 return peekCharsLength > 0;
// offset is 1 when a carried-over char (remained) needs a slot, otherwise 0.
1193 int offset = remained >= 0 ? 1 : 0;
// Number of buffered chars from the start of the current node to the end of valid data.
1194 int copysize = peekCharsLength - curNodePeekIndex;
1196 // It must assure that current tag content always exists
// No need to keep the current node's text: restart from the beginning of the buffer.
1198 if (!preserveCurrentTag) {
1199 curNodePeekIndex = 0;
1202 } else if (peekCharsLength < peekChars.Length) {
1203 // NonBlockingStreamReader returned less bytes
1204 // than the size of the buffer. In that case,
1205 // just refill the buffer.
// Current node starts in the first half of the valid data: double the buffer,
// moving the node's text to the front of the new array.
1206 } else if (curNodePeekIndex <= (peekCharsLength >> 1)) {
1207 // extend the buffer
1208 char [] tmp = new char [peekChars.Length * 2];
1209 Array.Copy (peekChars, curNodePeekIndex,
1212 curNodePeekIndex = 0;
1213 peekCharsIndex = copysize;
// Remaining case: slide the current node's text to the front of the existing buffer.
1215 Array.Copy (peekChars, curNodePeekIndex,
1216 peekChars, 0, copysize);
1217 curNodePeekIndex = 0;
1218 peekCharsIndex = copysize;
// NOTE(review): presumably guarded by a "remained >= 0" check not visible here - confirm.
1221 peekChars [peekCharsIndex] = (char) remained;
// Read at most peekCharCapacity chars into the free space after the read position.
1222 int count = peekChars.Length - peekCharsIndex - offset;
1223 if (count > peekCharCapacity)
1224 count = peekCharCapacity;
1225 int read = reader.Read (
1226 peekChars, peekCharsIndex + offset, count);
// Chars now available at the read position: the carried-over slot plus what was just read.
1227 int remainingSize = offset + read;
1228 peekCharsLength = peekCharsIndex + remainingSize;
1230 return (remainingSize != 0);
1233 private bool ReadContent ()
1237 parserContext.PopScope ();
1241 if (returnEntityReference)
1242 SetEntityReferenceProperties ();
1244 int c = PeekChar ();
1246 readState = ReadState.EndOfFile;
1247 ClearValueBuffer ();
1249 XmlNodeType.None, // nodeType
1250 String.Empty, // name
1251 String.Empty, // prefix
1252 String.Empty, // localName
1253 false, // isEmptyElement
1255 true // clearAttributes
1258 throw NotWFError ("unexpected end of file. Current depth is " + depth);
1265 switch (PeekChar ())
1273 ReadProcessingInstruction ();
1288 if (!ReadWhitespace ())
1290 return ReadContent ();
1298 return this.ReadState != ReadState.EndOfFile;
1301 private void SetEntityReferenceProperties ()
1303 DTDEntityDeclaration decl = DTD != null ? DTD.EntityDecls [entityReferenceName] : null;
1304 if (this.isStandalone)
1305 if (DTD == null || decl == null || !decl.IsInternalSubset)
1306 throw NotWFError ("Standalone document must not contain any references to an non-internally declared entity.");
1307 if (decl != null && decl.NotationName != null)
1308 throw NotWFError ("Reference to any unparsed entities is not allowed here.");
1310 ClearValueBuffer ();
1312 XmlNodeType.EntityReference, // nodeType
1313 entityReferenceName, // name
1314 String.Empty, // prefix
1315 entityReferenceName, // localName
1316 false, // isEmptyElement
1318 true // clearAttributes
1321 returnEntityReference = false;
1322 entityReferenceName = String.Empty;
1325 // The leading '<' has already been consumed.
1326 private void ReadStartTag ()
1328 if (currentState == XmlNodeType.EndElement)
1329 throw NotWFError ("Multiple document element was detected.");
1330 currentState = XmlNodeType.Element;
1334 currentLinkedNodeLineNumber = line;
1335 currentLinkedNodeLinePosition = column;
1337 string prefix, localName;
1338 string name = ReadName (out prefix, out localName);
1339 if (currentState == XmlNodeType.EndElement)
1340 throw NotWFError ("document has terminated, cannot open new element");
1342 bool isEmptyElement = false;
1347 if (XmlChar.IsFirstNameChar (PeekChar ()))
1348 ReadAttributes (false);
1349 cursorToken = this.currentToken;
1352 for (int i = 0; i < attributeCount; i++)
1353 attributeTokens [i].FillXmlns ();
1354 for (int i = 0; i < attributeCount; i++)
1355 attributeTokens [i].FillNamespace ();
1359 for (int i = 0; i < attributeCount; i++)
1360 if (attributeTokens [i].Prefix == "xmlns" &&
1361 attributeTokens [i].Value == String.Empty)
1362 throw NotWFError ("Empty namespace URI cannot be mapped to non-empty prefix.");
1364 for (int i = 0; i < attributeCount; i++) {
1365 for (int j = i + 1; j < attributeCount; j++)
1366 if (Object.ReferenceEquals (attributeTokens [i].Name, attributeTokens [j].Name) ||
1367 (Object.ReferenceEquals (attributeTokens [i].LocalName, attributeTokens [j].LocalName) &&
1368 Object.ReferenceEquals (attributeTokens [i].NamespaceURI, attributeTokens [j].NamespaceURI)))
1369 throw NotWFError ("Attribute name and qualified name must be identical.");
1372 if (PeekChar () == '/') {
1374 isEmptyElement = true;
1379 PushElementName (name, localName, prefix);
1381 parserContext.PushScope ();
1386 XmlNodeType.Element, // nodeType
1390 isEmptyElement, // isEmptyElement
1392 false // clearAttributes
1394 if (prefix.Length > 0)
1395 currentToken.NamespaceURI = LookupNamespace (prefix, true);
1396 else if (namespaces)
1397 currentToken.NamespaceURI = nsmgr.DefaultNamespace;
1400 if (NamespaceURI == null)
1401 throw NotWFError (String.Format ("'{0}' is undeclared namespace.", Prefix));
1403 for (int i = 0; i < attributeCount; i++) {
1404 MoveToAttribute (i);
1405 if (NamespaceURI == null)
1406 throw NotWFError (String.Format ("'{0}' is undeclared namespace.", Prefix));
1413 for (int i = 0; i < attributeCount; i++) {
1414 if (!Object.ReferenceEquals (attributeTokens [i].Prefix, XmlNamespaceManager.PrefixXml))
1416 string aname = attributeTokens [i].LocalName;
1417 string value = attributeTokens [i].Value;
1420 if (this.resolver != null) {
1422 BaseURI != String.Empty ?
1423 new Uri (BaseURI) : null;
1424 // xml:base="" without any base URI -> pointless. However there are
1425 // some people who use such xml:base. Seealso bug #608391.
1426 if (buri == null && String.IsNullOrEmpty (value))
1428 Uri uri = resolver.ResolveUri (
1430 parserContext.BaseURI =
1436 parserContext.BaseURI = value;
1439 parserContext.XmlLang = value;
1444 parserContext.XmlSpace = XmlSpace.Preserve;
1447 parserContext.XmlSpace = XmlSpace.Default;
1450 throw NotWFError (String.Format ("Invalid xml:space value: {0}", value));
1457 CheckCurrentStateUpdate ();
// Pushes the name parts of a newly opened element onto the element-name
// stack, doubling the backing array first when every slot is in use.
private void PushElementName (string name, string local, string prefix)
{
	int capacity = elementNames.Length;
	if (elementNameStackPos == capacity) {
		TagName [] grown = new TagName [capacity * 2];
		Array.Copy (elementNames, 0, grown, 0, elementNameStackPos);
		elementNames = grown;
	}

	int slot = elementNameStackPos++;
	elementNames [slot] = new TagName (name, local, prefix);
}
// Parses an end tag and makes the current node an EndElement.
// The name in the input must match the name popped from the open-element
// stack; a mismatch or an empty stack is a well-formedness error.
1471 // The reader is positioned on the first character
1472 // of the element's name.
1473 private void ReadEndTag ()
1475 if (currentState != XmlNodeType.Element)
1476 throw NotWFError ("End tag cannot appear in this state.");
// Record the current line/column as the position of the node being read.
1478 currentLinkedNodeLineNumber = line;
1479 currentLinkedNodeLinePosition = column;
// An empty stack means there is no open element left to close.
1481 if (elementNameStackPos == 0)
1482 throw NotWFError ("closing element without matching opening element");
// Pop the expected name and require the input to spell exactly that name.
1483 TagName expected = elementNames [--elementNameStackPos];
1484 Expect (expected.Name);
// Whitespace is allowed between the name and the closing '>'.
1486 ExpectAfterWhitespace ('>');
1491 XmlNodeType.EndElement, // nodeType
1492 expected.Name, // name
1493 expected.Prefix, // prefix
1494 expected.LocalName, // localName
1495 false, // isEmptyElement
1497 true // clearAttributes
// Resolve the namespace: a prefixed name is looked up by prefix; an
// unprefixed name gets the default namespace when namespace support is on.
1499 if (expected.Prefix.Length > 0)
1500 currentToken.NamespaceURI = LookupNamespace (expected.Prefix, true);
1501 else if (namespaces)
1502 currentToken.NamespaceURI = nsmgr.DefaultNamespace;
1506 CheckCurrentStateUpdate ();
// Sets currentState to EndElement when depth is 0, multiple roots are not
// allowed, and the current node is an empty element or an end element.
private void CheckCurrentStateUpdate ()
{
	if (depth != 0 || allowMultipleRoot)
		return;
	if (IsEmptyElement || NodeType == XmlNodeType.EndElement)
		currentState = XmlNodeType.EndElement;
}
// Appends ch to the name buffer as a UTF-16 surrogate pair (high unit
// first, then low unit). Callers in this file use it only when ch is
// greater than Char.MaxValue.
private void AppendSurrogatePairNameChar (int ch)
{
	int offset = ch - 0x10000;
	char high = (char) (offset / 0x400 + 0xD800);
	char low = (char) (offset % 0x400 + 0xDC00);

	nameBuffer [nameLength++] = high;
	// Writing the high unit may have filled the buffer; grow it before
	// storing the low unit.
	if (nameLength == nameCapacity)
		ExpandNameCapacity ();
	nameBuffer [nameLength++] = low;
}
// Doubles nameCapacity and moves the first nameLength characters of the
// name buffer into a newly allocated array of that size.
private void ExpandNameCapacity ()
{
	nameCapacity *= 2;
	char [] grown = new char [nameCapacity];
	Array.Copy (nameBuffer, grown, nameLength);
	nameBuffer = grown;
}
// Appends ch to the value buffer: as a single char when it fits in one
// UTF-16 code unit, otherwise as a surrogate pair.
private void AppendValueChar (int ch)
{
	if (ch > Char.MaxValue) {
		AppendSurrogatePairValueChar (ch);
		return;
	}
	valueBuffer.Append ((char) ch);
}
// Appends ch to the value buffer as a UTF-16 surrogate pair: the high
// unit followed by the low unit.
private void AppendSurrogatePairValueChar (int ch)
{
	int offset = ch - 0x10000;
	valueBuffer.Append ((char) (offset / 0x400 + 0xD800));
	valueBuffer.Append ((char) (offset % 0x400 + 0xDC00));
}
// Builds the string for the current node's value from valueBuffer.
// Whitespace node values are interned through a private NameTable
// (whitespacePool); everything else is a plain copy of the buffer.
1547 private string CreateValueString ()
1549 // Since whitespace strings are mostly identical
1550 // depending on the Depth, we make use of NameTable
1551 // to atomize whitespace strings.
1553 case XmlNodeType.Whitespace:
1554 case XmlNodeType.SignificantWhitespace:
1555 int len = valueBuffer.Length;
// The scratch array is allocated lazily with a fixed size of 32 chars.
1556 if (whitespaceCache == null)
1557 whitespaceCache = new char [32];
// Runs that do not fit in the scratch array are not pooled.
1558 if (len >= whitespaceCache.Length)
1560 if (whitespacePool == null)
1561 whitespacePool = new NameTable ();
// Two ways of copying the buffer into the scratch array follow.
// NOTE(review): presumably selected by a preprocessor condition that is not visible here.
1563 valueBuffer.CopyTo (0, whitespaceCache, 0, len);
1565 for (int i = 0; i < len; i++)
1566 whitespaceCache [i] = valueBuffer [i];
1568 return whitespacePool.Add (whitespaceCache, 0, valueBuffer.Length);
// Non-pooled path: both branches produce the full buffer content.
1570 return (valueBuffer.Capacity < 100) ?
1571 valueBuffer.ToString (0, valueBuffer.Length) :
1572 valueBuffer.ToString ();
// Empties valueBuffer by resetting its length to zero.
1575 private void ClearValueBuffer ()
1577 valueBuffer.Length = 0;
// Reads character data into the value buffer until the next '<' or end of
// input, then sets up the current node as Text, Whitespace or
// SignificantWhitespace. If an entity reference must be reported and no
// text was collected, the entity reference properties are set instead.
// notWhitespace: initial "content is not whitespace-only" flag; the loop
// may set it to true.
1580 // The reader is positioned on the first character
1582 private void ReadText (bool notWhitespace)
1584 if (currentState != XmlNodeType.Element)
1585 throw NotWFError ("Text node cannot appear in this state.");
1586 preserveCurrentTag = false;
1589 ClearValueBuffer ();
1591 int ch = PeekChar ();
// Tracks a preceding ']' so that the forbidden sequence "]]>" can be detected.
1592 bool previousWasCloseBracket = false;
1594 while (ch != '<' && ch != -1) {
1597 ch = ReadReference (false);
1598 if (returnEntityReference) // Returns -1 if char validation should not be done
1600 } else if (normalization && ch == '\r') {
1604 // append '\n' instead of '\r'.
1605 AppendValueChar ('\n');
1606 // and in case of "\r\n", discard '\r'.
1609 if (CharacterChecking && XmlChar.IsInvalid (ch))
1610 throw NotWFError ("Not allowed character was found.");
1614 // FIXME: it might be optimized by the JIT later,
1615 // AppendValueChar (ch);
// Hand-inlined copy of AppendValueChar.
1617 if (ch <= Char.MaxValue)
1618 valueBuffer.Append ((char) ch);
1620 AppendSurrogatePairValueChar (ch);
1625 if (previousWasCloseBracket)
1626 if (PeekChar () == '>')
1627 throw NotWFError ("Inside text content, character sequence ']]>' is not allowed.");
1628 previousWasCloseBracket = true;
1630 else if (previousWasCloseBracket)
1631 previousWasCloseBracket = false;
1633 notWhitespace = true;
// Nothing was buffered before the entity reference: report the reference itself.
1636 if (returnEntityReference && valueBuffer.Length == 0) {
1637 SetEntityReferenceProperties ();
// Whitespace-only content is SignificantWhitespace under xml:space="preserve".
1639 XmlNodeType nodeType = notWhitespace ? XmlNodeType.Text :
1640 this.XmlSpace == XmlSpace.Preserve ? XmlNodeType.SignificantWhitespace : XmlNodeType.Whitespace;
1642 nodeType, // nodeType
1643 String.Empty, // name
1644 String.Empty, // prefix
1645 String.Empty, // localName
1646 false, // isEmptyElement
1647 null, // value: create only when required
1648 true // clearAttributes
1653 // The leading '&' has already been consumed.
1654 // Dispatches on the next character: '#' starts a character reference,
1655 // anything else is read as an entity reference. The int result of the
1656 // chosen reader is returned unchanged (this method does not return a bool).
1657 // ignoreEntityReferences is passed through to ReadEntityReference.
1658 private int ReadReference (bool ignoreEntityReferences)
1660 if (PeekChar () == '#') {
1662 return ReadCharacterReference ();
1664 return ReadEntityReference (ignoreEntityReferences);
// Parses a numeric character reference: hexadecimal digits after an 'x',
// decimal digits otherwise, up to the terminating ';'.
// A non-digit character is a well-formedness error.
// NOTE(review): the digits are accumulated with no visible overflow check — confirm
// how over-long references are handled.
1667 private int ReadCharacterReference ()
1672 if (PeekChar () == 'x') {
1675 while ((ch = PeekChar ()) != ';' && ch != -1) {
// Each hex digit shifts the accumulated value by 4 bits.
1678 if (ch >= '0' && ch <= '9')
1679 value = (value << 4) + ch - '0';
1680 else if (ch >= 'A' && ch <= 'F')
1681 value = (value << 4) + ch - 'A' + 10;
1682 else if (ch >= 'a' && ch <= 'f')
1683 value = (value << 4) + ch - 'a' + 10;
1685 throw NotWFError (String.Format (CultureInfo.InvariantCulture,
1686 "invalid hexadecimal digit: {0} (#x{1:X})",
1691 while ((ch = PeekChar ()) != ';' && ch != -1) {
1694 if (ch >= '0' && ch <= '9')
1695 value = value * 10 + ch - '0';
1697 throw NotWFError (String.Format (CultureInfo.InvariantCulture,
1698 "invalid decimal digit: {0} (#x{1:X})",
1706 // There is no way to save surrogate pairs...
// The referenced character is validated only when both character checking
// and normalization are enabled.
1707 if (CharacterChecking && Normalization &&
1708 XmlChar.IsInvalid (value))
1709 throw NotWFError ("Referenced character was not allowed in XML. Normalization is " + normalization + ", checkCharacters = " + checkCharacters);
// Reads an entity name and handles it in one of three ways:
// - a predefined entity is recognised through XmlChar.GetPredefinedEntity;
// - when ignoreEntityReferences is true, the literal text "&name;" is
//   appended to the value buffer;
// - otherwise the reference is recorded (returnEntityReference and
//   entityReferenceName) for the caller to report.
1713 // Returns -1 if it should not be validated.
1714 // Real EOF must not be detected here.
1715 private int ReadEntityReference (bool ignoreEntityReferences)
1717 string name = ReadName ();
1720 int predefined = XmlChar.GetPredefinedEntity (name);
1721 if (predefined >= 0)
1724 if (ignoreEntityReferences) {
1725 AppendValueChar ('&');
1726 for (int i = 0; i < name.Length; i++)
1727 AppendValueChar (name [i]);
1728 AppendValueChar (';');
1730 returnEntityReference = true;
1731 entityReferenceName = name;
// Reads the attributes of a start tag or XML declaration into
// attributeTokens. Each attribute gets its name, prefix, local name,
// position and value tokens.
// isXmlDecl: true when the attributes belong to an XML declaration.
1737 // The reader is positioned on the first character of
1738 // the attribute name.
1739 private void ReadAttributes (bool isXmlDecl)
// Set once an attribute value is not followed by whitespace; a further
// attribute is then an error.
1742 bool requireWhitespace = false;
1743 currentAttribute = -1;
1744 currentAttributeValue = -1;
1747 if (!SkipWhitespace () && requireWhitespace)
1748 throw NotWFError ("Unexpected token. Name is required here.");
1750 IncrementAttributeToken ();
1751 currentAttributeToken.LineNumber = line;
1752 currentAttributeToken.LinePosition = column;
1754 string prefix, localName;
1755 currentAttributeToken.Name = ReadName (out prefix, out localName);
1756 currentAttributeToken.Prefix = prefix;
1757 currentAttributeToken.LocalName = localName;
1758 ExpectAfterWhitespace ('=');
// -1 makes ReadAttributeValueTokens read the quote character from the input.
1760 ReadAttributeValueTokens (-1);
1761 // This hack is required for xmldecl which has
1762 // both effective attributes and Value.
1765 dummyValue = currentAttributeToken.Value;
1769 if (!SkipWhitespace ())
1770 requireWhitespace = true;
1771 peekChar = PeekChar ();
1773 if (peekChar == '?')
1776 else if (peekChar == '/' || peekChar == '>')
1778 } while (peekChar != -1);
// Leave the reader positioned on no attribute.
1780 currentAttribute = -1;
1781 currentAttributeValue = -1;
// Adds a synthetic attribute with a single value token. Callers in this
// file use it for the XML declaration pseudo-attributes and the DOCTYPE
// PUBLIC / SYSTEM identifiers.
1784 private void AddAttributeWithValue (string name, string value)
1786 IncrementAttributeToken ();
1787 XmlAttributeTokenInfo ati = attributeTokens [currentAttribute];
// The name is atomized through the reader's NameTable; prefix and namespace are empty.
1788 ati.Name = NameTable.Add (name);
1789 ati.Prefix = String.Empty;
1790 ati.NamespaceURI = String.Empty;
1791 IncrementAttributeValueToken ();
1792 XmlTokenInfo vti = attributeValueTokens [currentAttributeValue];
1793 SetTokenProperties (vti,
// The attribute value consists of exactly this one token.
1802 ati.ValueTokenStartIndex = ati.ValueTokenEndIndex = currentAttributeValue;
// Makes attributeTokens [currentAttribute] the current attribute token:
// grows the array when the index has reached its length, creates the slot
// on first use, and clears the token.
// NOTE(review): the statement that advances currentAttribute is not visible in this
// extract — confirm.
1806 private void IncrementAttributeToken ()
1809 if (attributeTokens.Length == currentAttribute) {
1810 XmlAttributeTokenInfo [] newArray =
1811 new XmlAttributeTokenInfo [attributeTokens.Length * 2];
1812 attributeTokens.CopyTo (newArray, 0);
1813 attributeTokens = newArray;
// Token objects are created once per slot and reused after Clear ().
1815 if (attributeTokens [currentAttribute] == null)
1816 attributeTokens [currentAttribute] = new XmlAttributeTokenInfo (this);
1817 currentAttributeToken = attributeTokens [currentAttribute];
1818 currentAttributeToken.Clear ();
// Advances currentAttributeValue to the next attribute value token slot
// and makes it current. The array is doubled when the new index has
// reached its length, the slot is created on first use, and the token is
// cleared.
private void IncrementAttributeValueToken ()
{
	int index = ++currentAttributeValue;
	if (index == attributeValueTokens.Length) {
		XmlTokenInfo [] grown = new XmlTokenInfo [index * 2];
		Array.Copy (attributeValueTokens, grown, index);
		attributeValueTokens = grown;
	}

	XmlTokenInfo token = attributeValueTokens [index];
	if (token == null) {
		token = new XmlTokenInfo (this);
		attributeValueTokens [index] = token;
	}
	currentAttributeValueToken = token;
	token.Clear ();
}
// Reads one quoted attribute value and splits it into value tokens: text
// runs, plus EntityReference tokens for general entities that are not
// expanded.
// dummyQuoteChar: when negative, the opening quote is read from the
// input; otherwise the given character is used as the quote.
1835 // LAMESPEC: Orthodox XML reader should normalize attribute values
1836 private void ReadAttributeValueTokens (int dummyQuoteChar)
1838 int quoteChar = (dummyQuoteChar < 0) ? ReadChar () : dummyQuoteChar;
1840 if (quoteChar != '\'' && quoteChar != '\"')
1841 throw NotWFError ("an attribute value was not quoted");
1842 currentAttributeToken.QuoteChar = (char) quoteChar;
// Open the first value token and record where it starts.
1844 IncrementAttributeValueToken ();
1845 currentAttributeToken.ValueTokenStartIndex = currentAttributeValue;
1846 currentAttributeValueToken.LineNumber = line;
1847 currentAttributeValueToken.LinePosition = column;
// incrementToken: set after an EntityReference token so that following
// text opens a new token.
1849 bool incrementToken = false;
1850 bool isNewToken = true;
1853 currentAttributeValueToken.ValueBufferStart = valueBuffer.Length;
1856 if (ch == quoteChar)
1859 if (incrementToken) {
1860 IncrementAttributeValueToken ();
1861 currentAttributeValueToken.ValueBufferStart = valueBuffer.Length;
1862 currentAttributeValueToken.LineNumber = line;
1863 currentAttributeValueToken.LinePosition = column;
1864 incrementToken = false;
1871 throw NotWFError ("attribute values cannot contain '<'");
// End of input is an error only when the quote was read from the input.
1873 if (dummyQuoteChar < 0)
1874 throw NotWFError ("unexpected end of file in an attribute value");
1875 else // Attribute value constructor.
1881 if (PeekChar () == '\n')
1882 continue; // skip '\r'.
1884 // The csc in MS.NET 2.0 beta 1 barfs on this goto, so work around that
1893 // When Normalize = true, then replace
1894 // all spaces to ' '
1900 if (PeekChar () == '#') {
1902 ch = ReadCharacterReference ();
1903 AppendValueChar (ch);
1906 // Check XML 1.0 section 3.1 WFC.
1907 string entName = ReadName ();
1909 int predefined = XmlChar.GetPredefinedEntity (entName);
1910 if (predefined < 0) {
1911 CheckAttributeEntityReferenceWFC (entName);
// With ExpandEntities the replacement text from the DTD is appended inline.
1912 if (entityHandling == EntityHandling.ExpandEntities) {
1913 string value = DTD.GenerateEntityAttributeText (entName);
1914 foreach (char c in (IEnumerable<char>) value)
1915 AppendValueChar (c);
// Otherwise close the current text token and emit an EntityReference token.
1917 currentAttributeValueToken.ValueBufferEnd = valueBuffer.Length;
1918 currentAttributeValueToken.NodeType = XmlNodeType.Text;
1920 IncrementAttributeValueToken ();
1921 currentAttributeValueToken.Name = entName;
1922 currentAttributeValueToken.Value = String.Empty;
1923 currentAttributeValueToken.NodeType = XmlNodeType.EntityReference;
1924 incrementToken = true;
1928 AppendValueChar (predefined);
1931 if (CharacterChecking && XmlChar.IsInvalid (ch))
1932 throw NotWFError ("Invalid character was found.");
1933 // FIXME: it might be optimized by the JIT later,
1934 // AppendValueChar (ch);
// Hand-inlined copy of AppendValueChar.
1936 if (ch <= Char.MaxValue)
1937 valueBuffer.Append ((char) ch);
1939 AppendSurrogatePairValueChar (ch);
// Close the trailing text token unless the value ended on an entity reference.
1946 if (!incrementToken) {
1947 currentAttributeValueToken.ValueBufferEnd = valueBuffer.Length;
1948 currentAttributeValueToken.NodeType = XmlNodeType.Text;
1950 currentAttributeToken.ValueTokenEndIndex = currentAttributeValue;
// Applies well-formedness checks to a general entity reference found
// inside an attribute value. The entity must be declared when that is
// required, must not be external, must come from the internal subset in a
// standalone document, and must not contain '<'.
1954 private void CheckAttributeEntityReferenceWFC (string entName)
// Without a DTD there is no declaration to look up.
1956 DTDEntityDeclaration entDecl =
1957 DTD == null ? null : DTD.EntityDecls [entName];
1958 if (entDecl == null) {
// NOTE(review): the trailing "entDecl == null" is redundant inside this branch.
1959 if (entityHandling == EntityHandling.ExpandEntities
1960 || (DTD != null && resolver != null && entDecl == null))
1961 throw NotWFError (String.Format ("Referenced entity '{0}' does not exist.", entName));
1966 if (entDecl.HasExternalReference)
1967 throw NotWFError ("Reference to external entities is not allowed in the value of an attribute.");
1968 if (isStandalone && !entDecl.IsInternalSubset)
1969 throw NotWFError ("Reference to external entities is not allowed in the internal subset.");
1970 if (entDecl.EntityValue.IndexOf ('<') >= 0)
1971 throw NotWFError ("Attribute must not contain character '<' either directly or indirectly by way of entity references.");
// Reads a processing instruction: the target name, then the content up to
// "?>". A target that is the atomized "xml" string is passed to
// VerifyXmlDeclaration.
1974 // The reader is positioned on the first character
1977 // It may be xml declaration or processing instruction.
1978 private void ReadProcessingInstruction ()
1980 string target = ReadName ();
// Only the exact lowercase "xml" is allowed; other casings are reserved.
1981 if (target != "xml" && target.ToLower (CultureInfo.InvariantCulture) == "xml")
1982 throw NotWFError ("Not allowed processing instruction name which starts with 'X', 'M', 'L' was found.");
// The target must be followed by whitespace, or directly by '?'.
1984 if (!SkipWhitespace ())
1985 if (PeekChar () != '?')
1986 throw NotWFError ("Invalid processing instruction name was found.");
1988 ClearValueBuffer ();
1991 while ((ch = PeekChar ()) != -1) {
1994 if (ch == '?' && PeekChar () == '>') {
1999 if (CharacterChecking && XmlChar.IsInvalid (ch))
2000 throw NotWFError ("Invalid character was found.");
2001 AppendValueChar (ch);
// Reference comparison: this relies on ReadName returning an atomized
// string that is the same instance as XmlNamespaceManager.PrefixXml.
2004 if (Object.ReferenceEquals (target, XmlNamespaceManager.PrefixXml))
2005 VerifyXmlDeclaration ();
2007 if (currentState == XmlNodeType.None)
2008 currentState = XmlNodeType.XmlDeclaration;
2011 XmlNodeType.ProcessingInstruction, // nodeType
2013 String.Empty, // prefix
2014 target, // localName
2015 false, // isEmptyElement
2016 null, // value: create only when required
2017 true // clearAttributes
// Validates the buffered content of an "<?xml ...?>" declaration and sets
// up the XmlDeclaration node. "version" is required and must be "1.0";
// "encoding" and "standalone" are optional and parsed in that order.
// The recognised items are also exposed as attributes.
2022 void VerifyXmlDeclaration ()
2024 if (!allowMultipleRoot && currentState != XmlNodeType.None)
2025 throw NotWFError ("XML declaration cannot appear in this state.");
2027 currentState = XmlNodeType.XmlDeclaration;
// The declaration body was collected in valueBuffer by the caller.
2029 string text = CreateValueString ();
2035 string encoding = null, standalone = null;
2037 ParseAttributeFromString (text, ref idx, out name, out value);
2038 if (name != "version" || value != "1.0")
2039 throw NotWFError ("'version' is expected.");
// Reset name so a missing next pseudo-attribute is not mistaken for the previous one.
2040 name = String.Empty;
2041 if (SkipWhitespaceInString (text, ref idx) && idx < text.Length)
2042 ParseAttributeFromString (text, ref idx, out name, out value);
2043 if (name == "encoding") {
2044 if (!XmlChar.IsValidIANAEncoding (value))
2045 throw NotWFError ("'encoding' must be a valid IANA encoding name.");
// The context encoding comes from the underlying stream reader when there
// is one; the declared value is only validated for syntax here.
2046 if (reader is XmlStreamReader)
2047 parserContext.Encoding = ((XmlStreamReader) reader).Encoding;
2049 parserContext.Encoding = Encoding.Unicode;
2051 name = String.Empty;
2052 if (SkipWhitespaceInString (text, ref idx) && idx < text.Length)
2053 ParseAttributeFromString (text, ref idx, out name, out value);
2055 if (name == "standalone") {
2056 this.isStandalone = value == "yes";
2057 if (value != "yes" && value != "no")
2058 throw NotWFError ("Only 'yes' or 'no' is allow for 'standalone'");
2060 SkipWhitespaceInString (text, ref idx);
2062 else if (name.Length != 0)
2063 throw NotWFError (String.Format ("Unexpected token: '{0}'", name));
// Anything left after the recognised pseudo-attributes is an error.
2065 if (idx < text.Length)
2066 throw NotWFError ("'?' is expected.");
2068 AddAttributeWithValue ("version", "1.0");
2069 if (encoding != null)
2070 AddAttributeWithValue ("encoding", encoding);
2071 if (standalone != null)
2072 AddAttributeWithValue ("standalone", standalone);
2073 currentAttribute = currentAttributeValue = -1;
2076 XmlNodeType.XmlDeclaration, // nodeType
2078 String.Empty, // prefix
2080 false, // isEmptyElement
2082 false // clearAttributes
// Advances idx past any XML whitespace in text starting at idx.
// Returns true when at least one character was skipped.
bool SkipWhitespaceInString (string text, ref int idx)
{
	bool advanced = false;
	for (; idx < text.Length && XmlChar.IsWhitespace (text [idx]); idx++)
		advanced = true;
	return advanced;
}
// Parses one pseudo-attribute of the form  name = "value"  (single or
// double quotes) out of src, starting at idx.
//   src   - text to parse, e.g. the body of an XML declaration.
//   idx   - in: position to start at (leading whitespace is skipped);
//           out: position just after the closing quote.
//   name  - receives the run of name characters found (may be empty).
//   value - receives the text between the quotes.
// Throws a well-formedness error when '=' or the opening quote is missing,
// or when the value is not terminated by a matching closing quote.
private void ParseAttributeFromString (string src,
	ref int idx, out string name, out string value)
{
	while (idx < src.Length && XmlChar.IsWhitespace (src [idx]))
		idx++;

	int start = idx;
	while (idx < src.Length && XmlChar.IsNameChar (src [idx]))
		idx++;
	name = src.Substring (start, idx - start);

	while (idx < src.Length && XmlChar.IsWhitespace (src [idx]))
		idx++;
	if (idx == src.Length || src [idx] != '=')
		throw NotWFError (String.Format ("'=' is expected after {0}", name));
	idx++;

	while (idx < src.Length && XmlChar.IsWhitespace (src [idx]))
		idx++;

	if (idx == src.Length || (src [idx] != '"' && src [idx] != '\''))
		throw NotWFError ("'\"' or '\'' is expected.");

	char quote = src [idx];
	idx++;
	start = idx;

	while (idx < src.Length && src [idx] != quote)
		idx++;
	// Running off the end means the closing quote is missing. Report it
	// instead of silently treating the rest of the string as the value.
	if (idx == src.Length)
		throw NotWFError (String.Format ("Closing quote is expected for the value of {0}", name));

	value = src.Substring (start, idx - start);
	idx++; // step over the closing quote
}
// Consumes a leading text declaration ("<?xml version=... encoding=...?>")
// if the input starts with one. The optional version must be "1.0". The
// encoding is mandatory when Conformance is Auto.
2128 internal void SkipTextDeclaration ()
2130 if (PeekChar () != '<')
2135 if (PeekChar () != '?') {
// Look ahead until six characters are buffered, enough to test for "<?xml ".
2141 while (peekCharsIndex < 6) {
2142 if (PeekChar () < 0)
2147 if (new string (peekChars, 2, 4) != "xml ") {
2148 if (new string (peekChars, 2, 4).ToLower (CultureInfo.InvariantCulture) == "xml ") {
2149 throw NotWFError ("Processing instruction name must not be character sequence 'X' 'M' 'L' with case insensitivity.");
// Optional version pseudo-attribute.
2158 if (PeekChar () == 'v') {
2160 ExpectAfterWhitespace ('=');
2162 int quoteChar = ReadChar ();
// Holds at most three characters, which is enough to compare against "1.0".
2163 char [] expect1_0 = new char [3];
2164 int versionLength = 0;
2165 switch (quoteChar) {
2168 while (PeekChar () != quoteChar) {
2169 if (PeekChar () == -1)
2170 throw NotWFError ("Invalid version declaration inside text declaration.");
2171 else if (versionLength == 3)
2172 throw NotWFError ("Invalid version number inside text declaration.");
2174 expect1_0 [versionLength] = (char) ReadChar ();
2176 if (versionLength == 3 && new String (expect1_0) != "1.0")
2177 throw NotWFError ("Invalid version number inside text declaration.");
2184 throw NotWFError ("Invalid version declaration inside text declaration.");
// Encoding pseudo-attribute; its value is skipped here, not interpreted.
2188 if (PeekChar () == 'e') {
2189 Expect ("encoding");
2190 ExpectAfterWhitespace ('=');
2192 int quoteChar = ReadChar ();
2193 switch (quoteChar) {
2196 while (PeekChar () != quoteChar)
2197 if (ReadChar () == -1)
2198 throw NotWFError ("Invalid encoding declaration inside text declaration.");
2203 throw NotWFError ("Invalid encoding declaration inside text declaration.");
2205 // Encoding value should be checked inside XmlInputStream.
2207 // this condition is to check if this instance is
2208 // not created by XmlReader.Create() (which just
2209 // omits strict text declaration check).
2210 else if (Conformance == ConformanceLevel.Auto)
2211 throw NotWFError ("Encoding declaration is mandatory in text declaration.");
2215 curNodePeekIndex = peekCharsIndex; // without this it causes incorrect value start indication.
// Dispatches on the character that follows "<!" and reports a
// well-formedness error for markup it does not recognise.
// NOTE(review): the dispatch cases themselves are not visible in this extract.
2218 // The reader is positioned on the first character after
2219 // the leading '<!'.
2220 private void ReadDeclaration ()
2222 int ch = PeekChar ();
2240 throw NotWFError ("Unexpected declaration markup was found.");
// Reads a comment body into the value buffer up to "-->" and sets up a
// Comment node. A "--" that is not followed by '>' is an error.
2244 // The reader is positioned on the first character after
2245 // the leading '<!--'.
2246 private void ReadComment ()
// A comment as the first node moves the state on from None.
2248 if (currentState == XmlNodeType.None)
2249 currentState = XmlNodeType.XmlDeclaration;
2251 preserveCurrentTag = false;
2253 ClearValueBuffer ();
2256 while ((ch = PeekChar ()) != -1) {
2259 if (ch == '-' && PeekChar () == '-') {
2262 if (PeekChar () != '>')
2263 throw NotWFError ("comments cannot contain '--'");
// NOTE(review): unlike the text/CDATA/PI readers in this file, this check
// is not guarded by CharacterChecking — confirm that is intended.
2269 if (XmlChar.IsInvalid (ch))
2270 throw NotWFError ("Not allowed character was found.");
2272 AppendValueChar (ch);
2276 XmlNodeType.Comment, // nodeType
2277 String.Empty, // name
2278 String.Empty, // prefix
2279 String.Empty, // localName
2280 false, // isEmptyElement
2281 null, // value: create only when required
2282 true // clearAttributes
// Reads a CDATA section body into the value buffer up to "]]>" and sets
// up a CDATA node. CDATA is only allowed while inside element content.
2286 // The reader is positioned on the first character after
2287 // the leading '<![CDATA['.
2288 private void ReadCDATA ()
2290 if (currentState != XmlNodeType.Element)
2291 throw NotWFError ("CDATA section cannot appear in this state.");
2292 preserveCurrentTag = false;
2294 ClearValueBuffer ();
2298 while (PeekChar () != -1) {
// "]]" may start the terminator; it ends the section only if '>' follows.
2303 if (ch == ']' && PeekChar () == ']') {
2304 ch = ReadChar (); // ']'
2306 if (PeekChar () == '>') {
2313 if (normalization && ch == '\r') {
2316 // append '\n' instead of '\r'.
2317 AppendValueChar ('\n');
2318 // otherwise, discard '\r'.
2321 if (CharacterChecking && XmlChar.IsInvalid (ch))
2322 throw NotWFError ("Invalid character was found.");
2324 // FIXME: it might be optimized by the JIT later,
2325 // AppendValueChar (ch);
// Hand-inlined copy of AppendValueChar.
2327 if (ch <= Char.MaxValue)
2328 valueBuffer.Append ((char) ch);
2330 AppendSurrogatePairValueChar (ch);
2335 XmlNodeType.CDATA, // nodeType
2336 String.Empty, // name
2337 String.Empty, // prefix
2338 String.Empty, // localName
2339 false, // isEmptyElement
2340 null, // value: create only when required
2341 true // clearAttributes
// Reads a document type declaration: the name, an optional SYSTEM or
// PUBLIC external id, and an optional internal subset. It then builds the
// DTD object model and sets up a DocumentType node whose value is the
// internal subset text. The PUBLIC / SYSTEM ids are exposed as attributes.
2345 // The reader is positioned on the first character after
2346 // the leading '<!DOCTYPE'.
2347 private void ReadDoctypeDecl ()
2350 throw NotWFError ("Document Type Declaration (DTD) is prohibited in this XML.");
// A DOCTYPE may not repeat, nor appear once element content has started.
2351 switch (currentState) {
2352 case XmlNodeType.DocumentType:
2353 case XmlNodeType.Element:
2354 case XmlNodeType.EndElement:
2355 throw NotWFError ("Document type cannot appear in this state.");
2357 currentState = XmlNodeType.DocumentType;
2359 string doctypeName = null;
2360 string publicId = null;
2361 string systemId = null;
2362 int intSubsetStartLine = 0;
2363 int intSubsetStartColumn = 0;
2366 doctypeName = ReadName ();
2371 systemId = ReadSystemLiteral (true);
2374 publicId = ReadPubidLiteral ();
2375 if (!SkipWhitespace ())
2376 throw NotWFError ("Whitespace is required between PUBLIC id and SYSTEM id.");
// The "SYSTEM" keyword is not expected after a PUBLIC id.
2377 systemId = ReadSystemLiteral (false);
2383 if(PeekChar () == '[')
2385 // read markupdecl etc. or end of decl
// Remember where the internal subset starts so it can be passed to the DTD reader.
2387 intSubsetStartLine = this.LineNumber;
2388 intSubsetStartColumn = this.LinePosition;
2389 ClearValueBuffer ();
2390 ReadInternalSubset ();
2391 parserContext.InternalSubset = CreateValueString ();
2393 // end of DOCTYPE decl.
2394 ExpectAfterWhitespace ('>');
2396 GenerateDTDObjectModel (doctypeName, publicId,
2397 systemId, parserContext.InternalSubset,
2398 intSubsetStartLine, intSubsetStartColumn);
2400 // set properties for <!DOCTYPE> node
2402 XmlNodeType.DocumentType, // nodeType
2403 doctypeName, // name
2404 String.Empty, // prefix
2405 doctypeName, // localName
2406 false, // isEmptyElement
2407 parserContext.InternalSubset, // value
2408 true // clearAttributes
2411 if (publicId != null)
2412 AddAttributeWithValue ("PUBLIC", publicId);
2413 if (systemId != null)
2414 AddAttributeWithValue ("SYSTEM", systemId);
2415 currentAttribute = currentAttributeValue = -1;
// Convenience overload: builds the DTD object model with the internal
// subset start position given as line 0, column 0.
internal DTDObjectModel GenerateDTDObjectModel (string name, string publicId,
	string systemId, string internalSubset)
{
	const int defaultStartLine = 0;
	const int defaultStartColumn = 0;
	return GenerateDTDObjectModel (name, publicId, systemId, internalSubset,
		defaultStartLine, defaultStartColumn);
}
// Creates a new DTDObjectModel on the parser context, fills it in from the
// arguments and the reader's current state, and returns the result of
// running a DTDReader over it.
// intSubsetStartLine / intSubsetStartColumn: position handed to the
// DTDReader constructor.
2424 internal DTDObjectModel GenerateDTDObjectModel (string name, string publicId,
2425 string systemId, string internalSubset, int intSubsetStartLine, int intSubsetStartColumn)
2428 parserContext.Dtd = new DTDObjectModel (this.NameTable); // merges both internal and external subsets in the meantime,
2429 DTD.BaseURI = BaseURI;
2431 DTD.PublicId = publicId;
2432 DTD.SystemId = systemId;
2433 DTD.InternalSubset = internalSubset;
2434 DTD.XmlResolver = resolver;
2435 DTD.IsStandalone = isStandalone;
// The DTD records the reader's current position.
2436 DTD.LineNumber = line;
2437 DTD.LinePosition = column;
2439 DTDReader dr = new DTDReader (DTD, intSubsetStartLine, intSubsetStartColumn);
// The DTD reader uses the same normalization setting as this reader.
2440 dr.Normalization = this.normalization;
2441 return dr.GenerateDTDObjectModel ();
// Scanner states used while skimming the internal DTD subset. Members
// referenced in this file include Free, PI, Comment, ElementDecl,
// EntityDecl, AttlistDecl, NotationDecl, InsideSingleQuoted and
// InsideDoubleQuoted.
2444 private enum DtdInputState
// Typed wrapper around a non-generic Stack of DtdInputState values.
// A new instance starts with DtdInputState.Free pushed.
2457 private class DtdInputStateStack
2459 Stack intern = new Stack ();
2460 public DtdInputStateStack ()
2462 Push (DtdInputState.Free);
// Returns the top state without removing it.
2465 public DtdInputState Peek ()
2467 return (DtdInputState) intern.Peek ();
// Removes and returns the top state.
2470 public DtdInputState Pop ()
2472 return (DtdInputState) intern.Pop ();
2475 public void Push (DtdInputState val)
// Stack of DTD scanner states; State exposes its top entry.
2482 DtdInputStateStack stateStack = new DtdInputStateStack ();
2483 DtdInputState State {
2484 get { return stateStack.Peek (); }
// Reads the next character, appends it to the value buffer (as a
// surrogate pair when it does not fit in one char), and returns it.
private int ReadValueChar ()
{
	int ch = ReadChar ();
	if (ch <= Char.MaxValue)
		valueBuffer.Append ((char) ch);
	else
		AppendSurrogatePairValueChar (ch);
	return ch;
}
// Appends the literal s to the value buffer.
// NOTE(review): by its name this presumably also requires s to be next in the input,
// but that statement is not visible in this extract.
2494 private void ExpectAndAppend (string s)
2497 valueBuffer.Append (s);
// Skims the internal DTD subset, copying each character into the value
// buffer. It tracks only enough state (declaration kind, quotes,
// comments, PIs) to find the end of the subset and to reject some
// malformed input.
2500 // Simply read but not generate any result.
2501 private void ReadInternalSubset ()
2503 bool continueParse = true;
2505 while (continueParse) {
2506 switch (ReadValueChar ()) {
2509 case DtdInputState.Free:
// Drop the last appended character from the buffered subset text before stopping.
2511 valueBuffer.Remove (valueBuffer.Length - 1, 1);
2512 continueParse = false;
2514 case DtdInputState.InsideDoubleQuoted:
2515 case DtdInputState.InsideSingleQuoted:
2516 case DtdInputState.Comment:
2519 throw NotWFError ("unexpected end of file at DTD.");
2523 throw NotWFError ("unexpected end of file at DTD.");
2526 case DtdInputState.InsideDoubleQuoted:
2527 case DtdInputState.InsideSingleQuoted:
2528 case DtdInputState.Comment:
2529 continue; // well-formed
2531 int c = ReadValueChar ();
2534 stateStack.Push (DtdInputState.PI);
2537 switch (ReadValueChar ()) {
2539 switch (ReadValueChar ()) {
2541 ExpectAndAppend ("EMENT");
2542 stateStack.Push (DtdInputState.ElementDecl);
2545 ExpectAndAppend ("TITY");
2546 stateStack.Push (DtdInputState.EntityDecl);
2549 throw NotWFError ("unexpected token '<!E'.");
2553 ExpectAndAppend ("TTLIST");
2554 stateStack.Push (DtdInputState.AttlistDecl);
2557 ExpectAndAppend ("OTATION");
2558 stateStack.Push (DtdInputState.NotationDecl);
2561 ExpectAndAppend ("-");
2562 stateStack.Push (DtdInputState.Comment);
2567 throw NotWFError (String.Format ("unexpected '<{0}'.", (char) c));
// A quote character opens a literal unless already inside the other quote
// kind or a comment.
2571 if (State == DtdInputState.InsideSingleQuoted)
2573 else if (State != DtdInputState.InsideDoubleQuoted && State != DtdInputState.Comment)
2574 stateStack.Push (DtdInputState.InsideSingleQuoted);
2577 if (State == DtdInputState.InsideDoubleQuoted)
2579 else if (State != DtdInputState.InsideSingleQuoted && State != DtdInputState.Comment)
2580 stateStack.Push (DtdInputState.InsideDoubleQuoted);
// All four declaration kinds share the NotationDecl handling.
2584 case DtdInputState.ElementDecl:
2585 goto case DtdInputState.NotationDecl;
2586 case DtdInputState.AttlistDecl:
2587 goto case DtdInputState.NotationDecl;
2588 case DtdInputState.EntityDecl:
2589 goto case DtdInputState.NotationDecl;
2590 case DtdInputState.NotationDecl:
2593 case DtdInputState.InsideDoubleQuoted:
2594 case DtdInputState.InsideSingleQuoted:
2595 case DtdInputState.Comment:
2598 throw NotWFError ("unexpected token '>'");
2602 if (State == DtdInputState.PI) {
2603 if (ReadValueChar () == '>')
2608 if (State == DtdInputState.Comment) {
2609 if (PeekChar () == '-') {
2611 ExpectAndAppend (">");
2617 if (State != DtdInputState.Free && State != DtdInputState.EntityDecl && State != DtdInputState.Comment && State != DtdInputState.InsideDoubleQuoted && State != DtdInputState.InsideSingleQuoted)
2618 throw NotWFError ("Parameter Entity Reference cannot appear as a part of markupdecl (see XML spec 2.8).");
// Reads a quoted system literal and returns its content without the
// quotes.
// expectSYSTEM: NOTE(review): presumably selects whether the "SYSTEM" keyword is
// consumed first — the statement using it is not visible in this extract.
2624 // The reader is positioned on the first 'S' of "SYSTEM".
2625 private string ReadSystemLiteral (bool expectSYSTEM)
2629 if (!SkipWhitespace ())
2630 throw NotWFError ("Whitespace is required after 'SYSTEM'.");
2634 int quoteChar = ReadChar (); // apos or quot
2636 ClearValueBuffer ();
// Collect characters until the matching quote; end of input is an error.
2637 while (c != quoteChar) {
2640 throw NotWFError ("Unexpected end of stream in ExternalID.");
2642 AppendValueChar (c);
2644 return CreateValueString ();
// Reads a quoted public identifier literal and returns its content
// without the quotes. Every character other than the closing quote must
// satisfy XmlChar.IsPubidChar.
2647 private string ReadPubidLiteral()
2650 if (!SkipWhitespace ())
2651 throw NotWFError ("Whitespace is required after 'PUBLIC'.");
2652 int quoteChar = ReadChar ();
2654 ClearValueBuffer ();
2655 while(c != quoteChar)
2658 if(c < 0) throw NotWFError ("Unexpected end of stream in ExternalID.");
2659 if(c != quoteChar && !XmlChar.IsPubidChar (c))
2660 throw NotWFError (String.Format ("character '{0}' not allowed for PUBLIC ID", (char)c ));
2662 AppendValueChar (c);
2664 return CreateValueString ();
// Reads an XML name at the current position and returns it. The prefix
// and local name produced by the two-out-parameter overload are discarded.
// The reader is expected to be positioned on the first character of the
// name.
private string ReadName ()
{
	string unusedPrefix;
	string unusedLocalName;
	return ReadName (out unusedPrefix, out unusedLocalName);
}
// Reads an XML name and returns it atomized through NameTable. When
// namespace support is on, the name is split at the first ':' into prefix
// and localName. Two implementations follow: the first is compiled when
// USE_NAME_BUFFER is not defined and slices the name out of peekChars;
// the second accumulates it in nameBuffer.
2675 private string ReadName (out string prefix, out string localName)
2677 #if !USE_NAME_BUFFER
// Keep the peek buffer content in place while the name is read; the
// previous setting is restored afterwards.
2678 bool savePreserve = preserveCurrentTag;
2679 preserveCurrentTag = true;
// Remember the name start as an offset from curNodePeekIndex.
2681 int startOffset = peekCharsIndex - curNodePeekIndex;
2682 int ch = PeekChar ();
2683 if (!XmlChar.IsFirstNameChar (ch))
2684 throw NotWFError (String.Format (CultureInfo.InvariantCulture, "a name did not start with a legal character {0} ({1})", ch, (char) ch));
2689 while (XmlChar.IsNameChar ((ch = PeekChar ()))) {
// Only the first ':' is treated as the prefix separator.
2691 if (ch == ':' && namespaces && colonAt < 0)
2696 int start = curNodePeekIndex + startOffset;
2698 string name = NameTable.Add (
2699 peekChars, start, length);
2702 prefix = NameTable.Add (
2703 peekChars, start, colonAt);
2704 localName = NameTable.Add (
2705 peekChars, start + colonAt + 1, length - colonAt - 1);
2707 prefix = String.Empty;
2711 preserveCurrentTag = savePreserve;
2715 int ch = PeekChar ();
2716 if (!XmlChar.IsFirstNameChar (ch))
2717 throw NotWFError (String.Format (CultureInfo.InvariantCulture, "a name did not start with a legal character {0} ({1})", ch, (char) ch));
2722 // AppendNameChar (ch);
2724 // nameBuffer.Length is always non-0 so no need to ExpandNameCapacity () here
2725 if (ch <= Char.MaxValue)
2726 nameBuffer [nameLength++] = (char) ch;
2728 AppendSurrogatePairNameChar (ch);
2733 while (XmlChar.IsNameChar ((ch = PeekChar ()))) {
2736 if (ch == ':' && namespaces && colonAt < 0)
2737 colonAt = nameLength;
2738 // AppendNameChar (ch);
2740 if (nameLength == nameCapacity)
2741 ExpandNameCapacity ();
2742 if (ch <= Char.MaxValue)
2743 nameBuffer [nameLength++] = (char) ch;
2745 AppendSurrogatePairNameChar (ch);
2749 string name = NameTable.Add (nameBuffer, 0, nameLength);
2752 prefix = NameTable.Add (nameBuffer, 0, colonAt);
2753 localName = NameTable.Add (nameBuffer, colonAt + 1, nameLength - colonAt - 1);
2755 prefix = String.Empty;
// Throws a well-formedness error when the character read differs from
// expected; end of input is reported as "EOF" in the message.
2763 // Read the next character and compare it against the
2764 // specified character.
2765 private void Expect (int expected)
2767 int ch = ReadChar ();
2769 if (ch != expected) {
2770 throw NotWFError (String.Format (CultureInfo.InvariantCulture,
2771 "expected '{0}' ({1:X}) but found '{2}' ({3:X})",
2774 ch < 0 ? (object) "EOF" : (char) ch,
// Reads one character per character of expected and throws a
// well-formedness error at the first one that does not match.
private void Expect (string expected)
{
	foreach (char wanted in expected) {
		if (ReadChar () == wanted)
			continue;
		throw NotWFError (String.Format (CultureInfo.InvariantCulture,
			"'{0}' is expected", expected));
	}
}
// Reads characters, treating whitespace specially, and throws a
// well-formedness error naming c and the character actually found
// ("EOF" at end of input).
// NOTE(review): presumably whitespace is skipped and the first other character must
// equal c — the loop and comparison are not visible in this extract.
2787 private void ExpectAfterWhitespace (char c)
2790 int i = ReadChar ();
// The i < 0x21 test is a cheap pre-filter before calling IsWhitespace.
2791 if (i < 0x21 && XmlChar.IsWhitespace (i))
2794 throw NotWFError (String.Format (CultureInfo.InvariantCulture, "Expected {0}, but found {1} [{2}]", c, i < 0 ? (object) "EOF" : (char) i, i));
// Skips a run of XML whitespace (space, tab, LF, CR). The whitespace test
// is written out inline instead of calling XmlChar.IsWhitespace.
// NOTE(review): presumably returns whether any whitespace was present (the "skipped"
// flag) — the return statements are not visible in this extract.
2799 // Does not consume the first non-whitespace character.
2800 private bool SkipWhitespace ()
2802 // FIXME: It should be inlined by the JIT.
2803 // bool skipped = XmlChar.IsWhitespace (PeekChar ());
2804 int ch = PeekChar ();
2805 bool skipped = (ch == 0x20 || ch == 0x9 || ch == 0xA || ch == 0xD);
2809 // FIXME: It should be inlined by the JIT.
2810 // while (XmlChar.IsWhitespace (PeekChar ()))
2812 while ((ch = PeekChar ()) == 0x20 || ch == 0x9 || ch == 0xA || ch == 0xD)
// Reads a run of whitespace at the current position and decides how to
// report it: as part of a text node when non-markup content follows inside
// an element, otherwise as Whitespace or SignificantWhitespace, subject to
// the whitespaceHandling setting.
2817 private bool ReadWhitespace ()
2819 if (currentState == XmlNodeType.None)
2820 currentState = XmlNodeType.XmlDeclaration;
// Keep the peek buffer in place so that the whitespace can be copied out
// of it below; the previous setting is restored afterwards.
2822 bool savePreserve = preserveCurrentTag;
2823 preserveCurrentTag = true;
2824 int startOffset = peekCharsIndex - curNodePeekIndex; // it should be 0 for now though.
2826 int ch = PeekChar ();
2830 // FIXME: It should be inlined by the JIT.
2831 // } while ((ch = PeekChar ()) != -1 && XmlChar.IsWhitespace (ch));
2832 } while (ch == 0x20 || ch == 0x9 || ch == 0xA || ch == 0xD);
// Whitespace followed by character data inside an element is the start of a text node.
2834 bool isText = currentState == XmlNodeType.Element && ch != -1 && ch != '<';
2836 if (!isText && (whitespaceHandling == WhitespaceHandling.None ||
2837 whitespaceHandling == WhitespaceHandling.Significant && XmlSpace != XmlSpace.Preserve))
2840 ClearValueBuffer ();
2841 valueBuffer.Append (peekChars, curNodePeekIndex, peekCharsIndex - curNodePeekIndex - startOffset);
2842 preserveCurrentTag = savePreserve;
2847 XmlNodeType nodeType = (this.XmlSpace == XmlSpace.Preserve) ?
2848 XmlNodeType.SignificantWhitespace : XmlNodeType.Whitespace;
2849 SetProperties (nodeType,
2854 null, // value: create only when required
2861 // Returns -1 if it should throw an error.
// Copies characters from the input into 'buffer', starting at index
// 'offset', in a loop that runs at most 'length' times.
// NOTE(review): the non-error return value is presumably the number of
// chars stored (bufIndex - offset) -- confirm against the return statements.
2862 private int ReadCharsInternal (char [] buffer, int offset, int length)
// bufIndex is the write position inside 'buffer'.
2864 int bufIndex = offset;
2865 for (int i = 0; i < length; i++) {
// The character is peeked first; the code below decides what to do with it.
2866 int c = PeekChar ();
2869 throw NotWFError ("Unexpected end of xml.");
// A following character other than '/' leads to '<' being stored as content.
2872 if (PeekChar () != '/') {
2874 buffer [bufIndex++] = '<';
// Post-decrement: the test uses the value nestLevel had before the decrement.
2877 else if (nestLevel-- > 0) {
2878 buffer [bufIndex++] = '<';
2881 // Seems to skip immediate EndElement
// Clear the in-progress flag and let Read () advance the reader.
2888 readCharsInProgress = false;
2889 Read (); // move to the next node
// Values up to Char.MaxValue fit into a single UTF-16 code unit.
2893 if (c <= Char.MaxValue)
2894 buffer [bufIndex++] = (char) c;
// Larger values are stored as a surrogate pair: the high surrogate is
// 0xD800 plus the upper bits of (c - 0x10000), the low surrogate is
// 0xDC00 plus the lower 10 bits.
// NOTE(review): this writes two chars while the loop counter 'i' advances
// by one per iteration, so bufIndex may be able to pass offset + length --
// confirm that callers leave room or that the bound is checked elsewhere.
2896 buffer [bufIndex++] = (char) ((c - 0x10000) / 0x400 + 0xD800);
2897 buffer [bufIndex++] = (char) ((c - 0x10000) % 0x400 + 0xDC00);
2905 private bool ReadUntilEndTag ()
2908 currentState = XmlNodeType.EndElement;
2914 throw NotWFError ("Unexpected end of xml.");
2916 if (PeekChar () != '/') {
2920 else if (--nestLevel > 0)
2923 string name = ReadName ();
2924 if (name != elementNames [elementNameStackPos - 1].Name)