2 // System.Xml.XmlTextReader
5 // Jason Diamond (jason@injektilo.org)
6 // Adam Treat (manyoso@yahoo.com)
7 // Atsushi Enomoto (ginga@kit.hi-ho.ne.jp)
9 // (C) 2001, 2002 Jason Diamond http://injektilo.org/
10 // Copyright (C) 2005-2006 Novell, Inc (http://www.novell.com)
12 // Permission is hereby granted, free of charge, to any person obtaining
13 // a copy of this software and associated documentation files (the
14 // "Software"), to deal in the Software without restriction, including
15 // without limitation the rights to use, copy, modify, merge, publish,
16 // distribute, sublicense, and/or sell copies of the Software, and to
17 // permit persons to whom the Software is furnished to do so, subject to
18 // the following conditions:
20 // The above copyright notice and this permission notice shall be
21 // included in all copies or substantial portions of the Software.
23 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
27 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
28 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
29 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
31 //#define USE_NAME_BUFFER
34 // Optimization TODOs:
36 // - support PushbackChar() which reverts one character read.
37 // - ReadTextReader() should always keep one pushback buffer
38 // as pushback safety net.
39 // - Replace (peek,read) * n -> read * n + pushback
43 using System.Collections;
45 using System.Collections.Generic;
47 using System.Globalization;
49 using System.Security.Permissions;
51 using System.Xml.Schema;
64 internal class XmlTextReader : XmlReader,
65 IXmlLineInfo, IXmlNamespaceResolver, IHasXmlParserContext
67 [PermissionSet (SecurityAction.InheritanceDemand, Unrestricted = true)]
68 public class XmlTextReader : XmlReader, IXmlLineInfo, IHasXmlParserContext
73 protected XmlTextReader ()
77 public XmlTextReader (Stream input)
78 : this (new XmlStreamReader (input))
82 public XmlTextReader (string url)
83 : this(url, new NameTable ())
87 public XmlTextReader (TextReader input)
88 : this (input, new NameTable ())
92 protected XmlTextReader (XmlNameTable nt)
93 : this (String.Empty, null, XmlNodeType.None, null)
97 public XmlTextReader (Stream input, XmlNameTable nt)
98 : this(new XmlStreamReader (input), nt)
102 public XmlTextReader (string url, Stream input)
103 : this (url, new XmlStreamReader (input))
107 public XmlTextReader (string url, TextReader input)
108 : this (url, input, new NameTable ())
112 public XmlTextReader (string url, XmlNameTable nt)
115 Stream stream = GetStreamFromUrl (url, out uriString);
116 XmlParserContext ctx = new XmlParserContext (nt,
117 new XmlNamespaceManager (nt),
120 this.InitializeContext (uriString, ctx, new XmlStreamReader (stream), XmlNodeType.Document);
123 public XmlTextReader (TextReader input, XmlNameTable nt)
124 : this (String.Empty, input, nt)
128 // This is used in XmlReader.Create() to indicate that string
129 // argument is uri, not an xml fragment.
130 internal XmlTextReader (bool dummy, XmlResolver resolver, string url, XmlNodeType fragType, XmlParserContext context)
132 if (resolver == null)
133 resolver = new XmlUrlResolver ();
135 this.XmlResolver = resolver;
137 Stream stream = GetStreamFromUrl (url, out uriString);
138 this.InitializeContext (uriString, context, new XmlStreamReader (stream), fragType);
141 public XmlTextReader (Stream xmlFragment, XmlNodeType fragType, XmlParserContext context)
142 : this (context != null ? context.BaseURI : String.Empty,
143 new XmlStreamReader (xmlFragment),
147 disallowReset = true;
150 internal XmlTextReader (string baseURI, TextReader xmlFragment, XmlNodeType fragType)
151 : this (baseURI, xmlFragment, fragType, null)
155 public XmlTextReader (string url, Stream input, XmlNameTable nt)
156 : this (url, new XmlStreamReader (input), nt)
160 public XmlTextReader (string url, TextReader input, XmlNameTable nt)
161 : this (url, input, XmlNodeType.Document, null)
165 public XmlTextReader (string xmlFragment, XmlNodeType fragType, XmlParserContext context)
166 : this (context != null ? context.BaseURI : String.Empty,
167 new StringReader (xmlFragment),
171 disallowReset = true;
174 internal XmlTextReader (string url, TextReader fragment, XmlNodeType fragType, XmlParserContext context)
176 InitializeContext (url, context, fragment, fragType);
// Resolves url through the current resolver and opens it as a stream.
// absoluteUriString receives the resolved URI as text, or String.Empty
// when the resolver returned no URI. The result is null when the
// resolver's entity is not a Stream.
private Stream GetStreamFromUrl (string url, out string absoluteUriString)
{
	Uri resolved = resolver.ResolveUri (null, url);
	if (resolved != null)
		absoluteUriString = resolved.ToString ();
	else
		absoluteUriString = String.Empty;

	object entity = resolver.GetEntity (resolved, null, typeof (Stream));
	return entity as Stream;
}
190 public override int AttributeCount
192 get { return attributeCount; }
195 public override string BaseURI
197 get { return parserContext.BaseURI; }
201 public override bool CanReadBinaryContent {
205 public override bool CanReadValueChunk {
209 internal override bool CanReadBinaryContent {
213 internal override bool CanReadValueChunk {
218 internal bool CharacterChecking {
219 get { return checkCharacters; }
220 set { checkCharacters = value; }
223 // for XmlReaderSettings.CloseInput support
224 internal bool CloseInput {
225 get { return closeInput; }
226 set { closeInput = value; }
229 public override int Depth
232 int nodeTypeMod = currentToken.NodeType == XmlNodeType.Element ? 0 : -1;
233 if (currentAttributeValue >= 0)
234 return nodeTypeMod + elementDepth + 2; // inside attribute value.
235 else if (currentAttribute >= 0)
236 return nodeTypeMod + elementDepth + 1;
241 public Encoding Encoding
243 get { return parserContext.Encoding; }
246 public EntityHandling EntityHandling {
247 get { return entityHandling; }
248 set { entityHandling = value; }
252 public override bool EOF {
253 get { return readState == ReadState.EndOfFile; }
256 public override bool HasValue {
257 get { return cursorToken.Value != null; }
260 public override bool IsDefault {
261 // XmlTextReader does not expand default attributes.
262 get { return false; }
265 public override bool IsEmptyElement {
266 get { return cursorToken.IsEmptyElement; }
271 public override string this [int i] {
272 get { return GetAttribute (i); }
275 public override string this [string name] {
276 get { return GetAttribute (name); }
279 public override string this [string localName, string namespaceName] {
280 get { return GetAttribute (localName, namespaceName); }
284 public int LineNumber {
286 if (useProceedingLineInfo)
289 return cursorToken.LineNumber;
293 public int LinePosition {
295 if (useProceedingLineInfo)
298 return cursorToken.LinePosition;
302 public override string LocalName {
303 get { return cursorToken.LocalName; }
306 public override string Name {
307 get { return cursorToken.Name; }
310 public bool Namespaces {
311 get { return namespaces; }
313 if (readState != ReadState.Initial)
314 throw new InvalidOperationException ("Namespaces have to be set before reading.");
319 public override string NamespaceURI {
320 get { return cursorToken.NamespaceURI; }
323 public override XmlNameTable NameTable {
324 get { return parserContext.NameTable; }
327 public override XmlNodeType NodeType {
328 get { return cursorToken.NodeType; }
331 public bool Normalization {
332 get { return normalization; }
333 set { normalization = value; }
336 public override string Prefix {
337 get { return cursorToken.Prefix; }
341 public bool ProhibitDtd {
342 get { return prohibitDtd; }
343 set { prohibitDtd = value; }
347 public override char QuoteChar {
348 get { return cursorToken.QuoteChar; }
351 public override ReadState ReadState {
352 get { return readState; }
356 public override XmlReaderSettings Settings {
357 get { return base.Settings; }
361 public override string Value {
362 get { return cursorToken.Value != null ? cursorToken.Value : String.Empty; }
365 public WhitespaceHandling WhitespaceHandling {
366 get { return whitespaceHandling; }
367 set { whitespaceHandling = value; }
370 public override string XmlLang {
371 get { return parserContext.XmlLang; }
374 public XmlResolver XmlResolver {
375 set { resolver = value; }
378 public override XmlSpace XmlSpace {
379 get { return parserContext.XmlSpace; }
386 public override void Close ()
388 readState = ReadState.Closed;
390 cursorToken.Clear ();
391 currentToken.Clear ();
393 if (closeInput && reader != null)
// Returns the value of the attribute at index i.
// Throws ArgumentOutOfRangeException when i is negative or is not
// smaller than attributeCount. Negative indexes are rejected here as
// well, so they never reach the array access below.
public override string GetAttribute (int i)
{
	if (i < 0 || i >= attributeCount)
		throw new ArgumentOutOfRangeException ("i", i, "i must be non-negative and smaller than AttributeCount.");

	return attributeTokens [i].Value;
}
406 // MS.NET 1.0 msdn says that this method returns String.Empty
407 // for absent attribute, but in fact it returns null.
408 // This description is corrected in MS.NET 1.1 msdn.
409 public override string GetAttribute (string name)
411 for (int i = 0; i < attributeCount; i++)
412 if (attributeTokens [i].Name == name)
413 return attributeTokens [i].Value;
417 private int GetIndexOfQualifiedAttribute (string localName, string namespaceURI)
419 for (int i = 0; i < attributeCount; i++) {
420 XmlAttributeTokenInfo ti = attributeTokens [i];
421 if (ti.LocalName == localName && ti.NamespaceURI == namespaceURI)
427 XmlParserContext IHasXmlParserContext.ParserContext {
428 get { return parserContext; }
431 public override string GetAttribute (string localName, string namespaceURI)
433 int idx = this.GetIndexOfQualifiedAttribute (localName, namespaceURI);
436 return attributeTokens [idx].Value;
440 public IDictionary<string, string> GetNamespacesInScope (XmlNamespaceScope scope)
442 return parserContext.NamespaceManager.GetNamespacesInScope (scope);
445 IDictionary<string, string> IXmlNamespaceResolver.GetNamespacesInScope (XmlNamespaceScope scope)
447 return GetNamespacesInScope (scope);
451 public TextReader GetRemainder ()
453 if (peekCharsLength < 0)
455 return new StringReader (new string (peekChars, peekCharsIndex, peekCharsLength - peekCharsIndex) + reader.ReadToEnd ());
459 public bool HasLineInfo ()
461 bool IXmlLineInfo.HasLineInfo ()
467 public override string LookupNamespace (string prefix)
469 return LookupNamespace (prefix, false);
// Looks up the namespace URI bound to prefix in the parser context's
// namespace manager. An empty-string result from the manager is
// reported to the caller as null; any other result (including null)
// is returned unchanged.
private string LookupNamespace (string prefix, bool atomizedNames)
{
	XmlNamespaceManager nsmgr = parserContext.NamespaceManager;
	string uri = nsmgr.LookupNamespace (prefix, atomizedNames);
	if (uri == String.Empty)
		return null;
	return uri;
}
480 string IXmlNamespaceResolver.LookupPrefix (string ns)
482 return LookupPrefix (ns, false);
485 public string LookupPrefix (string ns, bool atomizedName)
487 return parserContext.NamespaceManager.LookupPrefix (ns, atomizedName);
// Moves the cursor to the attribute at index i and resets the
// attribute-value position.
// Throws ArgumentOutOfRangeException when i is negative or is not
// smaller than attributeCount.
public override void MoveToAttribute (int i)
{
	// Validate both bounds before touching any cursor field, so that a
	// bad index leaves the reader positioned where it was.
	if (i < 0 || i >= attributeCount)
		throw new ArgumentOutOfRangeException ("i", i, "attribute index out of range.");

	currentAttribute = i;
	currentAttributeValue = -1;
	cursorToken = attributeTokens [i];
}
501 public override bool MoveToAttribute (string name)
503 for (int i = 0; i < attributeCount; i++) {
504 XmlAttributeTokenInfo ti = attributeTokens [i];
505 if (ti.Name == name) {
513 public override bool MoveToAttribute (string localName, string namespaceName)
515 int idx = GetIndexOfQualifiedAttribute (localName, namespaceName);
518 MoveToAttribute (idx);
522 public override bool MoveToElement ()
524 if (currentToken == null) // for attribute .ctor()
527 if (cursorToken == currentToken)
530 if (currentAttribute >= 0) {
531 currentAttribute = -1;
532 currentAttributeValue = -1;
533 cursorToken = currentToken;
540 public override bool MoveToFirstAttribute ()
542 if (attributeCount == 0)
545 return MoveToNextAttribute ();
548 public override bool MoveToNextAttribute ()
550 if (currentAttribute == 0 && attributeCount == 0)
552 if (currentAttribute + 1 < attributeCount) {
554 currentAttributeValue = -1;
555 cursorToken = attributeTokens [currentAttribute];
562 public override bool Read ()
564 if (readState == ReadState.Closed)
566 curNodePeekIndex = peekCharsIndex;
567 preserveCurrentTag = true;
570 if (startNodeType == XmlNodeType.Attribute) {
571 if (currentAttribute == 0)
572 return false; // already read.
573 SkipTextDeclaration ();
575 IncrementAttributeToken ();
576 ReadAttributeValueTokens ('"');
577 cursorToken = attributeTokens [0];
578 currentAttributeValue = -1;
579 readState = ReadState.Interactive;
582 if (readState == ReadState.Initial && currentState == XmlNodeType.Element)
583 SkipTextDeclaration ();
589 readState = ReadState.Interactive;
590 currentLinkedNodeLineNumber = line;
591 currentLinkedNodeLinePosition = column;
592 useProceedingLineInfo = true;
594 cursorToken = currentToken;
596 currentAttribute = currentAttributeValue = -1;
597 currentToken.Clear ();
599 // It was moved from end of ReadStartTag ().
605 if (readCharsInProgress) {
606 readCharsInProgress = false;
607 return ReadUntilEndTag ();
610 more = ReadContent ();
612 if (!more && startNodeType == XmlNodeType.Document && currentState != XmlNodeType.EndElement)
613 throw NotWFError ("Document element did not appear.");
615 useProceedingLineInfo = false;
619 public override bool ReadAttributeValue ()
621 if (readState == ReadState.Initial && startNodeType == XmlNodeType.Attribute) {
625 if (currentAttribute < 0)
627 XmlAttributeTokenInfo ti = attributeTokens [currentAttribute];
628 if (currentAttributeValue < 0)
629 currentAttributeValue = ti.ValueTokenStartIndex - 1;
631 if (currentAttributeValue < ti.ValueTokenEndIndex) {
632 currentAttributeValue++;
633 cursorToken = attributeValueTokens [currentAttributeValue];
640 public int ReadBase64 (byte [] buffer, int offset, int length)
642 BinaryCharGetter = binaryCharGetter;
644 return Binary.ReadBase64 (buffer, offset, length);
646 BinaryCharGetter = null;
650 public int ReadBinHex (byte [] buffer, int offset, int length)
652 BinaryCharGetter = binaryCharGetter;
654 return Binary.ReadBinHex (buffer, offset, length);
656 BinaryCharGetter = null;
660 public int ReadChars (char [] buffer, int offset, int length)
663 throw new ArgumentOutOfRangeException ("offset", offset, "Offset must be non-negative integer.");
665 throw new ArgumentOutOfRangeException ("length", length, "Length must be non-negative integer.");
666 else if (buffer.Length < offset + length)
667 throw new ArgumentOutOfRangeException ("buffer length is smaller than the sum of offset and length.");
669 if (IsEmptyElement) {
674 if (!readCharsInProgress && NodeType != XmlNodeType.Element)
677 preserveCurrentTag = false;
678 readCharsInProgress = true;
679 useProceedingLineInfo = true;
681 return ReadCharsInternal (buffer, offset, length);
684 public void ResetState ()
687 throw new InvalidOperationException ("Cannot call ResetState when parsing an XML fragment.");
691 public override void ResolveEntity ()
693 // XmlTextReader does not resolve entities.
694 throw new InvalidOperationException ("XmlTextReader cannot resolve external entities.");
698 [MonoTODO] // FIXME: Implement, for performance improvement
699 public override void Skip ()
707 // Parsed DTD Objects
708 // Note that this property must be kept since dtd2xsd uses it.
709 internal DTDObjectModel DTD {
710 get { return parserContext.Dtd; }
713 internal XmlResolver Resolver {
714 get { return resolver; }
719 internal class XmlTokenInfo
721 public XmlTokenInfo (XmlTextReader xtr)
729 protected XmlTextReader Reader;
732 public string LocalName;
733 public string Prefix;
734 public string NamespaceURI;
735 public bool IsEmptyElement;
736 public char QuoteChar;
737 public int LineNumber;
738 public int LinePosition;
739 public int ValueBufferStart;
740 public int ValueBufferEnd;
742 public XmlNodeType NodeType;
744 public virtual string Value {
746 if (valueCache != null)
748 if (ValueBufferStart >= 0) {
749 //Console.WriteLine (NodeType + " / " + ValueBuffer.Length + " / " + ValueBufferStart + " / " + ValueBufferEnd);
750 valueCache = Reader.valueBuffer.ToString (ValueBufferStart, ValueBufferEnd - ValueBufferStart);
754 case XmlNodeType.Text:
755 case XmlNodeType.SignificantWhitespace:
756 case XmlNodeType.Whitespace:
757 case XmlNodeType.Comment:
758 case XmlNodeType.CDATA:
759 case XmlNodeType.ProcessingInstruction:
760 valueCache = Reader.CreateValueString ();
765 set { valueCache = value; }
768 public virtual void Clear ()
770 ValueBufferStart = -1;
772 NodeType = XmlNodeType.None;
773 Name = LocalName = Prefix = NamespaceURI = String.Empty;
774 IsEmptyElement = false;
776 LineNumber = LinePosition = 0;
780 internal class XmlAttributeTokenInfo : XmlTokenInfo
782 public XmlAttributeTokenInfo (XmlTextReader reader)
785 NodeType = XmlNodeType.Attribute;
788 public int ValueTokenStartIndex;
789 public int ValueTokenEndIndex;
791 StringBuilder tmpBuilder = new StringBuilder ();
793 public override string Value {
795 if (valueCache != null)
798 // An empty value should return String.Empty.
799 if (ValueTokenStartIndex == ValueTokenEndIndex) {
800 XmlTokenInfo ti = Reader.attributeValueTokens [ValueTokenStartIndex];
801 if (ti.NodeType == XmlNodeType.EntityReference)
802 valueCache = String.Concat ("&", ti.Name, ";");
804 valueCache = ti.Value;
808 tmpBuilder.Length = 0;
809 for (int i = ValueTokenStartIndex; i <= ValueTokenEndIndex; i++) {
810 XmlTokenInfo ti = Reader.attributeValueTokens [i];
811 if (ti.NodeType == XmlNodeType.Text)
812 tmpBuilder.Append (ti.Value);
814 tmpBuilder.Append ('&');
815 tmpBuilder.Append (ti.Name);
816 tmpBuilder.Append (';');
820 valueCache = tmpBuilder.ToString (0, tmpBuilder.Length);
824 set { valueCache = value; }
827 public override void Clear ()
831 NodeType = XmlNodeType.Attribute;
832 ValueTokenStartIndex = ValueTokenEndIndex = 0;
// Registers this attribute with the reader's namespace manager when it
// is a namespace declaration: LocalName is used as the prefix when
// Prefix is the xmlns prefix instance, and the empty prefix is used
// when Name itself is that instance. Any other attribute is ignored.
// The comparisons are by reference, not by string content.
internal void FillXmlns ()
{
	string declaredPrefix;
	if (Object.ReferenceEquals (Prefix, XmlNamespaceManager.PrefixXmlns))
		declaredPrefix = LocalName;
	else if (Object.ReferenceEquals (Name, XmlNamespaceManager.PrefixXmlns))
		declaredPrefix = String.Empty;
	else
		return;

	Reader.parserContext.NamespaceManager.AddNamespace (declaredPrefix, Value);
}
// Assigns NamespaceURI for this attribute:
//  - namespace declarations (Prefix or Name is the xmlns prefix
//    instance, compared by reference) get XmlNamespaceManager.XmlnsXmlns;
//  - attributes with an empty prefix get the empty string;
//  - everything else is looked up through the reader by prefix.
internal void FillNamespace ()
{
	bool isXmlnsDeclaration =
		Object.ReferenceEquals (Prefix, XmlNamespaceManager.PrefixXmlns) ||
		Object.ReferenceEquals (Name, XmlNamespaceManager.PrefixXmlns);

	if (isXmlnsDeclaration) {
		NamespaceURI = XmlNamespaceManager.XmlnsXmlns;
		return;
	}

	NamespaceURI = Prefix.Length == 0
		? string.Empty
		: Reader.LookupNamespace (Prefix, true);
}
855 private XmlTokenInfo cursorToken;
856 private XmlTokenInfo currentToken;
857 private XmlAttributeTokenInfo currentAttributeToken;
858 private XmlTokenInfo currentAttributeValueToken;
859 private XmlAttributeTokenInfo [] attributeTokens = new XmlAttributeTokenInfo [10];
860 private XmlTokenInfo [] attributeValueTokens = new XmlTokenInfo [10];
861 private int currentAttribute;
862 private int currentAttributeValue;
863 private int attributeCount;
865 private XmlParserContext parserContext;
867 private ReadState readState;
868 private bool disallowReset;
871 private int elementDepth;
872 private bool depthUp;
874 private bool popScope;
878 public TagName (string n, string l, string p)
885 public readonly string Name;
886 public readonly string LocalName;
887 public readonly string Prefix;
890 private TagName [] elementNames;
891 int elementNameStackPos;
893 private bool allowMultipleRoot;
895 private bool isStandalone;
897 private bool returnEntityReference;
898 private string entityReferenceName;
901 private char [] nameBuffer;
902 private int nameLength;
903 private int nameCapacity;
904 private const int initialNameCapacity = 32;
907 private StringBuilder valueBuffer;
909 private TextReader reader;
910 private char [] peekChars;
911 private int peekCharsIndex;
912 private int peekCharsLength;
913 private int curNodePeekIndex;
914 private bool preserveCurrentTag;
915 private const int peekCharCapacity = 1024;
920 private int currentLinkedNodeLineNumber;
921 private int currentLinkedNodeLinePosition;
922 private bool useProceedingLineInfo;
924 private XmlNodeType startNodeType;
925 // State machine attribute.
926 // XmlDeclaration: after the first node.
927 // DocumentType: after doctypedecl
928 // Element: inside document element
929 // EndElement: after document element
930 private XmlNodeType currentState;
932 // For ReadChars()/ReadBase64()/ReadBinHex()
933 private int nestLevel;
934 private bool readCharsInProgress;
935 XmlReaderBinarySupport.CharGetter binaryCharGetter;
937 // These values are never re-initialized.
938 private bool namespaces = true;
939 private WhitespaceHandling whitespaceHandling = WhitespaceHandling.All;
940 private XmlResolver resolver = new XmlUrlResolver ();
941 private bool normalization = false;
943 private bool checkCharacters;
944 private bool prohibitDtd = false;
945 private bool closeInput = true;
946 private EntityHandling entityHandling; // 2.0
948 private NameTable whitespacePool;
949 private char [] whitespaceCache;
// Creates (but does not throw) the XmlException for the given message,
// passing this reader as the line-info source together with the
// current BaseURI. Callers write "throw NotWFError (...)".
private XmlException NotWFError (string message)
{
	IXmlLineInfo lineInfo = this as IXmlLineInfo;
	string baseUri = BaseURI;
	return new XmlException (lineInfo, baseUri, message);
}
958 allowMultipleRoot = false;
959 elementNames = new TagName [10];
960 valueBuffer = new StringBuilder ();
961 binaryCharGetter = new XmlReaderBinarySupport.CharGetter (ReadChars);
963 nameBuffer = new char [initialNameCapacity];
966 checkCharacters = true;
968 if (Settings != null)
969 checkCharacters = Settings.CheckCharacters;
973 entityHandling = EntityHandling.ExpandCharEntities;
976 if (peekChars == null)
977 peekChars = new char [peekCharCapacity];
978 peekCharsLength = -1;
979 curNodePeekIndex = -1; // read from start
984 currentLinkedNodeLineNumber = currentLinkedNodeLinePosition = 0;
989 private void Clear ()
991 currentToken = new XmlTokenInfo (this);
992 cursorToken = currentToken;
993 currentAttribute = -1;
994 currentAttributeValue = -1;
997 readState = ReadState.Initial;
1003 popScope = allowMultipleRoot = false;
1004 elementNameStackPos = 0;
1006 isStandalone = false;
1007 returnEntityReference = false;
1008 entityReferenceName = String.Empty;
1012 nameCapacity = initialNameCapacity;
1014 useProceedingLineInfo = false;
1016 currentState = XmlNodeType.None;
1018 readCharsInProgress = false;
1021 private void InitializeContext (string url, XmlParserContext context, TextReader fragment, XmlNodeType fragType)
1023 startNodeType = fragType;
1024 parserContext = context;
1025 if (context == null) {
1026 XmlNameTable nt = new NameTable ();
1027 parserContext = new XmlParserContext (nt,
1028 new XmlNamespaceManager (nt),
1033 if (url != null && url.Length > 0) {
1036 uri = new Uri (url);
1037 } catch (Exception) {
1038 string path = Path.GetFullPath ("./a");
1039 uri = new Uri (new Uri (path), url);
1041 parserContext.BaseURI = uri.ToString ();
1049 case XmlNodeType.Attribute:
1050 reader = new StringReader (fragment.ReadToEnd ().Replace ("\"", """));
1052 case XmlNodeType.Element:
1053 currentState = XmlNodeType.Element;
1054 allowMultipleRoot = true;
1056 case XmlNodeType.Document:
1059 throw new XmlException (String.Format ("NodeType {0} is not allowed to create XmlTextReader.", fragType));
1064 internal ConformanceLevel Conformance {
1065 get { return allowMultipleRoot ? ConformanceLevel.Fragment : ConformanceLevel.Document; }
1067 if (value == ConformanceLevel.Fragment) {
1068 currentState = XmlNodeType.Element;
1069 allowMultipleRoot = true;
// Shifts the reader's current line and column counters by the given
// offsets (each offset is simply added to the matching counter).
internal void AdjustLineInfoOffset (int lineNumberOffset, int linePositionOffset)
{
	line = line + lineNumberOffset;
	column = column + linePositionOffset;
}
1080 internal void SetNameTable (XmlNameTable nameTable)
1082 parserContext.NameTable = nameTable;
1086 // Use this method rather than setting the properties
1087 // directly so that all the necessary properties can
1088 // be changed in harmony with each other. Maybe the
1089 // fields should be in a separate class to help enforce
1092 // Namespace URI could not be provided here.
1093 private void SetProperties (
1094 XmlNodeType nodeType,
1098 bool isEmptyElement,
1100 bool clearAttributes)
1102 SetTokenProperties (currentToken, nodeType, name, prefix, localName, isEmptyElement, value, clearAttributes);
1103 currentToken.LineNumber = this.currentLinkedNodeLineNumber;
1104 currentToken.LinePosition = this.currentLinkedNodeLinePosition;
1107 private void SetTokenProperties (
1109 XmlNodeType nodeType,
1113 bool isEmptyElement,
1115 bool clearAttributes)
1117 token.NodeType = nodeType;
1119 token.Prefix = prefix;
1120 token.LocalName = localName;
1121 token.IsEmptyElement = isEmptyElement;
1122 token.Value = value;
1123 this.elementDepth = depth;
1125 if (clearAttributes)
1129 private void ClearAttributes ()
1131 //for (int i = 0; i < attributeCount; i++)
1132 // attributeTokens [i].Clear ();
1134 currentAttribute = -1;
1135 currentAttributeValue = -1;
1138 private int PeekSurrogate (int c)
1140 if (peekCharsLength <= peekCharsIndex + 1) {
1141 if (!ReadTextReader (c))
1142 //FIXME: copy MS.NET behaviour when unpaired surrogate found
1146 int highhalfChar = peekChars [peekCharsIndex];
1147 int lowhalfChar = peekChars [peekCharsIndex+1];
1149 if (((highhalfChar & 0xFC00) != 0xD800) || ((lowhalfChar & 0xFC00) != 0xDC00))
1150 //FIXME: copy MS.NET behaviour when unpaired surrogate found
1151 return highhalfChar;
1152 return 0x10000 + (highhalfChar-0xD800)*0x400 + (lowhalfChar-0xDC00);
1155 private int PeekChar ()
1157 if (peekCharsIndex < peekCharsLength) {
1158 int c = peekChars [peekCharsIndex];
1161 if (c < 0xD800 || c >= 0xDFFF)
1163 return PeekSurrogate (c);
1165 if (!ReadTextReader (-1))
1171 private int ReadChar ()
1173 int ch = PeekChar ();
1177 peekCharsIndex++; //Increment by 2 when a compound UCS-4 character was found
1182 } else if (ch != -1) {
1188 private void Advance (int ch) {
1192 peekCharsIndex++; //Increment by 2 when a compound UCS-4 character was found
1197 } else if (ch != -1) {
1202 private bool ReadTextReader (int remained)
1204 if (peekCharsLength < 0) { // initialized buffer
1205 peekCharsLength = reader.Read (peekChars, 0, peekChars.Length);
1206 return peekCharsLength > 0;
1208 int offset = remained >= 0 ? 1 : 0;
1209 int copysize = peekCharsLength - curNodePeekIndex;
1211 // It must assure that current tag content always exists
1213 if (!preserveCurrentTag) {
1214 curNodePeekIndex = 0;
1217 } else if (peekCharsLength < peekChars.Length) {
1218 // NonBlockingStreamReader returned less bytes
1219 // than the size of the buffer. In that case,
1220 // just refill the buffer.
1221 } else if (curNodePeekIndex <= (peekCharsLength >> 1)) {
1222 // extend the buffer
1223 char [] tmp = new char [peekChars.Length * 2];
1224 Array.Copy (peekChars, curNodePeekIndex,
1227 curNodePeekIndex = 0;
1228 peekCharsIndex = copysize;
1230 Array.Copy (peekChars, curNodePeekIndex,
1231 peekChars, 0, copysize);
1232 curNodePeekIndex = 0;
1233 peekCharsIndex = copysize;
1236 peekChars [peekCharsIndex] = (char) remained;
1237 int count = peekChars.Length - peekCharsIndex - offset;
1238 if (count > peekCharCapacity)
1239 count = peekCharCapacity;
1240 int read = reader.Read (
1241 peekChars, peekCharsIndex + offset, count);
1242 int remainingSize = offset + read;
1243 peekCharsLength = peekCharsIndex + remainingSize;
1245 return (remainingSize != 0);
1248 private bool ReadContent ()
1251 parserContext.NamespaceManager.PopScope ();
1252 parserContext.PopScope ();
1256 if (returnEntityReference)
1257 SetEntityReferenceProperties ();
1259 int c = PeekChar ();
1261 readState = ReadState.EndOfFile;
1262 ClearValueBuffer ();
1264 XmlNodeType.None, // nodeType
1265 String.Empty, // name
1266 String.Empty, // prefix
1267 String.Empty, // localName
1268 false, // isEmptyElement
1270 true // clearAttributes
1273 throw NotWFError ("unexpected end of file. Current depth is " + depth);
1280 switch (PeekChar ())
1288 ReadProcessingInstruction ();
1303 if (!ReadWhitespace ())
1305 return ReadContent ();
1313 return this.ReadState != ReadState.EndOfFile;
1316 private void SetEntityReferenceProperties ()
1318 DTDEntityDeclaration decl = DTD != null ? DTD.EntityDecls [entityReferenceName] : null;
1319 if (this.isStandalone)
1320 if (DTD == null || decl == null || !decl.IsInternalSubset)
1321 throw NotWFError ("Standalone document must not contain any references to an non-internally declared entity.");
1322 if (decl != null && decl.NotationName != null)
1323 throw NotWFError ("Reference to any unparsed entities is not allowed here.");
1325 ClearValueBuffer ();
1327 XmlNodeType.EntityReference, // nodeType
1328 entityReferenceName, // name
1329 String.Empty, // prefix
1330 entityReferenceName, // localName
1331 false, // isEmptyElement
1333 true // clearAttributes
1336 returnEntityReference = false;
1337 entityReferenceName = String.Empty;
1340 // The leading '<' has already been consumed.
1341 private void ReadStartTag ()
1343 if (currentState == XmlNodeType.EndElement)
1344 throw NotWFError ("Multiple document element was detected.");
1345 currentState = XmlNodeType.Element;
1347 parserContext.NamespaceManager.PushScope ();
1349 currentLinkedNodeLineNumber = line;
1350 currentLinkedNodeLinePosition = column;
1352 string prefix, localName;
1353 string name = ReadName (out prefix, out localName);
1354 if (currentState == XmlNodeType.EndElement)
1355 throw NotWFError ("document has terminated, cannot open new element");
1357 bool isEmptyElement = false;
1362 if (XmlChar.IsFirstNameChar (PeekChar ()))
1363 ReadAttributes (false);
1364 cursorToken = this.currentToken;
1367 for (int i = 0; i < attributeCount; i++)
1368 attributeTokens [i].FillXmlns ();
1369 for (int i = 0; i < attributeCount; i++)
1370 attributeTokens [i].FillNamespace ();
1374 for (int i = 0; i < attributeCount; i++)
1375 if (attributeTokens [i].Prefix == "xmlns" &&
1376 attributeTokens [i].Value == String.Empty)
1377 throw NotWFError ("Empty namespace URI cannot be mapped to non-empty prefix.");
1379 for (int i = 0; i < attributeCount; i++) {
1380 for (int j = i + 1; j < attributeCount; j++)
1381 if (Object.ReferenceEquals (attributeTokens [i].Name, attributeTokens [j].Name) ||
1382 (Object.ReferenceEquals (attributeTokens [i].LocalName, attributeTokens [j].LocalName) &&
1383 Object.ReferenceEquals (attributeTokens [i].NamespaceURI, attributeTokens [j].NamespaceURI)))
1384 throw NotWFError ("Attribute name and qualified name must be identical.");
1387 if (PeekChar () == '/') {
1389 isEmptyElement = true;
1394 PushElementName (name, localName, prefix);
1396 parserContext.PushScope ();
1401 XmlNodeType.Element, // nodeType
1405 isEmptyElement, // isEmptyElement
1407 false // clearAttributes
1409 if (prefix.Length > 0)
1410 currentToken.NamespaceURI = LookupNamespace (prefix, true);
1411 else if (namespaces)
1412 currentToken.NamespaceURI = parserContext.NamespaceManager.DefaultNamespace;
1415 if (NamespaceURI == null)
1416 throw NotWFError (String.Format ("'{0}' is undeclared namespace.", Prefix));
1418 for (int i = 0; i < attributeCount; i++) {
1419 MoveToAttribute (i);
1420 if (NamespaceURI == null)
1421 throw NotWFError (String.Format ("'{0}' is undeclared namespace.", Prefix));
1428 for (int i = 0; i < attributeCount; i++) {
1429 if (!Object.ReferenceEquals (attributeTokens [i].Prefix, XmlNamespaceManager.PrefixXml))
1431 string aname = attributeTokens [i].LocalName;
1432 string value = attributeTokens [i].Value;
1435 if (this.resolver != null) {
1437 BaseURI != String.Empty ?
1438 new Uri (BaseURI) : null;
1439 Uri uri = resolver.ResolveUri (
1441 parserContext.BaseURI =
1447 parserContext.BaseURI = value;
1450 parserContext.XmlLang = value;
1455 parserContext.XmlSpace = XmlSpace.Preserve;
1458 parserContext.XmlSpace = XmlSpace.Default;
1461 throw NotWFError (String.Format ("Invalid xml:space value: {0}", value));
1468 CheckCurrentStateUpdate ();
// Pushes a (name, local name, prefix) entry onto the element-name
// stack. When the backing array is full it is replaced by one of twice
// the size, with the existing entries copied over.
private void PushElementName (string name, string local, string prefix)
{
	int slot = elementNameStackPos;
	if (slot == elementNames.Length) {
		TagName [] grown = new TagName [slot * 2];
		Array.Copy (elementNames, 0, grown, 0, slot);
		elementNames = grown;
	}

	elementNames [slot] = new TagName (name, local, prefix);
	elementNameStackPos = slot + 1;
}
1482 // The reader is positioned on the first character
1483 // of the element's name.
// Handles an end tag. Requires currentState to be Element, pops the
// innermost entry of the element-name stack, requires the input to spell
// that entry's qualified name followed (after optional whitespace) by '>',
// and describes the resulting EndElement node.
// Raises the not-well-formed error when the state is wrong or when the
// element-name stack is already empty.
1484 private void ReadEndTag ()
1486 if (currentState != XmlNodeType.Element)
1487 throw NotWFError ("End tag cannot appear in this state.");
// Record the current line and column for this node.
1489 currentLinkedNodeLineNumber = line;
1490 currentLinkedNodeLinePosition = column;
// Pop the element being closed and require its name in the input.
1492 if (elementNameStackPos == 0)
1493 throw NotWFError ("closing element without matching opening element");
1494 TagName expected = elementNames [--elementNameStackPos];
1495 Expect (expected.Name);
1497 ExpectAfterWhitespace ('>');
1502 XmlNodeType.EndElement, // nodeType
1503 expected.Name, // name
1504 expected.Prefix, // prefix
1505 expected.LocalName, // localName
1506 false, // isEmptyElement
1508 true // clearAttributes
// Namespace URI: looked up by prefix when there is one, otherwise the
// default namespace when namespace support is enabled.
1510 if (expected.Prefix.Length > 0)
1511 currentToken.NamespaceURI = LookupNamespace (expected.Prefix, true);
1512 else if (namespaces)
1513 currentToken.NamespaceURI = parserContext.NamespaceManager.DefaultNamespace;
1517 CheckCurrentStateUpdate ();
// Moves the parser state to EndElement once an element at depth 0 has been
// closed (an empty element or an end tag), unless multiple roots are allowed.
private void CheckCurrentStateUpdate ()
{
	if (depth != 0 || allowMultipleRoot)
		return;

	bool closesElement = IsEmptyElement || NodeType == XmlNodeType.EndElement;
	if (closesElement)
		currentState = XmlNodeType.EndElement;
}
// Appends the code point ch to nameBuffer as two UTF-16 code units
// (high surrogate, then low surrogate). The buffer is expanded between the
// two writes when the first one fills it; the caller is responsible for
// there being room for the first unit.
private void AppendSurrogatePairNameChar (int ch)
{
	int offset = ch - 0x10000;
	char high = (char) (offset / 0x400 + 0xD800);
	char low = (char) (offset % 0x400 + 0xDC00);

	nameBuffer [nameLength++] = high;
	if (nameLength == nameCapacity)
		ExpandNameCapacity ();
	nameBuffer [nameLength++] = low;
}
// Doubles nameCapacity and replaces nameBuffer with an array of the new
// size that holds the first nameLength characters of the old buffer.
private void ExpandNameCapacity ()
{
	nameCapacity *= 2;
	char [] grown = new char [nameCapacity];
	Array.Copy (nameBuffer, grown, nameLength);
	nameBuffer = grown;
}
// Appends the code point ch to valueBuffer: as a single UTF-16 code unit
// when it fits in one, otherwise as a surrogate pair.
private void AppendValueChar (int ch)
{
	// The comparison has to be inclusive: Char.MaxValue (U+FFFF) is itself a
	// single code unit. With a strict '<' it was routed to the surrogate
	// path, where (0xFFFF - 0x10000) produces the bogus pair D800 DBFF.
	if (ch <= Char.MaxValue)
		valueBuffer.Append ((char) ch);
	else
		AppendSurrogatePairValueChar (ch);
}
// Appends the code point ch to valueBuffer as a UTF-16 surrogate pair:
// the high surrogate first, then the low surrogate.
private void AppendSurrogatePairValueChar (int ch)
{
	int offset = ch - 0x10000;
	char high = (char) (offset / 0x400 + 0xD800);
	char low = (char) (offset % 0x400 + 0xDC00);

	valueBuffer.Append (high).Append (low);
}
// Returns the text accumulated in valueBuffer as a string.
// For Whitespace and SignificantWhitespace the characters are copied into
// whitespaceCache and atomized through whitespacePool (both created on
// first use), so repeated whitespace runs share one string instance.
// Otherwise the string is taken directly from valueBuffer.
1558 private string CreateValueString ()
1560 // Since whitespace strings are mostly identical
1561 // depending on the Depth, we make use of NameTable
1562 // to atomize whitespace strings.
1564 case XmlNodeType.Whitespace:
1565 case XmlNodeType.SignificantWhitespace:
1566 int len = valueBuffer.Length;
1567 if (whitespaceCache == null)
1568 whitespaceCache = new char [32];
// Runs that do not fit in the cache are not atomized.
1569 if (len >= whitespaceCache.Length)
1571 if (whitespacePool == null)
1572 whitespacePool = new NameTable ();
// Two alternative ways of filling the cache follow (bulk copy or a
// per-character loop); presumably selected by conditional compilation.
1574 valueBuffer.CopyTo (0, whitespaceCache, 0, len);
1576 for (int i = 0; i < len; i++)
1577 whitespaceCache [i] = valueBuffer [i];
1579 return whitespacePool.Add (whitespaceCache, 0, valueBuffer.Length);
// Small-capacity buffers use the ranged ToString overload.
1581 return (valueBuffer.Capacity < 100) ?
1582 valueBuffer.ToString (0, valueBuffer.Length) :
1583 valueBuffer.ToString ();
// Empties valueBuffer by resetting its length to zero, so the next node
// value starts from an empty buffer.
1586 private void ClearValueBuffer ()
1588 valueBuffer.Length = 0;
1591 // The reader is positioned on the first character
// Reads character data up to the next '<' or the end of input into
// valueBuffer and describes the resulting node.
//   notWhitespace - true when the caller already knows the content is not
//                   whitespace-only; the node is then a Text node.
//                   Otherwise the node is SignificantWhitespace when
//                   XmlSpace is Preserve, and Whitespace if not.
// When an entity reference is met before any text was collected, the entity
// reference properties are set instead.
// Raises the not-well-formed error when the state is not Element, when a
// disallowed character is found, or when "]]>" appears in the content.
1593 private void ReadText (bool notWhitespace)
1595 if (currentState != XmlNodeType.Element)
1596 throw NotWFError ("Text node cannot appear in this state.");
1597 preserveCurrentTag = false;
1600 ClearValueBuffer ();
1602 int ch = PeekChar ();
1603 bool previousWasCloseBracket = false;
1605 while (ch != '<' && ch != -1) {
1608 ch = ReadReference (false);
1609 if (returnEntityReference) // Returns -1 if char validation should not be done
1611 } else if (normalization && ch == '\r') {
1615 // append '\n' instead of '\r'.
1616 AppendValueChar ('\n');
1617 // and in case of "\r\n", discard '\r'.
1619 if (CharacterChecking && XmlChar.IsInvalid (ch))
1620 throw NotWFError ("Not allowed character was found.");
1624 // FIXME: it might be optimized by the JIT later,
1625 // AppendValueChar (ch);
// NOTE(review): the strict '<' sends U+FFFF itself to the surrogate-pair
// branch; '<=' looks like the intended comparison.
1627 if (ch < Char.MaxValue)
1628 valueBuffer.Append ((char) ch);
1630 AppendSurrogatePairValueChar (ch);
// Track "]]" so that a following '>' can be rejected.
1635 if (previousWasCloseBracket)
1636 if (PeekChar () == '>')
1637 throw NotWFError ("Inside text content, character sequence ']]>' is not allowed.");
1638 previousWasCloseBracket = true;
1640 else if (previousWasCloseBracket)
1641 previousWasCloseBracket = false;
1643 notWhitespace = true;
// An entity reference with no preceding text becomes the node itself.
1646 if (returnEntityReference && valueBuffer.Length == 0) {
1647 SetEntityReferenceProperties ();
1649 XmlNodeType nodeType = notWhitespace ? XmlNodeType.Text :
1650 this.XmlSpace == XmlSpace.Preserve ? XmlNodeType.SignificantWhitespace : XmlNodeType.Whitespace;
1652 nodeType, // nodeType
1653 String.Empty, // name
1654 String.Empty, // prefix
1655 String.Empty, // localName
1656 false, // isEmptyElement
1657 null, // value: create only when required
1658 true // clearAttributes
1663 // The leading '&' has already been consumed.
1664 // Returns an int, not a bool: the result of ReadCharacterReference ()
1665 // when the next character is '#', otherwise the result of
1666 // ReadEntityReference (), which is documented to return -1 when
1667 // the result should not be validated as a character.
1668 private int ReadReference (bool ignoreEntityReferences)
1670 if (PeekChar () == '#') {
1672 return ReadCharacterReference ();
1674 return ReadEntityReference (ignoreEntityReferences);
// Parses the digits of a numeric character reference up to ';' and
// accumulates them into an integer code point: hexadecimal when the next
// character is 'x', decimal otherwise.
// Raises the not-well-formed error on a non-digit, and - only when both
// CharacterChecking and Normalization are on - on a code point that
// XmlChar.IsInvalid rejects.
// NOTE(review): no overflow guard is visible on the accumulation, so a very
// long digit run could wrap the int - confirm.
1677 private int ReadCharacterReference ()
1682 if (PeekChar () == 'x') {
1685 while ((ch = PeekChar ()) != ';' && ch != -1) {
1688 if (ch >= '0' && ch <= '9')
1689 value = (value << 4) + ch - '0';
1690 else if (ch >= 'A' && ch <= 'F')
1691 value = (value << 4) + ch - 'A' + 10;
1692 else if (ch >= 'a' && ch <= 'f')
1693 value = (value << 4) + ch - 'a' + 10;
1695 throw NotWFError (String.Format (CultureInfo.InvariantCulture,
1696 "invalid hexadecimal digit: {0} (#x{1:X})",
1701 while ((ch = PeekChar ()) != ';' && ch != -1) {
1704 if (ch >= '0' && ch <= '9')
1705 value = value * 10 + ch - '0';
1707 throw NotWFError (String.Format (CultureInfo.InvariantCulture,
1708 "invalid decimal digit: {0} (#x{1:X})",
1716 // There is no way to save surrogate pairs...
1717 if (CharacterChecking && Normalization &&
1718 XmlChar.IsInvalid (value))
1719 throw NotWFError ("Referenced character was not allowed in XML. Normalization is " + normalization + ", checkCharacters = " + checkCharacters);
// Reads the name of a general entity reference and handles it:
//  - a predefined entity is recognised through XmlChar.GetPredefinedEntity;
//  - otherwise, when ignoreEntityReferences is true, the literal text
//    "&name;" is appended to the value buffer;
//  - otherwise returnEntityReference is set and the name is stored in
//    entityReferenceName so the reference can be reported as its own node.
1723 // Returns -1 if it should not be validated.
1724 // Real EOF must not be detected here.
1725 private int ReadEntityReference (bool ignoreEntityReferences)
1727 string name = ReadName ();
1730 int predefined = XmlChar.GetPredefinedEntity (name);
1731 if (predefined >= 0)
1734 if (ignoreEntityReferences) {
1735 AppendValueChar ('&');
1736 for (int i = 0; i < name.Length; i++)
1737 AppendValueChar (name [i]);
1738 AppendValueChar (';');
1740 returnEntityReference = true;
1741 entityReferenceName = name;
1747 // The reader is positioned on the first character of
1748 // the attribute name.
// Reads a run of attributes (name, '=', quoted value) into the attribute
// token arrays until '?', '/', '>' or the end of input is peeked.
// Whitespace is required between consecutive attributes.
//   isXmlDecl - presumably true when parsing an XML declaration's
//               pseudo-attributes; its use is not visible in this excerpt.
// The attribute cursor (currentAttribute / currentAttributeValue) is reset
// to -1 both before and after the loop.
1749 private void ReadAttributes (bool isXmlDecl)
1752 bool requireWhitespace = false;
1753 currentAttribute = -1;
1754 currentAttributeValue = -1;
1757 if (!SkipWhitespace () && requireWhitespace)
1758 throw NotWFError ("Unexpected token. Name is required here.");
// Start a new attribute token at the current source position.
1760 IncrementAttributeToken ();
1761 currentAttributeToken.LineNumber = line;
1762 currentAttributeToken.LinePosition = column;
1764 string prefix, localName;
1765 currentAttributeToken.Name = ReadName (out prefix, out localName);
1766 currentAttributeToken.Prefix = prefix;
1767 currentAttributeToken.LocalName = localName;
1768 ExpectAfterWhitespace ('=');
// -1: the quote character is read from the input.
1770 ReadAttributeValueTokens (-1);
1771 // This hack is required for xmldecl which has
1772 // both effective attributes and Value.
1775 dummyValue = currentAttributeToken.Value;
// No whitespace after this value means another attribute may not follow.
1779 if (!SkipWhitespace ())
1780 requireWhitespace = true;
1781 peekChar = PeekChar ();
1783 if (peekChar == '?')
1786 else if (peekChar == '/' || peekChar == '>')
1788 } while (peekChar != -1);
1790 currentAttribute = -1;
1791 currentAttributeValue = -1;
// Adds a synthetic attribute: allocates the next attribute token, gives it
// the atomized name with empty prefix and namespace URI, then allocates the
// next attribute value token and fills it through SetTokenProperties.
//   name  - attribute name (atomized through the parser context's NameTable)
//   value - attribute value; presumably passed on to SetTokenProperties,
//           whose arguments are not visible in this excerpt.
1794 private void AddAttributeWithValue (string name, string value)
1796 IncrementAttributeToken ();
1797 XmlAttributeTokenInfo ati = attributeTokens [currentAttribute];
1798 ati.Name = parserContext.NameTable.Add (name);
1799 ati.Prefix = String.Empty;
1800 ati.NamespaceURI = String.Empty;
1801 IncrementAttributeValueToken ();
1802 XmlTokenInfo vti = attributeValueTokens [currentAttributeValue];
1803 SetTokenProperties (vti,
// Makes the attribute token at index currentAttribute the current one:
// doubles attributeTokens when that index is one past its end, creates the
// token object on first use of the slot, and clears it for reuse.
1815 private void IncrementAttributeToken ()
1818 if (attributeTokens.Length == currentAttribute) {
1819 XmlAttributeTokenInfo [] newArray =
1820 new XmlAttributeTokenInfo [attributeTokens.Length * 2];
1821 attributeTokens.CopyTo (newArray, 0);
1822 attributeTokens = newArray;
// Token objects are allocated lazily and recycled between nodes.
1824 if (attributeTokens [currentAttribute] == null)
1825 attributeTokens [currentAttribute] = new XmlAttributeTokenInfo (this);
1826 currentAttributeToken = attributeTokens [currentAttribute];
1827 currentAttributeToken.Clear ();
// Advances to the next attribute value token and makes it current.
// The token array is doubled when the new index is one past its end; the
// token object in the slot is created on first use and cleared for reuse.
private void IncrementAttributeValueToken ()
{
	currentAttributeValue++;

	if (currentAttributeValue == attributeValueTokens.Length) {
		XmlTokenInfo [] grown = new XmlTokenInfo [attributeValueTokens.Length * 2];
		attributeValueTokens.CopyTo (grown, 0);
		attributeValueTokens = grown;
	}

	XmlTokenInfo slot = attributeValueTokens [currentAttributeValue];
	if (slot == null) {
		slot = new XmlTokenInfo (this);
		attributeValueTokens [currentAttributeValue] = slot;
	}

	currentAttributeValueToken = slot;
	currentAttributeValueToken.Clear ();
}
1844 // LAMESPEC: Orthodox XML reader should normalize attribute values
// Reads one quoted attribute value into valueBuffer and splits it into
// value tokens: runs of text become Text tokens, and general entity
// references that are not expanded become EntityReference tokens.
//   dummyQuoteChar - negative to read the opening quote from the input;
//                    otherwise the quote character to assume (used, per the
//                    comment below, by the attribute value constructor).
// The range of value tokens belonging to the attribute is stored in
// ValueTokenStartIndex / ValueTokenEndIndex of the current attribute token.
// Raises the not-well-formed error for a missing quote, '<' in the value,
// end of input inside the value, or an invalid character.
1845 private void ReadAttributeValueTokens (int dummyQuoteChar)
1847 int quoteChar = (dummyQuoteChar < 0) ? ReadChar () : dummyQuoteChar;
1849 if (quoteChar != '\'' && quoteChar != '\"')
1850 throw NotWFError ("an attribute value was not quoted");
1851 currentAttributeToken.QuoteChar = (char) quoteChar;
// First value token of this attribute.
1853 IncrementAttributeValueToken ();
1854 currentAttributeToken.ValueTokenStartIndex = currentAttributeValue;
1855 currentAttributeValueToken.LineNumber = line;
1856 currentAttributeValueToken.LinePosition = column;
1858 bool incrementToken = false;
1859 bool isNewToken = true;
1862 currentAttributeValueToken.ValueBufferStart = valueBuffer.Length;
1865 if (ch == quoteChar)
// After an entity reference token, further text starts a fresh token.
1868 if (incrementToken) {
1869 IncrementAttributeValueToken ();
1870 currentAttributeValueToken.ValueBufferStart = valueBuffer.Length;
1871 currentAttributeValueToken.LineNumber = line;
1872 currentAttributeValueToken.LinePosition = column;
1873 incrementToken = false;
1880 throw NotWFError ("attribute values cannot contain '<'");
1882 if (dummyQuoteChar < 0)
1883 throw NotWFError ("unexpected end of file in an attribute value");
1884 else // Attribute value constructor.
1890 if (PeekChar () == '\n')
1891 continue; // skip '\r'.
1893 // The csc in MS.NET 2.0 beta 1 barfs on this goto, so work around that
1902 // When Normalize = true, then replace
1903 // all spaces to ' '
// Character reference: append the referenced character directly.
1909 if (PeekChar () == '#') {
1911 ch = ReadCharacterReference ();
1912 AppendValueChar (ch);
1915 // Check XML 1.0 section 3.1 WFC.
1916 string entName = ReadName ();
1918 int predefined = XmlChar.GetPredefinedEntity (entName);
1919 if (predefined < 0) {
1920 CheckAttributeEntityReferenceWFC (entName);
// Either expand the entity text in place ...
1922 if (entityHandling == EntityHandling.ExpandEntities) {
1923 string value = DTD.GenerateEntityAttributeText (entName);
1924 foreach (char c in value)
1925 AppendValueChar (c);
// ... or close the current Text token and emit an EntityReference token.
1929 currentAttributeValueToken.ValueBufferEnd = valueBuffer.Length;
1930 currentAttributeValueToken.NodeType = XmlNodeType.Text;
1932 IncrementAttributeValueToken ();
1933 currentAttributeValueToken.Name = entName;
1934 currentAttributeValueToken.Value = String.Empty;
1935 currentAttributeValueToken.NodeType = XmlNodeType.EntityReference;
1936 incrementToken = true;
1940 AppendValueChar (predefined);
1943 if (CharacterChecking && XmlChar.IsInvalid (ch))
1944 throw NotWFError ("Invalid character was found.");
1945 // FIXME: it might be optimized by the JIT later,
1946 // AppendValueChar (ch);
// NOTE(review): the strict '<' sends U+FFFF itself to the surrogate-pair
// branch; '<=' looks like the intended comparison.
1948 if (ch < Char.MaxValue)
1949 valueBuffer.Append ((char) ch);
1951 AppendSurrogatePairValueChar (ch);
// Close the trailing Text token unless the value ended on an entity reference.
1958 if (!incrementToken) {
1959 currentAttributeValueToken.ValueBufferEnd = valueBuffer.Length;
1960 currentAttributeValueToken.NodeType = XmlNodeType.Text;
1962 currentAttributeToken.ValueTokenEndIndex = currentAttributeValue;
// Enforces the well-formedness constraints on a general entity referenced
// from an attribute value.
//   entName - name of the referenced entity
// An undeclared entity is an error when entities are being expanded, or
// when a DTD and a resolver are both present; otherwise it is tolerated.
// A declared entity must not have an external reference, must come from the
// internal subset when the document is standalone, and must not contain '<'.
private void CheckAttributeEntityReferenceWFC (string entName)
{
	DTDEntityDeclaration decl = null;
	if (DTD != null)
		decl = DTD.EntityDecls [entName];

	if (decl == null) {
		bool mustBeDeclared = entityHandling == EntityHandling.ExpandEntities
			|| (DTD != null && resolver != null);
		if (mustBeDeclared)
			throw NotWFError (String.Format ("Referenced entity '{0}' does not exist.", entName));
		return;
	}

	if (decl.HasExternalReference)
		throw NotWFError ("Reference to external entities is not allowed in the value of an attribute.");

	if (isStandalone && !decl.IsInternalSubset)
		throw NotWFError ("Reference to external entities is not allowed in the internal subset.");

	if (decl.EntityValue.IndexOf ('<') >= 0)
		throw NotWFError ("Attribute must not contain character '<' either directly or indirectly by way of entity references.");
}
1986 // The reader is positioned on the first character
1989 // It may be xml declaration or processing instruction.
// Reads a processing instruction, or the XML declaration when the target is
// exactly "xml". The target name is read first, then the content up to
// "?>" is collected in valueBuffer.
// A target that equals "xml" only case-insensitively is rejected, as is a
// target not followed by whitespace or '?', and any invalid character when
// CharacterChecking is on.
1990 private void ReadProcessingInstruction ()
1992 string target = ReadName ();
1993 if (target != "xml" && target.ToLower (CultureInfo.InvariantCulture) == "xml")
1994 throw NotWFError ("Not allowed processing instruction name which starts with 'X', 'M', 'L' was found.");
1996 if (!SkipWhitespace ())
1997 if (PeekChar () != '?')
1998 throw NotWFError ("Invalid processing instruction name was found.");
2000 ClearValueBuffer ();
2003 while ((ch = PeekChar ()) != -1) {
2006 if (ch == '?' && PeekChar () == '>') {
2011 if (CharacterChecking && XmlChar.IsInvalid (ch))
2012 throw NotWFError ("Invalid character was found.");
2013 AppendValueChar (ch);
// Reference comparison: assumes target was atomized to the same string
// instance as XmlNamespaceManager.PrefixXml - TODO confirm.
2016 if (Object.ReferenceEquals (target, XmlNamespaceManager.PrefixXml))
2017 VerifyXmlDeclaration ();
// An ordinary PI as the first node moves the state past the point where an
// XML declaration is accepted.
2019 if (currentState == XmlNodeType.None)
2020 currentState = XmlNodeType.XmlDeclaration;
2023 XmlNodeType.ProcessingInstruction, // nodeType
2025 String.Empty, // prefix
2026 target, // localName
2027 false, // isEmptyElement
2028 null, // value: create only when required
2029 true // clearAttributes
// Validates the text collected for an XML declaration and exposes its
// pseudo-attributes as attributes of an XmlDeclaration node.
// Expected order: version="1.0" (mandatory), then optional encoding, then
// optional standalone ("yes" or "no"); anything else is an error.
// Raises the not-well-formed error when the declaration appears in the
// wrong state (unless multiple roots are allowed) or is malformed.
2034 void VerifyXmlDeclaration ()
2036 if (!allowMultipleRoot && currentState != XmlNodeType.None)
2037 throw NotWFError ("XML declaration cannot appear in this state.");
2039 currentState = XmlNodeType.XmlDeclaration;
2041 string text = CreateValueString ();
// NOTE(review): the assignments to these two locals are not visible in this
// excerpt - confirm they are set where encoding / standalone are parsed.
2047 string encoding = null, standalone = null;
2049 ParseAttributeFromString (text, ref idx, out name, out value);
2050 if (name != "version" || value != "1.0")
2051 throw NotWFError ("'version' is expected.");
2052 name = String.Empty;
2053 if (SkipWhitespaceInString (text, ref idx) && idx < text.Length)
2054 ParseAttributeFromString (text, ref idx, out name, out value);
2055 if (name == "encoding") {
2056 if (!XmlChar.IsValidIANAEncoding (value))
2057 throw NotWFError ("'encoding' must be a valid IANA encoding name.");
// The context encoding comes from the underlying reader (or defaults to
// Unicode), not from the declared encoding name.
2058 if (reader is XmlStreamReader)
2059 parserContext.Encoding = ((XmlStreamReader) reader).Encoding;
2061 parserContext.Encoding = Encoding.Unicode;
2063 name = String.Empty;
2064 if (SkipWhitespaceInString (text, ref idx) && idx < text.Length)
2065 ParseAttributeFromString (text, ref idx, out name, out value);
2067 if (name == "standalone") {
2068 this.isStandalone = value == "yes";
// NOTE(review): the message below reads "is allow"; "is allowed" was
// probably intended.
2069 if (value != "yes" && value != "no")
2070 throw NotWFError ("Only 'yes' or 'no' is allow for 'standalone'");
2072 SkipWhitespaceInString (text, ref idx);
2074 else if (name.Length != 0)
2075 throw NotWFError (String.Format ("Unexpected token: '{0}'", name));
// Nothing may follow the last pseudo-attribute.
2077 if (idx < text.Length)
2078 throw NotWFError ("'?' is expected.");
2080 AddAttributeWithValue ("version", "1.0");
2081 if (encoding != null)
2082 AddAttributeWithValue ("encoding", encoding);
2083 if (standalone != null)
2084 AddAttributeWithValue ("standalone", standalone);
2085 currentAttribute = currentAttributeValue = -1;
2088 XmlNodeType.XmlDeclaration, // nodeType
2090 String.Empty, // prefix
2092 false, // isEmptyElement
2094 false // clearAttributes
// Advances idx past any XML whitespace in text, starting at idx.
// Returns true when at least one character was skipped.
bool SkipWhitespaceInString (string text, ref int idx)
{
	int origin = idx;
	for (; idx < text.Length; idx++) {
		if (!XmlChar.IsWhitespace (text [idx]))
			break;
	}
	return idx > origin;
}
// Parses one pseudo-attribute of the form  name = "value"  (single or
// double quotes) out of src, starting at idx.
//   src   - text to parse
//   idx   - in: position to start at; out: position just after the closing
//           quote
//   name  - receives the attribute name (may be empty when no name
//           characters are found)
//   value - receives the text between the quotes
// Raises the not-well-formed error when '=' or the opening quote is
// missing, and when the value is not terminated by its closing quote.
private void ParseAttributeFromString (string src,
	ref int idx, out string name, out string value)
{
	while (idx < src.Length && XmlChar.IsWhitespace (src [idx]))
		idx++;

	int start = idx;
	while (idx < src.Length && XmlChar.IsNameChar (src [idx]))
		idx++;
	name = src.Substring (start, idx - start);

	while (idx < src.Length && XmlChar.IsWhitespace (src [idx]))
		idx++;
	if (idx == src.Length || src [idx] != '=')
		throw NotWFError (String.Format ("'=' is expected after {0}", name));
	idx++;

	while (idx < src.Length && XmlChar.IsWhitespace (src [idx]))
		idx++;

	if (idx == src.Length || src [idx] != '"' && src [idx] != '\'')
		throw NotWFError ("'\"' or '\'' is expected.");

	char quote = src [idx];
	idx++;
	start = idx;

	while (idx < src.Length && src [idx] != quote)
		idx++;

	// Reaching the end of the text without meeting the closing quote used to
	// be accepted silently (the rest of the text became the value). An
	// unterminated literal is not well-formed, so report it.
	if (idx == src.Length)
		throw NotWFError (String.Format ("Closing quotation mark is expected for the value of '{0}'.", name));

	value = src.Substring (start, idx - start);
	idx++; // step past the closing quote
}
// Skips a text declaration ("<?xml version=... encoding=... ?>") at the
// current position, if one is present.
// The optional version must be exactly "1.0"; the encoding declaration is
// syntax-checked here only, and is mandatory when Conformance is Auto.
// Raises the not-well-formed error for a target that matches "xml " only
// case-insensitively and for malformed version / encoding declarations.
2140 internal void SkipTextDeclaration ()
2142 if (PeekChar () != '<')
2147 if (PeekChar () != '?') {
// Peek far enough ahead to inspect the four characters after "<?".
2153 while (peekCharsIndex < 6) {
2154 if (PeekChar () < 0)
2159 if (new string (peekChars, 2, 4) != "xml ") {
2160 if (new string (peekChars, 2, 4).ToLower (CultureInfo.InvariantCulture) == "xml ") {
2161 throw NotWFError ("Processing instruction name must not be character sequence 'X' 'M' 'L' with case insensitivity.");
// Optional version declaration.
2170 if (PeekChar () == 'v') {
2172 ExpectAfterWhitespace ('=');
2174 int quoteChar = ReadChar ();
2175 char [] expect1_0 = new char [3];
2176 int versionLength = 0;
2177 switch (quoteChar) {
2180 while (PeekChar () != quoteChar) {
2181 if (PeekChar () == -1)
2182 throw NotWFError ("Invalid version declaration inside text declaration.");
2183 else if (versionLength == 3)
2184 throw NotWFError ("Invalid version number inside text declaration.");
2186 expect1_0 [versionLength] = (char) ReadChar ();
2188 if (versionLength == 3 && new String (expect1_0) != "1.0")
2189 throw NotWFError ("Invalid version number inside text declaration.");
2196 throw NotWFError ("Invalid version declaration inside text declaration.");
// Encoding declaration.
2200 if (PeekChar () == 'e') {
2201 Expect ("encoding");
2202 ExpectAfterWhitespace ('=');
2204 int quoteChar = ReadChar ();
2205 switch (quoteChar) {
2208 while (PeekChar () != quoteChar)
2209 if (ReadChar () == -1)
2210 throw NotWFError ("Invalid encoding declaration inside text declaration.");
2215 throw NotWFError ("Invalid encoding declaration inside text declaration.");
2217 // Encoding value should be checked inside XmlInputStream.
2220 // this condition is to check if this instance is
2221 // not created by XmlReader.Create() (which just
2222 // omits strict text declaration check).
2223 else if (Conformance == ConformanceLevel.Auto)
2224 throw NotWFError ("Encoding declaration is mandatory in text declaration.");
2229 curNodePeekIndex = peekCharsIndex; // without this it causes incorrect value start indication.
2232 // The reader is positioned on the first character after
2233 // the leading '<!'.
// Examines the character that follows "<!" and raises the not-well-formed
// error when it does not introduce recognised declaration markup.
// (The individual cases are not visible in this excerpt.)
2234 private void ReadDeclaration ()
2236 int ch = PeekChar ();
2254 throw NotWFError ("Unexpected declaration markup was found.");
2258 // The reader is positioned on the first character after
2259 // the leading '<!--'.
// Reads a comment body up to "-->" into valueBuffer and describes a
// Comment node. A comment as the first node moves the state past the point
// where an XML declaration is accepted.
// Raises the not-well-formed error when "--" occurs inside the comment or
// when an invalid character is found.
2260 private void ReadComment ()
2262 if (currentState == XmlNodeType.None)
2263 currentState = XmlNodeType.XmlDeclaration;
2265 preserveCurrentTag = false;
2267 ClearValueBuffer ();
2270 while ((ch = PeekChar ()) != -1) {
2273 if (ch == '-' && PeekChar () == '-') {
2276 if (PeekChar () != '>')
2277 throw NotWFError ("comments cannot contain '--'");
// NOTE(review): unlike the PI, CDATA and text readers, this check is not
// guarded by CharacterChecking - confirm that is intended.
2283 if (XmlChar.IsInvalid (ch))
2284 throw NotWFError ("Not allowed character was found.");
2286 AppendValueChar (ch);
2290 XmlNodeType.Comment, // nodeType
2291 String.Empty, // name
2292 String.Empty, // prefix
2293 String.Empty, // localName
2294 false, // isEmptyElement
2295 null, // value: create only when required
2296 true // clearAttributes
2300 // The reader is positioned on the first character after
2301 // the leading '<![CDATA['.
// Reads a CDATA section body up to "]]>" into valueBuffer and describes a
// CDATA node. With normalization on, '\r' is replaced or dropped as the
// comments below describe.
// Raises the not-well-formed error when the state is not Element or, with
// CharacterChecking on, when an invalid character is found.
2302 private void ReadCDATA ()
2304 if (currentState != XmlNodeType.Element)
2305 throw NotWFError ("CDATA section cannot appear in this state.");
2306 preserveCurrentTag = false;
2308 ClearValueBuffer ();
2312 while (PeekChar () != -1) {
// Look for the "]]>" terminator.
2317 if (ch == ']' && PeekChar () == ']') {
2318 ch = ReadChar (); // ']'
2320 if (PeekChar () == '>') {
2327 if (normalization && ch == '\r') {
2330 // append '\n' instead of '\r'.
2331 AppendValueChar ('\n');
2332 // otherwise, discard '\r'.
2335 if (CharacterChecking && XmlChar.IsInvalid (ch))
2336 throw NotWFError ("Invalid character was found.");
2338 // FIXME: it might be optimized by the JIT later,
2339 // AppendValueChar (ch);
// NOTE(review): the strict '<' sends U+FFFF itself to the surrogate-pair
// branch; '<=' looks like the intended comparison.
2341 if (ch < Char.MaxValue)
2342 valueBuffer.Append ((char) ch);
2344 AppendSurrogatePairValueChar (ch);
2349 XmlNodeType.CDATA, // nodeType
2350 String.Empty, // name
2351 String.Empty, // prefix
2352 String.Empty, // localName
2353 false, // isEmptyElement
2354 null, // value: create only when required
2355 true // clearAttributes
2359 // The reader is positioned on the first character after
2360 // the leading '<!DOCTYPE'.
// Reads a document type declaration: the doctype name, an optional
// external ID (SYSTEM, or PUBLIC followed by a system literal), and an
// optional internal subset in brackets. It then builds the DTD object
// model and describes a DocumentType node whose value is the internal
// subset text, with synthetic "PUBLIC" / "SYSTEM" attributes for the IDs
// that are present.
// Raises the not-well-formed error when DTDs are prohibited, or when the
// declaration appears after another doctype or after element content.
2361 private void ReadDoctypeDecl ()
2364 throw NotWFError ("Document Type Declaration (DTD) is prohibited in this XML.");
2365 switch (currentState) {
2366 case XmlNodeType.DocumentType:
2367 case XmlNodeType.Element:
2368 case XmlNodeType.EndElement:
2369 throw NotWFError ("Document type cannot appear in this state.");
2371 currentState = XmlNodeType.DocumentType;
2373 string doctypeName = null;
2374 string publicId = null;
2375 string systemId = null;
2376 int intSubsetStartLine = 0;
2377 int intSubsetStartColumn = 0;
2380 doctypeName = ReadName ();
// SYSTEM form: the keyword is still in the input (true).
2385 systemId = ReadSystemLiteral (true);
// PUBLIC form: public id, mandatory whitespace, then a bare system literal.
2388 publicId = ReadPubidLiteral ();
2389 if (!SkipWhitespace ())
2390 throw NotWFError ("Whitespace is required between PUBLIC id and SYSTEM id.");
2391 systemId = ReadSystemLiteral (false);
2397 if(PeekChar () == '[')
2399 // read markupdecl etc. or end of decl
// Remember where the internal subset starts, then capture its raw text.
2401 intSubsetStartLine = this.LineNumber;
2402 intSubsetStartColumn = this.LinePosition;
2403 ClearValueBuffer ();
2404 ReadInternalSubset ();
2405 parserContext.InternalSubset = CreateValueString ();
2407 // end of DOCTYPE decl.
2408 ExpectAfterWhitespace ('>');
2410 GenerateDTDObjectModel (doctypeName, publicId,
2411 systemId, parserContext.InternalSubset,
2412 intSubsetStartLine, intSubsetStartColumn);
2414 // set properties for <!DOCTYPE> node
2416 XmlNodeType.DocumentType, // nodeType
2417 doctypeName, // name
2418 String.Empty, // prefix
2419 doctypeName, // localName
2420 false, // isEmptyElement
2421 parserContext.InternalSubset, // value
2422 true // clearAttributes
2425 if (publicId != null)
2426 AddAttributeWithValue ("PUBLIC", publicId);
2427 if (systemId != null)
2428 AddAttributeWithValue ("SYSTEM", systemId);
2429 currentAttribute = currentAttributeValue = -1;
// Convenience overload: builds the DTD object model with the start line
// and start column of the internal subset both given as 0.
internal DTDObjectModel GenerateDTDObjectModel (string name, string publicId,
	string systemId, string internalSubset)
{
	DTDObjectModel model = GenerateDTDObjectModel (
		name, publicId, systemId, internalSubset, 0, 0);
	return model;
}
// Creates a new DTDObjectModel on the parser context, fills in its
// identifying properties from the arguments and the reader's current state
// (base URI, resolver, standalone flag, current line and column), and runs
// a DTDReader over it.
//   intSubsetStartLine / intSubsetStartColumn - position handed to the
//       DTDReader as the start of the internal subset
// Returns the model produced by DTDReader.GenerateDTDObjectModel ().
2438 internal DTDObjectModel GenerateDTDObjectModel (string name, string publicId,
2439 string systemId, string internalSubset, int intSubsetStartLine, int intSubsetStartColumn)
2442 parserContext.Dtd = new DTDObjectModel (this.NameTable); // merges both internal and external subsets in the meantime,
2443 DTD.BaseURI = BaseURI;
2445 DTD.PublicId = publicId;
2446 DTD.SystemId = systemId;
2447 DTD.InternalSubset = internalSubset;
2448 DTD.XmlResolver = resolver;
2449 DTD.IsStandalone = isStandalone;
2450 DTD.LineNumber = line;
2451 DTD.LinePosition = column;
// The DTD reader inherits this reader's normalization setting.
2453 DTDReader dr = new DTDReader (DTD, intSubsetStartLine, intSubsetStartColumn);
2454 dr.Normalization = this.normalization;
2455 return dr.GenerateDTDObjectModel ();
// Lexical states tracked while ReadInternalSubset () scans the internal DTD
// subset (e.g. Free, PI, Comment, the declaration kinds, and the two quoted
// states referenced there).
2458 private enum DtdInputState
// Typed wrapper around a non-generic Stack holding DtdInputState values.
// A new stack starts with a single DtdInputState.Free entry.
private class DtdInputStateStack
{
	Stack states = new Stack ();

	public DtdInputStateStack ()
	{
		Push (DtdInputState.Free);
	}

	// Returns the state on top of the stack without removing it.
	public DtdInputState Peek ()
	{
		object top = states.Peek ();
		return (DtdInputState) top;
	}

	// Removes and returns the state on top of the stack.
	public DtdInputState Pop ()
	{
		object top = states.Pop ();
		return (DtdInputState) top;
	}

	// Pushes val on top of the stack.
	public void Push (DtdInputState val)
	{
		states.Push (val);
	}
}
// Stack of lexical states used while scanning the internal DTD subset.
DtdInputStateStack stateStack = new DtdInputStateStack ();

// The current (top-most) DTD input state.
DtdInputState State {
	get {
		return stateStack.Peek ();
	}
}
// Reads the next character from the input, appends it to valueBuffer and
// returns it. At the end of input the -1 marker is returned unchanged and
// nothing is appended.
private int ReadValueChar ()
{
	int ret = ReadChar ();
	// Previously the end-of-input marker (-1) was appended as well, which
	// casts -1 to char: U+FFFF lands in the buffer, or the cast overflows in
	// a checked build. Only real characters are recorded.
	if (ret >= 0)
		AppendValueChar (ret);
	return ret;
}
// Appends the literal s to valueBuffer; used while scanning DTD keywords.
// Presumably the input is first required to match s (that statement is not
// visible in this excerpt) - TODO confirm.
2508 private void ExpectAndAppend (string s)
2511 valueBuffer.Append (s);
2514 // Simply read but not generate any result.
// Scans the internal DTD subset character by character, copying it into
// valueBuffer through ReadValueChar () / ExpectAndAppend (), and tracks the
// lexical context on stateStack (declaration kind, PI, comment, quoted
// literal). Scanning stops at the closing bracket met in the Free state,
// which is removed from the buffer again.
// Raises the not-well-formed error on end of input, unknown "<!" markup, a
// '>' in an unexpected state, or a parameter entity reference inside a
// markup declaration.
2515 private void ReadInternalSubset ()
2517 bool continueParse = true;
2519 while (continueParse) {
2520 switch (ReadValueChar ()) {
2523 case DtdInputState.Free:
// Drop the terminating character that was just appended and stop.
2525 valueBuffer.Remove (valueBuffer.Length - 1, 1);
2526 continueParse = false;
2528 case DtdInputState.InsideDoubleQuoted:
2530 case DtdInputState.InsideSingleQuoted:
2533 throw NotWFError ("unexpected end of file at DTD.");
2537 throw NotWFError ("unexpected end of file at DTD.");
2540 case DtdInputState.InsideDoubleQuoted:
2541 case DtdInputState.InsideSingleQuoted:
2542 case DtdInputState.Comment:
2543 continue; // well-formed
// Markup start: classify what follows and push the matching state.
2545 int c = ReadValueChar ();
2548 stateStack.Push (DtdInputState.PI);
2551 switch (ReadValueChar ()) {
2553 switch (ReadValueChar ()) {
2555 ExpectAndAppend ("EMENT");
2556 stateStack.Push (DtdInputState.ElementDecl);
2559 ExpectAndAppend ("TITY");
2560 stateStack.Push (DtdInputState.EntityDecl);
2563 throw NotWFError ("unexpected token '<!E'.");
2567 ExpectAndAppend ("TTLIST");
2568 stateStack.Push (DtdInputState.AttlistDecl);
2571 ExpectAndAppend ("OTATION");
2572 stateStack.Push (DtdInputState.NotationDecl);
2575 ExpectAndAppend ("-");
2576 stateStack.Push (DtdInputState.Comment);
2581 throw NotWFError (String.Format ("unexpected '<{0}'.", (char) c));
// Quote handling: a quote character opens a quoted state unless it sits
// inside the other kind of quote or inside a comment.
2585 if (State == DtdInputState.InsideSingleQuoted)
2587 else if (State != DtdInputState.InsideDoubleQuoted && State != DtdInputState.Comment)
2588 stateStack.Push (DtdInputState.InsideSingleQuoted);
2591 if (State == DtdInputState.InsideDoubleQuoted)
2593 else if (State != DtdInputState.InsideSingleQuoted && State != DtdInputState.Comment)
2594 stateStack.Push (DtdInputState.InsideDoubleQuoted);
// '>' handling: all four declaration kinds share one branch.
2598 case DtdInputState.ElementDecl:
2599 goto case DtdInputState.NotationDecl;
2600 case DtdInputState.AttlistDecl:
2601 goto case DtdInputState.NotationDecl;
2602 case DtdInputState.EntityDecl:
2603 goto case DtdInputState.NotationDecl;
2604 case DtdInputState.NotationDecl:
2607 case DtdInputState.InsideDoubleQuoted:
2608 case DtdInputState.InsideSingleQuoted:
2609 case DtdInputState.Comment:
2612 throw NotWFError ("unexpected token '>'");
2616 if (State == DtdInputState.PI) {
2617 if (ReadValueChar () == '>')
2622 if (State == DtdInputState.Comment) {
2623 if (PeekChar () == '-') {
2625 ExpectAndAppend (">");
2631 if (State != DtdInputState.Free && State != DtdInputState.EntityDecl && State != DtdInputState.Comment && State != DtdInputState.InsideDoubleQuoted && State != DtdInputState.InsideSingleQuoted)
2632 throw NotWFError ("Parameter Entity Reference cannot appear as a part of markupdecl (see XML spec 2.8).");
2638 // The reader is positioned on the first 'S' of "SYSTEM".
// Reads a quoted system literal and returns its content, collected through
// valueBuffer.
//   expectSYSTEM - presumably true when the "SYSTEM" keyword itself must
//                  still be consumed first (that code is not visible in
//                  this excerpt) - TODO confirm.
// Raises the not-well-formed error when required whitespace is missing or
// the input ends inside the literal.
2639 private string ReadSystemLiteral (bool expectSYSTEM)
2643 if (!SkipWhitespace ())
2644 throw NotWFError ("Whitespace is required after 'SYSTEM'.");
2648 int quoteChar = ReadChar (); // apos or quot
2650 ClearValueBuffer ();
// Collect characters until the matching quote is read.
2651 while (c != quoteChar) {
2654 throw NotWFError ("Unexpected end of stream in ExternalID.");
2656 AppendValueChar (c);
2658 return CreateValueString ();
// Reads a quoted public identifier literal and returns its content,
// collected through valueBuffer.
// Raises the not-well-formed error when whitespace is missing after
// 'PUBLIC', when the input ends inside the literal, or when a character is
// not accepted by XmlChar.IsPubidChar.
2661 private string ReadPubidLiteral()
2664 if (!SkipWhitespace ())
2665 throw NotWFError ("Whitespace is required after 'PUBLIC'.");
2666 int quoteChar = ReadChar ();
2668 ClearValueBuffer ();
// Collect characters until the matching quote is read.
2669 while(c != quoteChar)
2672 if(c < 0) throw NotWFError ("Unexpected end of stream in ExternalID.");
2673 if(c != quoteChar && !XmlChar.IsPubidChar (c))
2674 throw NotWFError (String.Format ("character '{0}' not allowed for PUBLIC ID", (char)c ));
2676 AppendValueChar (c);
2678 return CreateValueString ();
2681 // The reader is positioned on the first character
// Reads an XML name at the current position and returns the qualified
// name; the prefix and local part are computed but discarded.
private string ReadName ()
{
	string ignoredPrefix, ignoredLocalName;
	string qualifiedName = ReadName (out ignoredPrefix, out ignoredLocalName);
	return qualifiedName;
}
// Reads an XML name at the current position and atomizes it through the
// parser context's NameTable.
//   prefix    - receives the part before the first ':' (only split when
//               namespace support is on), or String.Empty
//   localName - receives the part after that ':'; its value in the no-colon
//               case is assigned on a line not visible in this excerpt
// Returns the atomized qualified name.
// Raises the not-well-formed error when the first character cannot start a
// name.
// Two implementations are selected by USE_NAME_BUFFER: the default one
// slices the name straight out of peekChars, the other copies it into
// nameBuffer first.
2689 private string ReadName (out string prefix, out string localName)
2691 #if !USE_NAME_BUFFER
// Keep the current tag's characters in peekChars while the name is read.
2692 bool savePreserve = preserveCurrentTag;
2693 preserveCurrentTag = true;
2695 int startOffset = peekCharsIndex - curNodePeekIndex;
2696 int ch = PeekChar ();
2697 if (!XmlChar.IsFirstNameChar (ch))
2698 throw NotWFError (String.Format (CultureInfo.InvariantCulture, "a name did not start with a legal character {0} ({1})", ch, (char) ch));
2703 while (XmlChar.IsNameChar ((ch = PeekChar ()))) {
// Only the first ':' separates prefix from local name.
2705 if (ch == ':' && namespaces && colonAt < 0)
// Recomputed from the offset: curNodePeekIndex may have moved meanwhile.
2710 int start = curNodePeekIndex + startOffset;
2712 string name = parserContext.NameTable.Add (
2713 peekChars, start, length);
2716 prefix = parserContext.NameTable.Add (
2717 peekChars, start, colonAt);
2718 localName = parserContext.NameTable.Add (
2719 peekChars, start + colonAt + 1, length - colonAt - 1);
2721 prefix = String.Empty;
2725 preserveCurrentTag = savePreserve;
2729 int ch = PeekChar ();
2730 if (!XmlChar.IsFirstNameChar (ch))
2731 throw NotWFError (String.Format (CultureInfo.InvariantCulture, "a name did not start with a legal character {0} ({1})", ch, (char) ch));
2736 // AppendNameChar (ch);
2738 // nameBuffer.Length is always non-0 so no need to ExpandNameCapacity () here
2739 if (ch < Char.MaxValue)
2740 nameBuffer [nameLength++] = (char) ch;
2742 AppendSurrogatePairNameChar (ch);
2747 while (XmlChar.IsNameChar ((ch = PeekChar ()))) {
2750 if (ch == ':' && namespaces && colonAt < 0)
2751 colonAt = nameLength;
2752 // AppendNameChar (ch);
2754 if (nameLength == nameCapacity)
2755 ExpandNameCapacity ();
2756 if (ch < Char.MaxValue)
2757 nameBuffer [nameLength++] = (char) ch;
2759 AppendSurrogatePairNameChar (ch);
2763 string name = parserContext.NameTable.Add (nameBuffer, 0, nameLength);
2766 prefix = parserContext.NameTable.Add (nameBuffer, 0, colonAt);
2767 localName = parserContext.NameTable.Add (nameBuffer, colonAt + 1, nameLength - colonAt - 1);
2769 prefix = String.Empty;
2777 // Read the next character and compare it against the
2778 // specified character.
// Consumes one character and raises the not-well-formed error when it is
// not `expected`. The message shows both characters with their hexadecimal
// codes, and prints "EOF" when the end of input was reached instead.
2779 private void Expect (int expected)
2781 int ch = ReadChar ();
2783 if (ch != expected) {
2784 throw NotWFError (String.Format (CultureInfo.InvariantCulture,
2785 "expected '{0}' ({1:X}) but found '{2}' ({3:X})",
2788 ch < 0 ? (object) "EOF" : (char) ch,
// Consumes expected.Length characters from the input and raises the
// not-well-formed error at the first one that differs from `expected`.
private void Expect (string expected)
{
	foreach (char wanted in expected) {
		if (ReadChar () != wanted)
			throw NotWFError (String.Format (CultureInfo.InvariantCulture,
				"'{0}' is expected", expected));
	}
}
// Consumes characters, skipping XML whitespace, and requires the first
// non-whitespace character to be `c`; otherwise raises the not-well-formed
// error naming what was found ("EOF" at the end of input).
2801 private void ExpectAfterWhitespace (char c)
2804 int i = ReadChar ();
// Cheap range test first, so the whitespace lookup runs only for low
// code points.
2805 if (i < 0x21 && XmlChar.IsWhitespace (i))
2808 throw NotWFError (String.Format (CultureInfo.InvariantCulture, "Expected {0}, but found {1} [{2}]", c, i < 0 ? (object) "EOF" : (char) i, i));
2813 // Does not consume the first non-whitespace character.
// Skips XML whitespace (space, tab, LF, CR) at the current position.
// `skipped` records whether the first peeked character was whitespace;
// presumably that is the return value (the return statement is not visible
// in this excerpt). The whitespace test is written out by hand instead of
// calling XmlChar.IsWhitespace, per the FIXME notes.
2814 private bool SkipWhitespace ()
2816 // FIXME: It should be inlined by the JIT.
2817 // bool skipped = XmlChar.IsWhitespace (PeekChar ());
2818 int ch = PeekChar ();
2819 bool skipped = (ch == 0x20 || ch == 0x9 || ch == 0xA || ch == 0xD);
2823 // FIXME: It should be inlined by the JIT.
2824 // while (XmlChar.IsWhitespace (PeekChar ()))
2826 while ((ch = PeekChar ()) == 0x20 || ch == 0x9 || ch == 0xA || ch == 0xD)
// Reads a run of whitespace and decides what to do with it:
//  - when it is followed by other character data inside an element
//    (isText), it is treated as the start of text;
//  - otherwise, depending on whitespaceHandling and XmlSpace, it is either
//    dropped or described as a Whitespace / SignificantWhitespace node.
// Whitespace as the first node moves the state past the point where an XML
// declaration is accepted.
// NOTE(review): the meaning of the bool result is not visible in this
// excerpt.
2831 private bool ReadWhitespace ()
2833 if (currentState == XmlNodeType.None)
2834 currentState = XmlNodeType.XmlDeclaration;
// Keep the scanned characters in peekChars so they can be copied below.
2836 bool savePreserve = preserveCurrentTag;
2837 preserveCurrentTag = true;
2838 int startOffset = peekCharsIndex - curNodePeekIndex; // it should be 0 for now though.
2840 int ch = PeekChar ();
2844 // FIXME: It should be inlined by the JIT.
2845 // } while ((ch = PeekChar ()) != -1 && XmlChar.IsWhitespace (ch));
2846 } while (ch == 0x20 || ch == 0x9 || ch == 0xA || ch == 0xD);
2848 bool isText = currentState == XmlNodeType.Element && ch != -1 && ch != '<';
2850 if (!isText && (whitespaceHandling == WhitespaceHandling.None ||
2851 whitespaceHandling == WhitespaceHandling.Significant && XmlSpace != XmlSpace.Preserve))
// Copy the scanned whitespace out of the peek buffer into valueBuffer.
2854 ClearValueBuffer ();
2855 valueBuffer.Append (peekChars, curNodePeekIndex, peekCharsIndex - curNodePeekIndex - startOffset);
2856 preserveCurrentTag = savePreserve;
2861 XmlNodeType nodeType = (this.XmlSpace == XmlSpace.Preserve) ?
2862 XmlNodeType.SignificantWhitespace : XmlNodeType.Whitespace;
2863 SetProperties (nodeType,
2868 null, // value: create only when required
2875 // Returns -1 if it should throw an error.
//
// Copies characters obtained through PeekChar () into `buffer`, starting
// at index `offset`, in a loop that runs at most `length` times. A '<'
// is examined specially: it is stored in the buffer when the following
// character is not '/', or when the nestLevel test below succeeds.
// NOTE(review): many physical lines of this method (the end-of-input
// test, the '<' test, the reads that consume characters, the else
// branches and every return statement) are not present in this listing
// -- confirm the control flow and the returned value against the
// complete file.
2876 private int ReadCharsInternal (char [] buffer, int offset, int length)
// bufIndex is the write position inside buffer.
2878 int bufIndex = offset;
2879 for (int i = 0; i < length; i++) {
2880 int c = PeekChar ();
2883 throw NotWFError ("Unexpected end of xml.");
2885 if (i + 1 == length)
2886 // if it does not end here,
2887 // it cannot store another
2888 // character, so stop here.
2891 if (PeekChar () != '/') {
2893 buffer [bufIndex++] = '<';
// nestLevel is compared with 0 first and decremented afterwards
// (post-decrement).
2896 else if (nestLevel-- > 0) {
2897 buffer [bufIndex++] = '<';
2900 // Seems to skip immediate EndElement
2907 readCharsInProgress = false;
2908 Read (); // move to the next node
// Values below Char.MaxValue are stored as a single char.
// NOTE(review): the comparison is strict, so c == 0xFFFF does not take
// this branch -- confirm that is intended.
2912 if (c < Char.MaxValue)
2913 buffer [bufIndex++] = (char) c;
// Otherwise the value is split into a UTF-16 surrogate pair: high
// surrogate based at 0xD800, low surrogate based at 0xDC00.
// NOTE(review): this writes two buffer slots in one iteration while `i`
// advances by one, so bufIndex can move further than `length` slots
// past `offset` -- confirm that callers size the buffer for this.
2915 buffer [bufIndex++] = (char) ((c - 0x10000) / 0x400 + 0xD800);
2916 buffer [bufIndex++] = (char) ((c - 0x10000) % 0x400 + 0xDC00);
2924 private bool ReadUntilEndTag ()
2927 currentState = XmlNodeType.EndElement;
2933 throw NotWFError ("Unexpected end of xml.");
2935 if (PeekChar () != '/') {
2939 else if (--nestLevel > 0)
2942 string name = ReadName ();
2943 if (name != elementNames [elementNameStackPos - 1].Name)