2 // System.Xml.XmlTextReader
5 // Jason Diamond (jason@injektilo.org)
6 // Adam Treat (manyoso@yahoo.com)
7 // Atsushi Enomoto (ginga@kit.hi-ho.ne.jp)
9 // (C) 2001, 2002 Jason Diamond http://injektilo.org/
10 // Copyright (C) 2005-2006 Novell, Inc (http://www.novell.com)
12 // Permission is hereby granted, free of charge, to any person obtaining
13 // a copy of this software and associated documentation files (the
14 // "Software"), to deal in the Software without restriction, including
15 // without limitation the rights to use, copy, modify, merge, publish,
16 // distribute, sublicense, and/or sell copies of the Software, and to
17 // permit persons to whom the Software is furnished to do so, subject to
18 // the following conditions:
20 // The above copyright notice and this permission notice shall be
21 // included in all copies or substantial portions of the Software.
23 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
27 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
28 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
29 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
31 //#define USE_NAME_BUFFER
34 // Optimization TODOs:
36 // - support PushbackChar() which reverts one character read.
37 // - ReadTextReader() should always keep one pushback buffer
38 // as pushback safety net.
39 // - Replace (peek,read) * n -> read * n + pushback
43 using System.Collections;
45 using System.Collections.Generic;
47 using System.Globalization;
49 using System.Security.Permissions;
51 using System.Xml.Schema;
64 internal class XmlTextReader : XmlReader,
65 IXmlLineInfo, IXmlNamespaceResolver, IHasXmlParserContext
67 [PermissionSet (SecurityAction.InheritanceDemand, Unrestricted = true)]
68 public class XmlTextReader : XmlReader, IXmlLineInfo, IHasXmlParserContext
73 protected XmlTextReader ()
77 public XmlTextReader (Stream input)
78 : this (new XmlStreamReader (input))
82 public XmlTextReader (string url)
83 : this(url, new NameTable ())
87 public XmlTextReader (TextReader input)
88 : this (input, new NameTable ())
92 protected XmlTextReader (XmlNameTable nt)
93 : this (String.Empty, null, XmlNodeType.None, null)
97 public XmlTextReader (Stream input, XmlNameTable nt)
98 : this(new XmlStreamReader (input), nt)
102 public XmlTextReader (string url, Stream input)
103 : this (url, new XmlStreamReader (input))
107 public XmlTextReader (string url, TextReader input)
108 : this (url, input, new NameTable ())
112 public XmlTextReader (string url, XmlNameTable nt)
115 Stream stream = GetStreamFromUrl (url, out uriString);
116 XmlParserContext ctx = new XmlParserContext (nt,
117 new XmlNamespaceManager (nt),
120 this.InitializeContext (uriString, ctx, new XmlStreamReader (stream), XmlNodeType.Document);
123 public XmlTextReader (TextReader input, XmlNameTable nt)
124 : this (String.Empty, input, nt)
128 // This is used in XmlReader.Create() to indicate that string
129 // argument is uri, not an xml fragment.
130 internal XmlTextReader (bool dummy, XmlResolver resolver, string url, XmlNodeType fragType, XmlParserContext context)
132 if (resolver == null)
133 resolver = new XmlUrlResolver ();
135 this.XmlResolver = resolver;
137 Stream stream = GetStreamFromUrl (url, out uriString);
138 this.InitializeContext (uriString, context, new XmlStreamReader (stream), fragType);
141 public XmlTextReader (Stream xmlFragment, XmlNodeType fragType, XmlParserContext context)
142 : this (context != null ? context.BaseURI : String.Empty,
143 new XmlStreamReader (xmlFragment),
147 disallowReset = true;
150 internal XmlTextReader (string baseURI, TextReader xmlFragment, XmlNodeType fragType)
151 : this (baseURI, xmlFragment, fragType, null)
155 public XmlTextReader (string url, Stream input, XmlNameTable nt)
156 : this (url, new XmlStreamReader (input), nt)
// Creates a reader that parses a whole document from input.
//   url   - base URI for the document; may be null or empty.
//   input - text source to parse.
//   nt    - name table to use. When null, no context is passed and
//           InitializeContext builds one around a new NameTable.
// The name table used to be dropped here (a null context was always
// passed on), so the NameTable property never returned the caller's
// instance.
public XmlTextReader (string url, TextReader input, XmlNameTable nt)
	: this (url, input, XmlNodeType.Document,
		nt != null
			? new XmlParserContext (nt, new XmlNamespaceManager (nt), String.Empty, XmlSpace.None)
			: null)
{
}
165 public XmlTextReader (string xmlFragment, XmlNodeType fragType, XmlParserContext context)
166 : this (context != null ? context.BaseURI : String.Empty,
167 new StringReader (xmlFragment),
171 disallowReset = true;
174 internal XmlTextReader (string url, TextReader fragment, XmlNodeType fragType, XmlParserContext context)
176 InitializeContext (url, context, fragment, fragType);
// Resolves url through the current resolver and opens the entity it
// points at. absoluteUriString receives the text of the resolved URI,
// or String.Empty when the resolver yields no URI. The result is null
// when the resolver does not hand back a Stream.
private Stream GetStreamFromUrl (string url, out string absoluteUriString)
{
	Uri resolved = resolver.ResolveUri (null, url);
	if (resolved != null)
		absoluteUriString = resolved.ToString ();
	else
		absoluteUriString = String.Empty;

	object entity = resolver.GetEntity (resolved, null, typeof (Stream));
	return entity as Stream;
}
190 public override int AttributeCount
192 get { return attributeCount; }
195 public override string BaseURI
197 get { return parserContext.BaseURI; }
201 public override bool CanReadBinaryContent {
205 public override bool CanReadValueChunk {
209 internal override bool CanReadBinaryContent {
213 internal override bool CanReadValueChunk {
218 internal bool CharacterChecking {
219 get { return checkCharacters; }
220 set { checkCharacters = value; }
223 // for XmlReaderSettings.CloseInput support
224 internal bool CloseInput {
225 get { return closeInput; }
226 set { closeInput = value; }
229 public override int Depth
232 int nodeTypeMod = currentToken.NodeType == XmlNodeType.Element ? 0 : -1;
233 if (currentAttributeValue >= 0)
234 return nodeTypeMod + elementDepth + 2; // inside attribute value.
235 else if (currentAttribute >= 0)
236 return nodeTypeMod + elementDepth + 1;
241 public Encoding Encoding
243 get { return parserContext.Encoding; }
246 public EntityHandling EntityHandling {
247 get { return entityHandling; }
248 set { entityHandling = value; }
252 public override bool EOF {
253 get { return readState == ReadState.EndOfFile; }
256 public override bool HasValue {
257 get { return cursorToken.Value != null; }
260 public override bool IsDefault {
261 // XmlTextReader does not expand default attributes.
262 get { return false; }
265 public override bool IsEmptyElement {
266 get { return cursorToken.IsEmptyElement; }
271 public override string this [int i] {
272 get { return GetAttribute (i); }
275 public override string this [string name] {
276 get { return GetAttribute (name); }
279 public override string this [string localName, string namespaceName] {
280 get { return GetAttribute (localName, namespaceName); }
284 public int LineNumber {
286 if (useProceedingLineInfo)
289 return cursorToken.LineNumber;
293 public int LinePosition {
295 if (useProceedingLineInfo)
298 return cursorToken.LinePosition;
302 public override string LocalName {
303 get { return cursorToken.LocalName; }
306 public override string Name {
307 get { return cursorToken.Name; }
310 public bool Namespaces {
311 get { return namespaces; }
313 if (readState != ReadState.Initial)
314 throw new InvalidOperationException ("Namespaces have to be set before reading.");
319 public override string NamespaceURI {
320 get { return cursorToken.NamespaceURI; }
323 public override XmlNameTable NameTable {
324 get { return parserContext.NameTable; }
327 public override XmlNodeType NodeType {
328 get { return cursorToken.NodeType; }
331 public bool Normalization {
332 get { return normalization; }
333 set { normalization = value; }
336 public override string Prefix {
337 get { return cursorToken.Prefix; }
340 public bool ProhibitDtd {
341 get { return prohibitDtd; }
342 set { prohibitDtd = value; }
345 public override char QuoteChar {
346 get { return cursorToken.QuoteChar; }
349 public override ReadState ReadState {
350 get { return readState; }
354 public override XmlReaderSettings Settings {
355 get { return base.Settings; }
359 public override string Value {
360 get { return cursorToken.Value != null ? cursorToken.Value : String.Empty; }
363 public WhitespaceHandling WhitespaceHandling {
364 get { return whitespaceHandling; }
365 set { whitespaceHandling = value; }
368 public override string XmlLang {
369 get { return parserContext.XmlLang; }
372 public XmlResolver XmlResolver {
373 set { resolver = value; }
376 public override XmlSpace XmlSpace {
377 get { return parserContext.XmlSpace; }
384 public override void Close ()
386 readState = ReadState.Closed;
388 cursorToken.Clear ();
389 currentToken.Clear ();
391 if (closeInput && reader != null)
// Returns the value of the attribute at index i of the current node.
// Throws ArgumentOutOfRangeException when i is negative or is not
// less than AttributeCount.
public override string GetAttribute (int i)
{
	// Negative indexes used to reach the array access unchecked, and the
	// old message (which also stated the condition backwards) was passed
	// in the parameter-name slot of the exception constructor.
	if (i < 0 || i >= attributeCount)
		throw new ArgumentOutOfRangeException ("i", "i must be non-negative and less than AttributeCount.");

	return attributeTokens [i].Value;
}
404 // MS.NET 1.0 msdn says that this method returns String.Empty
405 // for absent attribute, but in fact it returns null.
406 // This description is corrected in MS.NET 1.1 msdn.
407 public override string GetAttribute (string name)
409 for (int i = 0; i < attributeCount; i++)
410 if (attributeTokens [i].Name == name)
411 return attributeTokens [i].Value;
415 private int GetIndexOfQualifiedAttribute (string localName, string namespaceURI)
417 for (int i = 0; i < attributeCount; i++) {
418 XmlAttributeTokenInfo ti = attributeTokens [i];
419 if (ti.LocalName == localName && ti.NamespaceURI == namespaceURI)
425 XmlParserContext IHasXmlParserContext.ParserContext {
426 get { return parserContext; }
429 public override string GetAttribute (string localName, string namespaceURI)
431 int idx = this.GetIndexOfQualifiedAttribute (localName, namespaceURI);
434 return attributeTokens [idx].Value;
438 public IDictionary<string, string> GetNamespacesInScope (XmlNamespaceScope scope)
440 return parserContext.NamespaceManager.GetNamespacesInScope (scope);
443 IDictionary<string, string> IXmlNamespaceResolver.GetNamespacesInScope (XmlNamespaceScope scope)
445 return GetNamespacesInScope (scope);
449 public TextReader GetRemainder ()
451 if (peekCharsLength < 0)
453 return new StringReader (new string (peekChars, peekCharsIndex, peekCharsLength - peekCharsIndex) + reader.ReadToEnd ());
457 public bool HasLineInfo ()
459 bool IXmlLineInfo.HasLineInfo ()
465 public override string LookupNamespace (string prefix)
467 return LookupNamespace (prefix, false);
// Looks prefix up in the parser context's namespace manager.
// An empty-string result from the manager is reported as null.
private string LookupNamespace (string prefix, bool atomizedNames)
{
	string uri = parserContext.NamespaceManager.LookupNamespace (prefix, atomizedNames);
	if (uri == String.Empty)
		return null;
	return uri;
}
478 string IXmlNamespaceResolver.LookupPrefix (string ns)
480 return LookupPrefix (ns, false);
483 public string LookupPrefix (string ns, bool atomizedName)
485 return parserContext.NamespaceManager.LookupPrefix (ns, atomizedName);
// Positions the cursor on the attribute at index i of the current node
// and resets the attribute-value cursor.
// Throws ArgumentOutOfRangeException when i is negative or is not
// less than AttributeCount.
public override void MoveToAttribute (int i)
{
	// Validate before touching any state: a negative index used to
	// overwrite currentAttribute and only then fail on the array access.
	// The message now goes in the message slot, not the parameter name.
	if (i < 0 || i >= attributeCount)
		throw new ArgumentOutOfRangeException ("i", "attribute index out of range.");

	currentAttribute = i;
	currentAttributeValue = -1;
	cursorToken = attributeTokens [i];
}
499 public override bool MoveToAttribute (string name)
501 for (int i = 0; i < attributeCount; i++) {
502 XmlAttributeTokenInfo ti = attributeTokens [i];
503 if (ti.Name == name) {
511 public override bool MoveToAttribute (string localName, string namespaceName)
513 int idx = GetIndexOfQualifiedAttribute (localName, namespaceName);
516 MoveToAttribute (idx);
520 public override bool MoveToElement ()
522 if (currentToken == null) // for attribute .ctor()
525 if (cursorToken == currentToken)
528 if (currentAttribute >= 0) {
529 currentAttribute = -1;
530 currentAttributeValue = -1;
531 cursorToken = currentToken;
538 public override bool MoveToFirstAttribute ()
540 if (attributeCount == 0)
543 return MoveToNextAttribute ();
546 public override bool MoveToNextAttribute ()
548 if (currentAttribute == 0 && attributeCount == 0)
550 if (currentAttribute + 1 < attributeCount) {
552 currentAttributeValue = -1;
553 cursorToken = attributeTokens [currentAttribute];
560 public override bool Read ()
562 if (readState == ReadState.Closed)
564 curNodePeekIndex = peekCharsIndex;
565 preserveCurrentTag = true;
568 if (startNodeType == XmlNodeType.Attribute) {
569 if (currentAttribute == 0)
570 return false; // already read.
571 SkipTextDeclaration ();
573 IncrementAttributeToken ();
574 ReadAttributeValueTokens ('"');
575 cursorToken = attributeTokens [0];
576 currentAttributeValue = -1;
577 readState = ReadState.Interactive;
580 if (readState == ReadState.Initial && currentState == XmlNodeType.Element)
581 SkipTextDeclaration ();
587 readState = ReadState.Interactive;
588 currentLinkedNodeLineNumber = line;
589 currentLinkedNodeLinePosition = column;
590 useProceedingLineInfo = true;
592 cursorToken = currentToken;
594 currentAttribute = currentAttributeValue = -1;
595 currentToken.Clear ();
597 // It was moved from end of ReadStartTag ().
603 if (readCharsInProgress) {
604 readCharsInProgress = false;
605 return ReadUntilEndTag ();
608 more = ReadContent ();
610 if (!more && startNodeType == XmlNodeType.Document && currentState != XmlNodeType.EndElement)
611 throw NotWFError ("Document element did not appear.");
613 useProceedingLineInfo = false;
617 public override bool ReadAttributeValue ()
619 if (readState == ReadState.Initial && startNodeType == XmlNodeType.Attribute) {
623 if (currentAttribute < 0)
625 XmlAttributeTokenInfo ti = attributeTokens [currentAttribute];
626 if (currentAttributeValue < 0)
627 currentAttributeValue = ti.ValueTokenStartIndex - 1;
629 if (currentAttributeValue < ti.ValueTokenEndIndex) {
630 currentAttributeValue++;
631 cursorToken = attributeValueTokens [currentAttributeValue];
638 public int ReadBase64 (byte [] buffer, int offset, int length)
640 BinaryCharGetter = binaryCharGetter;
642 return Binary.ReadBase64 (buffer, offset, length);
644 BinaryCharGetter = null;
648 public int ReadBinHex (byte [] buffer, int offset, int length)
650 BinaryCharGetter = binaryCharGetter;
652 return Binary.ReadBinHex (buffer, offset, length);
654 BinaryCharGetter = null;
658 public int ReadChars (char [] buffer, int offset, int length)
661 throw new ArgumentOutOfRangeException ("offset", offset, "Offset must be non-negative integer.");
663 throw new ArgumentOutOfRangeException ("length", length, "Length must be non-negative integer.");
664 else if (buffer.Length < offset + length)
665 throw new ArgumentOutOfRangeException ("buffer length is smaller than the sum of offset and length.");
667 if (IsEmptyElement) {
672 if (!readCharsInProgress && NodeType != XmlNodeType.Element)
675 preserveCurrentTag = false;
676 readCharsInProgress = true;
677 useProceedingLineInfo = true;
679 return ReadCharsInternal (buffer, offset, length);
682 public void ResetState ()
685 throw new InvalidOperationException ("Cannot call ResetState when parsing an XML fragment.");
689 public override void ResolveEntity ()
691 // XmlTextReader does not resolve entities.
692 throw new InvalidOperationException ("XmlTextReader cannot resolve external entities.");
696 [MonoTODO] // FIXME: Implement, for performance improvement
697 public override void Skip ()
705 // Parsed DTD Objects
706 // Note that this property must be kept since dtd2xsd uses it.
707 internal DTDObjectModel DTD {
708 get { return parserContext.Dtd; }
711 internal XmlResolver Resolver {
712 get { return resolver; }
717 internal class XmlTokenInfo
719 public XmlTokenInfo (XmlTextReader xtr)
727 protected XmlTextReader Reader;
730 public string LocalName;
731 public string Prefix;
732 public string NamespaceURI;
733 public bool IsEmptyElement;
734 public char QuoteChar;
735 public int LineNumber;
736 public int LinePosition;
737 public int ValueBufferStart;
738 public int ValueBufferEnd;
740 public XmlNodeType NodeType;
742 public virtual string Value {
744 if (valueCache != null)
746 if (ValueBufferStart >= 0) {
747 //Console.WriteLine (NodeType + " / " + ValueBuffer.Length + " / " + ValueBufferStart + " / " + ValueBufferEnd);
748 valueCache = Reader.valueBuffer.ToString (ValueBufferStart, ValueBufferEnd - ValueBufferStart);
752 case XmlNodeType.Text:
753 case XmlNodeType.SignificantWhitespace:
754 case XmlNodeType.Whitespace:
755 case XmlNodeType.Comment:
756 case XmlNodeType.CDATA:
757 case XmlNodeType.ProcessingInstruction:
758 valueCache = Reader.CreateValueString ();
763 set { valueCache = value; }
766 public virtual void Clear ()
768 ValueBufferStart = -1;
770 NodeType = XmlNodeType.None;
771 Name = LocalName = Prefix = NamespaceURI = String.Empty;
772 IsEmptyElement = false;
774 LineNumber = LinePosition = 0;
778 internal class XmlAttributeTokenInfo : XmlTokenInfo
780 public XmlAttributeTokenInfo (XmlTextReader reader)
783 NodeType = XmlNodeType.Attribute;
786 public int ValueTokenStartIndex;
787 public int ValueTokenEndIndex;
789 StringBuilder tmpBuilder = new StringBuilder ();
791 public override string Value {
793 if (valueCache != null)
796 // An empty value should return String.Empty.
797 if (ValueTokenStartIndex == ValueTokenEndIndex) {
798 XmlTokenInfo ti = Reader.attributeValueTokens [ValueTokenStartIndex];
799 if (ti.NodeType == XmlNodeType.EntityReference)
800 valueCache = String.Concat ("&", ti.Name, ";");
802 valueCache = ti.Value;
806 tmpBuilder.Length = 0;
807 for (int i = ValueTokenStartIndex; i <= ValueTokenEndIndex; i++) {
808 XmlTokenInfo ti = Reader.attributeValueTokens [i];
809 if (ti.NodeType == XmlNodeType.Text)
810 tmpBuilder.Append (ti.Value);
812 tmpBuilder.Append ('&');
813 tmpBuilder.Append (ti.Name);
814 tmpBuilder.Append (';');
818 valueCache = tmpBuilder.ToString (0, tmpBuilder.Length);
822 set { valueCache = value; }
825 public override void Clear ()
829 NodeType = XmlNodeType.Attribute;
830 ValueTokenStartIndex = ValueTokenEndIndex = 0;
// Registers this attribute with the reader's namespace manager when it
// is a namespace declaration: a name prefixed with "xmlns" declares its
// local name as the prefix, a bare "xmlns" declares the empty prefix.
// Other attributes are left alone. The comparisons are by reference.
internal void FillXmlns ()
{
	string declaredPrefix;
	if (Object.ReferenceEquals (Prefix, XmlNamespaceManager.PrefixXmlns))
		declaredPrefix = LocalName;
	else if (Object.ReferenceEquals (Name, XmlNamespaceManager.PrefixXmlns))
		declaredPrefix = String.Empty;
	else
		return;

	Reader.parserContext.NamespaceManager.AddNamespace (declaredPrefix, Value);
}
// Assigns NamespaceURI for this attribute from its prefix and name.
// NOTE(review): the line between the last two assignments is not
// visible in this listing; the final lookup is presumably the
// fall-through branch of the if/else chain - confirm against upstream.
841 internal void FillNamespace ()
// Namespace declarations (prefix or whole name is the xmlns prefix
// constant, compared by reference) get the xmlns namespace URI.
843 if (Object.ReferenceEquals (Prefix, XmlNamespaceManager.PrefixXmlns) ||
844 Object.ReferenceEquals (Name, XmlNamespaceManager.PrefixXmlns))
845 NamespaceURI = XmlNamespaceManager.XmlnsXmlns;
// An attribute without a prefix gets the empty namespace.
846 else if (Prefix.Length == 0)
847 NamespaceURI = string.Empty;
// Prefixed attribute: resolve the prefix through the reader (this
// lookup returns null when the manager reports an empty string).
849 NamespaceURI = Reader.LookupNamespace (Prefix, true);
853 private XmlTokenInfo cursorToken;
854 private XmlTokenInfo currentToken;
855 private XmlAttributeTokenInfo currentAttributeToken;
856 private XmlTokenInfo currentAttributeValueToken;
857 private XmlAttributeTokenInfo [] attributeTokens = new XmlAttributeTokenInfo [10];
858 private XmlTokenInfo [] attributeValueTokens = new XmlTokenInfo [10];
859 private int currentAttribute;
860 private int currentAttributeValue;
861 private int attributeCount;
863 private XmlParserContext parserContext;
865 private ReadState readState;
866 private bool disallowReset;
869 private int elementDepth;
870 private bool depthUp;
872 private bool popScope;
876 public TagName (string n, string l, string p)
883 public readonly string Name;
884 public readonly string LocalName;
885 public readonly string Prefix;
888 private TagName [] elementNames;
889 int elementNameStackPos;
891 private bool allowMultipleRoot;
893 private bool isStandalone;
895 private bool returnEntityReference;
896 private string entityReferenceName;
899 private char [] nameBuffer;
900 private int nameLength;
901 private int nameCapacity;
902 private const int initialNameCapacity = 32;
905 private StringBuilder valueBuffer;
907 private TextReader reader;
908 private char [] peekChars;
909 private int peekCharsIndex;
910 private int peekCharsLength;
911 private int curNodePeekIndex;
912 private bool preserveCurrentTag;
913 private const int peekCharCapacity = 1024;
918 private int currentLinkedNodeLineNumber;
919 private int currentLinkedNodeLinePosition;
920 private bool useProceedingLineInfo;
922 private XmlNodeType startNodeType;
923 // State machine attribute.
924 // XmlDeclaration: after the first node.
925 // DocumentType: after doctypedecl
926 // Element: inside document element
927 // EndElement: after document element
928 private XmlNodeType currentState;
930 // For ReadChars()/ReadBase64()/ReadBinHex()
931 private int nestLevel;
932 private bool readCharsInProgress;
933 XmlReaderBinarySupport.CharGetter binaryCharGetter;
935 // These values are never re-initialized.
936 private bool namespaces = true;
937 private WhitespaceHandling whitespaceHandling = WhitespaceHandling.All;
938 private XmlResolver resolver = new XmlUrlResolver ();
939 private bool normalization = false;
941 private bool checkCharacters;
942 private bool prohibitDtd = false;
943 private bool closeInput = true;
944 private EntityHandling entityHandling; // 2.0
946 private NameTable whitespacePool;
947 private char [] whitespaceCache;
949 private XmlException NotWFError (string message)
951 return new XmlException (this as IXmlLineInfo, BaseURI, message);
956 allowMultipleRoot = false;
957 elementNames = new TagName [10];
958 valueBuffer = new StringBuilder ();
959 binaryCharGetter = new XmlReaderBinarySupport.CharGetter (ReadChars);
961 nameBuffer = new char [initialNameCapacity];
964 checkCharacters = true;
966 if (Settings != null)
967 checkCharacters = Settings.CheckCharacters;
971 entityHandling = EntityHandling.ExpandCharEntities;
974 if (peekChars == null)
975 peekChars = new char [peekCharCapacity];
976 peekCharsLength = -1;
977 curNodePeekIndex = -1; // read from start
982 currentLinkedNodeLineNumber = currentLinkedNodeLinePosition = 0;
987 private void Clear ()
989 currentToken = new XmlTokenInfo (this);
990 cursorToken = currentToken;
991 currentAttribute = -1;
992 currentAttributeValue = -1;
995 readState = ReadState.Initial;
1001 popScope = allowMultipleRoot = false;
1002 elementNameStackPos = 0;
1004 isStandalone = false;
1005 returnEntityReference = false;
1006 entityReferenceName = String.Empty;
1010 nameCapacity = initialNameCapacity;
1012 useProceedingLineInfo = false;
1014 currentState = XmlNodeType.None;
1016 readCharsInProgress = false;
// Prepares the reader for a new input: records the fragment type,
// adopts the supplied parser context (or builds one), derives BaseURI
// from url and applies per-fragment-type setup.
// NOTE(review): several lines of this method are not visible in this
// listing; the comments below describe the visible statements only.
1019 private void InitializeContext (string url, XmlParserContext context, TextReader fragment, XmlNodeType fragType)
1021 startNodeType = fragType;
1022 parserContext = context;
// No context supplied: create one around a fresh NameTable.
1023 if (context == null) {
1024 XmlNameTable nt = new NameTable ();
1025 parserContext = new XmlParserContext (nt,
1026 new XmlNamespaceManager (nt),
// A non-empty url becomes the context's BaseURI.
1031 if (url != null && url.Length > 0) {
1034 uri = new Uri (url);
1035 } catch (Exception) {
// url could not be turned into a Uri on its own; resolve it against
// a full path under the current directory instead.
1036 string path = Path.GetFullPath ("./a");
1037 uri = new Uri (new Uri (path), url);
1039 parserContext.BaseURI = uri.ToString ();
// Attribute fragment: the whole input is read eagerly and every double
// quote in it is replaced.
// NOTE(review): the replacement literal on the next line (three double
// quotes) is not a valid C# string as written; it looks like an escape
// that was mangled in transit, possibly the XML entity for a double
// quote. Restore it from the upstream source.
1047 case XmlNodeType.Attribute:
1048 reader = new StringReader (fragment.ReadToEnd ().Replace ("\"", """));
// Element fragment: start in the in-element state and allow more than
// one root element.
1050 case XmlNodeType.Element:
1051 currentState = XmlNodeType.Element;
1052 allowMultipleRoot = true;
1054 case XmlNodeType.Document:
// Presumably the default branch: fragment types without a case above
// are rejected.
1057 throw new XmlException (String.Format ("NodeType {0} is not allowed to create XmlTextReader.", fragType));
1062 internal ConformanceLevel Conformance {
1063 get { return allowMultipleRoot ? ConformanceLevel.Fragment : ConformanceLevel.Document; }
1065 if (value == ConformanceLevel.Fragment) {
1066 currentState = XmlNodeType.Element;
1067 allowMultipleRoot = true;
1072 internal void AdjustLineInfoOffset (int lineNumberOffset, int linePositionOffset)
1074 line += lineNumberOffset;
1075 column += linePositionOffset;
1078 internal void SetNameTable (XmlNameTable nameTable)
1080 parserContext.NameTable = nameTable;
1084 // Use this method rather than setting the properties
1085 // directly so that all the necessary properties can
1086 // be changed in harmony with each other. Maybe the
1087 // fields should be in a separate class to help enforce
1090 // Namespace URI could not be provided here.
1091 private void SetProperties (
1092 XmlNodeType nodeType,
1096 bool isEmptyElement,
1098 bool clearAttributes)
1100 SetTokenProperties (currentToken, nodeType, name, prefix, localName, isEmptyElement, value, clearAttributes);
1101 currentToken.LineNumber = this.currentLinkedNodeLineNumber;
1102 currentToken.LinePosition = this.currentLinkedNodeLinePosition;
1105 private void SetTokenProperties (
1107 XmlNodeType nodeType,
1111 bool isEmptyElement,
1113 bool clearAttributes)
1115 token.NodeType = nodeType;
1117 token.Prefix = prefix;
1118 token.LocalName = localName;
1119 token.IsEmptyElement = isEmptyElement;
1120 token.Value = value;
1121 this.elementDepth = depth;
1123 if (clearAttributes)
1127 private void ClearAttributes ()
1129 //for (int i = 0; i < attributeCount; i++)
1130 // attributeTokens [i].Clear ();
1132 currentAttribute = -1;
1133 currentAttributeValue = -1;
// Combines the char at peekCharsIndex with the one after it into a
// single code point. c is the first char as already peeked by the
// caller; it is handed to ReadTextReader when more input is needed.
// NOTE(review): some lines of this method are not visible in this
// listing (e.g. the statement taken when the refill fails).
1136 private int PeekSurrogate (int c)
// Both halves must be in the buffer; refill when the second one is
// not available yet.
1138 if (peekCharsLength <= peekCharsIndex + 1) {
1139 if (!ReadTextReader (c))
1140 //FIXME: copy MS.NET behaviour when unpaired surrogate found
1144 int highhalfChar = peekChars [peekCharsIndex];
1145 int lowhalfChar = peekChars [peekCharsIndex+1];
// Masking with 0xFC00 keeps the top six bits: 0xD800 marks a high
// surrogate, 0xDC00 a low surrogate. If either half does not match,
// the first char is returned unchanged.
1147 if (((highhalfChar & 0xFC00) != 0xD800) || ((lowhalfChar & 0xFC00) != 0xDC00))
1148 //FIXME: copy MS.NET behaviour when unpaired surrogate found
1149 return highhalfChar;
// UTF-16 decoding of the pair into a code point at or above 0x10000.
1150 return 0x10000 + (highhalfChar-0xD800)*0x400 + (lowhalfChar-0xDC00);
// Looks at the next character of the input without consuming it,
// reading from the peekChars buffer and refilling it through
// ReadTextReader when the buffer is exhausted.
// NOTE(review): several lines of this method (including its return
// statements) are not visible in this listing.
1153 private int PeekChar ()
1155 if (peekCharsIndex < peekCharsLength) {
1156 int c = peekChars [peekCharsIndex];
// Chars outside the surrogate block need no pairing; the rest go
// through PeekSurrogate.
// NOTE(review): the upper bound is ">= 0xDFFF", so U+DFFF itself (a
// low surrogate by the 0xFC00 mask used in PeekSurrogate) is treated as
// an ordinary char. "c > 0xDFFF" looks like the intended test - confirm.
1159 if (c < 0xD800 || c >= 0xDFFF)
1161 return PeekSurrogate (c);
// Buffer exhausted: ask for more input, with no pending char (-1).
1163 if (!ReadTextReader (-1))
1169 private int ReadChar ()
1171 int ch = PeekChar ();
1175 peekCharsIndex++; //Increment by 2 when a compound UCS-4 character was found
1180 } else if (ch != -1) {
1186 private void Advance (int ch) {
1190 peekCharsIndex++; //Increment by 2 when a compound UCS-4 character was found
1195 } else if (ch != -1) {
1200 private bool ReadTextReader (int remained)
1202 if (peekCharsLength < 0) { // initialized buffer
1203 peekCharsLength = reader.Read (peekChars, 0, peekChars.Length);
1204 return peekCharsLength > 0;
1206 int offset = remained >= 0 ? 1 : 0;
1207 int copysize = peekCharsLength - curNodePeekIndex;
1209 // It must assure that current tag content always exists
1211 if (!preserveCurrentTag) {
1212 curNodePeekIndex = 0;
1215 } else if (peekCharsLength < peekChars.Length) {
1216 // NonBlockingStreamReader returned less bytes
1217 // than the size of the buffer. In that case,
1218 // just refill the buffer.
1219 } else if (curNodePeekIndex <= (peekCharsLength >> 1)) {
1220 // extend the buffer
1221 char [] tmp = new char [peekChars.Length * 2];
1222 Array.Copy (peekChars, curNodePeekIndex,
1225 curNodePeekIndex = 0;
1226 peekCharsIndex = copysize;
1228 Array.Copy (peekChars, curNodePeekIndex,
1229 peekChars, 0, copysize);
1230 curNodePeekIndex = 0;
1231 peekCharsIndex = copysize;
1234 peekChars [peekCharsIndex] = (char) remained;
1235 int count = peekChars.Length - peekCharsIndex - offset;
1236 if (count > peekCharCapacity)
1237 count = peekCharCapacity;
1238 int read = reader.Read (
1239 peekChars, peekCharsIndex + offset, count);
1240 int remainingSize = offset + read;
1241 peekCharsLength = peekCharsIndex + remainingSize;
1243 return (remainingSize != 0);
1246 private bool ReadContent ()
1249 parserContext.NamespaceManager.PopScope ();
1250 parserContext.PopScope ();
1254 if (returnEntityReference)
1255 SetEntityReferenceProperties ();
1257 int c = PeekChar ();
1259 readState = ReadState.EndOfFile;
1260 ClearValueBuffer ();
1262 XmlNodeType.None, // nodeType
1263 String.Empty, // name
1264 String.Empty, // prefix
1265 String.Empty, // localName
1266 false, // isEmptyElement
1268 true // clearAttributes
1271 throw NotWFError ("unexpected end of file. Current depth is " + depth);
1278 switch (PeekChar ())
1286 ReadProcessingInstruction ();
1301 if (!ReadWhitespace ())
1303 return ReadContent ();
1311 return this.ReadState != ReadState.EndOfFile;
1314 private void SetEntityReferenceProperties ()
1316 DTDEntityDeclaration decl = DTD != null ? DTD.EntityDecls [entityReferenceName] : null;
1317 if (this.isStandalone)
1318 if (DTD == null || decl == null || !decl.IsInternalSubset)
1319 throw NotWFError ("Standalone document must not contain any references to an non-internally declared entity.");
1320 if (decl != null && decl.NotationName != null)
1321 throw NotWFError ("Reference to any unparsed entities is not allowed here.");
1323 ClearValueBuffer ();
1325 XmlNodeType.EntityReference, // nodeType
1326 entityReferenceName, // name
1327 String.Empty, // prefix
1328 entityReferenceName, // localName
1329 false, // isEmptyElement
1331 true // clearAttributes
1334 returnEntityReference = false;
1335 entityReferenceName = String.Empty;
1338 // The leading '<' has already been consumed.
1339 private void ReadStartTag ()
1341 if (currentState == XmlNodeType.EndElement)
1342 throw NotWFError ("Multiple document element was detected.");
1343 currentState = XmlNodeType.Element;
1345 parserContext.NamespaceManager.PushScope ();
1347 currentLinkedNodeLineNumber = line;
1348 currentLinkedNodeLinePosition = column;
1350 string prefix, localName;
1351 string name = ReadName (out prefix, out localName);
1352 if (currentState == XmlNodeType.EndElement)
1353 throw NotWFError ("document has terminated, cannot open new element");
1355 bool isEmptyElement = false;
1360 if (XmlChar.IsFirstNameChar (PeekChar ()))
1361 ReadAttributes (false);
1362 cursorToken = this.currentToken;
1365 for (int i = 0; i < attributeCount; i++)
1366 attributeTokens [i].FillXmlns ();
1367 for (int i = 0; i < attributeCount; i++)
1368 attributeTokens [i].FillNamespace ();
1372 for (int i = 0; i < attributeCount; i++)
1373 if (attributeTokens [i].Prefix == "xmlns" &&
1374 attributeTokens [i].Value == String.Empty)
1375 throw NotWFError ("Empty namespace URI cannot be mapped to non-empty prefix.");
1377 for (int i = 0; i < attributeCount; i++) {
1378 for (int j = i + 1; j < attributeCount; j++)
1379 if (Object.ReferenceEquals (attributeTokens [i].Name, attributeTokens [j].Name) ||
1380 (Object.ReferenceEquals (attributeTokens [i].LocalName, attributeTokens [j].LocalName) &&
1381 Object.ReferenceEquals (attributeTokens [i].NamespaceURI, attributeTokens [j].NamespaceURI)))
1382 throw NotWFError ("Attribute name and qualified name must be identical.");
1385 if (PeekChar () == '/') {
1387 isEmptyElement = true;
1392 PushElementName (name, localName, prefix);
1394 parserContext.PushScope ();
1399 XmlNodeType.Element, // nodeType
1403 isEmptyElement, // isEmptyElement
1405 false // clearAttributes
1407 if (prefix.Length > 0)
1408 currentToken.NamespaceURI = LookupNamespace (prefix, true);
1409 else if (namespaces)
1410 currentToken.NamespaceURI = parserContext.NamespaceManager.DefaultNamespace;
1413 if (NamespaceURI == null)
1414 throw NotWFError (String.Format ("'{0}' is undeclared namespace.", Prefix));
1416 for (int i = 0; i < attributeCount; i++) {
1417 MoveToAttribute (i);
1418 if (NamespaceURI == null)
1419 throw NotWFError (String.Format ("'{0}' is undeclared namespace.", Prefix));
1426 for (int i = 0; i < attributeCount; i++) {
1427 if (!Object.ReferenceEquals (attributeTokens [i].Prefix, XmlNamespaceManager.PrefixXml))
1429 string aname = attributeTokens [i].LocalName;
1430 string value = attributeTokens [i].Value;
1433 if (this.resolver != null) {
1435 BaseURI != String.Empty ?
1436 new Uri (BaseURI) : null;
1437 Uri uri = resolver.ResolveUri (
1439 parserContext.BaseURI =
1445 parserContext.BaseURI = value;
1448 parserContext.XmlLang = value;
1453 parserContext.XmlSpace = XmlSpace.Preserve;
1456 parserContext.XmlSpace = XmlSpace.Default;
1459 throw NotWFError (String.Format ("Invalid xml:space value: {0}", value));
1466 CheckCurrentStateUpdate ();
// Pushes the name parts of an element that has just been opened onto
// the elementNames stack, doubling the backing array when it is full.
private void PushElementName (string name, string local, string prefix)
{
	int capacity = elementNames.Length;
	if (elementNameStackPos == capacity) {
		TagName [] grown = new TagName [capacity * 2];
		Array.Copy (elementNames, 0, grown, 0, elementNameStackPos);
		elementNames = grown;
	}

	TagName entry = new TagName (name, local, prefix);
	elementNames [elementNameStackPos++] = entry;
}
// Reads an end tag and checks that it closes the most recently pushed element
// name. Raises a not-well-formed error when currentState is not Element, when
// the element name stack is empty, or (through Expect) when the name in the
// input differs from the popped one.
1480 // The reader is positioned on the first character
1481 // of the element's name.
1482 private void ReadEndTag ()
1484 if (currentState != XmlNodeType.Element)
1485 throw NotWFError ("End tag cannot appear in this state.");
// Record the current line and column in the linked-node position fields.
1487 currentLinkedNodeLineNumber = line;
1488 currentLinkedNodeLinePosition = column;
1490 if (elementNameStackPos == 0)
1491 throw NotWFError ("closing element without matching opening element");
// Pop the innermost pushed tag name and require the input to spell it exactly.
1492 TagName expected = elementNames [--elementNameStackPos];
1493 Expect (expected.Name);
// Whitespace is allowed between the name and the closing '>'.
1495 ExpectAfterWhitespace ('>');
1500 XmlNodeType.EndElement, // nodeType
1501 expected.Name, // name
1502 expected.Prefix, // prefix
1503 expected.LocalName, // localName
1504 false, // isEmptyElement
1506 true // clearAttributes
// A prefixed name gets its namespace from LookupNamespace; an unprefixed name
// gets the namespace manager's default namespace when 'namespaces' is set.
1508 if (expected.Prefix.Length > 0)
1509 currentToken.NamespaceURI = LookupNamespace (expected.Prefix, true);
1510 else if (namespaces)
1511 currentToken.NamespaceURI = parserContext.NamespaceManager.DefaultNamespace;
1515 CheckCurrentStateUpdate ();
// Moves currentState to EndElement when, at depth 0 with multiple roots
// disallowed, the current node is an empty element or an end element.
// NOTE(review): presumably this marks the document element as closed so that
// later content is rejected - confirm against the state checks in the readers.
1518 private void CheckCurrentStateUpdate ()
1520 if (depth == 0 && !allowMultipleRoot && (IsEmptyElement || NodeType == XmlNodeType.EndElement))
1521 currentState = XmlNodeType.EndElement;
// Appends the code point 'ch' to nameBuffer as two UTF-16 code units:
// high = (ch - 0x10000) / 0x400 + 0xD800, low = (ch - 0x10000) % 0x400 + 0xDC00.
// The formula assumes ch >= 0x10000; callers are expected to guarantee that.
1525 private void AppendSurrogatePairNameChar (int ch)
1527 nameBuffer [nameLength++] = (char) ((ch - 0x10000) / 0x400 + 0xD800);
// Make sure there is room for the second code unit before writing it.
1528 if (nameLength == nameCapacity)
1529 ExpandNameCapacity ();
1530 nameBuffer [nameLength++] = (char) ((ch - 0x10000) % 0x400 + 0xDC00);
// Doubles nameCapacity and replaces nameBuffer with a new array of that size
// containing the first nameLength characters of the old buffer.
1533 private void ExpandNameCapacity ()
1535 nameCapacity = nameCapacity * 2;
1536 char [] oldNameBuffer = nameBuffer;
1537 nameBuffer = new char [nameCapacity];
1538 Array.Copy (oldNameBuffer, nameBuffer, nameLength);
// Appends the code point 'ch' to valueBuffer: values below Char.MaxValue are
// appended as a single char, AppendSurrogatePairValueChar handles the rest.
// NOTE(review): the test is '<', so ch == Char.MaxValue (0xFFFF) is not taken
// by the single-char branch; the surrogate formula expects ch >= 0x10000.
// Confirm whether '<=' was intended.
1542 private void AppendValueChar (int ch)
1544 if (ch < Char.MaxValue)
1545 valueBuffer.Append ((char) ch);
1547 AppendSurrogatePairValueChar (ch);
// Appends the code point 'ch' to valueBuffer as two UTF-16 code units:
// first (ch - 0x10000) / 0x400 + 0xD800, then (ch - 0x10000) % 0x400 + 0xDC00.
// The formula assumes ch >= 0x10000.
1550 private void AppendSurrogatePairValueChar (int ch)
1552 valueBuffer.Append ((char) ((ch - 0x10000) / 0x400 + 0xD800));
1553 valueBuffer.Append ((char) ((ch - 0x10000) % 0x400 + 0xDC00));
// Returns the contents of valueBuffer as a string. For the two whitespace
// node types the string is obtained from whitespacePool (a NameTable), so
// that equal whitespace runs share one string instance.
1556 private string CreateValueString ()
1558 // Since whitespace strings are mostly identical
1559 // depending on the Depth, we make use of NameTable
1560 // to atomize whitespace strings.
1562 case XmlNodeType.Whitespace:
1563 case XmlNodeType.SignificantWhitespace:
1564 int len = valueBuffer.Length;
// whitespaceCache is a lazily created 32-char scratch array.
1565 if (whitespaceCache == null)
1566 whitespaceCache = new char [32];
// NOTE(review): the statement guarded by this length test is not shown here;
// presumably runs that do not fit the scratch array skip pooling - confirm.
1567 if (len >= whitespaceCache.Length)
1569 if (whitespacePool == null)
1570 whitespacePool = new NameTable ();
// Copy the buffered characters into the scratch array. Both a bulk CopyTo and
// a per-character loop appear; presumably they are alternatives selected by a
// conditional-compilation directive - confirm.
1572 valueBuffer.CopyTo (0, whitespaceCache, 0, len);
1574 for (int i = 0; i < len; i++)
1575 whitespaceCache [i] = valueBuffer [i];
// Atomize the copied characters through the pool.
1577 return whitespacePool.Add (whitespaceCache, 0, valueBuffer.Length);
// Other values: the ToString overload is chosen by the buffer's capacity.
1579 return (valueBuffer.Capacity < 100) ?
1580 valueBuffer.ToString (0, valueBuffer.Length) :
1581 valueBuffer.ToString ();
// Empties valueBuffer by resetting its length to zero.
1584 private void ClearValueBuffer ()
1586 valueBuffer.Length = 0;
// Reads character data into valueBuffer until '<' or end of input is peeked,
// then sets up either an entity-reference node or a text/whitespace node.
// notWhitespace: when true the node is reported as Text. The loop can also set
// it to true; the guarding condition is not shown here (presumably a
// non-whitespace character was seen - confirm).
// Raises a not-well-formed error when currentState is not Element, when an
// invalid character is found with CharacterChecking on, or when ']]>' occurs.
1589 // The reader is positioned on the first character
1591 private void ReadText (bool notWhitespace)
1593 if (currentState != XmlNodeType.Element)
1594 throw NotWFError ("Text node cannot appear in this state.");
1595 preserveCurrentTag = false;
1598 ClearValueBuffer ();
1600 int ch = PeekChar ();
// Tracks whether the previous character was ']' so that ']]>' can be detected.
1601 bool previousWasCloseBracket = false;
1603 while (ch != '<' && ch != -1) {
// Reference handling: ReadReference returns the character to append, and sets
// returnEntityReference when a general entity reference must become a node.
1606 ch = ReadReference (false);
1607 if (returnEntityReference) // Returns -1 if char validation should not be done
1609 } else if (normalization && ch == '\r') {
1613 // append '\n' instead of '\r'.
1614 AppendValueChar ('\n');
1615 // and in case of "\r\n", discard '\r'.
1617 if (CharacterChecking && XmlChar.IsInvalid (ch))
1618 throw NotWFError ("Not allowed character was found.");
1622 // FIXME: it might be optimized by the JIT later,
1623 // AppendValueChar (ch);
// Manually inlined copy of AppendValueChar.
1625 if (ch < Char.MaxValue)
1626 valueBuffer.Append ((char) ch);
1628 AppendSurrogatePairValueChar (ch);
// Two consecutive ']' followed by '>' is the forbidden sequence ']]>'.
1633 if (previousWasCloseBracket)
1634 if (PeekChar () == '>')
1635 throw NotWFError ("Inside text content, character sequence ']]>' is not allowed.");
1636 previousWasCloseBracket = true;
1638 else if (previousWasCloseBracket)
1639 previousWasCloseBracket = false;
1641 notWhitespace = true;
// Nothing was buffered before the entity reference: report the reference itself.
1644 if (returnEntityReference && valueBuffer.Length == 0) {
1645 SetEntityReferenceProperties ();
// Otherwise choose Text, or one of the whitespace node types based on xml:space.
1647 XmlNodeType nodeType = notWhitespace ? XmlNodeType.Text :
1648 this.XmlSpace == XmlSpace.Preserve ? XmlNodeType.SignificantWhitespace : XmlNodeType.Whitespace;
1650 nodeType, // nodeType
1651 String.Empty, // name
1652 String.Empty, // prefix
1653 String.Empty, // localName
1654 false, // isEmptyElement
1655 null, // value: create only when required
1656 true // clearAttributes
// Dispatches on the character after '&': '#' selects a character reference,
// anything else an entity reference. The result is whatever the selected
// reader returns.
// NOTE(review): the older comment below says "Returns true", but the method
// returns int. It signals an unexpanded entity reference through state set by
// ReadEntityReference (returnEntityReference), not through a boolean result.
1661 // The leading '&' has already been consumed.
1662 // Returns true if the entity reference isn't a simple
1663 // character reference or one of the predefined entities.
1664 // This allows the ReadText method to break so that the
1665 // next call to Read will return the EntityReference node.
1666 private int ReadReference (bool ignoreEntityReferences)
1668 if (PeekChar () == '#') {
1670 return ReadCharacterReference ();
1672 return ReadEntityReference (ignoreEntityReferences);
// Parses the numeric part of a character reference and returns the code point.
// A leading 'x' selects hexadecimal digits (0-9, A-F, a-f); otherwise decimal
// digits are read. Digits are accumulated until ';' or end of input is peeked.
// Raises a not-well-formed error for a character that is not a valid digit,
// and for a resulting code point that XmlChar.IsInvalid rejects when both
// CharacterChecking and Normalization are on.
// NOTE(review): no overflow guard is visible on the accumulation ('value << 4',
// 'value * 10'); a very long digit run could wrap - confirm.
1675 private int ReadCharacterReference ()
1680 if (PeekChar () == 'x') {
1683 while ((ch = PeekChar ()) != ';' && ch != -1) {
1686 if (ch >= '0' && ch <= '9')
1687 value = (value << 4) + ch - '0';
1688 else if (ch >= 'A' && ch <= 'F')
1689 value = (value << 4) + ch - 'A' + 10;
1690 else if (ch >= 'a' && ch <= 'f')
1691 value = (value << 4) + ch - 'a' + 10;
1693 throw NotWFError (String.Format (CultureInfo.InvariantCulture,
1694 "invalid hexadecimal digit: {0} (#x{1:X})",
1699 while ((ch = PeekChar ()) != ';' && ch != -1) {
1702 if (ch >= '0' && ch <= '9')
1703 value = value * 10 + ch - '0';
1705 throw NotWFError (String.Format (CultureInfo.InvariantCulture,
1706 "invalid decimal digit: {0} (#x{1:X})",
1714 // There is no way to save surrogate pairs...
1715 if (CharacterChecking && Normalization &&
1716 XmlChar.IsInvalid (value))
1717 throw NotWFError ("Referenced character was not allowed in XML. Normalization is " + normalization + ", checkCharacters = " + checkCharacters);
// Reads the name of an entity reference and handles it in one of three ways:
//  - a predefined entity (XmlChar.GetPredefinedEntity returns >= 0) takes the
//    first branch; presumably the character is returned - confirm;
//  - with ignoreEntityReferences set, the literal text "&name;" is appended to
//    the value buffer;
//  - otherwise returnEntityReference and entityReferenceName are set so the
//    caller can produce an EntityReference node.
1721 // Returns -1 if it should not be validated.
1722 // Real EOF must not be detected here.
1723 private int ReadEntityReference (bool ignoreEntityReferences)
1725 string name = ReadName ();
1728 int predefined = XmlChar.GetPredefinedEntity (name);
1729 if (predefined >= 0)
1732 if (ignoreEntityReferences) {
1733 AppendValueChar ('&');
1734 for (int i = 0; i < name.Length; i++)
1735 AppendValueChar (name [i]);
1736 AppendValueChar (';');
1738 returnEntityReference = true;
1739 entityReferenceName = name;
// Reads attributes (name, '=', quoted value) in a loop that runs until end of
// input is peeked; the '?', '/' and '>' tests near the end select statements
// that are not shown here (presumably they leave the loop - confirm).
// Each attribute gets a fresh attribute token with its line/column, qualified
// name, prefix and local name; its value is read by ReadAttributeValueTokens.
// isXmlDecl: NOTE(review) - its uses are not shown here; the comment below
// indicates it relates to XML declaration handling - confirm.
// Both attribute cursors are reset to -1 on entry and again on exit.
1745 // The reader is positioned on the first character of
1746 // the attribute name.
1747 private void ReadAttributes (bool isXmlDecl)
1750 bool requireWhitespace = false;
1751 currentAttribute = -1;
1752 currentAttributeValue = -1;
// Once requireWhitespace is set, whitespace must precede the next attribute name.
1755 if (!SkipWhitespace () && requireWhitespace)
1756 throw NotWFError ("Unexpected token. Name is required here.");
1758 IncrementAttributeToken ();
1759 currentAttributeToken.LineNumber = line;
1760 currentAttributeToken.LinePosition = column;
1762 string prefix, localName;
1763 currentAttributeToken.Name = ReadName (out prefix, out localName);
1764 currentAttributeToken.Prefix = prefix;
1765 currentAttributeToken.LocalName = localName;
1766 ExpectAfterWhitespace ('=');
// -1 tells ReadAttributeValueTokens to read the opening quote from the input.
1768 ReadAttributeValueTokens (-1);
1769 // This hack is required for xmldecl which has
1770 // both effective attributes and Value.
1773 dummyValue = currentAttributeToken.Value;
// No whitespace after the value: another attribute may not follow directly.
1777 if (!SkipWhitespace ())
1778 requireWhitespace = true;
1779 peekChar = PeekChar ();
1781 if (peekChar == '?')
1784 else if (peekChar == '/' || peekChar == '>')
1786 } while (peekChar != -1);
1788 currentAttribute = -1;
1789 currentAttributeValue = -1;
// Adds a synthetic attribute: takes the next attribute token, gives it the
// atomized 'name' with an empty prefix and namespace, then takes the next
// attribute value token and fills it through SetTokenProperties.
// NOTE(review): the SetTokenProperties arguments are not shown here;
// presumably 'value' is stored as the token's text - confirm.
1792 private void AddAttributeWithValue (string name, string value)
1794 IncrementAttributeToken ();
1795 XmlAttributeTokenInfo ati = attributeTokens [currentAttribute];
1796 ati.Name = parserContext.NameTable.Add (name);
1797 ati.Prefix = String.Empty;
1798 ati.NamespaceURI = String.Empty;
1799 IncrementAttributeValueToken ();
1800 XmlTokenInfo vti = attributeValueTokens [currentAttributeValue];
1801 SetTokenProperties (vti,
// Makes attributeTokens [currentAttribute] the current attribute token and
// clears it. The array is doubled when currentAttribute equals its length, and
// the token object is created on first use of a slot.
// NOTE(review): the statement that advances currentAttribute is not shown
// here; presumably it precedes the capacity check - confirm.
1813 private void IncrementAttributeToken ()
1816 if (attributeTokens.Length == currentAttribute) {
1817 XmlAttributeTokenInfo [] newArray =
1818 new XmlAttributeTokenInfo [attributeTokens.Length * 2];
1819 attributeTokens.CopyTo (newArray, 0);
1820 attributeTokens = newArray;
// Token objects are allocated lazily and reused afterwards.
1822 if (attributeTokens [currentAttribute] == null)
1823 attributeTokens [currentAttribute] = new XmlAttributeTokenInfo (this);
1824 currentAttributeToken = attributeTokens [currentAttribute];
1825 currentAttributeToken.Clear ();
// Advances currentAttributeValue and makes that slot of attributeValueTokens
// the current attribute value token, then clears it. The array is doubled when
// the new index equals its length, and the token object is created on first
// use of a slot.
1828 private void IncrementAttributeValueToken ()
1830 currentAttributeValue++;
1831 if (attributeValueTokens.Length == currentAttributeValue) {
1832 XmlTokenInfo [] newArray = new XmlTokenInfo [attributeValueTokens.Length * 2];
1833 attributeValueTokens.CopyTo (newArray, 0);
1834 attributeValueTokens = newArray;
// Token objects are allocated lazily and reused afterwards.
1836 if (attributeValueTokens [currentAttributeValue] == null)
1837 attributeValueTokens [currentAttributeValue] = new XmlTokenInfo (this);
1838 currentAttributeValueToken = attributeValueTokens [currentAttributeValue];
1839 currentAttributeValueToken.Clear ();
// Reads one quoted attribute value and splits it into value tokens: runs of
// text (recorded as ValueBufferStart/ValueBufferEnd ranges of valueBuffer) and
// EntityReference tokens for general entities that are not expanded.
// dummyQuoteChar: when negative, the opening quote is read from the input;
// otherwise it is used as the quote character (the "attribute value
// constructor" case mentioned below).
// Raises a not-well-formed error for an unquoted value, a '<' inside the
// value, end of input while reading from the stream, an invalid character
// (with CharacterChecking on), or an entity reference that fails
// CheckAttributeEntityReferenceWFC.
1842 // LAMESPEC: Orthodox XML reader should normalize attribute values
1843 private void ReadAttributeValueTokens (int dummyQuoteChar)
1845 int quoteChar = (dummyQuoteChar < 0) ? ReadChar () : dummyQuoteChar;
1847 if (quoteChar != '\'' && quoteChar != '\"')
1848 throw NotWFError ("an attribute value was not quoted");
1849 currentAttributeToken.QuoteChar = (char) quoteChar;
// Start the first value token and record where it begins.
1851 IncrementAttributeValueToken ();
1852 currentAttributeToken.ValueTokenStartIndex = currentAttributeValue;
1853 currentAttributeValueToken.LineNumber = line;
1854 currentAttributeValueToken.LinePosition = column;
// incrementToken is set after an EntityReference token has been emitted, so
// that the next character starts a new text token.
1856 bool incrementToken = false;
1857 bool isNewToken = true;
1860 currentAttributeValueToken.ValueBufferStart = valueBuffer.Length;
1863 if (ch == quoteChar)
1866 if (incrementToken) {
1867 IncrementAttributeValueToken ();
1868 currentAttributeValueToken.ValueBufferStart = valueBuffer.Length;
1869 currentAttributeValueToken.LineNumber = line;
1870 currentAttributeValueToken.LinePosition = column;
1871 incrementToken = false;
1878 throw NotWFError ("attribute values cannot contain '<'");
1880 if (dummyQuoteChar < 0)
1881 throw NotWFError ("unexpected end of file in an attribute value");
1882 else // Attribute value constructor.
1888 if (PeekChar () == '\n')
1889 continue; // skip '\r'.
1891 // The csc in MS.NET 2.0 beta 1 barfs on this goto, so work around that
1900 // When Normalize = true, then replace
1901 // all spaces to ' '
// '&' handling: a following '#' means a character reference, otherwise an
// entity name is read and checked.
1907 if (PeekChar () == '#') {
1909 ch = ReadCharacterReference ();
1910 AppendValueChar (ch);
1913 // Check XML 1.0 section 3.1 WFC.
1914 string entName = ReadName ();
1916 int predefined = XmlChar.GetPredefinedEntity (entName);
1917 if (predefined < 0) {
1918 CheckAttributeEntityReferenceWFC (entName);
// With entity expansion on, the replacement text is appended in place.
1920 if (entityHandling == EntityHandling.ExpandEntities) {
1921 string value = DTD.GenerateEntityAttributeText (entName);
1922 foreach (char c in value)
1923 AppendValueChar (c);
// Otherwise close the current text token and emit an EntityReference token.
1927 currentAttributeValueToken.ValueBufferEnd = valueBuffer.Length;
1928 currentAttributeValueToken.NodeType = XmlNodeType.Text;
1930 IncrementAttributeValueToken ();
1931 currentAttributeValueToken.Name = entName;
1932 currentAttributeValueToken.Value = String.Empty;
1933 currentAttributeValueToken.NodeType = XmlNodeType.EntityReference;
1934 incrementToken = true;
1938 AppendValueChar (predefined);
1941 if (CharacterChecking && XmlChar.IsInvalid (ch))
1942 throw NotWFError ("Invalid character was found.");
1943 // FIXME: it might be optimized by the JIT later,
1944 // AppendValueChar (ch);
// Manually inlined copy of AppendValueChar.
1946 if (ch < Char.MaxValue)
1947 valueBuffer.Append ((char) ch);
1949 AppendSurrogatePairValueChar (ch);
// Close the trailing text token unless the value ended on an entity reference.
1956 if (!incrementToken) {
1957 currentAttributeValueToken.ValueBufferEnd = valueBuffer.Length;
1958 currentAttributeValueToken.NodeType = XmlNodeType.Text;
1960 currentAttributeToken.ValueTokenEndIndex = currentAttributeValue;
// Checks the well-formedness constraints on an entity referenced from an
// attribute value. Looks the entity up in DTD.EntityDecls (null when there is
// no DTD) and raises a not-well-formed error when:
//  - it is undeclared and either entities are being expanded or both a DTD
//    and a resolver are present;
//  - it has an external reference;
//  - the document is standalone and the declaration is not in the internal subset;
//  - its EntityValue contains '<'.
1964 private void CheckAttributeEntityReferenceWFC (string entName)
1966 DTDEntityDeclaration entDecl =
1967 DTD == null ? null : DTD.EntityDecls [entName];
1968 if (entDecl == null) {
// NOTE(review): the trailing 'entDecl == null' repeats the enclosing test.
1969 if (entityHandling == EntityHandling.ExpandEntities
1970 || (DTD != null && resolver != null && entDecl == null))
1971 throw NotWFError (String.Format ("Referenced entity '{0}' does not exist.", entName));
1976 if (entDecl.HasExternalReference)
1977 throw NotWFError ("Reference to external entities is not allowed in the value of an attribute.");
1978 if (isStandalone && !entDecl.IsInternalSubset)
1979 throw NotWFError ("Reference to external entities is not allowed in the internal subset.");
1980 if (entDecl.EntityValue.IndexOf ('<') >= 0)
1981 throw NotWFError ("Attribute must not contain character '<' either directly or indirectly by way of entity references.");
// Reads a processing instruction or an XML declaration. The target name is
// read first; a target that equals "xml" only when lower-cased is rejected.
// The content is then buffered up to the closing "?>", with invalid characters
// rejected when CharacterChecking is on. A target that is the same string
// instance as XmlNamespaceManager.PrefixXml goes to VerifyXmlDeclaration.
// NOTE(review): that test is by reference, so it relies on ReadName returning
// an atomized string shared with PrefixXml - confirm.
1984 // The reader is positioned on the first character
1987 // It may be xml declaration or processing instruction.
1988 private void ReadProcessingInstruction ()
1990 string target = ReadName ();
1991 if (target != "xml" && target.ToLower (CultureInfo.InvariantCulture) == "xml")
1992 throw NotWFError ("Not allowed processing instruction name which starts with 'X', 'M', 'L' was found.");
// The target must be followed by whitespace or directly by '?'.
1994 if (!SkipWhitespace ())
1995 if (PeekChar () != '?')
1996 throw NotWFError ("Invalid processing instruction name was found.");
1998 ClearValueBuffer ();
2001 while ((ch = PeekChar ()) != -1) {
2004 if (ch == '?' && PeekChar () == '>') {
2009 if (CharacterChecking && XmlChar.IsInvalid (ch))
2010 throw NotWFError ("Invalid character was found.");
2011 AppendValueChar (ch);
2014 if (Object.ReferenceEquals (target, XmlNamespaceManager.PrefixXml))
2015 VerifyXmlDeclaration ();
// Leave the initial None state once a processing instruction has been read.
2017 if (currentState == XmlNodeType.None)
2018 currentState = XmlNodeType.XmlDeclaration;
2021 XmlNodeType.ProcessingInstruction, // nodeType
2023 String.Empty, // prefix
2024 target, // localName
2025 false, // isEmptyElement
2026 null, // value: create only when required
2027 true // clearAttributes
// Validates the buffered content of an XML declaration and exposes it as
// attributes. Expected order: version="1.0" (mandatory), then an optional
// encoding, then an optional standalone; anything else is an error.
// Raises a not-well-formed error when the declaration appears in a state other
// than None (unless multiple roots are allowed), when version is missing or
// not "1.0", when the encoding name is not a valid IANA name, when standalone
// is neither "yes" nor "no", on an unexpected pseudo-attribute, or when text
// remains after the last one.
// NOTE(review): 'encoding' and 'standalone' start as null and are tested near
// the end; their assignments are not shown here - confirm they are set inside
// the corresponding branches.
2032 void VerifyXmlDeclaration ()
2034 if (!allowMultipleRoot && currentState != XmlNodeType.None)
2035 throw NotWFError ("XML declaration cannot appear in this state.");
2037 currentState = XmlNodeType.XmlDeclaration;
2039 string text = CreateValueString ();
2045 string encoding = null, standalone = null;
2047 ParseAttributeFromString (text, ref idx, out name, out value);
2048 if (name != "version" || value != "1.0")
2049 throw NotWFError ("'version' is expected.");
// A further pseudo-attribute is parsed only after whitespace and if text remains.
2050 name = String.Empty;
2051 if (SkipWhitespaceInString (text, ref idx) && idx < text.Length)
2052 ParseAttributeFromString (text, ref idx, out name, out value);
2053 if (name == "encoding") {
2054 if (!XmlChar.IsValidIANAEncoding (value))
2055 throw NotWFError ("'encoding' must be a valid IANA encoding name.");
// The context encoding comes from the stream reader when there is one,
// otherwise it is set to Encoding.Unicode.
2056 if (reader is XmlStreamReader)
2057 parserContext.Encoding = ((XmlStreamReader) reader).Encoding;
2059 parserContext.Encoding = Encoding.Unicode;
2061 name = String.Empty;
2062 if (SkipWhitespaceInString (text, ref idx) && idx < text.Length)
2063 ParseAttributeFromString (text, ref idx, out name, out value);
2065 if (name == "standalone") {
2066 this.isStandalone = value == "yes";
2067 if (value != "yes" && value != "no")
2068 throw NotWFError ("Only 'yes' or 'no' is allow for 'standalone'");
2070 SkipWhitespaceInString (text, ref idx);
2072 else if (name.Length != 0)
2073 throw NotWFError (String.Format ("Unexpected token: '{0}'", name));
2075 if (idx < text.Length)
2076 throw NotWFError ("'?' is expected.");
// Expose the pseudo-attributes as attributes, then reset both attribute cursors.
2078 AddAttributeWithValue ("version", "1.0");
2079 if (encoding != null)
2080 AddAttributeWithValue ("encoding", encoding);
2081 if (standalone != null)
2082 AddAttributeWithValue ("standalone", standalone);
2083 currentAttribute = currentAttributeValue = -1;
2086 XmlNodeType.XmlDeclaration, // nodeType
2088 String.Empty, // prefix
2090 false, // isEmptyElement
2092 false // clearAttributes
// Scans 'text' from 'idx' while the characters are XML whitespace and returns
// true when idx ended up beyond 'start'.
// NOTE(review): the declaration of 'start' and the loop body are not shown
// here; presumably start is idx on entry and the body advances idx - confirm.
2096 bool SkipWhitespaceInString (string text, ref int idx)
2099 while (idx < text.Length && XmlChar.IsWhitespace (text [idx]))
2101 return idx - start > 0;
// Parses one name="value" (or name='value') pair out of 'src' starting at
// 'idx': optional whitespace, a run of name characters, optional whitespace,
// '=', optional whitespace, then a quoted value. 'name' and 'value' receive
// the two substrings.
// Raises a not-well-formed error when '=' or an opening quote is missing.
// NOTE(review): the statements that advance idx are not shown here. The value
// length 'idx - start - 1' presumably assumes idx has moved past the closing
// quote. No check is visible that the closing quote was actually found before
// the end of src - confirm.
2104 private void ParseAttributeFromString (string src,
2105 ref int idx, out string name, out string value)
2107 while (idx < src.Length && XmlChar.IsWhitespace (src [idx]))
2111 while (idx < src.Length && XmlChar.IsNameChar (src [idx]))
2113 name = src.Substring (start, idx - start);
2115 while (idx < src.Length && XmlChar.IsWhitespace (src [idx]))
2117 if (idx == src.Length || src [idx] != '=')
2118 throw NotWFError (String.Format ("'=' is expected after {0}", name));
2121 while (idx < src.Length && XmlChar.IsWhitespace (src [idx]))
2124 if (idx == src.Length || src [idx] != '"' && src [idx] != '\'')
2125 throw NotWFError ("'\"' or '\'' is expected.");
// The value runs up to the next occurrence of the same quote character.
2127 char quote = src [idx];
2131 while (idx < src.Length && src [idx] != quote)
2135 value = src.Substring (start, idx - start - 1);
// Skips a text declaration at the current position, if one is present.
// The input is peeked for '<', '?' and then the four characters "xml "; a
// match that differs only by case is rejected. An optional version (read into
// a three-char buffer and compared with "1.0") and an encoding declaration are
// then consumed.
// Raises a not-well-formed error for a wrongly-cased "xml " target, a
// malformed or too-long version value, a malformed encoding value, and a
// missing encoding declaration when Conformance is ConformanceLevel.Auto.
2138 internal void SkipTextDeclaration ()
2140 if (PeekChar () != '<')
2145 if (PeekChar () != '?') {
// Peek until six characters are buffered, so peekChars [2..5] can be examined.
2151 while (peekCharsIndex < 6) {
2152 if (PeekChar () < 0)
2157 if (new string (peekChars, 2, 4) != "xml ") {
2158 if (new string (peekChars, 2, 4).ToLower (CultureInfo.InvariantCulture) == "xml ") {
2159 throw NotWFError ("Processing instruction name must not be character sequence 'X' 'M' 'L' with case insensitivity.");
// Optional version declaration.
2168 if (PeekChar () == 'v') {
2170 ExpectAfterWhitespace ('=');
2172 int quoteChar = ReadChar ();
2173 char [] expect1_0 = new char [3];
2174 int versionLength = 0;
2175 switch (quoteChar) {
2178 while (PeekChar () != quoteChar) {
2179 if (PeekChar () == -1)
2180 throw NotWFError ("Invalid version declaration inside text declaration.");
2181 else if (versionLength == 3)
2182 throw NotWFError ("Invalid version number inside text declaration.");
2184 expect1_0 [versionLength] = (char) ReadChar ();
2186 if (versionLength == 3 && new String (expect1_0) != "1.0")
2187 throw NotWFError ("Invalid version number inside text declaration.");
2194 throw NotWFError ("Invalid version declaration inside text declaration.");
// Encoding declaration: the quoted value is consumed without being inspected.
2198 if (PeekChar () == 'e') {
2199 Expect ("encoding");
2200 ExpectAfterWhitespace ('=');
2202 int quoteChar = ReadChar ();
2203 switch (quoteChar) {
2206 while (PeekChar () != quoteChar)
2207 if (ReadChar () == -1)
2208 throw NotWFError ("Invalid encoding declaration inside text declaration.");
2213 throw NotWFError ("Invalid encoding declaration inside text declaration.");
2215 // Encoding value should be checked inside XmlInputStream.
2218 // this condition is to check if this instance is
2219 // not created by XmlReader.Create() (which just
2220 // omits strict text declaration check).
2221 else if (Conformance == ConformanceLevel.Auto)
2222 throw NotWFError ("Encoding declaration is mandatory in text declaration.");
2227 curNodePeekIndex = peekCharsIndex; // without this it causes incorrect value start indication.
// Peeks the character after '<!' to decide which markup follows and raises a
// not-well-formed error for markup it does not recognise.
// NOTE(review): the dispatch itself is not shown here; presumably it selects
// between comment, CDATA section and DOCTYPE readers - confirm.
2230 // The reader is positioned on the first character after
2231 // the leading '<!'.
2232 private void ReadDeclaration ()
2234 int ch = PeekChar ();
2252 throw NotWFError ("Unexpected declaration markup was found.");
// Reads a comment body into valueBuffer up to the closing '-->' and sets up a
// Comment node whose value is created lazily.
// Raises a not-well-formed error when '--' is not followed by '>' and when an
// invalid character is found.
// NOTE(review): the invalid-character test here is not gated on
// CharacterChecking, unlike the tests in the text, CDATA and processing
// instruction readers - confirm this is intentional.
2256 // The reader is positioned on the first character after
2257 // the leading '<!--'.
2258 private void ReadComment ()
// Leave the initial None state once a comment has been seen.
2260 if (currentState == XmlNodeType.None)
2261 currentState = XmlNodeType.XmlDeclaration;
2263 preserveCurrentTag = false;
2265 ClearValueBuffer ();
2268 while ((ch = PeekChar ()) != -1) {
// '--' is only legal as part of the terminating '-->'.
2271 if (ch == '-' && PeekChar () == '-') {
2274 if (PeekChar () != '>')
2275 throw NotWFError ("comments cannot contain '--'");
2281 if (XmlChar.IsInvalid (ch))
2282 throw NotWFError ("Not allowed character was found.");
2284 AppendValueChar (ch);
2288 XmlNodeType.Comment, // nodeType
2289 String.Empty, // name
2290 String.Empty, // prefix
2291 String.Empty, // localName
2292 false, // isEmptyElement
2293 null, // value: create only when required
2294 true // clearAttributes
// Reads a CDATA section body into valueBuffer up to the closing ']]>' and sets
// up a CDATA node whose value is created lazily. With normalization on, '\r'
// is replaced by '\n' or discarded as described by the comments below.
// Raises a not-well-formed error when currentState is not Element and when an
// invalid character is found with CharacterChecking on.
2298 // The reader is positioned on the first character after
2299 // the leading '<![CDATA['.
2300 private void ReadCDATA ()
2302 if (currentState != XmlNodeType.Element)
2303 throw NotWFError ("CDATA section cannot appear in this state.");
2304 preserveCurrentTag = false;
2306 ClearValueBuffer ();
2310 while (PeekChar () != -1) {
// Look for the ']]>' terminator.
2315 if (ch == ']' && PeekChar () == ']') {
2316 ch = ReadChar (); // ']'
2318 if (PeekChar () == '>') {
2325 if (normalization && ch == '\r') {
2328 // append '\n' instead of '\r'.
2329 AppendValueChar ('\n');
2330 // otherwise, discard '\r'.
2333 if (CharacterChecking && XmlChar.IsInvalid (ch))
2334 throw NotWFError ("Invalid character was found.");
2336 // FIXME: it might be optimized by the JIT later,
2337 // AppendValueChar (ch);
// Manually inlined copy of AppendValueChar.
2339 if (ch < Char.MaxValue)
2340 valueBuffer.Append ((char) ch);
2342 AppendSurrogatePairValueChar (ch);
2347 XmlNodeType.CDATA, // nodeType
2348 String.Empty, // name
2349 String.Empty, // prefix
2350 String.Empty, // localName
2351 false, // isEmptyElement
2352 null, // value: create only when required
2353 true // clearAttributes
// Reads a document type declaration: the doctype name, an optional external
// identifier (SYSTEM literal, or PUBLIC literal followed by a system literal),
// an optional internal subset between '[' and ']', and the closing '>'.
// It then builds the DTD object model and sets up a DocumentType node whose
// value is the internal subset, with PUBLIC and SYSTEM exposed as attributes
// when present.
// Raises a not-well-formed error when DTDs are prohibited (the guarding
// condition is not shown here), when currentState is DocumentType, Element or
// EndElement, and when whitespace is missing between the two identifiers.
2357 // The reader is positioned on the first character after
2358 // the leading '<!DOCTYPE'.
2359 private void ReadDoctypeDecl ()
2362 throw NotWFError ("Document Type Declaration (DTD) is prohibited in this XML.");
2363 switch (currentState) {
2364 case XmlNodeType.DocumentType:
2365 case XmlNodeType.Element:
2366 case XmlNodeType.EndElement:
2367 throw NotWFError ("Document type cannot appear in this state.");
2369 currentState = XmlNodeType.DocumentType;
2371 string doctypeName = null;
2372 string publicId = null;
2373 string systemId = null;
2374 int intSubsetStartLine = 0;
2375 int intSubsetStartColumn = 0;
2378 doctypeName = ReadName ();
2383 systemId = ReadSystemLiteral (true);
2386 publicId = ReadPubidLiteral ();
2387 if (!SkipWhitespace ())
2388 throw NotWFError ("Whitespace is required between PUBLIC id and SYSTEM id.");
2389 systemId = ReadSystemLiteral (false);
2395 if(PeekChar () == '[')
2397 // read markupdecl etc. or end of decl
// Remember where the internal subset starts, then capture its raw text.
2399 intSubsetStartLine = this.LineNumber;
2400 intSubsetStartColumn = this.LinePosition;
2401 ClearValueBuffer ();
2402 ReadInternalSubset ();
2403 parserContext.InternalSubset = CreateValueString ();
2405 // end of DOCTYPE decl.
2406 ExpectAfterWhitespace ('>');
2408 GenerateDTDObjectModel (doctypeName, publicId,
2409 systemId, parserContext.InternalSubset,
2410 intSubsetStartLine, intSubsetStartColumn);
2412 // set properties for <!DOCTYPE> node
2414 XmlNodeType.DocumentType, // nodeType
2415 doctypeName, // name
2416 String.Empty, // prefix
2417 doctypeName, // localName
2418 false, // isEmptyElement
2419 parserContext.InternalSubset, // value
2420 true // clearAttributes
2423 if (publicId != null)
2424 AddAttributeWithValue ("PUBLIC", publicId);
2425 if (systemId != null)
2426 AddAttributeWithValue ("SYSTEM", systemId);
2427 currentAttribute = currentAttributeValue = -1;
// Convenience overload: builds the DTD object model with the internal subset
// start position given as line 0, column 0.
2430 internal DTDObjectModel GenerateDTDObjectModel (string name, string publicId,
2431 string systemId, string internalSubset)
2433 return GenerateDTDObjectModel (name, publicId, systemId, internalSubset, 0, 0);
// Creates a new DTDObjectModel on the parser context, fills it from the
// arguments and the reader's current settings (base URI, resolver, standalone
// flag, current line and column), and has a DTDReader generate the model.
// intSubsetStartLine and intSubsetStartColumn are passed to the DTDReader.
// Returns the result of DTDReader.GenerateDTDObjectModel.
// NOTE(review): 'name' is not used on any line shown here; presumably it is
// assigned to the DTD on a line not shown - confirm.
2436 internal DTDObjectModel GenerateDTDObjectModel (string name, string publicId,
2437 string systemId, string internalSubset, int intSubsetStartLine, int intSubsetStartColumn)
2440 parserContext.Dtd = new DTDObjectModel (this.NameTable); // merges both internal and external subsets in the meantime,
2441 DTD.BaseURI = BaseURI;
2443 DTD.PublicId = publicId;
2444 DTD.SystemId = systemId;
2445 DTD.InternalSubset = internalSubset;
2446 DTD.XmlResolver = resolver;
2447 DTD.IsStandalone = isStandalone;
2448 DTD.LineNumber = line;
2449 DTD.LinePosition = column;
// The DTD reader uses the same normalization setting as this reader.
2451 DTDReader dr = new DTDReader (DTD, intSubsetStartLine, intSubsetStartColumn);
2452 dr.Normalization = this.normalization;
2453 return dr.GenerateDTDObjectModel ();
// States kept on the state stack while the internal DTD subset is scanned.
// ReadInternalSubset uses the members Free, PI, Comment, ElementDecl,
// EntityDecl, AttlistDecl, NotationDecl, InsideSingleQuoted and
// InsideDoubleQuoted.
2456 private enum DtdInputState
// Typed wrapper around a non-generic Stack holding DtdInputState values.
// A new stack starts with DtdInputState.Free already pushed.
2469 private class DtdInputStateStack
2471 Stack intern = new Stack ();
2472 public DtdInputStateStack ()
2474 Push (DtdInputState.Free);
// Returns the top state without removing it.
2477 public DtdInputState Peek ()
2479 return (DtdInputState) intern.Peek ();
// Removes and returns the top state.
2482 public DtdInputState Pop ()
2484 return (DtdInputState) intern.Pop ();
// Pushes 'val' as the new top state.
2487 public void Push (DtdInputState val)
// State stack used while scanning the internal DTD subset; State is the state
// currently on top of it.
2494 DtdInputStateStack stateStack = new DtdInputStateStack ();
2495 DtdInputState State {
2496 get { return stateStack.Peek (); }
// Reads one character from the input and appends it to the value buffer.
// NOTE(review): the return statement is not shown here; presumably the
// character read is returned - confirm.
2499 private int ReadValueChar ()
2501 int ret = ReadChar ();
2502 AppendValueChar (ret);
// Appends the string 's' to the value buffer.
// NOTE(review): going by the method name, a line not shown here presumably
// first requires the input to match 's' - confirm.
2506 private void ExpectAndAppend (string s)
2509 valueBuffer.Append (s);
// Scans the internal DTD subset character by character, copying everything
// into the value buffer and tracking the lexical context on stateStack
// (declaration kinds, quoted literals, comments, processing instructions).
// Scanning stops when the subset terminator is met in the Free state; that
// terminator is then removed from the buffer again.
// Raises a not-well-formed error on end of input inside the subset, on
// unknown '<!...' markup, on an unexpected '>', and on a parameter entity
// reference inside a declaration other than an entity declaration.
// NOTE(review): the case labels of the switch statements are not shown here;
// the inline comments describe the branches only as far as the visible
// statements allow.
2512 // Simply read but not generate any result.
2513 private void ReadInternalSubset ()
2515 bool continueParse = true;
2517 while (continueParse) {
2518 switch (ReadValueChar ()) {
2521 case DtdInputState.Free:
// Drop the terminating character that ReadValueChar just appended.
2523 valueBuffer.Remove (valueBuffer.Length - 1, 1);
2524 continueParse = false;
2526 case DtdInputState.InsideDoubleQuoted:
2527 case DtdInputState.InsideSingleQuoted:
2528 case DtdInputState.Comment:
2531 throw NotWFError ("unexpected end of file at DTD.");
2535 throw NotWFError ("unexpected end of file at DTD.");
2538 case DtdInputState.InsideDoubleQuoted:
2539 case DtdInputState.InsideSingleQuoted:
2540 case DtdInputState.Comment:
2541 continue; // well-formed
// Markup start: the following characters select which declaration state to push.
2543 int c = ReadValueChar ();
2546 stateStack.Push (DtdInputState.PI);
2549 switch (ReadValueChar ()) {
2551 switch (ReadValueChar ()) {
2553 ExpectAndAppend ("EMENT");
2554 stateStack.Push (DtdInputState.ElementDecl);
2557 ExpectAndAppend ("TITY");
2558 stateStack.Push (DtdInputState.EntityDecl);
2561 throw NotWFError ("unexpected token '<!E'.");
2565 ExpectAndAppend ("TTLIST");
2566 stateStack.Push (DtdInputState.AttlistDecl);
2569 ExpectAndAppend ("OTATION");
2570 stateStack.Push (DtdInputState.NotationDecl);
2573 ExpectAndAppend ("-");
2574 stateStack.Push (DtdInputState.Comment);
2579 throw NotWFError (String.Format ("unexpected '<{0}'.", (char) c));
// Quote handling: a quote opens a literal unless scanning is already inside
// the other kind of literal or inside a comment.
2583 if (State == DtdInputState.InsideSingleQuoted)
2585 else if (State != DtdInputState.InsideDoubleQuoted && State != DtdInputState.Comment)
2586 stateStack.Push (DtdInputState.InsideSingleQuoted);
2589 if (State == DtdInputState.InsideDoubleQuoted)
2591 else if (State != DtdInputState.InsideSingleQuoted && State != DtdInputState.Comment)
2592 stateStack.Push (DtdInputState.InsideDoubleQuoted);
// All four declaration states share the NotationDecl branch.
2596 case DtdInputState.ElementDecl:
2597 goto case DtdInputState.NotationDecl;
2598 case DtdInputState.AttlistDecl:
2599 goto case DtdInputState.NotationDecl;
2600 case DtdInputState.EntityDecl:
2601 goto case DtdInputState.NotationDecl;
2602 case DtdInputState.NotationDecl:
2605 case DtdInputState.InsideDoubleQuoted:
2606 case DtdInputState.InsideSingleQuoted:
2607 case DtdInputState.Comment:
2610 throw NotWFError ("unexpected token '>'");
2614 if (State == DtdInputState.PI) {
2615 if (ReadValueChar () == '>')
2620 if (State == DtdInputState.Comment) {
2621 if (PeekChar () == '-') {
2623 ExpectAndAppend (">");
2629 if (State != DtdInputState.Free && State != DtdInputState.EntityDecl && State != DtdInputState.Comment && State != DtdInputState.InsideDoubleQuoted && State != DtdInputState.InsideSingleQuoted)
2630 throw NotWFError ("Parameter Entity Reference cannot appear as a part of markupdecl (see XML spec 2.8).");
// Reads a quoted system literal and returns its content without the quotes.
// The first character read is taken as the quote, and characters are buffered
// until it is seen again.
// expectSYSTEM: NOTE(review) - its use is not shown here; presumably, when
// true, the keyword and the whitespace after it are consumed first - confirm.
// Raises a not-well-formed error when the required whitespace is missing or
// the input ends inside the literal.
2636 // The reader is positioned on the first 'S' of "SYSTEM".
2637 private string ReadSystemLiteral (bool expectSYSTEM)
2641 if (!SkipWhitespace ())
2642 throw NotWFError ("Whitespace is required after 'SYSTEM'.");
2646 int quoteChar = ReadChar (); // apos or quot
2648 ClearValueBuffer ();
2649 while (c != quoteChar) {
2652 throw NotWFError ("Unexpected end of stream in ExternalID.");
2654 AppendValueChar (c);
2656 return CreateValueString ();
// Reads a quoted public identifier literal and returns its content without the
// quotes. Whitespace is required first. The first character read after it is
// taken as the quote, and characters are buffered until it is seen again.
// Raises a not-well-formed error when the whitespace is missing, when the
// input ends inside the literal, or when a character is rejected by
// XmlChar.IsPubidChar.
2659 private string ReadPubidLiteral()
2662 if (!SkipWhitespace ())
2663 throw NotWFError ("Whitespace is required after 'PUBLIC'.");
2664 int quoteChar = ReadChar ();
2666 ClearValueBuffer ();
2667 while(c != quoteChar)
2670 if(c < 0) throw NotWFError ("Unexpected end of stream in ExternalID.");
2671 if(c != quoteChar && !XmlChar.IsPubidChar (c))
2672 throw NotWFError (String.Format ("character '{0}' not allowed for PUBLIC ID", (char)c ));
2674 AppendValueChar (c);
2676 return CreateValueString ();
// Reads a name and returns it, discarding the prefix and local name that the
// two-argument overload also produces.
2679 // The reader is positioned on the first character
2681 private string ReadName ()
2683 string prefix, local;
2684 return ReadName (out prefix, out local);
// Reads an XML name at the current position and returns it atomized through
// the parser context's NameTable. When namespace support is on and the name
// contains a colon, 'prefix' and 'localName' receive the atomized parts around
// the first colon; otherwise 'prefix' is the empty string.
// Raises a not-well-formed error when the first character cannot start a name.
// Two implementations are present, selected by USE_NAME_BUFFER:
//  - without it, the name is taken straight out of peekChars, with
//    preserveCurrentTag set for the duration and restored afterwards;
//  - with it, the characters are copied into nameBuffer, which grows on demand.
// NOTE(review): the '#else' / '#endif' lines and the assignment of localName
// in the no-colon case are not shown here - confirm.
2687 private string ReadName (out string prefix, out string localName)
2689 #if !USE_NAME_BUFFER
2690 bool savePreserve = preserveCurrentTag;
2691 preserveCurrentTag = true;
// Offsets are kept relative to curNodePeekIndex and resolved after the scan.
2693 int startOffset = peekCharsIndex - curNodePeekIndex;
2694 int ch = PeekChar ();
2695 if (!XmlChar.IsFirstNameChar (ch))
2696 throw NotWFError (String.Format (CultureInfo.InvariantCulture, "a name did not start with a legal character {0} ({1})", ch, (char) ch));
2701 while (XmlChar.IsNameChar ((ch = PeekChar ()))) {
// Only the first colon is considered, and only when namespaces are enabled.
2703 if (ch == ':' && namespaces && colonAt < 0)
2708 int start = curNodePeekIndex + startOffset;
2710 string name = parserContext.NameTable.Add (
2711 peekChars, start, length);
2714 prefix = parserContext.NameTable.Add (
2715 peekChars, start, colonAt);
2716 localName = parserContext.NameTable.Add (
2717 peekChars, start + colonAt + 1, length - colonAt - 1);
2719 prefix = String.Empty;
2723 preserveCurrentTag = savePreserve;
2727 int ch = PeekChar ();
2728 if (!XmlChar.IsFirstNameChar (ch))
2729 throw NotWFError (String.Format (CultureInfo.InvariantCulture, "a name did not start with a legal character {0} ({1})", ch, (char) ch));
2734 // AppendNameChar (ch);
2736 // nameBuffer.Length is always non-0 so no need to ExpandNameCapacity () here
2737 if (ch < Char.MaxValue)
2738 nameBuffer [nameLength++] = (char) ch;
2740 AppendSurrogatePairNameChar (ch);
2745 while (XmlChar.IsNameChar ((ch = PeekChar ()))) {
2748 if (ch == ':' && namespaces && colonAt < 0)
2749 colonAt = nameLength;
2750 // AppendNameChar (ch);
2752 if (nameLength == nameCapacity)
2753 ExpandNameCapacity ();
2754 if (ch < Char.MaxValue)
2755 nameBuffer [nameLength++] = (char) ch;
2757 AppendSurrogatePairNameChar (ch);
2761 string name = parserContext.NameTable.Add (nameBuffer, 0, nameLength);
2764 prefix = parserContext.NameTable.Add (nameBuffer, 0, colonAt);
2765 localName = parserContext.NameTable.Add (nameBuffer, colonAt + 1, nameLength - colonAt - 1);
2767 prefix = String.Empty;
// Consumes one character and raises a not-well-formed error when it differs
// from 'expected'. In the error message a negative character (end of input) is
// reported as "EOF".
2775 // Read the next character and compare it against the
2776 // specified character.
2777 private void Expect (int expected)
2779 int ch = ReadChar ();
2781 if (ch != expected) {
2782 throw NotWFError (String.Format (CultureInfo.InvariantCulture,
2783 "expected '{0}' ({1:X}) but found '{2}' ({3:X})",
2786 ch < 0 ? (object) "EOF" : (char) ch,
// Consumes expected.Length characters and raises a not-well-formed error at
// the first one that differs from the corresponding character of 'expected'.
// Characters read before the mismatch stay consumed.
2791 private void Expect (string expected)
2793 for (int i = 0; i < expected.Length; i++)
2794 if (ReadChar () != expected [i])
2795 throw NotWFError (String.Format (CultureInfo.InvariantCulture,
2796 "'{0}' is expected", expected));
// Reads characters, treating whitespace specially, and raises a
// not-well-formed error naming the expected character 'c' and the character
// found (or "EOF").
// NOTE(review): the loop and the comparison against 'c' are not shown here;
// presumably whitespace is skipped and the first other character must equal
// 'c' - confirm.
2799 private void ExpectAfterWhitespace (char c)
2802 int i = ReadChar ();
// Cheap range test first; XmlChar.IsWhitespace is only called below 0x21.
2803 if (i < 0x21 && XmlChar.IsWhitespace (i))
2806 throw NotWFError (String.Format (CultureInfo.InvariantCulture, "Expected {0}, but found {1} [{2}]", c, i < 0 ? (object) "EOF" : (char) i, i));
// Skips over XML whitespace (space, tab, line feed, carriage return), testing
// each peeked character inline instead of calling XmlChar.IsWhitespace.
// 'skipped' records whether the first peeked character was whitespace.
// NOTE(review): the return statement is not shown here; presumably 'skipped'
// is returned - confirm.
2811 // Does not consume the first non-whitespace character.
2812 private bool SkipWhitespace ()
2814 // FIXME: It should be inlined by the JIT.
2815 // bool skipped = XmlChar.IsWhitespace (PeekChar ());
2816 int ch = PeekChar ();
2817 bool skipped = (ch == 0x20 || ch == 0x9 || ch == 0xA || ch == 0xD);
2821 // FIXME: It should be inlined by the JIT.
2822 // while (XmlChar.IsWhitespace (PeekChar ()))
2824 while ((ch = PeekChar ()) == 0x20 || ch == 0x9 || ch == 0xA || ch == 0xD)
// Reads a run of whitespace at the current position and decides how to report it:
//  - inside element content, when the run is followed by a character other
//    than '<' or end of input, it is treated as the start of a text node;
//  - otherwise it may be dropped, depending on whitespaceHandling and xml:space;
//  - otherwise a Whitespace or SignificantWhitespace node is set up (by
//    xml:space), with its value created lazily.
// The consumed characters are copied from peekChars into valueBuffer.
// NOTE(review): the statements taken in the "text" and "dropped" cases and the
// return values are not shown here - confirm.
2829 private bool ReadWhitespace ()
// Leave the initial None state once whitespace has been seen.
2831 if (currentState == XmlNodeType.None)
2832 currentState = XmlNodeType.XmlDeclaration;
// Keep the peek buffer intact while scanning so the run can be copied afterwards.
2834 bool savePreserve = preserveCurrentTag;
2835 preserveCurrentTag = true;
2836 int startOffset = peekCharsIndex - curNodePeekIndex; // it should be 0 for now though.
2838 int ch = PeekChar ();
2842 // FIXME: It should be inlined by the JIT.
2843 // } while ((ch = PeekChar ()) != -1 && XmlChar.IsWhitespace (ch));
2844 } while (ch == 0x20 || ch == 0x9 || ch == 0xA || ch == 0xD);
2846 bool isText = currentState == XmlNodeType.Element && ch != -1 && ch != '<';
2848 if (!isText && (whitespaceHandling == WhitespaceHandling.None ||
2849 whitespaceHandling == WhitespaceHandling.Significant && XmlSpace != XmlSpace.Preserve))
2852 ClearValueBuffer ();
2853 valueBuffer.Append (peekChars, curNodePeekIndex, peekCharsIndex - curNodePeekIndex - startOffset);
2854 preserveCurrentTag = savePreserve;
2859 XmlNodeType nodeType = (this.XmlSpace == XmlSpace.Preserve) ?
2860 XmlNodeType.SignificantWhitespace : XmlNodeType.Whitespace;
2861 SetProperties (nodeType,
2866 null, // value: create only when required
2873 // Returns -1 if it should throw an error.
// Copies peeked characters into `buffer`, starting at index `offset`,
// for at most `length` loop iterations. A '<' gets special handling that
// depends on the following character and on nestLevel.
// NOTE(review): the return statements and several branch headers are not
// visible in this excerpt -- confirm the exact return values.
2874 private int ReadCharsInternal (char [] buffer, int offset, int length)
// bufIndex is the next write position in `buffer`; `i` counts loop
// iterations separately.
2876 int bufIndex = offset;
2877 for (int i = 0; i < length; i++) {
2878 int c = PeekChar ();
2881 throw NotWFError ("Unexpected end of xml.");
2883 if (i + 1 == length)
2884 // if it does not end here,
2885 // it cannot store another
2886 // character, so stop here.
// A following character other than '/' leads to a literal '<' being
// stored in the buffer.
2889 if (PeekChar () != '/') {
2891 buffer [bufIndex++] = '<';
// Post-decrement: the comparison uses the value of nestLevel before it
// is decremented.
2894 else if (nestLevel-- > 0) {
2895 buffer [bufIndex++] = '<';
2898 // Seems to skip immediate EndElement
// Clears the in-progress flag and then calls Read ().
2905 readCharsInProgress = false;
2906 Read (); // move to the next node
// Values below Char.MaxValue are stored as a single char.
// NOTE(review): the comparison is strict, so c == Char.MaxValue (0xFFFF)
// does not take this single-char store -- confirm this is intended.
2910 if (c < Char.MaxValue)
2911 buffer [bufIndex++] = (char) c;
// The next two stores split the value into a UTF-16 surrogate pair:
// high unit 0xD800 + (c - 0x10000) / 0x400, then
// low unit 0xDC00 + (c - 0x10000) % 0x400.
// NOTE(review): this writes two chars for one increment of `i`, so the
// `i < length` bound counts iterations, not chars stored -- confirm the
// buffer cannot be overrun.
2913 buffer [bufIndex++] = (char) ((c - 0x10000) / 0x400 + 0xD800);
2914 buffer [bufIndex++] = (char) ((c - 0x10000) % 0x400 + 0xDC00);
2922 private bool ReadUntilEndTag ()
2925 currentState = XmlNodeType.EndElement;
2931 throw NotWFError ("Unexpected end of xml.");
2933 if (PeekChar () != '/') {
2937 else if (--nestLevel > 0)
2940 string name = ReadName ();
2941 if (name != elementNames [elementNameStackPos - 1].Name)