2 // System.Xml.XmlTextReader
5 // Jason Diamond (jason@injektilo.org)
6 // Adam Treat (manyoso@yahoo.com)
7 // Atsushi Enomoto (ginga@kit.hi-ho.ne.jp)
9 // (C) 2001, 2002 Jason Diamond http://injektilo.org/
10 // Copyright (C) 2005 Novell, Inc (http://www.novell.com)
12 // Permission is hereby granted, free of charge, to any person obtaining
13 // a copy of this software and associated documentation files (the
14 // "Software"), to deal in the Software without restriction, including
15 // without limitation the rights to use, copy, modify, merge, publish,
16 // distribute, sublicense, and/or sell copies of the Software, and to
17 // permit persons to whom the Software is furnished to do so, subject to
18 // the following conditions:
20 // The above copyright notice and this permission notice shall be
21 // included in all copies or substantial portions of the Software.
23 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
27 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
28 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
29 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
33 using System.Collections;
35 using System.Collections.Generic;
37 using System.Globalization;
39 using System.Security.Permissions;
41 using System.Xml.Schema;
54 internal class XmlTextReader : XmlReader,
55 IXmlLineInfo, IXmlNamespaceResolver, IHasXmlParserContext
57 [PermissionSet (SecurityAction.InheritanceDemand, Unrestricted = true)]
58 public class XmlTextReader : XmlReader, IXmlLineInfo, IHasXmlParserContext
63 protected XmlTextReader ()
67 public XmlTextReader (Stream input)
68 : this (new XmlStreamReader (input))
72 public XmlTextReader (string url)
73 : this(url, new NameTable ())
77 public XmlTextReader (TextReader input)
78 : this (input, new NameTable ())
82 protected XmlTextReader (XmlNameTable nt)
83 : this (String.Empty, null, XmlNodeType.None, null)
87 public XmlTextReader (Stream input, XmlNameTable nt)
88 : this(new XmlStreamReader (input), nt)
92 public XmlTextReader (string url, Stream input)
93 : this (url, new XmlStreamReader (input))
97 public XmlTextReader (string url, TextReader input)
98 : this (url, input, new NameTable ())
102 public XmlTextReader (string url, XmlNameTable nt)
104 Uri uri = resolver.ResolveUri (null, url);
105 string uriString = uri != null ? uri.ToString () : String.Empty;
106 Stream s = resolver.GetEntity (uri, null, typeof (Stream)) as Stream;
107 XmlParserContext ctx = new XmlParserContext (nt,
108 new XmlNamespaceManager (nt),
111 this.InitializeContext (uriString, ctx, new XmlStreamReader (s), XmlNodeType.Document);
114 public XmlTextReader (TextReader input, XmlNameTable nt)
115 : this (String.Empty, input, nt)
119 public XmlTextReader (Stream xmlFragment, XmlNodeType fragType, XmlParserContext context)
120 : this (context != null ? context.BaseURI : String.Empty,
121 new XmlStreamReader (xmlFragment),
127 internal XmlTextReader (string baseURI, TextReader xmlFragment, XmlNodeType fragType)
128 : this (baseURI, xmlFragment, fragType, null)
132 public XmlTextReader (string url, Stream input, XmlNameTable nt)
133 : this (url, new XmlStreamReader (input), nt)
137 public XmlTextReader (string url, TextReader input, XmlNameTable nt)
138 : this (url, input, XmlNodeType.Document, null)
142 public XmlTextReader (string xmlFragment, XmlNodeType fragType, XmlParserContext context)
143 : this (context != null ? context.BaseURI : String.Empty,
144 new StringReader (xmlFragment),
150 internal XmlTextReader (string url, TextReader fragment, XmlNodeType fragType, XmlParserContext context)
152 InitializeContext (url, context, fragment, fragType);
159 public override int AttributeCount
161 get { return attributeCount; }
164 public override string BaseURI
166 get { return parserContext.BaseURI; }
170 public override bool CanReadBinaryContent {
174 public override bool CanReadValueChunk {
178 internal override bool CanReadBinaryContent {
182 internal override bool CanReadValueChunk {
187 internal bool CharacterChecking {
188 get { return checkCharacters; }
189 set { checkCharacters = value; }
192 // for XmlReaderSettings.CloseInput support
193 internal bool CloseInput {
194 get { return closeInput; }
195 set { closeInput = value; }
198 public override int Depth
201 int nodeTypeMod = currentToken.NodeType == XmlNodeType.Element ? 0 : -1;
202 if (currentAttributeValue >= 0)
203 return nodeTypeMod + elementDepth + 2; // inside attribute value.
204 else if (currentAttribute >= 0)
205 return nodeTypeMod + elementDepth + 1;
210 public Encoding Encoding
212 get { return parserContext.Encoding; }
215 public EntityHandling EntityHandling {
216 get { return entityHandling; }
217 set { entityHandling = value; }
221 public override bool EOF {
222 get { return readState == ReadState.EndOfFile; }
225 public override bool HasValue {
226 get { return cursorToken.Value != null; }
229 public override bool IsDefault {
230 // XmlTextReader does not expand default attributes.
231 get { return false; }
234 public override bool IsEmptyElement {
235 get { return cursorToken.IsEmptyElement; }
240 public override string this [int i] {
241 get { return GetAttribute (i); }
244 public override string this [string name] {
245 get { return GetAttribute (name); }
248 public override string this [string localName, string namespaceName] {
249 get { return GetAttribute (localName, namespaceName); }
253 public int LineNumber {
255 if (useProceedingLineInfo)
258 return cursorToken.LineNumber;
262 public int LinePosition {
264 if (useProceedingLineInfo)
267 return cursorToken.LinePosition;
271 public override string LocalName {
272 get { return cursorToken.LocalName; }
275 public override string Name {
276 get { return cursorToken.Name; }
279 public bool Namespaces {
280 get { return namespaces; }
282 if (readState != ReadState.Initial)
283 throw new InvalidOperationException ("Namespaces have to be set before reading.");
288 public override string NamespaceURI {
289 get { return cursorToken.NamespaceURI; }
292 public override XmlNameTable NameTable {
293 get { return parserContext.NameTable; }
296 public override XmlNodeType NodeType {
297 get { return cursorToken.NodeType; }
300 public bool Normalization {
301 get { return normalization; }
302 set { normalization = value; }
305 public override string Prefix {
306 get { return cursorToken.Prefix; }
310 public bool ProhibitDtd {
311 get { return prohibitDtd; }
312 set { prohibitDtd = value; }
316 public override char QuoteChar {
317 get { return cursorToken.QuoteChar; }
320 public override ReadState ReadState {
321 get { return readState; }
325 public override XmlReaderSettings Settings {
326 get { return base.Settings; }
330 public override string Value {
331 get { return cursorToken.Value != null ? cursorToken.Value : String.Empty; }
334 public WhitespaceHandling WhitespaceHandling {
335 get { return whitespaceHandling; }
336 set { whitespaceHandling = value; }
339 public override string XmlLang {
340 get { return parserContext.XmlLang; }
343 public XmlResolver XmlResolver {
344 set { resolver = value; }
347 public override XmlSpace XmlSpace {
348 get { return parserContext.XmlSpace; }
355 public override void Close ()
357 readState = ReadState.Closed;
359 cursorToken.Clear ();
360 currentToken.Clear ();
362 if (closeInput && reader != null)
// Returns the value of the attribute at position i on the current node.
//
// i: zero-based attribute index; valid values are 0 .. AttributeCount - 1.
// Throws ArgumentOutOfRangeException when i is negative or not smaller
// than AttributeCount.
public override string GetAttribute (int i)
{
	// Reject negative indexes here as well; previously they fell through
	// to the array access and surfaced as IndexOutOfRangeException.
	// The three-argument constructor is used so that the text below is
	// reported as the message rather than as the parameter name.
	if (i < 0 || i >= attributeCount)
		throw new ArgumentOutOfRangeException ("i", i, "i must be non-negative and smaller than AttributeCount.");

	return attributeTokens [i].Value;
}
375 // MS.NET 1.0 msdn says that this method returns String.Empty
376 // for absent attribute, but in fact it returns null.
377 // This description is corrected in MS.NET 1.1 msdn.
public override string GetAttribute (string name)
{
	// Linear scan over the attributes collected for the current node,
	// matching on the qualified name.
	int index = 0;
	while (index < attributeCount) {
		XmlAttributeTokenInfo candidate = attributeTokens [index];
		if (candidate.Name == name)
			return candidate.Value;
		index++;
	}

	// No attribute with that name: null, not String.Empty.
	return null;
}
386 private int GetIndexOfQualifiedAttribute (string localName, string namespaceURI)
388 for (int i = 0; i < attributeCount; i++) {
389 XmlAttributeTokenInfo ti = attributeTokens [i];
390 if (ti.LocalName == localName && ti.NamespaceURI == namespaceURI)
396 XmlParserContext IHasXmlParserContext.ParserContext {
397 get { return parserContext; }
400 public override string GetAttribute (string localName, string namespaceURI)
402 int idx = this.GetIndexOfQualifiedAttribute (localName, namespaceURI);
405 return attributeTokens [idx].Value;
409 public IDictionary<string, string> GetNamespacesInScope (XmlNamespaceScope scope)
411 return parserContext.NamespaceManager.GetNamespacesInScope (scope);
415 public TextReader GetRemainder ()
417 if (peekCharsIndex == peekCharsLength)
419 return new StringReader (new string (peekChars, peekCharsIndex, peekCharsLength - peekCharsIndex) + reader.ReadToEnd ());
423 public bool HasLineInfo ()
425 bool IXmlLineInfo.HasLineInfo ()
431 public override string LookupNamespace (string prefix)
433 return LookupNamespace (prefix, false);
437 public override string LookupNamespace (string prefix, bool atomizedName)
439 internal override string LookupNamespace (string prefix, bool atomizedName)
442 return parserContext.NamespaceManager.LookupNamespace (prefix, atomizedName);
446 string IXmlNamespaceResolver.LookupPrefix (string ns)
448 return LookupPrefix (ns, false);
451 public string LookupPrefix (string ns, bool atomizedName)
453 return parserContext.NamespaceManager.LookupPrefix (ns, atomizedName);
// Positions the reader on the attribute at index i of the current node.
//
// i: zero-based attribute index; valid values are 0 .. AttributeCount - 1.
// Throws ArgumentOutOfRangeException when i is negative or not smaller
// than AttributeCount.
public override void MoveToAttribute (int i)
{
	// Validate both bounds before touching any cursor state, so a bad
	// index (including a negative one) leaves the reader where it was.
	// The three-argument constructor keeps the text as the message
	// instead of the parameter name.
	if (i < 0 || i >= attributeCount)
		throw new ArgumentOutOfRangeException ("i", i, "attribute index out of range.");

	currentAttribute = i;
	// -1 means "on the attribute itself, not inside its value tokens".
	currentAttributeValue = -1;
	cursorToken = attributeTokens [i];
}
467 public override bool MoveToAttribute (string name)
469 for (int i = 0; i < attributeCount; i++) {
470 XmlAttributeTokenInfo ti = attributeTokens [i];
471 if (ti.Name == name) {
479 public override bool MoveToAttribute (string localName, string namespaceName)
481 int idx = GetIndexOfQualifiedAttribute (localName, namespaceName);
484 MoveToAttribute (idx);
488 public override bool MoveToElement ()
490 if (currentToken == null) // for attribute .ctor()
493 if (cursorToken == currentToken)
496 if (currentAttribute >= 0) {
497 currentAttribute = -1;
498 currentAttributeValue = -1;
499 cursorToken = currentToken;
506 public override bool MoveToFirstAttribute ()
508 if (attributeCount == 0)
511 return MoveToNextAttribute ();
514 public override bool MoveToNextAttribute ()
516 if (currentAttribute == 0 && attributeCount == 0)
518 if (currentAttribute + 1 < attributeCount) {
520 currentAttributeValue = -1;
521 cursorToken = attributeTokens [currentAttribute];
528 public override bool Read ()
530 if (startNodeType == XmlNodeType.Attribute) {
531 if (currentAttribute == 0)
532 return false; // already read.
534 IncrementAttributeToken ();
535 ReadAttributeValueTokens ('"');
536 cursorToken = attributeTokens [0];
537 currentAttributeValue = -1;
538 readState = ReadState.Interactive;
546 readState = ReadState.Interactive;
547 currentLinkedNodeLineNumber = line;
548 currentLinkedNodeLinePosition = column;
549 useProceedingLineInfo = true;
551 cursorToken = currentToken;
553 currentAttribute = currentAttributeValue = -1;
554 currentToken.Clear ();
556 // It was moved from end of ReadStartTag ().
562 if (shouldSkipUntilEndTag) {
563 shouldSkipUntilEndTag = false;
564 return ReadUntilEndTag ();
567 more = ReadContent ();
569 if (!more && startNodeType == XmlNodeType.Document && currentState != XmlNodeType.EndElement)
570 throw NotWFError ("Document element did not appear.");
572 useProceedingLineInfo = false;
576 public override bool ReadAttributeValue ()
578 if (readState == ReadState.Initial && startNodeType == XmlNodeType.Attribute) {
582 if (currentAttribute < 0)
584 XmlAttributeTokenInfo ti = attributeTokens [currentAttribute];
585 if (currentAttributeValue < 0)
586 currentAttributeValue = ti.ValueTokenStartIndex - 1;
588 if (currentAttributeValue < ti.ValueTokenEndIndex) {
589 currentAttributeValue++;
590 cursorToken = attributeValueTokens [currentAttributeValue];
597 public int ReadBase64 (byte [] buffer, int offset, int length)
599 BinaryCharGetter = binaryCharGetter;
601 return Binary.ReadBase64 (buffer, offset, length);
603 BinaryCharGetter = null;
607 public int ReadBinHex (byte [] buffer, int offset, int length)
609 BinaryCharGetter = binaryCharGetter;
611 return Binary.ReadBinHex (buffer, offset, length);
613 BinaryCharGetter = null;
617 public int ReadChars (char [] buffer, int offset, int length)
620 throw new ArgumentOutOfRangeException ("offset", offset, "Offset must be non-negative integer.");
622 throw new ArgumentOutOfRangeException ("length", length, "Length must be non-negative integer.");
623 else if (buffer.Length < offset + length)
624 throw new ArgumentOutOfRangeException ("buffer length is smaller than the sum of offset and length.");
626 if (IsEmptyElement) {
631 if (NodeType != XmlNodeType.Element)
634 return ReadCharsInternal (buffer, offset, length);
638 public override string ReadInnerXml ()
640 return ReadInnerXmlInternal ();
643 public override string ReadOuterXml ()
645 return ReadOuterXmlInternal ();
648 public override string ReadString ()
650 return ReadStringInternal ();
654 public void ResetState ()
656 throw new InvalidOperationException ("Cannot call ResetState when parsing an XML fragment.");
660 public override void ResolveEntity ()
662 // XmlTextReader does not resolve entities.
663 throw new InvalidOperationException ("XmlTextReader cannot resolve external entities.");
667 [MonoTODO ("Implement for performance reason")]
668 public override void Skip ()
676 // Parsed DTD Objects
677 internal DTDObjectModel DTD {
678 get { return parserContext.Dtd; }
681 internal XmlResolver Resolver {
682 get { return resolver; }
687 internal class XmlTokenInfo
689 public XmlTokenInfo (XmlTextReader xtr)
697 protected XmlTextReader Reader;
700 public string LocalName;
701 public string Prefix;
702 public string NamespaceURI;
703 public bool IsEmptyElement;
704 public char QuoteChar;
705 public int LineNumber;
706 public int LinePosition;
707 public int ValueBufferStart;
708 public int ValueBufferEnd;
710 public XmlNodeType NodeType;
712 public virtual string Value {
714 if (valueCache != null)
716 if (ValueBufferStart >= 0) {
717 //Console.WriteLine (NodeType + " / " + ValueBuffer.Length + " / " + ValueBufferStart + " / " + ValueBufferEnd);
718 valueCache = Reader.valueBuffer.ToString (ValueBufferStart, ValueBufferEnd - ValueBufferStart);
722 case XmlNodeType.Text:
723 case XmlNodeType.SignificantWhitespace:
724 case XmlNodeType.Whitespace:
725 case XmlNodeType.Comment:
726 case XmlNodeType.CDATA:
727 case XmlNodeType.ProcessingInstruction:
728 valueCache = Reader.CreateValueString ();
733 set { valueCache = value; }
736 public virtual void Clear ()
738 ValueBufferStart = -1;
740 NodeType = XmlNodeType.None;
741 Name = LocalName = Prefix = NamespaceURI = String.Empty;
742 IsEmptyElement = false;
744 LineNumber = LinePosition = 0;
748 internal class XmlAttributeTokenInfo : XmlTokenInfo
750 public XmlAttributeTokenInfo (XmlTextReader reader)
753 NodeType = XmlNodeType.Attribute;
756 public int ValueTokenStartIndex;
757 public int ValueTokenEndIndex;
759 StringBuilder tmpBuilder = new StringBuilder ();
761 public override string Value {
763 if (valueCache != null)
766 // An empty value should return String.Empty.
767 if (ValueTokenStartIndex == ValueTokenEndIndex) {
768 XmlTokenInfo ti = Reader.attributeValueTokens [ValueTokenStartIndex];
769 if (ti.NodeType == XmlNodeType.EntityReference)
770 valueCache = String.Concat ("&", ti.Name, ";");
772 valueCache = ti.Value;
776 tmpBuilder.Length = 0;
777 for (int i = ValueTokenStartIndex; i <= ValueTokenEndIndex; i++) {
778 XmlTokenInfo ti = Reader.attributeValueTokens [i];
779 if (ti.NodeType == XmlNodeType.Text)
780 tmpBuilder.Append (ti.Value);
782 tmpBuilder.Append ('&');
783 tmpBuilder.Append (ti.Name);
784 tmpBuilder.Append (';');
788 valueCache = tmpBuilder.ToString (0, tmpBuilder.Length);
792 set { valueCache = value; }
795 public override void Clear ()
799 NodeType = XmlNodeType.Attribute;
800 ValueTokenStartIndex = ValueTokenEndIndex = 0;
// Registers this attribute with the reader's namespace manager when it
// is a namespace declaration: "xmlns:p" binds prefix p, a bare "xmlns"
// binds the empty prefix. Any other attribute is ignored.
internal void FillXmlns ()
{
	string xmlns = XmlNamespaceManager.PrefixXmlns;

	// Reference comparison is used throughout, as in the rest of this
	// class; presumably names are atomized through the name table.
	if (Object.ReferenceEquals (Prefix, xmlns)) {
		Reader.parserContext.NamespaceManager.AddNamespace (LocalName, Value);
		return;
	}

	if (Object.ReferenceEquals (Name, xmlns))
		Reader.parserContext.NamespaceManager.AddNamespace (String.Empty, Value);
}
// Computes NamespaceURI for this attribute:
//  - namespace declarations (prefix "xmlns" or name "xmlns") get the
//    xmlns namespace,
//  - unprefixed attributes get the empty namespace,
//  - everything else is resolved through the owning reader.
internal void FillNamespace ()
{
	string xmlns = XmlNamespaceManager.PrefixXmlns;
	bool isNamespaceDeclaration =
		Object.ReferenceEquals (Prefix, xmlns) ||
		Object.ReferenceEquals (Name, xmlns);

	if (isNamespaceDeclaration) {
		NamespaceURI = XmlNamespaceManager.XmlnsXmlns;
		return;
	}

	NamespaceURI = Prefix.Length == 0
		? string.Empty
		: Reader.LookupNamespace (Prefix, true);
}
823 private XmlTokenInfo cursorToken;
824 private XmlTokenInfo currentToken;
825 private XmlAttributeTokenInfo currentAttributeToken;
826 private XmlTokenInfo currentAttributeValueToken;
827 private XmlAttributeTokenInfo [] attributeTokens = new XmlAttributeTokenInfo [10];
828 private XmlTokenInfo [] attributeValueTokens = new XmlTokenInfo [10];
829 private int currentAttribute;
830 private int currentAttributeValue;
831 private int attributeCount;
833 private XmlParserContext parserContext;
835 private ReadState readState;
838 private int elementDepth;
839 private bool depthUp;
841 private bool popScope;
843 private string [] elementNames;
844 int elementNameStackPos;
846 private bool allowMultipleRoot;
848 private bool isStandalone;
850 private bool returnEntityReference;
851 private string entityReferenceName;
853 private char [] nameBuffer;
854 private int nameLength;
855 private int nameCapacity;
856 private const int initialNameCapacity = 32;
858 private StringBuilder valueBuffer;
860 private char [] currentTagBuffer;
861 private int currentTagLength;
862 private int currentTagCapacity;
863 private const int initialCurrentTagCapacity = 256;
865 private TextReader reader;
866 private char [] peekChars;
867 private int peekCharsIndex;
868 private int peekCharsLength;
869 private const int peekCharCapacity = 1024;
874 private int currentLinkedNodeLineNumber;
875 private int currentLinkedNodeLinePosition;
876 private bool useProceedingLineInfo;
878 private XmlNodeType startNodeType;
879 // State machine attribute.
880 // XmlDeclaration: after the first node.
881 // DocumentType: after doctypedecl
882 // Element: inside document element
883 // EndElement: after document element
884 private XmlNodeType currentState;
886 // For ReadChars()/ReadBase64()/ReadBinHex()
887 private bool shouldSkipUntilEndTag;
888 XmlReaderBinarySupport.CharGetter binaryCharGetter;
890 // These values are never re-initialized.
891 private bool namespaces = true;
892 private WhitespaceHandling whitespaceHandling = WhitespaceHandling.All;
893 private XmlResolver resolver = new XmlUrlResolver ();
894 private bool normalization = false;
896 private bool checkCharacters;
897 private bool prohibitDtd = false;
898 private bool closeInput = true;
899 private EntityHandling entityHandling; // 2.0
// Builds (but does not throw) the XmlException used for
// well-formedness errors, carrying this reader's line information
// and base URI along with the given message.
private XmlException NotWFError (string message)
{
	IXmlLineInfo lineInfo = this;
	return new XmlException (lineInfo, BaseURI, message);
}
908 currentToken = new XmlTokenInfo (this);
909 cursorToken = currentToken;
910 currentAttribute = -1;
911 currentAttributeValue = -1;
914 readState = ReadState.Initial;
915 allowMultipleRoot = false;
921 popScope = allowMultipleRoot = false;
922 elementNames = new string [10];
923 elementNameStackPos = 0;
925 isStandalone = false;
926 returnEntityReference = false;
927 entityReferenceName = String.Empty;
929 nameBuffer = new char [initialNameCapacity];
931 nameCapacity = initialNameCapacity;
933 valueBuffer = new StringBuilder ();
935 currentTagBuffer = new char [initialCurrentTagCapacity];
936 currentTagLength = 0;
937 currentTagCapacity = initialCurrentTagCapacity;
941 if (peekChars == null)
942 peekChars = new char [peekCharCapacity];
947 currentLinkedNodeLineNumber = currentLinkedNodeLinePosition = 0;
948 useProceedingLineInfo = false;
950 currentState = XmlNodeType.None;
952 shouldSkipUntilEndTag = false;
953 binaryCharGetter = new XmlReaderBinarySupport.CharGetter (ReadChars);
955 checkCharacters = true;
957 if (Settings != null)
958 checkCharacters = Settings.CheckCharacters;
962 entityHandling = EntityHandling.ExpandCharEntities;
965 private void InitializeContext (string url, XmlParserContext context, TextReader fragment, XmlNodeType fragType)
967 startNodeType = fragType;
968 parserContext = context;
969 if (context == null) {
970 XmlNameTable nt = new NameTable ();
971 parserContext = new XmlParserContext (nt,
972 new XmlNamespaceManager (nt),
977 if (url != null && url.Length > 0) {
981 } catch (Exception) {
982 string path = Path.GetFullPath ("./a");
983 uri = new Uri (new Uri (path), url);
985 parserContext.BaseURI = uri.ToString ();
993 case XmlNodeType.Attribute:
994 reader = new StringReader (fragment.ReadToEnd ().Replace ("\"", """));
995 SkipTextDeclaration ();
997 case XmlNodeType.Element:
998 currentState = XmlNodeType.Element;
999 allowMultipleRoot = true;
1000 SkipTextDeclaration ();
1002 case XmlNodeType.Document:
1005 throw new XmlException (String.Format ("NodeType {0} is not allowed to create XmlTextReader.", fragType));
1011 internal ConformanceLevel Conformance {
1013 if (value == ConformanceLevel.Fragment) {
1014 currentState = XmlNodeType.Element;
1015 allowMultipleRoot = true;
1020 internal void AdjustLineInfoOffset (int lineNumberOffset, int linePositionOffset)
1022 line += lineNumberOffset;
1023 column += linePositionOffset;
1026 internal void SetNameTable (XmlNameTable nameTable)
1028 parserContext.NameTable = nameTable;
1032 // Use this method rather than setting the properties
1033 // directly so that all the necessary properties can
1034 // be changed in harmony with each other. Maybe the
1035 // fields should be in a separate class to help enforce
1038 // Namespace URI could not be provided here.
1039 private void SetProperties (
1040 XmlNodeType nodeType,
1044 bool isEmptyElement,
1046 bool clearAttributes)
1048 SetTokenProperties (currentToken, nodeType, name, prefix, localName, isEmptyElement, value, clearAttributes);
1049 currentToken.LineNumber = this.currentLinkedNodeLineNumber;
1050 currentToken.LinePosition = this.currentLinkedNodeLinePosition;
1053 private void SetTokenProperties (
1055 XmlNodeType nodeType,
1059 bool isEmptyElement,
1061 bool clearAttributes)
1063 token.NodeType = nodeType;
1065 token.Prefix = prefix;
1066 token.LocalName = localName;
1067 token.IsEmptyElement = isEmptyElement;
1068 token.Value = value;
1069 this.elementDepth = depth;
1071 if (clearAttributes)
1075 private void ClearAttributes ()
1077 for (int i = 0; i < attributeCount; i++)
1078 attributeTokens [i].Clear ();
1080 currentAttribute = -1;
1081 currentAttributeValue = -1;
1084 private int PeekChar ()
1086 if (peekCharsLength == peekCharsIndex) {
1087 if (!ReadTextReader (-1))
1092 char c = peekChars [peekCharsIndex];
1095 if (!char.IsSurrogate (c))
1097 if (peekCharsLength == peekCharsIndex + 1) {
1098 if (!ReadTextReader (c))
1099 //FIXME: copy MS.NET behaviour when unpaired surrogate found
1103 char highhalfChar = peekChars [peekCharsIndex];
1104 char lowhalfChar = peekChars [peekCharsIndex+1];
1106 if (((highhalfChar & 0xFC00) != 0xD800) || ((lowhalfChar & 0xFC00) != 0xDC00))
1107 //FIXME: copy MS.NET behaviour when unpaired surrogate found
1108 return highhalfChar;
1109 return 0x10000 + (highhalfChar-0xD800)*0x400 + (lowhalfChar-0xDC00);
1112 private int ReadChar ()
1114 int ch = PeekChar ();
1119 peekCharsIndex++; //Increment by 2 when a compound UCS-4 character was found
1124 } else if (ch == -1) {
1129 if (currentState != XmlNodeType.Element)
1130 AppendCurrentTagChar (ch);
1134 private bool ReadTextReader (int remained)
1138 peekChars [0] = (char) remained;
1139 int offset = remained >= 0 ? 1 : 0;
1140 peekCharsLength = reader.Read (peekChars, offset,
1141 peekCharCapacity - offset) + offset;
1142 return (peekCharsLength != 0);
1145 private bool ReadContent ()
1147 currentTagLength = 0;
1149 parserContext.NamespaceManager.PopScope ();
1150 parserContext.PopScope ();
1154 if (returnEntityReference)
1155 SetEntityReferenceProperties ();
1157 int c = PeekChar ();
1159 readState = ReadState.EndOfFile;
1160 ClearValueBuffer ();
1162 XmlNodeType.None, // nodeType
1163 String.Empty, // name
1164 String.Empty, // prefix
1165 String.Empty, // localName
1166 false, // isEmptyElement
1168 true // clearAttributes
1171 throw NotWFError ("unexpected end of file. Current depth is " + depth);
1178 switch (PeekChar ())
1186 ReadProcessingInstruction ();
1201 if (whitespaceHandling == WhitespaceHandling.All ||
1202 whitespaceHandling == WhitespaceHandling.Significant)
1206 return ReadContent ();
1215 return this.ReadState != ReadState.EndOfFile;
1218 private void SetEntityReferenceProperties ()
1220 DTDEntityDeclaration decl = DTD != null ? DTD.EntityDecls [entityReferenceName] : null;
1221 if (this.isStandalone)
1222 if (DTD == null || decl == null || !decl.IsInternalSubset)
1223 throw NotWFError ("Standalone document must not contain any references to an non-internally declared entity.");
1224 if (decl != null && decl.NotationName != null)
1225 throw NotWFError ("Reference to any unparsed entities is not allowed here.");
1227 ClearValueBuffer ();
1229 XmlNodeType.EntityReference, // nodeType
1230 entityReferenceName, // name
1231 String.Empty, // prefix
1232 entityReferenceName, // localName
1233 false, // isEmptyElement
1235 true // clearAttributes
1238 returnEntityReference = false;
1239 entityReferenceName = String.Empty;
1242 // The leading '<' has already been consumed.
1243 private void ReadStartTag ()
1245 if (currentState == XmlNodeType.EndElement)
1246 throw NotWFError ("Multiple document element was detected.");
1247 currentState = XmlNodeType.Element;
1249 parserContext.NamespaceManager.PushScope ();
1251 currentLinkedNodeLineNumber = line;
1252 currentLinkedNodeLinePosition = column;
1254 string prefix, localName;
1255 string name = ReadName (out prefix, out localName);
1256 if (currentState == XmlNodeType.EndElement)
1257 throw NotWFError ("document has terminated, cannot open new element");
1259 bool isEmptyElement = false;
1264 if (XmlChar.IsFirstNameChar (PeekChar ()))
1265 ReadAttributes (false);
1266 cursorToken = this.currentToken;
1269 for (int i = 0; i < attributeCount; i++)
1270 attributeTokens [i].FillXmlns ();
1271 for (int i = 0; i < attributeCount; i++)
1272 attributeTokens [i].FillNamespace ();
1275 for (int i = 0; i < attributeCount; i++) {
1276 for (int j = i + 1; j < attributeCount; j++)
1277 if (Object.ReferenceEquals (attributeTokens [i].Name, attributeTokens [j].Name) ||
1278 (Object.ReferenceEquals (attributeTokens [i].LocalName, attributeTokens [j].LocalName) &&
1279 Object.ReferenceEquals (attributeTokens [i].NamespaceURI, attributeTokens [j].NamespaceURI)))
1280 throw NotWFError ("Attribute name and qualified name must be identical.");
1283 if (PeekChar () == '/') {
1285 isEmptyElement = true;
1290 PushElementName (name);
1292 parserContext.PushScope ();
1297 XmlNodeType.Element, // nodeType
1301 isEmptyElement, // isEmptyElement
1303 false // clearAttributes
1305 if (prefix.Length > 0)
1306 currentToken.NamespaceURI = LookupNamespace (prefix, true);
1307 else if (namespaces)
1308 currentToken.NamespaceURI = parserContext.NamespaceManager.DefaultNamespace;
1311 if (NamespaceURI == null)
1312 throw NotWFError (String.Format ("'{0}' is undeclared namespace.", Prefix));
1314 for (int i = 0; i < attributeCount; i++) {
1315 MoveToAttribute (i);
1316 if (NamespaceURI == null)
1317 throw NotWFError (String.Format ("'{0}' is undeclared namespace.", Prefix));
1324 for (int i = 0; i < attributeCount; i++) {
1325 if (!Object.ReferenceEquals (attributeTokens [i].Prefix, XmlNamespaceManager.PrefixXml))
1327 string aname = attributeTokens [i].LocalName;
1328 string value = attributeTokens [i].Value;
1331 if (this.resolver != null) {
1333 BaseURI != String.Empty ?
1334 new Uri (BaseURI) : null;
1335 Uri uri = resolver.ResolveUri (
1337 parserContext.BaseURI =
1343 parserContext.BaseURI = value;
1346 parserContext.XmlLang = value;
1351 parserContext.XmlSpace = XmlSpace.Preserve;
1354 parserContext.XmlSpace = XmlSpace.Default;
1357 throw NotWFError (String.Format ("Invalid xml:space value: {0}", value));
1364 CheckCurrentStateUpdate ();
// Pushes an element name onto the open-element stack, doubling the
// backing array when it is full.
private void PushElementName (string name)
{
	int capacity = elementNames.Length;
	if (elementNameStackPos == capacity) {
		string [] grown = new string [capacity * 2];
		Array.Copy (elementNames, 0, grown, 0, elementNameStackPos);
		elementNames = grown;
	}

	elementNames [elementNameStackPos] = name;
	elementNameStackPos++;
}
1377 // The reader is positioned on the first character
1378 // of the element's name.
1379 private void ReadEndTag ()
1381 if (currentState != XmlNodeType.Element)
1382 throw NotWFError ("End tag cannot appear in this state.");
1384 currentLinkedNodeLineNumber = line;
1385 currentLinkedNodeLinePosition = column;
1387 string prefix, localName;
1388 string name = ReadName (out prefix, out localName);
1389 if (elementNameStackPos == 0)
1390 throw NotWFError ("closing element without matching opening element");
1391 string expected = elementNames [--elementNameStackPos];
1392 if (expected != name)
1393 throw NotWFError (String.Format ("unmatched closing element: expected {0} but found {1}", expected, name));
1395 ExpectAfterWhitespace ('>');
1400 XmlNodeType.EndElement, // nodeType
1403 localName, // localName
1404 false, // isEmptyElement
1406 true // clearAttributes
1408 if (prefix.Length > 0)
1409 currentToken.NamespaceURI = LookupNamespace (prefix, true);
1410 else if (namespaces)
1411 currentToken.NamespaceURI = parserContext.NamespaceManager.DefaultNamespace;
1415 CheckCurrentStateUpdate ();
// Moves the state machine to "after document element" (EndElement)
// once the node at depth 0 is closed, unless multiple roots are allowed.
private void CheckCurrentStateUpdate ()
{
	if (depth != 0 || allowMultipleRoot)
		return;

	// Either an empty element or an explicit end tag closes the element.
	if (IsEmptyElement || NodeType == XmlNodeType.EndElement)
		currentState = XmlNodeType.EndElement;
}
// Appends a supplementary-plane code point (above U+FFFF) to the name
// buffer as a UTF-16 surrogate pair.
//
// ch: the code point; expected to be in 0x10000 .. 0x10FFFF.
//
// The split is the exact inverse of the decoding done in PeekChar:
//   0x10000 + (high - 0xD800) * 0x400 + (low - 0xDC00)
// The previous arithmetic (ch / 0x10000 and ch % 0x10000) was only
// right for U+10000 .. U+103FF and produced non-surrogate values above.
private void AppendSurrogatePairNameChar (int ch)
{
	int offset = ch - 0x10000;

	// NOTE(review): the first store does not check capacity, as in the
	// original; presumably the caller has already made room - confirm.
	nameBuffer [nameLength++] = (char) (0xD800 + (offset >> 10));
	if (nameLength == nameCapacity)
		ExpandNameCapacity ();
	nameBuffer [nameLength++] = (char) (0xDC00 + (offset & 0x3FF));
}
// Doubles the name buffer, keeping the nameLength characters
// accumulated so far.
private void ExpandNameCapacity ()
{
	nameCapacity *= 2;
	char [] grown = new char [nameCapacity];
	Array.Copy (nameBuffer, grown, nameLength);
	nameBuffer = grown;
}
// Appends one character, given as a code point, to the value buffer.
// Code points that fit in a single UTF-16 unit are appended directly;
// larger ones are delegated to AppendSurrogatePairValueChar.
private void AppendValueChar (int ch)
{
	// "<=" rather than "<": U+FFFF is still a single UTF-16 unit and
	// must not be routed through the surrogate-pair path.
	if (ch <= Char.MaxValue)
		valueBuffer.Append ((char) ch);
	else
		AppendSurrogatePairValueChar (ch);
}
// Appends a supplementary-plane code point (above U+FFFF) to the value
// buffer as a UTF-16 surrogate pair.
//
// ch: the code point; expected to be in 0x10000 .. 0x10FFFF.
//
// The split is the exact inverse of the decoding done in PeekChar:
//   0x10000 + (high - 0xD800) * 0x400 + (low - 0xDC00)
// The previous arithmetic (ch / 0x10000 and ch % 0x10000) was only
// right for U+10000 .. U+103FF and produced non-surrogate values above.
private void AppendSurrogatePairValueChar (int ch)
{
	int offset = ch - 0x10000;
	valueBuffer.Append ((char) (0xD800 + (offset >> 10)));
	valueBuffer.Append ((char) (0xDC00 + (offset & 0x3FF)));
}
// Materializes the current contents of the value buffer as a string.
// Small-capacity buffers go through ToString (0, Length), larger ones
// through the parameterless ToString (); the threshold is 100.
private string CreateValueString ()
{
	if (valueBuffer.Capacity >= 100)
		return valueBuffer.ToString ();

	return valueBuffer.ToString (0, valueBuffer.Length);
}
1461 private void ClearValueBuffer ()
1463 valueBuffer.Length = 0;
// Appends one character, given as a code point, to the current-tag
// buffer, growing the buffer as needed. Code points above U+FFFF are
// stored as a UTF-16 surrogate pair.
//
// The surrogate split is the exact inverse of the decoding done in
// PeekChar: 0x10000 + (high - 0xD800) * 0x400 + (low - 0xDC00).
// The previous arithmetic (ch / 0x10000 and ch % 0x10000) was only
// right for U+10000 .. U+103FF.
private void AppendCurrentTagChar (int ch)
{
	if (currentTagLength == currentTagCapacity)
		ExpandCurrentTagCapacity ();

	// "<=" rather than "<": U+FFFF is still a single UTF-16 unit.
	if (ch <= Char.MaxValue) {
		currentTagBuffer [currentTagLength++] = (char) ch;
		return;
	}

	int offset = ch - 0x10000;
	currentTagBuffer [currentTagLength++] = (char) (0xD800 + (offset >> 10));
	// The high surrogate may have filled the buffer; make room for the low one.
	if (currentTagLength == currentTagCapacity)
		ExpandCurrentTagCapacity ();
	currentTagBuffer [currentTagLength++] = (char) (0xDC00 + (offset & 0x3FF));
}
// Doubles the current-tag buffer, keeping the currentTagLength
// characters accumulated so far.
private void ExpandCurrentTagCapacity ()
{
	currentTagCapacity *= 2;
	char [] grown = new char [currentTagCapacity];
	Array.Copy (currentTagBuffer, grown, currentTagLength);
	currentTagBuffer = grown;
}
// Reads character data into valueBuffer until '<' or end of input (-1)
// is peeked, then sets up either an entity-reference node (when
// returnEntityReference is set and nothing was buffered) or a Text /
// SignificantWhitespace / Whitespace node.
// notWhitespace: when true the node is reported as Text; the loop also
// sets it to true itself.
// Throws a not-well-formed error when currentState is not Element, on
// an invalid character (if CharacterChecking), and on "]]>" in text.
1488 // The reader is positioned on the first character
1490 private void ReadText (bool notWhitespace)
1492 if (currentState != XmlNodeType.Element)
1493 throw NotWFError ("Text node cannot appear in this state.");
1496 ClearValueBuffer ();
1498 int ch = PeekChar ();
1499 bool previousWasCloseBracket = false;
1501 while (ch != '<' && ch != -1) {
1504 ch = ReadReference (false);
1505 if (returnEntityReference) // Returns -1 if char validation should not be done
1507 } else if (normalization && ch == '\r') {
// Line-end normalization: a '\r' is turned into '\n'.
1511 // append '\n' instead of '\r'.
1512 AppendValueChar ('\n');
1513 // and in case of "\r\n", discard '\r'.
1515 if (CharacterChecking && XmlChar.IsInvalid (ch))
1516 throw NotWFError ("Not allowed character was found.");
// Hand-inlined copy of AppendValueChar (see FIXME).
1520 // FIXME: it might be optimized by the JIT later,
1521 // AppendValueChar (ch);
1523 if (ch < Char.MaxValue)
1524 valueBuffer.Append ((char) ch);
1526 AppendSurrogatePairValueChar (ch);
// previousWasCloseBracket tracks a preceding ']' so that the CDATA
// terminator "]]>" can be rejected inside ordinary text.
1531 if (previousWasCloseBracket)
1532 if (PeekChar () == '>')
1533 throw NotWFError ("Inside text content, character sequence ']]>' is not allowed.");
1534 previousWasCloseBracket = true;
1536 else if (previousWasCloseBracket)
1537 previousWasCloseBracket = false;
1539 notWhitespace = true;
// An entity reference with no preceding text becomes its own node.
1542 if (returnEntityReference && valueBuffer.Length == 0) {
1543 SetEntityReferenceProperties ();
// Otherwise pick Text, or a whitespace node type depending on XmlSpace.
1545 XmlNodeType nodeType = notWhitespace ? XmlNodeType.Text :
1546 this.XmlSpace == XmlSpace.Preserve ? XmlNodeType.SignificantWhitespace : XmlNodeType.Whitespace;
1548 nodeType, // nodeType
1549 String.Empty, // name
1550 String.Empty, // prefix
1551 String.Empty, // localName
1552 false, // isEmptyElement
1553 null, // value: create only when required
1554 true // clearAttributes
1559 // The leading '&' has already been consumed.
1560 // Returns the int produced by ReadCharacterReference (when the next
1561 // character is '#') or by ReadEntityReference (anything else).
1562 // ReadEntityReference may set returnEntityReference, which ReadText
1563 // tests right after calling this method.
1564 private int ReadReference (bool ignoreEntityReferences)
1566 if (PeekChar () == '#') {
1568 return ReadCharacterReference ();
1570 return ReadEntityReference (ignoreEntityReferences);
// Parses the digits of a numeric character reference up to ';' (or end
// of input) and accumulates them: hexadecimal when the next character is
// 'x', decimal otherwise.
// Throws a not-well-formed error on an invalid digit, or when
// CharacterChecking and Normalization are both on and XmlChar.IsInvalid
// rejects the accumulated value.
// NOTE(review): the accumulation (value << 4, value * 10) has no visible
// overflow guard — confirm that very long references cannot wrap around.
1573 private int ReadCharacterReference ()
1577 if (PeekChar () == 'x') {
// Hexadecimal form.
1580 while (PeekChar () != ';' && PeekChar () != -1) {
1581 int ch = ReadChar ();
1583 if (ch >= '0' && ch <= '9')
1584 value = (value << 4) + ch - '0';
1585 else if (ch >= 'A' && ch <= 'F')
1586 value = (value << 4) + ch - 'A' + 10;
1587 else if (ch >= 'a' && ch <= 'f')
1588 value = (value << 4) + ch - 'a' + 10;
1590 throw NotWFError (String.Format (CultureInfo.InvariantCulture,
1591 "invalid hexadecimal digit: {0} (#x{1:X})",
// Decimal form.
1596 while (PeekChar () != ';' && PeekChar () != -1) {
1597 int ch = ReadChar ();
1599 if (ch >= '0' && ch <= '9')
1600 value = value * 10 + ch - '0';
1602 throw NotWFError (String.Format (CultureInfo.InvariantCulture,
1603 "invalid decimal digit: {0} (#x{1:X})",
1611 // There is no way to save surrogate pairs...
1612 if (CharacterChecking && Normalization &&
1613 XmlChar.IsInvalid (value))
1614 throw NotWFError ("Referenced character was not allowed in XML. Normalization is " + normalization + ", checkCharacters = " + checkCharacters);
// Reads the entity name that follows '&' and looks it up among the
// predefined entities (XmlChar.GetPredefinedEntity).
// For other entities the visible alternatives are: when
// ignoreEntityReferences is true, the literal text "&name;" is appended
// to the value buffer; otherwise returnEntityReference is set and the
// name is stored in entityReferenceName.
1618 // Returns -1 if it should not be validated.
1619 // Real EOF must not be detected here.
1620 private int ReadEntityReference (bool ignoreEntityReferences)
1622 string name = ReadName ();
1625 int predefined = XmlChar.GetPredefinedEntity (name);
1626 if (predefined >= 0)
1629 if (ignoreEntityReferences) {
// Keep the reference unexpanded, as raw text.
1630 AppendValueChar ('&');
1631 for (int i = 0; i < name.Length; i++)
1632 AppendValueChar (name [i]);
1633 AppendValueChar (';');
1635 returnEntityReference = true;
1636 entityReferenceName = name;
// Reads a run of attributes. For each one it allocates an attribute
// token, records the current line/column, reads the qualified name,
// expects '=' (optionally preceded by whitespace) and reads the value
// tokens. The loop inspects the next character for '?', '/', '>' and
// runs while that character is not end of input (-1).
// currentAttribute and currentAttributeValue are reset to -1 both before
// and after reading.
// NOTE(review): isXmlDecl presumably guards the dummyValue hack below —
// confirm.
1642 // The reader is positioned on the first character of
1643 // the attribute name.
1644 private void ReadAttributes (bool isXmlDecl)
1647 bool requireWhitespace = false;
1648 currentAttribute = -1;
1649 currentAttributeValue = -1;
// Whitespace is mandatory once requireWhitespace has been set.
1652 if (!SkipWhitespace () && requireWhitespace)
1653 throw NotWFError ("Unexpected token. Name is required here.");
1655 IncrementAttributeToken ();
1656 currentAttributeToken.LineNumber = line;
1657 currentAttributeToken.LinePosition = column;
1659 string prefix, localName;
1660 currentAttributeToken.Name = ReadName (out prefix, out localName);
1661 currentAttributeToken.Prefix = prefix;
1662 currentAttributeToken.LocalName = localName;
1663 ExpectAfterWhitespace ('=');
// -1: the quote character is read from the input.
1665 ReadAttributeValueTokens (-1);
1666 // This hack is required for xmldecl which has
1667 // both effective attributes and Value.
1670 dummyValue = currentAttributeToken.Value;
// No whitespace after this value: one will be demanded before any
// further attribute.
1674 if (!SkipWhitespace ())
1675 requireWhitespace = true;
1676 peekChar = PeekChar ();
1678 if (peekChar == '?')
1681 else if (peekChar == '/' || peekChar == '>')
1683 } while (peekChar != -1);
1685 currentAttribute = -1;
1686 currentAttributeValue = -1;
// Adds a synthetic attribute: allocates the next attribute token, stores
// the name (interned through the parser context's NameTable) with empty
// prefix and namespace URI, then allocates a value token and passes it
// to SetTokenProperties.
// NOTE(review): `value` is presumably supplied to SetTokenProperties as
// the token value — confirm.
1689 private void AddDtdAttribute (string name, string value)
1691 IncrementAttributeToken ();
1692 XmlAttributeTokenInfo ati = attributeTokens [currentAttribute];
1693 ati.Name = parserContext.NameTable.Add (name);
1694 ati.Prefix = String.Empty;
1695 ati.NamespaceURI = String.Empty;
1696 IncrementAttributeValueToken ();
1697 XmlTokenInfo vti = attributeValueTokens [currentAttributeValue];
1699 SetTokenProperties (vti,
// Makes attributeTokens [currentAttribute] the current attribute token:
// doubles the array when currentAttribute equals its length, lazily
// creates the entry if it is null, and clears it for reuse.
// NOTE(review): presumably currentAttribute is incremented before the
// capacity check, as IncrementAttributeValueToken does for its index —
// confirm.
1710 private void IncrementAttributeToken ()
1713 if (attributeTokens.Length == currentAttribute) {
1714 XmlAttributeTokenInfo [] newArray =
1715 new XmlAttributeTokenInfo [attributeTokens.Length * 2];
1716 attributeTokens.CopyTo (newArray, 0);
1717 attributeTokens = newArray;
// Token objects are created on first use and recycled afterwards.
1719 if (attributeTokens [currentAttribute] == null)
1720 attributeTokens [currentAttribute] = new XmlAttributeTokenInfo (this);
1721 currentAttributeToken = attributeTokens [currentAttribute];
1722 currentAttributeToken.Clear ();
// Advances currentAttributeValue to the next slot and makes that slot
// the current attribute value token: the array is doubled when the new
// index equals its length, the entry is created if it is still null,
// and the token is cleared for reuse.
private void IncrementAttributeValueToken ()
{
	currentAttributeValue++;

	int capacity = attributeValueTokens.Length;
	if (capacity == currentAttributeValue) {
		XmlTokenInfo [] grown = new XmlTokenInfo [capacity * 2];
		attributeValueTokens.CopyTo (grown, 0);
		attributeValueTokens = grown;
	}

	XmlTokenInfo token = attributeValueTokens [currentAttributeValue];
	if (token == null) {
		token = new XmlTokenInfo (this);
		attributeValueTokens [currentAttributeValue] = token;
	}

	currentAttributeValueToken = token;
	token.Clear ();
}
// Reads one quoted attribute value into valueBuffer and describes it as
// a sequence of value tokens (Text and EntityReference) attached to
// currentAttributeToken through ValueTokenStartIndex/ValueTokenEndIndex.
// dummyQuoteChar: when negative the opening quote is read from the
// input; otherwise this value is used as the quote character.
// Throws a not-well-formed error for an unquoted value, '<' in the
// value, end of file inside the value (when the quote was read from the
// input), an invalid character (if CharacterChecking), and the failures
// raised by CheckAttributeEntityReferenceWFC.
1739 // LAMESPEC: Orthodox XML reader should normalize attribute values
1740 private void ReadAttributeValueTokens (int dummyQuoteChar)
1742 int quoteChar = (dummyQuoteChar < 0) ? ReadChar () : dummyQuoteChar;
1744 if (quoteChar != '\'' && quoteChar != '\"')
1745 throw NotWFError ("an attribute value was not quoted");
1746 currentAttributeToken.QuoteChar = (char) quoteChar;
// First value token of this attribute.
1748 IncrementAttributeValueToken ();
1749 currentAttributeToken.ValueTokenStartIndex = currentAttributeValue;
1750 currentAttributeValueToken.LineNumber = line;
1751 currentAttributeValueToken.LinePosition = column;
// incrementToken: set after an EntityReference token was emitted, so
// that following content starts a fresh token.
1753 bool incrementToken = false;
1754 bool isNewToken = true;
1757 currentAttributeValueToken.ValueBufferStart = valueBuffer.Length;
1760 if (ch == quoteChar)
1763 if (incrementToken) {
1764 IncrementAttributeValueToken ();
1765 currentAttributeValueToken.ValueBufferStart = valueBuffer.Length;
1766 currentAttributeValueToken.LineNumber = line;
1767 currentAttributeValueToken.LinePosition = column;
1768 incrementToken = false;
1775 throw NotWFError ("attribute values cannot contain '<'");
1777 if (dummyQuoteChar < 0)
1778 throw NotWFError ("unexpected end of file in an attribute value");
1779 else // Attribute value constructor.
1785 if (PeekChar () == '\n')
1786 continue; // skip '\r'.
1788 // The csc in MS.NET 2.0 beta 1 barfs on this goto, so work around that
1797 // When Normalize = true, then replace
1798 // all spaces to ' '
// Reference handling: character reference ('#') or named entity.
1804 if (PeekChar () == '#') {
1806 ch = ReadCharacterReference ();
1807 AppendValueChar (ch);
1810 // Check XML 1.0 section 3.1 WFC.
1811 string entName = ReadName ();
1813 int predefined = XmlChar.GetPredefinedEntity (entName);
1814 if (predefined < 0) {
1815 CheckAttributeEntityReferenceWFC (entName);
1817 if (entityHandling == EntityHandling.ExpandEntities) {
// Expanded entity text is appended to the value character by character.
1818 string value = DTD.GenerateEntityAttributeText (entName);
1819 foreach (char c in value)
1820 AppendValueChar (c);
// Close the running Text token and emit an EntityReference token.
1824 currentAttributeValueToken.ValueBufferEnd = valueBuffer.Length;
1825 currentAttributeValueToken.NodeType = XmlNodeType.Text;
1827 IncrementAttributeValueToken ();
1828 currentAttributeValueToken.Name = entName;
1829 currentAttributeValueToken.Value = String.Empty;
1830 currentAttributeValueToken.NodeType = XmlNodeType.EntityReference;
1831 incrementToken = true;
1835 AppendValueChar (predefined);
1838 if (CharacterChecking && XmlChar.IsInvalid (ch))
1839 throw NotWFError ("Invalid character was found.");
// Hand-inlined copy of AppendValueChar (see FIXME).
1840 // FIXME: it might be optimized by the JIT later,
1841 // AppendValueChar (ch);
1843 if (ch < Char.MaxValue)
1844 valueBuffer.Append ((char) ch);
1846 AppendSurrogatePairValueChar (ch);
// Close the trailing Text token unless the value ended on an entity
// reference token.
1853 if (!incrementToken) {
1854 currentAttributeValueToken.ValueBufferEnd = valueBuffer.Length;
1855 currentAttributeValueToken.NodeType = XmlNodeType.Text;
1857 currentAttributeToken.ValueTokenEndIndex = currentAttributeValue;
// Well-formedness checks for an entity referenced inside an attribute
// value. Looks the entity up in DTD.EntityDecls (null when there is no
// DTD) and throws a not-well-formed error when:
//  - it is undeclared and entities are being expanded, or a DTD and a
//    resolver are both present;
//  - the declaration has an external reference;
//  - the document is standalone and the declaration is not from the
//    internal subset;
//  - the entity value contains '<'.
1861 private void CheckAttributeEntityReferenceWFC (string entName)
1863 DTDEntityDeclaration entDecl =
1864 DTD == null ? null : DTD.EntityDecls [entName];
1865 if (entDecl == null) {
// NOTE(review): the trailing "entDecl == null" repeats the enclosing
// test and looks redundant.
1866 if (entityHandling == EntityHandling.ExpandEntities
1867 || (DTD != null && resolver != null && entDecl == null))
1868 throw NotWFError (String.Format ("Referenced entity '{0}' does not exist.", entName));
1873 if (entDecl.HasExternalReference)
1874 throw NotWFError ("Reference to external entities is not allowed in the value of an attribute.");
1875 if (isStandalone && !entDecl.IsInternalSubset)
1876 throw NotWFError ("Reference to external entities is not allowed in the internal subset.");
1877 if (entDecl.EntityValue.IndexOf ('<') >= 0)
1878 throw NotWFError ("Attribute must not contain character '<' either directly or indirectly by way of entity references.");
// Reads the target name; a target of exactly "xml" is delegated to
// ReadXmlDeclaration, and any other casing of "xml" is rejected.
// For a regular processing instruction the text up to "?>" is collected
// in valueBuffer and a ProcessingInstruction node is set up with the
// target as its local name.
// If currentState is still None it is advanced to XmlDeclaration.
1881 // The reader is positioned on the first character
1884 // It may be xml declaration or processing instruction.
1885 private void ReadProcessingInstruction ()
1887 string target = ReadName ();
1888 if (target == "xml") {
1889 ReadXmlDeclaration ();
1891 } else if (target.ToLower (CultureInfo.InvariantCulture) == "xml")
1892 throw NotWFError ("Not allowed processing instruction name which starts with 'X', 'M', 'L' was found.");
1894 if (currentState == XmlNodeType.None)
1895 currentState = XmlNodeType.XmlDeclaration;
// Without whitespace after the target, only '?' may follow.
1897 if (!SkipWhitespace ())
1898 if (PeekChar () != '?')
1899 throw NotWFError ("Invalid processing instruction name was found.");
1901 ClearValueBuffer ();
1903 while (PeekChar () != -1) {
1904 int ch = ReadChar ();
// "?>" terminates the instruction.
1906 if (ch == '?' && PeekChar () == '>') {
1911 if (CharacterChecking && XmlChar.IsInvalid (ch))
1912 throw NotWFError ("Invalid character was found.");
1913 AppendValueChar (ch);
1917 XmlNodeType.ProcessingInstruction, // nodeType
1919 String.Empty, // prefix
1920 target, // localName
1921 false, // isEmptyElement
1922 null, // value: create only when required
1923 true // clearAttributes
// Handles the XML declaration. It is accepted only while currentState
// is None; the state then becomes XmlDeclaration.
// The pseudo-attributes are read with ReadAttributes (true) and checked:
// the first must be version="1.0", the second (if any) "encoding" or
// "standalone", the third (if any) "standalone"; standalone must be
// "yes" or "no". isStandalone is set from the standalone value.
// Any violation is reported as a not-well-formed error.
1927 // The reader is positioned after "<?xml "
1928 private void ReadXmlDeclaration ()
1930 if (currentState != XmlNodeType.None) {
1931 throw NotWFError ("XML declaration cannot appear in this state.");
1933 currentState = XmlNodeType.XmlDeclaration;
1937 ReadAttributes (true); // They must have "version."
1938 string version = GetAttribute ("version");
1940 string message = null;
// NOTE(review): attributeTokens [0] is dereferenced here; presumably a
// preceding check covers a declaration without attributes — confirm.
1942 if (attributeTokens [0].Name != "version" || version != "1.0")
1943 message = "Version 1.0 declaration is required in XML Declaration.";
1944 else if (attributeCount > 1 &&
1945 (attributeTokens [1].Name != "encoding" &&
1946 attributeTokens [1].Name != "standalone"))
1947 message = "Invalid Xml Declaration markup was found.";
1948 else if (attributeCount > 2 && attributeTokens [2].Name != "standalone")
1949 message = "Invalid Xml Declaration markup was found.";
1950 string sa = GetAttribute ("standalone");
1951 if (sa != null && sa != "yes" && sa != "no")
1952 message = String.Format ("Only 'yes' or 'no' is allowed for standalone. Value was '{0}'", sa);
1954 this.isStandalone = (sa == "yes");
1956 if (message != null)
1957 throw NotWFError (message);
1960 XmlNodeType.XmlDeclaration, // nodeType
1962 String.Empty, // prefix
1964 false, // isEmptyElement
// The node value is the buffered tag text after its first 6 characters.
1965 new string (currentTagBuffer, 6, currentTagLength - 6), // value
1966 false // clearAttributes
// Sets currentState to Element, then inspects the beginning of the input
// for a text declaration: '<', '?', then "xml " in the peek buffer.
// A name that is "xml" in another casing is rejected. When a declaration
// is found, an optional version (quoted, at most 3 characters, "1.0"
// when exactly 3) and a mandatory encoding declaration are validated;
// violations raise not-well-formed errors.
1972 private void SkipTextDeclaration ()
1974 this.currentState = XmlNodeType.Element;
1976 if (PeekChar () != '<')
1981 if (PeekChar () != '?') {
// Make sure at least 6 characters are available in peekChars.
1987 while (peekCharsIndex < 6) {
1988 if (PeekChar () < 0)
1993 if (new string (peekChars, 2, 4) != "xml ") {
1994 if (new string (peekChars, 2, 3).ToLower (CultureInfo.InvariantCulture) == "xml") {
1995 throw NotWFError ("Processing instruction name must not be character sequence 'X' 'M' 'L' with case insensitivity.");
// Optional version declaration.
2004 if (PeekChar () == 'v') {
2006 ExpectAfterWhitespace ('=');
2008 int quoteChar = ReadChar ();
2009 char [] expect1_0 = new char [3];
2010 int versionLength = 0;
2011 switch (quoteChar) {
2014 while (PeekChar () != quoteChar) {
2015 if (PeekChar () == -1)
2016 throw NotWFError ("Invalid version declaration inside text declaration.");
2017 else if (versionLength == 3)
2018 throw NotWFError ("Invalid version number inside text declaration.");
2020 expect1_0 [versionLength] = (char) ReadChar ();
2022 if (versionLength == 3 && new String (expect1_0) != "1.0")
2023 throw NotWFError ("Invalid version number inside text declaration.");
2030 throw NotWFError ("Invalid version declaration inside text declaration.");
// Encoding declaration; its absence is an error (see the last throw).
2034 if (PeekChar () == 'e') {
2035 Expect ("encoding");
2036 ExpectAfterWhitespace ('=');
2038 int quoteChar = ReadChar ();
2039 switch (quoteChar) {
2042 while (PeekChar () != quoteChar)
2043 if (ReadChar () == -1)
2044 throw NotWFError ("Invalid encoding declaration inside text declaration.");
2049 throw NotWFError ("Invalid encoding declaration inside text declaration.");
2051 // Encoding value should be checked inside XmlInputStream.
2054 throw NotWFError ("Encoding declaration is mandatory in text declaration.");
// Dispatches on the character following "<!" and throws a
// not-well-formed error for markup it does not recognise.
// NOTE(review): presumably routes to ReadComment, ReadCDATA and
// ReadDoctypeDecl — confirm.
2059 // The reader is positioned on the first character after
2060 // the leading '<!'.
2061 private void ReadDeclaration ()
2063 int ch = PeekChar ();
2081 throw NotWFError ("Unexpected declaration markup was found.");
// Collects the comment text into valueBuffer and sets up a Comment node.
// "--" inside the comment must be followed by '>' (end of comment),
// otherwise a not-well-formed error is thrown.
// If currentState is still None it is advanced to XmlDeclaration.
2085 // The reader is positioned on the first character after
2086 // the leading '<!--'.
2087 private void ReadComment ()
2089 if (currentState == XmlNodeType.None)
2090 currentState = XmlNodeType.XmlDeclaration;
2092 ClearValueBuffer ();
2094 while (PeekChar () != -1) {
2095 int ch = ReadChar ();
2097 if (ch == '-' && PeekChar () == '-') {
2100 if (PeekChar () != '>')
2101 throw NotWFError ("comments cannot contain '--'");
// NOTE(review): unlike the text, CDATA and PI readers, this check is
// not gated by CharacterChecking — confirm that is intended.
2107 if (XmlChar.IsInvalid (ch))
2108 throw NotWFError ("Not allowed character was found.");
2110 AppendValueChar (ch);
2114 XmlNodeType.Comment, // nodeType
2115 String.Empty, // name
2116 String.Empty, // prefix
2117 String.Empty, // localName
2118 false, // isEmptyElement
2119 null, // value: create only when required
2120 true // clearAttributes
// Collects the CDATA section content into valueBuffer until "]]>" and
// sets up a CDATA node. Allowed only while currentState is Element.
// With normalization on, '\r' is turned into '\n'. Invalid characters
// raise a not-well-formed error when CharacterChecking is on.
2124 // The reader is positioned on the first character after
2125 // the leading '<![CDATA['.
2126 private void ReadCDATA ()
2128 if (currentState != XmlNodeType.Element)
2129 throw NotWFError ("CDATA section cannot appear in this state.");
2131 ClearValueBuffer ();
2135 while (PeekChar () != -1) {
// Look for the "]]>" terminator.
2140 if (ch == ']' && PeekChar () == ']') {
2141 ch = ReadChar (); // ']'
2143 if (PeekChar () == '>') {
2150 if (normalization && ch == '\r') {
2153 // append '\n' instead of '\r'.
2154 AppendValueChar ('\n');
2155 // otherwise, discard '\r'.
2158 if (CharacterChecking && XmlChar.IsInvalid (ch))
2159 throw NotWFError ("Invalid character was found.");
// Hand-inlined copy of AppendValueChar (see FIXME).
2161 // FIXME: it might be optimized by the JIT later,
2162 // AppendValueChar (ch);
2164 if (ch < Char.MaxValue)
2165 valueBuffer.Append ((char) ch);
2167 AppendSurrogatePairValueChar (ch);
2172 XmlNodeType.CDATA, // nodeType
2173 String.Empty, // name
2174 String.Empty, // prefix
2175 String.Empty, // localName
2176 false, // isEmptyElement
2177 null, // value: create only when required
2178 true // clearAttributes
// Reads a document type declaration: the doctype name, an optional
// external ID (SYSTEM literal, or PUBLIC literal followed by whitespace
// and a system literal) and an optional internal subset in '[' ... ']'.
// The internal subset text is stored in parserContext.InternalSubset,
// the DTD object model is generated, and a DocumentType node is set up
// with "PUBLIC" / "SYSTEM" pseudo-attributes for the ids that are
// present.
// Throws a not-well-formed error when DTDs are prohibited or when
// currentState is DocumentType, Element or EndElement.
2182 // The reader is positioned on the first character after
2183 // the leading '<!DOCTYPE'.
2184 private void ReadDoctypeDecl ()
2187 throw NotWFError ("Document Type Declaration (DTD) is prohibited in this XML.");
2188 switch (currentState) {
2189 case XmlNodeType.DocumentType:
2190 case XmlNodeType.Element:
2191 case XmlNodeType.EndElement:
2192 throw NotWFError ("Document type cannot appear in this state.");
2194 currentState = XmlNodeType.DocumentType;
2196 string doctypeName = null;
2197 string publicId = null;
2198 string systemId = null;
2199 int intSubsetStartLine = 0;
2200 int intSubsetStartColumn = 0;
2203 doctypeName = ReadName ();
2208 systemId = ReadSystemLiteral (true);
2211 publicId = ReadPubidLiteral ();
2212 if (!SkipWhitespace ())
2213 throw NotWFError ("Whitespace is required between PUBLIC id and SYSTEM id.");
// false: the "SYSTEM" keyword is not expected after a PUBLIC id.
2214 systemId = ReadSystemLiteral (false);
2220 if(PeekChar () == '[')
2222 // read markupdecl etc. or end of decl
// Remember where the subset starts, both as a line/column position and
// as an offset into currentTagBuffer.
2224 intSubsetStartLine = this.LineNumber;
2225 intSubsetStartColumn = this.LinePosition;
2226 int startPos = currentTagLength;
2227 ReadInternalSubset ();
2228 int endPos = currentTagLength - 1;
2229 parserContext.InternalSubset = new string (currentTagBuffer, startPos, endPos - startPos);
2231 // end of DOCTYPE decl.
2232 ExpectAfterWhitespace ('>');
2234 GenerateDTDObjectModel (doctypeName, publicId,
2235 systemId, parserContext.InternalSubset,
2236 intSubsetStartLine, intSubsetStartColumn);
2238 // set properties for <!DOCTYPE> node
2240 XmlNodeType.DocumentType, // nodeType
2241 doctypeName, // name
2242 String.Empty, // prefix
2243 doctypeName, // localName
2244 false, // isEmptyElement
2245 parserContext.InternalSubset, // value
2246 true // clearAttributes
2249 if (publicId != null)
2250 AddDtdAttribute ("PUBLIC", publicId);
2251 if (systemId != null)
2252 AddDtdAttribute ("SYSTEM", systemId);
2253 currentAttribute = currentAttributeValue = -1;
// Convenience overload: forwards to the six-argument overload with 0 for
// both the internal subset start line and start column.
internal DTDObjectModel GenerateDTDObjectModel (string name, string publicId,
	string systemId, string internalSubset)
{
	const int startLine = 0;
	const int startColumn = 0;

	DTDObjectModel model = GenerateDTDObjectModel (
		name, publicId, systemId, internalSubset, startLine, startColumn);
	return model;
}
// Creates a new DTDObjectModel on parserContext.Dtd, copies the base
// URI, public/system ids, internal subset, resolver, standalone flag and
// current line/column into it, then runs a DTDReader (with this reader's
// normalization setting) and returns what it generates.
// intSubsetStartLine / intSubsetStartColumn are handed to the DTDReader.
// NOTE(review): `name` is presumably stored on the DTD as well —
// confirm.
2262 internal DTDObjectModel GenerateDTDObjectModel (string name, string publicId,
2263 string systemId, string internalSubset, int intSubsetStartLine, int intSubsetStartColumn)
2266 parserContext.Dtd = new DTDObjectModel (this.NameTable); // merges both internal and external subsets in the meantime,
2267 DTD.BaseURI = BaseURI;
2269 DTD.PublicId = publicId;
2270 DTD.SystemId = systemId;
2271 DTD.InternalSubset = internalSubset;
2272 DTD.XmlResolver = resolver;
2273 DTD.IsStandalone = isStandalone;
2274 DTD.LineNumber = line;
2275 DTD.LinePosition = column;
2277 DTDReader dr = new DTDReader (DTD, intSubsetStartLine, intSubsetStartColumn);
2278 dr.Normalization = this.normalization;
2279 return dr.GenerateDTDObjectModel ();
// States tracked while scanning the internal DTD subset; the members
// referenced in this file are Free, PI, Comment, ElementDecl,
// EntityDecl, AttlistDecl, NotationDecl, InsideSingleQuoted and
// InsideDoubleQuoted.
2282 private enum DtdInputState
// Typed wrapper around a non-generic Stack holding DtdInputState values.
// The constructor pushes DtdInputState.Free as the initial state.
2295 private class DtdInputStateStack
2297 Stack intern = new Stack ();
2298 public DtdInputStateStack ()
2300 Push (DtdInputState.Free);
// Returns the top state without removing it.
2303 public DtdInputState Peek ()
2305 return (DtdInputState) intern.Peek ();
// Removes and returns the top state.
2308 public DtdInputState Pop ()
2310 return (DtdInputState) intern.Pop ();
2313 public void Push (DtdInputState val)
// Stack of DTD scanning states used by ReadInternalSubset; State exposes
// the state currently on top of it.
2320 DtdInputStateStack stateStack = new DtdInputStateStack ();
2321 DtdInputState State {
2322 get { return stateStack.Peek (); }
// Scans the internal subset character by character, using stateStack to
// track whether the scanner is inside a PI, a comment, a quoted literal
// or one of the markup declarations (ELEMENT, ENTITY, ATTLIST,
// NOTATION). Scanning stops when continueParse is cleared in the Free
// state. Not-well-formed errors are thrown for end of file inside the
// DTD, unknown "<" / "<!E" markup, a stray '>' and a misplaced parameter
// entity reference.
2325 // Simply read but not generate any result.
2326 private void ReadInternalSubset ()
2328 bool continueParse = true;
2330 while (continueParse) {
2331 switch (ReadChar ()) {
2334 case DtdInputState.Free:
2335 continueParse = false;
2337 case DtdInputState.InsideDoubleQuoted:
2339 case DtdInputState.InsideSingleQuoted:
2342 throw NotWFError ("unexpected end of file at DTD.");
2346 throw NotWFError ("unexpected end of file at DTD.");
2349 case DtdInputState.InsideDoubleQuoted:
2350 case DtdInputState.InsideSingleQuoted:
2351 case DtdInputState.Comment:
2352 continue; // well-formed
// Markup start: the following character(s) select the state to push.
2354 int c = ReadChar ();
2357 stateStack.Push (DtdInputState.PI);
2360 switch (ReadChar ()) {
2362 switch (ReadChar ()) {
2365 stateStack.Push (DtdInputState.ElementDecl);
2369 stateStack.Push (DtdInputState.EntityDecl);
2372 throw NotWFError ("unexpected token '<!E'.");
2377 stateStack.Push (DtdInputState.AttlistDecl);
2381 stateStack.Push (DtdInputState.NotationDecl);
2385 stateStack.Push (DtdInputState.Comment);
2390 throw NotWFError (String.Format ("unexpected '<{0}'.", (char) c));
// Quote handling: a quote opens a quoted state unless the scanner is
// already inside the other quote kind or a comment.
2394 if (State == DtdInputState.InsideSingleQuoted)
2396 else if (State != DtdInputState.InsideDoubleQuoted && State != DtdInputState.Comment)
2397 stateStack.Push (DtdInputState.InsideSingleQuoted);
2400 if (State == DtdInputState.InsideDoubleQuoted)
2402 else if (State != DtdInputState.InsideSingleQuoted && State != DtdInputState.Comment)
2403 stateStack.Push (DtdInputState.InsideDoubleQuoted);
// All four declaration states share the NotationDecl handling.
2407 case DtdInputState.ElementDecl:
2408 goto case DtdInputState.NotationDecl;
2409 case DtdInputState.AttlistDecl:
2410 goto case DtdInputState.NotationDecl;
2411 case DtdInputState.EntityDecl:
2412 goto case DtdInputState.NotationDecl;
2413 case DtdInputState.NotationDecl:
2416 case DtdInputState.InsideDoubleQuoted:
2417 case DtdInputState.InsideSingleQuoted:
2418 case DtdInputState.Comment:
2421 throw NotWFError ("unexpected token '>'");
2425 if (State == DtdInputState.PI) {
2426 if (ReadChar () == '>')
2431 if (State == DtdInputState.Comment) {
2432 if (PeekChar () == '-') {
2440 if (State != DtdInputState.Free && State != DtdInputState.EntityDecl && State != DtdInputState.Comment && State != DtdInputState.InsideDoubleQuoted && State != DtdInputState.InsideSingleQuoted)
2441 throw NotWFError ("Parameter Entity Reference cannot appear as a part of markupdecl (see XML spec 2.8).");
// Reads a quoted system literal and returns its content as a string
// (built in valueBuffer). Throws a not-well-formed error when the
// whitespace required after 'SYSTEM' is missing or the stream ends
// inside the literal.
// NOTE(review): expectSYSTEM presumably controls whether the "SYSTEM"
// keyword is consumed first — confirm.
2447 // The reader is positioned on the first 'S' of "SYSTEM".
2448 private string ReadSystemLiteral (bool expectSYSTEM)
2452 if (!SkipWhitespace ())
2453 throw NotWFError ("Whitespace is required after 'SYSTEM'.");
2457 int quoteChar = ReadChar (); // apos or quot
2459 ClearValueBuffer ();
// Collect characters until the closing quote.
2460 while (c != quoteChar) {
2463 throw NotWFError ("Unexpected end of stream in ExternalID.");
2465 AppendValueChar (c);
2467 return CreateValueString ();
// Reads a quoted public identifier literal and returns its content as a
// string (built in valueBuffer). Throws a not-well-formed error when the
// whitespace required after 'PUBLIC' is missing, the stream ends inside
// the literal, or a character is rejected by XmlChar.IsPubidChar.
2470 private string ReadPubidLiteral()
2473 if (!SkipWhitespace ())
2474 throw NotWFError ("Whitespace is required after 'PUBLIC'.");
2475 int quoteChar = ReadChar ();
2477 ClearValueBuffer ();
// Collect characters until the closing quote.
2478 while(c != quoteChar)
2481 if(c < 0) throw NotWFError ("Unexpected end of stream in ExternalID.");
2482 if(c != quoteChar && !XmlChar.IsPubidChar (c))
2483 throw NotWFError (String.Format ("character '{0}' not allowed for PUBLIC ID", (char)c ));
2485 AppendValueChar (c);
2487 return CreateValueString ();
// Reads a name starting at the current character and returns it. The
// prefix and local name produced by the two-output overload are
// discarded.
private string ReadName ()
{
	string ignoredPrefix;
	string ignoredLocalName;
	string qualifiedName = ReadName (out ignoredPrefix, out ignoredLocalName);
	return qualifiedName;
}
// Reads a name into nameBuffer: the first character must satisfy
// XmlChar.IsFirstNameChar (otherwise a not-well-formed error is thrown),
// and further characters are consumed while XmlChar.IsNameChar holds.
// The full name is interned in the parser context's NameTable.
// When namespaces are enabled and a ':' was seen after the first
// position, prefix and localName receive the interned parts before and
// after the first colon; the other visible branch sets prefix to
// String.Empty.
2498 private string ReadName (out string prefix, out string localName)
2500 // FIXME: need to reject non-QName names?
2502 int ch = PeekChar ();
2503 if (!XmlChar.IsFirstNameChar (ch))
2504 throw NotWFError (String.Format (CultureInfo.InvariantCulture, "a name did not start with a legal character {0} ({1})", ch, (char) ch));
// Hand-inlined append of the first character.
2509 // AppendNameChar (ch);
2511 if (nameLength == nameCapacity)
2512 ExpandNameCapacity ();
2513 if (ch < Char.MaxValue)
2514 nameBuffer [nameLength++] = (char) ch;
2516 AppendSurrogatePairNameChar (ch);
2521 while (XmlChar.IsNameChar (PeekChar ())) {
// Only the first colon is remembered as the prefix separator.
2524 if (namespaces && colonAt < 0 && ch == ':')
2525 colonAt = nameLength;
2526 // AppendNameChar (ch);
2528 if (nameLength == nameCapacity)
2529 ExpandNameCapacity ();
2530 if (ch < Char.MaxValue)
2531 nameBuffer [nameLength++] = (char) ch;
2533 AppendSurrogatePairNameChar (ch);
2537 string name = parserContext.NameTable.Add (nameBuffer, 0, nameLength);
2539 if (namespaces && colonAt > 0) {
2540 prefix = parserContext.NameTable.Add (nameBuffer, 0, colonAt);
2541 localName = parserContext.NameTable.Add (nameBuffer, colonAt + 1, nameLength - colonAt - 1);
2544 prefix = String.Empty;
// Consumes one character and throws a not-well-formed error when it
// differs from `expected`. In the error message, end of input (a
// negative value) is shown as "EOF".
2551 // Read the next character and compare it against the
2552 // specified character.
2553 private void Expect (int expected)
2555 int ch = ReadChar ();
2557 if (ch != expected) {
2558 throw NotWFError (String.Format (CultureInfo.InvariantCulture,
2559 "expected '{0}' ({1:X}) but found '{2}' ({3:X})",
2562 ch < 0 ? (object) "EOF" : (char) ch,
// Expects the input to continue with every character of `expected`, in
// order; each character is checked through the single-character Expect.
private void Expect (string expected)
{
	foreach (char required in expected)
		Expect (required);
}
// Reads characters, passing over whitespace, and throws a
// not-well-formed error when the character found is not `c`; end of
// input is shown as "EOF" in the message.
2574 private void ExpectAfterWhitespace (char c)
2577 int i = ReadChar ();
// The "i < 0x21" comparison runs before the XmlChar.IsWhitespace call.
2578 if (i < 0x21 && XmlChar.IsWhitespace (i))
2581 throw NotWFError (String.Format (CultureInfo.InvariantCulture, "Expected {0}, but found {1} [{2}]", c, i < 0 ? (object) "EOF" : (char) i, i));
// Skips over a run of whitespace (space, tab, LF, CR). `skipped` records
// whether the first peeked character was whitespace.
// The whitespace test is written inline instead of calling
// XmlChar.IsWhitespace (see the FIXME notes).
2586 // Does not consume the first non-whitespace character.
2587 private bool SkipWhitespace ()
2589 // FIXME: It should be inlined by the JIT.
2590 // bool skipped = XmlChar.IsWhitespace (PeekChar ());
2591 int ch = PeekChar ();
2592 bool skipped = (ch == 0x20 || ch == 0x9 || ch == 0xA || ch == 0xD);
2596 // FIXME: It should be inlined by the JIT.
2597 // while (XmlChar.IsWhitespace (PeekChar ()))
2599 while ((ch = PeekChar ()) == 0x20 || ch == 0x9 || ch == 0xA || ch == 0xD)
// Collects a run of whitespace (space, tab, LF, CR) into valueBuffer and
// sets up a SignificantWhitespace node when XmlSpace is Preserve, a
// Whitespace node otherwise.
// If currentState is still None it is advanced to XmlDeclaration.
2604 private void ReadWhitespace ()
2606 if (currentState == XmlNodeType.None)
2607 currentState = XmlNodeType.XmlDeclaration;
2609 ClearValueBuffer ();
2610 int ch = PeekChar ();
// Hand-inlined copy of AppendValueChar (see FIXME).
2612 // FIXME: it might be optimized by the JIT later,
2613 // AppendValueChar (ReadChar ());
2616 if (ch < Char.MaxValue)
2617 valueBuffer.Append ((char) ch);
2619 AppendSurrogatePairValueChar (ch);
2621 // FIXME: It should be inlined by the JIT.
2622 // } while ((ch = PeekChar ()) != -1 && XmlChar.IsWhitespace (ch));
2624 } while (ch == 0x20 || ch == 0x9 || ch == 0xA || ch == 0xD);
// Inside an element, whitespace followed by something other than '<' or
// end of input gets separate handling.
// NOTE(review): presumably it is continued as text via ReadText —
// confirm.
2626 if (currentState == XmlNodeType.Element && ch != -1 && ch != '<')
2629 XmlNodeType nodeType = (this.XmlSpace == XmlSpace.Preserve) ?
2630 XmlNodeType.SignificantWhitespace : XmlNodeType.Whitespace;
2631 SetProperties (nodeType,
2636 null, // value: create only when required
// Copies characters from the input into `buffer`, starting at `offset`,
// looping at most `length` times. shouldSkipUntilEndTag is set on entry
// and cleared before Read () moves to the next node. A '<' not followed
// by '/' is copied as ordinary content. End of input raises a
// not-well-formed error.
// NOTE(review): a code point above Char.MaxValue stores two chars in one
// loop iteration, so more than `length` chars could be written — confirm
// buffer bounds.
// NOTE(review): the surrogate split below does not look like the
// standard UTF-16 formula (0xD800 + ((c - 0x10000) >> 10),
// 0xDC00 + ((c - 0x10000) & 0x3FF)) — verify.
2643 // Returns -1 if it should throw an error.
2644 private int ReadCharsInternal (char [] buffer, int offset, int length)
2646 shouldSkipUntilEndTag = true;
2648 int bufIndex = offset;
2649 for (int i = 0; i < length; i++) {
2650 int c = PeekChar ();
2653 throw NotWFError ("Unexpected end of xml.");
2656 if (PeekChar () != '/') {
2657 buffer [bufIndex++] = '<';
2660 // Seems to skip immediate EndElement
2667 shouldSkipUntilEndTag = false;
2668 Read (); // move to the next node
2672 if (c < Char.MaxValue)
2673 buffer [bufIndex++] = (char) c;
2675 buffer [bufIndex++] = (char) (c / 0x10000 + 0xD800 - 1);
2676 buffer [bufIndex++] = (char) (c % 0x10000 + 0xDC00);
2684 private bool ReadUntilEndTag ()
2687 currentState = XmlNodeType.EndElement;
2693 throw NotWFError ("Unexpected end of xml.");
2695 if (PeekChar () != '/')
2698 string name = ReadName ();
2699 if (name != elementNames [elementNameStackPos - 1])
2703 elementNames [--elementNameStackPos] = null;