2 // System.Xml.XmlTextReader
5 // Jason Diamond (jason@injektilo.org)
6 // Adam Treat (manyoso@yahoo.com)
7 // Atsushi Enomoto (ginga@kit.hi-ho.ne.jp)
9 // (C) 2001, 2002 Jason Diamond http://injektilo.org/
14 // Some thought needs to be given to performance.
16 // If current node is on an Attribute, Prefix might be null, and
17 // in several fields which uses XmlReader, it should be considered.
21 using System.Collections;
23 using System.Security.Policy;
25 using System.Xml.Schema;
30 public class XmlTextReader : XmlReader, IXmlLineInfo
34 protected XmlTextReader ()
// Reads XML from a stream: the stream is wrapped in an XmlStreamReader
// and forwarded to another constructor overload.
public XmlTextReader (Stream input)
	: this (new XmlStreamReader (input))
{
}
// Reads XML from the resource named by url, supplying a freshly
// created NameTable to the (url, name table) overload.
public XmlTextReader (string url)
	: this (url, new NameTable ())
{
}
// Reads XML from a TextReader, supplying a freshly created NameTable
// to the (input, name table) overload.
public XmlTextReader (TextReader input)
	: this (input, new NameTable ())
{
}
53 protected XmlTextReader (XmlNameTable nt)
54 : this (String.Empty, null, XmlNodeType.None, null)
// Reads XML from a stream with a caller-supplied name table; the
// stream is wrapped in an XmlStreamReader before being forwarded.
public XmlTextReader (Stream input, XmlNameTable nt)
	: this (new XmlStreamReader (input), nt)
{
}
// Reads XML from a stream, using url as the base URI; the stream is
// wrapped in an XmlStreamReader before being forwarded.
public XmlTextReader (string url, Stream input)
	: this (url, new XmlStreamReader (input))
{
}
// Reads XML from a TextReader, using url as the base URI and a
// freshly created NameTable.
public XmlTextReader (string url, TextReader input)
	: this (url, input, new NameTable ())
{
}
73 public XmlTextReader (string url, XmlNameTable nt)
75 Uri uri = resolver.ResolveUri (null, url);
76 Stream s = resolver.GetEntity (uri, null, typeof (Stream)) as Stream;
77 XmlParserContext ctx = new XmlParserContext (nt,
78 new XmlNamespaceManager (nt),
81 this.InitializeContext (uri.ToString(), ctx, new XmlStreamReader (s), XmlNodeType.Document);
// Reads XML from a TextReader with a caller-supplied name table and
// an empty base URI.
public XmlTextReader (TextReader input, XmlNameTable nt)
	: this (String.Empty, input, nt)
{
}
89 public XmlTextReader (Stream xmlFragment, XmlNodeType fragType, XmlParserContext context)
90 : this (context != null ? context.BaseURI : String.Empty,
91 new XmlStreamReader (xmlFragment),
// Reads XML from a stream with an explicit base URI and name table;
// the stream is wrapped in an XmlStreamReader before being forwarded.
public XmlTextReader (string url, Stream input, XmlNameTable nt)
	: this (url, new XmlStreamReader (input), nt)
{
}
// Creates a document reader over input, with url as its base URI.
//
// The supplied name table is wrapped in a new parser context so that the
// reader really uses nt.  Passing a null context (as this constructor
// used to do) makes InitializeContext allocate its own NameTable, which
// silently discarded the caller's table.  A null nt still falls back to
// that default behaviour.
public XmlTextReader (string url, TextReader input, XmlNameTable nt)
	: this (url, input, XmlNodeType.Document,
		nt != null
			? new XmlParserContext (nt, new XmlNamespaceManager (nt), String.Empty, XmlSpace.None)
			: null)
{
}
107 public XmlTextReader (string xmlFragment, XmlNodeType fragType, XmlParserContext context)
108 : this (context != null ? context.BaseURI : String.Empty,
109 new StringReader (xmlFragment),
115 XmlTextReader (string url, TextReader fragment, XmlNodeType fragType, XmlParserContext context)
117 InitializeContext (url, context, fragment, fragType);
// Exposes the reader's attributeCount field.
public override int AttributeCount {
	get {
		return attributeCount;
	}
}
// Base URI as recorded in the current parser context.
public override string BaseURI {
	get {
		return parserContext.BaseURI;
	}
}
// Depth of the cursor position.
// On an attribute the result is one level below elementDepth, and inside
// an attribute value two levels below; both are reduced by one when the
// current token is not an Element.  Otherwise elementDepth is returned.
public override int Depth {
	get {
		// Read first, as before, so the token is always inspected.
		bool onElement = currentToken.NodeType == XmlNodeType.Element;
		int below;
		if (currentAttributeValue >= 0)
			below = 2; // inside attribute value.
		else if (currentAttribute >= 0)
			below = 1;
		else
			return elementDepth;
		return (onElement ? 0 : -1) + elementDepth + below;
	}
}
// Encoding as recorded in the current parser context.
public Encoding Encoding {
	get {
		return parserContext.Encoding;
	}
}
152 public EntityHandling EntityHandling {
153 get { throw new NotImplementedException (); }
// True once the reader has reached the end of input or has been closed.
public override bool EOF {
	get {
		switch (readState) {
		case ReadState.EndOfFile:
		case ReadState.Closed:
			return true;
		default:
			return false;
		}
	}
}
169 public override Evidence [] Evidences {
170 get { return base.Evidences; }
// True when the token under the cursor reports a non-null value.
public override bool HasValue {
	get {
		return cursorToken.Value != null;
	}
}
// Always false: XmlTextReader does not expand default attributes.
public override bool IsDefault {
	get {
		return false;
	}
}
// IsEmptyElement flag of the token under the cursor.
public override bool IsEmptyElement {
	get {
		return cursorToken.IsEmptyElement;
	}
}
// Indexer shorthand for GetAttribute (int).
public override string this [int i] {
	get {
		return GetAttribute (i);
	}
}
// Indexer shorthand for GetAttribute (string).
public override string this [string name] {
	get {
		return GetAttribute (name);
	}
}
// Indexer shorthand for GetAttribute (string, string).
public override string this [string localName, string namespaceName] {
	get {
		return GetAttribute (localName, namespaceName);
	}
}
// Line number to report: the reader's running line counter while
// useProceedingLineInfo is set, otherwise the line stored on the token
// under the cursor.
public int LineNumber {
	get {
		return useProceedingLineInfo ? line : cursorToken.LineNumber;
	}
}
// Column to report: the reader's running column counter while
// useProceedingLineInfo is set, otherwise the position stored on the
// token under the cursor.
public int LinePosition {
	get {
		return useProceedingLineInfo ? column : cursorToken.LinePosition;
	}
}
// Local name of the token under the cursor.
public override string LocalName {
	get {
		return cursorToken.LocalName;
	}
}
// Name of the token under the cursor.
public override string Name {
	get {
		return cursorToken.Name;
	}
}
// Whether namespace processing is enabled.  The flag can only be changed
// while the reader is still in its initial state.
public bool Namespaces {
	get {
		return namespaces;
	}
	set {
		if (readState == ReadState.Initial) {
			namespaces = value;
			return;
		}
		throw new InvalidOperationException ("Namespaces have to be set before reading.");
	}
}
// Namespace URI of the token under the cursor.
public override string NamespaceURI {
	get {
		return cursorToken.NamespaceURI;
	}
}
// Name table held by the current parser context.
public override XmlNameTable NameTable {
	get {
		return parserContext.NameTable;
	}
}
// Node type of the token under the cursor.
public override XmlNodeType NodeType {
	get {
		return cursorToken.NodeType;
	}
}
// Gets or sets the normalization flag consulted by the parsing code.
public bool Normalization {
	get {
		return normalization;
	}
	set {
		normalization = value;
	}
}
// Prefix of the token under the cursor.
public override string Prefix {
	get {
		return cursorToken.Prefix;
	}
}
// Quote character recorded on the token under the cursor.
public override char QuoteChar {
	get {
		return cursorToken.QuoteChar;
	}
}
// Current value of the reader's readState field.
public override ReadState ReadState {
	get {
		return readState;
	}
}
// Value of the token under the cursor; a null token value is reported
// as String.Empty.
public override string Value {
	get {
		if (cursorToken.Value == null)
			return String.Empty;
		return cursorToken.Value;
	}
}
// Gets or sets the whitespace handling mode consulted by ReadContent.
public WhitespaceHandling WhitespaceHandling {
	get {
		return whitespaceHandling;
	}
	set {
		whitespaceHandling = value;
	}
}
// xml:lang value as recorded in the current parser context.
public override string XmlLang {
	get {
		return parserContext.XmlLang;
	}
}
// Write-only: replaces the resolver stored in the resolver field.
public XmlResolver XmlResolver {
	set {
		resolver = value;
	}
}
// xml:space scope as recorded in the current parser context.
public override XmlSpace XmlSpace {
	get {
		return parserContext.XmlSpace;
	}
}
288 public override void Close ()
290 readState = ReadState.Closed;
292 cursorToken.Clear ();
293 currentToken.Clear ();
// Returns the value of the attribute at index i of the current node.
//
// Throws ArgumentOutOfRangeException when i is negative or not less than
// attributeCount.  Negative indexes used to fall through to the array
// access, and the single-string constructor treated the (wrongly worded)
// message as the parameter name; both are corrected here.
public override string GetAttribute (int i)
{
	if (i < 0 || i >= attributeCount)
		throw new ArgumentOutOfRangeException ("i", i, "Attribute index must be non-negative and less than AttributeCount.");

	return attributeTokens [i].Value;
}
// Returns the value of the first attribute whose Name equals name,
// or null when the current node has no such attribute.
// (The MS.NET 1.0 documentation claimed String.Empty was returned for an
// absent attribute; the 1.1 documentation corrected this to null.)
public override string GetAttribute (string name)
{
	int index = 0;
	while (index < attributeCount) {
		XmlAttributeTokenInfo candidate = attributeTokens [index];
		if (candidate.Name == name)
			return candidate.Value;
		index++;
	}
	return null;
}
// Finds the index of the first attribute matching both localName and
// namespaceURI; returns -1 when there is none.
private int GetIndexOfQualifiedAttribute (string localName, string namespaceURI)
{
	int index = 0;
	while (index < attributeCount) {
		XmlAttributeTokenInfo candidate = attributeTokens [index];
		bool sameName = candidate.LocalName == localName;
		if (sameName && candidate.NamespaceURI == namespaceURI)
			return index;
		index++;
	}
	return -1;
}
// Hands the reader's parser context to other code in this assembly.
internal XmlParserContext GetInternalParserContext ()
{
	XmlParserContext current = parserContext;
	return current;
}
// Returns the value of the attribute identified by local name and
// namespace URI, or null when the current node has no such attribute.
public override string GetAttribute (string localName, string namespaceURI)
{
	int position = this.GetIndexOfQualifiedAttribute (localName, namespaceURI);
	return position < 0 ? null : attributeTokens [position].Value;
}
// Returns a reader over the input that has not been parsed yet.
// When the peek buffer is fully consumed the underlying reader is
// returned as is; otherwise the unread buffered characters are prepended
// to the rest of the underlying reader's text.
public TextReader GetRemainder ()
{
	int unread = peekCharsLength - peekCharsIndex;
	if (peekCharsIndex == peekCharsLength)
		return reader;

	string buffered = new string (peekChars, peekCharsIndex, unread);
	return new StringReader (buffered + reader.ReadToEnd ());
}
349 bool IXmlLineInfo.HasLineInfo ()
// Resolves prefix through the internal overload, passing false for
// atomizedName.
public override string LookupNamespace (string prefix)
{
	const bool atomized = false;
	return LookupNamespace (prefix, atomized);
}
// Resolves prefix against the parser context's namespace manager,
// forwarding the atomizedName flag unchanged.
internal string LookupNamespace (string prefix, bool atomizedName)
{
	XmlNamespaceManager manager = parserContext.NamespaceManager;
	return manager.LookupNamespace (prefix, atomizedName);
}
// Moves the cursor to the attribute at index i of the current node.
//
// Throws ArgumentOutOfRangeException when i is negative or not less than
// attributeCount.  Negative indexes were previously unchecked and the
// message was passed where the constructor expects the parameter name.
public override void MoveToAttribute (int i)
{
	if (i < 0 || i >= attributeCount)
		throw new ArgumentOutOfRangeException ("i", i, "attribute index out of range.");

	currentAttribute = i;
	currentAttributeValue = -1; // not positioned inside the attribute's value
	cursorToken = attributeTokens [i];
}
// Moves the cursor to the first attribute whose Name equals name.
// Returns false, leaving the cursor untouched, when there is none.
public override bool MoveToAttribute (string name)
{
	int index = 0;
	while (index < attributeCount) {
		if (attributeTokens [index].Name == name) {
			MoveToAttribute (index);
			return true;
		}
		index++;
	}
	return false;
}
// Moves the cursor to the attribute identified by local name and
// namespace.  Returns false when the current node has no such attribute.
public override bool MoveToAttribute (string localName, string namespaceName)
{
	int position = GetIndexOfQualifiedAttribute (localName, namespaceName);
	bool found = position >= 0;
	if (found)
		MoveToAttribute (position);
	return found;
}
// Moves the cursor from an attribute back to the node that owns it.
// Returns true only when the cursor actually moved.
public override bool MoveToElement ()
{
	// currentToken can be null for the attribute .ctor()
	bool canMove = currentToken != null
		&& cursorToken != currentToken
		&& currentAttribute >= 0;
	if (!canMove)
		return false;

	currentAttribute = -1;
	currentAttributeValue = -1;
	cursorToken = currentToken;
	return true;
}
// Moves the cursor to the first attribute of the current node.
// Returns false when the node has no attributes.
public override bool MoveToFirstAttribute ()
{
	if (attributeCount != 0) {
		// Rewind to the owning node so "next" means "first".
		MoveToElement ();
		return MoveToNextAttribute ();
	}
	return false;
}
// Advances the cursor to the attribute after the current one.
// Returns false, without moving, when no further attribute exists.
public override bool MoveToNextAttribute ()
{
	bool nothingToVisit = currentAttribute == 0 && attributeCount == 0;
	if (nothingToVisit || currentAttribute + 1 >= attributeCount)
		return false;

	currentAttribute++;
	currentAttributeValue = -1;
	cursorToken = attributeTokens [currentAttribute];
	return true;
}
435 public override bool Read ()
437 if (startNodeType == XmlNodeType.Attribute) {
438 if (currentAttribute == 0)
439 return false; // already read.
441 IncrementAttributeToken ();
442 ReadAttributeValueTokens ('"');
443 cursorToken = attributeTokens [0];
444 currentAttributeValue = -1;
445 readState = ReadState.Interactive;
450 readState = ReadState.Interactive;
451 currentLinkedNodeLineNumber = line;
452 currentLinkedNodeLinePosition = column;
453 useProceedingLineInfo = true;
455 cursorToken = currentToken;
457 currentAttribute = currentAttributeValue = -1;
458 currentToken.Clear ();
460 // It was moved from end of ReadStartTag ().
466 if (shouldSkipUntilEndTag) {
467 shouldSkipUntilEndTag = false;
468 return ReadUntilEndTag ();
471 base64CacheStartsAt = -1;
473 more = ReadContent ();
475 if (!more && startNodeType == XmlNodeType.Document && currentState != XmlNodeType.EndElement)
476 throw new XmlException ("Document element did not appear.");
478 useProceedingLineInfo = false;
482 public override bool ReadAttributeValue ()
484 if (readState == ReadState.Initial && startNodeType == XmlNodeType.Attribute) {
488 if (currentAttribute < 0)
490 XmlAttributeTokenInfo ti = attributeTokens [currentAttribute];
491 if (currentAttributeValue < 0)
492 currentAttributeValue = ti.ValueTokenStartIndex - 1;
494 if (currentAttributeValue < ti.ValueTokenEndIndex) {
495 currentAttributeValue++;
496 cursorToken = attributeValueTokens [currentAttributeValue];
503 public int ReadBase64 (byte [] buffer, int offset, int length)
506 throw new ArgumentOutOfRangeException ("offset", offset, "Offset must be non-negative integer.");
508 throw new ArgumentOutOfRangeException ("length", length, "Length must be non-negative integer.");
509 else if (buffer.Length < offset + length)
510 throw new ArgumentOutOfRangeException ("buffer length is smaller than the sum of offset and length.");
512 if (length == 0) // It does not raise an error.
515 int bufIndex = offset;
516 int bufLast = offset + length;
518 if (base64CacheStartsAt >= 0) {
519 for (int i = base64CacheStartsAt; i < 3; i++) {
520 buffer [bufIndex++] = base64Cache [base64CacheStartsAt++];
521 if (bufIndex == bufLast)
522 return bufLast - offset;
526 for (int i = 0; i < 3; i++)
528 base64CacheStartsAt = -1;
530 int max = (int) System.Math.Ceiling (4.0 / 3 * length);
531 int additional = max % 4;
533 max += 4 - additional;
534 char [] chars = new char [max];
535 int charsLength = ReadChars (chars, 0, max);
539 for (int i = 0; i < charsLength - 3; i += 4) {
540 b = (byte) (GetBase64Byte (chars [i]) << 2);
541 if (bufIndex < bufLast)
542 buffer [bufIndex] = b;
544 if (base64CacheStartsAt < 0)
545 base64CacheStartsAt = 0;
548 // charsLength mod 4 might not equals to 0.
549 if (i + 1 == charsLength)
551 b = GetBase64Byte (chars [i + 1]);
552 work = (byte) (b >> 4);
553 if (bufIndex < bufLast) {
554 buffer [bufIndex] += work;
558 base64Cache [0] += work;
560 work = (byte) ((b & 0xf) << 4);
561 if (bufIndex < bufLast) {
562 buffer [bufIndex] = work;
565 if (base64CacheStartsAt < 0)
566 base64CacheStartsAt = 1;
567 base64Cache [1] = work;
570 if (i + 2 == charsLength)
572 b = GetBase64Byte (chars [i + 2]);
573 work = (byte) (b >> 2);
574 if (bufIndex < bufLast) {
575 buffer [bufIndex] += work;
579 base64Cache [1] += work;
581 work = (byte) ((b & 3) << 6);
582 if (bufIndex < bufLast)
583 buffer [bufIndex] = work;
585 if (base64CacheStartsAt < 0)
586 base64CacheStartsAt = 2;
587 base64Cache [2] = work;
589 if (i + 3 == charsLength)
591 work = GetBase64Byte (chars [i + 3]);
592 if (bufIndex < bufLast) {
593 buffer [bufIndex] += work;
597 base64Cache [2] += work;
599 return System.Math.Min (bufLast - offset, bufIndex - offset);
602 public int ReadBinHex (byte [] buffer, int offset, int length)
605 throw new ArgumentOutOfRangeException ("offset", offset, "Offset must be non-negative integer.");
607 throw new ArgumentOutOfRangeException ("length", length, "Length must be non-negative integer.");
608 else if (buffer.Length < offset + length)
609 throw new ArgumentOutOfRangeException ("buffer length is smaller than the sum of offset and length.");
614 char [] chars = new char [length * 2];
615 int charsLength = ReadChars (chars, 0, length * 2);
616 return XmlConvert.FromBinHexString (chars, offset, charsLength, buffer);
// Public entry point; the work is done by ReadCharsInternal.
public int ReadChars (char [] buffer, int offset, int length)
{
	int charsRead = ReadCharsInternal (buffer, offset, length);
	return charsRead;
}
// Public entry point; the work is done by ReadInnerXmlInternal.
public override string ReadInnerXml ()
{
	string innerXml = ReadInnerXmlInternal ();
	return innerXml;
}
// Public entry point; the work is done by ReadOuterXmlInternal.
public override string ReadOuterXml ()
{
	string outerXml = ReadOuterXmlInternal ();
	return outerXml;
}
// Public entry point; the work is done by ReadStringInternal.
public override string ReadString ()
{
	string text = ReadStringInternal ();
	return text;
}
642 public void ResetState ()
// Always throws InvalidOperationException: XmlTextReader does not
// resolve entities.
public override void ResolveEntity ()
{
	string reason = "XmlTextReader cannot resolve external entities.";
	throw new InvalidOperationException (reason);
}
654 [MonoTODO ("Implement for performance reason")]
655 public override void Skip ()
663 // Parsed DTD Objects
664 #if DTD_HANDLE_EVENTS
665 internal event ValidationEventHandler ValidationEventHandler;
// DTD object model held by the current parser context.
internal DTDObjectModel DTD {
	get {
		return parserContext.Dtd;
	}
}
// Resolver currently stored in the resolver field.
internal XmlResolver Resolver {
	get {
		return resolver;
	}
}
678 internal class XmlTokenInfo
680 public XmlTokenInfo (XmlTextReader xtr, bool isPrimaryToken)
682 this.isPrimaryToken = isPrimaryToken;
690 protected XmlTextReader Reader;
693 public string LocalName;
694 public string Prefix;
695 public string NamespaceURI;
696 public bool IsEmptyElement;
697 public char QuoteChar;
698 public int LineNumber;
699 public int LinePosition;
701 public XmlNodeType NodeType;
703 public virtual string Value {
705 if (valueCache != null)
708 case XmlNodeType.Text:
709 case XmlNodeType.SignificantWhitespace:
710 case XmlNodeType.Whitespace:
711 case XmlNodeType.Comment:
712 case XmlNodeType.CDATA:
713 case XmlNodeType.ProcessingInstruction:
714 valueCache = Reader.CreateValueString ();
719 set { valueCache = value; }
722 public virtual void Clear ()
725 NodeType = XmlNodeType.None;
726 Name = LocalName = Prefix = NamespaceURI = String.Empty;
727 IsEmptyElement = false;
729 LineNumber = LinePosition = 0;
732 internal virtual void FillNames ()
734 if (Reader.Namespaces) {
735 int indexOfColon = -1;
737 case XmlNodeType.Attribute:
738 case XmlNodeType.Element:
739 case XmlNodeType.EndElement:
740 indexOfColon = Name.IndexOf (':');
744 if (indexOfColon == -1) {
745 Prefix = String.Empty;
748 // This improves speed by at least nearly 5%, but eats more memory at least nearly 0.3%
749 // However, this might be reverted if NameTable is got improved.
750 char [] nameArr = Name.ToCharArray ();
751 Prefix = Reader.NameTable.Add (nameArr, 0, indexOfColon);
752 LocalName = Reader.NameTable.Add (nameArr, indexOfColon + 1, nameArr.Length - indexOfColon - 1);
753 // Prefix = Reader.NameTable.Add (Name.Substring (0, indexOfColon));
754 // LocalName = Reader.NameTable.Add (Name.Substring (indexOfColon + 1));
759 case XmlNodeType.Attribute:
760 if (Prefix.Length == 0)
761 NamespaceURI = string.Empty;
763 NamespaceURI = Reader.LookupNamespace (Prefix, true);
766 case XmlNodeType.Element:
767 case XmlNodeType.EndElement:
768 NamespaceURI = Reader.LookupNamespace (Prefix, true);
775 Prefix = String.Empty;
781 internal class XmlAttributeTokenInfo : XmlTokenInfo
783 public XmlAttributeTokenInfo (XmlTextReader reader)
784 : base (reader, false)
786 NodeType = XmlNodeType.Attribute;
789 public int ValueTokenStartIndex;
790 public int ValueTokenEndIndex;
792 bool cachedNormalization;
793 StringBuilder tmpBuilder = new StringBuilder ();
795 public override string Value {
797 if (cachedNormalization != Reader.Normalization)
799 if (valueCache != null)
802 cachedNormalization = Reader.Normalization;
804 // An empty value should return String.Empty.
805 if (ValueTokenStartIndex == ValueTokenEndIndex) {
806 XmlTokenInfo ti = Reader.attributeValueTokens [ValueTokenStartIndex];
807 if (ti.NodeType == XmlNodeType.EntityReference)
808 valueCache = String.Concat ("&", ti.Name, ";");
810 valueCache = ti.Value;
811 if (cachedNormalization)
816 tmpBuilder.Length = 0;
817 for (int i = ValueTokenStartIndex; i <= ValueTokenEndIndex; i++) {
818 XmlTokenInfo ti = Reader.attributeValueTokens [i];
819 if (ti.NodeType == XmlNodeType.Text)
820 tmpBuilder.Append (ti.Value);
822 tmpBuilder.Append ('&');
823 tmpBuilder.Append (ti.Name);
824 tmpBuilder.Append (';');
828 valueCache = tmpBuilder.ToString ();
829 if (cachedNormalization)
834 set { valueCache = value; }
837 public override void Clear ()
841 NodeType = XmlNodeType.Attribute;
842 ValueTokenStartIndex = ValueTokenEndIndex = 0;
// Fills in the name parts via the base implementation, then places
// namespace declarations ("xmlns" or "xmlns:*") in the xmlns namespace.
internal override void FillNames ()
{
	base.FillNames ();

	bool declaresNamespace = Prefix == "xmlns" || Name == "xmlns";
	if (declaresNamespace)
		NamespaceURI = XmlNamespaceManager.XmlnsXmlns;
}
852 private void NormalizeSpaces ()
854 tmpBuilder.Length = 0;
855 for (int i = 0; i < valueCache.Length; i++)
856 switch (valueCache [i]) {
858 if (i + 1 < valueCache.Length && valueCache [i + 1] == '\n')
863 tmpBuilder.Append (' ');
866 tmpBuilder.Append (valueCache [i]);
869 valueCache = tmpBuilder.ToString ();
873 private XmlTokenInfo cursorToken;
874 private XmlTokenInfo currentToken;
875 private XmlAttributeTokenInfo currentAttributeToken;
876 private XmlTokenInfo currentAttributeValueToken;
877 private XmlAttributeTokenInfo [] attributeTokens = new XmlAttributeTokenInfo [10];
878 private XmlTokenInfo [] attributeValueTokens = new XmlTokenInfo [10];
879 private int currentAttribute;
880 private int currentAttributeValue;
881 private int attributeCount;
883 private XmlParserContext parserContext;
885 private ReadState readState;
888 private int elementDepth;
889 private bool depthUp;
891 private bool popScope;
893 private string [] elementNames;
894 int elementNameStackPos;
896 private bool allowMultipleRoot;
898 private bool isStandalone;
900 private bool returnEntityReference;
901 private string entityReferenceName;
903 private char [] nameBuffer;
904 private int nameLength;
905 private int nameCapacity;
906 private const int initialNameCapacity = 32;
908 private char [] valueBuffer;
909 private int valueLength;
910 private int valueCapacity;
911 private const int initialValueCapacity = 256;
913 private char [] currentTagBuffer;
914 private int currentTagLength;
915 private int currentTagCapacity;
916 private const int initialCurrentTagCapacity = 256;
918 private TextReader reader;
919 private char [] peekChars;
920 private int peekCharsIndex;
921 private int peekCharsLength;
922 private const int peekCharCapacity = 1024;
927 private int currentLinkedNodeLineNumber;
928 private int currentLinkedNodeLinePosition;
929 private bool useProceedingLineInfo;
931 private XmlNodeType startNodeType;
932 // State machine attribute.
933 // XmlDeclaration: after the first node.
934 // DocumentType: after doctypedecl
935 // Element: inside document element
936 // EndElement: after document element
937 private XmlNodeType currentState;
939 // For ReadChars()/ReadBase64()/ReadBinHex()
940 private bool shouldSkipUntilEndTag;
941 private byte [] base64Cache = new byte [3];
942 private int base64CacheStartsAt;
944 // These values are never re-initialized.
945 private bool namespaces = true;
946 private WhitespaceHandling whitespaceHandling = WhitespaceHandling.All;
947 private XmlResolver resolver = new XmlUrlResolver ();
948 private bool normalization = false;
952 currentToken = new XmlTokenInfo (this, true);
953 cursorToken = currentToken;
954 currentAttribute = -1;
955 currentAttributeValue = -1;
958 readState = ReadState.Initial;
959 allowMultipleRoot = false;
965 popScope = allowMultipleRoot = false;
966 elementNames = new string [10];
967 elementNameStackPos = 0;
969 isStandalone = false;
970 returnEntityReference = false;
971 entityReferenceName = String.Empty;
973 nameBuffer = new char [initialNameCapacity];
975 nameCapacity = initialNameCapacity;
977 valueBuffer = new char [initialValueCapacity];
979 valueCapacity = initialValueCapacity;
981 currentTagBuffer = new char [initialCurrentTagCapacity];
982 currentTagLength = 0;
983 currentTagCapacity = initialCurrentTagCapacity;
987 if (peekChars == null)
988 peekChars = new char [peekCharCapacity];
992 currentTagLength = 0;
994 currentLinkedNodeLineNumber = currentLinkedNodeLinePosition = 0;
995 useProceedingLineInfo = false;
997 currentState = XmlNodeType.None;
999 shouldSkipUntilEndTag = false;
1000 base64CacheStartsAt = -1;
// Binds the reader to its parser context, base URI and input.
//
//   url      - base URI; ignored when null or empty.
//   context  - parser context to use; when null a default context with a
//              new NameTable is created.
//   fragment - the text to parse.
//   fragType - Attribute, Element or Document; any other node type is
//              rejected with an XmlException.
//
// Fix: the attribute-fragment branch must escape double quotes as the
// entity reference "&quot;"; the literal had been corrupted into an
// unterminated string.
// NOTE(review): lines absent from the reviewed listing (the context
// constructor's trailing arguments, the Init () call and the final
// reader assignment) were restored from their surrounding usage - confirm
// against the upstream source.
private void InitializeContext (string url, XmlParserContext context, TextReader fragment, XmlNodeType fragType)
{
	startNodeType = fragType;
	parserContext = context;
	if (context == null) {
		XmlNameTable nt = new NameTable ();
		parserContext = new XmlParserContext (nt,
			new XmlNamespaceManager (nt),
			String.Empty,
			XmlSpace.None);
	}

	if (url != null && url.Length > 0) {
		Uri uri = null;
		try {
			uri = new Uri (url);
		} catch (Exception) {
			// Deliberate fallback: anything that is not an absolute
			// URI is resolved against the current directory.
			string path = Path.GetFullPath ("./a");
			uri = new Uri (new Uri (path), url);
		}
		parserContext.BaseURI = uri.ToString ();
	}

	Init ();

	switch (fragType) {
	case XmlNodeType.Attribute:
		// Read () parses the value as if delimited by '"', so quotes
		// in the raw text have to be escaped first.
		fragment = new StringReader (fragment.ReadToEnd ().Replace ("\"", "&quot;"));
		break;
	case XmlNodeType.Element:
		currentState = XmlNodeType.Element;
		allowMultipleRoot = true;
		break;
	case XmlNodeType.Document:
		break;
	default:
		throw new XmlException (String.Format ("NodeType {0} is not allowed to create XmlTextReader.", fragType));
	}

	reader = fragment;
}
1045 // Use this method rather than setting the properties
1046 // directly so that all the necessary properties can
1047 // be changed in harmony with each other. Maybe the
1048 // fields should be in a separate class to help enforce
// Updates currentToken in one step: delegates to the token-taking
// overload, then stamps the token with the line information captured
// for the node being read.
private void SetProperties (
	XmlNodeType nodeType,
	string name,
	bool isEmptyElement,
	string value,
	bool clearAttributes)
{
	XmlTokenInfo target = currentToken;
	SetProperties (target, nodeType, name, isEmptyElement, value, clearAttributes);

	currentToken.LinePosition = this.currentLinkedNodeLinePosition;
	currentToken.LineNumber = this.currentLinkedNodeLineNumber;
}
1062 private void SetProperties (
1064 XmlNodeType nodeType,
1066 bool isEmptyElement,
1068 bool clearAttributes)
1071 token.NodeType = nodeType;
1073 token.IsEmptyElement = isEmptyElement;
1074 token.Value = value;
1075 this.elementDepth = depth;
1077 if (clearAttributes)
1083 private void ClearAttributes ()
1085 for (int i = 0; i < attributeCount; i++)
1086 attributeTokens [i].Clear ();
1088 currentAttribute = -1;
1089 currentAttributeValue = -1;
1092 private int PeekChar ()
1094 if (peekCharsLength == peekCharsIndex) {
1095 if (!ReadTextReader ())
1100 return peekChars [peekCharsIndex];
1103 private int ReadChar ()
1107 if (peekCharsLength == peekCharsIndex) {
1108 if (!ReadTextReader ())
1112 ch = peekChars [peekCharsIndex++];
1120 if (currentState != XmlNodeType.Element)
1121 AppendCurrentTagChar (ch);
// Refills the peek buffer from the underlying reader and rewinds the
// buffer index.  Returns false when no characters could be read.
private bool ReadTextReader ()
{
	peekCharsIndex = 0;
	peekCharsLength = reader.Read (peekChars, 0, peekCharCapacity);
	return peekCharsLength != 0;
}
// Converts a Unicode code point into its UTF-16 string form.
//
// Code points up to U+FFFF become a one-character string.  Larger values
// are encoded as a surrogate pair: after subtracting 0x10000, the upper
// 10 bits are added to 0xD800 and the lower 10 bits to 0xDC00.
// The earlier arithmetic (ch / 0x10000 + 0xD800 - 1, ch % 0x10000 +
// 0xDC00) produced values outside the surrogate ranges, and U+FFFF
// itself was wrongly routed to the surrogate branch.
private string ExpandSurrogateChar (int ch)
{
	if (ch <= Char.MaxValue)
		return ((char) ch).ToString ();

	int offset = ch - 0x10000;
	char high = (char) (0xD800 + (offset >> 10));
	char low = (char) (0xDC00 + (offset & 0x3FF));
	return new string (new char [] {high, low});
}
1144 // This should really keep track of some state so
1145 // that it's not possible to have more than one document
1146 // element or text outside of the document element.
1147 private bool ReadContent ()
1149 currentTagLength = 0;
1151 parserContext.NamespaceManager.PopScope ();
1155 if (returnEntityReference)
1156 SetEntityReferenceProperties ();
1158 switch (PeekChar ()) {
1163 case '\r': goto case ' ';
1164 case '\n': goto case ' ';
1165 case '\t': goto case ' ';
1167 if (whitespaceHandling == WhitespaceHandling.All ||
1168 whitespaceHandling == WhitespaceHandling.Significant)
1172 return ReadContent ();
1176 readState = ReadState.EndOfFile;
1177 ClearValueBuffer ();
1179 XmlNodeType.None, // nodeType
1180 String.Empty, // name
1181 false, // isEmptyElement
1183 true // clearAttributes
1186 throw new XmlException ("unexpected end of file. Current depth is " + depth);
1194 return this.ReadState != ReadState.EndOfFile;
// Turns the pending entity reference into the current node.
// First validates the reference: a standalone document may only refer
// to entities declared in the internal subset, and unparsed entities
// (those with a notation name) may not be referenced here.  Then the
// current token becomes an EntityReference node and the pending
// reference state is reset.
private void SetEntityReferenceProperties ()
{
	DTDEntityDeclaration decl = null;
	if (DTD != null)
		decl = DTD.EntityDecls [entityReferenceName];

	if (this.isStandalone && (DTD == null || decl == null || !decl.IsInternalSubset))
		throw new XmlException (this as IXmlLineInfo,
			"Standalone document must not contain any references to an non-internally declared entity.");

	bool isUnparsed = decl != null && decl.NotationName != null;
	if (isUnparsed)
		throw new XmlException (this as IXmlLineInfo,
			"Reference to any unparsed entities is not allowed here.");

	ClearValueBuffer ();
	SetProperties (
		XmlNodeType.EntityReference, // nodeType
		entityReferenceName, // name
		false, // isEmptyElement
		null, // value
		true // clearAttributes
	);

	entityReferenceName = String.Empty;
	returnEntityReference = false;
}
1221 // The leading '<' has already been consumed.
1222 private void ReadTag ()
1224 switch (PeekChar ())
1232 ReadProcessingInstruction ();
1244 // The leading '<' has already been consumed.
1245 private void ReadStartTag ()
1247 if (currentState == XmlNodeType.EndElement)
1248 throw new XmlException (this as IXmlLineInfo,
1249 "Element cannot appear in this state.");
1250 currentState = XmlNodeType.Element;
1252 parserContext.NamespaceManager.PushScope ();
1254 string name = ReadName ();
1255 if (currentState == XmlNodeType.EndElement)
1256 throw new XmlException (this as IXmlLineInfo,"document has terminated, cannot open new element");
1258 bool isEmptyElement = false;
1263 if (XmlChar.IsFirstNameChar (PeekChar ()))
1264 ReadAttributes (false);
1265 cursorToken = this.currentToken;
1268 for (int i = 0; i < attributeCount; i++)
1269 attributeTokens [i].FillNames ();
1272 for (int i = 0; i < attributeCount; i++)
1273 for (int j = i + 1; j < attributeCount; j++)
1274 if (Object.ReferenceEquals (attributeTokens [i].Name, attributeTokens [j].Name) ||
1275 (Object.ReferenceEquals (attributeTokens [i].LocalName, attributeTokens [j].LocalName) &&
1276 Object.ReferenceEquals (attributeTokens [i].NamespaceURI, attributeTokens [j].NamespaceURI)))
1277 throw new XmlException (this as IXmlLineInfo,
1278 "Attribute name and qualified name must be identical.");
1280 string baseUri = GetAttribute ("xml:base");
1281 if (baseUri != null) {
1282 if (this.resolver != null)
1283 parserContext.BaseURI = resolver.ResolveUri (new Uri (BaseURI), baseUri).ToString ();
1285 parserContext.BaseURI = baseUri;
1287 string xmlLang = GetAttribute ("xml:lang");
1288 if (xmlLang != null)
1289 parserContext.XmlLang = xmlLang;
1290 string xmlSpaceAttr = GetAttribute ("xml:space");
1291 if (xmlSpaceAttr != null) {
1292 if (xmlSpaceAttr == "preserve")
1293 parserContext.XmlSpace = XmlSpace.Preserve;
1294 else if (xmlSpaceAttr == "default")
1295 parserContext.XmlSpace = XmlSpace.Default;
1297 throw new XmlException (this as IXmlLineInfo,String.Format ("Invalid xml:space value: {0}", xmlSpaceAttr));
1299 if (PeekChar () == '/') {
1301 isEmptyElement = true;
1306 PushElementName (name);
1307 parserContext.PushScope ();
1312 XmlNodeType.Element, // nodeType
1314 isEmptyElement, // isEmptyElement
1316 false // clearAttributes
1320 CheckCurrentStateUpdate ();
// Pushes name onto the open-element stack, doubling the backing array
// when it is full.
private void PushElementName (string name)
{
	int capacity = elementNames.Length;
	if (capacity == elementNameStackPos) {
		string [] grown = new string [capacity * 2];
		Array.Copy (elementNames, 0, grown, 0, elementNameStackPos);
		elementNames = grown;
	}

	elementNames [elementNameStackPos++] = name;
}
1333 // The reader is positioned on the first character
1334 // of the element's name.
1335 private void ReadEndTag ()
1337 if (currentState != XmlNodeType.Element)
1338 throw new XmlException (this as IXmlLineInfo,
1339 "End tag cannot appear in this state.");
1341 string name = ReadName ();
1342 if (elementNameStackPos == 0)
1343 throw new XmlException (this as IXmlLineInfo,"closing element without matching opening element");
1344 string expected = elementNames [--elementNameStackPos];
1345 if (expected != name)
1346 throw new XmlException (this as IXmlLineInfo,String.Format ("unmatched closing element: expected {0} but found {1}", expected, name));
1347 parserContext.PopScope ();
1349 ExpectAfterWhitespace ('>');
1354 XmlNodeType.EndElement, // nodeType
1356 false, // isEmptyElement
1358 true // clearAttributes
1363 CheckCurrentStateUpdate ();
// Marks the document element as finished: at depth 0, when multiple
// roots are not allowed, an empty element or an end tag moves the state
// machine to EndElement.
private void CheckCurrentStateUpdate ()
{
	if (depth != 0 || allowMultipleRoot)
		return;

	if (IsEmptyElement || NodeType == XmlNodeType.EndElement)
		currentState = XmlNodeType.EndElement;
}
// Appends the code point ch to the name buffer, growing it as needed.
//
// Code points above U+FFFF are stored as a UTF-16 surrogate pair: after
// subtracting 0x10000, the upper 10 bits are added to 0xD800 and the
// lower 10 bits to 0xDC00.  The previous arithmetic yielded values
// outside the surrogate ranges and sent U+FFFF down the surrogate path.
private void AppendNameChar (int ch)
{
	if (nameLength == nameCapacity)
		ExpandNameCapacity ();

	if (ch <= Char.MaxValue) {
		nameBuffer [nameLength++] = (char) ch;
		return;
	}

	int offset = ch - 0x10000;
	nameBuffer [nameLength++] = (char) (0xD800 + (offset >> 10));
	// The second half of the pair may need room of its own.
	if (nameLength == nameCapacity)
		ExpandNameCapacity ();
	nameBuffer [nameLength++] = (char) (0xDC00 + (offset & 0x3FF));
}
// Doubles the name buffer, keeping the nameLength characters already
// stored.
private void ExpandNameCapacity ()
{
	nameCapacity *= 2;
	char [] grown = new char [nameCapacity];
	Array.Copy (nameBuffer, grown, nameLength);
	nameBuffer = grown;
}
// Adds the buffered name to the parser context's name table and returns
// the string that Add yields.
private string CreateNameString ()
{
	XmlNameTable table = parserContext.NameTable;
	return table.Add (nameBuffer, 0, nameLength);
}
// Appends the code point ch to the value buffer, growing it as needed.
//
// Code points above U+FFFF are stored as a UTF-16 surrogate pair: after
// subtracting 0x10000, the upper 10 bits are added to 0xD800 and the
// lower 10 bits to 0xDC00.  The previous arithmetic yielded values
// outside the surrogate ranges and sent U+FFFF down the surrogate path.
private void AppendValueChar (int ch)
{
	if (valueLength == valueCapacity)
		ExpandValueCapacity ();

	if (ch <= Char.MaxValue) {
		valueBuffer [valueLength++] = (char) ch;
		return;
	}

	int offset = ch - 0x10000;
	valueBuffer [valueLength++] = (char) (0xD800 + (offset >> 10));
	// The second half of the pair may need room of its own.
	if (valueLength == valueCapacity)
		ExpandValueCapacity ();
	valueBuffer [valueLength++] = (char) (0xDC00 + (offset & 0x3FF));
}
// Doubles the value buffer, keeping the valueLength characters already
// stored.
private void ExpandValueCapacity ()
{
	valueCapacity *= 2;
	char [] grown = new char [valueCapacity];
	Array.Copy (valueBuffer, grown, valueLength);
	valueBuffer = grown;
}
// Materialises the first valueLength characters of the value buffer as
// a new string.
private string CreateValueString ()
{
	string text = new string (valueBuffer, 0, valueLength);
	return text;
}
1426 private void ClearValueBuffer ()
// Appends the code point ch to the current-tag buffer, growing it as
// needed.
//
// Code points above U+FFFF are stored as a UTF-16 surrogate pair: after
// subtracting 0x10000, the upper 10 bits are added to 0xD800 and the
// lower 10 bits to 0xDC00.  The previous arithmetic yielded values
// outside the surrogate ranges and sent U+FFFF down the surrogate path.
private void AppendCurrentTagChar (int ch)
{
	if (currentTagLength == currentTagCapacity)
		ExpandCurrentTagCapacity ();

	if (ch <= Char.MaxValue) {
		currentTagBuffer [currentTagLength++] = (char) ch;
		return;
	}

	int offset = ch - 0x10000;
	currentTagBuffer [currentTagLength++] = (char) (0xD800 + (offset >> 10));
	// The second half of the pair may need room of its own.
	if (currentTagLength == currentTagCapacity)
		ExpandCurrentTagCapacity ();
	currentTagBuffer [currentTagLength++] = (char) (0xDC00 + (offset & 0x3FF));
}
// Doubles the current-tag buffer, keeping the currentTagLength
// characters already stored.
private void ExpandCurrentTagCapacity ()
{
	currentTagCapacity *= 2;
	char [] grown = new char [currentTagCapacity];
	Array.Copy (currentTagBuffer, grown, currentTagLength);
	currentTagBuffer = grown;
}
// Materialises the first currentTagLength characters of the current-tag
// buffer as a new string.
private string CreateCurrentTagString ()
{
	string text = new string (currentTagBuffer, 0, currentTagLength);
	return text;
}
// Empties the current-tag buffer by resetting its length; the backing
// array is kept for reuse.
private void ClearCurrentTagBuffer ()
{
	this.currentTagLength = 0;
}
// Reads character data until the next '<' or end of input (-1),
// accumulating it with AppendValueChar, then sets up either an entity
// reference node or a Text / SignificantWhitespace / Whitespace node.
//
// notWhitespace: initial value of the flag that selects XmlNodeType.Text
// over the whitespace node types; the loop can also set it to true.
// Throws XmlException when currentState is not Element, when
// normalization is on and XmlChar.IsInvalid rejects a character, or when
// the "]]>" check below fires.
// NOTE(review): several lines of this method (branch heads, braces, the
// call that receives the trailing argument list) are missing from this
// listing; the comments describe only what is visible.
1463 // The reader is positioned on the first character
1465 private void ReadText (bool notWhitespace)
1467 if (currentState != XmlNodeType.Element)
1468 throw new XmlException (this as IXmlLineInfo,
1469 "Text node cannot appear in this state.");
1472 ClearValueBuffer ();
1474 int ch = PeekChar ();
1475 bool previousWasCloseBracket = false;
1477 while (ch != '<' && ch != -1) {
// Reference path: ReadReference resolves the reference and may set
// returnEntityReference for a general entity.
1480 ch = ReadReference (false);
1481 if (returnEntityReference) // Returns -1 if char validation should not be done
1487 if (normalization && XmlChar.IsInvalid (ch))
1488 throw new XmlException (this, "Not allowed character was found.");
1489 AppendValueChar (ch);
// previousWasCloseBracket remembers that a ']' was just seen, so that a
// following '>' can be reported as the forbidden "]]>" sequence.
1493 if (previousWasCloseBracket)
1494 if (PeekChar () == '>')
1495 throw new XmlException (this as IXmlLineInfo,
1496 "Inside text content, character sequence ']]>' is not allowed.");
1497 previousWasCloseBracket = true;
1499 else if (previousWasCloseBracket)
1500 previousWasCloseBracket = false;
1502 notWhitespace = true;
// An entity reference with no preceding text becomes the current node.
1505 if (returnEntityReference && valueLength == 0) {
1506 SetEntityReferenceProperties ();
// Otherwise choose the node type: Text when non-whitespace content was
// flagged, else SignificantWhitespace under xml:space="preserve", else
// Whitespace.
1508 XmlNodeType nodeType = notWhitespace ? XmlNodeType.Text :
1509 this.XmlSpace == XmlSpace.Preserve ? XmlNodeType.SignificantWhitespace : XmlNodeType.Whitespace;
1511 nodeType, // nodeType
1512 String.Empty, // name
1513 false, // isEmptyElement
1514 null, // value: create only when required
1515 true // clearAttributes
// Dispatches on the character after '&': '#' means a character
// reference (ReadCharacterReference), anything else an entity reference
// (ReadEntityReference). The int result of the chosen reader is returned.
// ignoreEntityReferences is passed through to ReadEntityReference.
// NOTE(review): the original comment below says "Returns true", but the
// method returns int; callers in this file detect a general entity via
// the returnEntityReference field instead.
1520 // The leading '&' has already been consumed.
1521 // Returns true if the entity reference isn't a simple
1522 // character reference or one of the predefined entities.
1523 // This allows the ReadText method to break so that the
1524 // next call to Read will return the EntityReference node.
1525 private int ReadReference (bool ignoreEntityReferences)
1527 if (PeekChar () == '#') {
1529 return ReadCharacterReference ();
1531 return ReadEntityReference (ignoreEntityReferences);
// Parses the numeric part of a character reference and accumulates the
// code point in `value`: hexadecimal when the next character is 'x',
// decimal otherwise. Digits are consumed until ';' or end of input.
// Throws XmlException on a non-digit character, or (when normalization
// is on) if XmlChar.IsInvalid rejects the resulting value.
// NOTE(review): the declaration of `value` and the return statement are
// not visible in this listing; presumably `value` is returned - confirm.
// NOTE(review): no overflow guard on `value` is visible, so a very long
// digit run could wrap.
1534 private int ReadCharacterReference ()
1538 if (PeekChar () == 'x') {
// Hexadecimal form: shift in four bits per digit.
1541 while (PeekChar () != ';' && PeekChar () != -1) {
1542 int ch = ReadChar ();
1544 if (ch >= '0' && ch <= '9')
1545 value = (value << 4) + ch - '0';
1546 else if (ch >= 'A' && ch <= 'F')
1547 value = (value << 4) + ch - 'A' + 10;
1548 else if (ch >= 'a' && ch <= 'f')
1549 value = (value << 4) + ch - 'a' + 10;
1551 throw new XmlException (this as IXmlLineInfo,
1553 "invalid hexadecimal digit: {0} (#x{1:X})",
// Decimal form: multiply by ten per digit.
1558 while (PeekChar () != ';' && PeekChar () != -1) {
1559 int ch = ReadChar ();
1561 if (ch >= '0' && ch <= '9')
1562 value = value * 10 + ch - '0';
1564 throw new XmlException (this as IXmlLineInfo,
1566 "invalid decimal digit: {0} (#x{1:X})",
1574 // There is no way to save surrogate pairs...
1575 if (normalization && XmlChar.IsInvalid (value))
1576 throw new XmlException (this as IXmlLineInfo,
1577 "Referenced character was not allowed in XML.");
// Reads an entity name and handles it in one of three ways:
//  - a predefined entity (XmlChar.GetPredefinedEntity returns >= 0),
//  - when ignoreEntityReferences is true, the reference is copied
//    verbatim ("&name;") into the value buffer,
//  - otherwise returnEntityReference / entityReferenceName are set so
//    the caller can surface an EntityReference node.
// NOTE(review): the return statements are not visible in this listing;
// see the original comment below for the -1 convention.
1581 // Returns -1 if it should not be validated.
1582 // Real EOF must not be detected here.
1583 private int ReadEntityReference (bool ignoreEntityReferences)
1585 string name = ReadName ();
1588 int predefined = XmlChar.GetPredefinedEntity (name);
1589 if (predefined >= 0)
1592 if (ignoreEntityReferences) {
// Re-emit the reference text unchanged.
1593 AppendValueChar ('&');
1594 for (int i = 0; i < name.Length; i++)
1595 AppendValueChar (name [i]);
1596 AppendValueChar (';');
1598 returnEntityReference = true;
1599 entityReferenceName = name;
// Reads a run of name="value" attributes into the attribute token
// arrays. For each attribute it records line/column, reads the name,
// expects '=', reads the value tokens, and registers xmlns / xmlns:*
// declarations with parserContext.NamespaceManager. The do/while loop
// continues until PeekChar returns -1; the '?', '/' and '>' checks near
// the end lead to statements that are not visible in this listing.
// Both attribute cursors are reset to -1 before and after the loop.
// isXmlDecl: passed as true by ReadXmlDeclaration; its use is not
// visible in this listing.
// NOTE(review): the XmlException at the top is created without line
// information, unlike most throws in this file.
1605 // The reader is positioned on the first character of
1606 // the attribute name.
1607 private void ReadAttributes (bool isXmlDecl)
1610 bool requireWhitespace = false;
1611 currentAttribute = -1;
1612 currentAttributeValue = -1;
// After the first attribute, whitespace must separate attributes.
1615 if (!SkipWhitespace () && requireWhitespace)
1616 throw new XmlException ("Unexpected token. Name is required here.");
1618 IncrementAttributeToken ();
1619 currentAttributeToken.LineNumber = line;
1620 currentAttributeToken.LinePosition = column;
1622 currentAttributeToken.LocalName =
1623 currentAttributeToken.Name = ReadName ();
1624 ExpectAfterWhitespace ('=');
// -1: no pre-read quote character, so the value reader consumes it.
1626 ReadAttributeValueTokens (-1);
// Namespace declarations: default namespace, then prefixed ones
// (Substring (6) strips the "xmlns:" prefix).
1629 if (currentAttributeToken.Name == "xmlns")
1630 parserContext.NamespaceManager.AddNamespace (String.Empty, GetAttribute (currentAttribute));
1631 else if (currentAttributeToken.Name.StartsWith ("xmlns:")) {
1632 string nsPrefix = currentAttributeToken.Name.Substring (6);
1633 parserContext.NamespaceManager.AddNamespace (nsPrefix, GetAttribute (currentAttribute));
1636 if (!SkipWhitespace ())
1637 requireWhitespace = true;
1638 peekChar = PeekChar ();
1640 if (peekChar == '?')
1643 else if (peekChar == '/' || peekChar == '>')
1645 } while (peekChar != -1);
1647 currentAttribute = -1;
1648 currentAttributeValue = -1;
// Adds a synthetic attribute with a single text value token.
// ReadDoctypeDecl uses this to expose the PUBLIC and SYSTEM identifiers
// of a DOCTYPE as attributes.
//
// name:  attribute name to store on the new attribute token.
// value: attribute value, stored as one XmlNodeType.Text value token.
// NOTE(review): some lines of this method are missing from this listing;
// only the visible lines are reproduced here.
1651 private void AddAttribute (string name, string value)
1653 IncrementAttributeToken ();
1654 XmlAttributeTokenInfo ati = attributeTokens [currentAttribute];
// Use the caller-supplied name. The previous hard-coded "SYSTEM" made
// the PUBLIC identifier attribute appear under the wrong name.
1655 ati.Name = name;
1657 IncrementAttributeValueToken ();
1658 XmlTokenInfo vti = attributeValueTokens [currentAttributeValue];
1660 SetProperties (vti, XmlNodeType.Text, String.Empty, false, value, false);
// Makes attributeTokens [currentAttribute] the current attribute token:
// doubles the array when the index has reached its length, creates the
// slot on first use, and clears the (possibly reused) token.
// NOTE(review): a line before the capacity check is missing from this
// listing; presumably it advances currentAttribute - confirm.
1664 private void IncrementAttributeToken ()
1667 if (attributeTokens.Length == currentAttribute) {
1668 XmlAttributeTokenInfo [] newArray =
1669 new XmlAttributeTokenInfo [attributeTokens.Length * 2];
1670 attributeTokens.CopyTo (newArray, 0);
1671 attributeTokens = newArray;
// Token objects are allocated lazily and then reused.
1673 if (attributeTokens [currentAttribute] == null)
1674 attributeTokens [currentAttribute] = new XmlAttributeTokenInfo (this);
1675 currentAttributeToken = attributeTokens [currentAttribute];
1676 currentAttributeToken.Clear ();
// Advances to the next attribute value token: clears the value buffer,
// increments currentAttributeValue, doubles attributeValueTokens when
// the index has reached its length, creates the slot on first use, and
// clears the (possibly reused) token.
1679 private void IncrementAttributeValueToken ()
1681 ClearValueBuffer ();
1682 currentAttributeValue++;
1683 if (attributeValueTokens.Length == currentAttributeValue) {
1684 XmlTokenInfo [] newArray = new XmlTokenInfo [attributeValueTokens.Length * 2];
1685 attributeValueTokens.CopyTo (newArray, 0);
1686 attributeValueTokens = newArray;
// Token objects are allocated lazily and then reused.
1688 if (attributeValueTokens [currentAttributeValue] == null)
1689 attributeValueTokens [currentAttributeValue] = new XmlTokenInfo (this, false);
1690 currentAttributeValueToken = attributeValueTokens [currentAttributeValue];
1691 currentAttributeValueToken.Clear ();
// Reads one quoted attribute value and splits it into value tokens:
// runs of literal text become XmlNodeType.Text tokens and each
// non-predefined entity reference becomes its own
// XmlNodeType.EntityReference token. The first and last token indexes
// are stored in ValueTokenStartIndex / ValueTokenEndIndex of the
// current attribute token.
//
// dummyQuoteChar: when negative, the quote character is read from the
// input; otherwise it is used as the quote character without reading.
// Throws XmlException for an unquoted value, for '<' in the value, for
// end of input when dummyQuoteChar is negative, and for characters
// rejected by XmlChar.IsInvalid while normalization is on.
// NOTE(review): the loop header, its switch/case labels and several
// statements are missing from this listing.
1694 // LAMESPEC: Orthodox XML reader should normalize attribute values
1695 private void ReadAttributeValueTokens (int dummyQuoteChar)
1697 int quoteChar = (dummyQuoteChar < 0) ? ReadChar () : dummyQuoteChar;
1699 if (quoteChar != '\'' && quoteChar != '\"')
1700 throw new XmlException (this as IXmlLineInfo,"an attribute value was not quoted");
1701 currentAttributeToken.QuoteChar = (char) quoteChar;
// Start the first value token and remember where it begins.
1703 IncrementAttributeValueToken ();
1704 currentAttributeToken.ValueTokenStartIndex = currentAttributeValue;
1705 currentAttributeValueToken.LineNumber = line;
1706 currentAttributeValueToken.LinePosition = column;
// incrementToken is set after an entity reference token so that the
// next character opens a fresh text token.
1708 bool incrementToken = false;
1709 bool isNewToken = true;
1714 if (ch == quoteChar)
1717 if (incrementToken) {
1718 IncrementAttributeValueToken ();
1719 currentAttributeValueToken.LineNumber = line;
1720 currentAttributeValueToken.LinePosition = column;
1721 incrementToken = false;
1728 throw new XmlException (this as IXmlLineInfo,"attribute values cannot contain '<'");
1730 if (dummyQuoteChar < 0)
1731 throw new XmlException (this as IXmlLineInfo,"unexpected end of file in an attribute value");
1732 else // Attribute value constructor.
1736 int startPosition = currentTagLength - 1;
// Character reference: resolved inline into the current text token.
1737 if (PeekChar () == '#') {
1739 ch = ReadCharacterReference ();
1740 if (normalization && XmlChar.IsInvalid (ch))
1741 throw new XmlException (this as IXmlLineInfo,
1742 "Not allowed character was found.");
1743 AppendValueChar (ch);
1746 // Check XML 1.0 section 3.1 WFC.
1747 string entName = ReadName ();
1749 int predefined = XmlChar.GetPredefinedEntity (entName);
1750 if (predefined < 0) {
// General entity: close the pending text token, then emit a separate
// EntityReference token named after the entity.
1751 CheckAttributeEntityReferenceWFC (entName);
1752 currentAttributeValueToken.Value = CreateValueString ();
1753 currentAttributeValueToken.NodeType = XmlNodeType.Text;
1755 IncrementAttributeValueToken ();
1756 currentAttributeValueToken.Name = entName;
1757 currentAttributeValueToken.Value = String.Empty;
1758 currentAttributeValueToken.NodeType = XmlNodeType.EntityReference;
1759 incrementToken = true;
// Predefined entity: append its replacement character.
1762 AppendValueChar (predefined);
1765 if (normalization && XmlChar.IsInvalid (ch))
1766 throw new XmlException (this, "Invalid character was found.");
1767 AppendValueChar (ch);
// Flush the trailing text token unless the value ended on an entity
// reference token.
1773 if (!incrementToken) {
1774 currentAttributeValueToken.Value = CreateValueString ();
1775 currentAttributeValueToken.NodeType = XmlNodeType.Text;
1777 currentAttributeToken.ValueTokenEndIndex = currentAttributeValue;
// Applies checks to an entity referenced from inside an attribute value.
// entName: name of the referenced entity, looked up in DTD.EntityDecls
// when a DTD is present.
// Throws XmlException when:
//  - a DTD and a resolver exist but the entity is not declared,
//  - the declaration has an external reference,
//  - the document is standalone and the declaration is not from the
//    internal subset,
//  - the entity value contains '<'.
// NOTE(review): the statement following "if (entDecl == null)" is
// missing from this listing; presumably an early return - confirm.
1781 private void CheckAttributeEntityReferenceWFC (string entName)
1783 DTDEntityDeclaration entDecl =
1784 DTD == null ? null : DTD.EntityDecls [entName];
1785 if (DTD != null && resolver != null && entDecl == null)
1786 throw new XmlException (this, "Referenced entity does not exist.");
1788 if (entDecl == null)
1791 if (entDecl.HasExternalReference)
1792 throw new XmlException (this, "Reference to external entities is not allowed in the value of an attribute.");
1793 if (isStandalone && !entDecl.IsInternalSubset)
1794 throw new XmlException (this, "Reference to external entities is not allowed in the internal subset.");
1795 if (entDecl.EntityValue.IndexOf ('<') >= 0)
1796 throw new XmlException (this, "Attribute must not contain character '<' either directly or indirectly by way of entity references.");
// Reads a processing instruction, or delegates to ReadXmlDeclaration
// when the target is exactly "xml". Any other casing of "xml" is
// rejected. The PI content is accumulated with AppendValueChar until
// "?>" is seen.
// Throws XmlException for a reserved target name, for a target not
// followed by whitespace or '?', and (when normalization is on) for
// characters rejected by XmlChar.IsInvalid.
// NOTE(review): target.ToLower () is culture-sensitive; an invariant
// comparison would be safer.
// NOTE(review): some lines (including the call that receives the
// trailing argument list) are missing from this listing.
1799 // The reader is positioned on the first character
1802 // It may be xml declaration or processing instruction.
1803 private void ReadProcessingInstruction ()
1805 string target = ReadName ();
1806 if (target == "xml") {
1807 ReadXmlDeclaration ();
1809 } else if (target.ToLower () == "xml")
1810 throw new XmlException (this as IXmlLineInfo,
1811 "Not allowed processing instruction name which starts with 'X', 'M', 'L' was found.");
// A PI as the first construct moves the state past the point where an
// XML declaration would be accepted.
1813 if (currentState == XmlNodeType.None)
1814 currentState = XmlNodeType.XmlDeclaration;
1816 if (!SkipWhitespace ())
1817 if (PeekChar () != '?')
1818 throw new XmlException (this as IXmlLineInfo,
1819 "Invalid processing instruction name was found.");
1821 ClearValueBuffer ();
1823 while (PeekChar () != -1) {
1824 int ch = ReadChar ();
// "?>" terminates the processing instruction.
1826 if (ch == '?' && PeekChar () == '>') {
1831 if (normalization && XmlChar.IsInvalid (ch))
1832 throw new XmlException (this, "Invalid character was found.");
1833 AppendValueChar (ch);
1837 XmlNodeType.ProcessingInstruction, // nodeType
1839 false, // isEmptyElement
1840 null, // value: create only when required
1841 true // clearAttributes
// Reads the XML declaration. It is only accepted while currentState is
// None. The pseudo-attributes are read with ReadAttributes and checked:
// the first must be version="1.0", the second (if any) must be encoding
// or standalone, the third (if any) must be standalone, and standalone
// must be "yes" or "no". isStandalone is set from the standalone value.
// Throws XmlException when the declaration is misplaced or any check
// fails (the last failing check's message is reported).
// NOTE(review): attributeTokens [0] is dereferenced without a visible
// attributeCount check - confirm behavior for "<?xml ?>".
// NOTE(review): the call that receives the trailing argument list is
// missing from this listing.
1845 // The reader is positioned after "<?xml "
1846 private void ReadXmlDeclaration ()
1848 if (currentState != XmlNodeType.None) {
1849 throw new XmlException (this as IXmlLineInfo,
1850 "XML declaration cannot appear in this state.");
1852 currentState = XmlNodeType.XmlDeclaration;
1856 ReadAttributes (true); // They must have "version."
1857 string version = GetAttribute ("version");
1859 string message = null;
1861 if (attributeTokens [0].Name != "version" || version != "1.0")
1862 message = "Version 1.0 declaration is required in XML Declaration.";
1863 else if (attributeCount > 1 &&
1864 (attributeTokens [1].Name != "encoding" &&
1865 attributeTokens [1].Name != "standalone"))
1866 message = "Invalid Xml Declaration markup was found.";
1867 else if (attributeCount > 2 && attributeTokens [2].Name != "standalone")
1868 message = "Invalid Xml Declaration markup was found.";
1869 string sa = GetAttribute ("standalone");
1870 if (sa != null && sa != "yes" && sa != "no")
1871 message = "Only 'yes' or 'no' is allowed for standalone.";
1873 this.isStandalone = (sa == "yes");
1875 if (message != null)
1876 throw new XmlException (this as IXmlLineInfo, message);
// The node value is the recorded markup with its first 6 characters
// skipped.
1879 XmlNodeType.XmlDeclaration, // nodeType
1881 false, // isEmptyElement
1882 new string (currentTagBuffer, 6, currentTagLength - 6), // value
1883 false // clearAttributes
// Skips an optional text declaration ("<?xml version=... encoding=...?>")
// at the start of the input. Sets currentState to Element first, then
// peeks ahead to see whether the input begins with "<?xml ".
// If a version is present it must be "1.0"; the encoding
// pseudo-attribute is mandatory.
// Throws XmlException for a PI target that is a case variant of "xml",
// for a malformed version or encoding value, or when encoding is absent.
// NOTE(review): early-exit statements, case labels and several reads
// are missing from this listing.
1889 internal void SkipTextDeclaration ()
1891 this.currentState = XmlNodeType.Element;
1893 if (PeekChar () != '<')
1898 if (PeekChar () != '?') {
// Fill the peek buffer with the first six characters so the "<?xml "
// prefix can be tested.
1904 while (peekCharsIndex < 6) {
1905 if (PeekChar () < 0)
1910 if (new string (peekChars, 2, 4) != "xml ") {
1911 if (new string (peekChars, 2, 3).ToLower () == "xml") {
1912 throw new XmlException (this as IXmlLineInfo,
1913 "Processing instruction name must not be character sequence 'X' 'M' 'L' with case insensitivity.");
// Optional version pseudo-attribute.
1922 if (PeekChar () == 'v') {
1924 ExpectAfterWhitespace ('=');
1926 int quoteChar = ReadChar ();
1927 char [] expect1_0 = new char [3];
1928 int versionLength = 0;
1929 switch (quoteChar) {
// Collect at most three characters of the version value.
1932 while (PeekChar () != quoteChar) {
1933 if (PeekChar () == -1)
1934 throw new XmlException (this as IXmlLineInfo,
1935 "Invalid version declaration inside text declaration.");
1936 else if (versionLength == 3)
1937 throw new XmlException (this as IXmlLineInfo,
1938 "Invalid version number inside text declaration.");
1940 expect1_0 [versionLength] = (char) ReadChar ();
1942 if (versionLength == 3 && new String (expect1_0) != "1.0")
1943 throw new XmlException (this as IXmlLineInfo,
1944 "Invalid version number inside text declaration.");
1951 throw new XmlException (this as IXmlLineInfo,
1952 "Invalid version declaration inside text declaration.");
// Mandatory encoding pseudo-attribute; its value is skipped here.
1956 if (PeekChar () == 'e') {
1957 Expect ("encoding");
1958 ExpectAfterWhitespace ('=');
1960 int quoteChar = ReadChar ();
1961 switch (quoteChar) {
1964 while (PeekChar () != quoteChar)
1965 if (ReadChar () == -1)
1966 throw new XmlException (this as IXmlLineInfo,
1967 "Invalid encoding declaration inside text declaration.");
1972 throw new XmlException (this as IXmlLineInfo,
1973 "Invalid encoding declaration inside text declaration.");
1975 // Encoding value should be checked inside XmlInputStream.
1978 throw new XmlException (this as IXmlLineInfo,
1979 "Encoding declaration is mandatory in text declaration.");
// Peeks at the character following "<!" to decide which declaration
// reader to run, and throws XmlException for unrecognized markup.
// NOTE(review): the dispatch itself is missing from this listing;
// presumably it selects among ReadComment, ReadCDATA and
// ReadDoctypeDecl - confirm.
1984 // The reader is positioned on the first character after
1985 // the leading '<!'.
1986 private void ReadDeclaration ()
1988 int ch = PeekChar ();
2006 throw new XmlException (this as IXmlLineInfo,
2007 "Unexpected declaration markup was found.");
// Reads a comment body, accumulating it with AppendValueChar, and sets
// up an XmlNodeType.Comment node with an empty name.
// Throws XmlException when "--" is not immediately followed by '>', or
// when XmlChar.IsInvalid rejects a character.
// NOTE(review): unlike ReadText / ReadCDATA, the invalid-character check
// here is not gated on `normalization` - confirm that is intended.
// NOTE(review): some lines (including the call that receives the
// trailing argument list) are missing from this listing.
2011 // The reader is positioned on the first character after
2012 // the leading '<!--'.
2013 private void ReadComment ()
// A comment as the first construct moves the state past the point
// where an XML declaration would be accepted.
2015 if (currentState == XmlNodeType.None)
2016 currentState = XmlNodeType.XmlDeclaration;
2018 ClearValueBuffer ();
2020 while (PeekChar () != -1) {
2021 int ch = ReadChar ();
// "--" is only legal as part of the closing "-->".
2023 if (ch == '-' && PeekChar () == '-') {
2026 if (PeekChar () != '>')
2027 throw new XmlException (this as IXmlLineInfo,"comments cannot contain '--'");
2033 if (XmlChar.IsInvalid (ch))
2034 throw new XmlException (this as IXmlLineInfo,
2035 "Not allowed character was found.");
2037 AppendValueChar (ch);
2041 XmlNodeType.Comment, // nodeType
2042 String.Empty, // name
2043 false, // isEmptyElement
2044 null, // value: create only when required
2045 true // clearAttributes
// Reads a CDATA section body up to the closing "]]>", accumulating it
// with AppendValueChar, and sets up an XmlNodeType.CDATA node with an
// empty name.
// Throws XmlException when currentState is not Element, or (when
// normalization is on) when XmlChar.IsInvalid rejects a character.
// NOTE(review): the declaration/read of `ch` and the loop exit are
// missing from this listing.
2049 // The reader is positioned on the first character after
2050 // the leading '<![CDATA['.
2051 private void ReadCDATA ()
2053 if (currentState != XmlNodeType.Element)
2054 throw new XmlException (this as IXmlLineInfo,
2055 "CDATA section cannot appear in this state.");
2057 ClearValueBuffer ();
2061 while (PeekChar () != -1) {
// Look for the "]]>" terminator: two ']' followed by '>'.
2066 if (ch == ']' && PeekChar () == ']') {
2067 ch = ReadChar (); // ']'
2069 if (PeekChar () == '>') {
2076 if (normalization && XmlChar.IsInvalid (ch))
2077 throw new XmlException (this, "Invalid character was found.");
2079 AppendValueChar (ch);
2083 XmlNodeType.CDATA, // nodeType
2084 String.Empty, // name
2085 false, // isEmptyElement
2086 null, // value: create only when required
2087 true // clearAttributes
// Reads a DOCTYPE declaration: the doctype name, an optional external ID
// (SYSTEM literal, or PUBLIC literal followed by a system literal), and
// an optional internal subset delimited by '[' ... ']'. The internal
// subset text is sliced out of currentTagBuffer and stored in
// parserContext.InternalSubset, a DTD object model is generated, and a
// DocumentType node is set up with PUBLIC / SYSTEM exposed as
// attributes when present.
// Throws XmlException when currentState is DocumentType, Element or
// EndElement, or when whitespace is missing between the PUBLIC id and
// the system id.
// NOTE(review): the branch heads that choose between SYSTEM and PUBLIC,
// and the call that receives the node argument list, are missing from
// this listing.
2091 // The reader is positioned on the first character after
2092 // the leading '<!DOCTYPE'.
2093 private void ReadDoctypeDecl ()
2095 switch (currentState) {
2096 case XmlNodeType.DocumentType:
2097 case XmlNodeType.Element:
2098 case XmlNodeType.EndElement:
2099 throw new XmlException (this as IXmlLineInfo,
2100 "Document type cannot appear in this state.");
2102 currentState = XmlNodeType.DocumentType;
2104 string doctypeName = null;
2105 string publicId = null;
2106 string systemId = null;
2107 int intSubsetStartLine = 0;
2108 int intSubsetStartColumn = 0;
2111 doctypeName = ReadName ();
2116 systemId = ReadSystemLiteral (true);
2119 publicId = ReadPubidLiteral ();
2120 if (!SkipWhitespace ())
2121 throw new XmlException (this as IXmlLineInfo,
2122 "Whitespace is required between PUBLIC id and SYSTEM id.");
2123 systemId = ReadSystemLiteral (false);
2129 if(PeekChar () == '[')
2131 // read markupdecl etc. or end of decl
// Remember where the internal subset starts, for DTD error positions
// and for slicing its text out of the recorded tag buffer.
2133 intSubsetStartLine = this.LineNumber;
2134 intSubsetStartColumn = this.LinePosition;
2135 int startPos = currentTagLength;
2136 ReadInternalSubset ();
// endPos excludes the last recorded character.
2137 int endPos = currentTagLength - 1;
2138 parserContext.InternalSubset = new string (currentTagBuffer, startPos, endPos - startPos);
2140 // end of DOCTYPE decl.
2141 ExpectAfterWhitespace ('>');
2143 GenerateDTDObjectModel (doctypeName, publicId,
2144 systemId, parserContext.InternalSubset,
2145 intSubsetStartLine, intSubsetStartColumn);
2147 // set properties for <!DOCTYPE> node
2149 XmlNodeType.DocumentType, // nodeType
2150 doctypeName, // name
2151 false, // isEmptyElement
2152 parserContext.InternalSubset, // value
2153 true // clearAttributes
2156 if (publicId != null)
2157 AddAttribute ("PUBLIC", publicId);
2158 if (systemId != null)
2159 AddAttribute ("SYSTEM", systemId);
2160 currentAttribute = currentAttributeValue = -1;
// Convenience overload: builds the DTD object model without a known
// start position for the internal subset, passing 0 for both the start
// line and the start column.
internal DTDObjectModel GenerateDTDObjectModel (string name, string publicId,
	string systemId, string internalSubset)
{
	const int unknownStartLine = 0;
	const int unknownStartColumn = 0;
	return GenerateDTDObjectModel (name, publicId, systemId, internalSubset,
		unknownStartLine, unknownStartColumn);
}
// Creates a new DTDObjectModel on parserContext.Dtd, fills in its base
// URI, public/system ids, internal subset text, resolver, standalone
// flag and current line/column, then runs a DTDReader over it and
// returns that reader's result.
//
// name, publicId, systemId: parts of the DOCTYPE declaration.
// internalSubset: text of the internal subset.
// intSubsetStartLine / intSubsetStartColumn: position of the internal
// subset, handed to the DTDReader constructor.
// NOTE(review): a line between BaseURI and PublicId is missing from this
// listing (presumably the use of `name`), as is the #endif that closes
// the DTD_HANDLE_EVENTS section.
2169 internal DTDObjectModel GenerateDTDObjectModel (string name, string publicId,
2170 string systemId, string internalSubset, int intSubsetStartLine, int intSubsetStartColumn)
2173 parserContext.Dtd = new DTDObjectModel (this.NameTable); // merges both internal and external subsets in the meantime,
2174 DTD.BaseURI = BaseURI;
2176 DTD.PublicId = publicId;
2177 DTD.SystemId = systemId;
2178 DTD.InternalSubset = internalSubset;
2179 DTD.XmlResolver = resolver;
2180 DTD.IsStandalone = isStandalone;
2181 DTD.LineNumber = line;
2182 DTD.LinePosition = column;
2184 DTDReader dr = new DTDReader (DTD, intSubsetStartLine, intSubsetStartColumn);
2185 dr.Normalization = this.normalization;
// Validation events are forwarded only when DTD_HANDLE_EVENTS is defined.
2186 #if DTD_HANDLE_EVENTS
2187 dr.ValidationEventHandler += new ValidationEventHandler (OnValidationEvent);
2189 return dr.GenerateDTDObjectModel ();
// Relays a validation event to this reader's ValidationEventHandler,
// replacing the sender with this reader. The forwarding code is compiled
// only when DTD_HANDLE_EVENTS is defined.
// o: original sender (not used in the visible code). e: event data.
// NOTE(review): the closing #endif is missing from this listing.
2192 private void OnValidationEvent (object o, ValidationEventArgs e)
2194 #if DTD_HANDLE_EVENTS
2195 if (ValidationEventHandler != null)
2196 // Override object as this.
2197 ValidationEventHandler (this, e);
// Lexical contexts tracked while scanning the DTD internal subset.
// Members referenced elsewhere in this file: Free, ElementDecl,
// AttlistDecl, EntityDecl, NotationDecl, PI, Comment,
// InsideSingleQuoted, InsideDoubleQuoted.
// NOTE(review): the member list itself is missing from this listing.
2201 private enum DtdInputState
// Typed wrapper around a non-generic Stack of DtdInputState values.
// A new stack starts with DtdInputState.Free pushed on it. Peek and Pop
// cast the stored object back to DtdInputState.
// NOTE(review): the body of Push is missing from this listing;
// presumably it pushes val onto `intern` - confirm.
2214 private class DtdInputStateStack
2216 Stack intern = new Stack ();
2217 public DtdInputStateStack ()
2219 Push (DtdInputState.Free);
2222 public DtdInputState Peek ()
2224 return (DtdInputState) intern.Peek ();
2227 public DtdInputState Pop ()
2229 return (DtdInputState) intern.Pop ();
2232 public void Push (DtdInputState val)
// Stack of lexical contexts used while scanning the internal subset.
DtdInputStateStack stateStack = new DtdInputStateStack ();

// The current lexical context: the top of stateStack (not popped).
DtdInputState State {
	get {
		DtdInputState top = stateStack.Peek ();
		return top;
	}
}
// Scans the DTD internal subset one character at a time without
// building any result. The lexical context (declaration kind, PI,
// comment, quoted literal) is tracked on stateStack and read through
// State. Scanning stops when continueParse is cleared, which the visible
// code does in the Free state.
// Throws XmlException on end of input inside the DTD, on unexpected
// tokens, and on a parameter entity reference inside a markup
// declaration other than an entity declaration.
// NOTE(review): most character case labels, pops and break statements
// are missing from this listing, so the comments below mark only the
// regions that can be identified from the visible lines.
2244 // Simply read but not generate any result.
2245 private void ReadInternalSubset ()
2247 bool continueParse = true;
2249 while (continueParse) {
2250 switch (ReadChar ()) {
2253 case DtdInputState.Free:
2254 continueParse = false;
2256 case DtdInputState.InsideDoubleQuoted:
2258 case DtdInputState.InsideSingleQuoted:
2261 throw new XmlException (this as IXmlLineInfo,"unexpected end of file at DTD.");
2265 throw new XmlException (this as IXmlLineInfo,"unexpected end of file at DTD.");
// Markup start: ignored inside quoted literals, otherwise the following
// characters choose which declaration state to push.
2267 if (State == DtdInputState.InsideDoubleQuoted ||
2268 State == DtdInputState.InsideSingleQuoted)
2269 continue; // well-formed
2270 switch (ReadChar ()) {
2272 stateStack.Push (DtdInputState.PI);
2275 switch (ReadChar ()) {
2277 switch (ReadChar ()) {
2280 stateStack.Push (DtdInputState.ElementDecl);
2284 stateStack.Push (DtdInputState.EntityDecl);
2287 throw new XmlException (this as IXmlLineInfo,"unexpected token '<!E'.");
2292 stateStack.Push (DtdInputState.AttlistDecl);
2296 stateStack.Push (DtdInputState.NotationDecl);
2300 stateStack.Push (DtdInputState.Comment);
2305 throw new XmlException (this as IXmlLineInfo,"unexpected '>'.");
// Quote handling: a quote opens a literal unless one of the other kind
// (or a comment) is already open.
2309 if (State == DtdInputState.InsideSingleQuoted)
2311 else if (State != DtdInputState.InsideDoubleQuoted && State != DtdInputState.Comment)
2312 stateStack.Push (DtdInputState.InsideSingleQuoted);
2315 if (State == DtdInputState.InsideDoubleQuoted)
2317 else if (State != DtdInputState.InsideSingleQuoted && State != DtdInputState.Comment)
2318 stateStack.Push (DtdInputState.InsideDoubleQuoted);
// Markup end: all four declaration kinds share the NotationDecl case.
2322 case DtdInputState.ElementDecl:
2323 goto case DtdInputState.NotationDecl;
2324 case DtdInputState.AttlistDecl:
2325 goto case DtdInputState.NotationDecl;
2326 case DtdInputState.EntityDecl:
2327 goto case DtdInputState.NotationDecl;
2328 case DtdInputState.NotationDecl:
2331 case DtdInputState.InsideDoubleQuoted:
2333 case DtdInputState.InsideSingleQuoted:
2334 continue; // well-formed
2335 case DtdInputState.Comment:
2338 throw new XmlException (this as IXmlLineInfo,"unexpected token '>'");
2342 if (State == DtdInputState.PI) {
2343 if (ReadChar () == '>')
2348 if (State == DtdInputState.Comment) {
2349 if (PeekChar () == '-') {
// Parameter entity references are rejected inside markup declarations
// other than entity declarations.
2357 if (State != DtdInputState.Free && State != DtdInputState.EntityDecl && State != DtdInputState.Comment && State != DtdInputState.InsideDoubleQuoted && State != DtdInputState.InsideSingleQuoted)
2358 throw new XmlException (this as IXmlLineInfo,"Parameter Entity Reference cannot appear as a part of markupdecl (see XML spec 2.8).");
// Reads a quoted system literal and returns its contents (without the
// quotes) via the value buffer.
// expectSYSTEM: ReadDoctypeDecl passes true for a bare SYSTEM id and
// false for the system id that follows a PUBLIC id; its use is not
// visible in this listing.
// Throws XmlException when required whitespace is missing or the input
// ends inside the literal.
// NOTE(review): the declaration/read of `c` is missing from this
// listing, and `startPos` is not used in any visible line.
2364 // The reader is positioned on the first 'S' of "SYSTEM".
2365 private string ReadSystemLiteral (bool expectSYSTEM)
2369 if (!SkipWhitespace ())
2370 throw new XmlException (this as IXmlLineInfo,
2371 "Whitespace is required after 'SYSTEM'.");
2375 int quoteChar = ReadChar (); // apos or quot
2376 int startPos = currentTagLength;
2378 ClearValueBuffer ();
// Accumulate characters until the matching quote.
2379 while (c != quoteChar) {
2382 throw new XmlException (this as IXmlLineInfo,"Unexpected end of stream in ExternalID.");
2384 AppendValueChar (c);
2386 return CreateValueString ();
// Reads a quoted public identifier literal and returns its contents
// (without the quotes) via the value buffer. Every character other than
// the closing quote must satisfy XmlChar.IsPubidChar.
// Throws XmlException when whitespace is missing after 'PUBLIC', when
// the input ends inside the literal, or on a disallowed character.
// NOTE(review): the declaration/read of `c` is missing from this
// listing, and `startPos` is not used in any visible line.
2389 private string ReadPubidLiteral()
2392 if (!SkipWhitespace ())
2393 throw new XmlException (this as IXmlLineInfo,
2394 "Whitespace is required after 'PUBLIC'.");
2395 int quoteChar = ReadChar ();
2396 int startPos = currentTagLength;
2398 ClearValueBuffer ();
// Accumulate characters until the matching quote.
2399 while(c != quoteChar)
2402 if(c < 0) throw new XmlException (this as IXmlLineInfo,"Unexpected end of stream in ExternalID.");
2403 if(c != quoteChar && !XmlChar.IsPubidChar (c))
2404 throw new XmlException (this as IXmlLineInfo,"character '" + (char) c + "' not allowed for PUBLIC ID");
2406 AppendValueChar (c);
2408 return CreateValueString ();
// Reads an XML name: the first character must satisfy
// XmlChar.IsFirstNameChar, then characters are consumed while
// XmlChar.IsNameChar holds. The characters are collected with
// AppendNameChar and returned through CreateNameString.
// Throws XmlException when the first character cannot start a name.
// NOTE(review): a line before the first AppendNameChar is missing from
// this listing; presumably it resets the name buffer - confirm.
2411 // The reader is positioned on the first character
2413 private string ReadName ()
2415 int ch = PeekChar ();
2416 if (!XmlChar.IsFirstNameChar (ch))
2417 throw new XmlException (this as IXmlLineInfo,String.Format ("a name did not start with a legal character {0} ({1})", ch, (char) ch));
2421 AppendNameChar (ReadChar ());
2423 while (XmlChar.IsNameChar (PeekChar ())) {
2424 AppendNameChar (ReadChar ());
2427 return CreateNameString ();
// Consumes one character and throws XmlException (with line info) when
// it differs from `expected`. The message format shows both characters
// and their hexadecimal codes.
// NOTE(review): the String.Format call and its arguments are only
// partly visible in this listing.
2430 // Read the next character and compare it against the
2431 // specified character.
2432 private void Expect (int expected)
2434 int ch = ReadChar ();
2436 if (ch != expected) {
2437 throw new XmlException (this as IXmlLineInfo,
2439 "expected '{0}' ({1:X}) but found '{2}' ({3:X})",
// Consumes the input while matching it against every character of
// `expected`, in order; each character is checked by the
// single-character Expect overload.
private void Expect (string expected)
{
	foreach (char wanted in expected)
		Expect (wanted);
}
// Reads characters, passing over whitespace, and requires the next
// non-whitespace character to be `c`; otherwise throws XmlException
// naming the expected and found characters.
// The "i < 0x21" test is a cheap pre-filter before the
// XmlChar.IsWhitespace call.
// NOTE(review): the loop structure and the comparison against `c` are
// missing from this listing.
2454 private void ExpectAfterWhitespace (char c)
2457 int i = ReadChar ();
2458 if (i < 0x21 && XmlChar.IsWhitespace (i))
2461 throw new XmlException (this, String.Join (String.Empty, new string [] {"Expected ", c.ToString (), ", but found " + (char) i, "[", i.ToString (), "]"}));
// Skips a run of whitespace characters. `skipped` records whether the
// next character was whitespace before skipping started.
// NOTE(review): the loop body and the return statement are missing from
// this listing; presumably `skipped` is returned - confirm.
2466 // Does not consume the first non-whitespace character.
2467 private bool SkipWhitespace ()
2469 bool skipped = XmlChar.IsWhitespace (PeekChar ());
2472 while (XmlChar.IsWhitespace (PeekChar ()))
// Reads a run of whitespace into the value buffer and sets up a
// SignificantWhitespace node (under xml:space="preserve") or a
// Whitespace node.
// NOTE(review): the statement taken when, inside an element, the
// whitespace is followed by something other than '<' or end of input is
// missing from this listing; presumably it hands over to ReadText -
// confirm.
2477 private void ReadWhitespace ()
// Whitespace as the first construct moves the state past the point
// where an XML declaration would be accepted.
2479 if (currentState == XmlNodeType.None)
2480 currentState = XmlNodeType.XmlDeclaration;
2482 ClearValueBuffer ();
2483 int ch = PeekChar ();
// do/while: the first character is appended unconditionally.
2485 AppendValueChar (ReadChar ());
2486 } while ((ch = PeekChar ()) != -1 && XmlChar.IsWhitespace (ch));
2488 if (currentState == XmlNodeType.Element && ch != -1 && ch != '<')
2491 XmlNodeType nodeType = (this.XmlSpace == XmlSpace.Preserve) ?
2492 XmlNodeType.SignificantWhitespace : XmlNodeType.Whitespace;
2493 SetProperties (nodeType,
2496 null, // value: create only when required
// Maps one Base64 alphabet character to its 6-bit value:
// 'A'-'Z' -> 0-25, 'a'-'z' -> 26-51, '0'-'9' -> 52-61.
// Throws XmlException (without line info) for other characters that
// reach the final branch.
// NOTE(review): lines before the first range check are missing from this
// listing; presumably they handle '+', '/' and padding - confirm.
2503 private byte GetBase64Byte (char ch)
2513 if (ch >= 'A' && ch <= 'Z')
2514 return (byte) (ch - 'A');
2515 else if (ch >= 'a' && ch <= 'z')
2516 return (byte) (ch - 'a' + 26);
2517 else if (ch >= '0' && ch <= '9')
2518 return (byte) (ch - '0' + 52);
2520 throw new XmlException ("Invalid Base64 character was found.");
// Copies raw element content into buffer, starting at `offset`, for at
// most `length` loop iterations.
//
// buffer: destination array. offset: first index written.
// length: maximum number of iterations (one source character each).
// Throws ArgumentOutOfRangeException for a negative offset or length, or
// when buffer is shorter than offset + length; XmlException on
// unexpected end of input.
// NOTE(review): several lines (returns, branch heads, the end-tag
// handling) are missing from this listing; only visible lines appear.
// NOTE(review): a supplementary character writes two chars in one loop
// iteration, so bufIndex can advance past offset + length - confirm the
// caller sizes the buffer for that.
2524 // Returns -1 if it should throw an error.
2525 private int ReadCharsInternal (char [] buffer, int offset, int length)
2527 if (IsEmptyElement) {
2532 shouldSkipUntilEndTag = true;
2535 throw new ArgumentOutOfRangeException ("offset", offset, "Offset must be non-negative integer.");
2536 else if (length < 0)
2537 throw new ArgumentOutOfRangeException ("length", length, "Length must be non-negative integer.");
2538 else if (buffer.Length < offset + length)
// The single-string constructor takes a parameter name, not a message,
// so name the parameter explicitly and pass the text as the message.
2539 throw new ArgumentOutOfRangeException ("buffer", "buffer length is smaller than the sum of offset and length.");
2541 if (NodeType != XmlNodeType.Element)
2544 int bufIndex = offset;
2545 for (int i = 0; i < length; i++) {
2546 int c = PeekChar ();
2549 throw new XmlException (this as IXmlLineInfo, "Unexpected end of xml.");
2552 if (PeekChar () != '/') {
2553 buffer [bufIndex++] = '<';
2556 // Seems to skip immediate EndElement
2563 shouldSkipUntilEndTag = false;
// "<=" so that U+FFFF is stored as a single code unit.
2568 if (c <= Char.MaxValue)
2569 buffer [bufIndex++] = (char) c;
// UTF-16 surrogate pair: after subtracting 0x10000, the top 10 bits go
// in the high surrogate and the low 10 bits in the low surrogate.
2571 buffer [bufIndex++] = (char) (0xD800 + ((c - 0x10000) >> 10));
2572 buffer [bufIndex++] = (char) (0xDC00 + ((c - 0x10000) & 0x3FF));
2580 private bool ReadUntilEndTag ()
2587 throw new XmlException (this as IXmlLineInfo,
2588 "Unexpected end of xml.");
2590 if (PeekChar () != '/')
2593 string name = ReadName ();
2594 if (name != elementNames [elementNameStackPos - 1])
2598 elementNames [--elementNameStackPos] = null;