2 // System.Xml.XmlTextReader
5 // Jason Diamond (jason@injektilo.org)
6 // Adam Treat (manyoso@yahoo.com)
8 // (C) 2001, 2002 Jason Diamond http://injektilo.org/
12 // This can only parse basic XML: elements, attributes, processing
13 // instructions, and comments are OK.
15 // It barfs on DOCTYPE declarations.
17 // There's also no checking being done for either well-formedness
20 // NameTables aren't being used everywhere yet.
22 // Some thought needs to be given to performance. There are too many
23 // strings being allocated.
25 // Some of the MoveTo methods haven't been implemented yet.
27 // LineNumber and LinePosition aren't being tracked.
29 // xml:space, xml:lang, and xml:base aren't being tracked.
33 using System.Collections;
39 public class XmlTextReader : XmlReader, IXmlLineInfo
43 protected XmlTextReader ()
48 public XmlTextReader (Stream input)
50 // We can share some code in the constructors (at least for this one and next 2)
51 XmlNameTable nt = new NameTable ();
52 XmlNamespaceManager nsMgr = new XmlNamespaceManager (nt);
53 parserContext = new XmlParserContext (null, nsMgr, null, XmlSpace.None);
55 reader = new StreamReader (input);
59 public XmlTextReader (string url)
61 XmlNameTable nt = new NameTable ();
62 XmlNamespaceManager nsMgr = new XmlNamespaceManager (nt);
63 parserContext = new XmlParserContext (null, nsMgr, null, XmlSpace.None);
65 reader = new StreamReader(url);
69 public XmlTextReader (TextReader input)
71 XmlNameTable nt = new NameTable ();
72 XmlNamespaceManager nsMgr = new XmlNamespaceManager (nt);
73 parserContext = new XmlParserContext (null, nsMgr, null, XmlSpace.None);
79 protected XmlTextReader (XmlNameTable nt)
81 throw new NotImplementedException ();
85 public XmlTextReader (Stream input, XmlNameTable nt)
87 throw new NotImplementedException ();
91 public XmlTextReader (string url, Stream input)
93 throw new NotImplementedException ();
97 public XmlTextReader (string url, TextReader input)
99 throw new NotImplementedException ();
103 public XmlTextReader (string url, XmlNameTable nt)
105 throw new NotImplementedException ();
109 public XmlTextReader (TextReader input, XmlNameTable nt)
111 throw new NotImplementedException ();
115 public XmlTextReader (Stream xmlFragment, XmlNodeType fragType, XmlParserContext context)
117 throw new NotImplementedException ();
121 public XmlTextReader (string url, Stream input, XmlNameTable nt)
123 throw new NotImplementedException ();
127 public XmlTextReader (string url, TextReader input, XmlNameTable nt)
129 throw new NotImplementedException ();
133 public XmlTextReader (string xmlFragment, XmlNodeType fragType, XmlParserContext context)
135 //Waiting for Validating reader for fragType rules.
136 parserContext = context;
138 reader = new StringReader(xmlFragment);
145 public override int AttributeCount
147 get { return attributes.Count; }
151 public override string BaseURI
153 get { throw new NotImplementedException (); }
// Depth of the current node. A negative internal depth counter is
// reported as zero.
public override int Depth
{
    get { return Math.Max (depth, 0); }
}
162 public Encoding Encoding
164 get { throw new NotImplementedException (); }
// True when the read state is EndOfFile or Closed; false for every
// other read state.
public override bool EOF
{
    get {
        switch (readState) {
        case ReadState.EndOfFile:
        case ReadState.Closed:
            return true;
        default:
            return false;
        }
    }
}
177 public override bool HasValue
179 get { return value != String.Empty; }
182 public override bool IsDefault
186 // XmlTextReader does not expand default attributes.
191 public override bool IsEmptyElement
193 get { return isEmptyElement; }
196 public override string this [int i]
198 get { return GetAttribute (i); }
201 public override string this [string name]
203 get { return GetAttribute (name); }
206 public override string this [string localName, string namespaceName]
208 get { return GetAttribute (localName, namespaceName); }
211 public int LineNumber
216 public int LinePosition
218 get { return column; }
221 public override string LocalName
223 get { return localName; }
226 public override string Name
232 public bool Namespaces
234 get { throw new NotImplementedException (); }
235 set { throw new NotImplementedException (); }
238 public override string NamespaceURI
240 get { return namespaceURI; }
243 public override XmlNameTable NameTable
245 get { return parserContext.NameTable; }
248 public override XmlNodeType NodeType
250 get { return nodeType; }
254 public bool Normalization
256 get { throw new NotImplementedException (); }
257 set { throw new NotImplementedException (); }
260 public override string Prefix
262 get { return prefix; }
266 public override char QuoteChar
268 get { throw new NotImplementedException (); }
271 public override ReadState ReadState
273 get { return readState; }
276 public override string Value
278 get { return value; }
282 public WhitespaceHandling WhitespaceHandling
284 get { throw new NotImplementedException (); }
285 set { throw new NotImplementedException (); }
289 public override string XmlLang
291 get { throw new NotImplementedException (); }
295 public XmlResolver XmlResolver
297 set { throw new NotImplementedException (); }
301 public override XmlSpace XmlSpace
303 get { throw new NotImplementedException (); }
311 public override void Close ()
313 readState = ReadState.Closed;
// Returns the value of the attribute at zero-based position i, where
// positions follow the order in which attributes were added
// (orderedAttributes holds the names in that order, attributes maps
// each name to its value).
//
// Throws ArgumentOutOfRangeException when i is negative or is not
// less than the number of attributes.
public override string GetAttribute (int i)
{
    // Valid indexes are 0 .. Count - 1, so i == Count must be rejected too.
    if (i < 0 || i >= attributes.Count)
        throw new ArgumentOutOfRangeException (
            "i",
            "i must be non-negative and less than AttributeCount");

    return attributes [orderedAttributes [i]] as string;
}
325 public override string GetAttribute (string name)
327 return attributes [name] as string;
330 public override string GetAttribute (string localName, string namespaceURI)
332 foreach (DictionaryEntry entry in attributes)
334 string thisName = entry.Key as string;
336 int indexOfColon = thisName.IndexOf (':');
338 if (indexOfColon != -1) {
339 string thisLocalName = thisName.Substring (indexOfColon + 1);
341 if (localName == thisLocalName) {
342 string thisPrefix = thisName.Substring (0, indexOfColon);
343 string thisNamespaceURI = LookupNamespace (thisPrefix);
345 if (namespaceURI == thisNamespaceURI)
346 return attributes [thisName] as string;
348 } else if (localName == "xmlns" && namespaceURI == "http://www.w3.org/2000/xmlns/" && thisName == "xmlns")
349 return attributes [thisName] as string;
356 public TextReader GetRemainder ()
358 throw new NotImplementedException ();
362 bool IXmlLineInfo.HasLineInfo ()
367 public override string LookupNamespace (string prefix)
369 return parserContext.NamespaceManager.LookupNamespace (prefix);
373 public override void MoveToAttribute (int i)
375 throw new NotImplementedException ();
378 public override bool MoveToAttribute (string name)
383 if (attributes == null)
386 if (orderedAttributesEnumerator == null) {
388 orderedAttributesEnumerator = orderedAttributes.GetEnumerator ();
391 while (orderedAttributesEnumerator.MoveNext ()) {
392 if(name == orderedAttributesEnumerator.Current as string) {
400 string value = attributes [name] as string;
402 XmlNodeType.Attribute, // nodeType
404 false, // isEmptyElement
406 false // clearAttributes
414 public override bool MoveToAttribute (string localName, string namespaceName)
416 throw new NotImplementedException ();
// If an attribute enumeration is in progress, drops the enumerator,
// restores the saved node properties and returns true. Returns false
// when no attribute enumeration is active.
public override bool MoveToElement ()
{
    if (orderedAttributesEnumerator == null)
        return false;

    orderedAttributesEnumerator = null;
    RestoreProperties ();
    return true;
}
430 public override bool MoveToFirstAttribute ()
433 return MoveToNextAttribute ();
// Advances to the next attribute in insertion order and exposes it
// as the current node (node type Attribute). Returns false when there
// is no attribute table or the enumeration is exhausted.
public override bool MoveToNextAttribute ()
{
    if (attributes == null)
        return false;

    // First step of an enumeration: remember the current node's
    // properties so that MoveToElement can restore them later.
    if (orderedAttributesEnumerator == null) {
        SaveProperties ();
        orderedAttributesEnumerator = orderedAttributes.GetEnumerator ();
    }

    if (!orderedAttributesEnumerator.MoveNext ())
        return false;

    string attributeName = orderedAttributesEnumerator.Current as string;
    string attributeValue = attributes [attributeName] as string;

    SetProperties (
        XmlNodeType.Attribute, // nodeType
        attributeName, // name
        false, // isEmptyElement
        attributeValue, // value
        false // clearAttributes
    );

    return true;
}
462 public override bool Read ()
466 readState = ReadState.Interactive;
468 more = ReadContent ();
474 public override bool ReadAttributeValue ()
476 throw new NotImplementedException ();
480 public int ReadBase64 (byte [] buffer, int offset, int length)
482 throw new NotImplementedException ();
486 public int ReadBinHex (byte [] buffer, int offset, int length)
488 throw new NotImplementedException ();
492 public int ReadChars (char [] buffer, int offset, int length)
494 throw new NotImplementedException ();
498 public override string ReadInnerXml ()
500 // Still need a Well Formedness check.
501 // Will wait for Validating reader ;-)
502 if (NodeType == XmlNodeType.Attribute) {
505 saveToXmlBuffer = true;
506 string startname = this.Name;
507 string endname = string.Empty;
508 readState = ReadState.Interactive;
510 while (startname != endname) {
515 xmlBuffer.Replace(currentTag.ToString (), "");
516 saveToXmlBuffer = false;
517 string InnerXml = xmlBuffer.ToString ();
518 xmlBuffer.Length = 0;
524 public override string ReadOuterXml ()
526 // Still need a Well Formedness check.
527 // Will wait for Validating reader ;-)
528 if (NodeType == XmlNodeType.Attribute) {
529 return Name+"=\""+Value+"\"";
531 saveToXmlBuffer = true;
532 xmlBuffer.Append(currentTag.ToString ());
533 string startname = this.Name;
534 string endname = string.Empty;
535 readState = ReadState.Interactive;
537 while (startname != endname) {
541 saveToXmlBuffer = false;
542 string OuterXml = xmlBuffer.ToString ();
543 xmlBuffer.Length = 0;
549 public override string ReadString ()
551 throw new NotImplementedException ();
555 public void ResetState ()
557 throw new NotImplementedException ();
560 public override void ResolveEntity ()
562 // XmlTextReaders don't resolve entities.
563 throw new InvalidOperationException ("XmlTextReaders don't resolve entities.");
570 private XmlParserContext parserContext;
572 private TextReader reader;
573 private ReadState readState;
576 private bool depthDown;
578 private bool popScope;
580 private XmlNodeType nodeType;
582 private string prefix;
583 private string localName;
584 private string namespaceURI;
585 private bool isEmptyElement;
586 private string value;
588 private XmlNodeType saveNodeType;
589 private string saveName;
590 private string savePrefix;
591 private string saveLocalName;
592 private string saveNamespaceURI;
593 private bool saveIsEmptyElement;
595 private Hashtable attributes;
596 private ArrayList orderedAttributes;
597 private IEnumerator orderedAttributesEnumerator;
599 private bool returnEntityReference;
600 private string entityReferenceName;
602 private char [] nameBuffer;
603 private int nameLength;
604 private int nameCapacity;
605 private const int initialNameCapacity = 256;
607 private char [] valueBuffer;
608 private int valueLength;
609 private int valueCapacity;
610 private const int initialValueCapacity = 8192;
612 private StringBuilder xmlBuffer; // This is for Read(Inner|Outer)Xml
613 private StringBuilder currentTag; // A buffer for ReadContent for ReadOuterXml
614 private bool saveToXmlBuffer;
615 private int line = 1;
616 private int column = 1;
620 readState = ReadState.Initial;
627 nodeType = XmlNodeType.None;
629 prefix = String.Empty;
630 localName = string.Empty;
631 isEmptyElement = false;
632 value = String.Empty;
634 attributes = new Hashtable ();
635 orderedAttributes = new ArrayList ();
636 orderedAttributesEnumerator = null;
638 returnEntityReference = false;
639 entityReferenceName = String.Empty;
641 nameBuffer = new char [initialNameCapacity];
643 nameCapacity = initialNameCapacity;
645 valueBuffer = new char [initialValueCapacity];
647 valueCapacity = initialValueCapacity;
649 xmlBuffer = new StringBuilder ();
650 currentTag = new StringBuilder ();
// Use this method rather than setting the properties directly so
// that all the necessary properties can be changed in harmony with
// each other. Maybe the fields should be in a separate class to help
// enforce this.
//
// nodeType        - type of the node the reader is now positioned on.
// name            - qualified name; split at the first ':' into
//                   prefix and localName.
// isEmptyElement  - whether the node is an empty element.
// value           - text value of the node.
// clearAttributes - when true the attribute collection is emptied.
//
// The namespace URI is resolved only for node types that can carry
// one. Elements and end elements resolve their prefix, including the
// empty prefix, i.e. the default namespace. Attributes resolve a
// non-empty prefix only, because the default namespace does not
// apply to unprefixed attributes. A bare "xmlns" attribute reports
// the xmlns namespace, matching GetAttribute (localName, namespaceURI).
// Every other node type gets String.Empty.
private void SetProperties (
    XmlNodeType nodeType,
    string name,
    bool isEmptyElement,
    string value,
    bool clearAttributes)
{
    this.nodeType = nodeType;
    this.name = name;
    this.isEmptyElement = isEmptyElement;
    this.value = value;

    if (clearAttributes)
        ClearAttributes ();

    int indexOfColon = name.IndexOf (':');

    if (indexOfColon == -1) {
        prefix = String.Empty;
        localName = name;
    } else {
        prefix = name.Substring (0, indexOfColon);
        localName = name.Substring (indexOfColon + 1);
    }

    switch (nodeType) {
    case XmlNodeType.Element:
    case XmlNodeType.EndElement:
        namespaceURI = LookupNamespace (prefix);
        break;
    case XmlNodeType.Attribute:
        if (prefix.Length > 0)
            namespaceURI = LookupNamespace (prefix);
        else if (name == "xmlns")
            namespaceURI = "http://www.w3.org/2000/xmlns/";
        else
            namespaceURI = String.Empty;
        break;
    default:
        namespaceURI = String.Empty;
        break;
    }
}
686 private void SaveProperties ()
688 saveNodeType = nodeType;
691 saveLocalName = localName;
692 saveNamespaceURI = namespaceURI;
693 saveIsEmptyElement = isEmptyElement;
694 // An element's value is always String.Empty.
697 private void RestoreProperties ()
699 nodeType = saveNodeType;
702 localName = saveLocalName;
703 namespaceURI = saveNamespaceURI;
704 isEmptyElement = saveIsEmptyElement;
705 value = String.Empty;
708 private void AddAttribute (string name, string value)
710 attributes.Add (name, value);
711 orderedAttributes.Add (name);
// Forgets any attribute enumeration in progress and, when attributes
// are present, empties both the name-to-value table and the ordered
// list of names.
private void ClearAttributes ()
{
    orderedAttributesEnumerator = null;

    if (attributes.Count == 0)
        return;

    attributes.Clear ();
    orderedAttributes.Clear ();
}
724 private int PeekChar ()
726 return reader.Peek ();
729 private int ReadChar ()
731 int ch = reader.Read ();
738 if (saveToXmlBuffer) {
739 xmlBuffer.Append ((char) ch);
741 currentTag.Append ((char) ch);
745 // This should really keep track of some state so
746 // that it's not possible to have more than one document
747 // element or text outside of the document element.
748 private bool ReadContent ()
751 currentTag.Length = 0;
753 parserContext.NamespaceManager.PopScope ();
760 if (returnEntityReference) {
762 SetEntityReferenceProperties ();
773 readState = ReadState.EndOfFile;
775 XmlNodeType.None, // nodeType
776 String.Empty, // name
777 false, // isEmptyElement
778 String.Empty, // value
779 true // clearAttributes
// Makes the pending entity reference the current node (an
// EntityReference node named after the entity, with an empty value
// and no attributes), then clears the pending-reference state.
private void SetEntityReferenceProperties ()
{
    string pendingName = entityReferenceName;

    SetProperties (
        XmlNodeType.EntityReference, // nodeType
        pendingName, // name
        false, // isEmptyElement
        String.Empty, // value
        true // clearAttributes
    );

    entityReferenceName = String.Empty;
    returnEntityReference = false;
}
807 // The leading '<' has already been consumed.
808 private void ReadTag ()
818 ReadProcessingInstruction ();
830 // The leading '<' has already been consumed.
831 private void ReadStartTag ()
833 parserContext.NamespaceManager.PushScope ();
835 string name = ReadName ();
838 bool isEmptyElement = false;
842 if (XmlChar.IsFirstNameChar (PeekChar ()))
845 if (PeekChar () == '/') {
847 isEmptyElement = true;
857 XmlNodeType.Element, // nodeType
859 isEmptyElement, // isEmptyElement
860 String.Empty, // value
861 false // clearAttributes
865 // The reader is positioned on the first character
866 // of the element's name.
867 private void ReadEndTag ()
869 string name = ReadName ();
876 XmlNodeType.EndElement, // nodeType
878 false, // isEmptyElement
879 String.Empty, // value
880 true // clearAttributes
// Appends one character to the name buffer, growing the buffer first
// if it is full. The int argument is narrowed to a char.
private void AppendNameChar (int ch)
{
    CheckNameCapacity ();

    nameBuffer [nameLength] = (char) ch;
    nameLength = nameLength + 1;
}
// Doubles the name buffer when it is exactly full, preserving the
// characters written so far. Does nothing otherwise.
private void CheckNameCapacity ()
{
    if (nameLength != nameCapacity)
        return;

    nameCapacity *= 2;

    char [] grown = new char [nameCapacity];
    Array.Copy (nameBuffer, grown, nameLength);
    nameBuffer = grown;
}
902 private string CreateNameString ()
904 return new String (nameBuffer, 0, nameLength);
// Appends one character to the value buffer, growing the buffer
// first if it is full. The int argument is narrowed to a char.
private void AppendValueChar (int ch)
{
    CheckValueCapacity ();

    valueBuffer [valueLength] = (char) ch;
    valueLength = valueLength + 1;
}
// Doubles the value buffer when it is exactly full, preserving the
// characters written so far. Does nothing otherwise.
private void CheckValueCapacity ()
{
    if (valueLength != valueCapacity)
        return;

    valueCapacity *= 2;

    char [] grown = new char [valueCapacity];
    Array.Copy (valueBuffer, grown, valueLength);
    valueBuffer = grown;
}
923 private string CreateValueString ()
925 return new String (valueBuffer, 0, valueLength);
928 // The reader is positioned on the first character
930 private void ReadText ()
934 int ch = PeekChar ();
936 while (ch != '<' && ch != -1) {
939 if (ReadReference (false))
942 AppendValueChar (ReadChar ());
947 if (returnEntityReference && valueLength == 0) {
949 SetEntityReferenceProperties ();
957 XmlNodeType.Text, // nodeType
958 String.Empty, // name
959 false, // isEmptyElement
960 CreateValueString (), // value
961 true // clearAttributes
// Called after the leading '&' has been consumed. Dispatches to
// ReadCharacterReference for "&#..." and to ReadEntityReference for
// everything else.
//
// Returns the returnEntityReference flag: true when the reference is
// neither a character reference nor a predefined entity. ReadText
// uses this to stop, so that the next Read call returns the
// EntityReference node.
private bool ReadReference (bool ignoreEntityReferences)
{
    bool isCharacterReference = PeekChar () == '#';

    if (!isCharacterReference) {
        ReadEntityReference (ignoreEntityReferences);
        return returnEntityReference;
    }

    ReadChar (); // consume '#'
    ReadCharacterReference ();
    return returnEntityReference;
}
// Called after the leading "&#" has been consumed. Parses a decimal
// ("&#65;") or hexadecimal ("&#x41;") character reference, including
// the terminating ';', and appends the referenced character to the
// value buffer.
//
// Code points above U+FFFF do not fit in a single char, so they are
// appended as a UTF-16 surrogate pair instead of being truncated.
//
// Throws XmlException on an invalid digit, on a reference with no
// digits, on a value beyond U+10FFFF, and (through Expect) when the
// terminating ';' is missing, e.g. at end of input.
private void ReadCharacterReference ()
{
    bool isHex = PeekChar () == 'x';

    if (isHex)
        ReadChar (); // consume 'x'

    int value = 0;
    int digitCount = 0;

    while (PeekChar () != ';' && PeekChar () != -1) {
        int ch = ReadChar ();
        int digit;

        if (ch >= '0' && ch <= '9')
            digit = ch - '0';
        else if (isHex && ch >= 'A' && ch <= 'F')
            digit = ch - 'A' + 10;
        else if (isHex && ch >= 'a' && ch <= 'f')
            digit = ch - 'a' + 10;
        else
            throw new XmlException (
                String.Format (
                    isHex
                        ? "invalid hexadecimal digit: {0} (#x{1:X})"
                        : "invalid decimal digit: {0} (#x{1:X})",
                    (char) ch,
                    ch));

        value = isHex ? (value << 4) + digit : value * 10 + digit;
        digitCount++;

        // Checking after every digit also keeps the accumulator from
        // overflowing on absurdly long digit strings.
        if (value > 0x10FFFF)
            throw new XmlException ("character reference is outside the Unicode range");
    }

    if (digitCount == 0)
        throw new XmlException ("character reference contains no digits");

    Expect (';');

    if (value > 0xFFFF) {
        int offset = value - 0x10000;
        AppendValueChar (0xD800 + (offset >> 10)); // high surrogate
        AppendValueChar (0xDC00 + (offset & 0x3FF)); // low surrogate
    } else {
        AppendValueChar (value);
    }
}
1025 private void ReadEntityReference (bool ignoreEntityReferences)
1029 int ch = PeekChar ();
1031 while (ch != ';' && ch != -1) {
1032 AppendNameChar (ReadChar ());
1038 string name = CreateNameString ();
1043 AppendValueChar ('<');
1046 AppendValueChar ('>');
1049 AppendValueChar ('&');
1052 AppendValueChar ('\'');
1055 AppendValueChar ('"');
1058 if (ignoreEntityReferences) {
1059 AppendValueChar ('&');
1061 foreach (char ch2 in name) {
1062 AppendValueChar (ch2);
1065 AppendValueChar (';');
1067 returnEntityReference = true;
1068 entityReferenceName = name;
1074 // The reader is positioned on the first character of
1075 // the attribute name.
1076 private void ReadAttributes ()
1079 string name = ReadName ();
1083 string value = ReadAttribute ();
1086 if (name == "xmlns")
1087 parserContext.NamespaceManager.AddNamespace (String.Empty, value);
1088 else if (name.StartsWith ("xmlns:"))
1089 parserContext.NamespaceManager.AddNamespace (name.Substring (6), value);
1091 AddAttribute (name, value);
1092 } while (PeekChar () != '/' && PeekChar () != '>' && PeekChar () != -1);
1095 // The reader is positioned on the quote character.
1096 private string ReadAttribute ()
1098 int quoteChar = ReadChar ();
1100 if (quoteChar != '\'' && quoteChar != '\"')
1101 throw new XmlException ("an attribute value was not quoted");
1105 while (PeekChar () != quoteChar) {
1106 int ch = ReadChar ();
1111 throw new XmlException ("attribute values cannot contain '<'");
1113 ReadReference (true);
1116 throw new XmlException ("unexpected end of file in an attribute value");
1118 AppendValueChar (ch);
1123 ReadChar (); // quoteChar
1125 return CreateValueString ();
1128 // The reader is positioned on the first character
1130 private void ReadProcessingInstruction ()
1132 string target = ReadName ();
1137 while (PeekChar () != -1) {
1138 int ch = ReadChar ();
1140 if (ch == '?' && PeekChar () == '>') {
1145 AppendValueChar ((char)ch);
1149 XmlNodeType.ProcessingInstruction, // nodeType
1151 false, // isEmptyElement
1152 CreateValueString (), // value
1153 true // clearAttributes
1157 // The reader is positioned on the first character after
1158 // the leading '<!'.
1159 private void ReadDeclaration ()
1161 int ch = PeekChar ();
1183 // The reader is positioned on the first character after
1184 // the leading '<!--'.
1185 private void ReadComment ()
1189 while (PeekChar () != -1) {
1190 int ch = ReadChar ();
1192 if (ch == '-' && PeekChar () == '-') {
1195 if (PeekChar () != '>')
1196 throw new XmlException ("comments cannot contain '--'");
1202 AppendValueChar ((char)ch);
1206 XmlNodeType.Comment, // nodeType
1207 String.Empty, // name
1208 false, // isEmptyElement
1209 CreateValueString (), // value
1210 true // clearAttributes
1214 // The reader is positioned on the first character after
1215 // the leading '<![CDATA['.
1216 private void ReadCDATA ()
1220 while (PeekChar () != -1) {
1221 int ch = ReadChar ();
1223 if (ch == ']' && PeekChar () == ']') {
1224 ch = ReadChar (); // ']'
1226 if (PeekChar () == '>') {
1230 AppendValueChar (']');
1231 AppendValueChar (']');
1236 AppendValueChar ((char)ch);
1242 XmlNodeType.CDATA, // nodeType
1243 String.Empty, // name
1244 false, // isEmptyElement
1245 CreateValueString (), // value
1246 true // clearAttributes
// The reader is positioned on the first character of the name.
// Reads one name-start character followed by any number of name
// characters and returns them as a string. Throws XmlException when
// the first character cannot start a name.
private string ReadName ()
{
    bool startsLegally = XmlChar.IsFirstNameChar (PeekChar ());

    if (!startsLegally)
        throw new XmlException ("a name did not start with a legal character");

    nameLength = 0;

    // The first character is already validated, so it is appended
    // unconditionally; the loop continues while name characters follow.
    do {
        AppendNameChar (ReadChar ());
    } while (XmlChar.IsNameChar (PeekChar ()));

    return CreateNameString ();
}
1268 // Read the next character and compare it against the
1269 // specified character.
1270 private void Expect (int expected)
1272 int ch = ReadChar ();
1274 if (ch != expected) {
1275 throw new XmlException (
1277 "expected '{0}' ({1:X}) but found '{2}' ({3:X})",
1285 // Does not consume the first non-whitespace character.
1286 private void SkipWhitespace ()
1288 while (XmlChar.IsWhitespace (PeekChar ()))