2 // System.Xml.XmlTextReader
5 // Jason Diamond (jason@injektilo.org)
6 // Adam Treat (manyoso@yahoo.com)
8 // (C) 2001, 2002 Jason Diamond http://injektilo.org/
12 // This can only parse basic XML: elements, attributes, processing
13 // instructions, and comments are OK.
15 // It barfs on DOCTYPE declarations.
17 // There's also no checking being done for either well-formedness or validity.
20 // NameTables aren't being used everywhere yet.
22 // Some thought needs to be given to performance. There are too many
23 // strings being allocated.
25 // Some of the MoveTo methods haven't been implemented yet.
27 // LineNumber and LinePosition aren't being tracked.
29 // xml:space, xml:lang, and xml:base aren't being tracked.
33 using System.Collections;
39 public class XmlTextReader : XmlReader, IXmlLineInfo
// Parameterless constructor available to derived classes.
// Not implemented: always throws NotImplementedException.
protected XmlTextReader ()
{
	throw new NotImplementedException ();
}
// Creates a reader over a byte stream. Builds a fresh NameTable and an
// XmlNamespaceManager on top of it, stores them in a new XmlParserContext
// (null is passed for the first and third arguments, XmlSpace.None for the
// last) and wraps the stream in a StreamReader.
// NOTE(review): the embedded numbering skips 56, so a short statement may be
// missing between the context setup and the reader assignment - confirm.
50 public XmlTextReader (Stream input)
52 // We can share some code in the constructors (at least for this one and next 2)
53 XmlNameTable nt = new NameTable ();
54 XmlNamespaceManager nsMgr = new XmlNamespaceManager (nt);
55 parserContext = new XmlParserContext (null, nsMgr, null, XmlSpace.None);
57 reader = new StreamReader (input);
// Creates a reader for the document named by `url`. The parser context is
// set up exactly as in the Stream overload (fresh NameTable and namespace
// manager, XmlSpace.None).
// `url` is handed straight to StreamReader's string constructor, so it is
// opened the way StreamReader opens a path; no URI resolution is done here.
// NOTE(review): the embedded numbering skips 66, so a short statement may be
// missing before the reader assignment - confirm.
61 public XmlTextReader (string url)
63 XmlNameTable nt = new NameTable ();
64 XmlNamespaceManager nsMgr = new XmlNamespaceManager (nt);
65 parserContext = new XmlParserContext (null, nsMgr, null, XmlSpace.None);
67 reader = new StreamReader(url);
// Creates a reader over an existing TextReader. The visible lines only set
// up the parser context (fresh NameTable and namespace manager,
// XmlSpace.None).
// NOTE(review): no visible line stores `input`; the embedded numbering skips
// 76-78, so the statements that finish construction are not shown - confirm.
71 public XmlTextReader (TextReader input)
73 XmlNameTable nt = new NameTable ();
74 XmlNamespaceManager nsMgr = new XmlNamespaceManager (nt);
75 parserContext = new XmlParserContext (null, nsMgr, null, XmlSpace.None);
// Constructor taking only a name table, for derived classes.
// Not implemented: always throws NotImplementedException.
protected XmlTextReader (XmlNameTable nt)
{
	throw new NotImplementedException ();
}
// Stream plus caller-supplied name table.
// Not implemented: always throws NotImplementedException.
public XmlTextReader (Stream input, XmlNameTable nt)
{
	throw new NotImplementedException ();
}
// URL plus an already opened stream.
// Not implemented: always throws NotImplementedException.
public XmlTextReader (string url, Stream input)
{
	throw new NotImplementedException ();
}
// URL plus an already opened TextReader.
// Not implemented: always throws NotImplementedException.
public XmlTextReader (string url, TextReader input)
{
	throw new NotImplementedException ();
}
// URL plus caller-supplied name table.
// Not implemented: always throws NotImplementedException.
public XmlTextReader (string url, XmlNameTable nt)
{
	throw new NotImplementedException ();
}
// TextReader plus caller-supplied name table.
// Not implemented: always throws NotImplementedException.
public XmlTextReader (TextReader input, XmlNameTable nt)
{
	throw new NotImplementedException ();
}
// XML fragment supplied as a stream, with its node type and parser context.
// Not implemented: always throws NotImplementedException.
public XmlTextReader (Stream xmlFragment, XmlNodeType fragType, XmlParserContext context)
{
	throw new NotImplementedException ();
}
// URL, already opened stream and caller-supplied name table.
// Not implemented: always throws NotImplementedException.
public XmlTextReader (string url, Stream input, XmlNameTable nt)
{
	throw new NotImplementedException ();
}
// URL, already opened TextReader and caller-supplied name table.
// Not implemented: always throws NotImplementedException.
public XmlTextReader (string url, TextReader input, XmlNameTable nt)
{
	throw new NotImplementedException ();
}
// Creates a reader over an XML fragment held in a string. The caller's
// parser context is adopted as-is and the text is read through a
// StringReader. `fragType` is not used by any visible line.
// NOTE(review): the embedded numbering skips 139, so a short statement may
// be missing between the two assignments - confirm.
135 public XmlTextReader (string xmlFragment, XmlNodeType fragType, XmlParserContext context)
137 //Waiting for Validating reader for fragType rules.
138 parserContext = context;
140 reader = new StringReader(xmlFragment);
// Number of entries currently held in the attribute table.
public override int AttributeCount
{
	get { return attributes.Count; }
}
// Not implemented: reading this property throws NotImplementedException.
public override string BaseURI
{
	get { throw new NotImplementedException (); }
}
// Nesting depth of the current node. The internal counter can be
// non-positive, so the reported value is clamped to zero.
public override int Depth
{
	get { return Math.Max (depth, 0); }
}
// Not implemented: reading this property throws NotImplementedException.
public Encoding Encoding
{
	get { throw new NotImplementedException (); }
}
// True once the reader has run off the end of the input or has been closed.
public override bool EOF
{
	get {
		return
			readState == ReadState.EndOfFile ||
			readState == ReadState.Closed;
	}
}
// True when the current node's value is anything other than the empty string.
public override bool HasValue
{
	get { return value != String.Empty; }
}
// Whether the current node is an attribute generated from a default value.
// Always false for this reader.
public override bool IsDefault
{
	get {
		// XmlTextReader does not expand default attributes.
		return false;
	}
}
// Reports the empty-element flag recorded for the current node.
public override bool IsEmptyElement
{
	get { return isEmptyElement; }
}
// Indexer form of GetAttribute (int).
public override string this [int i]
{
	get { return GetAttribute (i); }
}
// Indexer form of GetAttribute (string).
public override string this [string name]
{
	get { return GetAttribute (name); }
}
// Indexer form of GetAttribute (string, string).
public override string this [string localName, string namespaceName]
{
	get { return GetAttribute (localName, namespaceName); }
}
// Line-number property.
// NOTE(review): only the declaration is visible in SOURCE; the accessor body
// is missing. Presumably it mirrors LinePosition with a line counter - confirm.
213 public int LineNumber
// Current value of the column counter.
public int LinePosition
{
	get { return column; }
}
// Local (unprefixed) part of the current node's name.
public override string LocalName
{
	get { return localName; }
}
// Qualified name of the current node.
// NOTE(review): only the declaration is visible in SOURCE; the accessor body
// is missing - confirm what it returns.
228 public override string Name
// Not implemented: both accessors throw NotImplementedException.
public bool Namespaces
{
	get { throw new NotImplementedException (); }
	set { throw new NotImplementedException (); }
}
// Namespace URI recorded for the current node.
public override string NamespaceURI
{
	get { return namespaceURI; }
}
// Name table exposed by the parser context.
public override XmlNameTable NameTable
{
	get { return parserContext.NameTable; }
}
// Type of the node the reader is currently positioned on.
public override XmlNodeType NodeType
{
	get { return nodeType; }
}
// Not implemented: both accessors throw NotImplementedException.
public bool Normalization
{
	get { throw new NotImplementedException (); }
	set { throw new NotImplementedException (); }
}
// Namespace prefix of the current node's name.
public override string Prefix
{
	get { return prefix; }
}
// Not implemented: reading this property throws NotImplementedException.
public override char QuoteChar
{
	get { throw new NotImplementedException (); }
}
// Current state of the reader.
public override ReadState ReadState
{
	get { return readState; }
}
// Text value recorded for the current node.
public override string Value
{
	get { return value; }
}
// Not implemented: both accessors throw NotImplementedException.
public WhitespaceHandling WhitespaceHandling
{
	get { throw new NotImplementedException (); }
	set { throw new NotImplementedException (); }
}
// Not implemented: reading this property throws NotImplementedException.
public override string XmlLang
{
	get { throw new NotImplementedException (); }
}
// Write-only property. Not implemented: assigning throws NotImplementedException.
public XmlResolver XmlResolver
{
	set { throw new NotImplementedException (); }
}
// Not implemented: reading this property throws NotImplementedException.
public override XmlSpace XmlSpace
{
	get { throw new NotImplementedException (); }
}
// Puts the reader into the Closed state and releases the underlying
// TextReader.
public override void Close ()
{
	readState = ReadState.Closed;

	// Release the input as well; otherwise a stream or file opened by one
	// of the constructors stays open until it is finalized.
	if (reader != null)
		reader.Close ();
}
// Attribute lookup by index.
// Not implemented: always throws NotImplementedException.
public override string GetAttribute (int i)
{
	throw new NotImplementedException ();
}
// Attribute lookup by qualified name. Yields null when the table has no
// entry for `name` (or the stored entry is not a string).
public override string GetAttribute (string name)
{
	object stored = attributes [name];
	return stored as string;
}
// Attribute lookup by local name and namespace URI. Every entry in the
// attribute table is scanned. A prefixed name is split at its first ':';
// the prefix is resolved with LookupNamespace and the value is returned when
// both the local part and the resolved URI match the arguments. An
// unprefixed "xmlns" attribute matches only localName "xmlns" in the
// "http://www.w3.org/2000/xmlns/" namespace.
// NOTE(review): the fall-through result for "no match" is not visible in
// SOURCE - confirm what is returned.
329 public override string GetAttribute (string localName, string namespaceURI)
331 foreach (DictionaryEntry entry in attributes)
333 string thisName = entry.Key as string;
335 int indexOfColon = thisName.IndexOf (':');
337 if (indexOfColon != -1) {
338 string thisLocalName = thisName.Substring (indexOfColon + 1);
340 if (localName == thisLocalName) {
341 string thisPrefix = thisName.Substring (0, indexOfColon);
342 string thisNamespaceURI = LookupNamespace (thisPrefix);
344 if (namespaceURI == thisNamespaceURI)
345 return attributes [thisName] as string;
// Unprefixed names only ever match the default namespace declaration itself.
347 } else if (localName == "xmlns" && namespaceURI == "http://www.w3.org/2000/xmlns/" && thisName == "xmlns")
348 return attributes [thisName] as string;
// Would hand back the unread remainder of the input.
// Not implemented: always throws NotImplementedException.
public TextReader GetRemainder ()
{
	throw new NotImplementedException ();
}
// Explicit implementation of IXmlLineInfo.HasLineInfo.
// NOTE(review): only the declaration is visible in SOURCE; the body (and so
// the value reported) is missing - confirm.
361 bool IXmlLineInfo.HasLineInfo ()
// Resolves a prefix through the namespace manager held by the parser context.
public override string LookupNamespace (string prefix)
{
	XmlNamespaceManager scopes = parserContext.NamespaceManager;
	return scopes.LookupNamespace (prefix);
}
// Positioning on an attribute by index.
// Not implemented: always throws NotImplementedException.
public override void MoveToAttribute (int i)
{
	throw new NotImplementedException ();
}
// Positions the reader on the attribute whose qualified name equals `name`.
// The ordered attribute list is walked with the shared enumerator (created
// on demand); on a match the value is fetched from the attribute table and
// the reader's properties are switched to an Attribute node without
// clearing the attribute table.
// NOTE(review): many short lines (returns, the SetProperties call line, the
// statement guarded by the null check) are not visible in SOURCE.
377 public override bool MoveToAttribute (string name)
382 if (attributes == null)
// Start a fresh walk if no attribute enumeration is in progress.
385 if (orderedAttributesEnumerator == null) {
387 orderedAttributesEnumerator = orderedAttributes.GetEnumerator ();
390 while (orderedAttributesEnumerator.MoveNext ()) {
391 if(name == orderedAttributesEnumerator.Current as string) {
399 string value = attributes [name] as string;
401 XmlNodeType.Attribute, // nodeType
403 false, // isEmptyElement
405 false // clearAttributes
// Positioning on an attribute by local name and namespace.
// Not implemented: always throws NotImplementedException.
public override bool MoveToAttribute (string localName, string namespaceName)
{
	throw new NotImplementedException ();
}
// Moves back from an attribute to its owning element. When an attribute
// enumeration is in progress it is discarded and the element's saved
// properties are restored.
// NOTE(review): the return statements are not visible in SOURCE.
418 public override bool MoveToElement ()
420 if (orderedAttributesEnumerator != null) {
421 orderedAttributesEnumerator = null;
422 RestoreProperties ();
// Positions the reader on the first attribute by delegating to
// MoveToNextAttribute.
// NOTE(review): the embedded numbering skips 431, so a short statement ahead
// of the return (presumably one that resets the position) is not visible.
429 public override bool MoveToFirstAttribute ()
432 return MoveToNextAttribute ();
// Advances to the next attribute in document order. The shared enumerator
// over the ordered attribute list is created on demand; when it yields
// another name, the value is fetched from the attribute table and the
// reader's properties are switched to an Attribute node without clearing
// the attribute table.
// NOTE(review): the returns, the SetProperties call line and the statements
// around the enumerator creation are not visible in SOURCE.
435 public override bool MoveToNextAttribute ()
437 if (attributes == null)
440 if (orderedAttributesEnumerator == null) {
442 orderedAttributesEnumerator = orderedAttributes.GetEnumerator ();
445 if (orderedAttributesEnumerator.MoveNext ()) {
446 string name = orderedAttributesEnumerator.Current as string;
447 string value = attributes [name] as string;
449 XmlNodeType.Attribute, // nodeType
451 false, // isEmptyElement
453 false // clearAttributes
// Advances to the next node: switches the reader to the Interactive state
// and lets ReadContent parse the next piece of the input, keeping its
// result in `more`.
// NOTE(review): the declaration of `more` and the return are not visible.
461 public override bool Read ()
465 readState = ReadState.Interactive;
467 more = ReadContent ();
// Not implemented: always throws NotImplementedException.
public override bool ReadAttributeValue ()
{
	throw new NotImplementedException ();
}
// Not implemented: always throws NotImplementedException.
public int ReadBase64 (byte [] buffer, int offset, int length)
{
	throw new NotImplementedException ();
}
// Not implemented: always throws NotImplementedException.
public int ReadBinHex (byte [] buffer, int offset, int length)
{
	throw new NotImplementedException ();
}
// Not implemented: always throws NotImplementedException.
public int ReadChars (char [] buffer, int offset, int length)
{
	throw new NotImplementedException ();
}
// Returns the markup inside the current node as a string. While
// saveToXmlBuffer is set, consumed characters are captured in xmlBuffer; the
// loop runs until the recorded end name equals the name the reader started
// on. Afterwards the text of currentTag is removed from the captured markup
// and the buffer is reset for the next call.
// NOTE(review): the attribute branch body, the loop body and the return are
// not visible in SOURCE.
497 public override string ReadInnerXml ()
499 // Still need a Well Formedness check.
500 // Will wait for Validating reader ;-)
501 if (NodeType == XmlNodeType.Attribute) {
504 saveToXmlBuffer = true;
505 string startname = this.Name;
506 string endname = string.Empty;
507 readState = ReadState.Interactive;
509 while (startname != endname) {
// Strip the text of the last tag held in currentTag from the captured markup.
514 xmlBuffer.Replace(currentTag.ToString (), "");
515 saveToXmlBuffer = false;
516 string InnerXml = xmlBuffer.ToString ();
517 xmlBuffer.Length = 0;
// Returns the markup of the current node including the node itself. For an
// attribute this is name="value" built from Name and Value. Otherwise the
// text of the tag already held in currentTag seeds xmlBuffer, consumed
// characters are captured while saveToXmlBuffer is set, and the loop runs
// until the recorded end name equals the name the reader started on. The
// buffer is reset for the next call.
// NOTE(review): the loop body and the final return are not visible in SOURCE.
523 public override string ReadOuterXml ()
525 // Still need a Well Formedness check.
526 // Will wait for Validating reader ;-)
527 if (NodeType == XmlNodeType.Attribute) {
528 return Name+"=\""+Value+"\"";
530 saveToXmlBuffer = true;
531 xmlBuffer.Append(currentTag.ToString ());
532 string startname = this.Name;
533 string endname = string.Empty;
534 readState = ReadState.Interactive;
536 while (startname != endname) {
540 saveToXmlBuffer = false;
541 string OuterXml = xmlBuffer.ToString ();
542 xmlBuffer.Length = 0;
// Not implemented: always throws NotImplementedException.
public override string ReadString ()
{
	throw new NotImplementedException ();
}
// Not implemented: always throws NotImplementedException.
public void ResetState ()
{
	throw new NotImplementedException ();
}
// Entity resolution is not something this reader does, so any call is
// rejected with InvalidOperationException.
public override void ResolveEntity ()
{
	throw new InvalidOperationException ("XmlTextReaders don't resolve entities.");
}
// Private state of the reader.
// NOTE(review): the embedded numbering has gaps here, so some short field
// declarations (other members reference `depth`, `column` and `name`) are
// not visible in SOURCE.

// Parsing context (name table, namespace manager), the input and the state.
569 private XmlParserContext parserContext;
571 private TextReader reader;
572 private ReadState readState;
575 private bool depthDown;
577 private bool popScope;
// Properties of the node the reader is currently positioned on.
579 private XmlNodeType nodeType;
581 private string prefix;
582 private string localName;
583 private string namespaceURI;
584 private bool isEmptyElement;
585 private string value;
// Copy of the element's properties, written by SaveProperties and read back
// by RestoreProperties.
587 private XmlNodeType saveNodeType;
588 private string saveName;
589 private string savePrefix;
590 private string saveLocalName;
591 private string saveNamespaceURI;
592 private bool saveIsEmptyElement;
// Attribute values keyed by qualified name, the names in insertion order,
// and the enumerator used while the reader walks the attributes.
594 private Hashtable attributes;
595 private ArrayList orderedAttributes;
596 private IEnumerator orderedAttributesEnumerator;
// Pending entity reference to report as its own node.
598 private bool returnEntityReference;
599 private string entityReferenceName;
// Growable scratch buffer for names.
601 private char [] nameBuffer;
602 private int nameLength;
603 private int nameCapacity;
604 private const int initialNameCapacity = 256;
// Growable scratch buffer for values.
606 private char [] valueBuffer;
607 private int valueLength;
608 private int valueCapacity;
609 private const int initialValueCapacity = 8192;
611 private StringBuilder xmlBuffer; // This is for Read(Inner|Outer)Xml
612 private StringBuilder currentTag; // A buffer for ReadContent for ReadOuterXml
// When set, ReadChar also appends each character to xmlBuffer.
613 private bool saveToXmlBuffer;
// Puts every piece of reader state into its starting configuration: Initial
// read state, no current node, empty attribute collections, no pending
// entity reference, scratch buffers at their initial capacities and empty
// capture buffers.
// NOTE(review): the declaration line of this routine and several short
// assignments are not visible in SOURCE (the embedded numbering has gaps);
// presumably this is the initializer shared by the constructors - confirm.
619 readState = ReadState.Initial;
626 nodeType = XmlNodeType.None;
628 prefix = String.Empty;
629 localName = string.Empty;
630 isEmptyElement = false;
631 value = String.Empty;
633 attributes = new Hashtable ();
634 orderedAttributes = new ArrayList ();
635 orderedAttributesEnumerator = null;
637 returnEntityReference = false;
638 entityReferenceName = String.Empty;
640 nameBuffer = new char [initialNameCapacity];
642 nameCapacity = initialNameCapacity;
644 valueBuffer = new char [initialValueCapacity];
646 valueCapacity = initialValueCapacity;
648 xmlBuffer = new StringBuilder ();
649 currentTag = new StringBuilder ();
652 // Use this method rather than setting the properties
653 // directly so that all the necessary properties can
654 // be changed in harmony with each other. Maybe the
655 // fields should be in a separate class to help enforce this.
//
// Updates the current-node state in one step. The node type and the
// empty-element flag are stored directly; the qualified name is split at its
// first ':' into prefix and local name (an unprefixed name gets an empty
// prefix), and the namespace URI is resolved from the prefix.
// NOTE(review): the middle parameters and several short assignments are not
// visible in SOURCE; callers pass (nodeType, name, isEmptyElement, value,
// clearAttributes).
657 private void SetProperties (
658 XmlNodeType nodeType,
662 bool clearAttributes)
664 this.nodeType = nodeType;
666 this.isEmptyElement = isEmptyElement;
672 int indexOfColon = name.IndexOf (':');
674 if (indexOfColon == -1) {
675 prefix = String.Empty;
678 prefix = name.Substring (0, indexOfColon);
679 localName = name.Substring (indexOfColon + 1);
682 namespaceURI = LookupNamespace (prefix);
// Copies the current node's properties into the save* fields so they can be
// brought back by RestoreProperties.
// NOTE(review): the embedded numbering skips 688-689, so two short
// assignments are not visible in SOURCE.
685 private void SaveProperties ()
687 saveNodeType = nodeType;
690 saveLocalName = localName;
691 saveNamespaceURI = namespaceURI;
692 saveIsEmptyElement = isEmptyElement;
693 // An element's value is always String.Empty.
// Brings the current node's properties back from the save* fields. The value
// is not saved; it is simply reset to the empty string.
// NOTE(review): the embedded numbering skips 699-700, so two short
// assignments are not visible in SOURCE.
696 private void RestoreProperties ()
698 nodeType = saveNodeType;
701 localName = saveLocalName;
702 namespaceURI = saveNamespaceURI;
703 isEmptyElement = saveIsEmptyElement;
704 value = String.Empty;
// Records an attribute of the current element: the value goes into the
// lookup table keyed by qualified name and the name is appended to the
// ordered list so attributes can be walked in document order.
// Throws XmlException when the element already has an attribute of that name.
private void AddAttribute (string name, string value)
{
	// Hashtable.Add would fail with a bare ArgumentException on a repeated
	// key; report the malformed input through the reader's own error type.
	if (attributes.ContainsKey (name))
		throw new XmlException (String.Format ("duplicate attribute: {0}", name));

	attributes.Add (name, value);
	orderedAttributes.Add (name);
}
// Forgets the attributes of the previous node. The collections are only
// touched when the table is non-empty; the shared enumerator is dropped.
// NOTE(review): the embedded numbering skips 716, so one short statement
// inside the `if` is not visible in SOURCE.
713 private void ClearAttributes ()
715 if (attributes.Count > 0) {
717 orderedAttributes.Clear ();
720 orderedAttributesEnumerator = null;
// Looks at the next character of the input without consuming it.
// Whatever TextReader.Peek reports is passed through unchanged.
private int PeekChar ()
{
	int upcoming = reader.Peek ();
	return upcoming;
}
// Consumes one character from the input. While saveToXmlBuffer is set the
// character is also appended to xmlBuffer, and it is appended to currentTag.
// NOTE(review): the embedded numbering skips 731-736 and the lines after
// 740, so the statements between the read and the capture (and the return)
// are not visible in SOURCE.
728 private int ReadChar ()
730 int ch = reader.Read ();
737 if (saveToXmlBuffer) {
738 xmlBuffer.Append ((char) ch);
740 currentTag.Append ((char) ch);
744 // This should really keep track of some state so
745 // that it's not possible to have more than one document
746 // element or text outside of the document element.
//
// Parses the next node from the input. The tag capture buffer is emptied
// first. A pending entity reference is reported through
// SetEntityReferenceProperties. At end of input the read state becomes
// EndOfFile and the node properties are reset to an empty None node with
// the attributes cleared.
// NOTE(review): most of the control flow (the conditions guarding PopScope
// and the end-of-file branch, the dispatch on the next character, the
// SetProperties call line and the returns) is not visible in SOURCE.
747 private bool ReadContent ()
750 currentTag.Length = 0;
752 parserContext.NamespaceManager.PopScope ();
759 if (returnEntityReference) {
761 SetEntityReferenceProperties ();
772 readState = ReadState.EndOfFile;
774 XmlNodeType.None, // nodeType
775 String.Empty, // name
776 false, // isEmptyElement
777 String.Empty, // value
778 true // clearAttributes
// Reports the pending entity reference as the current node: an
// EntityReference named entityReferenceName, with an empty value and the
// attributes cleared. The pending flag and name are then reset.
// NOTE(review): the line that opens the call taking these five arguments is
// not visible in SOURCE (presumably SetProperties).
792 private void SetEntityReferenceProperties ()
795 XmlNodeType.EntityReference, // nodeType
796 entityReferenceName, // name
797 false, // isEmptyElement
798 String.Empty, // value
799 true // clearAttributes
802 returnEntityReference = false;
803 entityReferenceName = String.Empty;
806 // The leading '<' has already been consumed.
//
// Parses the markup that follows a '<'. The only call visible here hands a
// processing instruction to ReadProcessingInstruction.
// NOTE(review): the conditions and the other branches are not visible.
807 private void ReadTag ()
817 ReadProcessingInstruction ();
829 // The leading '<' has already been consumed.
//
// Parses a start tag. A new namespace scope is pushed before the element
// name is read. If the next character can start a name there are attributes
// to read; a '/' before the end of the tag marks an empty element. The
// current node becomes an Element with an empty value; its attributes are
// kept (clearAttributes is false).
// NOTE(review): the short statements between the visible lines (whitespace
// skipping, the attribute-reading call, the expected closing characters and
// the SetProperties call line) are not visible in SOURCE.
830 private void ReadStartTag ()
832 parserContext.NamespaceManager.PushScope ();
834 string name = ReadName ();
837 bool isEmptyElement = false;
841 if (XmlChar.IsFirstNameChar (PeekChar ()))
844 if (PeekChar () == '/') {
846 isEmptyElement = true;
856 XmlNodeType.Element, // nodeType
858 isEmptyElement, // isEmptyElement
859 String.Empty, // value
860 false // clearAttributes
864 // The reader is positioned on the first character
865 // of the element's name.
//
// Parses an end tag: reads the element name and makes the current node an
// EndElement with an empty value and the attributes cleared.
// NOTE(review): the statements between the name and the property update and
// the SetProperties call line are not visible in SOURCE.
866 private void ReadEndTag ()
868 string name = ReadName ();
875 XmlNodeType.EndElement, // nodeType
877 false, // isEmptyElement
878 String.Empty, // value
879 true // clearAttributes
// Appends one character to the name scratch buffer, growing it first when
// it is full.
private void AppendNameChar (int ch)
{
	CheckNameCapacity ();

	int slot = nameLength;
	nameLength = slot + 1;
	nameBuffer [slot] = (char)ch;
}
// Makes room for at least one more name character. When the buffer is
// exactly full its capacity is doubled and the characters collected so far
// are carried over to the new array.
private void CheckNameCapacity ()
{
	if (nameLength != nameCapacity)
		return;

	nameCapacity *= 2;
	char [] grown = new char [nameCapacity];
	Array.Copy (nameBuffer, grown, nameLength);
	nameBuffer = grown;
}
// Builds a string from the characters collected in the name buffer so far.
private string CreateNameString ()
{
	string collected = new String (nameBuffer, 0, nameLength);
	return collected;
}
// Appends one character to the value scratch buffer, growing it as needed.
// `ch` is either a UTF-16 code unit taken from the input or a code point
// computed from a numeric character reference. Code points in the range
// U+10000..U+10FFFF do not fit in a single char and are stored as a
// surrogate pair instead of being truncated by the cast. All other values
// are stored with a plain (char) cast, exactly as before.
private void AppendValueChar (int ch)
{
	if (ch > 0xFFFF && ch <= 0x10FFFF) {
		int offset = ch - 0x10000;

		CheckValueCapacity ();
		valueBuffer [valueLength++] = (char) (0xD800 + (offset >> 10));

		CheckValueCapacity ();
		valueBuffer [valueLength++] = (char) (0xDC00 + (offset & 0x3FF));
		return;
	}

	CheckValueCapacity ();
	valueBuffer [valueLength++] = (char)ch;
}
// Makes room for at least one more value character. When the buffer is
// exactly full its capacity is doubled and the characters collected so far
// are carried over to the new array.
private void CheckValueCapacity ()
{
	if (valueLength != valueCapacity)
		return;

	valueCapacity *= 2;
	char [] grown = new char [valueCapacity];
	Array.Copy (valueBuffer, grown, valueLength);
	valueBuffer = grown;
}
// Builds a string from the characters collected in the value buffer so far.
private string CreateValueString ()
{
	string collected = new String (valueBuffer, 0, valueLength);
	return collected;
}
927 // The reader is positioned on the first character
//
// Parses character data up to the next '<' or the end of input, collecting
// it in the value buffer. References are expanded through ReadReference; a
// true result from it ends the run so the entity reference can be reported
// as its own node. If such a reference is pending and no text was collected,
// the entity reference becomes the current node at once; otherwise the
// current node becomes a Text node carrying the collected value.
// NOTE(review): the short statements (buffer reset, the test for '&', loop
// bookkeeping, else/break lines and the SetProperties call line) are not
// visible in SOURCE.
929 private void ReadText ()
933 int ch = PeekChar ();
935 while (ch != '<' && ch != -1) {
938 if (ReadReference (false))
941 AppendValueChar (ReadChar ());
946 if (returnEntityReference && valueLength == 0) {
948 SetEntityReferenceProperties ();
956 XmlNodeType.Text, // nodeType
957 String.Empty, // name
958 false, // isEmptyElement
959 CreateValueString (), // value
960 true // clearAttributes
965 // The leading '&' has already been consumed.
966 // Returns true if the entity reference isn't a simple
967 // character reference or one of the predefined entities.
968 // This allows the ReadText method to break so that the
969 // next call to Read will return the EntityReference node.
//
// A '#' after the '&' selects ReadCharacterReference; anything else goes to
// ReadEntityReference, which receives `ignoreEntityReferences` unchanged.
// The result is the returnEntityReference flag.
// NOTE(review): the embedded numbering skips 973 and 975, so the statement
// consuming the '#' and the else line are not visible in SOURCE.
970 private bool ReadReference (bool ignoreEntityReferences)
972 if (PeekChar () == '#') {
974 ReadCharacterReference ();
976 ReadEntityReference (ignoreEntityReferences);
978 return returnEntityReference;
// Parses a numeric character reference and appends the character it denotes
// to the value buffer. An 'x' selects hexadecimal: digits are folded in with
// a 4-bit shift and both upper- and lower-case A-F are accepted. Otherwise
// the digits are decimal. Digits are consumed up to ';' or the end of input;
// any other character raises XmlException.
// NOTE(review): the declaration of the accumulator `value`, the statements
// consuming 'x' and ';', the else lines and the String.Format call lines are
// not visible in SOURCE.
981 private void ReadCharacterReference ()
985 if (PeekChar () == 'x') {
988 while (PeekChar () != ';' && PeekChar () != -1) {
989 int ch = ReadChar ();
991 if (ch >= '0' && ch <= '9')
992 value = (value << 4) + ch - '0';
993 else if (ch >= 'A' && ch <= 'F')
994 value = (value << 4) + ch - 'A' + 10;
995 else if (ch >= 'a' && ch <= 'f')
996 value = (value << 4) + ch - 'a' + 10;
998 throw new XmlException (
1000 "invalid hexadecimal digit: {0} (#x{1:X})",
1005 while (PeekChar () != ';' && PeekChar () != -1) {
1006 int ch = ReadChar ();
1008 if (ch >= '0' && ch <= '9')
1009 value = value * 10 + ch - '0';
1011 throw new XmlException (
1013 "invalid decimal digit: {0} (#x{1:X})",
// The accumulated code point becomes part of the current value.
1021 AppendValueChar (value);
// Parses a named entity reference. The name is collected up to ';' or the
// end of input. The visible branches append one of the characters
// '<', '>', '&', '\'' or '"' to the value buffer. For any other name, when
// `ignoreEntityReferences` is true the reference is written back into the
// value verbatim as "&name;"; otherwise it is recorded as pending
// (returnEntityReference / entityReferenceName) so it can be reported as its
// own node.
// NOTE(review): the name-buffer reset, the loop bookkeeping, the statement
// consuming ';' and the switch/case lines selecting each predefined entity
// are not visible in SOURCE.
1024 private void ReadEntityReference (bool ignoreEntityReferences)
1028 int ch = PeekChar ();
1030 while (ch != ';' && ch != -1) {
1031 AppendNameChar (ReadChar ());
1037 string name = CreateNameString ();
1042 AppendValueChar ('<');
1045 AppendValueChar ('>');
1048 AppendValueChar ('&');
1051 AppendValueChar ('\'');
1054 AppendValueChar ('"');
1057 if (ignoreEntityReferences) {
1058 AppendValueChar ('&');
1060 foreach (char ch2 in name) {
1061 AppendValueChar (ch2);
1064 AppendValueChar (';');
1066 returnEntityReference = true;
1067 entityReferenceName = name;
1073 // The reader is positioned on the first character of
1074 // the attribute name.
//
// Reads name/value pairs until the next character is '/', '>' or the end of
// input. "xmlns" declares the default namespace and "xmlns:prefix" declares
// a prefixed one in the namespace manager; every pair, namespace
// declarations included, is also recorded with AddAttribute.
// NOTE(review): the short statements between name and value (whitespace
// skipping, the expected '=') and the `do {` line are not visible.
1075 private void ReadAttributes ()
1078 string name = ReadName ();
1082 string value = ReadAttribute ();
1085 if (name == "xmlns")
1086 parserContext.NamespaceManager.AddNamespace (String.Empty, value);
1087 else if (name.StartsWith ("xmlns:"))
// Substring (6) drops the "xmlns:" part, leaving the declared prefix.
1088 parserContext.NamespaceManager.AddNamespace (name.Substring (6), value);
1090 AddAttribute (name, value);
1091 } while (PeekChar () != '/' && PeekChar () != '>' && PeekChar () != -1);
1094 // The reader is positioned on the quote character.
//
// Reads one quoted attribute value and returns it. The opening quote must
// be ' or "; characters are collected until the same quote character is
// seen again, which is then consumed. References are expanded with
// ReadReference (true). A '<' or the end of input inside the value raises
// XmlException.
// NOTE(review): the value-buffer reset and the switch/case lines selecting
// each branch are not visible in SOURCE.
1095 private string ReadAttribute ()
1097 int quoteChar = ReadChar ();
1099 if (quoteChar != '\'' && quoteChar != '\"')
1100 throw new XmlException ("an attribute value was not quoted");
1104 while (PeekChar () != quoteChar) {
1105 int ch = ReadChar ();
1110 throw new XmlException ("attribute values cannot contain '<'");
1112 ReadReference (true);
1115 throw new XmlException ("unexpected end of file in an attribute value");
1117 AppendValueChar (ch);
1122 ReadChar (); // quoteChar
1124 return CreateValueString ();
1127 // The reader is positioned on the first character
//
// Parses a processing instruction: reads the target name, then collects
// characters into the value buffer while watching for the closing "?>". The
// current node becomes a ProcessingInstruction carrying the collected value,
// with the attributes cleared.
// NOTE(review): the buffer reset, the statements run when "?>" is found and
// the SetProperties call line (including its name argument) are not visible
// in SOURCE.
1129 private void ReadProcessingInstruction ()
1131 string target = ReadName ();
1136 while (PeekChar () != -1) {
1137 int ch = ReadChar ();
1139 if (ch == '?' && PeekChar () == '>') {
1144 AppendValueChar ((char)ch);
1148 XmlNodeType.ProcessingInstruction, // nodeType
1150 false, // isEmptyElement
1151 CreateValueString (), // value
1152 true // clearAttributes
1156 // The reader is positioned on the first character after
1157 // the leading '<!'.
//
// Parses the markup that follows "<!". It starts by peeking at the next
// character.
// NOTE(review): everything after the peek is not visible in SOURCE.
1158 private void ReadDeclaration ()
1160 int ch = PeekChar ();
1182 // The reader is positioned on the first character after
1183 // the leading '<!--'.
//
// Parses a comment, collecting its text in the value buffer. When "--" is
// seen, the character after it must be '>'; anything else raises
// XmlException. The current node becomes a Comment carrying the collected
// text, with the attributes cleared.
// NOTE(review): the buffer reset, the statements consuming the closing
// characters and leaving the loop, and the SetProperties call line are not
// visible in SOURCE.
1184 private void ReadComment ()
1188 while (PeekChar () != -1) {
1189 int ch = ReadChar ();
1191 if (ch == '-' && PeekChar () == '-') {
1194 if (PeekChar () != '>')
1195 throw new XmlException ("comments cannot contain '--'");
1201 AppendValueChar ((char)ch);
1205 XmlNodeType.Comment, // nodeType
1206 String.Empty, // name
1207 false, // isEmptyElement
1208 CreateValueString (), // value
1209 true // clearAttributes
1213 // The reader is positioned on the first character after
1214 // the leading '<![CDATA['.
//
// Parses a CDATA section, collecting its text in the value buffer. When
// "]]" is seen the next character is checked for '>'; the other visible
// branch keeps the two brackets as ordinary text. The current node becomes
// a CDATA node carrying the collected text, with the attributes cleared.
// NOTE(review): the buffer reset, the statements run when "]]>" is found,
// the else/continue lines and the SetProperties call line are not visible.
1215 private void ReadCDATA ()
1219 while (PeekChar () != -1) {
1220 int ch = ReadChar ();
1222 if (ch == ']' && PeekChar () == ']') {
1223 ch = ReadChar (); // ']'
1225 if (PeekChar () == '>') {
1229 AppendValueChar (']');
1230 AppendValueChar (']');
1235 AppendValueChar ((char)ch);
1241 XmlNodeType.CDATA, // nodeType
1242 String.Empty, // name
1243 false, // isEmptyElement
1244 CreateValueString (), // value
1245 true // clearAttributes
1249 // The reader is positioned on the first character
//
// Reads an XML name and returns it. The first character must satisfy
// XmlChar.IsFirstNameChar, otherwise XmlException is thrown; further
// characters are consumed for as long as XmlChar.IsNameChar accepts them.
// NOTE(review): the embedded numbering skips 1255-1257, so a short statement
// before the first append (presumably the name-buffer reset) is not visible.
1251 private string ReadName ()
1253 if (!XmlChar.IsFirstNameChar (PeekChar ()))
1254 throw new XmlException ("a name did not start with a legal character");
1258 AppendNameChar (ReadChar ());
1260 while (XmlChar.IsNameChar (PeekChar ())) {
1261 AppendNameChar (ReadChar ());
1264 return CreateNameString ();
1267 // Read the next character and compare it against the
1268 // specified character.
//
// Consumes one character and throws XmlException when it differs from
// `expected`; the message names both the expected and the found character.
// NOTE(review): the String.Format call line and its argument lines are not
// visible in SOURCE.
1269 private void Expect (int expected)
1271 int ch = ReadChar ();
1273 if (ch != expected) {
1274 throw new XmlException (
1276 "expected '{0}' ({1:X}) but found '{2}' ({3:X})",
1284 // Does not consume the first non-whitespace character.
//
// Loops for as long as the next character is whitespace according to
// XmlChar.IsWhitespace.
// NOTE(review): the loop body is not visible in SOURCE (presumably it
// consumes the peeked character) - confirm.
1285 private void SkipWhitespace ()
1287 while (XmlChar.IsWhitespace (PeekChar ()))