2 // System.Xml.XmlTextReader
5 // Jason Diamond (jason@injektilo.org)
6 // Adam Treat (manyoso@yahoo.com)
8 // (C) 2001, 2002 Jason Diamond http://injektilo.org/
12 // This can only parse basic XML: elements, attributes, processing
13 // instructions, and comments are OK.
15 // It barfs on DOCTYPE declarations.
17 // There's also no checking being done for either well-formedness
20 // NameTables aren't being used everywhere yet.
22 // Some thought needs to be given to performance. There are too many
23 // strings being allocated.
25 // Some of the MoveTo methods haven't been implemented yet.
27 // LineNumber and LinePosition aren't being tracked.
29 // xml:space, xml:lang, and xml:base aren't being tracked.
33 using System.Collections;
39 public class XmlTextReader : XmlReader, IXmlLineInfo
41 WhitespaceHandling whitespaceHandling = WhitespaceHandling.All;
44 protected XmlTextReader ()
49 public XmlTextReader (Stream input)
51 // We can share some code in the constructors (at least for this one and next 2)
52 XmlNameTable nt = new NameTable ();
53 XmlNamespaceManager nsMgr = new XmlNamespaceManager (nt);
54 parserContext = new XmlParserContext (null, nsMgr, null, XmlSpace.None);
56 reader = new StreamReader (input);
// Creates a reader for the resource named by url, using a freshly created
// NameTable / XmlNamespaceManager pair wrapped in a new XmlParserContext.
// The url string is handed straight to StreamReader, so (as the comment
// below says) it is treated as a file path rather than a real URL.
60 public XmlTextReader (string url)
62 XmlNameTable nt = new NameTable ();
63 XmlNamespaceManager nsMgr = new XmlNamespaceManager (nt);
64 parserContext = new XmlParserContext (null, nsMgr, null, XmlSpace.None);
66 // StreamReader does not support url, only filepath;-)
67 reader = new StreamReader(url);
71 public XmlTextReader (TextReader input)
73 XmlNameTable nt = new NameTable ();
74 XmlNamespaceManager nsMgr = new XmlNamespaceManager (nt);
75 parserContext = new XmlParserContext (null, nsMgr, null, XmlSpace.None);
81 protected XmlTextReader (XmlNameTable nt)
83 throw new NotImplementedException ();
87 public XmlTextReader (Stream input, XmlNameTable nt)
89 XmlNamespaceManager nsMgr = new XmlNamespaceManager (nt);
90 parserContext = new XmlParserContext (null, nsMgr, null, XmlSpace.None);
92 reader = new StreamReader (input);
96 public XmlTextReader (string url, Stream input)
98 throw new NotImplementedException ();
102 public XmlTextReader (string url, TextReader input)
104 throw new NotImplementedException ();
108 public XmlTextReader (string url, XmlNameTable nt)
110 throw new NotImplementedException ();
114 public XmlTextReader (TextReader input, XmlNameTable nt)
116 XmlNamespaceManager nsMgr = new XmlNamespaceManager (nt);
117 parserContext = new XmlParserContext (null, nsMgr, null, XmlSpace.None);
123 public XmlTextReader (Stream xmlFragment, XmlNodeType fragType, XmlParserContext context)
125 parserContext = context;
126 reader = new StreamReader(xmlFragment);
128 // throw new NotImplementedException ();
132 public XmlTextReader (string url, Stream input, XmlNameTable nt)
134 throw new NotImplementedException ();
138 public XmlTextReader (string url, TextReader input, XmlNameTable nt)
140 throw new NotImplementedException ();
144 public XmlTextReader (string xmlFragment, XmlNodeType fragType, XmlParserContext context)
146 //Waiting for Validating reader for fragType rules.
147 parserContext = context;
149 reader = new StringReader(xmlFragment);
156 public override int AttributeCount
158 get { return attributes.Count; }
161 public override string BaseURI
163 get { return parserContext.BaseURI; }
166 public override int Depth
173 public Encoding Encoding
175 get { return parserContext.Encoding; }
178 public override bool EOF
183 readState == ReadState.EndOfFile ||
184 readState == ReadState.Closed;
188 public override bool HasValue
190 get { return value != String.Empty; }
193 public override bool IsDefault
197 // XmlTextReader does not expand default attributes.
202 public override bool IsEmptyElement
204 get { return isEmptyElement; }
207 public override string this [int i]
209 get { return GetAttribute (i); }
212 public override string this [string name]
214 get { return GetAttribute (name); }
217 public override string this [string localName, string namespaceName]
219 get { return GetAttribute (localName, namespaceName); }
222 public int LineNumber
227 public int LinePosition
229 get { return column; }
232 public override string LocalName
234 get { return localName; }
237 public override string Name
243 public bool Namespaces
245 get { throw new NotImplementedException (); }
246 set { throw new NotImplementedException (); }
249 public override string NamespaceURI
251 get { return namespaceURI; }
254 public override XmlNameTable NameTable
256 get { return parserContext.NameTable; }
259 public override XmlNodeType NodeType
261 get { return nodeType; }
265 public bool Normalization
267 get { throw new NotImplementedException (); }
268 set { throw new NotImplementedException (); }
271 public override string Prefix
273 get { return prefix; }
277 public override char QuoteChar
279 get { throw new NotImplementedException (); }
282 public override ReadState ReadState
284 get { return readState; }
287 public override string Value
289 get { return value; }
292 public WhitespaceHandling WhitespaceHandling
294 get { return whitespaceHandling; }
295 set { whitespaceHandling = value; }
299 public override string XmlLang
301 get { throw new NotImplementedException (); }
305 public XmlResolver XmlResolver
307 set { throw new NotImplementedException (); }
311 public override XmlSpace XmlSpace
313 get { throw new NotImplementedException (); }
321 public override void Close ()
323 readState = ReadState.Closed;
// Returns the value of the attribute at index i. Only the range check exists
// so far: any index that passes it reaches the NotImplementedException below.
// NOTE(review): the guard uses '>' so i == attributes.Count is not rejected,
// and negative indices are not checked here - confirm whether '>=' and a
// '< 0' test were intended.
// NOTE(review): the message says "smaller than" although the check fires when
// i is too large, and the text is passed as the exception's single string
// argument (the parameter-name slot of ArgumentOutOfRangeException) - verify.
327 public override string GetAttribute (int i)
329 if (i > attributes.Count)
330 throw new ArgumentOutOfRangeException ("i is smaller than AttributeCount");
332 throw new NotImplementedException ();
// Looks the attribute up by the exact name it was stored under in the
// attributes table and returns its value, or String.Empty when no such key
// exists.
// NOTE(review): XmlReader.GetAttribute is documented to return null for a
// missing attribute - confirm whether String.Empty is deliberate here.
335 public override string GetAttribute (string name)
337 return attributes.ContainsKey (name) ?
338 attributes [name] as string : String.Empty;
// Finds an attribute by local name and namespace URI. Every stored attribute
// name is examined: a name containing ':' is split at the first colon, its
// prefix is resolved with LookupNamespace, and the attribute matches when both
// the local part and the resolved URI equal the arguments.
// In the branches visible here, a name without a colon only matches when it is
// the default namespace declaration "xmlns" requested in the xmlns namespace.
// NOTE(review): other unprefixed attributes do not appear to be matched by
// these branches - confirm against the part of the method not shown.
341 public override string GetAttribute (string localName, string namespaceURI)
343 foreach (DictionaryEntry entry in attributes)
345 string thisName = entry.Key as string;
347 int indexOfColon = thisName.IndexOf (':');
349 if (indexOfColon != -1) {
350 string thisLocalName = thisName.Substring (indexOfColon + 1);
352 if (localName == thisLocalName) {
353 string thisPrefix = thisName.Substring (0, indexOfColon);
354 string thisNamespaceURI = LookupNamespace (thisPrefix);
356 if (namespaceURI == thisNamespaceURI)
// thisName was taken from the table's own keys, so the lookup is expected to hit.
357 return attributes.ContainsKey (thisName) ?
358 attributes [thisName] as string : String.Empty;
360 } else if (localName == "xmlns" && namespaceURI == "http://www.w3.org/2000/xmlns/" && thisName == "xmlns")
361 return attributes.ContainsKey (thisName) ?
362 attributes [thisName] as string : String.Empty;
369 public TextReader GetRemainder ()
371 throw new NotImplementedException ();
375 bool IXmlLineInfo.HasLineInfo ()
380 public override string LookupNamespace (string prefix)
382 return parserContext.NamespaceManager.LookupNamespace (prefix);
386 public override void MoveToAttribute (int i)
388 throw new NotImplementedException ();
391 public override bool MoveToAttribute (string name)
396 if (attributes == null)
399 if (orderedAttributesEnumerator == null) {
401 orderedAttributesEnumerator = orderedAttributes.GetEnumerator ();
404 while (orderedAttributesEnumerator.MoveNext ()) {
405 if(name == orderedAttributesEnumerator.Current as string) {
413 string value = attributes [name] as string;
415 XmlNodeType.Attribute, // nodeType
417 false, // isEmptyElement
419 false // clearAttributes
427 public override bool MoveToAttribute (string localName, string namespaceName)
429 throw new NotImplementedException ();
432 public override bool MoveToElement ()
434 if (orderedAttributesEnumerator != null) {
435 orderedAttributesEnumerator = null;
436 RestoreProperties ();
443 public override bool MoveToFirstAttribute ()
446 return MoveToNextAttribute ();
// Advances to the next attribute of the current node. The enumerator over
// orderedAttributes (the list AddAttribute appends names to) is created on
// first use; when it yields another name, that name's value is fetched from
// the attributes table and used for an XmlNodeType.Attribute node.
// The attribute list is kept (clearAttributes is false) so that further
// moves remain possible.
449 public override bool MoveToNextAttribute ()
451 if (attributes == null)
454 if (orderedAttributesEnumerator == null) {
456 orderedAttributesEnumerator = orderedAttributes.GetEnumerator ();
459 if (orderedAttributesEnumerator.MoveNext ()) {
460 string name = orderedAttributesEnumerator.Current as string;
461 string value = attributes [name] as string;
463 XmlNodeType.Attribute, // nodeType
465 false, // isEmptyElement
467 false // clearAttributes
475 public override bool Read ()
479 readState = ReadState.Interactive;
481 more = ReadContent ();
487 public override bool ReadAttributeValue ()
489 throw new NotImplementedException ();
493 public int ReadBase64 (byte [] buffer, int offset, int length)
495 throw new NotImplementedException ();
499 public int ReadBinHex (byte [] buffer, int offset, int length)
501 throw new NotImplementedException ();
505 public int ReadChars (char [] buffer, int offset, int length)
507 throw new NotImplementedException ();
511 public override string ReadInnerXml ()
513 // Still need a Well Formedness check.
514 // Will wait for Validating reader ;-)
515 if (NodeType == XmlNodeType.Attribute) {
518 saveToXmlBuffer = true;
519 string startname = this.Name;
520 string endname = string.Empty;
521 readState = ReadState.Interactive;
523 while (startname != endname) {
528 xmlBuffer.Replace(currentTag.ToString (), "");
529 saveToXmlBuffer = false;
530 string InnerXml = xmlBuffer.ToString ();
531 xmlBuffer.Length = 0;
537 public override string ReadOuterXml ()
539 if (NodeType == XmlNodeType.Attribute) {
540 return Name+"=\""+Value+"\"";
542 saveToXmlBuffer = true;
543 xmlBuffer.Append(currentTag.ToString ());
544 string startname = this.Name;
545 string endname = string.Empty;
546 readState = ReadState.Interactive;
548 while (startname != endname) {
552 saveToXmlBuffer = false;
553 string OuterXml = xmlBuffer.ToString ();
554 xmlBuffer.Length = 0;
560 public override string ReadString ()
562 throw new NotImplementedException ();
566 public void ResetState ()
568 throw new NotImplementedException ();
571 public override void ResolveEntity ()
573 // XmlTextReaders don't resolve entities.
574 throw new InvalidOperationException ("XmlTextReaders don't resolve entities.");
581 private XmlParserContext parserContext;
583 private TextReader reader;
584 private ReadState readState;
587 private int elementDepth;
588 private bool depthDown;
590 private bool popScope;
592 private XmlNodeType nodeType;
594 private string prefix;
595 private string localName;
596 private string namespaceURI;
597 private bool isEmptyElement;
598 private string value;
600 private XmlNodeType saveNodeType;
601 private string saveName;
602 private string savePrefix;
603 private string saveLocalName;
604 private string saveNamespaceURI;
605 private bool saveIsEmptyElement;
607 private Hashtable attributes;
608 private ArrayList orderedAttributes;
609 private IEnumerator orderedAttributesEnumerator;
611 private bool returnEntityReference;
612 private string entityReferenceName;
614 private char [] nameBuffer;
615 private int nameLength;
616 private int nameCapacity;
617 private const int initialNameCapacity = 256;
619 private char [] valueBuffer;
620 private int valueLength;
621 private int valueCapacity;
622 private const int initialValueCapacity = 8192;
624 private StringBuilder xmlBuffer; // This is for Read(Inner|Outer)Xml
625 private StringBuilder currentTag; // A buffer for ReadContent for ReadOuterXml
626 private bool saveToXmlBuffer;
627 private int line = 1;
628 private int column = 1;
632 readState = ReadState.Initial;
639 nodeType = XmlNodeType.None;
641 prefix = String.Empty;
642 localName = string.Empty;
643 isEmptyElement = false;
644 value = String.Empty;
646 attributes = new Hashtable ();
647 orderedAttributes = new ArrayList ();
648 orderedAttributesEnumerator = null;
650 returnEntityReference = false;
651 entityReferenceName = String.Empty;
653 nameBuffer = new char [initialNameCapacity];
655 nameCapacity = initialNameCapacity;
657 valueBuffer = new char [initialValueCapacity];
659 valueCapacity = initialValueCapacity;
661 xmlBuffer = new StringBuilder ();
662 currentTag = new StringBuilder ();
665 // Use this method rather than setting the properties
666 // directly so that all the necessary properties can
667 // be changed in harmony with each other. Maybe the
668 // fields should be in a separate class to help enforce
670 private void SetProperties (
671 XmlNodeType nodeType,
675 bool clearAttributes)
677 this.nodeType = nodeType;
679 this.isEmptyElement = isEmptyElement;
681 this.elementDepth = depth;
686 int indexOfColon = name.IndexOf (':');
688 if (indexOfColon == -1) {
689 prefix = String.Empty;
692 prefix = name.Substring (0, indexOfColon);
693 localName = name.Substring (indexOfColon + 1);
696 namespaceURI = LookupNamespace (prefix);
699 private void SaveProperties ()
701 saveNodeType = nodeType;
704 saveLocalName = localName;
705 saveNamespaceURI = namespaceURI;
706 saveIsEmptyElement = isEmptyElement;
707 // An element's value is always String.Empty.
710 private void RestoreProperties ()
712 nodeType = saveNodeType;
715 localName = saveLocalName;
716 namespaceURI = saveNamespaceURI;
717 isEmptyElement = saveIsEmptyElement;
718 value = String.Empty;
// Records one attribute: the value goes into the attributes Hashtable keyed by
// name, and the name is appended to orderedAttributes so that enumeration can
// follow the order in which attributes were added.
// NOTE(review): Hashtable.Add throws ArgumentException for a key that is
// already present, so a repeated attribute name would surface as that
// exception rather than an XmlException - confirm this is acceptable.
721 private void AddAttribute (string name, string value)
723 attributes.Add (name, value);
724 orderedAttributes.Add (name);
727 private void ClearAttributes ()
729 if (attributes.Count > 0) {
731 orderedAttributes.Clear ();
734 orderedAttributesEnumerator = null;
737 private int PeekChar ()
739 return reader.Peek ();
742 private int ReadChar ()
744 int ch = reader.Read ();
751 if (saveToXmlBuffer) {
752 xmlBuffer.Append ((char) ch);
754 currentTag.Append ((char) ch);
758 // This should really keep track of some state so
759 // that it's not possible to have more than one document
760 // element or text outside of the document element.
761 private bool ReadContent ()
763 currentTag.Length = 0;
765 parserContext.NamespaceManager.PopScope ();
769 if (returnEntityReference) {
771 SetEntityReferenceProperties ();
780 if (whitespaceHandling == WhitespaceHandling.All ||
781 whitespaceHandling == WhitespaceHandling.Significant)
782 return ReadWhitespace ();
785 return ReadContent ();
787 if (whitespaceHandling == WhitespaceHandling.All ||
788 whitespaceHandling == WhitespaceHandling.Significant)
789 return ReadWhitespace ();
792 return ReadContent ();
794 if (whitespaceHandling == WhitespaceHandling.All ||
795 whitespaceHandling == WhitespaceHandling.Significant)
796 return ReadWhitespace ();
799 return ReadContent ();
801 readState = ReadState.EndOfFile;
803 XmlNodeType.None, // nodeType
804 String.Empty, // name
805 false, // isEmptyElement
806 String.Empty, // value
807 true // clearAttributes
816 // return (PeekChar () != -1);
817 return this.ReadState != ReadState.EndOfFile;
820 private void SetEntityReferenceProperties ()
823 XmlNodeType.EntityReference, // nodeType
824 entityReferenceName, // name
825 false, // isEmptyElement
826 String.Empty, // value
827 true // clearAttributes
830 returnEntityReference = false;
831 entityReferenceName = String.Empty;
834 // The leading '<' has already been consumed.
835 private void ReadTag ()
845 ReadProcessingInstruction ();
857 // The leading '<' has already been consumed.
858 private void ReadStartTag ()
860 parserContext.NamespaceManager.PushScope ();
862 string name = ReadName ();
865 bool isEmptyElement = false;
869 if (XmlChar.IsFirstNameChar (PeekChar ()))
872 if (PeekChar () == '/') {
874 isEmptyElement = true;
882 XmlNodeType.Element, // nodeType
884 isEmptyElement, // isEmptyElement
885 String.Empty, // value
886 false // clearAttributes
896 // The reader is positioned on the first character
897 // of the element's name.
898 private void ReadEndTag ()
900 string name = ReadName ();
907 XmlNodeType.EndElement, // nodeType
909 false, // isEmptyElement
910 String.Empty, // value
911 true // clearAttributes
// Appends one character to the name buffer, letting CheckNameCapacity grow the
// buffer first if it is full. The int is narrowed to char before being stored.
917 private void AppendNameChar (int ch)
919 CheckNameCapacity ();
920 nameBuffer [nameLength++] = (char)ch;
// Ensures there is room for at least one more character in nameBuffer. When
// the buffer is exactly full, its capacity is doubled and the nameLength
// characters already collected are copied into the new array.
923 private void CheckNameCapacity ()
925 if (nameLength == nameCapacity) {
926 nameCapacity = nameCapacity * 2;
927 char [] oldNameBuffer = nameBuffer;
928 nameBuffer = new char [nameCapacity];
929 Array.Copy (oldNameBuffer, nameBuffer, nameLength);
933 private string CreateNameString ()
935 return new String (nameBuffer, 0, nameLength);
938 private void AppendValueChar (int ch)
940 CheckValueCapacity ();
941 valueBuffer [valueLength++] = (char)ch;
944 private void CheckValueCapacity ()
946 if (valueLength == valueCapacity) {
947 valueCapacity = valueCapacity * 2;
948 char [] oldValueBuffer = valueBuffer;
949 valueBuffer = new char [valueCapacity];
950 Array.Copy (oldValueBuffer, valueBuffer, valueLength);
954 private string CreateValueString ()
956 return new String (valueBuffer, 0, valueLength);
959 // The reader is positioned on the first character
961 private void ReadText (bool cleanValue)
966 int ch = PeekChar ();
968 while (ch != '<' && ch != -1) {
971 if (ReadReference (false))
974 AppendValueChar (ReadChar ());
979 if (returnEntityReference && valueLength == 0) {
980 SetEntityReferenceProperties ();
983 XmlNodeType.Text, // nodeType
984 String.Empty, // name
985 false, // isEmptyElement
986 CreateValueString (), // value
987 true // clearAttributes
992 // The leading '&' has already been consumed.
993 // Returns true if the entity reference isn't a simple
994 // character reference or one of the predefined entities.
995 // This allows the ReadText method to break so that the
996 // next call to Read will return the EntityReference node.
997 private bool ReadReference (bool ignoreEntityReferences)
999 if (PeekChar () == '#') {
1001 ReadCharacterReference ();
1003 ReadEntityReference (ignoreEntityReferences);
1005 return returnEntityReference;
// Parses a numeric character reference and appends the resulting character to
// the value buffer. A leading 'x' selects hexadecimal; otherwise the digits are
// decimal. Digits are accumulated until ';' or end of input is peeked, and any
// other character raises an XmlException.
// (value is presumably a local accumulator declared on a line not shown here.)
// NOTE(review): no range check is visible, and AppendValueChar narrows its
// argument to char, so references above U+FFFF would not survive - verify.
1008 private void ReadCharacterReference ()
1012 if (PeekChar () == 'x') {
1015 while (PeekChar () != ';' && PeekChar () != -1) {
1016 int ch = ReadChar ();
// Each hex digit shifts the accumulator left by four bits and adds the digit.
1018 if (ch >= '0' && ch <= '9')
1019 value = (value << 4) + ch - '0';
1020 else if (ch >= 'A' && ch <= 'F')
1021 value = (value << 4) + ch - 'A' + 10;
1022 else if (ch >= 'a' && ch <= 'f')
1023 value = (value << 4) + ch - 'a' + 10;
1025 throw new XmlException (
1027 "invalid hexadecimal digit: {0} (#x{1:X})",
1032 while (PeekChar () != ';' && PeekChar () != -1) {
1033 int ch = ReadChar ();
1035 if (ch >= '0' && ch <= '9')
1036 value = value * 10 + ch - '0';
1038 throw new XmlException (
1040 "invalid decimal digit: {0} (#x{1:X})",
1048 AppendValueChar (value);
1051 private void ReadEntityReference (bool ignoreEntityReferences)
1055 int ch = PeekChar ();
1057 while (ch != ';' && ch != -1) {
1058 AppendNameChar (ReadChar ());
1064 string name = CreateNameString ();
1069 AppendValueChar ('<');
1072 AppendValueChar ('>');
1075 AppendValueChar ('&');
1078 AppendValueChar ('\'');
1081 AppendValueChar ('"');
1084 if (ignoreEntityReferences) {
1085 AppendValueChar ('&');
1087 foreach (char ch2 in name) {
1088 AppendValueChar (ch2);
1091 AppendValueChar (';');
1093 returnEntityReference = true;
1094 entityReferenceName = name;
1100 // The reader is positioned on the first character of
1101 // the attribute name.
// Reads name/value pairs until the next peeked character is '/', '>' or end of
// input. Namespace declarations are registered with the namespace manager:
// "xmlns" sets the default namespace, and "xmlns:p" binds prefix p (the
// Substring (6) skips the six characters of "xmlns:"). Declarations are still
// added to the attribute table like any other attribute.
1102 private void ReadAttributes ()
1105 string name = ReadName ();
1109 string value = ReadAttribute ();
1112 if (name == "xmlns")
1113 parserContext.NamespaceManager.AddNamespace (String.Empty, value);
1114 else if (name.StartsWith ("xmlns:"))
1115 parserContext.NamespaceManager.AddNamespace (name.Substring (6), value);
1117 AddAttribute (name, value);
1118 } while (PeekChar () != '/' && PeekChar () != '>' && PeekChar () != -1);
1121 // The reader is positioned on the quote character.
// Reads one quoted attribute value and returns it as a string. The opening
// quote may be ' or "; anything else raises an XmlException. Characters are
// collected until the same quote character is peeked again, after which the
// closing quote is consumed.
// The visible lines also show errors for '<' inside the value and for end of
// input, and a ReadReference (true) call; passing true means references that
// are not resolved are kept in the value instead of ending it.
1122 private string ReadAttribute ()
1124 int quoteChar = ReadChar ();
1126 if (quoteChar != '\'' && quoteChar != '\"')
1127 throw new XmlException ("an attribute value was not quoted");
1131 while (PeekChar () != quoteChar) {
1132 int ch = ReadChar ();
1137 throw new XmlException ("attribute values cannot contain '<'");
1139 ReadReference (true);
1142 throw new XmlException ("unexpected end of file in an attribute value");
1144 AppendValueChar (ch);
1149 ReadChar (); // quoteChar
1151 return CreateValueString ();
1154 // The reader is positioned on the first character
1157 // This method now also reads the XmlDeclaration, so its name is no longer accurate.
1158 private void ReadProcessingInstruction ()
1160 string target = ReadName ();
1165 while (PeekChar () != -1) {
1166 int ch = ReadChar ();
1168 if (ch == '?' && PeekChar () == '>') {
1173 AppendValueChar ((char)ch);
1178 XmlNodeType.XmlDeclaration :
\r
1179 XmlNodeType.ProcessingInstruction, // nodeType
\r
1181 false, // isEmptyElement
1182 CreateValueString (), // value
1183 true // clearAttributes
1187 // The reader is positioned on the first character after
1188 // the leading '<!'.
1189 private void ReadDeclaration ()
1191 int ch = PeekChar ();
1213 // The reader is positioned on the first character after
1214 // the leading '<!--'.
// Reads the body of a comment into the value buffer until end of input or the
// closing sequence. When a '-' is followed by another '-', the character after
// them must be '>'; otherwise an XmlException reports that comments cannot
// contain "--". The collected text becomes the value of a Comment node with an
// empty name, and the attribute list is cleared.
1215 private void ReadComment ()
1219 while (PeekChar () != -1) {
1220 int ch = ReadChar ();
1222 if (ch == '-' && PeekChar () == '-') {
1225 if (PeekChar () != '>')
1226 throw new XmlException ("comments cannot contain '--'");
1232 AppendValueChar ((char)ch);
1236 XmlNodeType.Comment, // nodeType
1237 String.Empty, // name
1238 false, // isEmptyElement
1239 CreateValueString (), // value
1240 true // clearAttributes
1244 // The reader is positioned on the first character after
1245 // the leading '<![CDATA['.
// Reads the content of a CDATA section into the value buffer until end of
// input or the terminator. After two consecutive ']' characters the next
// character is checked for '>'; the two AppendValueChar (']') calls put the
// brackets into the value when (as it appears from the visible lines) the
// pair turned out not to be the terminator. The collected text becomes the
// value of a CDATA node with an empty name, and attributes are cleared.
1246 private void ReadCDATA ()
1250 while (PeekChar () != -1) {
1251 int ch = ReadChar ();
1253 if (ch == ']' && PeekChar () == ']') {
1254 ch = ReadChar (); // ']'
1256 if (PeekChar () == '>') {
1260 AppendValueChar (']');
1261 AppendValueChar (']');
1266 AppendValueChar ((char)ch);
1270 XmlNodeType.CDATA, // nodeType
1271 String.Empty, // name
1272 false, // isEmptyElement
1273 CreateValueString (), // value
1274 true // clearAttributes
1278 // The reader is positioned on the first character
// Reads an XML name and returns it as a string. The next character must
// satisfy XmlChar.IsFirstNameChar, otherwise an XmlException is thrown without
// consuming it. That character and every following character accepted by
// XmlChar.IsNameChar are appended to the name buffer; the first character that
// is not a name character is left unread.
1280 private string ReadName ()
1282 if (!XmlChar.IsFirstNameChar (PeekChar ()))
1283 throw new XmlException ("a name did not start with a legal character");
1287 AppendNameChar (ReadChar ());
1289 while (XmlChar.IsNameChar (PeekChar ())) {
1290 AppendNameChar (ReadChar ());
1293 return CreateNameString ();
1296 // Read the next character and compare it against the
1297 // specified character.
1298 private void Expect (int expected)
1300 int ch = ReadChar ();
1302 if (ch != expected) {
1303 throw new XmlException (
1305 "expected '{0}' ({1:X}) but found '{2}' ({3:X})",
1313 // Does not consume the first non-whitespace character.
// Loops for as long as the next peeked character is whitespace according to
// XmlChar.IsWhitespace; the first non-whitespace character is only peeked.
1314 private void SkipWhitespace ()
1316 //FIXME: Should not skip if whitespaceHandling == WhitespaceHandling.None
1317 while (XmlChar.IsWhitespace (PeekChar ()))
// Consumes a run of whitespace into the value buffer: at least one character
// is read, then reading continues while the next character is whitespace and
// not end of input. If the run is followed by something other than '<' or end
// of input, a different path is taken (that statement is on a line not shown
// here - presumably the run is treated as text; verify). Otherwise the
// collected characters become the value of a Whitespace node.
// Returns true while the underlying reader still has characters to peek.
1321 private bool ReadWhitespace ()
1324 int ch = PeekChar ();
1326 AppendValueChar (ReadChar ());
1327 } while ((ch = PeekChar ()) != -1 && XmlChar.IsWhitespace (ch));
1329 if (ch != -1 && ch != '<')
1332 SetProperties (XmlNodeType.Whitespace,
1335 CreateValueString (),
1338 return (PeekChar () != -1);