1 // -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*-
3 // System.Xml.XmlTextReader.cs
6 // Jason Diamond (jason@injektilo.org)
8 // (C) 2001 Jason Diamond http://injektilo.org/
12 // This can only parse basic XML: elements, attributes, processing
13 // instructions, and comments are OK.
15 // It barfs on DOCTYPE declarations.
17 // There's also no checking being done for either well-formedness or validity.
20 // ParserContext and NameTables aren't being used yet.
22 // Some thought needs to be given to performance. There are too many
23 // strings being allocated.
25 // None of the MoveTo methods have been implemented yet.
27 // LineNumber and LinePosition aren't being tracked.
29 // xml:space, xml:lang, and xml:base aren't being tracked.
31 // Depth isn't being tracked.
34 using System.Collections;
41 public class XmlTextReader : XmlReader, IXmlLineInfo
45 protected XmlTextReader()
50 public XmlTextReader(Stream input)
53 reader = new StreamReader(
59 public XmlTextReader(string url)
62 WebClient client = new WebClient();
63 reader = new StreamReader(
69 public XmlTextReader(TextReader input)
75 public XmlTextReader(Stream input, XmlNameTable nameTable)
77 this.nameTable = nameTable;
79 // TODO: implement me.
80 throw new NotImplementedException();
83 public XmlTextReader(string baseURI, Stream input)
85 // TODO: implement me.
86 throw new NotImplementedException();
89 public XmlTextReader(string baseURI, TextReader input)
91 // TODO: implement me.
92 throw new NotImplementedException();
95 public XmlTextReader(string url, XmlNameTable nameTable)
97 this.nameTable = nameTable;
99 // TODO: implement me.
100 throw new NotImplementedException();
103 public XmlTextReader(
105 XmlNameTable nameTable)
107 this.nameTable = nameTable;
109 // TODO: implement me.
110 throw new NotImplementedException();
113 public XmlTextReader(
114 Stream inputFragment,
115 XmlNodeType fragmentType,
116 XmlParserContext context)
118 // TODO: implement me.
119 throw new NotImplementedException();
122 public XmlTextReader(
125 XmlNameTable nameTable)
127 this.nameTable = nameTable;
129 // TODO: implement me.
130 throw new NotImplementedException();
133 public XmlTextReader(
136 XmlNameTable nameTable)
138 this.nameTable = nameTable;
139 // TODO: implement me.
140 throw new NotImplementedException();
143 public XmlTextReader(
145 XmlNodeType fragmentType,
146 XmlParserContext context)
148 // TODO: implement me.
149 throw new NotImplementedException();
154 public override int AttributeCount
158 return attributes.Count;
162 public override string BaseURI
166 // TODO: implement me.
171 public override bool CanResolveEntity
175 // XmlTextReaders don't resolve entities.
180 public override int Depth
184 // TODO: implement me.
185 return depth > 0 ? depth : 0;
189 public Encoding Encoding
193 // TODO: implement me.
198 public override bool EOF
203 readState == ReadState.EndOfFile ||
204 readState == ReadState.Closed;
208 public override bool HasValue
212 return value != String.Empty;
216 public override bool IsDefault
220 // XmlTextReader does not expand default attributes.
225 public override bool IsEmptyElement
229 return isEmptyElement;
233 public override string this[int i]
237 return GetAttribute(i);
241 public override string this[string name]
245 return GetAttribute(name);
249 public override string this[
251 string namespaceName]
255 return GetAttribute(localName, namespaceName);
259 public int LineNumber
263 // TODO: implement me.
268 public int LinePosition
272 // TODO: implement me.
277 public override string LocalName
285 public override string Name
293 public bool Namespaces
297 // TODO: implement me.
303 // TODO: implement me.
307 public override string NamespaceURI
315 public override XmlNameTable NameTable
319 // TODO: implement me.
324 public override XmlNodeType NodeType
332 public bool Normalization
336 // TODO: implement me.
342 // TODO: implement me.
346 public override string Prefix
354 public override char QuoteChar
358 // TODO: implement me.
363 public override ReadState ReadState
371 public override string Value
379 public WhitespaceHandling WhitespaceHandling
383 // TODO: implement me.
384 return WhitespaceHandling.All;
389 // TODO: implement me.
393 public override string XmlLang
397 // TODO: implement me.
402 public XmlResolver XmlResolver
406 // TODO: implement me.
410 public override XmlSpace XmlSpace
414 // TODO: implement me.
415 return XmlSpace.Default;
421 public override void Close()
423 readState = ReadState.Closed;
426 public override string GetAttribute(int i)
428 // TODO: implement me.
432 public override string GetAttribute(string name)
434 return (string)attributes[name];
437 public override string GetAttribute(
441 foreach (DictionaryEntry entry in attributes)
443 string thisName = (string)entry.Key;
445 int indexOfColon = thisName.IndexOf(':');
447 if (indexOfColon != -1)
449 string thisLocalName = thisName.Substring(indexOfColon + 1);
451 if (localName == thisLocalName)
453 string thisPrefix = thisName.Substring(0, indexOfColon);
454 string thisNamespaceURI = LookupNamespace(thisPrefix);
456 if (namespaceURI == thisNamespaceURI)
458 return (string)attributes[thisName];
467 public TextReader GetRemainder()
469 // TODO: implement me.
473 // Why does this use explicit interface implementation?
474 bool IXmlLineInfo.HasLineInfo()
476 // TODO: implement me.
480 public override string LookupNamespace(string prefix)
482 return namespaceManager.LookupNamespace(prefix);
485 public override void MoveToAttribute(int i)
487 // TODO: implement me.
490 public override bool MoveToAttribute(string name)
492 // TODO: implement me.
496 public override bool MoveToAttribute(
498 string namespaceName)
500 // TODO: implement me.
504 public override bool MoveToElement()
506 // TODO: implement me.
510 public override bool MoveToFirstAttribute()
512 // TODO: implement me.
516 public override bool MoveToNextAttribute()
518 // TODO: implement me.
522 public override bool Read()
526 readState = ReadState.Interactive;
528 more = ReadContent();
533 public override bool ReadAttributeValue()
535 // TODO: implement me.
539 public int ReadBase64(byte[] buffer, int offset, int length)
541 // TODO: implement me.
545 public int ReadBinHex(byte[] buffer, int offset, int length)
547 // TODO: implement me.
551 public int ReadChars(char[] buffer, int offset, int length)
553 // TODO: implement me.
557 public override string ReadInnerXml()
559 // TODO: implement me.
563 public override string ReadOuterXml()
565 // TODO: implement me.
569 public override string ReadString()
571 // TODO: implement me.
575 public override void ResolveEntity()
577 // XmlTextReaders don't resolve entities.
578 throw new InvalidOperationException("XmlTextReaders don't resolve entities.");
583 private TextReader reader;
584 private ReadState readState;
587 private bool depthDown;
589 private XmlNameTable nameTable;
590 private XmlNamespaceManager namespaceManager;
591 private bool popScope;
593 private XmlNodeType nodeType;
595 private string prefix;
596 private string localName;
597 private string namespaceURI;
598 private bool isEmptyElement;
599 private string value;
600 private Hashtable attributes;
602 private bool returnEntityReference;
603 private string entityReferenceName;
605 private char[] nameBuffer;
606 private int nameLength;
607 private int nameCapacity;
608 private const int initialNameCapacity = 256;
610 private char[] valueBuffer;
611 private int valueLength;
612 private int valueCapacity;
613 private const int initialValueCapacity = 8192;
617 namespaceManager = new XmlNamespaceManager(nameTable);
620 readState = ReadState.Initial;
625 nodeType = XmlNodeType.None;
627 prefix = String.Empty;
628 localName = string.Empty;
629 isEmptyElement = false;
630 value = String.Empty;
631 attributes = new Hashtable();
633 returnEntityReference = false;
634 entityReferenceName = String.Empty;
636 nameBuffer = new char[initialNameCapacity];
638 nameCapacity = initialNameCapacity;
640 valueBuffer = new char[initialValueCapacity];
642 valueCapacity = initialValueCapacity;
645 // Use this method rather than setting the properties
646 // directly so that all the necessary properties can
647 // be changed in harmony with each other. Maybe the
648 // fields should be in a separate class to help enforce this.
650 private void SetProperties(
651 XmlNodeType nodeType,
655 bool clearAttributes)
657 this.nodeType = nodeType;
659 this.isEmptyElement = isEmptyElement;
667 int indexOfColon = name.IndexOf(':');
669 if (indexOfColon == -1)
671 prefix = String.Empty;
676 prefix = name.Substring(0, indexOfColon);
677 localName = name.Substring(indexOfColon + 1);
680 namespaceURI = LookupNamespace(prefix);
683 private void AddAttribute(string name, string value)
685 attributes.Add(name, value);
688 private void ClearAttributes()
690 if (attributes.Count > 0)
696 private int PeekChar()
698 return reader.Peek();
701 private int ReadChar()
703 return reader.Read();
706 // This should really keep track of some state so
707 // that it's not possible to have more than one document
708 // element or text outside of the document element.
709 private bool ReadContent()
715 namespaceManager.PopScope();
724 if (returnEntityReference)
727 SetEntityReferenceProperties();
740 readState = ReadState.EndOfFile;
742 XmlNodeType.None, // nodeType
743 String.Empty, // name
744 false, // isEmptyElement
745 String.Empty, // value
746 true // clearAttributes
760 private void SetEntityReferenceProperties()
763 XmlNodeType.EntityReference, // nodeType
764 entityReferenceName, // name
765 false, // isEmptyElement
766 String.Empty, // value
767 true // clearAttributes
770 returnEntityReference = false;
771 entityReferenceName = String.Empty;
774 // The leading '<' has already been consumed.
775 private void ReadTag()
785 ReadProcessingInstruction();
797 // The leading '<' has already been consumed.
798 private void ReadStartTag()
800 namespaceManager.PushScope();
802 string name = ReadName();
805 bool isEmptyElement = false;
809 if (XmlChar.IsFirstNameChar(PeekChar()))
814 if (PeekChar() == '/')
817 isEmptyElement = true;
827 XmlNodeType.Element, // nodeType
829 isEmptyElement, // isEmptyElement
830 String.Empty, // value
831 false // clearAttributes
835 // The reader is positioned on the first character
836 // of the element's name.
837 private void ReadEndTag()
839 string name = ReadName();
846 XmlNodeType.EndElement, // nodeType
848 false, // isEmptyElement
849 String.Empty, // value
850 true // clearAttributes
856 private void AppendNameChar(int ch)
859 nameBuffer[nameLength++] = (char)ch;
862 private void CheckNameCapacity()
864 if (nameLength == nameCapacity)
866 nameCapacity = nameCapacity * 2;
867 char[] oldNameBuffer = nameBuffer;
868 nameBuffer = new char[nameCapacity];
869 Array.Copy(oldNameBuffer, nameBuffer, nameLength);
873 private string CreateNameString()
875 return new String(nameBuffer, 0, nameLength);
878 private void AppendValueChar(int ch)
880 CheckValueCapacity();
881 valueBuffer[valueLength++] = (char)ch;
884 private void CheckValueCapacity()
886 if (valueLength == valueCapacity)
888 valueCapacity = valueCapacity * 2;
889 char[] oldValueBuffer = valueBuffer;
890 valueBuffer = new char[valueCapacity];
891 Array.Copy(oldValueBuffer, valueBuffer, valueLength);
895 private string CreateValueString()
897 return new String(valueBuffer, 0, valueLength);
900 // The reader is positioned on the first character of the text.
902 private void ReadText()
908 while (ch != '<' && ch != -1)
914 if (ReadReference(false))
921 AppendValueChar(ReadChar());
927 if (returnEntityReference && valueLength == 0)
930 SetEntityReferenceProperties();
941 XmlNodeType.Text, // nodeType
942 String.Empty, // name
943 false, // isEmptyElement
944 CreateValueString(), // value
945 true // clearAttributes
950 // The leading '&' has already been consumed.
951 // Returns true if the entity reference isn't a simple
952 // character reference or one of the predefined entities.
953 // This allows the ReadText method to break so that the
954 // next call to Read will return the EntityReference node.
955 private bool ReadReference(bool ignoreEntityReferences)
957 if (PeekChar() == '#')
960 ReadCharacterReference();
964 ReadEntityReference(ignoreEntityReferences);
967 return returnEntityReference;
970 private void ReadCharacterReference()
974 if (PeekChar() == 'x')
978 while (PeekChar() != ';' && PeekChar() != -1)
982 if (ch >= '0' && ch <= '9')
984 value = (value << 4) + ch - '0';
986 else if (ch >= 'A' && ch <= 'F')
988 value = (value << 4) + ch - 'A' + 10;
990 else if (ch >= 'a' && ch <= 'f')
992 value = (value << 4) + ch - 'a' + 10;
998 "invalid hexadecimal digit: {0} (#x{1:X})",
1006 while (PeekChar() != ';' && PeekChar() != -1)
1008 int ch = ReadChar();
1010 if (ch >= '0' && ch <= '9')
1012 value = value * 10 + ch - '0';
1016 throw new Exception(
1018 "invalid decimal digit: {0} (#x{1:X})",
1027 AppendValueChar(value);
1030 private void ReadEntityReference(bool ignoreEntityReferences)
1034 int ch = PeekChar();
1036 while (ch != ';' && ch != -1)
1038 AppendNameChar(ReadChar());
1044 string name = CreateNameString();
1049 AppendValueChar('<');
1052 AppendValueChar('>');
1055 AppendValueChar('&');
1058 AppendValueChar('\'');
1061 AppendValueChar('"');
1064 if (ignoreEntityReferences)
1066 AppendValueChar('&');
1068 foreach (char ch2 in name)
1070 AppendValueChar(ch2);
1073 AppendValueChar(';');
1077 returnEntityReference = true;
1078 entityReferenceName = name;
1084 // The reader is positioned on the first character of
1085 // the attribute name.
1086 private void ReadAttributes()
1090 string name = ReadName();
1094 string value = ReadAttribute();
1097 if (name == "xmlns")
1099 namespaceManager.AddNamespace(String.Empty, value);
1101 else if (name.StartsWith("xmlns:"))
1103 namespaceManager.AddNamespace(name.Substring(6), value);
1107 AddAttribute(name, value);
1110 while (PeekChar() != '/' && PeekChar() != '>' && PeekChar() != -1);
1113 // The reader is positioned on the quote character.
1114 private string ReadAttribute()
1116 int quoteChar = ReadChar();
1118 if (quoteChar != '\'' && quoteChar != '\"')
1120 throw new Exception("an attribute value was not quoted");
1125 while (PeekChar() != quoteChar)
1127 int ch = ReadChar();
1132 throw new Exception("attribute values cannot contain '<'");
1134 ReadReference(true);
1137 throw new Exception("unexpected end of file in an attribute value");
1139 AppendValueChar(ch);
1144 ReadChar(); // quoteChar
1146 return CreateValueString();
1149 // The reader is positioned on the first character of the target.
1151 private void ReadProcessingInstruction()
1153 string target = ReadName();
1158 while (PeekChar() != -1)
1160 int ch = ReadChar();
1162 if (ch == '?' && PeekChar() == '>')
1168 AppendValueChar((char)ch);
1172 XmlNodeType.ProcessingInstruction, // nodeType
1174 false, // isEmptyElement
1175 CreateValueString(), // value
1176 true // clearAttributes
1180 // The reader is positioned on the first character after
1181 // the leading '<!'.
1182 private void ReadDeclaration()
1184 int ch = PeekChar();
1206 // The reader is positioned on the first character after
1207 // the leading '<!--'.
1208 private void ReadComment()
1212 while (PeekChar() != -1)
1214 int ch = ReadChar();
1216 if (ch == '-' && PeekChar() == '-')
1220 if (PeekChar() != '>')
1222 throw new Exception("comments cannot contain '--'");
1229 AppendValueChar((char)ch);
1233 XmlNodeType.Comment, // nodeType
1234 String.Empty, // name
1235 false, // isEmptyElement
1236 CreateValueString(), // value
1237 true // clearAttributes
1241 // The reader is positioned on the first character after
1242 // the leading '<![CDATA['.
1243 private void ReadCDATA()
1247 while (PeekChar() != -1)
1249 int ch = ReadChar();
1251 if (ch == ']' && PeekChar() == ']')
1253 ch = ReadChar(); // ']'
1255 if (PeekChar() == '>')
1262 AppendValueChar(']');
1263 AppendValueChar(']');
1268 AppendValueChar((char)ch);
1274 XmlNodeType.CDATA, // nodeType
1275 String.Empty, // name
1276 false, // isEmptyElement
1277 CreateValueString(), // value
1278 true // clearAttributes
1282 // The reader is positioned on the first character of the name.
1284 private string ReadName()
1286 if (!XmlChar.IsFirstNameChar(PeekChar()))
1288 throw new Exception("a name did not start with a legal character");
1293 AppendNameChar(ReadChar());
1295 while (XmlChar.IsNameChar(PeekChar()))
1297 AppendNameChar(ReadChar());
1300 return CreateNameString();
1303 // Read the next character and compare it against the
1304 // specified character.
1305 private void Expect(int expected)
1307 int ch = ReadChar();
1311 throw new Exception(String.Format(
1312 "expected '{0}' ({1:X}) but found '{2}' ({3:X})",
1320 // Does not consume the first non-whitespace character.
1321 private void SkipWhitespace()
1323 while (XmlChar.IsWhitespace(PeekChar()))