2 // System.Xml.XmlTextReader
5 // Jason Diamond (jason@injektilo.org)
6 // Adam Treat (manyoso@yahoo.com)
8 // (C) 2001, 2002 Jason Diamond http://injektilo.org/
12 // This can only parse basic XML: elements, attributes, processing
13 // instructions, and comments are OK.
15 // It barfs on DOCTYPE declarations.
16 // => No barfing, but parsing is incomplete.
17 // DTD nodes are still not created.
19 // There's also no checking being done for validity.
21 // More checking should be done for well-formedness.
23 // NameTables aren't being used everywhere yet.
25 // Some thought needs to be given to performance. There are too many
26 // strings being allocated.
28 // Some of the MoveTo methods haven't been implemented yet.
30 // xml:space, xml:lang, and xml:base aren't being tracked.
34 using System.Collections;
40 public class XmlTextReader : XmlReader, IXmlLineInfo
42 WhitespaceHandling whitespaceHandling = WhitespaceHandling.All;
// Parameterless constructor for derived classes. It performs no setup of its own.
protected XmlTextReader ()
{
}
// Reads XML from a stream by wrapping it in a StreamReader and
// delegating to the TextReader overload.
public XmlTextReader (Stream input)
	: this (new StreamReader (input))
{
}
// Reads XML from the given url, using a freshly created NameTable.
public XmlTextReader (string url)
	: this (url, new NameTable ())
{
}
// Reads XML from a TextReader, using a freshly created NameTable.
public XmlTextReader (TextReader input)
	: this (input, new NameTable ())
{
}
// Constructor for derived classes that supply only a name table.
//
// A document-level parser context is built around nt, in the same way the
// (url, TextReader, XmlNameTable) overload builds one. The earlier chain
// passed a null context and XmlNodeType.None: SetReaderContext dereferences
// the context to store the base URI, and SetReaderFragment rejects None, so
// construction could never succeed and nt was ignored.
//
// No input reader is attached; the reader argument is null.
protected XmlTextReader (XmlNameTable nt)
	: this (String.Empty, null, XmlNodeType.Document, new XmlParserContext (nt, new XmlNamespaceManager (nt), null, XmlSpace.None))
{
}
// Reads XML from a stream (wrapped in a StreamReader) with the caller's name table.
public XmlTextReader (Stream input, XmlNameTable nt)
	: this (new StreamReader (input), nt)
{
}
// Reads XML from a stream (wrapped in a StreamReader); url is passed on
// as the first argument of the (url, TextReader) overload.
public XmlTextReader (string url, Stream input)
	: this (url, new StreamReader (input))
{
}
// Reads XML from a TextReader with the given url and a freshly created NameTable.
public XmlTextReader (string url, TextReader input)
	: this (url, input, new NameTable ())
{
}
// Opens url through StreamReader (string) and reads XML from it with the
// caller's name table.
// Resolver-based alternative kept for reference (see the MonoTODO):
//   : this(url, new StreamReader ((Stream)new XmlUrlResolver ().GetEntity (new Uri (url), null, typeof(Stream))), nt)
[MonoTODO("Non-filename-url must be supported. Waiting for WebClient")]
public XmlTextReader (string url, XmlNameTable nt)
	: this (url, new StreamReader (url), nt)
{
}
// Reads XML from a TextReader with the caller's name table and an empty url.
public XmlTextReader (TextReader input, XmlNameTable nt)
	: this (String.Empty, input, nt)
{
}
// Reads an XML fragment of kind fragType from a stream (wrapped in a
// StreamReader), using the supplied parser context and an empty url.
public XmlTextReader (Stream xmlFragment, XmlNodeType fragType, XmlParserContext context)
	: this (String.Empty, new StreamReader (xmlFragment), fragType, context)
{
}
// Reads XML from a stream (wrapped in a StreamReader) with the given url
// and the caller's name table.
public XmlTextReader (string url, Stream input, XmlNameTable nt)
	: this (url, new StreamReader (input), nt)
{
}
// Reads a whole document (XmlNodeType.Document) from a TextReader. A parser
// context is created from nt with a new XmlNamespaceManager over the same
// table, a null third argument and XmlSpace.None.
public XmlTextReader (string url, TextReader input, XmlNameTable nt)
	: this (url, input, XmlNodeType.Document, new XmlParserContext (nt, new XmlNamespaceManager (nt), null, XmlSpace.None))
{
}
// Reads an XML fragment of kind fragType held in a string (wrapped in a
// StringReader), using the supplied parser context and an empty url.
[MonoTODO("TODO as same as private XmlTextReader(TextReader, XmlNodeType, XmlParserContext)")]
public XmlTextReader (string xmlFragment, XmlNodeType fragType, XmlParserContext context)
	: this (String.Empty, new StringReader (xmlFragment), fragType, context)
{
}
// Private constructor that the input-taking overloads chain to: it stores
// the parser context and url, then attaches the input and fragment kind.
// The TODO items listed at the head of this file still stand, although
// they may not be TODOs of the constructors themselves.
XmlTextReader (string url, TextReader fragment, XmlNodeType fragType, XmlParserContext context)
{
	SetReaderContext (url, context);
	SetReaderFragment (fragment, fragType);
}
// Number of entries currently held in the attribute table.
public override int AttributeCount
{
	get {
		return attributes.Count;
	}
}
// Base URI as recorded in the parser context.
public override string BaseURI
{
	get {
		return parserContext.BaseURI;
	}
}
139 public override int Depth
// Encoding as recorded in the parser context.
public Encoding Encoding
{
	get {
		return parserContext.Encoding;
	}
}
// True once the reader has reached the end of input or has been closed.
public override bool EOF
{
	get {
		if (readState == ReadState.EndOfFile)
			return true;
		return readState == ReadState.Closed;
	}
}
// True when the current node's stored value is not the empty string.
public override bool HasValue
{
	get {
		return value != String.Empty;
	}
}
166 public override bool IsDefault
170 // XmlTextReader does not expand default attributes.
// Empty-element flag recorded for the current node.
public override bool IsEmptyElement
{
	get {
		return isEmptyElement;
	}
}
// Indexer form of GetAttribute (int).
public override string this [int i]
{
	get {
		return GetAttribute (i);
	}
}
// Indexer form of GetAttribute (string).
public override string this [string name]
{
	get {
		return GetAttribute (name);
	}
}
// Indexer form of GetAttribute (string, string).
public override string this [string localName, string namespaceName]
{
	get {
		return GetAttribute (localName, namespaceName);
	}
}
195 public int LineNumber
// Current column counter of the reader.
public int LinePosition
{
	get {
		return column;
	}
}
// Local part of the current node's name.
public override string LocalName
{
	get {
		return localName;
	}
}
210 public override string Name
// Not implemented yet: both accessors throw NotImplementedException.
public bool Namespaces
{
	get {
		throw new NotImplementedException ();
	}
	set {
		throw new NotImplementedException ();
	}
}
// Namespace URI recorded for the current node.
public override string NamespaceURI
{
	get {
		return namespaceURI;
	}
}
// Name table held by the parser context.
public override XmlNameTable NameTable
{
	get {
		return parserContext.NameTable;
	}
}
// Type of the node the reader is currently positioned on.
public override XmlNodeType NodeType
{
	get {
		return nodeType;
	}
}
// Not implemented yet: both accessors throw NotImplementedException.
public bool Normalization
{
	get {
		throw new NotImplementedException ();
	}
	set {
		throw new NotImplementedException ();
	}
}
// Prefix part of the current node's name.
public override string Prefix
{
	get {
		return prefix;
	}
}
249 public override char QuoteChar
252 // value string holds attribute quotation char.
253 if (NodeType == XmlNodeType.Attribute)
// Current state of the reader.
public override ReadState ReadState
{
	get {
		return readState;
	}
}
265 public override string Value
268 if(NodeType == XmlNodeType.Attribute)
269 return UnescapeAttributeValue(value);
// Whitespace policy consulted while reading content.
public WhitespaceHandling WhitespaceHandling
{
	get {
		return whitespaceHandling;
	}
	set {
		whitespaceHandling = value;
	}
}
// Not implemented yet: the getter throws NotImplementedException.
public override string XmlLang
{
	get {
		throw new NotImplementedException ();
	}
}
// Not implemented yet: the setter throws NotImplementedException.
public XmlResolver XmlResolver
{
	set {
		throw new NotImplementedException ();
	}
}
// Not implemented yet: the getter throws NotImplementedException.
public override XmlSpace XmlSpace
{
	get {
		throw new NotImplementedException ();
	}
}
304 public override void Close ()
306 readState = ReadState.Closed;
// Returns the unescaped value of the attribute at zero-based position i,
// in the order the attributes were added.
//
// Throws ArgumentOutOfRangeException when i is negative or is not smaller
// than the number of attributes.
public override string GetAttribute (int i)
{
	// The earlier test (i > Count) accepted i == Count and negative values,
	// leaving them to fail inside the list indexer, and its message
	// described the opposite condition.
	if (i < 0 || i >= attributes.Count)
		throw new ArgumentOutOfRangeException ("i", "i must be non-negative and smaller than AttributeCount");

	string name = orderedAttributes [i] as string;
	return UnescapeAttributeValue (attributes [name] as string);
}
// Returns the unescaped value of the attribute stored under the given
// qualified name, or String.Empty when no such attribute is recorded.
public override string GetAttribute (string name)
{
	if (!attributes.ContainsKey (name))
		return String.Empty;

	string stored = attributes [name] as string;
	return UnescapeAttributeValue (stored);
}
// Returns the unescaped value of the attribute with the given local name
// and namespace URI, or String.Empty when none matches.
//
// Matching rules, per recorded qualified name:
//   prefix:local  - local parts must be equal and the prefix must resolve
//                   (via LookupNamespace) to namespaceURI;
//   xmlns         - matches only ("xmlns", "http://www.w3.org/2000/xmlns/");
//   other names   - an unprefixed attribute is in no namespace, so it
//                   matches when namespaceURI is null or empty. The earlier
//                   code never matched these.
//
// NOTE(review): the fall-through result is assumed to be String.Empty, in
// line with the other GetAttribute overloads - confirm.
public override string GetAttribute (string localName, string namespaceURI)
{
	foreach (DictionaryEntry entry in attributes) {
		string qualifiedName = entry.Key as string;
		int colon = qualifiedName.IndexOf (':');
		bool matches;

		if (colon != -1) {
			// && short-circuits, so the prefix is only resolved when the
			// local part already agrees.
			matches = localName == qualifiedName.Substring (colon + 1)
				&& namespaceURI == LookupNamespace (qualifiedName.Substring (0, colon));
		} else if (qualifiedName == "xmlns") {
			matches = localName == "xmlns"
				&& namespaceURI == "http://www.w3.org/2000/xmlns/";
		} else {
			matches = localName == qualifiedName
				&& (namespaceURI == null || namespaceURI.Length == 0);
		}

		// The entry being enumerated is by definition present, so its value
		// is used directly instead of a second table lookup.
		if (matches)
			return UnescapeAttributeValue (entry.Value as string);
	}

	return String.Empty;
}
// Not implemented yet: always throws NotImplementedException.
public TextReader GetRemainder ()
{
	throw new NotImplementedException ();
}
356 bool IXmlLineInfo.HasLineInfo ()
// Resolves a prefix by asking the namespace manager of the parser context.
public override string LookupNamespace (string prefix)
{
	string uri = parserContext.NamespaceManager.LookupNamespace (prefix);
	return uri;
}
366 public override void MoveToAttribute (int i)
370 if (attributes == null || orderedAttributes.Count < i || i < 0)
371 throw new ArgumentOutOfRangeException ("attribute index out of range.");
373 string name = orderedAttributes [i] as string;
374 string value = attributes [name] as string;
376 XmlNodeType.Attribute, // nodeType
378 false, // isEmptyElement
380 false // clearAttributes
382 attributeValuePos = 0;
385 public override bool MoveToAttribute (string name)
390 if (attributes == null)
393 if (orderedAttributesEnumerator == null) {
395 orderedAttributesEnumerator = orderedAttributes.GetEnumerator ();
398 while (orderedAttributesEnumerator.MoveNext ()) {
399 if(name == orderedAttributesEnumerator.Current as string) {
406 string value = attributes [name] as string;
408 XmlNodeType.Attribute, // nodeType
410 false, // isEmptyElement
412 false // clearAttributes
414 attributeValuePos = 0;
// Not implemented yet: always throws NotImplementedException.
public override bool MoveToAttribute (string localName, string namespaceName)
{
	throw new NotImplementedException ();
}
426 public override bool MoveToElement ()
428 if (orderedAttributesEnumerator != null) {
429 orderedAttributesEnumerator = null;
430 RestoreProperties ();
437 public override bool MoveToFirstAttribute ()
440 return MoveToNextAttribute ();
443 public override bool MoveToNextAttribute ()
445 if (attributes == null)
448 if (orderedAttributesEnumerator == null) {
450 orderedAttributesEnumerator = orderedAttributes.GetEnumerator ();
453 if (orderedAttributesEnumerator.MoveNext ()) {
454 string name = orderedAttributesEnumerator.Current as string;
455 string value = attributes [name] as string;
457 XmlNodeType.Attribute, // nodeType
459 false, // isEmptyElement
461 false // clearAttributes
463 attributeValuePos = 0;
470 public override bool Read ()
474 readState = ReadState.Interactive;
476 more = ReadContent ();
481 public override bool ReadAttributeValue ()
483 // 'attributeString' holds real string value (without their
484 // quotation characters).
486 // 'attributeValuePos' holds current position
487 // of 'attributeString' while iterating ReadAttribute().
489 // -1 if ReadAttributeValue() has already finished.
490 // 0 if ReadAttributeValue() ready to start reading.
491 // >0 if ReadAttributeValue() already got 1 or more values
493 // local 'refPosition' holds the position on the
494 // attributeString which may be used next time.
496 if (attributeValuePos < 0) {
497 SetProperties (XmlNodeType.None,
505 // If not started, then initialize attributeString when parsing is at start.
506 if (attributeValuePos == 0)
508 value.Substring (1, value.Length - 2);
510 returnEntityReference = false;
511 value = String.Empty;
516 refPosition = attributeString.IndexOf ('&', attributeValuePos);
517 if (refPosition < 0) {
518 // Reached to the end of value string.
519 value += attributeString.Substring (attributeValuePos);
520 attributeValuePos = -1;
522 } else if (refPosition == attributeValuePos) {
523 string parsed = ReadAttributeValueReference ();
527 // Found that an entity reference starts from this point.
528 // reset position to after '&'.
529 attributeValuePos = refPosition;
530 if (value.Length <= 0) {
531 int endNamePos = attributeString.IndexOf (";", attributeValuePos);
532 value = attributeString.Substring (attributeValuePos+1, endNamePos - attributeValuePos - 1);
533 attributeValuePos += value.Length + 2;
534 returnEntityReference = true;
539 value += attributeString.Substring (attributeValuePos,
540 refPosition - attributeValuePos);
541 attributeValuePos = refPosition;
544 } while (++loop > 0);
546 if (returnEntityReference)
547 SetProperties (XmlNodeType.EntityReference,
553 SetProperties (XmlNodeType.Text,
// Not implemented yet: always throws NotImplementedException.
public int ReadBase64 (byte [] buffer, int offset, int length)
{
	throw new NotImplementedException ();
}
// Not implemented yet: always throws NotImplementedException.
public int ReadBinHex (byte [] buffer, int offset, int length)
{
	throw new NotImplementedException ();
}
// Not implemented yet: always throws NotImplementedException.
public int ReadChars (char [] buffer, int offset, int length)
{
	throw new NotImplementedException ();
}
580 public override string ReadInnerXml ()
582 if (readState != ReadState.Interactive)
586 case XmlNodeType.Attribute:
587 return value.Substring (1, value.Length - 2);
588 case XmlNodeType.Element:
592 int startDepth = depth;
594 innerXmlBuilder.Length = 0;
597 if (NodeType != XmlNodeType.EndElement || depth + 1 > startDepth)
598 innerXmlBuilder.Append (currentTag);
599 } while (depth >= startDepth);
601 string xml = innerXmlBuilder.ToString ();
602 innerXmlBuilder.Length = 0;
604 case XmlNodeType.None:
605 // MS document is incorrect. Seems not to progress.
613 public override string ReadOuterXml ()
615 if (readState != ReadState.Interactive)
619 case XmlNodeType.Attribute:
620 // strictly incompatible with MS... (it holds spaces attribute between name, value and "=" char (very trivial).
621 return String.Format ("{0}={1}{2}{1}", Name, QuoteChar, ReadInnerXml ());
622 case XmlNodeType.Element:
623 bool isEmpty = IsEmptyElement;
624 string startTag = currentTag.ToString ();
627 if (NodeType == XmlNodeType.Element && !isEmpty)
628 return String.Format ("{0}{1}</{2}>", startTag, ReadInnerXml (), name);
630 return currentTag.ToString ();
631 case XmlNodeType.None:
632 // MS document is incorrect. Seems not to progress.
// Not implemented yet: always throws NotImplementedException.
public override string ReadString ()
{
	throw new NotImplementedException ();
}
// Not implemented yet: always throws NotImplementedException.
public void ResetState ()
{
	throw new NotImplementedException ();
}
// Entity resolution is not something this reader does, so the call is
// always rejected with InvalidOperationException.
public override void ResolveEntity ()
{
	throw new InvalidOperationException ("XmlTextReader cannot resolve external entities.");
}
661 internal string publicId;
662 internal string systemId;
664 internal void SetReaderContext (string url, XmlParserContext context)
666 parserContext = context;
667 parserContext.BaseURI = url;
671 internal void SetReaderFragment(TextReader fragment, XmlNodeType fragType)
673 this.reader = fragment;
674 StreamReader sr = fragment as StreamReader;
676 can_seek = sr.BaseStream.CanSeek;
678 can_seek = fragment != null && fragment.Peek () != -1;
680 if (fragType == XmlNodeType.Attribute)
682 else if (fragType == XmlNodeType.DocumentFragment)
683 allowMultipleRoot = true;
687 case XmlNodeType.Attribute: // attribute content
688 parserContext.InputState = XmlParserInputState.AttributeValue;
690 case XmlNodeType.DocumentFragment: // element content
691 parserContext.InputState = XmlParserInputState.Content;
693 case XmlNodeType.Element: // one element
694 parserContext.InputState = XmlParserInputState.StartTag;
696 case XmlNodeType.Document: // document content
697 parserContext.InputState = XmlParserInputState.Start;
700 throw new InvalidOperationException("setting this xml node type not allowed.");
708 private XmlParserContext parserContext;
710 private TextReader reader;
711 private ReadState readState;
714 private int elementDepth;
715 private bool depthDown;
717 private bool popScope;
718 private Stack elementStack;
719 private bool haveEnteredDocument;
720 private bool allowMultipleRoot = false;
722 private XmlNodeType nodeType;
724 private string prefix;
725 private string localName;
726 private string namespaceURI;
727 private bool isEmptyElement;
728 private string value;
730 private XmlNodeType saveNodeType;
731 private string saveName;
732 private string savePrefix;
733 private string saveLocalName;
734 private string saveNamespaceURI;
735 private bool saveIsEmptyElement;
737 private Hashtable attributes;
738 private ArrayList orderedAttributes;
739 private IEnumerator orderedAttributesEnumerator;
741 private bool returnEntityReference;
742 private string entityReferenceName;
744 private char [] nameBuffer;
745 private int nameLength;
746 private int nameCapacity;
747 private const int initialNameCapacity = 256;
749 private char [] valueBuffer;
750 private int valueLength;
751 private int valueCapacity;
752 private const int initialValueCapacity = 8192;
754 private StringBuilder currentTag; // A buffer for ReadContent for ReadOuterXml
755 private int line = 1;
756 private int column = 1;
757 private bool has_peek;
758 private bool can_seek;
759 private int peek_char;
761 private string attributeString = String.Empty;
762 private int attributeValuePos;
763 // This should be only referenced(used) by ReadInnerXml(). Kind of flyweight pattern.
764 private StringBuilder innerXmlBuilder;
// Builds (does not throw) an XmlException carrying the message together
// with the reader's current line number and line position.
private XmlException ReaderError (string message)
{
	XmlException error = new XmlException (message, LineNumber, LinePosition);
	return error;
}
772 readState = ReadState.Initial;
778 elementStack = new Stack();
779 haveEnteredDocument = false;
781 nodeType = XmlNodeType.None;
783 prefix = String.Empty;
784 localName = string.Empty;
785 isEmptyElement = false;
786 value = String.Empty;
788 attributes = new Hashtable ();
789 orderedAttributes = new ArrayList ();
790 orderedAttributesEnumerator = null;
792 returnEntityReference = false;
793 entityReferenceName = String.Empty;
795 nameBuffer = new char [initialNameCapacity];
797 nameCapacity = initialNameCapacity;
799 valueBuffer = new char [initialValueCapacity];
801 valueCapacity = initialValueCapacity;
803 currentTag = new StringBuilder ();
804 innerXmlBuilder = new StringBuilder ();
807 // Use this method rather than setting the properties
808 // directly so that all the necessary properties can
809 // be changed in harmony with each other. Maybe the
810 // fields should be in a separate class to help enforce
812 private void SetProperties (
813 XmlNodeType nodeType,
817 bool clearAttributes)
819 this.nodeType = nodeType;
821 this.isEmptyElement = isEmptyElement;
823 this.elementDepth = depth;
828 int indexOfColon = name.IndexOf (':');
830 if (indexOfColon == -1) {
831 prefix = String.Empty;
834 prefix = name.Substring (0, indexOfColon);
835 localName = name.Substring (indexOfColon + 1);
838 namespaceURI = LookupNamespace (prefix);
841 private void SaveProperties ()
843 saveNodeType = nodeType;
846 saveLocalName = localName;
847 saveNamespaceURI = namespaceURI;
848 saveIsEmptyElement = isEmptyElement;
849 // An element's value is always String.Empty.
852 private void RestoreProperties ()
854 nodeType = saveNodeType;
857 localName = saveLocalName;
858 namespaceURI = saveNamespaceURI;
859 isEmptyElement = saveIsEmptyElement;
860 value = String.Empty;
// Records one attribute: the value is stored under its qualified name and
// the name is appended to the ordered list used for index-based access.
//
// A repeated name is reported through ReaderError so the failure carries
// line information. Previously the duplicate key was rejected by the hash
// table itself with an ArgumentException.
private void AddAttribute (string name, string value)
{
	if (attributes.ContainsKey (name))
		throw ReaderError (String.Format ("duplicate attribute '{0}'", name));

	attributes.Add (name, value);
	orderedAttributes.Add (name);
}
869 private void ClearAttributes ()
871 if (attributes.Count > 0) {
873 orderedAttributes.Clear ();
876 orderedAttributesEnumerator = null;
879 private int PeekChar ()
882 return reader.Peek ();
887 peek_char = reader.Read ();
892 private int ReadChar ()
908 currentTag.Append ((char) ch);
912 // This should really keep track of some state so
913 // that it's not possible to have more than one document
914 // element or text outside of the document element.
915 private bool ReadContent ()
917 currentTag.Length = 0;
919 parserContext.NamespaceManager.PopScope ();
923 if (returnEntityReference) {
924 SetEntityReferenceProperties ();
933 if (whitespaceHandling == WhitespaceHandling.All ||
934 whitespaceHandling == WhitespaceHandling.Significant)
935 return ReadWhitespace ();
938 return ReadContent ();
940 if (whitespaceHandling == WhitespaceHandling.All ||
941 whitespaceHandling == WhitespaceHandling.Significant)
942 return ReadWhitespace ();
945 return ReadContent ();
947 if (whitespaceHandling == WhitespaceHandling.All ||
948 whitespaceHandling == WhitespaceHandling.Significant)
949 return ReadWhitespace ();
952 return ReadContent ();
954 readState = ReadState.EndOfFile;
956 XmlNodeType.None, // nodeType
957 String.Empty, // name
958 false, // isEmptyElement
959 String.Empty, // value
960 true // clearAttributes
968 return this.ReadState != ReadState.EndOfFile;
// Turns the pending entity reference into the current node: an
// EntityReference named after the remembered entity, with an empty value
// and cleared attributes. The pending-reference bookkeeping is then reset.
private void SetEntityReferenceProperties ()
{
	SetProperties (XmlNodeType.EntityReference, entityReferenceName, false, String.Empty, true);

	entityReferenceName = String.Empty;
	returnEntityReference = false;
}
985 // The leading '<' has already been consumed.
986 private void ReadTag ()
996 ReadProcessingInstruction ();
1008 // The leading '<' has already been consumed.
1009 private void ReadStartTag ()
1011 parserContext.NamespaceManager.PushScope ();
1013 string name = ReadName ();
1014 if (haveEnteredDocument && elementStack.Count == 0 && !allowMultipleRoot)
1015 throw ReaderError("document has terminated, cannot open new element");
1017 haveEnteredDocument = true;
1020 bool isEmptyElement = false;
1024 if (XmlConstructs.IsNameStart (PeekChar ()))
1027 if (PeekChar () == '/') {
1029 isEmptyElement = true;
1034 elementStack.Push(name);
1039 XmlNodeType.Element, // nodeType
1041 isEmptyElement, // isEmptyElement
1042 String.Empty, // value
1043 false // clearAttributes
1053 // The reader is positioned on the first character
1054 // of the element's name.
1055 private void ReadEndTag ()
1057 string name = ReadName ();
1058 if (elementStack.Count == 0)
1059 throw ReaderError("closing element without matching opening element");
1060 if ((string)elementStack.Pop() != name)
1061 throw ReaderError("unmatched closing element");
1069 XmlNodeType.EndElement, // nodeType
1071 false, // isEmptyElement
1072 String.Empty, // value
1073 true // clearAttributes
// Appends one character to the name buffer, growing the buffer first
// if it is full.
private void AppendNameChar (int ch)
{
	CheckNameCapacity ();

	nameBuffer [nameLength] = (char) ch;
	nameLength++;
}
// Doubles the name buffer when it is exactly full, keeping the characters
// collected so far.
private void CheckNameCapacity ()
{
	if (nameLength != nameCapacity)
		return;

	nameCapacity *= 2;
	char [] previous = nameBuffer;
	char [] grown = new char [nameCapacity];
	nameBuffer = grown;
	Array.Copy (previous, grown, nameLength);
}
// Materializes the characters collected in the name buffer as a string.
private string CreateNameString ()
{
	string collected = new String (nameBuffer, 0, nameLength);
	return collected;
}
// Appends one character to the value buffer, growing the buffer first
// if it is full.
private void AppendValueChar (int ch)
{
	CheckValueCapacity ();

	valueBuffer [valueLength] = (char) ch;
	valueLength++;
}
// Doubles the value buffer when it is exactly full, keeping the characters
// collected so far.
private void CheckValueCapacity ()
{
	if (valueLength != valueCapacity)
		return;

	valueCapacity *= 2;
	char [] previous = valueBuffer;
	char [] grown = new char [valueCapacity];
	valueBuffer = grown;
	Array.Copy (previous, grown, valueLength);
}
// Materializes the characters collected in the value buffer as a string.
private string CreateValueString ()
{
	string collected = new String (valueBuffer, 0, valueLength);
	return collected;
}
1121 // The reader is positioned on the first character
1123 private void ReadText (bool cleanValue)
1128 int ch = PeekChar ();
1130 while (ch != '<' && ch != -1) {
1133 if (ReadReference (false))
1136 AppendValueChar (ReadChar ());
1141 if (returnEntityReference && valueLength == 0) {
1142 SetEntityReferenceProperties ();
1145 XmlNodeType.Text, // nodeType
1146 String.Empty, // name
1147 false, // isEmptyElement
1148 CreateValueString (), // value
1149 true // clearAttributes
1154 // The leading '&' has already been consumed.
1155 // Returns true if the entity reference isn't a simple
1156 // character reference or one of the predefined entities.
1157 // This allows the ReadText method to break so that the
1158 // next call to Read will return the EntityReference node.
1159 private bool ReadReference (bool ignoreEntityReferences)
1161 if (PeekChar () == '#') {
1163 ReadCharacterReference ();
1165 ReadEntityReference (ignoreEntityReferences);
1167 return returnEntityReference;
1170 private void ReadCharacterReference ()
1174 if (PeekChar () == 'x') {
1177 while (PeekChar () != ';' && PeekChar () != -1) {
1178 int ch = ReadChar ();
1180 if (ch >= '0' && ch <= '9')
1181 value = (value << 4) + ch - '0';
1182 else if (ch >= 'A' && ch <= 'F')
1183 value = (value << 4) + ch - 'A' + 10;
1184 else if (ch >= 'a' && ch <= 'f')
1185 value = (value << 4) + ch - 'a' + 10;
1189 "invalid hexadecimal digit: {0} (#x{1:X})",
1194 while (PeekChar () != ';' && PeekChar () != -1) {
1195 int ch = ReadChar ();
1197 if (ch >= '0' && ch <= '9')
1198 value = value * 10 + ch - '0';
1202 "invalid decimal digit: {0} (#x{1:X})",
1210 AppendValueChar (value);
1213 private void ReadEntityReference (bool ignoreEntityReferences)
1217 int ch = PeekChar ();
1219 while (ch != ';' && ch != -1) {
1220 AppendNameChar (ReadChar ());
1226 string name = CreateNameString ();
1231 AppendValueChar ('<');
1234 AppendValueChar ('>');
1237 AppendValueChar ('&');
1240 AppendValueChar ('\'');
1243 AppendValueChar ('"');
1246 if (ignoreEntityReferences) {
1247 AppendValueChar ('&');
1249 foreach (char ch2 in name) {
1250 AppendValueChar (ch2);
1253 AppendValueChar (';');
1255 returnEntityReference = true;
1256 entityReferenceName = name;
1262 // The reader is positioned on the first character of
1263 // the attribute name.
1264 private void ReadAttributes ()
1267 string name = ReadName ();
1271 string value = ReadAttribute ();
1274 if (name == "xmlns")
1275 parserContext.NamespaceManager.AddNamespace (String.Empty, UnescapeAttributeValue (value));
1276 else if (name.StartsWith ("xmlns:"))
1277 parserContext.NamespaceManager.AddNamespace (name.Substring (6), UnescapeAttributeValue (value));
1279 AddAttribute (name, value);
1280 } while (PeekChar () != '/' && PeekChar () != '>' && PeekChar () != -1);
1283 // The reader is positioned on the quote character.
1284 // *Keeps quote char* to value to get_QuoteChar() correctly.
1285 private string ReadAttribute ()
1289 int quoteChar = ReadChar ();
1291 if (quoteChar != '\'' && quoteChar != '\"')
1292 throw ReaderError ("an attribute value was not quoted");
1294 AppendValueChar (quoteChar);
1296 while (PeekChar () != quoteChar) {
1297 int ch = ReadChar ();
1302 throw ReaderError ("attribute values cannot contain '<'");
1304 throw ReaderError ("unexpected end of file in an attribute value");
1306 AppendValueChar (ch);
1311 ReadChar (); // quoteChar
1312 AppendValueChar (quoteChar);
1314 return CreateValueString ();
1317 // The reader is positioned on the first character
1320 // Now it also reads XmlDeclaration, this method name became improper...
1321 private void ReadProcessingInstruction ()
1323 string target = ReadName ();
1328 while (PeekChar () != -1) {
1329 int ch = ReadChar ();
1331 if (ch == '?' && PeekChar () == '>') {
1336 AppendValueChar ((char)ch);
1340 if(target == "xml") && parserContext.InputState != XmlParserInputState.Start)
1341 throw new XmlException("Xml declaration is not allowed here.");
1343 parserContext.InputState = XmlParserInputState.DTD; //for future use
1348 XmlNodeType.XmlDeclaration :
1349 XmlNodeType.ProcessingInstruction, // nodeType
1351 false, // isEmptyElement
1352 CreateValueString (), // value
1353 true // clearAttributes
1357 // The reader is positioned on the first character after
1358 // the leading '<!'.
1359 private void ReadDeclaration ()
1361 int ch = PeekChar ();
1381 // The reader is positioned on the first character after
1382 // the leading '<!--'.
1383 private void ReadComment ()
1387 while (PeekChar () != -1) {
1388 int ch = ReadChar ();
1390 if (ch == '-' && PeekChar () == '-') {
1393 if (PeekChar () != '>')
1394 throw ReaderError ("comments cannot contain '--'");
1400 AppendValueChar ((char)ch);
1404 XmlNodeType.Comment, // nodeType
1405 String.Empty, // name
1406 false, // isEmptyElement
1407 CreateValueString (), // value
1408 true // clearAttributes
1412 // The reader is positioned on the first character after
1413 // the leading '<![CDATA['.
1414 private void ReadCDATA ()
1418 while (PeekChar () != -1) {
1419 int ch = ReadChar ();
1421 if (ch == ']' && PeekChar () == ']') {
1422 ch = ReadChar (); // ']'
1424 if (PeekChar () == '>') {
1428 AppendValueChar (']');
1429 AppendValueChar (']');
1434 AppendValueChar ((char)ch);
1438 XmlNodeType.CDATA, // nodeType
1439 String.Empty, // name
1440 false, // isEmptyElement
1441 CreateValueString (), // value
1442 true // clearAttributes
1446 // The reader is positioned on the first character after
1447 // the leading '<!DOCTYPE'.
1448 private void ReadDoctypeDecl ()
1450 string doctypeName = null;
1451 string publicId = String.Empty;
1452 string systemId = String.Empty;
1455 doctypeName = ReadName ();
1460 systemId = ReadSystemLiteral (true);
1463 publicId = ReadPubidLiteral ();
1465 systemId = ReadSystemLiteral (false);
1471 if(PeekChar () == '[')
1473 // read markupdecl etc. or end of decl
1475 int startPos = currentTag.Length;
1477 ReadDTDInternalSubset ();
1478 } while (nodeType != XmlNodeType.None);
1479 int endPos = currentTag.Length - 1;
1480 parserContext.InternalSubset = currentTag.ToString (startPos, endPos - startPos);
1482 // end of DOCTYPE decl.
1486 // set properties for <!DOCTYPE> node
1488 XmlNodeType.DocumentType, // nodeType
1489 doctypeName, // name
1490 false, // isEmptyElement
1491 parserContext.InternalSubset, // value
1492 true // clearAttributes
1496 // Read any one of following:
1497 // elementdecl, AttlistDecl, EntityDecl, NotationDecl,
1498 // PI, Comment, Parameter Entity, or doctype termination char(']')
1500 // returns a node of some nodeType or null, setting nodeType.
1501 // (if None then ']' was found.)
1502 private void ReadDTDInternalSubset()
1508 nodeType = XmlNodeType.None;
1511 string peName = ReadName ();
1513 nodeType = XmlNodeType.EntityReference; // It's chating a bit;-)
1519 ReadProcessingInstruction ();
1540 throw ReaderError ("Syntax Error after '<!E' (ELEMENT or ENTITY must be found)");
1549 ReadNotationDecl ();
1552 throw ReaderError ("Syntax Error after '<!' characters.");
1556 throw ReaderError ("Syntax Error after '<' character.");
1560 throw ReaderError ("Syntax Error inside doctypedecl markup.");
// The reader is positioned on the head of the name.
// Skips an element declaration without interpreting it: characters are
// consumed up to and including the next '>'.
// Throws a reader error if the input ends first.
private void ReadElementDecl()
{
	int ch;
	do {
		ch = ReadChar ();
		// Other readers in this file treat a negative result as end of
		// stream; without this check a truncated declaration made the
		// original loop spin forever.
		if (ch < 0)
			throw ReaderError ("Unexpected end of stream in element declaration.");
	} while (ch != '>');
}
// Skips an entity declaration without interpreting it: characters are
// consumed up to and including the next '>'.
// Throws a reader error if the input ends first.
private void ReadEntityDecl()
{
	int ch;
	do {
		ch = ReadChar ();
		// Other readers in this file treat a negative result as end of
		// stream; without this check a truncated declaration made the
		// original loop spin forever.
		if (ch < 0)
			throw ReaderError ("Unexpected end of stream in entity declaration.");
	} while (ch != '>');
}
// Skips an attribute-list declaration without interpreting it: characters
// are consumed up to and including the next '>'.
// Throws a reader error if the input ends first.
private void ReadAttListDecl()
{
	int ch;
	do {
		ch = ReadChar ();
		// Other readers in this file treat a negative result as end of
		// stream; without this check a truncated declaration made the
		// original loop spin forever.
		if (ch < 0)
			throw ReaderError ("Unexpected end of stream in attribute-list declaration.");
	} while (ch != '>');
}
// Skips a notation declaration without interpreting it: characters are
// consumed up to and including the next '>'.
// Throws a reader error if the input ends first.
private void ReadNotationDecl()
{
	int ch;
	do {
		ch = ReadChar ();
		// Other readers in this file treat a negative result as end of
		// stream; without this check a truncated declaration made the
		// original loop spin forever.
		if (ch < 0)
			throw ReaderError ("Unexpected end of stream in notation declaration.");
	} while (ch != '>');
}
1585 // The reader is positioned on the first 'S' of "SYSTEM".
1586 private string ReadSystemLiteral (bool expectSYSTEM)
1591 int quoteChar = ReadChar (); // apos or quot
1592 int startPos = currentTag.Length;
1594 while(c != quoteChar) {
1596 if(c < 0) throw ReaderError ("Unexpected end of stream in ExternalID.");
1598 return currentTag.ToString (startPos, currentTag.Length - 1 - startPos);
1601 private string ReadPubidLiteral()
1605 int quoteChar = ReadChar ();
1606 int startPos = currentTag.Length;
1608 while(c != quoteChar)
1611 if(c < 0) throw ReaderError ("Unexpected end of stream in ExternalID.");
1612 if(c != quoteChar && !XmlConstructs.IsPubid (c))
1613 throw ReaderError("character '" + (char)c + "' not allowed for PUBLIC ID");
1615 ReadChar(); // skips quoteChar
1616 return currentTag.ToString (startPos, currentTag.Length - 1 - startPos);
1619 // The reader is positioned on the first character
1621 private string ReadName ()
1623 if (!XmlConstructs.IsNameStart (PeekChar ()))
1624 throw ReaderError ("a name did not start with a legal character" + PeekChar ());
1628 AppendNameChar (ReadChar ());
1630 while (XmlConstructs.IsName (PeekChar ())) {
1631 AppendNameChar (ReadChar ());
1634 return CreateNameString ();
1637 // Read the next character and compare it against the
1638 // specified character.
1639 private void Expect (int expected)
1641 int ch = ReadChar ();
1643 if (ch != expected) {
1646 "expected '{0}' ({1:X}) but found '{2}' ({3:X})",
// Consumes the given literal one character at a time, delegating each
// comparison to the single-character Expect.
private void Expect (string expected)
{
	foreach (char ch in expected)
		Expect (ch);
}
1661 // Does not consume the first non-whitespace character.
1662 private void SkipWhitespace ()
1664 //FIXME: Should not skip if whitespaceHandling == WhiteSpaceHandling.None
1665 while (XmlConstructs.IsSpace (PeekChar ()))
1669 private bool ReadWhitespace ()
1672 int ch = PeekChar ();
1674 AppendValueChar (ReadChar ());
1675 } while ((ch = PeekChar ()) != -1 && XmlConstructs.IsSpace (ch));
1677 if (ch != -1 && ch != '<')
1680 SetProperties (XmlNodeType.Whitespace,
1683 CreateValueString (),
1686 return (PeekChar () != -1);
1689 // read entity reference from attribute string and if parsable then return the value.
1690 private string ReadAttributeValueReference ()
1692 int endEntityPosition = attributeString.IndexOf(';',
1694 string entityName = attributeString.Substring (attributeValuePos + 1,
1695 endEntityPosition - attributeValuePos - 1);
1697 attributeValuePos = endEntityPosition + 1;
1699 if(entityName [0] == '#') {
1702 if(entityName [1] == 'x') {
1704 c = (char) int.Parse ("0" + entityName.Substring (2),
1705 System.Globalization.NumberStyles.HexNumber);
1708 c = (char) int.Parse (entityName.Substring (1));
1710 return c.ToString();
1715 case "lt": return "<";
1716 case "gt": return ">";
1717 case "amp": return "&";
1718 case "quot": return "\"";
1719 case "apos": return "'";
1720 default: return null;
1725 private string UnescapeAttributeValue (string unresolved)
1727 if(unresolved == null) return null;
1728 StringBuilder resolved = new StringBuilder();
1731 // trim start/end edge of quotation character.
1732 unresolved = unresolved.Substring (1, unresolved.Length - 2);
1734 int next = unresolved.IndexOf ('&');
1740 resolved.Append (unresolved.Substring (pos, next - pos));// - 1);
1741 int endPos = unresolved.IndexOf (';', next+1);
1743 unresolved.Substring (next + 1, endPos - next - 1);
1744 if(entityName [0] == '#') {
1747 if(entityName [1] == 'x') {
1749 c = (char) int.Parse ("0" + entityName.Substring (2),
1750 System.Globalization.NumberStyles.HexNumber);
1753 c = (char) int.Parse (entityName.Substring (1));
1755 resolved.Append (c);
1757 switch(entityName) {
1758 case "lt": resolved.Append ("<"); break;
1759 case "gt": resolved.Append (">"); break;
1760 case "amp": resolved.Append ("&"); break;
1761 case "quot": resolved.Append ("\""); break;
1762 case "apos": resolved.Append ("'"); break;
1763 // With respect to "Value", MS document is helpless
1764 // and the implemention returns inconsistent value
1765 // (e.g. XML: "&ent; &ent;" ---> Value: "&ent; &ent;".)
1766 default: resolved.Append ("&" + entityName + ";"); break;
1770 if(pos > unresolved.Length)
1772 next = unresolved.IndexOf('&', pos);
1774 resolved.Append (unresolved.Substring(pos));
1776 return resolved.ToString();