2 // System.Xml.XmlTextReader
5 // Jason Diamond (jason@injektilo.org)
6 // Adam Treat (manyoso@yahoo.com)
7 // Atsushi Enomoto (ginga@kit.hi-ho.ne.jp)
9 // (C) 2001, 2002 Jason Diamond http://injektilo.org/
10 // Copyright (C) 2005-2006 Novell, Inc (http://www.novell.com)
12 // Permission is hereby granted, free of charge, to any person obtaining
13 // a copy of this software and associated documentation files (the
14 // "Software"), to deal in the Software without restriction, including
15 // without limitation the rights to use, copy, modify, merge, publish,
16 // distribute, sublicense, and/or sell copies of the Software, and to
17 // permit persons to whom the Software is furnished to do so, subject to
18 // the following conditions:
20 // The above copyright notice and this permission notice shall be
21 // included in all copies or substantial portions of the Software.
23 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
27 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
28 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
29 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
31 //#define USE_NAME_BUFFER
34 // Optimization TODOs:
36 // - support PushbackChar() which reverts one character read.
37 // - ReadTextReader() should always keep one pushback buffer
38 // as pushback safety net.
39 // - Replace (peek,read) * n -> read * n + pushback
43 using System.Collections;
44 using System.Collections.Generic;
45 using System.Globalization;
47 using System.Security.Permissions;
49 using System.Xml.Schema;
55 class XmlTextReader : XmlReader,
56 IXmlLineInfo, IXmlNamespaceResolver, IHasXmlParserContext
60 protected XmlTextReader ()
64 public XmlTextReader (Stream input)
65 : this (new XmlStreamReader (input))
69 public XmlTextReader (string url)
70 : this(url, new NameTable ())
74 public XmlTextReader (TextReader input)
75 : this (input, new NameTable ())
79 protected XmlTextReader (XmlNameTable nt)
80 : this (String.Empty, null, XmlNodeType.None, null)
84 public XmlTextReader (Stream input, XmlNameTable nt)
85 : this(new XmlStreamReader (input), nt)
89 public XmlTextReader (string url, Stream input)
90 : this (url, new XmlStreamReader (input))
94 public XmlTextReader (string url, TextReader input)
95 : this (url, input, new NameTable ())
99 public XmlTextReader (string url, XmlNameTable nt)
101 reader_uri = resolver.ResolveUri (null, url);
102 string uriString = (reader_uri == null) ? String.Empty : reader_uri.ToString ();
103 XmlParserContext ctx = new XmlParserContext (nt,
104 new XmlNamespaceManager (nt),
107 this.InitializeContext (uriString, ctx, null, XmlNodeType.Document);
110 public XmlTextReader (TextReader input, XmlNameTable nt)
111 : this (String.Empty, input, nt)
115 // This is used in XmlReader.Create() to indicate that string
116 // argument is uri, not an xml fragment.
117 internal XmlTextReader (bool dummy, XmlResolver resolver, string url, XmlNodeType fragType, XmlParserContext context)
119 if (resolver == null) {
120 resolver = new XmlUrlResolver ();
122 this.XmlResolver = resolver;
125 Stream stream = GetStreamFromUrl (url, out uriString);
126 this.InitializeContext (uriString, context, new XmlStreamReader (stream), fragType);
129 public XmlTextReader (Stream xmlFragment, XmlNodeType fragType, XmlParserContext context)
130 : this (context != null ? context.BaseURI : String.Empty,
131 new XmlStreamReader (xmlFragment),
135 disallowReset = true;
138 internal XmlTextReader (string baseURI, TextReader xmlFragment, XmlNodeType fragType)
139 : this (baseURI, xmlFragment, fragType, null)
143 public XmlTextReader (string url, Stream input, XmlNameTable nt)
144 : this (url, new XmlStreamReader (input), nt)
148 public XmlTextReader (string url, TextReader input, XmlNameTable nt)
149 : this (url, input, XmlNodeType.Document, null)
153 public XmlTextReader (string xmlFragment, XmlNodeType fragType, XmlParserContext context)
154 : this (context != null ? context.BaseURI : String.Empty,
155 new StringReader (xmlFragment),
159 disallowReset = true;
162 internal XmlTextReader (string url, TextReader fragment, XmlNodeType fragType, XmlParserContext context)
164 InitializeContext (url, context, fragment, fragType);
// Resolves url through the configured XmlResolver, passing no base URI.
// Returns null when no resolver is set.
Uri ResolveUri (string url)
{
	if (resolver == null)
		return null;

	return resolver.ResolveUri (null, url);
}
// Resolves url via ResolveUri and asks the resolver for the entity as a Stream.
// absoluteUriString receives the resolved URI as text, or String.Empty when
// ResolveUri returned null. The result is null when no resolver is set, or
// when the entity returned by the resolver is not a Stream (obtained with 'as').
172 Stream GetStreamFromUrl (string url, out string absoluteUriString)
// NOTE(review): the conditions guarding the two throws below are not visible
// in this excerpt; presumably a null check and an empty-string check on url.
176 throw new ArgumentNullException ("url");
178 throw new ArgumentException ("url");
180 Uri uri = ResolveUri (url);
181 absoluteUriString = uri != null ? uri.ToString () : String.Empty;
182 return resolver == null ? null : resolver.GetEntity (uri, null, typeof (Stream)) as Stream;
189 public override int AttributeCount
191 get { return attributeCount; }
194 public override string BaseURI
196 get { return parserContext.BaseURI; }
199 public override bool CanReadBinaryContent {
203 public override bool CanReadValueChunk {
207 internal bool CharacterChecking {
208 get { return checkCharacters; }
209 set { checkCharacters = value; }
212 // for XmlReaderSettings.CloseInput support
213 internal bool CloseInput {
214 get { return closeInput; }
215 set { closeInput = value; }
218 public override int Depth
221 int nodeTypeMod = currentToken.NodeType == XmlNodeType.Element ? 0 : -1;
222 if (currentAttributeValue >= 0)
223 return nodeTypeMod + elementDepth + 2; // inside attribute value.
224 else if (currentAttribute >= 0)
225 return nodeTypeMod + elementDepth + 1;
230 public Encoding Encoding
232 get { return parserContext.Encoding; }
235 public EntityHandling EntityHandling {
236 get { return entityHandling; }
237 set { entityHandling = value; }
240 public override bool EOF {
241 get { return readState == ReadState.EndOfFile; }
244 public override bool HasValue {
245 get { return cursorToken.Value != null; }
248 public override bool IsDefault {
249 // XmlTextReader does not expand default attributes.
250 get { return false; }
253 public override bool IsEmptyElement {
254 get { return cursorToken.IsEmptyElement; }
257 public int LineNumber {
259 if (useProceedingLineInfo)
262 return cursorToken.LineNumber;
266 public int LinePosition {
268 if (useProceedingLineInfo)
271 return cursorToken.LinePosition;
275 public override string LocalName {
276 get { return cursorToken.LocalName; }
279 public override string Name {
280 get { return cursorToken.Name; }
283 public bool Namespaces {
284 get { return namespaces; }
286 if (readState != ReadState.Initial)
287 throw new InvalidOperationException ("Namespaces have to be set before reading.");
292 public override string NamespaceURI {
293 get { return cursorToken.NamespaceURI; }
296 public override XmlNameTable NameTable {
297 get { return nameTable; }
300 public override XmlNodeType NodeType {
301 get { return cursorToken.NodeType; }
304 public bool Normalization {
305 get { return normalization; }
306 set { normalization = value; }
309 public override string Prefix {
310 get { return cursorToken.Prefix; }
313 public bool ProhibitDtd {
314 get { return prohibitDtd; }
315 set { prohibitDtd = value; }
318 public override char QuoteChar {
319 get { return cursorToken.QuoteChar; }
322 public override ReadState ReadState {
323 get { return readState; }
326 public override XmlReaderSettings Settings {
327 get { return base.Settings; }
// Text value of the token under the cursor. Never null: a token without a
// value yields String.Empty.
public override string Value {
	// Read the token's Value getter once instead of twice; the getter may
	// have to build the string, so the null-coalescing form avoids a
	// redundant second evaluation.
	get { return cursorToken.Value ?? String.Empty; }
334 public WhitespaceHandling WhitespaceHandling {
335 get { return whitespaceHandling; }
336 set { whitespaceHandling = value; }
339 public override string XmlLang {
340 get { return parserContext.XmlLang; }
343 public XmlResolver XmlResolver {
344 set { resolver = value; }
347 public override XmlSpace XmlSpace {
348 get { return parserContext.XmlSpace; }
355 public override void Close ()
357 readState = ReadState.Closed;
359 cursorToken.Clear ();
360 currentToken.Clear ();
362 if (closeInput && reader != null)
// Returns the value of the attribute at zero-based index i of the current
// attribute list.
// Throws ArgumentOutOfRangeException when i is negative or is not smaller
// than AttributeCount.
public override string GetAttribute (int i)
{
	// The one-argument ArgumentOutOfRangeException constructor interprets its
	// string as the parameter name, so pass the name and the message
	// separately. Negative indexes are rejected here as well instead of
	// surfacing as an IndexOutOfRangeException from the array access.
	if (i < 0 || i >= attributeCount)
		throw new ArgumentOutOfRangeException ("i", "i must be non-negative and smaller than AttributeCount");

	return attributeTokens [i].Value;
}
375 // MS.NET 1.0 msdn says that this method returns String.Empty
376 // for absent attribute, but in fact it returns null.
377 // This description is corrected in MS.NET 1.1 msdn.
// Looks up an attribute on the current node by its qualified name and
// returns its value; yields null when no attribute has that name.
public override string GetAttribute (string name)
{
	int index = 0;
	while (index < attributeCount) {
		XmlAttributeTokenInfo candidate = attributeTokens [index];
		if (candidate.Name == name)
			return candidate.Value;
		index++;
	}

	// Absent attribute.
	return null;
}
// Scans the current attribute tokens for one whose LocalName and NamespaceURI
// both equal the given values. A null namespaceURI is treated as String.Empty.
// NOTE(review): the return statements are not visible in this excerpt. Callers
// use the result as an index into attributeTokens, so it is presumably the
// index of the match; confirm the value returned when nothing matches.
386 private int GetIndexOfQualifiedAttribute (string localName, string namespaceURI)
388 namespaceURI = namespaceURI ?? String.Empty;
389 for (int i = 0; i < attributeCount; i++) {
390 XmlAttributeTokenInfo ti = attributeTokens [i];
391 if (ti.LocalName == localName && ti.NamespaceURI == namespaceURI)
397 XmlParserContext IHasXmlParserContext.ParserContext {
398 get { return parserContext; }
401 public override string GetAttribute (string localName, string namespaceURI)
403 int idx = this.GetIndexOfQualifiedAttribute (localName, namespaceURI);
406 return attributeTokens [idx].Value;
409 public IDictionary<string, string> GetNamespacesInScope (XmlNamespaceScope scope)
411 return nsmgr.GetNamespacesInScope (scope);
414 IDictionary<string, string> IXmlNamespaceResolver.GetNamespacesInScope (XmlNamespaceScope scope)
416 return GetNamespacesInScope (scope);
419 public TextReader GetRemainder ()
421 if (peekCharsLength < 0)
423 return new StringReader (new string (peekChars, peekCharsIndex, peekCharsLength - peekCharsIndex) + reader.ReadToEnd ());
426 public bool HasLineInfo ()
431 public override string LookupNamespace (string prefix)
433 return LookupNamespace (prefix, false);
// Looks up the namespace bound to prefix in the namespace manager.
// An empty-string result from the manager is reported to the caller as null.
private string LookupNamespace (string prefix, bool atomizedNames)
{
	string uri = nsmgr.LookupNamespace (prefix, atomizedNames);
	if (uri == String.Empty)
		return null;

	return uri;
}
443 string IXmlNamespaceResolver.LookupPrefix (string ns)
445 return LookupPrefix (ns, false);
448 public string LookupPrefix (string ns, bool atomizedName)
450 return nsmgr.LookupPrefix (ns, atomizedName);
// Moves the cursor to the attribute at zero-based index i and resets the
// attribute-value position.
// Throws ArgumentOutOfRangeException when i is negative or is not smaller
// than AttributeCount.
public override void MoveToAttribute (int i)
{
	// The one-argument ArgumentOutOfRangeException constructor interprets its
	// string as the parameter name, so pass the name and the message
	// separately. Negative indexes are rejected before any state is touched;
	// previously they corrupted currentAttribute before the array access failed.
	if (i < 0 || i >= attributeCount)
		throw new ArgumentOutOfRangeException ("i", "attribute index out of range.");

	currentAttribute = i;
	currentAttributeValue = -1;
	cursorToken = attributeTokens [i];
}
463 public override bool MoveToAttribute (string name)
465 for (int i = 0; i < attributeCount; i++) {
466 XmlAttributeTokenInfo ti = attributeTokens [i];
467 if (ti.Name == name) {
475 public override bool MoveToAttribute (string localName, string namespaceName)
477 int idx = GetIndexOfQualifiedAttribute (localName, namespaceName);
480 MoveToAttribute (idx);
484 public override bool MoveToElement ()
486 if (currentToken == null) // for attribute .ctor()
489 if (cursorToken == currentToken)
492 if (currentAttribute >= 0) {
493 currentAttribute = -1;
494 currentAttributeValue = -1;
495 cursorToken = currentToken;
502 public override bool MoveToFirstAttribute ()
504 if (attributeCount == 0)
507 return MoveToNextAttribute ();
510 public override bool MoveToNextAttribute ()
512 if (currentAttribute == 0 && attributeCount == 0)
514 if (currentAttribute + 1 < attributeCount) {
516 currentAttributeValue = -1;
517 cursorToken = attributeTokens [currentAttribute];
524 public override bool Read ()
526 if (readState == ReadState.Closed)
528 curNodePeekIndex = peekCharsIndex;
529 preserveCurrentTag = true;
533 if (startNodeType == XmlNodeType.Attribute) {
534 if (currentAttribute == 0)
535 return false; // already read.
536 SkipTextDeclaration ();
538 IncrementAttributeToken ();
539 ReadAttributeValueTokens ('"');
540 cursorToken = attributeTokens [0];
541 currentAttributeValue = -1;
542 readState = ReadState.Interactive;
545 if (readState == ReadState.Initial && currentState == XmlNodeType.Element)
546 SkipTextDeclaration ();
552 readState = ReadState.Interactive;
553 currentLinkedNodeLineNumber = line;
554 currentLinkedNodeLinePosition = column;
555 useProceedingLineInfo = true;
557 cursorToken = currentToken;
559 currentAttribute = currentAttributeValue = -1;
560 currentToken.Clear ();
562 // It was moved from end of ReadStartTag ().
568 if (readCharsInProgress) {
569 readCharsInProgress = false;
570 return ReadUntilEndTag ();
573 more = ReadContent ();
575 if (!more && startNodeType == XmlNodeType.Document && currentState != XmlNodeType.EndElement)
576 throw NotWFError ("Document element did not appear.");
578 useProceedingLineInfo = false;
582 public override bool ReadAttributeValue ()
584 if (readState == ReadState.Initial && startNodeType == XmlNodeType.Attribute) {
588 if (currentAttribute < 0)
590 XmlAttributeTokenInfo ti = attributeTokens [currentAttribute];
591 if (currentAttributeValue < 0)
592 currentAttributeValue = ti.ValueTokenStartIndex - 1;
594 if (currentAttributeValue < ti.ValueTokenEndIndex) {
595 currentAttributeValue++;
596 cursorToken = attributeValueTokens [currentAttributeValue];
// Decodes Base64 content into buffer by delegating to Binary.ReadBase64.
// binaryCharGetter is installed as BinaryCharGetter for the duration of the
// call and cleared again afterwards, whether or not the call throws.
public int ReadBase64 (byte [] buffer, int offset, int length)
{
	int bytesRead;
	BinaryCharGetter = binaryCharGetter;
	try {
		bytesRead = Binary.ReadBase64 (buffer, offset, length);
	} finally {
		BinaryCharGetter = null;
	}
	return bytesRead;
}
// Decodes BinHex content into buffer by delegating to Binary.ReadBinHex.
// binaryCharGetter is installed as BinaryCharGetter for the duration of the
// call and cleared again afterwards, whether or not the call throws.
public int ReadBinHex (byte [] buffer, int offset, int length)
{
	int bytesRead;
	BinaryCharGetter = binaryCharGetter;
	try {
		bytesRead = Binary.ReadBinHex (buffer, offset, length);
	} finally {
		BinaryCharGetter = null;
	}
	return bytesRead;
}
623 public int ReadChars (char [] buffer, int offset, int length)
626 throw new ArgumentOutOfRangeException (
630 "Offset must be non-negative integer.");
632 } else if (length < 0) {
633 throw new ArgumentOutOfRangeException (
637 "Length must be non-negative integer.");
639 } else if (buffer.Length < offset + length)
640 throw new ArgumentOutOfRangeException ("buffer length is smaller than the sum of offset and length.");
642 if (IsEmptyElement) {
647 if (!readCharsInProgress && NodeType != XmlNodeType.Element)
650 preserveCurrentTag = false;
651 readCharsInProgress = true;
652 useProceedingLineInfo = true;
654 return ReadCharsInternal (buffer, offset, length);
657 public void ResetState ()
660 throw new InvalidOperationException ("Cannot call ResetState when parsing an XML fragment.");
664 public override void ResolveEntity ()
666 // XmlTextReader does not resolve entities.
667 throw new InvalidOperationException ("XmlTextReader cannot resolve external entities.");
670 [MonoTODO] // FIXME: Implement, for performance improvement
671 public override void Skip ()
678 // Parsed DTD Objects
679 // Note that this property must be kept since dtd2xsd uses it.
680 internal DTDObjectModel DTD {
681 get { return parserContext.Dtd; }
684 internal XmlResolver Resolver {
685 get { return resolver; }
690 internal class XmlTokenInfo
692 public XmlTokenInfo (XmlTextReader xtr)
700 protected XmlTextReader Reader;
703 public string LocalName;
704 public string Prefix;
705 public string NamespaceURI;
706 public bool IsEmptyElement;
707 public char QuoteChar;
708 public int LineNumber;
709 public int LinePosition;
710 public int ValueBufferStart;
711 public int ValueBufferEnd;
713 public XmlNodeType NodeType;
715 public virtual string Value {
717 if (valueCache != null)
719 if (ValueBufferStart >= 0) {
720 //Console.WriteLine (NodeType + " / " + ValueBuffer.Length + " / " + ValueBufferStart + " / " + ValueBufferEnd);
721 valueCache = Reader.valueBuffer.ToString (ValueBufferStart, ValueBufferEnd - ValueBufferStart);
725 case XmlNodeType.Text:
726 case XmlNodeType.SignificantWhitespace:
727 case XmlNodeType.Whitespace:
728 case XmlNodeType.Comment:
729 case XmlNodeType.CDATA:
730 case XmlNodeType.ProcessingInstruction:
731 valueCache = Reader.CreateValueString ();
736 set { valueCache = value; }
739 public virtual void Clear ()
741 ValueBufferStart = -1;
743 NodeType = XmlNodeType.None;
744 Name = LocalName = Prefix = NamespaceURI = String.Empty;
745 IsEmptyElement = false;
747 LineNumber = LinePosition = 0;
751 internal class XmlAttributeTokenInfo : XmlTokenInfo
753 public XmlAttributeTokenInfo (XmlTextReader reader)
756 NodeType = XmlNodeType.Attribute;
759 public int ValueTokenStartIndex;
760 public int ValueTokenEndIndex;
762 StringBuilder tmpBuilder = new StringBuilder ();
764 public override string Value {
766 if (valueCache != null)
769 // An empty value should return String.Empty.
770 if (ValueTokenStartIndex == ValueTokenEndIndex) {
771 XmlTokenInfo ti = Reader.attributeValueTokens [ValueTokenStartIndex];
772 if (ti.NodeType == XmlNodeType.EntityReference)
773 valueCache = String.Concat ("&", ti.Name, ";");
775 valueCache = ti.Value;
779 tmpBuilder.Length = 0;
780 for (int i = ValueTokenStartIndex; i <= ValueTokenEndIndex; i++) {
781 XmlTokenInfo ti = Reader.attributeValueTokens [i];
782 if (ti.NodeType == XmlNodeType.Text)
783 tmpBuilder.Append (ti.Value);
785 tmpBuilder.Append ('&');
786 tmpBuilder.Append (ti.Name);
787 tmpBuilder.Append (';');
791 valueCache = tmpBuilder.ToString (0, tmpBuilder.Length);
795 set { valueCache = value; }
798 public override void Clear ()
802 NodeType = XmlNodeType.Attribute;
803 ValueTokenStartIndex = ValueTokenEndIndex = 0;
// Registers this attribute with the reader's namespace manager when it is a
// namespace declaration: "xmlns:p" binds prefix p (the local name), and a
// bare "xmlns" binds the empty prefix. Any other attribute is ignored.
internal void FillXmlns ()
{
	string declaredPrefix;
	if (Object.ReferenceEquals (Prefix, XmlNamespaceManager.PrefixXmlns))
		declaredPrefix = LocalName;
	else if (Object.ReferenceEquals (Name, XmlNamespaceManager.PrefixXmlns))
		declaredPrefix = String.Empty;
	else
		return;

	Reader.nsmgr.AddNamespace (declaredPrefix, Value);
}
// Assigns NamespaceURI for this attribute: namespace declarations get
// XmlNamespaceManager.XmlnsXmlns, unprefixed attributes get the empty
// string, and prefixed attributes get whatever the reader resolves for
// the prefix.
internal void FillNamespace ()
{
	bool isNamespaceDeclaration =
		Object.ReferenceEquals (Prefix, XmlNamespaceManager.PrefixXmlns) ||
		Object.ReferenceEquals (Name, XmlNamespaceManager.PrefixXmlns);

	if (isNamespaceDeclaration) {
		NamespaceURI = XmlNamespaceManager.XmlnsXmlns;
		return;
	}

	NamespaceURI = Prefix.Length == 0
		? string.Empty
		: Reader.LookupNamespace (Prefix, true);
}
826 private XmlTokenInfo cursorToken;
827 private XmlTokenInfo currentToken;
828 private XmlAttributeTokenInfo currentAttributeToken;
829 private XmlTokenInfo currentAttributeValueToken;
830 private XmlAttributeTokenInfo [] attributeTokens = new XmlAttributeTokenInfo [10];
831 private XmlTokenInfo [] attributeValueTokens = new XmlTokenInfo [10];
832 private int currentAttribute;
833 private int currentAttributeValue;
834 private int attributeCount;
836 private XmlParserContext parserContext;
837 private XmlNameTable nameTable;
838 private XmlNamespaceManager nsmgr;
840 private ReadState readState;
841 private bool disallowReset;
844 private int elementDepth;
845 private bool depthUp;
847 private bool popScope;
851 public TagName (string n, string l, string p)
858 public readonly string Name;
859 public readonly string LocalName;
860 public readonly string Prefix;
863 private TagName [] elementNames;
864 int elementNameStackPos;
866 private bool allowMultipleRoot;
868 private bool isStandalone;
870 private bool returnEntityReference;
871 private string entityReferenceName;
874 private char [] nameBuffer;
875 private int nameLength;
876 private int nameCapacity;
877 private const int initialNameCapacity = 32;
880 private StringBuilder valueBuffer;
883 private TextReader reader;
884 private char [] peekChars;
885 private int peekCharsIndex;
886 private int peekCharsLength;
887 private int curNodePeekIndex;
888 private bool preserveCurrentTag;
889 private const int peekCharCapacity = 1024;
894 private int currentLinkedNodeLineNumber;
895 private int currentLinkedNodeLinePosition;
896 private bool useProceedingLineInfo;
898 private XmlNodeType startNodeType;
899 // State machine attribute.
900 // XmlDeclaration: after the first node.
901 // DocumentType: after doctypedecl
902 // Element: inside document element
903 // EndElement: after document element
904 private XmlNodeType currentState;
906 // For ReadChars()/ReadBase64()/ReadBinHex()
907 private int nestLevel;
908 private bool readCharsInProgress;
909 XmlReaderBinarySupport.CharGetter binaryCharGetter;
911 // These values are never re-initialized.
912 private bool namespaces = true;
913 private WhitespaceHandling whitespaceHandling = WhitespaceHandling.All;
914 private XmlResolver resolver = new XmlUrlResolver ();
915 private bool normalization = false;
917 private bool checkCharacters;
918 private bool prohibitDtd = false;
919 private bool closeInput = true;
920 private EntityHandling entityHandling; // 2.0
922 private NameTable whitespacePool;
923 private char [] whitespaceCache;
925 private XmlException NotWFError (string message)
927 return new XmlException (this as IXmlLineInfo, BaseURI, message);
932 allowMultipleRoot = false;
933 elementNames = new TagName [10];
934 valueBuffer = new StringBuilder ();
935 binaryCharGetter = new XmlReaderBinarySupport.CharGetter (ReadChars);
937 nameBuffer = new char [initialNameCapacity];
940 checkCharacters = true;
941 if (Settings != null)
942 checkCharacters = Settings.CheckCharacters;
945 entityHandling = EntityHandling.ExpandCharEntities;
948 if (peekChars == null)
949 peekChars = new char [peekCharCapacity];
950 peekCharsLength = -1;
951 curNodePeekIndex = -1; // read from start
956 currentLinkedNodeLineNumber = currentLinkedNodeLinePosition = 0;
961 private void Clear ()
963 currentToken = new XmlTokenInfo (this);
964 cursorToken = currentToken;
965 currentAttribute = -1;
966 currentAttributeValue = -1;
969 readState = ReadState.Initial;
975 popScope = allowMultipleRoot = false;
976 elementNameStackPos = 0;
978 isStandalone = false;
979 returnEntityReference = false;
980 entityReferenceName = String.Empty;
984 nameCapacity = initialNameCapacity;
986 useProceedingLineInfo = false;
988 currentState = XmlNodeType.None;
990 readCharsInProgress = false;
// Sets up parser state for a new input: records the starting node type,
// adopts (or creates) the parser context, derives the name table and
// namespace manager from it, computes the base URI from url, and then
// configures the reader according to fragType.
// Throws XmlException for a fragType that is not handled by the switch.
993 private void InitializeContext (string url, XmlParserContext context, TextReader fragment, XmlNodeType fragType)
995 startNodeType = fragType;
996 parserContext = context;
// No context supplied: build one around a fresh name table.
997 if (context == null) {
998 XmlNameTable nt = new NameTable ();
999 parserContext = new XmlParserContext (nt,
1000 new XmlNamespaceManager (nt),
// Fall back to new instances when the context carries no name table or
// namespace manager.
1004 nameTable = parserContext.NameTable;
1005 nameTable = nameTable != null ? nameTable : new NameTable ();
1006 nsmgr = parserContext.NamespaceManager;
1007 nsmgr = nsmgr != null ? nsmgr : new XmlNamespaceManager (nameTable);
// A non-empty url becomes the context's BaseURI.
1009 if (url != null && url.Length > 0) {
1011 Uri uri = new Uri (url, UriKind.RelativeOrAbsolute);
1015 uri = new Uri (url, UriKind.RelativeOrAbsolute);
1016 } catch (Exception) {
// Fallback when the url cannot be parsed directly: resolve it against a URI
// built from the full path of "./a".
1017 string path = Path.GetFullPath ("./a");
1018 uri = new Uri (new Uri (path), url);
1021 parserContext.BaseURI = uri.ToString ();
// Attribute fragments: the whole input is read up front and its double
// quotes are replaced before parsing.
// NOTE(review): the replacement argument on the next line reads """ which is
// not a valid C# string literal; it looks like an entity-decoded "&quot;" -
// confirm against the upstream source.
1029 case XmlNodeType.Attribute:
1030 reader = new StringReader (fragment.ReadToEnd ().Replace ("\"", """));
// Element fragments start in the "inside document element" state and may
// contain several root-level nodes.
1032 case XmlNodeType.Element:
1033 currentState = XmlNodeType.Element;
1034 allowMultipleRoot = true;
1036 case XmlNodeType.Document:
1039 throw new XmlException (String.Format ("NodeType {0} is not allowed to create XmlTextReader.", fragType));
// Conformance level of the reader, derived from allowMultipleRoot.
// Setting Fragment switches the reader into element state and allows
// multiple roots; assigning any other level leaves the reader unchanged.
internal ConformanceLevel Conformance {
	get {
		if (allowMultipleRoot)
			return ConformanceLevel.Fragment;
		return ConformanceLevel.Document;
	}
	set {
		if (value != ConformanceLevel.Fragment)
			return;
		currentState = XmlNodeType.Element;
		allowMultipleRoot = true;
	}
}
1053 internal void AdjustLineInfoOffset (int lineNumberOffset, int linePositionOffset)
1055 line += lineNumberOffset;
1056 column += linePositionOffset;
1059 internal void SetNameTable (XmlNameTable nameTable)
1061 parserContext.NameTable = nameTable;
1064 // Use this method rather than setting the properties
1065 // directly so that all the necessary properties can
1066 // be changed in harmony with each other. Maybe the
1067 // fields should be in a separate class to help enforce
1070 // Namespace URI could not be provided here.
1071 private void SetProperties (
1072 XmlNodeType nodeType,
1076 bool isEmptyElement,
1078 bool clearAttributes)
1080 SetTokenProperties (currentToken, nodeType, name, prefix, localName, isEmptyElement, value, clearAttributes);
1081 currentToken.LineNumber = this.currentLinkedNodeLineNumber;
1082 currentToken.LinePosition = this.currentLinkedNodeLinePosition;
1085 private void SetTokenProperties (
1087 XmlNodeType nodeType,
1091 bool isEmptyElement,
1093 bool clearAttributes)
1095 token.NodeType = nodeType;
1097 token.Prefix = prefix;
1098 token.LocalName = localName;
1099 token.IsEmptyElement = isEmptyElement;
1100 token.Value = value;
1101 this.elementDepth = depth;
1103 if (clearAttributes)
1107 private void ClearAttributes ()
1109 //for (int i = 0; i < attributeCount; i++)
1110 // attributeTokens [i].Clear ();
1112 currentAttribute = -1;
1113 currentAttributeValue = -1;
// Peeks a character that may be the first half of a surrogate pair.
// When the buffer holds fewer than two unread chars it is refilled first,
// passing c to ReadTextReader. If the next two buffered chars form a valid
// high/low surrogate pair, the combined code point (0x10000 and above) is
// returned; otherwise the first char is returned unchanged.
1116 private int PeekSurrogate (int c)
1118 if (peekCharsLength <= peekCharsIndex + 1) {
1119 if (!ReadTextReader (c))
1120 //FIXME: copy MS.NET behaviour when unpaired surrogate found
1124 int highhalfChar = peekChars [peekCharsIndex];
1125 int lowhalfChar = peekChars [peekCharsIndex+1];
// 0xFC00 masks off the low 10 bits: high surrogates are 0xD800-0xDBFF and
// low surrogates are 0xDC00-0xDFFF.
1127 if (((highhalfChar & 0xFC00) != 0xD800) || ((lowhalfChar & 0xFC00) != 0xDC00))
1128 //FIXME: copy MS.NET behaviour when unpaired surrogate found
1129 return highhalfChar;
1130 return 0x10000 + (highhalfChar-0xD800)*0x400 + (lowhalfChar-0xDC00);
// Returns the next character without consuming it. Characters in the
// surrogate range are handed to PeekSurrogate so that a pair can be combined
// into one code point; when the buffer is exhausted it is refilled through
// ReadTextReader (-1).
1133 private int PeekChar ()
1135 if (peekCharsIndex < peekCharsLength) {
1136 int c = peekChars [peekCharsIndex];
// NOTE(review): the upper bound is written as ">= 0xDFFF", which excludes
// 0xDFFF itself from the surrogate path even though the low-surrogate range
// ends at 0xDFFF inclusive - confirm whether "> 0xDFFF" was intended.
1139 if (c < 0xD800 || c >= 0xDFFF)
1141 return PeekSurrogate (c);
1143 if (!ReadTextReader (-1))
1149 private int ReadChar ()
1151 int ch = PeekChar ();
1155 peekCharsIndex++; //Increment by 2 when a compound UCS-4 character was found
1160 } else if (ch != -1) {
1166 private void Advance (int ch) {
1170 peekCharsIndex++; //Increment by 2 when a compound UCS-4 character was found
1175 } else if (ch != -1) {
// Refills the peekChars buffer from the underlying reader.
// remained: a non-negative value reserves one slot (offset 1) in front of the
// newly read data; -1 means nothing is carried over.
// Returns true when the reserved slot plus the newly read chars amount to at
// least one char.
1180 private bool ReadTextReader (int remained)
// Lazily open the input when only a URI was supplied and no reader exists yet.
1182 if (reader == null && reader_uri != null) {
1183 Uri uri = reader_uri;
1186 Stream stream = GetStreamFromUrl (uri.ToString (), out uriString);
1189 reader = new XmlStreamReader (stream);
// A negative peekCharsLength marks a buffer that has never been filled:
// read straight into the start of it.
1191 if (peekCharsLength < 0) { // initialized buffer
1192 peekCharsLength = reader.Read (peekChars, 0, peekChars.Length);
1193 return peekCharsLength > 0;
1195 int offset = remained >= 0 ? 1 : 0;
// Number of buffered chars from the start of the current node to the end of
// the valid data.
1196 int copysize = peekCharsLength - curNodePeekIndex;
1198 // It must assure that current tag content always exists
1200 if (!preserveCurrentTag) {
1201 curNodePeekIndex = 0;
1204 } else if (peekCharsLength < peekChars.Length) {
1205 // NonBlockingStreamReader returned less bytes
1206 // than the size of the buffer. In that case,
1207 // just refill the buffer.
// The current node starts in the first half of the valid data, so the buffer
// is doubled and the node's chars are moved into the new array.
1208 } else if (curNodePeekIndex <= (peekCharsLength >> 1)) {
1209 // extend the buffer
1210 char [] tmp = new char [peekChars.Length * 2];
1211 Array.Copy (peekChars, curNodePeekIndex,
1214 curNodePeekIndex = 0;
1215 peekCharsIndex = copysize;
// Otherwise the current node's chars are shifted to the front of the
// existing buffer.
1217 Array.Copy (peekChars, curNodePeekIndex,
1218 peekChars, 0, copysize);
1219 curNodePeekIndex = 0;
1220 peekCharsIndex = copysize;
// NOTE(review): the guard around the next store is not visible in this
// excerpt; presumably it only runs when remained is non-negative.
1223 peekChars [peekCharsIndex] = (char) remained;
// Read into the free space after the reserved slot, at most peekCharCapacity
// chars per call.
1224 int count = peekChars.Length - peekCharsIndex - offset;
1225 if (count > peekCharCapacity)
1226 count = peekCharCapacity;
1227 int read = reader.Read (
1228 peekChars, peekCharsIndex + offset, count);
1229 int remainingSize = offset + read;
1230 peekCharsLength = peekCharsIndex + remainingSize;
1232 return (remainingSize != 0);
1235 private bool ReadContent ()
1239 parserContext.PopScope ();
1243 if (returnEntityReference)
1244 SetEntityReferenceProperties ();
1246 int c = PeekChar ();
1248 readState = ReadState.EndOfFile;
1249 ClearValueBuffer ();
1251 XmlNodeType.None, // nodeType
1252 String.Empty, // name
1253 String.Empty, // prefix
1254 String.Empty, // localName
1255 false, // isEmptyElement
1257 true // clearAttributes
1260 throw NotWFError ("unexpected end of file. Current depth is " + depth);
1267 switch (PeekChar ())
1275 ReadProcessingInstruction ();
1290 if (!ReadWhitespace ())
1292 return ReadContent ();
1300 return this.ReadState != ReadState.EndOfFile;
1303 private void SetEntityReferenceProperties ()
1305 DTDEntityDeclaration decl = DTD != null ? DTD.EntityDecls [entityReferenceName] : null;
1306 if (this.isStandalone)
1307 if (DTD == null || decl == null || !decl.IsInternalSubset)
1308 throw NotWFError ("Standalone document must not contain any references to an non-internally declared entity.");
1309 if (decl != null && decl.NotationName != null)
1310 throw NotWFError ("Reference to any unparsed entities is not allowed here.");
1312 ClearValueBuffer ();
1314 XmlNodeType.EntityReference, // nodeType
1315 entityReferenceName, // name
1316 String.Empty, // prefix
1317 entityReferenceName, // localName
1318 false, // isEmptyElement
1320 true // clearAttributes
1323 returnEntityReference = false;
1324 entityReferenceName = String.Empty;
1327 // The leading '<' has already been consumed.
1328 private void ReadStartTag ()
1330 if (currentState == XmlNodeType.EndElement)
1331 throw NotWFError ("Multiple document element was detected.");
1332 currentState = XmlNodeType.Element;
1336 currentLinkedNodeLineNumber = line;
1337 currentLinkedNodeLinePosition = column;
1339 string prefix, localName;
1340 string name = ReadName (out prefix, out localName);
1341 if (currentState == XmlNodeType.EndElement)
1342 throw NotWFError ("document has terminated, cannot open new element");
1344 bool isEmptyElement = false;
1349 if (XmlChar.IsFirstNameChar (PeekChar ()))
1350 ReadAttributes (false);
1351 cursorToken = this.currentToken;
1354 for (int i = 0; i < attributeCount; i++)
1355 attributeTokens [i].FillXmlns ();
1356 for (int i = 0; i < attributeCount; i++)
1357 attributeTokens [i].FillNamespace ();
1361 for (int i = 0; i < attributeCount; i++)
1362 if (attributeTokens [i].Prefix == "xmlns" &&
1363 attributeTokens [i].Value == String.Empty)
1364 throw NotWFError ("Empty namespace URI cannot be mapped to non-empty prefix.");
1366 for (int i = 0; i < attributeCount; i++) {
1367 for (int j = i + 1; j < attributeCount; j++)
1368 if (Object.ReferenceEquals (attributeTokens [i].Name, attributeTokens [j].Name) ||
1369 (Object.ReferenceEquals (attributeTokens [i].LocalName, attributeTokens [j].LocalName) &&
1370 Object.ReferenceEquals (attributeTokens [i].NamespaceURI, attributeTokens [j].NamespaceURI)))
1371 throw NotWFError ("Attribute name and qualified name must be identical.");
1374 if (PeekChar () == '/') {
1376 isEmptyElement = true;
1381 PushElementName (name, localName, prefix);
1383 parserContext.PushScope ();
1388 XmlNodeType.Element, // nodeType
1392 isEmptyElement, // isEmptyElement
1394 false // clearAttributes
1396 if (prefix.Length > 0)
1397 currentToken.NamespaceURI = LookupNamespace (prefix, true);
1398 else if (namespaces)
1399 currentToken.NamespaceURI = nsmgr.DefaultNamespace;
1402 if (NamespaceURI == null)
1403 throw NotWFError (String.Format ("'{0}' is undeclared namespace.", Prefix));
1405 for (int i = 0; i < attributeCount; i++) {
1406 MoveToAttribute (i);
1407 if (NamespaceURI == null)
1408 throw NotWFError (String.Format ("'{0}' is undeclared namespace.", Prefix));
1415 for (int i = 0; i < attributeCount; i++) {
1416 if (!Object.ReferenceEquals (attributeTokens [i].Prefix, XmlNamespaceManager.PrefixXml))
1418 string aname = attributeTokens [i].LocalName;
1419 string value = attributeTokens [i].Value;
1422 if (this.resolver != null) {
1424 BaseURI != String.Empty ?
1425 new Uri (BaseURI) : null;
1426 // xml:base="" without any base URI -> pointless. However there are
1427 // some people who use such xml:base. Seealso bug #608391.
1428 if (buri == null && String.IsNullOrEmpty (value))
1430 Uri uri = resolver.ResolveUri (
1432 parserContext.BaseURI =
1438 parserContext.BaseURI = value;
1441 parserContext.XmlLang = value;
1446 parserContext.XmlSpace = XmlSpace.Preserve;
1449 parserContext.XmlSpace = XmlSpace.Default;
1452 throw NotWFError (String.Format ("Invalid xml:space value: {0}", value));
1459 CheckCurrentStateUpdate ();
// Pushes an element's qualified name, local name and prefix onto the
// open-element stack, doubling the backing array when it is full.
private void PushElementName (string name, string local, string prefix)
{
	int capacity = elementNames.Length;
	if (capacity == elementNameStackPos) {
		TagName [] grown = new TagName [capacity * 2];
		Array.Copy (elementNames, 0, grown, 0, elementNameStackPos);
		elementNames = grown;
	}

	TagName entry = new TagName (name, local, prefix);
	elementNames [elementNameStackPos++] = entry;
}
1473 // The reader is positioned on the first character
1474 // of the element's name.
// Parses an end tag. Requires currentState to be Element and a non-empty
// element-name stack; pops the expected name from that stack, matches it
// against the input with Expect, then requires '>' (whitespace before it
// is allowed). Every violation is reported through NotWFError.
1475 private void ReadEndTag ()
1477 if (currentState != XmlNodeType.Element)
1478 throw NotWFError ("End tag cannot appear in this state.");
// Record the current line and column in the currentLinkedNode* fields.
1480 currentLinkedNodeLineNumber = line;
1481 currentLinkedNodeLinePosition = column;
1483 if (elementNameStackPos == 0)
1484 throw NotWFError ("closing element without matching opening element");
// The end tag must repeat the name stored for the innermost open element.
1485 TagName expected = elementNames [--elementNameStackPos];
1486 Expect (expected.Name);
1488 ExpectAfterWhitespace ('>');
// Properties describing the EndElement node (the call these arguments
// belong to is not visible in this excerpt).
1493 XmlNodeType.EndElement, // nodeType
1494 expected.Name, // name
1495 expected.Prefix, // prefix
1496 expected.LocalName, // localName
1497 false, // isEmptyElement
1499 true // clearAttributes
// Prefixed names are resolved through LookupNamespace; unprefixed names
// take the default namespace when the namespaces flag is set.
1501 if (expected.Prefix.Length > 0)
1502 currentToken.NamespaceURI = LookupNamespace (expected.Prefix, true);
1503 else if (namespaces)
1504 currentToken.NamespaceURI = nsmgr.DefaultNamespace;
1508 CheckCurrentStateUpdate ();
// Switches currentState to EndElement when all of the following hold:
// depth is 0, multiple roots are not allowed, and the current node is
// either an empty element or an end element.
private void CheckCurrentStateUpdate ()
{
	if (depth != 0 || allowMultipleRoot)
		return;

	if (IsEmptyElement || NodeType == XmlNodeType.EndElement)
		currentState = XmlNodeType.EndElement;
}
// Appends a code point above U+FFFF to nameBuffer as a UTF-16 surrogate
// pair. The buffer is grown between the two halves if the first one
// filled it; there is no capacity check before the first half here.
private void AppendSurrogatePairNameChar (int ch)
{
	int offset = ch - 0x10000;
	char highSurrogate = (char) (offset / 0x400 + 0xD800);
	char lowSurrogate = (char) (offset % 0x400 + 0xDC00);

	nameBuffer [nameLength++] = highSurrogate;
	if (nameLength == nameCapacity)
		ExpandNameCapacity ();
	nameBuffer [nameLength++] = lowSurrogate;
}
// Doubles nameCapacity and moves the first nameLength characters of the
// name buffer into a newly allocated array of that size.
private void ExpandNameCapacity ()
{
	nameCapacity *= 2;
	char [] previous = nameBuffer;
	char [] grown = new char [nameCapacity];
	nameBuffer = grown;
	Array.Copy (previous, grown, nameLength);
}
// Appends one code point to valueBuffer: values up to Char.MaxValue are
// appended as a single char, larger ones as a surrogate pair.
private void AppendValueChar (int ch)
{
	if (ch > Char.MaxValue) {
		AppendSurrogatePairValueChar (ch);
		return;
	}

	valueBuffer.Append ((char) ch);
}
// Appends a code point above U+FFFF to valueBuffer as two UTF-16 code
// units: high surrogate first, then low surrogate.
private void AppendSurrogatePairValueChar (int ch)
{
	int offset = ch - 0x10000;
	char highSurrogate = (char) (offset / 0x400 + 0xD800);
	valueBuffer.Append (highSurrogate);
	char lowSurrogate = (char) (offset % 0x400 + 0xDC00);
	valueBuffer.Append (lowSurrogate);
}
// Builds the string value of the current node from valueBuffer.
// For Whitespace and SignificantWhitespace nodes the characters are copied
// into whitespaceCache and atomized through whitespacePool, so equal
// whitespace runs share one string instance. In the remaining cases the
// buffer content is converted with ToString.
1549 private string CreateValueString ()
1551 // Since whitespace strings are mostly identical
1552 // depending on the Depth, we make use of NameTable
1553 // to atomize whitespace strings.
1555 case XmlNodeType.Whitespace:
1556 case XmlNodeType.SignificantWhitespace:
1557 int len = valueBuffer.Length;
// The cache array and the pool are created lazily on first use.
1558 if (whitespaceCache == null)
1559 whitespaceCache = new char [32];
// NOTE(review): the statement guarded by this length check is not visible
// in this excerpt; presumably runs that do not fit the cache fall through
// to the plain ToString path below - confirm.
1560 if (len >= whitespaceCache.Length)
1562 if (whitespacePool == null)
1563 whitespacePool = new NameTable ();
// NOTE(review): a CopyTo call and a manual copy loop both appear here;
// presumably they are alternatives selected by a conditional-compilation
// directive that is not visible in this excerpt - confirm.
1565 valueBuffer.CopyTo (0, whitespaceCache, 0, len);
1567 for (int i = 0; i < len; i++)
1568 whitespaceCache [i] = valueBuffer [i];
1570 return whitespacePool.Add (whitespaceCache, 0, valueBuffer.Length);
// Two ToString overloads are used depending on whether the buffer
// capacity is below 100.
1572 return (valueBuffer.Capacity < 100) ?
1573 valueBuffer.ToString (0, valueBuffer.Length) :
1574 valueBuffer.ToString ();
// Empties valueBuffer by resetting its length to zero; the buffer object
// itself is kept and reused.
private void ClearValueBuffer ()
{
	valueBuffer.Length = 0;
}
1582 // The reader is positioned on the first character
// Reads character data into valueBuffer until '<' or end of input.
// notWhitespace: initial knowledge about the content; it is set to true
// inside the loop below and finally selects between a Text node and a
// whitespace node (SignificantWhitespace when XmlSpace is Preserve).
// Reports through NotWFError: a state other than Element, an invalid
// character (only when CharacterChecking is on) and "]]>" inside the text.
1584 private void ReadText (bool notWhitespace)
1586 if (currentState != XmlNodeType.Element)
1587 throw NotWFError ("Text node cannot appear in this state.");
1588 preserveCurrentTag = false;
1591 ClearValueBuffer ();
1593 int ch = PeekChar ();
1594 bool previousWasCloseBracket = false;
1596 while (ch != '<' && ch != -1) {
1599 ch = ReadReference (false);
1600 if (returnEntityReference) // Returns -1 if char validation should not be done
1602 } else if (normalization && ch == '\r') {
1606 // append '\n' instead of '\r'.
1607 AppendValueChar ('\n');
1608 // and in case of "\r\n", discard '\r'.
1611 if (CharacterChecking && XmlChar.IsInvalid (ch))
1612 throw NotWFError ("Not allowed character was found.");
1616 // FIXME: it might be optimized by the JIT later,
1617 // AppendValueChar (ch);
// Same logic as AppendValueChar, written out in place.
1619 if (ch <= Char.MaxValue)
1620 valueBuffer.Append ((char) ch);
1622 AppendSurrogatePairValueChar (ch);
// previousWasCloseBracket remembers a preceding ']' so that a '>' coming
// next (completing "]]>") can be rejected.
1627 if (previousWasCloseBracket)
1628 if (PeekChar () == '>')
1629 throw NotWFError ("Inside text content, character sequence ']]>' is not allowed.");
1630 previousWasCloseBracket = true;
1632 else if (previousWasCloseBracket)
1633 previousWasCloseBracket = false;
1635 notWhitespace = true;
// An entity reference with no text collected before it is exposed as the
// reference itself rather than as a text node.
1638 if (returnEntityReference && valueBuffer.Length == 0) {
1639 SetEntityReferenceProperties ();
1641 XmlNodeType nodeType = notWhitespace ? XmlNodeType.Text :
1642 this.XmlSpace == XmlSpace.Preserve ? XmlNodeType.SignificantWhitespace : XmlNodeType.Whitespace;
1644 nodeType, // nodeType
1645 String.Empty, // name
1646 String.Empty, // prefix
1647 String.Empty, // localName
1648 false, // isEmptyElement
1649 null, // value: create only when required
1650 true // clearAttributes
1655 // The leading '&' has already been consumed.
1656 // Returns true if the entity reference isn't a simple
1657 // character reference or one of the predefined entities.
1658 // This allows the ReadText method to break so that the
1659 // next call to Read will return the EntityReference node.
// NOTE: despite the wording above, the return type is int. The result is
// whatever ReadCharacterReference (after '#') or ReadEntityReference
// returns; the "entity reference pending" signal is carried by the
// returnEntityReference field that ReadEntityReference sets.
// ignoreEntityReferences is passed through to ReadEntityReference.
1660 private int ReadReference (bool ignoreEntityReferences)
1662 if (PeekChar () == '#') {
1664 return ReadCharacterReference ();
1666 return ReadEntityReference (ignoreEntityReferences);
// Parses the numeric part of a character reference. Callers invoke this
// after seeing "&#"; the reader is expected to be on 'x' (hexadecimal
// form) or on the first decimal digit. Digits are consumed up to ';' or
// end of input, and one further character (the ';') is then consumed.
// Returns the referenced code point.
// Reports through NotWFError: a character that is not a digit of the
// selected radix, a value above U+10FFFF, and (only when both
// CharacterChecking and Normalization are on) a code point rejected by
// XmlChar.IsInvalid.
private int ReadCharacterReference ()
{
	// Largest Unicode code point. Checking after every digit keeps the
	// accumulator far below Int32.MaxValue, so it can never wrap around,
	// and guarantees the value is representable as a surrogate pair.
	const int MaxCodePoint = 0x10FFFF;

	int value = 0;
	int ch;

	if (PeekChar () == 'x') {
		ReadChar (); // 'x'

		while ((ch = PeekChar ()) != ';' && ch != -1) {
			ReadChar ();

			if (ch >= '0' && ch <= '9')
				value = (value << 4) + ch - '0';
			else if (ch >= 'A' && ch <= 'F')
				value = (value << 4) + ch - 'A' + 10;
			else if (ch >= 'a' && ch <= 'f')
				value = (value << 4) + ch - 'a' + 10;
			else
				throw NotWFError (String.Format (CultureInfo.InvariantCulture,
					"invalid hexadecimal digit: {0} (#x{1:X})",
					(char) ch,
					ch));

			if (value > MaxCodePoint)
				throw NotWFError ("Character reference is out of the Unicode code point range (maximum is #x10FFFF).");
		}
	} else {
		while ((ch = PeekChar ()) != ';' && ch != -1) {
			ReadChar ();

			if (ch >= '0' && ch <= '9')
				value = value * 10 + ch - '0';
			else
				throw NotWFError (String.Format (CultureInfo.InvariantCulture,
					"invalid decimal digit: {0} (#x{1:X})",
					(char) ch,
					ch));

			if (value > MaxCodePoint)
				throw NotWFError ("Character reference is out of the Unicode code point range (maximum is #x10FFFF).");
		}
	}

	ReadChar (); // ';'

	// There is no way to save surrogate pairs...
	if (CharacterChecking && Normalization &&
		XmlChar.IsInvalid (value))
		throw NotWFError ("Referenced character was not allowed in XML. Normalization is " + normalization + ", checkCharacters = " + checkCharacters);
	return value;
}
1715 // Returns -1 if it should not be validated.
1716 // Real EOF must not be detected here.
// Parses the name of a general entity reference and looks it up with
// XmlChar.GetPredefinedEntity.
// For a name that is not predefined: when ignoreEntityReferences is true
// the literal text "&name;" is appended to the value buffer; otherwise
// returnEntityReference and entityReferenceName are set, which ReadText
// checks after the call.
// (The statements taken for a predefined entity and the final return are
// not visible in this excerpt.)
1717 private int ReadEntityReference (bool ignoreEntityReferences)
1719 string name = ReadName ();
1722 int predefined = XmlChar.GetPredefinedEntity (name);
1723 if (predefined >= 0)
1726 if (ignoreEntityReferences) {
1727 AppendValueChar ('&');
1728 for (int i = 0; i < name.Length; i++)
1729 AppendValueChar (name [i]);
1730 AppendValueChar (';');
1732 returnEntityReference = true;
1733 entityReferenceName = name;
1739 // The reader is positioned on the first character of
1740 // the attribute name.
// Parses a run of attributes into attribute tokens. Each iteration
// allocates a token, records its line/column, reads the (possibly
// prefixed) name, requires '=' and reads the quoted value.
// Whitespace is mandatory between two attributes: requireWhitespace is set
// when no whitespace followed the previous value, and the next iteration
// then fails with NotWFError unless whitespace is found.
// isXmlDecl: NOTE(review) the lines that use this flag are not visible in
// this excerpt; presumably it enables the '?' terminator and the
// dummyValue handling below - confirm.
1741 private void ReadAttributes (bool isXmlDecl)
1744 bool requireWhitespace = false;
// Both cursors are reset to -1 before parsing and again after the loop.
1745 currentAttribute = -1;
1746 currentAttributeValue = -1;
1749 if (!SkipWhitespace () && requireWhitespace)
1750 throw NotWFError ("Unexpected token. Name is required here.");
1752 IncrementAttributeToken ();
1753 currentAttributeToken.LineNumber = line;
1754 currentAttributeToken.LinePosition = column;
1756 string prefix, localName;
1757 currentAttributeToken.Name = ReadName (out prefix, out localName);
1758 currentAttributeToken.Prefix = prefix;
1759 currentAttributeToken.LocalName = localName;
1760 ExpectAfterWhitespace ('=');
// -1: the opening quote is read from the input.
1762 ReadAttributeValueTokens (-1);
1763 // This hack is required for xmldecl which has
1764 // both effective attributes and Value.
1767 dummyValue = currentAttributeToken.Value;
1771 if (!SkipWhitespace ())
1772 requireWhitespace = true;
1773 peekChar = PeekChar ();
1775 if (peekChar == '?')
1778 else if (peekChar == '/' || peekChar == '>')
1780 } while (peekChar != -1);
1782 currentAttribute = -1;
1783 currentAttributeValue = -1;
// Adds a synthetic attribute: allocates the next attribute token, stores
// the NameTable-atomized name with empty prefix and namespace URI, then
// allocates a value token and fills it through SetTokenProperties.
// (The remaining arguments of that call are not visible in this excerpt.)
1786 private void AddAttributeWithValue (string name, string value)
1788 IncrementAttributeToken ();
1789 XmlAttributeTokenInfo ati = attributeTokens [currentAttribute];
1790 ati.Name = NameTable.Add (name);
1791 ati.Prefix = String.Empty;
1792 ati.NamespaceURI = String.Empty;
1793 IncrementAttributeValueToken ();
1794 XmlTokenInfo vti = attributeValueTokens [currentAttributeValue];
1795 SetTokenProperties (vti,
// Makes attributeTokens [currentAttribute] the current attribute token:
// the array is doubled when the index has reached its length, the slot is
// populated on first use, and the (possibly reused) token is cleared.
// NOTE(review): the statement that advances currentAttribute is not
// visible in this excerpt; by analogy with IncrementAttributeValueToken it
// presumably precedes the length check - confirm.
1807 private void IncrementAttributeToken ()
1810 if (attributeTokens.Length == currentAttribute) {
1811 XmlAttributeTokenInfo [] newArray =
1812 new XmlAttributeTokenInfo [attributeTokens.Length * 2];
1813 attributeTokens.CopyTo (newArray, 0);
1814 attributeTokens = newArray;
// Token objects are created once per slot and recycled afterwards.
1816 if (attributeTokens [currentAttribute] == null)
1817 attributeTokens [currentAttribute] = new XmlAttributeTokenInfo (this);
1818 currentAttributeToken = attributeTokens [currentAttribute];
1819 currentAttributeToken.Clear ();
// Advances currentAttributeValue and makes the token in that slot the
// current attribute value token. The token array is doubled when the new
// index equals its length; a slot is populated the first time it is used
// and the token found there is cleared before reuse.
private void IncrementAttributeValueToken ()
{
	int slot = ++currentAttributeValue;
	if (slot == attributeValueTokens.Length)
		Array.Resize (ref attributeValueTokens, attributeValueTokens.Length * 2);

	XmlTokenInfo token = attributeValueTokens [slot];
	if (token == null) {
		token = new XmlTokenInfo (this);
		attributeValueTokens [slot] = token;
	}

	currentAttributeValueToken = token;
	token.Clear ();
}
1836 // LAMESPEC: Orthodox XML reader should normalize attribute values
// Reads one quoted attribute value and records it as one or more value
// tokens of the current attribute: Text tokens, each covering a range of
// valueBuffer, and EntityReference tokens for general entities that are
// not expanded.
// dummyQuoteChar: when negative the opening quote is read from the input;
// otherwise this value is used as the quote character.
// Reports through NotWFError: a quote other than ' or ", a '<' inside the
// value, end of input inside the value (when dummyQuoteChar is negative)
// and an invalid character when CharacterChecking is on.
1837 private void ReadAttributeValueTokens (int dummyQuoteChar)
1839 int quoteChar = (dummyQuoteChar < 0) ? ReadChar () : dummyQuoteChar;
1841 if (quoteChar != '\'' && quoteChar != '\"')
1842 throw NotWFError ("an attribute value was not quoted");
1843 currentAttributeToken.QuoteChar = (char) quoteChar;
// The first value token starts here; its index is remembered on the
// attribute token together with the current line and column.
1845 IncrementAttributeValueToken ();
1846 currentAttributeToken.ValueTokenStartIndex = currentAttributeValue;
1847 currentAttributeValueToken.LineNumber = line;
1848 currentAttributeValueToken.LinePosition = column;
1850 bool incrementToken = false;
1851 bool isNewToken = true;
1854 currentAttributeValueToken.ValueBufferStart = valueBuffer.Length;
1857 if (ch == quoteChar)
// incrementToken is set after an EntityReference token was emitted: the
// text that follows needs a fresh token starting at the current buffer end.
1860 if (incrementToken) {
1861 IncrementAttributeValueToken ();
1862 currentAttributeValueToken.ValueBufferStart = valueBuffer.Length;
1863 currentAttributeValueToken.LineNumber = line;
1864 currentAttributeValueToken.LinePosition = column;
1865 incrementToken = false;
1872 throw NotWFError ("attribute values cannot contain '<'");
1874 if (dummyQuoteChar < 0)
1875 throw NotWFError ("unexpected end of file in an attribute value");
1876 else // Attribute value constructor.
1882 if (PeekChar () == '\n')
1883 continue; // skip '\r'.
1885 // The csc in MS.NET 2.0 beta 1 barfs on this goto, so work around that
1894 // When Normalize = true, then replace
1895 // all spaces to ' '
1901 if (PeekChar () == '#') {
1903 ch = ReadCharacterReference ();
1904 AppendValueChar (ch);
1907 // Check XML 1.0 section 3.1 WFC.
1908 string entName = ReadName ();
1910 int predefined = XmlChar.GetPredefinedEntity (entName);
1911 if (predefined < 0) {
1912 CheckAttributeEntityReferenceWFC (entName);
// With ExpandEntities the replacement text obtained from the DTD is
// appended to the value in place.
1913 if (entityHandling == EntityHandling.ExpandEntities) {
1914 string value = DTD.GenerateEntityAttributeText (entName);
1915 foreach (char c in (IEnumerable<char>) value)
1916 AppendValueChar (c);
// The statements below close the current Text token at the current buffer
// end and emit a separate, empty-valued EntityReference token.
1918 currentAttributeValueToken.ValueBufferEnd = valueBuffer.Length;
1919 currentAttributeValueToken.NodeType = XmlNodeType.Text;
1921 IncrementAttributeValueToken ();
1922 currentAttributeValueToken.Name = entName;
1923 currentAttributeValueToken.Value = String.Empty;
1924 currentAttributeValueToken.NodeType = XmlNodeType.EntityReference;
1925 incrementToken = true;
1929 AppendValueChar (predefined);
1932 if (CharacterChecking && XmlChar.IsInvalid (ch))
1933 throw NotWFError ("Invalid character was found.");
1934 // FIXME: it might be optimized by the JIT later,
1935 // AppendValueChar (ch);
1937 if (ch <= Char.MaxValue)
1938 valueBuffer.Append ((char) ch);
1940 AppendSurrogatePairValueChar (ch);
// Close the trailing Text token unless the value ended on an entity
// reference, then record the index of the last value token.
1947 if (!incrementToken) {
1948 currentAttributeValueToken.ValueBufferEnd = valueBuffer.Length;
1949 currentAttributeValueToken.NodeType = XmlNodeType.Text;
1951 currentAttributeToken.ValueTokenEndIndex = currentAttributeValue;
// Applies the well-formedness constraints for a general entity referenced
// from an attribute value.
// An undeclared entity is an error when entities are being expanded, or
// when a DTD and a resolver are both present; otherwise it is tolerated.
// A declared entity must not have an external reference, must come from
// the internal subset in a standalone document, and its value must not
// contain '<'. Violations are reported through NotWFError.
private void CheckAttributeEntityReferenceWFC (string entName)
{
	DTDEntityDeclaration entDecl = null;
	if (DTD != null)
		entDecl = DTD.EntityDecls [entName];

	if (entDecl == null) {
		bool declarationRequired =
			entityHandling == EntityHandling.ExpandEntities
			|| (DTD != null && resolver != null);
		if (declarationRequired)
			throw NotWFError (String.Format ("Referenced entity '{0}' does not exist.", entName));
		return;
	}

	if (entDecl.HasExternalReference)
		throw NotWFError ("Reference to external entities is not allowed in the value of an attribute.");

	if (isStandalone && !entDecl.IsInternalSubset)
		throw NotWFError ("Reference to external entities is not allowed in the internal subset.");

	bool containsLessThan = entDecl.EntityValue.IndexOf ('<') >= 0;
	if (containsLessThan)
		throw NotWFError ("Attribute must not contain character '<' either directly or indirectly by way of entity references.");
}
1975 // The reader is positioned on the first character
1978 // It may be xml declaration or processing instruction.
// Reads a processing instruction: the target name, then the content up to
// "?>" collected in valueBuffer. A target equal to the atomized "xml"
// string is handed to VerifyXmlDeclaration; anything else is exposed as a
// ProcessingInstruction node.
// Reports through NotWFError: a target that equals "xml" only when
// compared case-insensitively, a target not followed by whitespace or '?',
// and an invalid character when CharacterChecking is on.
1979 private void ReadProcessingInstruction ()
1981 string target = ReadName ();
1982 if (target != "xml" && target.ToLower (CultureInfo.InvariantCulture) == "xml")
1983 throw NotWFError ("Not allowed processing instruction name which starts with 'X', 'M', 'L' was found.");
1985 if (!SkipWhitespace ())
1986 if (PeekChar () != '?')
1987 throw NotWFError ("Invalid processing instruction name was found.");
1989 ClearValueBuffer ();
1992 while ((ch = PeekChar ()) != -1) {
1995 if (ch == '?' && PeekChar () == '>') {
2000 if (CharacterChecking && XmlChar.IsInvalid (ch))
2001 throw NotWFError ("Invalid character was found.");
2002 AppendValueChar (ch);
// Reference comparison: this relies on target and PrefixXml being the
// same atomized string instance - presumably both come from the name
// table; confirm.
2005 if (Object.ReferenceEquals (target, XmlNamespaceManager.PrefixXml))
2006 VerifyXmlDeclaration ();
2008 if (currentState == XmlNodeType.None)
2009 currentState = XmlNodeType.XmlDeclaration;
2012 XmlNodeType.ProcessingInstruction, // nodeType
2014 String.Empty, // prefix
2015 target, // localName
2016 false, // isEmptyElement
2017 null, // value: create only when required
2018 true // clearAttributes
// Validates the collected content of an "xml" processing instruction as an
// XML declaration and exposes its pseudo-attributes as attributes of an
// XmlDeclaration node.
// Accepted order: version (must be "1.0"), then optionally encoding, then
// optionally standalone ("yes" or "no"). Anything else, including trailing
// text, is reported through NotWFError. Unless allowMultipleRoot is set,
// the declaration is only accepted while currentState is None.
2023 void VerifyXmlDeclaration ()
2025 if (!allowMultipleRoot && currentState != XmlNodeType.None)
2026 throw NotWFError ("XML declaration cannot appear in this state.");
2028 currentState = XmlNodeType.XmlDeclaration;
2030 string text = CreateValueString ();
// NOTE(review): encoding and standalone start as null and are tested
// before the AddAttributeWithValue calls below; the assignments that give
// them values are presumably on lines not visible in this excerpt.
2036 string encoding = null, standalone = null;
2038 ParseAttributeFromString (text, ref idx, out name, out value);
2039 if (name != "version" || value != "1.0")
2040 throw NotWFError ("'version' is expected.");
// name is cleared so that a missing next pseudo-attribute is
// distinguishable from a stale one.
2041 name = String.Empty;
2042 if (SkipWhitespaceInString (text, ref idx) && idx < text.Length)
2043 ParseAttributeFromString (text, ref idx, out name, out value);
2044 if (name == "encoding") {
2045 if (!XmlChar.IsValidIANAEncoding (value))
2046 throw NotWFError ("'encoding' must be a valid IANA encoding name.");
// The context encoding comes from the underlying XmlStreamReader when
// there is one; otherwise Encoding.Unicode is recorded.
2047 if (reader is XmlStreamReader)
2048 parserContext.Encoding = ((XmlStreamReader) reader).Encoding;
2050 parserContext.Encoding = Encoding.Unicode;
2052 name = String.Empty;
2053 if (SkipWhitespaceInString (text, ref idx) && idx < text.Length)
2054 ParseAttributeFromString (text, ref idx, out name, out value);
2056 if (name == "standalone") {
2057 this.isStandalone = value == "yes";
2058 if (value != "yes" && value != "no")
2059 throw NotWFError ("Only 'yes' or 'no' is allow for 'standalone'");
2061 SkipWhitespaceInString (text, ref idx);
2063 else if (name.Length != 0)
2064 throw NotWFError (String.Format ("Unexpected token: '{0}'", name));
// Nothing may follow the last pseudo-attribute.
2066 if (idx < text.Length)
2067 throw NotWFError ("'?' is expected.");
2069 AddAttributeWithValue ("version", "1.0");
2070 if (encoding != null)
2071 AddAttributeWithValue ("encoding", encoding);
2072 if (standalone != null)
2073 AddAttributeWithValue ("standalone", standalone);
2074 currentAttribute = currentAttributeValue = -1;
2077 XmlNodeType.XmlDeclaration, // nodeType
2079 String.Empty, // prefix
2081 false, // isEmptyElement
2083 false // clearAttributes
// Advances idx past any XML whitespace in text, starting at idx.
// Returns true when at least one character was skipped.
bool SkipWhitespaceInString (string text, ref int idx)
{
	int first = idx;
	for (; idx < text.Length; idx++) {
		if (!XmlChar.IsWhitespace (text [idx]))
			break;
	}
	return idx > first;
}
// Parses one pseudo-attribute of the form name="value" or name='value'
// out of src, starting at idx. Whitespace is allowed before the name and
// on both sides of '='.
// src:   text to parse.
// idx:   start position; on return, the position just after the closing
//        quote.
// name:  receives the name (may be empty when no name character is found).
// value: receives the text between the quotes.
// Reports through NotWFError: a missing '=', a missing opening quote, and
// a value whose closing quote is missing. The last case used to be
// accepted silently, with idx left beyond the end of src.
private void ParseAttributeFromString (string src,
	ref int idx, out string name, out string value)
{
	while (idx < src.Length && XmlChar.IsWhitespace (src [idx]))
		idx++;

	int start = idx;
	while (idx < src.Length && XmlChar.IsNameChar (src [idx]))
		idx++;
	name = src.Substring (start, idx - start);

	while (idx < src.Length && XmlChar.IsWhitespace (src [idx]))
		idx++;
	if (idx == src.Length || src [idx] != '=')
		throw NotWFError (String.Format ("'=' is expected after {0}", name));
	idx++;

	while (idx < src.Length && XmlChar.IsWhitespace (src [idx]))
		idx++;

	if (idx == src.Length || src [idx] != '"' && src [idx] != '\'')
		throw NotWFError ("'\"' or '\'' is expected.");

	char quote = src [idx];
	idx++;
	start = idx;

	while (idx < src.Length && src [idx] != quote)
		idx++;
	// Running off the end means the value was never closed.
	if (idx == src.Length)
		throw NotWFError (String.Format (CultureInfo.InvariantCulture,
			"Closing quotation mark {0} is expected for the value of '{1}'.", quote, name));

	value = src.Substring (start, idx - start);
	idx++; // step past the closing quote
}
// Consumes a leading text declaration ("<?xml ... ?>") when the input
// starts with one. Up to six characters are peeked to test for "<?xml ";
// a target that matches "xml " only case-insensitively is an error.
// An optional version pseudo-attribute is accepted only with the value
// "1.0"; an encoding pseudo-attribute is read but not interpreted here and
// is mandatory when Conformance is ConformanceLevel.Auto.
// Problems are reported through NotWFError.
// (The early exits taken when the input does not start with "<?xml " are
// not visible in this excerpt.)
2129 internal void SkipTextDeclaration ()
2131 if (PeekChar () != '<')
2136 if (PeekChar () != '?') {
2142 while (peekCharsIndex < 6) {
2143 if (PeekChar () < 0)
2148 if (new string (peekChars, 2, 4) != "xml ") {
2149 if (new string (peekChars, 2, 4).ToLower (CultureInfo.InvariantCulture) == "xml ") {
2150 throw NotWFError ("Processing instruction name must not be character sequence 'X' 'M' 'L' with case insensitivity.");
2159 if (PeekChar () == 'v') {
2161 ExpectAfterWhitespace ('=');
2163 int quoteChar = ReadChar ();
// At most three characters are collected, enough to hold "1.0".
2164 char [] expect1_0 = new char [3];
2165 int versionLength = 0;
2166 switch (quoteChar) {
2169 while (PeekChar () != quoteChar) {
2170 if (PeekChar () == -1)
2171 throw NotWFError ("Invalid version declaration inside text declaration.");
2172 else if (versionLength == 3)
2173 throw NotWFError ("Invalid version number inside text declaration.");
2175 expect1_0 [versionLength] = (char) ReadChar ();
2177 if (versionLength == 3 && new String (expect1_0) != "1.0")
2178 throw NotWFError ("Invalid version number inside text declaration.");
2185 throw NotWFError ("Invalid version declaration inside text declaration.");
2189 if (PeekChar () == 'e') {
2190 Expect ("encoding");
2191 ExpectAfterWhitespace ('=');
2193 int quoteChar = ReadChar ();
2194 switch (quoteChar) {
2197 while (PeekChar () != quoteChar)
2198 if (ReadChar () == -1)
2199 throw NotWFError ("Invalid encoding declaration inside text declaration.");
2204 throw NotWFError ("Invalid encoding declaration inside text declaration.");
2206 // Encoding value should be checked inside XmlInputStream.
2208 // this condition is to check if this instance is
2209 // not created by XmlReader.Create() (which just
2210 // omits strict text declaration check).
2211 else if (Conformance == ConformanceLevel.Auto)
2212 throw NotWFError ("Encoding declaration is mandatory in text declaration.");
2216 curNodePeekIndex = peekCharsIndex; // without this it causes incorrect value start indication.
2219 // The reader is positioned on the first character after
2220 // the leading '<!'.
// Inspects the next character to decide which "<!" construct follows;
// markup that is not recognized is reported through NotWFError.
// (The dispatch between the peek and the final throw is not visible in
// this excerpt.)
2221 private void ReadDeclaration ()
2223 int ch = PeekChar ();
2241 throw NotWFError ("Unexpected declaration markup was found.");
2245 // The reader is positioned on the first character after
2246 // the leading '<!--'.
// Reads a comment body into valueBuffer and exposes it as a Comment node.
// A "--" that is not followed by '>' is reported through NotWFError, as is
// any character rejected by XmlChar.IsInvalid.
2247 private void ReadComment ()
// A comment as the first node moves the state past "nothing read yet".
2249 if (currentState == XmlNodeType.None)
2250 currentState = XmlNodeType.XmlDeclaration;
2252 preserveCurrentTag = false;
2254 ClearValueBuffer ();
2257 while ((ch = PeekChar ()) != -1) {
2260 if (ch == '-' && PeekChar () == '-') {
2263 if (PeekChar () != '>')
2264 throw NotWFError ("comments cannot contain '--'");
// NOTE(review): unlike ReadText and ReadCDATA, this check is not guarded
// by CharacterChecking - confirm whether that is intentional.
2270 if (XmlChar.IsInvalid (ch))
2271 throw NotWFError ("Not allowed character was found.");
2273 AppendValueChar (ch);
2277 XmlNodeType.Comment, // nodeType
2278 String.Empty, // name
2279 String.Empty, // prefix
2280 String.Empty, // localName
2281 false, // isEmptyElement
2282 null, // value: create only when required
2283 true // clearAttributes
2287 // The reader is positioned on the first character after
2288 // the leading '<![CDATA['.
// Reads a CDATA section body into valueBuffer up to the closing "]]>" and
// exposes it as a CDATA node. With normalization on, '\r' is replaced by
// '\n' or dropped, as described by the inline comments below.
// Reports through NotWFError: a state other than Element, and an invalid
// character when CharacterChecking is on.
2289 private void ReadCDATA ()
2291 if (currentState != XmlNodeType.Element)
2292 throw NotWFError ("CDATA section cannot appear in this state.");
2293 preserveCurrentTag = false;
2295 ClearValueBuffer ();
2299 while (PeekChar () != -1) {
2304 if (ch == ']' && PeekChar () == ']') {
2305 ch = ReadChar (); // ']'
2307 if (PeekChar () == '>') {
2314 if (normalization && ch == '\r') {
2317 // append '\n' instead of '\r'.
2318 AppendValueChar ('\n');
2319 // otherwise, discard '\r'.
2322 if (CharacterChecking && XmlChar.IsInvalid (ch))
2323 throw NotWFError ("Invalid character was found.");
2325 // FIXME: it might be optimized by the JIT later,
2326 // AppendValueChar (ch);
// Same logic as AppendValueChar, written out in place.
2328 if (ch <= Char.MaxValue)
2329 valueBuffer.Append ((char) ch);
2331 AppendSurrogatePairValueChar (ch);
2336 XmlNodeType.CDATA, // nodeType
2337 String.Empty, // name
2338 String.Empty, // prefix
2339 String.Empty, // localName
2340 false, // isEmptyElement
2341 null, // value: create only when required
2342 true // clearAttributes
2346 // The reader is positioned on the first character after
2347 // the leading '<!DOCTYPE'.
// Parses a document type declaration: the doctype name, an optional
// external ID (system literal, or public ID followed by system literal),
// an optional internal subset in brackets, and the closing '>'.
// The DTD object model is then generated and the declaration is exposed
// as a DocumentType node whose value is the internal subset text, with
// "PUBLIC" and "SYSTEM" attributes when those IDs were given.
// Reports through NotWFError when a doctype is not allowed in the current
// state (DocumentType, Element, EndElement) or required whitespace is
// missing. (The condition guarding the first throw is not visible in this
// excerpt.)
2348 private void ReadDoctypeDecl ()
2351 throw NotWFError ("Document Type Declaration (DTD) is prohibited in this XML.");
2352 switch (currentState) {
2353 case XmlNodeType.DocumentType:
2354 case XmlNodeType.Element:
2355 case XmlNodeType.EndElement:
2356 throw NotWFError ("Document type cannot appear in this state.");
2358 currentState = XmlNodeType.DocumentType;
2360 string doctypeName = null;
2361 string publicId = null;
2362 string systemId = null;
2363 int intSubsetStartLine = 0;
2364 int intSubsetStartColumn = 0;
2367 doctypeName = ReadName ();
// true: ReadSystemLiteral must match the SYSTEM keyword itself.
2372 systemId = ReadSystemLiteral (true);
2375 publicId = ReadPubidLiteral ();
2376 if (!SkipWhitespace ())
2377 throw NotWFError ("Whitespace is required between PUBLIC id and SYSTEM id.");
// false: no SYSTEM keyword precedes the system literal here.
2378 systemId = ReadSystemLiteral (false);
2384 if(PeekChar () == '[')
2386 // read markupdecl etc. or end of decl
// The start position of the subset is handed to the DTD reader below.
2388 intSubsetStartLine = this.LineNumber;
2389 intSubsetStartColumn = this.LinePosition;
2390 ClearValueBuffer ();
2391 ReadInternalSubset ();
2392 parserContext.InternalSubset = CreateValueString ();
2394 // end of DOCTYPE decl.
2395 ExpectAfterWhitespace ('>');
2397 GenerateDTDObjectModel (doctypeName, publicId,
2398 systemId, parserContext.InternalSubset,
2399 intSubsetStartLine, intSubsetStartColumn);
2401 // set properties for <!DOCTYPE> node
2403 XmlNodeType.DocumentType, // nodeType
2404 doctypeName, // name
2405 String.Empty, // prefix
2406 doctypeName, // localName
2407 false, // isEmptyElement
2408 parserContext.InternalSubset, // value
2409 true // clearAttributes
2412 if (publicId != null)
2413 AddAttributeWithValue ("PUBLIC", publicId);
2414 if (systemId != null)
2415 AddAttributeWithValue ("SYSTEM", systemId);
2416 currentAttribute = currentAttributeValue = -1;
// Convenience overload: builds the DTD object model with both the
// internal subset start line and start column given as 0.
internal DTDObjectModel GenerateDTDObjectModel (string name, string publicId,
	string systemId, string internalSubset)
{
	const int defaultStartLine = 0;
	const int defaultStartColumn = 0;
	return GenerateDTDObjectModel (name, publicId, systemId, internalSubset,
		defaultStartLine, defaultStartColumn);
}
// Creates a new DTDObjectModel on the parser context, fills it from the
// arguments and from reader state (base URI, resolver, standalone flag,
// current line and column), and then runs a DTDReader over it.
// intSubsetStartLine / intSubsetStartColumn are passed to the DTDReader
// constructor. Returns the result of DTDReader.GenerateDTDObjectModel.
2425 internal DTDObjectModel GenerateDTDObjectModel (string name, string publicId,
2426 string systemId, string internalSubset, int intSubsetStartLine, int intSubsetStartColumn)
2429 parserContext.Dtd = new DTDObjectModel (this.NameTable); // merges both internal and external subsets in the meantime,
2430 DTD.BaseURI = BaseURI;
2432 DTD.PublicId = publicId;
2433 DTD.SystemId = systemId;
2434 DTD.InternalSubset = internalSubset;
2435 DTD.XmlResolver = resolver;
2436 DTD.IsStandalone = isStandalone;
2437 DTD.LineNumber = line;
2438 DTD.LinePosition = column;
2440 DTDReader dr = new DTDReader (DTD, intSubsetStartLine, intSubsetStartColumn);
// The DTD reader uses the same normalization setting as this reader.
2441 dr.Normalization = this.normalization;
2442 return dr.GenerateDTDObjectModel ();
// Kind of construct the internal-subset scanner is currently inside.
// Members referenced elsewhere in this file include Free, ElementDecl,
// AttlistDecl, EntityDecl, NotationDecl, PI, Comment, InsideSingleQuoted
// and InsideDoubleQuoted. (The member list is not visible in this excerpt.)
2445 private enum DtdInputState
// Stack of DtdInputState values used while scanning the internal subset.
// A new stack always starts with DtdInputState.Free at the bottom.
private class DtdInputStateStack
{
	// Generic stack: stores the enum values directly, so pushes do not box
	// and reads need no cast.
	Stack<DtdInputState> intern = new Stack<DtdInputState> ();

	public DtdInputStateStack ()
	{
		Push (DtdInputState.Free);
	}

	// Returns the innermost state without removing it.
	public DtdInputState Peek ()
	{
		return intern.Peek ();
	}

	// Removes and returns the innermost state.
	public DtdInputState Pop ()
	{
		return intern.Pop ();
	}

	// Enters a nested state.
	public void Push (DtdInputState val)
	{
		intern.Push (val);
	}
}
// Nesting of DTD constructs seen so far while scanning the internal subset.
DtdInputStateStack stateStack = new DtdInputStateStack ();

// Innermost DTD input state, i.e. the top of stateStack.
DtdInputState State {
	get {
		return stateStack.Peek ();
	}
}
// Consumes one character from the input, appends it to the value buffer
// and returns it. The ReadChar result is appended as-is, whatever it is.
private int ReadValueChar ()
{
	int consumed = ReadChar ();
	AppendValueChar (consumed);
	return consumed;
}
// Appends s to valueBuffer.
// NOTE(review): going by the method name, a statement that first matches
// s against the input (presumably via Expect) precedes the append, but it
// is not visible in this excerpt - confirm.
2495 private void ExpectAndAppend (string s)
2498 valueBuffer.Append (s);
2501 // Simply read but not generate any result.
// Scans the internal subset of a DOCTYPE declaration one character at a
// time through ReadValueChar, so the raw text accumulates in valueBuffer.
// stateStack tracks the construct currently open (quoted literal, comment,
// processing instruction or a markup declaration), which decides whether
// a delimiter character is significant or plain content.
// Scanning stops when continueParse is cleared in the Free state; the last
// buffered character is removed at that point.
// Reports through NotWFError: end of input inside the DTD, an unknown
// "<!" keyword, a '>' in a state where it cannot close anything, and a
// parameter entity reference inside a markup declaration.
2502 private void ReadInternalSubset ()
2504 bool continueParse = true;
2506 while (continueParse) {
2507 switch (ReadValueChar ()) {
2510 case DtdInputState.Free:
2512 valueBuffer.Remove (valueBuffer.Length - 1, 1);
2513 continueParse = false;
2515 case DtdInputState.InsideDoubleQuoted:
2516 case DtdInputState.InsideSingleQuoted:
2517 case DtdInputState.Comment:
2520 throw NotWFError ("unexpected end of file at DTD.");
2524 throw NotWFError ("unexpected end of file at DTD.");
2527 case DtdInputState.InsideDoubleQuoted:
2528 case DtdInputState.InsideSingleQuoted:
2529 case DtdInputState.Comment:
2530 continue; // well-formed
2532 int c = ReadValueChar ();
2535 stateStack.Push (DtdInputState.PI);
// The first letter(s) after "<!" select the declaration keyword; the rest
// of the keyword is matched and buffered by ExpectAndAppend.
2538 switch (ReadValueChar ()) {
2540 switch (ReadValueChar ()) {
2542 ExpectAndAppend ("EMENT");
2543 stateStack.Push (DtdInputState.ElementDecl);
2546 ExpectAndAppend ("TITY");
2547 stateStack.Push (DtdInputState.EntityDecl);
2550 throw NotWFError ("unexpected token '<!E'.");
2554 ExpectAndAppend ("TTLIST");
2555 stateStack.Push (DtdInputState.AttlistDecl);
2558 ExpectAndAppend ("OTATION");
2559 stateStack.Push (DtdInputState.NotationDecl);
2562 ExpectAndAppend ("-");
2563 stateStack.Push (DtdInputState.Comment);
2568 throw NotWFError (String.Format ("unexpected '<{0}'.", (char) c));
// A quote character opens a literal only when the scanner is not already
// inside the other kind of literal or inside a comment.
2572 if (State == DtdInputState.InsideSingleQuoted)
2574 else if (State != DtdInputState.InsideDoubleQuoted && State != DtdInputState.Comment)
2575 stateStack.Push (DtdInputState.InsideSingleQuoted);
2578 if (State == DtdInputState.InsideDoubleQuoted)
2580 else if (State != DtdInputState.InsideSingleQuoted && State != DtdInputState.Comment)
2581 stateStack.Push (DtdInputState.InsideDoubleQuoted);
// All four declaration states share the NotationDecl handling.
2585 case DtdInputState.ElementDecl:
2586 goto case DtdInputState.NotationDecl;
2587 case DtdInputState.AttlistDecl:
2588 goto case DtdInputState.NotationDecl;
2589 case DtdInputState.EntityDecl:
2590 goto case DtdInputState.NotationDecl;
2591 case DtdInputState.NotationDecl:
2594 case DtdInputState.InsideDoubleQuoted:
2595 case DtdInputState.InsideSingleQuoted:
2596 case DtdInputState.Comment:
2599 throw NotWFError ("unexpected token '>'");
2603 if (State == DtdInputState.PI) {
2604 if (ReadValueChar () == '>')
2609 if (State == DtdInputState.Comment) {
2610 if (PeekChar () == '-') {
2612 ExpectAndAppend (">");
2618 if (State != DtdInputState.Free && State != DtdInputState.EntityDecl && State != DtdInputState.Comment && State != DtdInputState.InsideDoubleQuoted && State != DtdInputState.InsideSingleQuoted)
2619 throw NotWFError ("Parameter Entity Reference cannot appear as a part of markupdecl (see XML spec 2.8).");
2625 // The reader is positioned on the first 'S' of "SYSTEM".
// Reads a quoted system literal and returns its content without the
// quotes. The character read as the opening quote also serves as the
// terminator.
// expectSYSTEM: NOTE(review) the statements that use this flag are not
// visible in this excerpt; ReadDoctypeDecl passes true for a plain SYSTEM
// id and false after a PUBLIC id.
// Reports through NotWFError: missing whitespace after 'SYSTEM' and end of
// input inside the literal.
// NOTE(review): no check that quoteChar is ' or " is visible here, so any
// character appears to be accepted as the delimiter - confirm.
2626 private string ReadSystemLiteral (bool expectSYSTEM)
2630 if (!SkipWhitespace ())
2631 throw NotWFError ("Whitespace is required after 'SYSTEM'.");
2635 int quoteChar = ReadChar (); // apos or quot
2637 ClearValueBuffer ();
2638 while (c != quoteChar) {
2641 throw NotWFError ("Unexpected end of stream in ExternalID.");
2643 AppendValueChar (c);
2645 return CreateValueString ();
// Reads a quoted public identifier literal and returns its content without
// the quotes. Every character other than the terminating quote must
// satisfy XmlChar.IsPubidChar.
// Reports through NotWFError: missing whitespace after 'PUBLIC', end of
// input inside the literal, and a character not allowed in a public ID.
// NOTE(review): no check that quoteChar is ' or " is visible here, so any
// character appears to be accepted as the delimiter - confirm.
2648 private string ReadPubidLiteral()
2651 if (!SkipWhitespace ())
2652 throw NotWFError ("Whitespace is required after 'PUBLIC'.");
2653 int quoteChar = ReadChar ();
2655 ClearValueBuffer ();
2656 while(c != quoteChar)
2659 if(c < 0) throw NotWFError ("Unexpected end of stream in ExternalID.");
2660 if(c != quoteChar && !XmlChar.IsPubidChar (c))
2661 throw NotWFError (String.Format ("character '{0}' not allowed for PUBLIC ID", (char)c ));
2663 AppendValueChar (c);
2665 return CreateValueString ();
// The reader is positioned on the first character of the name.
// Reads a name and returns it; the prefix and local part produced by the
// two-output overload are discarded.
private string ReadName ()
{
	string ignoredPrefix;
	string ignoredLocalName;
	string qualifiedName = ReadName (out ignoredPrefix, out ignoredLocalName);
	return qualifiedName;
}
// Reads an XML name from the input and returns it atomized through
// NameTable. When namespaces is set, the first ':' in the name splits it:
// prefix receives the part before the colon and localName the part after.
// A name whose first character fails XmlChar.IsFirstNameChar is reported
// through NotWFError.
// Two implementations follow, selected by the USE_NAME_BUFFER symbol: the
// first slices the name directly out of peekChars (preserveCurrentTag is
// forced to true for the duration and restored afterwards); the second
// copies the characters into nameBuffer.
2676 private string ReadName (out string prefix, out string localName)
2678 #if !USE_NAME_BUFFER
2679 bool savePreserve = preserveCurrentTag;
2680 preserveCurrentTag = true;
// The start is kept as an offset relative to curNodePeekIndex and turned
// back into an absolute index after the scan.
2682 int startOffset = peekCharsIndex - curNodePeekIndex;
2683 int ch = PeekChar ();
2684 if (!XmlChar.IsFirstNameChar (ch))
2685 throw NotWFError (String.Format (CultureInfo.InvariantCulture, "a name did not start with a legal character {0} ({1})", ch, (char) ch));
2690 while (XmlChar.IsNameChar ((ch = PeekChar ()))) {
// Only the first colon counts, and only when namespace support is on.
2692 if (ch == ':' && namespaces && colonAt < 0)
2697 int start = curNodePeekIndex + startOffset;
2699 string name = NameTable.Add (
2700 peekChars, start, length);
2703 prefix = NameTable.Add (
2704 peekChars, start, colonAt);
2705 localName = NameTable.Add (
2706 peekChars, start + colonAt + 1, length - colonAt - 1);
2708 prefix = String.Empty;
2712 preserveCurrentTag = savePreserve;
2716 int ch = PeekChar ();
2717 if (!XmlChar.IsFirstNameChar (ch))
2718 throw NotWFError (String.Format (CultureInfo.InvariantCulture, "a name did not start with a legal character {0} ({1})", ch, (char) ch));
2723 // AppendNameChar (ch);
2725 // nameBuffer.Length is always non-0 so no need to ExpandNameCapacity () here
2726 if (ch <= Char.MaxValue)
2727 nameBuffer [nameLength++] = (char) ch;
2729 AppendSurrogatePairNameChar (ch);
2734 while (XmlChar.IsNameChar ((ch = PeekChar ()))) {
2737 if (ch == ':' && namespaces && colonAt < 0)
2738 colonAt = nameLength;
2739 // AppendNameChar (ch);
2741 if (nameLength == nameCapacity)
2742 ExpandNameCapacity ();
2743 if (ch <= Char.MaxValue)
2744 nameBuffer [nameLength++] = (char) ch;
2746 AppendSurrogatePairNameChar (ch);
2750 string name = NameTable.Add (nameBuffer, 0, nameLength);
2753 prefix = NameTable.Add (nameBuffer, 0, colonAt);
2754 localName = NameTable.Add (nameBuffer, colonAt + 1, nameLength - colonAt - 1);
2756 prefix = String.Empty;
// Consumes the next character and verifies that it is the expected one.
// A mismatch is reported through NotWFError; end of input is shown as
// "EOF" in the message.
private void Expect (int expected)
{
	int actual = ReadChar ();
	if (actual == expected)
		return;

	object found;
	if (actual < 0)
		found = "EOF";
	else
		found = (char) actual;

	throw NotWFError (String.Format (CultureInfo.InvariantCulture,
		"expected '{0}' ({1:X}) but found '{2}' ({3:X})",
		(char) expected,
		expected,
		found,
		actual));
}
// Consumes as many characters as expected has and verifies that they
// spell it out. Reading stops at the first mismatch, which is reported
// through NotWFError.
private void Expect (string expected)
{
	foreach (char wanted in expected) {
		if (ReadChar () == wanted)
			continue;
		throw NotWFError (String.Format (CultureInfo.InvariantCulture,
			"'{0}' is expected", expected));
	}
}
// Skips any whitespace, then consumes one more character and verifies
// that it is c. Anything else, including end of input (shown as "EOF"),
// is reported through NotWFError.
private void ExpectAfterWhitespace (char c)
{
	int next;
	do {
		next = ReadChar ();
		// The range test is a cheap pre-filter before the table lookup.
	} while (next < 0x21 && XmlChar.IsWhitespace (next));

	if (c != next)
		throw NotWFError (String.Format (CultureInfo.InvariantCulture, "Expected {0}, but found {1} [{2}]", c, next < 0 ? (object) "EOF" : (char) next, next));
}
2800 // Does not consume the first non-whitespace character.
// Skips a run of XML whitespace (space, tab, line feed, carriage return).
// The character tests are written out by hand instead of calling
// XmlChar.IsWhitespace, as the FIXME comments explain.
// 'skipped' records whether the first peeked character was whitespace.
// (The statements that consume characters and the return statements are
// not visible in this excerpt.)
2801 private bool SkipWhitespace ()
2803 // FIXME: It should be inlined by the JIT.
2804 // bool skipped = XmlChar.IsWhitespace (PeekChar ());
2805 int ch = PeekChar ();
2806 bool skipped = (ch == 0x20 || ch == 0x9 || ch == 0xA || ch == 0xD);
2810 // FIXME: It should be inlined by the JIT.
2811 // while (XmlChar.IsWhitespace (PeekChar ()))
2813 while ((ch = PeekChar ()) == 0x20 || ch == 0x9 || ch == 0xA || ch == 0xD)
// Reads a run of whitespace. preserveCurrentTag is forced to true while
// scanning so the run can afterwards be copied out of peekChars into
// valueBuffer in one Append call.
// isText is true when the run occurs in Element state and is followed by
// something other than '<' or end of input.
// When isText is false, the whitespaceHandling setting is consulted: None,
// or Significant outside an xml:space="preserve" scope.
// The node type offered is SignificantWhitespace when XmlSpace is
// Preserve, otherwise Whitespace.
// (The statements that act on these conditions and the return values are
// not visible in this excerpt.)
2818 private bool ReadWhitespace ()
// Whitespace as the first node moves the state past "nothing read yet".
2820 if (currentState == XmlNodeType.None)
2821 currentState = XmlNodeType.XmlDeclaration;
2823 bool savePreserve = preserveCurrentTag;
2824 preserveCurrentTag = true;
2825 int startOffset = peekCharsIndex - curNodePeekIndex; // it should be 0 for now though.
2827 int ch = PeekChar ();
2831 // FIXME: It should be inlined by the JIT.
2832 // } while ((ch = PeekChar ()) != -1 && XmlChar.IsWhitespace (ch));
2833 } while (ch == 0x20 || ch == 0x9 || ch == 0xA || ch == 0xD);
2835 bool isText = currentState == XmlNodeType.Element && ch != -1 && ch != '<';
2837 if (!isText && (whitespaceHandling == WhitespaceHandling.None ||
2838 whitespaceHandling == WhitespaceHandling.Significant && XmlSpace != XmlSpace.Preserve))
2841 ClearValueBuffer ();
2842 valueBuffer.Append (peekChars, curNodePeekIndex, peekCharsIndex - curNodePeekIndex - startOffset);
2843 preserveCurrentTag = savePreserve;
2848 XmlNodeType nodeType = (this.XmlSpace == XmlSpace.Preserve) ?
2849 XmlNodeType.SignificantWhitespace : XmlNodeType.Whitespace;
2850 SetProperties (nodeType,
2855 null, // value: create only when required
2862 // Returns -1 if it should throw an error.
// ReadCharsInternal: copies characters obtained from PeekChar () into
// `buffer`, writing from index `offset` onward, for at most `length` loop
// iterations. A '<' that is not followed by '/' and a '<' seen while
// nestLevel is still positive are both stored in the buffer as a literal
// '<'. Otherwise readCharsInProgress is cleared and Read () moves the
// reader to the next node.
// NOTE(review): the switch/case lines, the Advance calls, the nestLevel
// increment, the end-tag handling and the return statements are not visible
// in this listing; the comments below describe only the visible statements.
2863 private int ReadCharsInternal (char [] buffer, int offset, int length)
// bufIndex is the write position; i counts loop iterations separately.
2865 int bufIndex = offset;
2866 for (int i = 0; i < length; i++) {
2867 int c = PeekChar ();
2870 throw NotWFError ("Unexpected end of xml.");
// A following character other than '/' means this '<' is not the start of
// an end tag, so it is stored as content.
2873 if (PeekChar () != '/') {
2875 buffer [bufIndex++] = '<';
// Post-decrement: nestLevel is compared with 0 first and then reduced by
// one whether or not the branch is taken.
2878 else if (nestLevel-- > 0) {
2879 buffer [bufIndex++] = '<';
2882 // Seems to skip immediate EndElement
2889 readCharsInProgress = false;
2890 Read (); // move to the next node
// Values that fit in a single char are stored directly.
2894 if (c <= Char.MaxValue)
2895 buffer [bufIndex++] = (char) c;
// Larger values are stored as a UTF-16 surrogate pair: the high surrogate
// is (c - 0x10000) / 0x400 + 0xD800, the low one (c - 0x10000) % 0x400 + 0xDC00.
// NOTE(review): this writes two chars during one counted iteration, so
// bufIndex can advance by more than `length` positions past `offset` -
// confirm that the buffer is sized for that or that a guard exists in the
// lines not shown here.
2897 buffer [bufIndex++] = (char) ((c - 0x10000) / 0x400 + 0xD800);
2898 buffer [bufIndex++] = (char) ((c - 0x10000) % 0x400 + 0xDC00);
2906 private bool ReadUntilEndTag ()
2909 currentState = XmlNodeType.EndElement;
2915 throw NotWFError ("Unexpected end of xml.");
2917 if (PeekChar () != '/') {
2921 else if (--nestLevel > 0)
2924 string name = ReadName ();
2925 if (name != elementNames [elementNameStackPos - 1].Name)