2 // System.Xml.XmlTextReader
5 // Jason Diamond (jason@injektilo.org)
6 // Adam Treat (manyoso@yahoo.com)
7 // Atsushi Enomoto (ginga@kit.hi-ho.ne.jp)
9 // (C) 2001, 2002 Jason Diamond http://injektilo.org/
10 // Copyright (C) 2005-2006 Novell, Inc (http://www.novell.com)
12 // Permission is hereby granted, free of charge, to any person obtaining
13 // a copy of this software and associated documentation files (the
14 // "Software"), to deal in the Software without restriction, including
15 // without limitation the rights to use, copy, modify, merge, publish,
16 // distribute, sublicense, and/or sell copies of the Software, and to
17 // permit persons to whom the Software is furnished to do so, subject to
18 // the following conditions:
20 // The above copyright notice and this permission notice shall be
21 // included in all copies or substantial portions of the Software.
23 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
27 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
28 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
29 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
31 //#define USE_NAME_BUFFER
34 // Optimization TODOs:
36 // - support PushbackChar() which reverts one character read.
37 // - ReadTextReader() should always keep one pushback buffer
38 // as pushback safety net.
39 // - Replace (peek,read) * n -> read * n + pushback
43 using System.Collections;
45 using System.Collections.Generic;
47 using System.Globalization;
49 using System.Security.Permissions;
51 using System.Xml.Schema;
64 internal class XmlTextReader : XmlReader,
65 IXmlLineInfo, IXmlNamespaceResolver, IHasXmlParserContext
67 [PermissionSet (SecurityAction.InheritanceDemand, Unrestricted = true)]
68 public class XmlTextReader : XmlReader, IXmlLineInfo, IHasXmlParserContext
73 protected XmlTextReader ()
77 public XmlTextReader (Stream input)
78 : this (new XmlStreamReader (input))
82 public XmlTextReader (string url)
83 : this(url, new NameTable ())
87 public XmlTextReader (TextReader input)
88 : this (input, new NameTable ())
92 protected XmlTextReader (XmlNameTable nt)
93 : this (String.Empty, null, XmlNodeType.None, null)
97 public XmlTextReader (Stream input, XmlNameTable nt)
98 : this(new XmlStreamReader (input), nt)
102 public XmlTextReader (string url, Stream input)
103 : this (url, new XmlStreamReader (input))
107 public XmlTextReader (string url, TextReader input)
108 : this (url, input, new NameTable ())
112 public XmlTextReader (string url, XmlNameTable nt)
115 Stream stream = GetStreamFromUrl (url, out uriString);
116 XmlParserContext ctx = new XmlParserContext (nt,
117 new XmlNamespaceManager (nt),
120 this.InitializeContext (uriString, ctx, new XmlStreamReader (stream), XmlNodeType.Document);
123 public XmlTextReader (TextReader input, XmlNameTable nt)
124 : this (String.Empty, input, nt)
128 // This is used in XmlReader.Create() to indicate that string
129 // argument is uri, not an xml fragment.
130 internal XmlTextReader (bool dummy, string url, XmlNodeType fragType, XmlParserContext context)
133 Stream stream = GetStreamFromUrl (url, out uriString);
134 this.InitializeContext (uriString, context, new XmlStreamReader (stream), fragType);
137 public XmlTextReader (Stream xmlFragment, XmlNodeType fragType, XmlParserContext context)
138 : this (context != null ? context.BaseURI : String.Empty,
139 new XmlStreamReader (xmlFragment),
145 internal XmlTextReader (string baseURI, TextReader xmlFragment, XmlNodeType fragType)
146 : this (baseURI, xmlFragment, fragType, null)
150 public XmlTextReader (string url, Stream input, XmlNameTable nt)
151 : this (url, new XmlStreamReader (input), nt)
155 public XmlTextReader (string url, TextReader input, XmlNameTable nt)
156 : this (url, input, XmlNodeType.Document, null)
160 public XmlTextReader (string xmlFragment, XmlNodeType fragType, XmlParserContext context)
161 : this (context != null ? context.BaseURI : String.Empty,
162 new StringReader (xmlFragment),
168 internal XmlTextReader (string url, TextReader fragment, XmlNodeType fragType, XmlParserContext context)
170 InitializeContext (url, context, fragment, fragType);
// Resolves url through the configured resolver and opens it as a Stream.
// absoluteUriString receives the textual form of the resolved URI, or
// String.Empty when the resolver yields no URI.
private Stream GetStreamFromUrl (string url, out string absoluteUriString)
{
	Uri resolved = resolver.ResolveUri (null, url);
	if (resolved == null)
		absoluteUriString = String.Empty;
	else
		absoluteUriString = resolved.ToString ();

	// "as" yields null when the resolver returns something other than a Stream.
	object entity = resolver.GetEntity (resolved, null, typeof (Stream));
	return entity as Stream;
}
184 public override int AttributeCount
186 get { return attributeCount; }
189 public override string BaseURI
191 get { return parserContext.BaseURI; }
195 public override bool CanReadBinaryContent {
199 public override bool CanReadValueChunk {
203 internal override bool CanReadBinaryContent {
207 internal override bool CanReadValueChunk {
212 internal bool CharacterChecking {
213 get { return checkCharacters; }
214 set { checkCharacters = value; }
217 // for XmlReaderSettings.CloseInput support
218 internal bool CloseInput {
219 get { return closeInput; }
220 set { closeInput = value; }
// Depth of the node under the cursor, derived from elementDepth.
223 public override int Depth
// nodeTypeMod is 0 while currentToken is an Element and -1 for any other node type.
226 int nodeTypeMod = currentToken.NodeType == XmlNodeType.Element ? 0 : -1;
// A non-negative currentAttributeValue means the cursor is on a token inside an attribute value.
227 if (currentAttributeValue >= 0)
228 return nodeTypeMod + elementDepth + 2; // inside attribute value.
// A non-negative currentAttribute means the cursor is on the attribute itself.
229 else if (currentAttribute >= 0)
230 return nodeTypeMod + elementDepth + 1;
235 public Encoding Encoding
237 get { return parserContext.Encoding; }
240 public EntityHandling EntityHandling {
241 get { return entityHandling; }
242 set { entityHandling = value; }
246 public override bool EOF {
247 get { return readState == ReadState.EndOfFile; }
250 public override bool HasValue {
251 get { return cursorToken.Value != null; }
254 public override bool IsDefault {
255 // XmlTextReader does not expand default attributes.
256 get { return false; }
259 public override bool IsEmptyElement {
260 get { return cursorToken.IsEmptyElement; }
265 public override string this [int i] {
266 get { return GetAttribute (i); }
269 public override string this [string name] {
270 get { return GetAttribute (name); }
273 public override string this [string localName, string namespaceName] {
274 get { return GetAttribute (localName, namespaceName); }
278 public int LineNumber {
280 if (useProceedingLineInfo)
283 return cursorToken.LineNumber;
287 public int LinePosition {
289 if (useProceedingLineInfo)
292 return cursorToken.LinePosition;
296 public override string LocalName {
297 get { return cursorToken.LocalName; }
300 public override string Name {
301 get { return cursorToken.Name; }
304 public bool Namespaces {
305 get { return namespaces; }
307 if (readState != ReadState.Initial)
308 throw new InvalidOperationException ("Namespaces have to be set before reading.");
313 public override string NamespaceURI {
314 get { return cursorToken.NamespaceURI; }
317 public override XmlNameTable NameTable {
318 get { return parserContext.NameTable; }
321 public override XmlNodeType NodeType {
322 get { return cursorToken.NodeType; }
325 public bool Normalization {
326 get { return normalization; }
327 set { normalization = value; }
330 public override string Prefix {
331 get { return cursorToken.Prefix; }
335 public bool ProhibitDtd {
336 get { return prohibitDtd; }
337 set { prohibitDtd = value; }
341 public override char QuoteChar {
342 get { return cursorToken.QuoteChar; }
345 public override ReadState ReadState {
346 get { return readState; }
350 public override XmlReaderSettings Settings {
351 get { return base.Settings; }
355 public override string Value {
356 get { return cursorToken.Value != null ? cursorToken.Value : String.Empty; }
359 public WhitespaceHandling WhitespaceHandling {
360 get { return whitespaceHandling; }
361 set { whitespaceHandling = value; }
364 public override string XmlLang {
365 get { return parserContext.XmlLang; }
368 public XmlResolver XmlResolver {
369 set { resolver = value; }
372 public override XmlSpace XmlSpace {
373 get { return parserContext.XmlSpace; }
// Puts the reader into ReadState.Closed and clears both the cursor token
// and the current token.
380 public override void Close ()
382 readState = ReadState.Closed;
384 cursorToken.Clear ();
385 currentToken.Clear ();
// The underlying TextReader is only touched when CloseInput is enabled and a
// reader exists. NOTE(review): the guarded statement is not visible in this
// listing - presumably it closes the reader; confirm.
387 if (closeInput && reader != null)
// Returns the value of the attribute at index i of the current node.
//
// Throws ArgumentOutOfRangeException when i is negative or not smaller
// than AttributeCount. A negative index previously escaped the guard and
// surfaced as an IndexOutOfRangeException from the array access; the old
// guard also passed its message text as the parameter name.
public override string GetAttribute (int i)
{
	if (i < 0 || i >= attributeCount)
		throw new ArgumentOutOfRangeException ("i", i, "Index must be non-negative and smaller than AttributeCount.");

	return attributeTokens [i].Value;
}
400 // MS.NET 1.0 msdn says that this method returns String.Empty
401 // for absent attribute, but in fact it returns null.
402 // This description is corrected in MS.NET 1.1 msdn.
// Looks up an attribute by its qualified name: scans the first attributeCount
// entries of attributeTokens in order and returns the Value of the first one
// whose Name equals name.
// NOTE(review): the not-found return is not visible in this listing; the
// comment preceding this method says null is returned - confirm.
403 public override string GetAttribute (string name)
405 for (int i = 0; i < attributeCount; i++)
406 if (attributeTokens [i].Name == name)
407 return attributeTokens [i].Value;
// Linear search over the current attributes for one whose LocalName and
// NamespaceURI both equal the arguments.
// NOTE(review): the return statements are not visible in this listing;
// callers treat the result as an index into attributeTokens - confirm the
// not-found sentinel.
411 private int GetIndexOfQualifiedAttribute (string localName, string namespaceURI)
413 for (int i = 0; i < attributeCount; i++) {
414 XmlAttributeTokenInfo ti = attributeTokens [i];
415 if (ti.LocalName == localName && ti.NamespaceURI == namespaceURI)
421 XmlParserContext IHasXmlParserContext.ParserContext {
422 get { return parserContext; }
425 public override string GetAttribute (string localName, string namespaceURI)
427 int idx = this.GetIndexOfQualifiedAttribute (localName, namespaceURI);
430 return attributeTokens [idx].Value;
434 public IDictionary<string, string> GetNamespacesInScope (XmlNamespaceScope scope)
436 return parserContext.NamespaceManager.GetNamespacesInScope (scope);
439 IDictionary<string, string> IXmlNamespaceResolver.GetNamespacesInScope (XmlNamespaceScope scope)
441 return GetNamespacesInScope (scope);
// Returns a TextReader over the input that has not been parsed yet.
445 public TextReader GetRemainder ()
// peekCharsLength is negative until the peek buffer has been filled for the
// first time. NOTE(review): the statement guarded by this check is not
// visible in this listing.
447 if (peekCharsLength < 0)
// Otherwise: the unread tail of the peek buffer (from peekCharsIndex up to
// peekCharsLength) followed by everything left in the underlying reader.
449 return new StringReader (new string (peekChars, peekCharsIndex, peekCharsLength - peekCharsIndex) + reader.ReadToEnd ());
453 public bool HasLineInfo ()
455 bool IXmlLineInfo.HasLineInfo ()
461 public override string LookupNamespace (string prefix)
463 return LookupNamespace (prefix, false);
// Resolves prefix through the parser context's namespace manager.
// An empty result from the manager is reported to the caller as null.
private string LookupNamespace (string prefix, bool atomizedNames)
{
	string uri = parserContext.NamespaceManager.LookupNamespace (prefix, atomizedNames);
	if (uri == String.Empty)
		return null;
	return uri;
}
474 string IXmlNamespaceResolver.LookupPrefix (string ns)
476 return LookupPrefix (ns, false);
479 public string LookupPrefix (string ns, bool atomizedName)
481 return parserContext.NamespaceManager.LookupPrefix (ns, atomizedName);
// Moves the cursor to the attribute at index i of the current node.
//
// Throws ArgumentOutOfRangeException when i is negative or not smaller
// than AttributeCount. The check now also rejects negative indexes, so the
// cursor fields are never modified before an invalid array access.
public override void MoveToAttribute (int i)
{
	if (i < 0 || i >= attributeCount)
		throw new ArgumentOutOfRangeException ("i", i, "attribute index out of range.");

	currentAttribute = i;
	// -1: positioned on the attribute itself, not on a token inside its value.
	currentAttributeValue = -1;
	cursorToken = attributeTokens [i];
}
495 public override bool MoveToAttribute (string name)
497 for (int i = 0; i < attributeCount; i++) {
498 XmlAttributeTokenInfo ti = attributeTokens [i];
499 if (ti.Name == name) {
507 public override bool MoveToAttribute (string localName, string namespaceName)
509 int idx = GetIndexOfQualifiedAttribute (localName, namespaceName);
512 MoveToAttribute (idx);
// Moves the cursor from an attribute back to the node that owns it.
// NOTE(review): the return statements are not visible in this listing.
516 public override bool MoveToElement ()
518 if (currentToken == null) // for attribute .ctor()
// Cursor already sits on the current node token.
521 if (cursorToken == currentToken)
// Cursor is on an attribute: reset both attribute indexes and point the
// cursor at the current node token again.
524 if (currentAttribute >= 0) {
525 currentAttribute = -1;
526 currentAttributeValue = -1;
527 cursorToken = currentToken;
534 public override bool MoveToFirstAttribute ()
536 if (attributeCount == 0)
539 return MoveToNextAttribute ();
// Advances the cursor to the attribute following the current one, if any.
// NOTE(review): the return statements and the increment of currentAttribute
// are not visible in this listing.
542 public override bool MoveToNextAttribute ()
544 if (currentAttribute == 0 && attributeCount == 0)
// There is a further attribute after the current index.
546 if (currentAttribute + 1 < attributeCount) {
// Leave any attribute-value token and point the cursor at the attribute.
548 currentAttributeValue = -1;
549 cursorToken = attributeTokens [currentAttribute];
556 public override bool Read ()
558 curNodePeekIndex = peekCharsIndex;
559 preserveCurrentTag = true;
561 if (startNodeType == XmlNodeType.Attribute) {
562 if (currentAttribute == 0)
563 return false; // already read.
564 SkipTextDeclaration ();
566 IncrementAttributeToken ();
567 ReadAttributeValueTokens ('"');
568 cursorToken = attributeTokens [0];
569 currentAttributeValue = -1;
570 readState = ReadState.Interactive;
573 if (readState == ReadState.Initial && currentState == XmlNodeType.Element)
574 SkipTextDeclaration ();
580 readState = ReadState.Interactive;
581 currentLinkedNodeLineNumber = line;
582 currentLinkedNodeLinePosition = column;
583 useProceedingLineInfo = true;
585 cursorToken = currentToken;
587 currentAttribute = currentAttributeValue = -1;
588 currentToken.Clear ();
590 // It was moved from end of ReadStartTag ().
596 if (shouldSkipUntilEndTag) {
597 shouldSkipUntilEndTag = false;
598 return ReadUntilEndTag ();
601 more = ReadContent ();
603 if (!more && startNodeType == XmlNodeType.Document && currentState != XmlNodeType.EndElement)
604 throw NotWFError ("Document element did not appear.");
606 useProceedingLineInfo = false;
// Steps the cursor through the value tokens of the current attribute.
// NOTE(review): return statements are not visible in this listing.
610 public override bool ReadAttributeValue ()
// Special case: reader created for an attribute fragment and not yet read.
612 if (readState == ReadState.Initial && startNodeType == XmlNodeType.Attribute) {
// Not positioned on an attribute.
616 if (currentAttribute < 0)
618 XmlAttributeTokenInfo ti = attributeTokens [currentAttribute];
// First call for this attribute: start just before its first value token.
619 if (currentAttributeValue < 0)
620 currentAttributeValue = ti.ValueTokenStartIndex - 1;
// ValueTokenEndIndex is inclusive: advance while the end has not been reached.
622 if (currentAttributeValue < ti.ValueTokenEndIndex) {
623 currentAttributeValue++;
624 cursorToken = attributeValueTokens [currentAttributeValue];
631 public int ReadBase64 (byte [] buffer, int offset, int length)
633 BinaryCharGetter = binaryCharGetter;
635 return Binary.ReadBase64 (buffer, offset, length);
637 BinaryCharGetter = null;
641 public int ReadBinHex (byte [] buffer, int offset, int length)
643 BinaryCharGetter = binaryCharGetter;
645 return Binary.ReadBinHex (buffer, offset, length);
647 BinaryCharGetter = null;
// Validates the arguments and delegates to ReadCharsInternal.
// NOTE(review): the conditions guarding the first two throws and the bodies
// of the IsEmptyElement / NodeType checks are not visible in this listing.
651 public int ReadChars (char [] buffer, int offset, int length)
654 throw new ArgumentOutOfRangeException ("offset", offset, "Offset must be non-negative integer.");
656 throw new ArgumentOutOfRangeException ("length", length, "Length must be non-negative integer.");
657 else if (buffer.Length < offset + length)
// NOTE(review): the single-string ArgumentOutOfRangeException constructor
// takes the parameter name, so this text ends up as ParamName rather than as
// the message - unlike the two throws above.
658 throw new ArgumentOutOfRangeException ("buffer length is smaller than the sum of offset and length.");
660 if (IsEmptyElement) {
665 if (NodeType != XmlNodeType.Element)
668 return ReadCharsInternal (buffer, offset, length);
671 public void ResetState ()
673 throw new InvalidOperationException ("Cannot call ResetState when parsing an XML fragment.");
677 public override void ResolveEntity ()
679 // XmlTextReader does not resolve entities.
680 throw new InvalidOperationException ("XmlTextReader cannot resolve external entities.");
684 [MonoTODO ("Implement for performance reason")]
685 public override void Skip ()
693 // Parsed DTD Objects
694 // Note that this property must be kept since dtd2xsd uses it.
695 internal DTDObjectModel DTD {
696 get { return parserContext.Dtd; }
699 internal XmlResolver Resolver {
700 get { return resolver; }
// Holds the properties of one parsed node (names, namespace, line info,
// node type) together with a lazily materialized Value.
705 internal class XmlTokenInfo
707 public XmlTokenInfo (XmlTextReader xtr)
// Owning reader; Value pulls its text from this reader.
715 protected XmlTextReader Reader;
718 public string LocalName;
719 public string Prefix;
720 public string NamespaceURI;
721 public bool IsEmptyElement;
722 public char QuoteChar;
723 public int LineNumber;
724 public int LinePosition;
// Bounds of this token's text inside Reader.valueBuffer; Clear () sets
// ValueBufferStart to -1.
725 public int ValueBufferStart;
726 public int ValueBufferEnd;
728 public XmlNodeType NodeType;
// Value is computed on demand and kept in valueCache.
730 public virtual string Value {
732 if (valueCache != null)
// A non-negative start means the text is the [ValueBufferStart, ValueBufferEnd)
// slice of the reader's value buffer.
734 if (ValueBufferStart >= 0) {
735 //Console.WriteLine (NodeType + " / " + ValueBuffer.Length + " / " + ValueBufferStart + " / " + ValueBufferEnd);
736 valueCache = Reader.valueBuffer.ToString (ValueBufferStart, ValueBufferEnd - ValueBufferStart);
// For these text-carrying node types the value comes from
// Reader.CreateValueString ().
740 case XmlNodeType.Text:
741 case XmlNodeType.SignificantWhitespace:
742 case XmlNodeType.Whitespace:
743 case XmlNodeType.Comment:
744 case XmlNodeType.CDATA:
745 case XmlNodeType.ProcessingInstruction:
746 valueCache = Reader.CreateValueString ();
// Setting Value simply overwrites the cache.
751 set { valueCache = value; }
// Resets the token to an empty node of type None.
754 public virtual void Clear ()
756 ValueBufferStart = -1;
758 NodeType = XmlNodeType.None;
759 Name = LocalName = Prefix = NamespaceURI = String.Empty;
760 IsEmptyElement = false;
762 LineNumber = LinePosition = 0;
// Token for an attribute. Its value is assembled from a run of entries in
// the reader's attributeValueTokens array.
766 internal class XmlAttributeTokenInfo : XmlTokenInfo
768 public XmlAttributeTokenInfo (XmlTextReader reader)
771 NodeType = XmlNodeType.Attribute;
// Inclusive index range into Reader.attributeValueTokens.
774 public int ValueTokenStartIndex;
775 public int ValueTokenEndIndex;
// Scratch builder reused across Value computations.
777 StringBuilder tmpBuilder = new StringBuilder ();
// Lazily builds and caches the attribute's textual value.
779 public override string Value {
781 if (valueCache != null)
784 // An empty value should return String.Empty.
// Single value token: use it directly; an entity reference is rendered
// as "&name;".
785 if (ValueTokenStartIndex == ValueTokenEndIndex) {
786 XmlTokenInfo ti = Reader.attributeValueTokens [ValueTokenStartIndex];
787 if (ti.NodeType == XmlNodeType.EntityReference)
788 valueCache = String.Concat ("&", ti.Name, ";");
790 valueCache = ti.Value;
// Several value tokens: concatenate text tokens verbatim and the others
// in "&name;" form.
794 tmpBuilder.Length = 0;
795 for (int i = ValueTokenStartIndex; i <= ValueTokenEndIndex; i++) {
796 XmlTokenInfo ti = Reader.attributeValueTokens [i];
797 if (ti.NodeType == XmlNodeType.Text)
798 tmpBuilder.Append (ti.Value);
800 tmpBuilder.Append ('&');
801 tmpBuilder.Append (ti.Name);
802 tmpBuilder.Append (';');
806 valueCache = tmpBuilder.ToString (0, tmpBuilder.Length);
810 set { valueCache = value; }
// Resets the token while keeping it typed as an attribute.
813 public override void Clear ()
817 NodeType = XmlNodeType.Attribute;
818 ValueTokenStartIndex = ValueTokenEndIndex = 0;
// Registers a namespace declaration with the namespace manager:
// "xmlns:p" maps LocalName to Value, a bare "xmlns" maps the empty prefix.
// The comparisons are by reference, so they rely on the names being the same
// string instances as XmlNamespaceManager.PrefixXmlns.
821 internal void FillXmlns ()
823 if (Object.ReferenceEquals (Prefix, XmlNamespaceManager.PrefixXmlns))
824 Reader.parserContext.NamespaceManager.AddNamespace (LocalName, Value);
825 else if (Object.ReferenceEquals (Name, XmlNamespaceManager.PrefixXmlns))
826 Reader.parserContext.NamespaceManager.AddNamespace (String.Empty, Value);
// Computes NamespaceURI for this attribute: the xmlns namespace for
// declarations, empty for unprefixed attributes, otherwise a lookup of
// Prefix through the reader.
829 internal void FillNamespace ()
831 if (Object.ReferenceEquals (Prefix, XmlNamespaceManager.PrefixXmlns) ||
832 Object.ReferenceEquals (Name, XmlNamespaceManager.PrefixXmlns))
833 NamespaceURI = XmlNamespaceManager.XmlnsXmlns;
834 else if (Prefix.Length == 0)
835 NamespaceURI = string.Empty;
837 NamespaceURI = Reader.LookupNamespace (Prefix, true);
841 private XmlTokenInfo cursorToken;
842 private XmlTokenInfo currentToken;
843 private XmlAttributeTokenInfo currentAttributeToken;
844 private XmlTokenInfo currentAttributeValueToken;
845 private XmlAttributeTokenInfo [] attributeTokens = new XmlAttributeTokenInfo [10];
846 private XmlTokenInfo [] attributeValueTokens = new XmlTokenInfo [10];
847 private int currentAttribute;
848 private int currentAttributeValue;
849 private int attributeCount;
851 private XmlParserContext parserContext;
853 private ReadState readState;
856 private int elementDepth;
857 private bool depthUp;
859 private bool popScope;
863 public TagName (string n, string l, string p)
870 public readonly string Name;
871 public readonly string LocalName;
872 public readonly string Prefix;
875 private TagName [] elementNames;
876 int elementNameStackPos;
878 private bool allowMultipleRoot;
880 private bool isStandalone;
882 private bool returnEntityReference;
883 private string entityReferenceName;
886 private char [] nameBuffer;
887 private int nameLength;
888 private int nameCapacity;
889 private const int initialNameCapacity = 32;
892 private StringBuilder valueBuffer;
894 private TextReader reader;
895 private char [] peekChars;
896 private int peekCharsIndex;
897 private int peekCharsLength;
898 private int curNodePeekIndex;
899 private bool preserveCurrentTag;
900 private const int peekCharCapacity = 1024;
905 private int currentLinkedNodeLineNumber;
906 private int currentLinkedNodeLinePosition;
907 private bool useProceedingLineInfo;
909 private XmlNodeType startNodeType;
910 // State machine attribute.
911 // XmlDeclaration: after the first node.
912 // DocumentType: after doctypedecl
913 // Element: inside document element
914 // EndElement: after document element
915 private XmlNodeType currentState;
917 // For ReadChars()/ReadBase64()/ReadBinHex()
918 private bool shouldSkipUntilEndTag;
919 XmlReaderBinarySupport.CharGetter binaryCharGetter;
921 // These values are never re-initialized.
922 private bool namespaces = true;
923 private WhitespaceHandling whitespaceHandling = WhitespaceHandling.All;
924 private XmlResolver resolver = new XmlUrlResolver ();
925 private bool normalization = false;
927 private bool checkCharacters;
928 private bool prohibitDtd = false;
929 private bool closeInput = true;
930 private EntityHandling entityHandling; // 2.0
932 private NameTable whitespacePool;
933 private char [] whitespaceCache;
// Builds (but does not throw) the XmlException used for well-formedness
// errors, carrying this reader's line information and BaseURI.
private XmlException NotWFError (string message)
{
	IXmlLineInfo lineInfo = this as IXmlLineInfo;
	return new XmlException (lineInfo, BaseURI, message);
}
942 currentToken = new XmlTokenInfo (this);
943 cursorToken = currentToken;
944 currentAttribute = -1;
945 currentAttributeValue = -1;
948 readState = ReadState.Initial;
949 allowMultipleRoot = false;
955 popScope = allowMultipleRoot = false;
956 elementNames = new TagName [10];
957 elementNameStackPos = 0;
959 isStandalone = false;
960 returnEntityReference = false;
961 entityReferenceName = String.Empty;
964 nameBuffer = new char [initialNameCapacity];
966 nameCapacity = initialNameCapacity;
969 valueBuffer = new StringBuilder ();
972 if (peekChars == null)
973 peekChars = new char [peekCharCapacity];
974 peekCharsLength = -1;
975 curNodePeekIndex = -1; // read from start
980 currentLinkedNodeLineNumber = currentLinkedNodeLinePosition = 0;
981 useProceedingLineInfo = false;
983 currentState = XmlNodeType.None;
985 shouldSkipUntilEndTag = false;
986 binaryCharGetter = new XmlReaderBinarySupport.CharGetter (ReadChars);
988 checkCharacters = true;
990 if (Settings != null)
991 checkCharacters = Settings.CheckCharacters;
995 entityHandling = EntityHandling.ExpandCharEntities;
// Stores the fragment type and parser context, derives the base URI from
// url, and prepares the input reader according to fragType.
// NOTE(review): several statements of this method are not visible in this
// listing.
998 private void InitializeContext (string url, XmlParserContext context, TextReader fragment, XmlNodeType fragType)
1000 startNodeType = fragType;
1001 parserContext = context;
// No context supplied: build one around a fresh NameTable.
1002 if (context == null) {
1003 XmlNameTable nt = new NameTable ();
1004 parserContext = new XmlParserContext (nt,
1005 new XmlNamespaceManager (nt),
1010 if (url != null && url.Length > 0) {
// Try url as given; if Uri construction fails, resolve it against a path
// under the current directory instead.
1013 uri = new Uri (url);
1014 } catch (Exception) {
1015 string path = Path.GetFullPath ("./a");
1016 uri = new Uri (new Uri (path), url);
1018 parserContext.BaseURI = uri.ToString ();
1026 case XmlNodeType.Attribute:
// NOTE(review): the second Replace argument reads as three bare quote
// characters, which looks like an HTML-decoded "&quot;" entity literal -
// confirm against the upstream source before building.
1027 reader = new StringReader (fragment.ReadToEnd ().Replace ("\"", """));
// Element fragments start in the "inside document element" state and may
// contain several top-level nodes.
1029 case XmlNodeType.Element:
1030 currentState = XmlNodeType.Element;
1031 allowMultipleRoot = true;
1033 case XmlNodeType.Document:
1036 throw new XmlException (String.Format ("NodeType {0} is not allowed to create XmlTextReader.", fragType));
1041 internal ConformanceLevel Conformance {
1042 get { return allowMultipleRoot ? ConformanceLevel.Fragment : ConformanceLevel.Document; }
1044 if (value == ConformanceLevel.Fragment) {
1045 currentState = XmlNodeType.Element;
1046 allowMultipleRoot = true;
// Shifts the reader's current line and column counters by the given offsets.
internal void AdjustLineInfoOffset (int lineNumberOffset, int linePositionOffset)
{
	line = line + lineNumberOffset;
	column = column + linePositionOffset;
}
// Replaces the name table held by the parser context.
internal void SetNameTable (XmlNameTable nameTable)
{
	XmlParserContext context = parserContext;
	context.NameTable = nameTable;
}
1063 // Use this method rather than setting the properties
1064 // directly so that all the necessary properties can
1065 // be changed in harmony with each other. Maybe the
1066 // fields should be in a separate class to help enforce
1069 // Namespace URI could not be provided here.
1070 private void SetProperties (
1071 XmlNodeType nodeType,
1075 bool isEmptyElement,
1077 bool clearAttributes)
1079 SetTokenProperties (currentToken, nodeType, name, prefix, localName, isEmptyElement, value, clearAttributes);
1080 currentToken.LineNumber = this.currentLinkedNodeLineNumber;
1081 currentToken.LinePosition = this.currentLinkedNodeLinePosition;
// Copies the supplied node properties onto token and records the current
// depth as elementDepth.
// NOTE(review): part of the parameter list and a few statements are not
// visible in this listing.
1084 private void SetTokenProperties (
1086 XmlNodeType nodeType,
1090 bool isEmptyElement,
1092 bool clearAttributes)
1094 token.NodeType = nodeType;
1096 token.Prefix = prefix;
1097 token.LocalName = localName;
1098 token.IsEmptyElement = isEmptyElement;
1099 token.Value = value;
// Snapshot of the reader-wide depth counter at the time the token is set.
1100 this.elementDepth = depth;
1102 if (clearAttributes)
1106 private void ClearAttributes ()
1108 //for (int i = 0; i < attributeCount; i++)
1109 // attributeTokens [i].Clear ();
1111 currentAttribute = -1;
1112 currentAttributeValue = -1;
// Combines the UTF-16 code unit at peekCharsIndex with the following one
// into a single code point, without consuming either.
1115 private int PeekSurrogate (int c)
// Fewer than two code units buffered: refill, handing c to ReadTextReader.
1117 if (peekCharsLength <= peekCharsIndex + 1) {
1118 if (!ReadTextReader (c))
1119 //FIXME: copy MS.NET behaviour when unpaired surrogate found
1123 int highhalfChar = peekChars [peekCharsIndex];
1124 int lowhalfChar = peekChars [peekCharsIndex+1];
// Masking with 0xFC00 keeps the top six bits: 0xD800 marks a high surrogate,
// 0xDC00 a low surrogate. Anything else is an unpaired unit and is returned
// as-is.
1126 if (((highhalfChar & 0xFC00) != 0xD800) || ((lowhalfChar & 0xFC00) != 0xDC00))
1127 //FIXME: copy MS.NET behaviour when unpaired surrogate found
1128 return highhalfChar;
// Standard UTF-16 decoding of the pair into a supplementary code point.
1129 return 0x10000 + (highhalfChar-0xD800)*0x400 + (lowhalfChar-0xDC00);
// Returns the next code point without consuming it, refilling the peek
// buffer when it is exhausted.
// NOTE(review): some statements (including returns) are not visible in this
// listing.
1132 private int PeekChar ()
1134 if (peekCharsIndex < peekCharsLength) {
1135 int c = peekChars [peekCharsIndex];
// Values outside the surrogate block are plain BMP characters.
// NOTE(review): the surrogate block ends at 0xDFFF inclusive, so the upper
// test was presumably meant to be "c > 0xDFFF"; as written, 0xDFFF is treated
// as a non-surrogate - confirm.
1138 if (c < 0xD800 || c >= 0xDFFF)
1140 return PeekSurrogate (c);
// Buffer exhausted: -1 tells ReadTextReader there is no pending unit to keep.
1142 if (!ReadTextReader (-1))
1148 private int ReadChar ()
1150 int ch = PeekChar ();
1154 peekCharsIndex++; //Increment by 2 when a compound UCS-4 character was found
1159 } else if (ch != -1) {
1165 private void Advance (int ch) {
1169 peekCharsIndex++; //Increment by 2 when a compound UCS-4 character was found
1174 } else if (ch != -1) {
// Refills peekChars from the underlying reader. remained is a pending code
// unit to keep in front of the newly read data, or a negative value when
// there is none. Returns false when nothing (neither the pending unit nor
// new input) is available.
// NOTE(review): a few statements are not visible in this listing.
1179 private bool ReadTextReader (int remained)
// First fill: read straight into the start of the buffer.
1181 if (peekCharsLength < 0) { // initialized buffer
1182 peekCharsLength = reader.Read (peekChars, 0, peekChars.Length);
1183 return peekCharsLength > 0;
// offset reserves one slot for the pending unit, if any.
1185 int offset = remained >= 0 ? 1 : 0;
// Number of buffered characters belonging to the node currently being read.
1186 int copysize = peekCharsLength - curNodePeekIndex;
1188 // It must assure that current tag content always exists
1190 if (!preserveCurrentTag) {
1191 curNodePeekIndex = 0;
1194 } else if (peekCharsLength < peekChars.Length) {
1195 // NonBlockingStreamReader returned less bytes
1196 // than the size of the buffer. In that case,
1197 // just refill the buffer.
// Current node starts in the first half of the buffer: moving it would free
// little space, so the buffer is doubled instead.
1198 } else if (curNodePeekIndex <= (peekCharsLength >> 1)) {
1199 // extend the buffer
1200 char [] tmp = new char [peekChars.Length * 2];
1201 Array.Copy (peekChars, curNodePeekIndex,
1204 curNodePeekIndex = 0;
1205 peekCharsIndex = copysize;
// Otherwise slide the current node's characters to the front of the
// existing buffer.
1207 Array.Copy (peekChars, curNodePeekIndex,
1208 peekChars, 0, copysize);
1209 curNodePeekIndex = 0;
1210 peekCharsIndex = copysize;
// Store the pending unit at the read position.
// NOTE(review): whether this store is guarded by a check on remained is not
// visible here.
1213 peekChars [peekCharsIndex] = (char) remained;
// Read at most peekCharCapacity characters into the free space after it.
1214 int count = peekChars.Length - peekCharsIndex - offset;
1215 if (count > peekCharCapacity)
1216 count = peekCharCapacity;
1217 int read = reader.Read (
1218 peekChars, peekCharsIndex + offset, count);
1219 int remainingSize = offset + read;
1220 peekCharsLength = peekCharsIndex + remainingSize;
1222 return (remainingSize != 0);
1225 private bool ReadContent ()
1228 parserContext.NamespaceManager.PopScope ();
1229 parserContext.PopScope ();
1233 if (returnEntityReference)
1234 SetEntityReferenceProperties ();
1236 int c = PeekChar ();
1238 readState = ReadState.EndOfFile;
1239 ClearValueBuffer ();
1241 XmlNodeType.None, // nodeType
1242 String.Empty, // name
1243 String.Empty, // prefix
1244 String.Empty, // localName
1245 false, // isEmptyElement
1247 true // clearAttributes
1250 throw NotWFError ("unexpected end of file. Current depth is " + depth);
1257 switch (PeekChar ())
1265 ReadProcessingInstruction ();
1280 if (!ReadWhitespace ())
1282 return ReadContent ();
1290 return this.ReadState != ReadState.EndOfFile;
1293 private void SetEntityReferenceProperties ()
1295 DTDEntityDeclaration decl = DTD != null ? DTD.EntityDecls [entityReferenceName] : null;
1296 if (this.isStandalone)
1297 if (DTD == null || decl == null || !decl.IsInternalSubset)
1298 throw NotWFError ("Standalone document must not contain any references to an non-internally declared entity.");
1299 if (decl != null && decl.NotationName != null)
1300 throw NotWFError ("Reference to any unparsed entities is not allowed here.");
1302 ClearValueBuffer ();
1304 XmlNodeType.EntityReference, // nodeType
1305 entityReferenceName, // name
1306 String.Empty, // prefix
1307 entityReferenceName, // localName
1308 false, // isEmptyElement
1310 true // clearAttributes
1313 returnEntityReference = false;
1314 entityReferenceName = String.Empty;
1317 // The leading '<' has already been consumed.
1318 private void ReadStartTag ()
1320 if (currentState == XmlNodeType.EndElement)
1321 throw NotWFError ("Multiple document element was detected.");
1322 currentState = XmlNodeType.Element;
1324 parserContext.NamespaceManager.PushScope ();
1326 currentLinkedNodeLineNumber = line;
1327 currentLinkedNodeLinePosition = column;
1329 string prefix, localName;
1330 string name = ReadName (out prefix, out localName);
1331 if (currentState == XmlNodeType.EndElement)
1332 throw NotWFError ("document has terminated, cannot open new element");
1334 bool isEmptyElement = false;
1339 if (XmlChar.IsFirstNameChar (PeekChar ()))
1340 ReadAttributes (false);
1341 cursorToken = this.currentToken;
1344 for (int i = 0; i < attributeCount; i++)
1345 attributeTokens [i].FillXmlns ();
1346 for (int i = 0; i < attributeCount; i++)
1347 attributeTokens [i].FillNamespace ();
1351 for (int i = 0; i < attributeCount; i++)
1352 if (attributeTokens [i].Prefix == "xmlns" &&
1353 attributeTokens [i].Value == String.Empty)
1354 throw NotWFError ("Empty namespace URI cannot be mapped to non-empty prefix.");
1356 for (int i = 0; i < attributeCount; i++) {
1357 for (int j = i + 1; j < attributeCount; j++)
1358 if (Object.ReferenceEquals (attributeTokens [i].Name, attributeTokens [j].Name) ||
1359 (Object.ReferenceEquals (attributeTokens [i].LocalName, attributeTokens [j].LocalName) &&
1360 Object.ReferenceEquals (attributeTokens [i].NamespaceURI, attributeTokens [j].NamespaceURI)))
1361 throw NotWFError ("Attribute name and qualified name must be identical.");
1364 if (PeekChar () == '/') {
1366 isEmptyElement = true;
1371 PushElementName (name, localName, prefix);
1373 parserContext.PushScope ();
1378 XmlNodeType.Element, // nodeType
1382 isEmptyElement, // isEmptyElement
1384 false // clearAttributes
1386 if (prefix.Length > 0)
1387 currentToken.NamespaceURI = LookupNamespace (prefix, true);
1388 else if (namespaces)
1389 currentToken.NamespaceURI = parserContext.NamespaceManager.DefaultNamespace;
1392 if (NamespaceURI == null)
1393 throw NotWFError (String.Format ("'{0}' is undeclared namespace.", Prefix));
1395 for (int i = 0; i < attributeCount; i++) {
1396 MoveToAttribute (i);
1397 if (NamespaceURI == null)
1398 throw NotWFError (String.Format ("'{0}' is undeclared namespace.", Prefix));
1405 for (int i = 0; i < attributeCount; i++) {
1406 if (!Object.ReferenceEquals (attributeTokens [i].Prefix, XmlNamespaceManager.PrefixXml))
1408 string aname = attributeTokens [i].LocalName;
1409 string value = attributeTokens [i].Value;
1412 if (this.resolver != null) {
1414 BaseURI != String.Empty ?
1415 new Uri (BaseURI) : null;
1416 Uri uri = resolver.ResolveUri (
1418 parserContext.BaseURI =
1424 parserContext.BaseURI = value;
1427 parserContext.XmlLang = value;
1432 parserContext.XmlSpace = XmlSpace.Preserve;
1435 parserContext.XmlSpace = XmlSpace.Default;
1438 throw NotWFError (String.Format ("Invalid xml:space value: {0}", value));
1445 CheckCurrentStateUpdate ();
// Pushes an open element's names onto the elementNames stack, doubling the
// backing array first when it is full.
private void PushElementName (string name, string local, string prefix)
{
	int capacity = elementNames.Length;
	if (elementNameStackPos == capacity) {
		TagName [] grown = new TagName [capacity * 2];
		Array.Copy (elementNames, 0, grown, 0, elementNameStackPos);
		elementNames = grown;
	}

	elementNames [elementNameStackPos] = new TagName (name, local, prefix);
	elementNameStackPos++;
}
1459 // The reader is positioned on the first character
1460 // of the element's name.
// Parses an end tag, matching it against the innermost open element, and
// sets the current token to the resulting EndElement node.
// NOTE(review): a few statements (including the call that receives the
// argument list below) are not visible in this listing.
1461 private void ReadEndTag ()
1463 if (currentState != XmlNodeType.Element)
1464 throw NotWFError ("End tag cannot appear in this state.");
// Remember where this node started for line-info reporting.
1466 currentLinkedNodeLineNumber = line;
1467 currentLinkedNodeLinePosition = column;
1469 if (elementNameStackPos == 0)
1470 throw NotWFError ("closing element without matching opening element");
// Pop the innermost open element; the input must spell exactly its name.
1471 TagName expected = elementNames [--elementNameStackPos];
1472 Expect (expected.Name);
1474 ExpectAfterWhitespace ('>');
1479 XmlNodeType.EndElement, // nodeType
1480 expected.Name, // name
1481 expected.Prefix, // prefix
1482 expected.LocalName, // localName
1483 false, // isEmptyElement
1485 true // clearAttributes
// Prefixed names resolve through the namespace manager; unprefixed names
// take the default namespace when namespace support is on.
1487 if (expected.Prefix.Length > 0)
1488 currentToken.NamespaceURI = LookupNamespace (expected.Prefix, true);
1489 else if (namespaces)
1490 currentToken.NamespaceURI = parserContext.NamespaceManager.DefaultNamespace;
1494 CheckCurrentStateUpdate ();
// Sets currentState to EndElement when depth is 0, multiple roots are not
// allowed, and the current node is an empty element or an end element.
// Presumably this marks the root element as closed -- confirm against the
// code that reads currentState.
1497 private void CheckCurrentStateUpdate ()
1499 if (depth == 0 && !allowMultipleRoot && (IsEmptyElement || NodeType == XmlNodeType.EndElement))
1500 currentState = XmlNodeType.EndElement;
// Appends ch to nameBuffer as two UTF-16 code units:
//   high = 0xD800 + (ch - 0x10000) / 0x400
//   low  = 0xDC00 + (ch - 0x10000) % 0x400
// assumes ch >= 0x10000 -- TODO confirm at the call sites.
1504 private void AppendSurrogatePairNameChar (int ch)
1506 nameBuffer [nameLength++] = (char) ((ch - 0x10000) / 0x400 + 0xD800);
// Writing the high surrogate may have filled the buffer; grow it before
// writing the low surrogate.
1507 if (nameLength == nameCapacity)
1508 ExpandNameCapacity ();
1509 nameBuffer [nameLength++] = (char) ((ch - 0x10000) % 0x400 + 0xDC00);
// Doubles nameCapacity, allocates a new nameBuffer of that size and copies
// the first nameLength characters of the old buffer into it.
1512 private void ExpandNameCapacity ()
1514 nameCapacity = nameCapacity * 2;
1515 char [] oldNameBuffer = nameBuffer;
1516 nameBuffer = new char [nameCapacity];
1517 Array.Copy (oldNameBuffer, nameBuffer, nameLength);
// Appends the code point ch to valueBuffer: as a single char when it is
// below Char.MaxValue, and through AppendSurrogatePairValueChar otherwise
// (the surrogate call is presumably the else branch -- confirm).
// NOTE(review): the comparison is strict, so ch == Char.MaxValue (0xFFFF)
// does not take the single-char path even though it fits in one char;
// confirm whether '<=' was intended.
1521 private void AppendValueChar (int ch)
1523 if (ch < Char.MaxValue)
1524 valueBuffer.Append ((char) ch);
1526 AppendSurrogatePairValueChar (ch);
// Appends ch to valueBuffer as two UTF-16 code units: first
// 0xD800 + (ch - 0x10000) / 0x400, then 0xDC00 + (ch - 0x10000) % 0x400.
// assumes ch >= 0x10000 -- TODO confirm at the call sites.
1529 private void AppendSurrogatePairValueChar (int ch)
1531 valueBuffer.Append ((char) ((ch - 0x10000) / 0x400 + 0xD800));
1532 valueBuffer.Append ((char) ((ch - 0x10000) % 0x400 + 0xDC00));
// Builds the string value of the current node from valueBuffer.
// Whitespace and SignificantWhitespace values are atomized through a
// private NameTable (whitespacePool); other values come from
// valueBuffer.ToString.
1535 private string CreateValueString ()
1537 // Since whitespace strings are mostly identical
1538 // depending on the Depth, we make use of NameTable
1539 // to atomize whitespace strings.
1541 case XmlNodeType.Whitespace:
1542 case XmlNodeType.SignificantWhitespace:
1543 int len = valueBuffer.Length;
// Both the scratch array and the pool are created lazily on first use.
1544 if (whitespaceCache == null)
1545 whitespaceCache = new char [32];
// Runs that do not fit in the scratch array are not pooled
// (presumably this falls through to the generic path below -- confirm).
1546 if (len >= whitespaceCache.Length)
1548 if (whitespacePool == null)
1549 whitespacePool = new NameTable ();
// Two equivalent ways of copying the characters follow; presumably only
// one is compiled, selected by a preprocessor condition -- confirm.
1551 valueBuffer.CopyTo (0, whitespaceCache, 0, len);
1553 for (int i = 0; i < len; i++)
1554 whitespaceCache [i] = valueBuffer [i];
1556 return whitespacePool.Add (whitespaceCache, 0, valueBuffer.Length);
// Generic path: the ToString overload used depends on the builder's capacity.
1558 return (valueBuffer.Capacity < 100) ?
1559 valueBuffer.ToString (0, valueBuffer.Length) :
1560 valueBuffer.ToString ();
// Empties valueBuffer by resetting its Length to 0.
1563 private void ClearValueBuffer ()
1565 valueBuffer.Length = 0;
1568 // The reader is positioned on the first character
// Reads character data into valueBuffer until the next '<' or end of input.
// Requires currentState to be Element (NotWFError otherwise). References
// are handled through ReadReference, '\r' is turned into '\n' when
// normalization is on, invalid characters are rejected when
// CharacterChecking is on, and the sequence "]]>" is rejected.
// notWhitespace: when it is true at the end of the loop the node is
// reported as Text, otherwise as SignificantWhitespace or Whitespace
// depending on XmlSpace.
1570 private void ReadText (bool notWhitespace)
1572 if (currentState != XmlNodeType.Element)
1573 throw NotWFError ("Text node cannot appear in this state.");
1574 preserveCurrentTag = false;
1577 ClearValueBuffer ();
1579 int ch = PeekChar ();
// Tracks whether the previous character was ']' so that "]]>" can be detected.
1580 bool previousWasCloseBracket = false;
1582 while (ch != '<' && ch != -1) {
1585 ch = ReadReference (false);
1586 if (returnEntityReference) // Returns -1 if char validation should not be done
1588 } else if (normalization && ch == '\r') {
1592 // append '\n' instead of '\r'.
1593 AppendValueChar ('\n');
1594 // and in case of "\r\n", discard '\r'.
1596 if (CharacterChecking && XmlChar.IsInvalid (ch))
1597 throw NotWFError ("Not allowed character was found.");
1601 // FIXME: it might be optimized by the JIT later,
1602 // AppendValueChar (ch);
// Hand-inlined copy of AppendValueChar (see the FIXME above).
1604 if (ch < Char.MaxValue)
1605 valueBuffer.Append ((char) ch);
1607 AppendSurrogatePairValueChar (ch);
// "]]>" check: a '>' following two consecutive ']' is an error.
1612 if (previousWasCloseBracket)
1613 if (PeekChar () == '>')
1614 throw NotWFError ("Inside text content, character sequence ']]>' is not allowed.");
1615 previousWasCloseBracket = true;
1617 else if (previousWasCloseBracket)
1618 previousWasCloseBracket = false;
1620 notWhitespace = true;
// An entity reference with no text collected before it is reported as
// its own node.
1623 if (returnEntityReference && valueBuffer.Length == 0) {
1624 SetEntityReferenceProperties ();
1626 XmlNodeType nodeType = notWhitespace ? XmlNodeType.Text :
1627 this.XmlSpace == XmlSpace.Preserve ? XmlNodeType.SignificantWhitespace : XmlNodeType.Whitespace;
1629 nodeType, // nodeType
1630 String.Empty, // name
1631 String.Empty, // prefix
1632 String.Empty, // localName
1633 false, // isEmptyElement
1634 null, // value: create only when required
1635 true // clearAttributes
1640 // The leading '&' has already been consumed.
1641 // Returns true if the entity reference isn't a simple
1642 // character reference or one of the predefined entities.
1643 // This allows the ReadText method to break so that the
1644 // next call to Read will return the EntityReference node.
// NOTE(review): the comment above speaks of a bool result, but the method
// returns int: the value produced by ReadCharacterReference when the next
// character is '#', otherwise the value produced by ReadEntityReference.
// ignoreEntityReferences is passed through to ReadEntityReference unchanged.
1645 private int ReadReference (bool ignoreEntityReferences)
1647 if (PeekChar () == '#') {
1649 return ReadCharacterReference ();
1651 return ReadEntityReference (ignoreEntityReferences);
// Parses the numeric part of a character reference up to ';' and
// accumulates it into 'value': hexadecimal digits when the next character
// is 'x', decimal digits otherwise. A character that is not a valid digit
// raises NotWFError. Presumably 'value' is the return value -- confirm.
// NOTE(review): no overflow guard on 'value' is visible for very long
// digit runs -- confirm.
1654 private int ReadCharacterReference ()
1659 if (PeekChar () == 'x') {
1662 while ((ch = PeekChar ()) != ';' && ch != -1) {
// Each hexadecimal digit shifts the accumulated value left by four bits.
1665 if (ch >= '0' && ch <= '9')
1666 value = (value << 4) + ch - '0';
1667 else if (ch >= 'A' && ch <= 'F')
1668 value = (value << 4) + ch - 'A' + 10;
1669 else if (ch >= 'a' && ch <= 'f')
1670 value = (value << 4) + ch - 'a' + 10;
1672 throw NotWFError (String.Format (CultureInfo.InvariantCulture,
1673 "invalid hexadecimal digit: {0} (#x{1:X})",
// Decimal form: each digit multiplies the accumulated value by ten.
1678 while ((ch = PeekChar ()) != ';' && ch != -1) {
1681 if (ch >= '0' && ch <= '9')
1682 value = value * 10 + ch - '0';
1684 throw NotWFError (String.Format (CultureInfo.InvariantCulture,
1685 "invalid decimal digit: {0} (#x{1:X})",
1693 // There is no way to save surrogate pairs...
// NOTE(review): the validity check runs only when both CharacterChecking
// and Normalization are on -- confirm that requiring both is intended.
1694 if (CharacterChecking && Normalization &&
1695 XmlChar.IsInvalid (value))
1696 throw NotWFError ("Referenced character was not allowed in XML. Normalization is " + normalization + ", checkCharacters = " + checkCharacters);
1700 // Returns -1 if it should not be validated.
1701 // Real EOF must not be detected here.
// Reads an entity name and looks it up with XmlChar.GetPredefinedEntity; a
// non-negative result identifies a predefined entity. For other names:
// when ignoreEntityReferences is true the reference is appended to
// valueBuffer verbatim as "&name;"; otherwise (presumably the else branch
// -- confirm) returnEntityReference is set and the name is stored in
// entityReferenceName.
1702 private int ReadEntityReference (bool ignoreEntityReferences)
1704 string name = ReadName ();
1707 int predefined = XmlChar.GetPredefinedEntity (name);
1708 if (predefined >= 0)
1711 if (ignoreEntityReferences) {
1712 AppendValueChar ('&');
1713 for (int i = 0; i < name.Length; i++)
1714 AppendValueChar (name [i]);
1715 AppendValueChar (';');
1717 returnEntityReference = true;
1718 entityReferenceName = name;
1724 // The reader is positioned on the first character of
1725 // the attribute name.
// Reads a run of name="value" attributes into attributeTokens until '?',
// '/', '>' or end of input is seen. For each attribute it records the
// line/column, the qualified name, prefix and local name, requires '='
// (via ExpectAfterWhitespace) and reads the value with
// ReadAttributeValueTokens. currentAttribute and currentAttributeValue are
// reset to -1 on entry and again on exit.
// isXmlDecl: presumably selects XML-declaration handling (see the "hack"
// comment below) -- confirm.
1726 private void ReadAttributes (bool isXmlDecl)
// Becomes true once an attribute has been read that was not followed by
// whitespace; a further attribute is then an error.
1729 bool requireWhitespace = false;
1730 currentAttribute = -1;
1731 currentAttributeValue = -1;
1734 if (!SkipWhitespace () && requireWhitespace)
1735 throw NotWFError ("Unexpected token. Name is required here.");
1737 IncrementAttributeToken ();
1738 currentAttributeToken.LineNumber = line;
1739 currentAttributeToken.LinePosition = column;
1741 string prefix, localName;
1742 currentAttributeToken.Name = ReadName (out prefix, out localName);
1743 currentAttributeToken.Prefix = prefix;
1744 currentAttributeToken.LocalName = localName;
1745 ExpectAfterWhitespace ('=');
// -1 makes ReadAttributeValueTokens read the opening quote from the input.
1747 ReadAttributeValueTokens (-1);
1748 // This hack is required for xmldecl which has
1749 // both effective attributes and Value.
1752 dummyValue = currentAttributeToken.Value;
1756 if (!SkipWhitespace ())
1757 requireWhitespace = true;
1758 peekChar = PeekChar ();
1760 if (peekChar == '?')
1763 else if (peekChar == '/' || peekChar == '>')
1765 } while (peekChar != -1);
1767 currentAttribute = -1;
1768 currentAttributeValue = -1;
// Adds a synthetic attribute: takes the next attribute token, stores the
// atomized name with an empty prefix and namespace URI, then takes the next
// attribute value token and fills it through SetTokenProperties.
// 'value' is presumably passed to SetTokenProperties as the token value --
// confirm.
1771 private void AddAttributeWithValue (string name, string value)
1773 IncrementAttributeToken ();
1774 XmlAttributeTokenInfo ati = attributeTokens [currentAttribute];
1775 ati.Name = parserContext.NameTable.Add (name);
1776 ati.Prefix = String.Empty;
1777 ati.NamespaceURI = String.Empty;
1778 IncrementAttributeValueToken ();
1779 XmlTokenInfo vti = attributeValueTokens [currentAttributeValue];
1780 SetTokenProperties (vti,
// Prepares the attributeTokens slot at index currentAttribute and makes it
// currentAttributeToken: doubles the array when the index has reached its
// length, allocates the XmlAttributeTokenInfo on first use, then clears it
// for reuse.
// presumably currentAttribute is advanced before these lines -- TODO confirm.
1792 private void IncrementAttributeToken ()
1795 if (attributeTokens.Length == currentAttribute) {
1796 XmlAttributeTokenInfo [] newArray =
1797 new XmlAttributeTokenInfo [attributeTokens.Length * 2];
1798 attributeTokens.CopyTo (newArray, 0);
1799 attributeTokens = newArray;
// Token objects are created lazily and recycled.
1801 if (attributeTokens [currentAttribute] == null)
1802 attributeTokens [currentAttribute] = new XmlAttributeTokenInfo (this);
1803 currentAttributeToken = attributeTokens [currentAttribute];
1804 currentAttributeToken.Clear ();
// Advances currentAttributeValue and makes that slot of
// attributeValueTokens the currentAttributeValueToken: doubles the array
// when the index has reached its length, allocates the XmlTokenInfo on
// first use, then clears it for reuse.
1807 private void IncrementAttributeValueToken ()
1809 currentAttributeValue++;
1810 if (attributeValueTokens.Length == currentAttributeValue) {
1811 XmlTokenInfo [] newArray = new XmlTokenInfo [attributeValueTokens.Length * 2];
1812 attributeValueTokens.CopyTo (newArray, 0);
1813 attributeValueTokens = newArray;
// Token objects are created lazily and recycled.
1815 if (attributeValueTokens [currentAttributeValue] == null)
1816 attributeValueTokens [currentAttributeValue] = new XmlTokenInfo (this);
1817 currentAttributeValueToken = attributeValueTokens [currentAttributeValue];
1818 currentAttributeValueToken.Clear ();
1821 // LAMESPEC: Orthodox XML reader should normalize attribute values
// Reads one quoted attribute value and splits it into value tokens: Text
// tokens that refer to ranges of valueBuffer (ValueBufferStart /
// ValueBufferEnd) and EntityReference tokens for general entities that are
// not expanded. The range of tokens belonging to the attribute is stored in
// ValueTokenStartIndex / ValueTokenEndIndex of currentAttributeToken.
// dummyQuoteChar: a negative value means the opening quote is read from
// the input; otherwise the given character is used as the quote. The
// non-negative case is the "attribute value constructor" mentioned below.
1822 private void ReadAttributeValueTokens (int dummyQuoteChar)
1824 int quoteChar = (dummyQuoteChar < 0) ? ReadChar () : dummyQuoteChar;
1826 if (quoteChar != '\'' && quoteChar != '\"')
1827 throw NotWFError ("an attribute value was not quoted");
1828 currentAttributeToken.QuoteChar = (char) quoteChar;
1830 IncrementAttributeValueToken ();
1831 currentAttributeToken.ValueTokenStartIndex = currentAttributeValue;
1832 currentAttributeValueToken.LineNumber = line;
1833 currentAttributeValueToken.LinePosition = column;
// incrementToken is set after an EntityReference token has been emitted:
// the next character then has to start a fresh Text token.
1835 bool incrementToken = false;
1836 bool isNewToken = true;
1839 currentAttributeValueToken.ValueBufferStart = valueBuffer.Length;
1842 if (ch == quoteChar)
1845 if (incrementToken) {
1846 IncrementAttributeValueToken ();
1847 currentAttributeValueToken.ValueBufferStart = valueBuffer.Length;
1848 currentAttributeValueToken.LineNumber = line;
1849 currentAttributeValueToken.LinePosition = column;
1850 incrementToken = false;
1857 throw NotWFError ("attribute values cannot contain '<'");
// End of input is an error only when the value is read from the document.
1859 if (dummyQuoteChar < 0)
1860 throw NotWFError ("unexpected end of file in an attribute value");
1861 else // Attribute value constructor.
1867 if (PeekChar () == '\n')
1868 continue; // skip '\r'.
1870 // The csc in MS.NET 2.0 beta 1 barfs on this goto, so work around that
1879 // When Normalize = true, then replace
1880 // all spaces to ' '
// Reference handling: a '#' starts a character reference, anything else is
// read as an entity name.
1886 if (PeekChar () == '#') {
1888 ch = ReadCharacterReference ();
1889 AppendValueChar (ch);
1892 // Check XML 1.0 section 3.1 WFC.
1893 string entName = ReadName ();
1895 int predefined = XmlChar.GetPredefinedEntity (entName);
// A negative result means entName is not one of the predefined entities.
1896 if (predefined < 0) {
1897 CheckAttributeEntityReferenceWFC (entName);
1899 if (entityHandling == EntityHandling.ExpandEntities) {
// Expanding: the entity's replacement text is inlined into the current
// Text token.
1900 string value = DTD.GenerateEntityAttributeText (entName);
1901 foreach (char c in value)
1902 AppendValueChar (c);
// Not expanding (presumably the else branch -- confirm): close the current
// Text token and emit a separate EntityReference token.
1906 currentAttributeValueToken.ValueBufferEnd = valueBuffer.Length;
1907 currentAttributeValueToken.NodeType = XmlNodeType.Text;
1909 IncrementAttributeValueToken ();
1910 currentAttributeValueToken.Name = entName;
1911 currentAttributeValueToken.Value = String.Empty;
1912 currentAttributeValueToken.NodeType = XmlNodeType.EntityReference;
1913 incrementToken = true;
1917 AppendValueChar (predefined);
1920 if (CharacterChecking && XmlChar.IsInvalid (ch))
1921 throw NotWFError ("Invalid character was found.");
1922 // FIXME: it might be optimized by the JIT later,
1923 // AppendValueChar (ch);
// Hand-inlined copy of AppendValueChar (see the FIXME above).
1925 if (ch < Char.MaxValue)
1926 valueBuffer.Append ((char) ch);
1928 AppendSurrogatePairValueChar (ch);
// Close the trailing Text token unless the value ended on an entity
// reference.
1935 if (!incrementToken) {
1936 currentAttributeValueToken.ValueBufferEnd = valueBuffer.Length;
1937 currentAttributeValueToken.NodeType = XmlNodeType.Text;
1939 currentAttributeToken.ValueTokenEndIndex = currentAttributeValue;
// Checks a general entity reference found inside an attribute value.
// Raises NotWFError when:
//  - the entity is not declared and either entities are being expanded or
//    both a DTD and a resolver are present;
//  - the declaration has an external reference;
//  - the document is standalone and the declaration is not in the internal
//    subset;
//  - the entity value contains '<'.
1943 private void CheckAttributeEntityReferenceWFC (string entName)
1945 DTDEntityDeclaration entDecl =
1946 DTD == null ? null : DTD.EntityDecls [entName];
1947 if (entDecl == null) {
// NOTE(review): the trailing 'entDecl == null' repeats the enclosing
// condition and is always true here.
1948 if (entityHandling == EntityHandling.ExpandEntities
1949 || (DTD != null && resolver != null && entDecl == null))
1950 throw NotWFError (String.Format ("Referenced entity '{0}' does not exist.", entName));
// presumably an undeclared entity that passed the check above returns
// before the dereferences below -- TODO confirm.
1955 if (entDecl.HasExternalReference)
1956 throw NotWFError ("Reference to external entities is not allowed in the value of an attribute.");
1957 if (isStandalone && !entDecl.IsInternalSubset)
1958 throw NotWFError ("Reference to external entities is not allowed in the internal subset.");
1959 if (entDecl.EntityValue.IndexOf ('<') >= 0)
1960 throw NotWFError ("Attribute must not contain character '<' either directly or indirectly by way of entity references.");
1963 // The reader is positioned on the first character
1966 // It may be xml declaration or processing instruction.
// Reads the target name and then collects the instruction text into
// valueBuffer up to "?>". A target equal to "xml" in any letter case other
// than all-lowercase is rejected. When the target is the atomized "xml"
// string the text is handed to VerifyXmlDeclaration; the node is otherwise
// described as a ProcessingInstruction named after the target.
1967 private void ReadProcessingInstruction ()
1969 string target = ReadName ();
1970 if (target != "xml" && target.ToLower (CultureInfo.InvariantCulture) == "xml")
1971 throw NotWFError ("Not allowed processing instruction name which starts with 'X', 'M', 'L' was found.");
// The target must be followed by whitespace or directly by '?'.
1973 if (!SkipWhitespace ())
1974 if (PeekChar () != '?')
1975 throw NotWFError ("Invalid processing instruction name was found.");
1977 ClearValueBuffer ();
1980 while ((ch = PeekChar ()) != -1) {
1983 if (ch == '?' && PeekChar () == '>') {
1988 if (CharacterChecking && XmlChar.IsInvalid (ch))
1989 throw NotWFError ("Invalid character was found.");
1990 AppendValueChar (ch);
// Reference comparison: ReadName returns a NameTable-atomized string.
// assumes XmlNamespaceManager.PrefixXml is atomized in the same table --
// TODO confirm.
1993 if (Object.ReferenceEquals (target, XmlNamespaceManager.PrefixXml))
1994 VerifyXmlDeclaration ();
// A processing instruction seen in the initial state moves the reader
// into the XmlDeclaration state.
1996 if (currentState == XmlNodeType.None)
1997 currentState = XmlNodeType.XmlDeclaration;
2000 XmlNodeType.ProcessingInstruction, // nodeType
2002 String.Empty, // prefix
2003 target, // localName
2004 false, // isEmptyElement
2005 null, // value: create only when required
2006 true // clearAttributes
// Validates the text of an XML declaration held in valueBuffer and exposes
// its pseudo-attributes as attributes of an XmlDeclaration node. The
// expected order is: version (must be "1.0"), then optional encoding, then
// optional standalone ("yes" or "no"); any other name or trailing text
// raises NotWFError. Unless multiple roots are allowed, the declaration is
// only accepted while currentState is None.
2011 void VerifyXmlDeclaration ()
2013 if (!allowMultipleRoot && currentState != XmlNodeType.None)
2014 throw NotWFError ("XML declaration cannot appear in this state.");
2016 currentState = XmlNodeType.XmlDeclaration;
2018 string text = CreateValueString ();
2024 string encoding = null, standalone = null;
2026 ParseAttributeFromString (text, ref idx, out name, out value);
2027 if (name != "version" || value != "1.0")
2028 throw NotWFError ("'version' is expected.");
// 'name' is cleared so that the checks below see an empty name when no
// further pseudo-attribute is parsed.
2029 name = String.Empty;
2030 if (SkipWhitespaceInString (text, ref idx) && idx < text.Length)
2031 ParseAttributeFromString (text, ref idx, out name, out value);
2032 if (name == "encoding") {
2033 if (!XmlChar.IsValidIANAEncoding (value))
2034 throw NotWFError ("'encoding' must be a valid IANA encoding name.");
// The context encoding is taken from the underlying XmlStreamReader when
// there is one; the other visible assignment uses Encoding.Unicode
// (presumably the else branch -- confirm).
2035 if (reader is XmlStreamReader)
2036 parserContext.Encoding = ((XmlStreamReader) reader).Encoding;
2038 parserContext.Encoding = Encoding.Unicode;
2040 name = String.Empty;
2041 if (SkipWhitespaceInString (text, ref idx) && idx < text.Length)
2042 ParseAttributeFromString (text, ref idx, out name, out value);
2044 if (name == "standalone") {
2045 this.isStandalone = value == "yes";
// NOTE(review): the message below reads "is allow"; "is allowed" was
// probably intended. It is a runtime string and is left unchanged here.
2046 if (value != "yes" && value != "no")
2047 throw NotWFError ("Only 'yes' or 'no' is allow for 'standalone'");
2049 SkipWhitespaceInString (text, ref idx);
2051 else if (name.Length != 0)
2052 throw NotWFError (String.Format ("Unexpected token: '{0}'", name));
// Anything left after the recognized pseudo-attributes is an error.
2054 if (idx < text.Length)
2055 throw NotWFError ("'?' is expected.");
2057 AddAttributeWithValue ("version", "1.0");
// presumably 'encoding' and 'standalone' were assigned in the branches
// above -- TODO confirm.
2058 if (encoding != null)
2059 AddAttributeWithValue ("encoding", encoding);
2060 if (standalone != null)
2061 AddAttributeWithValue ("standalone", standalone);
2062 currentAttribute = currentAttributeValue = -1;
2065 XmlNodeType.XmlDeclaration, // nodeType
2067 String.Empty, // prefix
2069 false, // isEmptyElement
2071 false // clearAttributes
// Moves idx past any XML whitespace in text starting at idx.
// Returns true when idx ended beyond 'start', i.e. at least one whitespace
// character was skipped (assumes 'start' holds the entry value of idx --
// TODO confirm).
2075 bool SkipWhitespaceInString (string text, ref int idx)
2078 while (idx < text.Length && XmlChar.IsWhitespace (text [idx]))
2080 return idx - start > 0;
// Parses one name="value" pair out of src, starting at idx.
// src:   the text to scan.
// idx:   scan position, updated as input is consumed.
// name:  receives the run of name characters.
// value: receives the quoted text, taken as a substring of src.
// Raises NotWFError when '=' or an opening quote (" or ') is missing.
2083 private void ParseAttributeFromString (string src,
2084 ref int idx, out string name, out string value)
2086 while (idx < src.Length && XmlChar.IsWhitespace (src [idx]))
2090 while (idx < src.Length && XmlChar.IsNameChar (src [idx]))
2092 name = src.Substring (start, idx - start);
2094 while (idx < src.Length && XmlChar.IsWhitespace (src [idx]))
2096 if (idx == src.Length || src [idx] != '=')
2097 throw NotWFError (String.Format ("'=' is expected after {0}", name));
2100 while (idx < src.Length && XmlChar.IsWhitespace (src [idx]))
2103 if (idx == src.Length || src [idx] != '"' && src [idx] != '\'')
2104 throw NotWFError ("'\"' or '\'' is expected.");
// The closing quote must be the same character as the opening one.
2106 char quote = src [idx];
2110 while (idx < src.Length && src [idx] != quote)
// NOTE(review): the '- 1' presumably compensates for idx having been moved
// past the closing quote -- confirm.
2114 value = src.Substring (start, idx - start - 1);
// Consumes a text declaration ("<?xml " followed by optional version and
// an encoding pseudo-attribute) when the input starts with one.
// A version value is limited to three characters and, when exactly three,
// must be "1.0". The encoding value is read up to its closing quote
// without being checked here. A target that matches "xml " only
// case-insensitively raises NotWFError.
2117 private void SkipTextDeclaration ()
2119 if (PeekChar () != '<')
2124 if (PeekChar () != '?') {
// Peek ahead until six characters are buffered so that the four
// characters after "<?" can be compared with "xml ".
2130 while (peekCharsIndex < 6) {
2131 if (PeekChar () < 0)
2136 if (new string (peekChars, 2, 4) != "xml ") {
2137 if (new string (peekChars, 2, 4).ToLower (CultureInfo.InvariantCulture) == "xml ") {
2138 throw NotWFError ("Processing instruction name must not be character sequence 'X' 'M' 'L' with case insensitivity.");
// Optional version pseudo-attribute.
2147 if (PeekChar () == 'v') {
2149 ExpectAfterWhitespace ('=');
2151 int quoteChar = ReadChar ();
2152 char [] expect1_0 = new char [3];
2153 int versionLength = 0;
2154 switch (quoteChar) {
2157 while (PeekChar () != quoteChar) {
2158 if (PeekChar () == -1)
2159 throw NotWFError ("Invalid version declaration inside text declaration.");
2160 else if (versionLength == 3)
2161 throw NotWFError ("Invalid version number inside text declaration.");
2163 expect1_0 [versionLength] = (char) ReadChar ();
2165 if (versionLength == 3 && new String (expect1_0) != "1.0")
2166 throw NotWFError ("Invalid version number inside text declaration.");
2173 throw NotWFError ("Invalid version declaration inside text declaration.");
// Encoding pseudo-attribute.
2177 if (PeekChar () == 'e') {
2178 Expect ("encoding");
2179 ExpectAfterWhitespace ('=');
2181 int quoteChar = ReadChar ();
2182 switch (quoteChar) {
2185 while (PeekChar () != quoteChar)
2186 if (ReadChar () == -1)
2187 throw NotWFError ("Invalid encoding declaration inside text declaration.");
2192 throw NotWFError ("Invalid encoding declaration inside text declaration.");
2194 // Encoding value should be checked inside XmlInputStream.
2197 // this condition is to check if this instance is
2198 // not created by XmlReader.Create() (which just
2199 // omits strict text declaration check).
2200 else if (Conformance == ConformanceLevel.Auto)
2201 throw NotWFError ("Encoding declaration is mandatory in text declaration.");
2207 // The reader is positioned on the first character after
2208 // the leading '<!'.
// Looks at the next character to decide which kind of '<!' markup follows.
// Raises NotWFError for markup it does not recognize. Presumably it
// dispatches to the comment, CDATA and DOCTYPE readers -- confirm.
2209 private void ReadDeclaration ()
2211 int ch = PeekChar ();
2229 throw NotWFError ("Unexpected declaration markup was found.");
2233 // The reader is positioned on the first character after
2234 // the leading '<!--'.
// Collects the comment text into valueBuffer up to the closing "-->" and
// describes the node as a Comment with empty name, prefix and local name.
// A "--" that is not followed by '>' raises NotWFError.
2235 private void ReadComment ()
// A comment seen in the initial state moves the reader into the
// XmlDeclaration state.
2237 if (currentState == XmlNodeType.None)
2238 currentState = XmlNodeType.XmlDeclaration;
2240 preserveCurrentTag = false;
2242 ClearValueBuffer ();
2245 while ((ch = PeekChar ()) != -1) {
2248 if (ch == '-' && PeekChar () == '-') {
2251 if (PeekChar () != '>')
2252 throw NotWFError ("comments cannot contain '--'");
// NOTE(review): this check is not gated by CharacterChecking, unlike the
// equivalent checks in ReadText and ReadCDATA -- confirm that is intended.
2258 if (XmlChar.IsInvalid (ch))
2259 throw NotWFError ("Not allowed character was found.");
2261 AppendValueChar (ch);
2265 XmlNodeType.Comment, // nodeType
2266 String.Empty, // name
2267 String.Empty, // prefix
2268 String.Empty, // localName
2269 false, // isEmptyElement
2270 null, // value: create only when required
2271 true // clearAttributes
2275 // The reader is positioned on the first character after
2276 // the leading '<![CDATA['.
// Collects the section content into valueBuffer up to the closing "]]>"
// and describes the node as CDATA with empty name, prefix and local name.
// Requires currentState to be Element (NotWFError otherwise). '\r' is
// turned into '\n' when normalization is on, and invalid characters are
// rejected when CharacterChecking is on.
2277 private void ReadCDATA ()
2279 if (currentState != XmlNodeType.Element)
2280 throw NotWFError ("CDATA section cannot appear in this state.");
2281 preserveCurrentTag = false;
2283 ClearValueBuffer ();
2287 while (PeekChar () != -1) {
// Two consecutive ']' may start the terminator; the '>' test below decides.
2292 if (ch == ']' && PeekChar () == ']') {
2293 ch = ReadChar (); // ']'
2295 if (PeekChar () == '>') {
2302 if (normalization && ch == '\r') {
2305 // append '\n' instead of '\r'.
2306 AppendValueChar ('\n');
2307 // otherwise, discard '\r'.
2310 if (CharacterChecking && XmlChar.IsInvalid (ch))
2311 throw NotWFError ("Invalid character was found.");
2313 // FIXME: it might be optimized by the JIT later,
2314 // AppendValueChar (ch);
// Hand-inlined copy of AppendValueChar (see the FIXME above).
2316 if (ch < Char.MaxValue)
2317 valueBuffer.Append ((char) ch);
2319 AppendSurrogatePairValueChar (ch);
2324 XmlNodeType.CDATA, // nodeType
2325 String.Empty, // name
2326 String.Empty, // prefix
2327 String.Empty, // localName
2328 false, // isEmptyElement
2329 null, // value: create only when required
2330 true // clearAttributes
2334 // The reader is positioned on the first character after
2335 // the leading '<!DOCTYPE'.
// Parses a document type declaration: the doctype name, an optional
// external ID (system literal alone, or public literal followed by a
// system literal) and an optional internal subset in '[' ... ']'. The
// internal subset text is stored in parserContext.InternalSubset, the DTD
// object model is generated, and the node is described as DocumentType
// with "PUBLIC" / "SYSTEM" attributes for the identifiers that were
// present.
// Raises NotWFError when a DTD is prohibited (the guard condition is
// presumably a DTD-prohibition setting -- confirm) or when currentState is
// already DocumentType, Element or EndElement.
2336 private void ReadDoctypeDecl ()
2339 throw NotWFError ("Document Type Declaration (DTD) is prohibited in this XML.");
2340 switch (currentState) {
2341 case XmlNodeType.DocumentType:
2342 case XmlNodeType.Element:
2343 case XmlNodeType.EndElement:
2344 throw NotWFError ("Document type cannot appear in this state.");
2346 currentState = XmlNodeType.DocumentType;
2348 string doctypeName = null;
2349 string publicId = null;
2350 string systemId = null;
2351 int intSubsetStartLine = 0;
2352 int intSubsetStartColumn = 0;
2355 doctypeName = ReadName ();
// A system ID on its own is read with expectSYSTEM = true.
2360 systemId = ReadSystemLiteral (true);
// A public ID is followed by a system literal that has no 'SYSTEM' keyword.
2363 publicId = ReadPubidLiteral ();
2364 if (!SkipWhitespace ())
2365 throw NotWFError ("Whitespace is required between PUBLIC id and SYSTEM id.");
2366 systemId = ReadSystemLiteral (false);
2372 if(PeekChar () == '[')
2374 // read markupdecl etc. or end of decl
// The position is recorded so that the DTD reader can report line and
// column relative to the start of the internal subset.
2376 intSubsetStartLine = this.LineNumber;
2377 intSubsetStartColumn = this.LinePosition;
2378 ClearValueBuffer ();
2379 ReadInternalSubset ();
2380 parserContext.InternalSubset = CreateValueString ();
2382 // end of DOCTYPE decl.
2383 ExpectAfterWhitespace ('>');
2385 GenerateDTDObjectModel (doctypeName, publicId,
2386 systemId, parserContext.InternalSubset,
2387 intSubsetStartLine, intSubsetStartColumn);
2389 // set properties for <!DOCTYPE> node
2391 XmlNodeType.DocumentType, // nodeType
2392 doctypeName, // name
2393 String.Empty, // prefix
2394 doctypeName, // localName
2395 false, // isEmptyElement
2396 parserContext.InternalSubset, // value
2397 true // clearAttributes
2400 if (publicId != null)
2401 AddAttributeWithValue ("PUBLIC", publicId);
2402 if (systemId != null)
2403 AddAttributeWithValue ("SYSTEM", systemId);
// Reset both cursors so the reader is not positioned on an attribute.
2404 currentAttribute = currentAttributeValue = -1;
// Convenience overload: forwards to the six-argument overload with 0 for
// both the internal subset start line and start column.
2407 internal DTDObjectModel GenerateDTDObjectModel (string name, string publicId,
2408 string systemId, string internalSubset)
2410 return GenerateDTDObjectModel (name, publicId, systemId, internalSubset, 0, 0);
// Creates a new DTDObjectModel on parserContext.Dtd, fills it with the
// identifiers, the internal subset text, the resolver, the standalone flag
// and the current line/column, then runs a DTDReader over it and returns
// the reader's result.
// intSubsetStartLine / intSubsetStartColumn: position handed to the
// DTDReader constructor.
// NOTE(review): no visible line uses the 'name' parameter -- confirm it is
// assigned to the DTD elsewhere in this method.
2413 internal DTDObjectModel GenerateDTDObjectModel (string name, string publicId,
2414 string systemId, string internalSubset, int intSubsetStartLine, int intSubsetStartColumn)
2417 parserContext.Dtd = new DTDObjectModel (this.NameTable); // merges both internal and external subsets in the meantime,
2418 DTD.BaseURI = BaseURI;
2420 DTD.PublicId = publicId;
2421 DTD.SystemId = systemId;
2422 DTD.InternalSubset = internalSubset;
2423 DTD.XmlResolver = resolver;
2424 DTD.IsStandalone = isStandalone;
2425 DTD.LineNumber = line;
2426 DTD.LinePosition = column;
2428 DTDReader dr = new DTDReader (DTD, intSubsetStartLine, intSubsetStartColumn);
// The DTD reader uses the same normalization setting as this reader.
2429 dr.Normalization = this.normalization;
2430 return dr.GenerateDTDObjectModel ();
// Lexical states tracked while scanning the internal subset. The members
// used by ReadInternalSubset are Free, ElementDecl, AttlistDecl,
// EntityDecl, NotationDecl, PI, Comment, InsideSingleQuoted and
// InsideDoubleQuoted.
2433 private enum DtdInputState
// Stack of DtdInputState values backed by a non-generic Stack. It starts
// with a single Free entry pushed by the constructor.
2446 private class DtdInputStateStack
2448 Stack intern = new Stack ();
2449 public DtdInputStateStack ()
2451 Push (DtdInputState.Free);
// Returns the top state without removing it.
2454 public DtdInputState Peek ()
2456 return (DtdInputState) intern.Peek ();
// Removes and returns the top state.
2459 public DtdInputState Pop ()
2461 return (DtdInputState) intern.Pop ();
// Pushes val (presumably onto 'intern' -- confirm).
2464 public void Push (DtdInputState val)
// State stack used while reading the internal subset; State exposes the
// entry currently on top.
2471 DtdInputStateStack stateStack = new DtdInputStateStack ();
2472 DtdInputState State {
2473 get { return stateStack.Peek (); }
// Reads one character with ReadChar and appends it to valueBuffer via
// AppendValueChar (presumably returning the character read -- confirm).
// NOTE(review): the result of ReadChar is appended without a visible
// end-of-input check; confirm how -1 is handled.
2476 private int ReadValueChar ()
2478 int ret = ReadChar ();
2479 AppendValueChar (ret);
// Appends s to valueBuffer (presumably after requiring s in the input via
// Expect -- confirm).
2483 private void ExpectAndAppend (string s)
2486 valueBuffer.Append (s);
2489 // Simply read but not generate any result.
// Scans the internal subset character by character, copying everything
// into valueBuffer, and uses stateStack to track whether the scanner is
// inside a declaration, a processing instruction, a comment or a quoted
// literal. The scan stops at the character that ends the subset in the
// Free state; that character is removed from valueBuffer again. End of
// input and malformed markup raise NotWFError.
2490 private void ReadInternalSubset ()
2492 bool continueParse = true;
2494 while (continueParse) {
2495 switch (ReadValueChar ()) {
2498 case DtdInputState.Free:
// The terminating character is not part of the subset text.
2500 valueBuffer.Remove (valueBuffer.Length - 1, 1);
2501 continueParse = false;
2503 case DtdInputState.InsideDoubleQuoted:
2505 case DtdInputState.InsideSingleQuoted:
2508 throw NotWFError ("unexpected end of file at DTD.");
2512 throw NotWFError ("unexpected end of file at DTD.");
2515 case DtdInputState.InsideDoubleQuoted:
2516 case DtdInputState.InsideSingleQuoted:
2517 case DtdInputState.Comment:
2518 continue; // well-formed
2520 int c = ReadValueChar ();
2523 stateStack.Push (DtdInputState.PI);
// Declaration keywords are recognized from their leading characters and
// the rest of the keyword is required verbatim.
2526 switch (ReadValueChar ()) {
2528 switch (ReadValueChar ()) {
2530 ExpectAndAppend ("EMENT");
2531 stateStack.Push (DtdInputState.ElementDecl);
2534 ExpectAndAppend ("TITY");
2535 stateStack.Push (DtdInputState.EntityDecl);
2538 throw NotWFError ("unexpected token '<!E'.");
2542 ExpectAndAppend ("TTLIST");
2543 stateStack.Push (DtdInputState.AttlistDecl);
2546 ExpectAndAppend ("OTATION");
2547 stateStack.Push (DtdInputState.NotationDecl);
2550 ExpectAndAppend ("-");
2551 stateStack.Push (DtdInputState.Comment);
2556 throw NotWFError (String.Format ("unexpected '<{0}'.", (char) c));
// A quote opens a literal unless the scanner is inside a comment or inside
// a literal delimited by the other quote character.
2560 if (State == DtdInputState.InsideSingleQuoted)
2562 else if (State != DtdInputState.InsideDoubleQuoted && State != DtdInputState.Comment)
2563 stateStack.Push (DtdInputState.InsideSingleQuoted);
2566 if (State == DtdInputState.InsideDoubleQuoted)
2568 else if (State != DtdInputState.InsideSingleQuoted && State != DtdInputState.Comment)
2569 stateStack.Push (DtdInputState.InsideDoubleQuoted);
// All four declaration states share the NotationDecl handling.
2573 case DtdInputState.ElementDecl:
2574 goto case DtdInputState.NotationDecl;
2575 case DtdInputState.AttlistDecl:
2576 goto case DtdInputState.NotationDecl;
2577 case DtdInputState.EntityDecl:
2578 goto case DtdInputState.NotationDecl;
2579 case DtdInputState.NotationDecl:
2582 case DtdInputState.InsideDoubleQuoted:
2583 case DtdInputState.InsideSingleQuoted:
2584 case DtdInputState.Comment:
2587 throw NotWFError ("unexpected token '>'");
2591 if (State == DtdInputState.PI) {
2592 if (ReadValueChar () == '>')
2597 if (State == DtdInputState.Comment) {
2598 if (PeekChar () == '-') {
2600 ExpectAndAppend (">");
// A parameter entity reference is accepted only in the states listed here.
2606 if (State != DtdInputState.Free && State != DtdInputState.EntityDecl && State != DtdInputState.Comment && State != DtdInputState.InsideDoubleQuoted && State != DtdInputState.InsideSingleQuoted)
2607 throw NotWFError ("Parameter Entity Reference cannot appear as a part of markupdecl (see XML spec 2.8).");
2613 // The reader is positioned on the first 'S' of "SYSTEM".
// Reads a quoted system literal and returns it as a string.
// expectSYSTEM: presumably makes the method consume the 'SYSTEM' keyword
// and the mandatory whitespace after it first -- confirm.
// Raises NotWFError when that whitespace is missing or when the input ends
// inside the literal.
2614 private string ReadSystemLiteral (bool expectSYSTEM)
2618 if (!SkipWhitespace ())
2619 throw NotWFError ("Whitespace is required after 'SYSTEM'.");
2623 int quoteChar = ReadChar (); // apos or quot
2625 ClearValueBuffer ();
// Collect characters until the closing quote, which must match the opening
// one.
2626 while (c != quoteChar) {
2629 throw NotWFError ("Unexpected end of stream in ExternalID.");
2631 AppendValueChar (c);
2633 return CreateValueString ();
// Reads a quoted public identifier literal and returns it as a string.
// Whitespace is required before the opening quote (the error message ties
// it to the 'PUBLIC' keyword). Every character other than the closing
// quote must satisfy XmlChar.IsPubidChar. Raises NotWFError on missing
// whitespace, on end of input, or on a disallowed character.
2636 private string ReadPubidLiteral()
2639 if (!SkipWhitespace ())
2640 throw NotWFError ("Whitespace is required after 'PUBLIC'.");
2641 int quoteChar = ReadChar ();
2643 ClearValueBuffer ();
2644 while(c != quoteChar)
2647 if(c < 0) throw NotWFError ("Unexpected end of stream in ExternalID.");
2648 if(c != quoteChar && !XmlChar.IsPubidChar (c))
2649 throw NotWFError (String.Format ("character '{0}' not allowed for PUBLIC ID", (char)c ));
2651 AppendValueChar (c);
2653 return CreateValueString ();
2656 // The reader is positioned on the first character
// Reads a name and returns it, discarding the prefix / local name split
// produced by the two-output overload.
2658 private string ReadName ()
2660 string prefix, local;
2661 return ReadName (out prefix, out local);
// Reads an XML name from the input and atomizes it in
// parserContext.NameTable.
// prefix:    receives the part before the first ':' when namespaces are
//            enabled and a colon was seen; String.Empty when no split is
//            made.
// localName: receives the part after that ':' in the split case.
// Raises NotWFError when the first character is not a valid name start.
// Two implementations follow. Without USE_NAME_BUFFER the name is sliced
// directly out of peekChars; the second implementation copies the
// characters into nameBuffer first.
2664 private string ReadName (out string prefix, out string localName)
2666 #if !USE_NAME_BUFFER
// preserveCurrentTag is forced on while the name is scanned and restored
// afterwards. Presumably this keeps the scanned characters available in
// peekChars -- confirm.
2667 bool savePreserve = preserveCurrentTag;
2668 preserveCurrentTag = true;
// Remember the start as an offset from curNodePeekIndex instead of an
// absolute index. Presumably the absolute position can change while
// peeking -- confirm.
2670 int startOffset = peekCharsIndex - curNodePeekIndex;
2671 int ch = PeekChar ();
2672 if (!XmlChar.IsFirstNameChar (ch))
2673 throw NotWFError (String.Format (CultureInfo.InvariantCulture, "a name did not start with a legal character {0} ({1})", ch, (char) ch));
2678 while (XmlChar.IsNameChar ((ch = PeekChar ()))) {
// Only the first ':' splits the name, and only when namespaces are enabled.
2680 if (ch == ':' && namespaces && colonAt < 0)
2685 int start = curNodePeekIndex + startOffset;
2687 string name = parserContext.NameTable.Add (
2688 peekChars, start, length);
2691 prefix = parserContext.NameTable.Add (
2692 peekChars, start, colonAt);
2693 localName = parserContext.NameTable.Add (
2694 peekChars, start + colonAt + 1, length - colonAt - 1);
2696 prefix = String.Empty;
2700 preserveCurrentTag = savePreserve;
// Buffer-based implementation.
2704 int ch = PeekChar ();
2705 if (!XmlChar.IsFirstNameChar (ch))
2706 throw NotWFError (String.Format (CultureInfo.InvariantCulture, "a name did not start with a legal character {0} ({1})", ch, (char) ch));
2711 // AppendNameChar (ch);
2713 // nameBuffer.Length is always non-0 so no need to ExpandNameCapacity () here
2714 if (ch < Char.MaxValue)
2715 nameBuffer [nameLength++] = (char) ch;
2717 AppendSurrogatePairNameChar (ch);
2722 while (XmlChar.IsNameChar ((ch = PeekChar ()))) {
2725 if (ch == ':' && namespaces && colonAt < 0)
2726 colonAt = nameLength;
2727 // AppendNameChar (ch);
// Grow the buffer before appending when it is already full.
2729 if (nameLength == nameCapacity)
2730 ExpandNameCapacity ();
2731 if (ch < Char.MaxValue)
2732 nameBuffer [nameLength++] = (char) ch;
2734 AppendSurrogatePairNameChar (ch);
2738 string name = parserContext.NameTable.Add (nameBuffer, 0, nameLength);
2741 prefix = parserContext.NameTable.Add (nameBuffer, 0, colonAt);
2742 localName = parserContext.NameTable.Add (nameBuffer, colonAt + 1, nameLength - colonAt - 1);
2744 prefix = String.Empty;
2752 // Read the next character and compare it against the
2753 // specified character.
// Raises NotWFError when the character read differs from 'expected'. In
// the message, end of input (a negative value) is reported as "EOF".
2754 private void Expect (int expected)
2756 int ch = ReadChar ();
2758 if (ch != expected) {
2759 throw NotWFError (String.Format (CultureInfo.InvariantCulture,
2760 "expected '{0}' ({1:X}) but found '{2}' ({3:X})",
// Boxed so that both branches of the conditional have type object.
2763 ch < 0 ? (object) "EOF" : (char) ch,
// Reads expected.Length characters and requires each one to equal the
// corresponding character of 'expected'. Raises NotWFError at the first
// mismatch; characters read up to that point stay consumed.
2768 private void Expect (string expected)
2770 for (int i = 0; i < expected.Length; i++)
2771 if (ReadChar () != expected [i])
2772 throw NotWFError (String.Format (CultureInfo.InvariantCulture,
2773 "'{0}' is expected", expected));
// Reads characters and requires c to appear, raising NotWFError otherwise;
// end of input is reported as "EOF" in the message. Presumably whitespace
// before c is skipped -- confirm.
2776 private void ExpectAfterWhitespace (char c)
2779 int i = ReadChar ();
// The 'i < 0x21' comparison is a cheap pre-filter before calling
// XmlChar.IsWhitespace.
2780 if (i < 0x21 && XmlChar.IsWhitespace (i))
2783 throw NotWFError (String.Format (CultureInfo.InvariantCulture, "Expected {0}, but found {1} [{2}]", c, i < 0 ? (object) "EOF" : (char) i, i));
2788 // Does not consume the first non-whitespace character.
// Skips a run of space, tab, line feed and carriage return characters.
// 'skipped' records whether the first character peeked was one of them
// (presumably this is the return value -- confirm).
2789 private bool SkipWhitespace ()
2791 // FIXME: It should be inlined by the JIT.
2792 // bool skipped = XmlChar.IsWhitespace (PeekChar ());
2793 int ch = PeekChar ();
2794 bool skipped = (ch == 0x20 || ch == 0x9 || ch == 0xA || ch == 0xD);
2798 // FIXME: It should be inlined by the JIT.
2799 // while (XmlChar.IsWhitespace (PeekChar ()))
2801 while ((ch = PeekChar ()) == 0x20 || ch == 0x9 || ch == 0xA || ch == 0xD)
// Scans a run of whitespace characters (space, tab, LF, CR) and decides
// how to report it. When the run appears inside element content and is
// followed by something other than '<' or end of input, it is treated as
// the start of text (isText). Otherwise the whitespaceHandling setting
// decides whether the run is reported at all; when it is, the node type is
// SignificantWhitespace under xml:space="preserve" and Whitespace
// otherwise.
// NOTE(review): the meaning of the bool result cannot be determined from
// the statements in this method -- confirm with the callers.
2806 private bool ReadWhitespace ()
// Whitespace seen in the initial state moves the reader into the
// XmlDeclaration state.
2808 if (currentState == XmlNodeType.None)
2809 currentState = XmlNodeType.XmlDeclaration;
2811 bool savePreserve = preserveCurrentTag;
2812 preserveCurrentTag = true;
2813 int startOffset = peekCharsIndex - curNodePeekIndex; // it should be 0 for now though.
2815 int ch = PeekChar ();
2819 // FIXME: It should be inlined by the JIT.
2820 // } while ((ch = PeekChar ()) != -1 && XmlChar.IsWhitespace (ch));
2821 } while (ch == 0x20 || ch == 0x9 || ch == 0xA || ch == 0xD);
2823 bool isText = currentState == XmlNodeType.Element && ch != -1 && ch != '<';
// Condition for not reporting the run: handling is None, or handling is
// Significant outside an xml:space="preserve" scope.
2825 if (!isText && (whitespaceHandling == WhitespaceHandling.None ||
2826 whitespaceHandling == WhitespaceHandling.Significant && XmlSpace != XmlSpace.Preserve))
// The scanned characters are copied from peekChars into valueBuffer in one
// call.
2829 ClearValueBuffer ();
2830 valueBuffer.Append (peekChars, curNodePeekIndex, peekCharsIndex - curNodePeekIndex - startOffset);
2831 preserveCurrentTag = savePreserve;
2836 XmlNodeType nodeType = (this.XmlSpace == XmlSpace.Preserve) ?
2837 XmlNodeType.SignificantWhitespace : XmlNodeType.Whitespace;
2838 SetProperties (nodeType,
2843 null, // value: create only when required
2850 // Returns -1 if it should throw an error.
// Copies raw characters of the current element's content into 'buffer',
// starting at 'offset', for at most 'length' loop iterations. A '<' that
// is not followed by '/' is copied as a literal character. Presumably the
// copy stops at the element's end tag, after which shouldSkipUntilEndTag
// is cleared and Read () moves to the next node -- confirm. End of input
// raises NotWFError.
// NOTE(review): a character that takes the surrogate-pair path writes two
// buffer slots in one loop iteration, so more than 'length' chars can be
// written -- confirm that callers size the buffer for this.
2851 private int ReadCharsInternal (char [] buffer, int offset, int length)
2853 shouldSkipUntilEndTag = true;
2855 int bufIndex = offset;
2856 for (int i = 0; i < length; i++) {
2857 int c = PeekChar ();
2860 throw NotWFError ("Unexpected end of xml.");
2863 if (PeekChar () != '/') {
2864 buffer [bufIndex++] = '<';
2867 // Seems to skip immediate EndElement
2874 shouldSkipUntilEndTag = false;
2875 Read (); // move to the next node
2879 if (c < Char.MaxValue)
2880 buffer [bufIndex++] = (char) c;
// Code points that fail the test above are written as a surrogate pair.
2882 buffer [bufIndex++] = (char) ((c - 0x10000) / 0x400 + 0xD800);
2883 buffer [bufIndex++] = (char) ((c - 0x10000) % 0x400 + 0xDC00);
2891 private bool ReadUntilEndTag ()
2894 currentState = XmlNodeType.EndElement;
2900 throw NotWFError ("Unexpected end of xml.");
2902 if (PeekChar () != '/')
2905 string name = ReadName ();
2906 if (name != elementNames [elementNameStackPos - 1].Name)