2 // System.Xml.XmlTextReader
5 // Jason Diamond (jason@injektilo.org)
6 // Adam Treat (manyoso@yahoo.com)
7 // Atsushi Enomoto (ginga@kit.hi-ho.ne.jp)
9 // (C) 2001, 2002 Jason Diamond http://injektilo.org/
10 // Copyright (C) 2005-2006 Novell, Inc (http://www.novell.com)
12 // Permission is hereby granted, free of charge, to any person obtaining
13 // a copy of this software and associated documentation files (the
14 // "Software"), to deal in the Software without restriction, including
15 // without limitation the rights to use, copy, modify, merge, publish,
16 // distribute, sublicense, and/or sell copies of the Software, and to
17 // permit persons to whom the Software is furnished to do so, subject to
18 // the following conditions:
20 // The above copyright notice and this permission notice shall be
21 // included in all copies or substantial portions of the Software.
23 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
27 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
28 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
29 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
31 //#define USE_NAME_BUFFER
34 // Optimization TODOs:
36 // - support PushbackChar() which reverts one character read.
37 // - ReadTextReader() should always keep one pushback buffer
38 // as pushback safety net.
39 // - Replace (peek,read) * n -> read * n + pushback
43 using System.Collections;
45 using System.Collections.Generic;
47 using System.Globalization;
49 using System.Security.Permissions;
51 using System.Xml.Schema;
64 internal class XmlTextReader : XmlReader,
65 IXmlLineInfo, IXmlNamespaceResolver, IHasXmlParserContext
67 [PermissionSet (SecurityAction.InheritanceDemand, Unrestricted = true)]
68 public class XmlTextReader : XmlReader, IXmlLineInfo, IHasXmlParserContext
73 protected XmlTextReader ()
77 public XmlTextReader (Stream input)
78 : this (new XmlStreamReader (input))
82 public XmlTextReader (string url)
83 : this(url, new NameTable ())
87 public XmlTextReader (TextReader input)
88 : this (input, new NameTable ())
92 protected XmlTextReader (XmlNameTable nt)
93 : this (String.Empty, null, XmlNodeType.None, null)
97 public XmlTextReader (Stream input, XmlNameTable nt)
98 : this(new XmlStreamReader (input), nt)
102 public XmlTextReader (string url, Stream input)
103 : this (url, new XmlStreamReader (input))
107 public XmlTextReader (string url, TextReader input)
108 : this (url, input, new NameTable ())
112 public XmlTextReader (string url, XmlNameTable nt)
115 Stream stream = GetStreamFromUrl (url, out uriString);
116 XmlParserContext ctx = new XmlParserContext (nt,
117 new XmlNamespaceManager (nt),
120 this.InitializeContext (uriString, ctx, new XmlStreamReader (stream), XmlNodeType.Document);
123 public XmlTextReader (TextReader input, XmlNameTable nt)
124 : this (String.Empty, input, nt)
128 // This is used in XmlReader.Create() to indicate that string
129 // argument is uri, not an xml fragment.
130 internal XmlTextReader (bool dummy, XmlResolver resolver, string url, XmlNodeType fragType, XmlParserContext context)
132 if (resolver == null)
133 resolver = new XmlUrlResolver ();
135 this.XmlResolver = resolver;
137 Stream stream = GetStreamFromUrl (url, out uriString);
138 this.InitializeContext (uriString, context, new XmlStreamReader (stream), fragType);
141 public XmlTextReader (Stream xmlFragment, XmlNodeType fragType, XmlParserContext context)
142 : this (context != null ? context.BaseURI : String.Empty,
143 new XmlStreamReader (xmlFragment),
149 internal XmlTextReader (string baseURI, TextReader xmlFragment, XmlNodeType fragType)
150 : this (baseURI, xmlFragment, fragType, null)
154 public XmlTextReader (string url, Stream input, XmlNameTable nt)
155 : this (url, new XmlStreamReader (input), nt)
159 public XmlTextReader (string url, TextReader input, XmlNameTable nt)
160 : this (url, input, XmlNodeType.Document, null)
164 public XmlTextReader (string xmlFragment, XmlNodeType fragType, XmlParserContext context)
165 : this (context != null ? context.BaseURI : String.Empty,
166 new StringReader (xmlFragment),
172 internal XmlTextReader (string url, TextReader fragment, XmlNodeType fragType, XmlParserContext context)
174 InitializeContext (url, context, fragment, fragType);
177 private Stream GetStreamFromUrl (string url, out string absoluteUriString)
179 Uri uri = resolver.ResolveUri (null, url);
180 absoluteUriString = uri != null ? uri.ToString () : String.Empty;
181 return resolver.GetEntity (uri, null, typeof (Stream)) as Stream;
188 public override int AttributeCount
190 get { return attributeCount; }
193 public override string BaseURI
195 get { return parserContext.BaseURI; }
199 public override bool CanReadBinaryContent {
203 public override bool CanReadValueChunk {
207 internal override bool CanReadBinaryContent {
211 internal override bool CanReadValueChunk {
216 internal bool CharacterChecking {
217 get { return checkCharacters; }
218 set { checkCharacters = value; }
221 // for XmlReaderSettings.CloseInput support
222 internal bool CloseInput {
223 get { return closeInput; }
224 set { closeInput = value; }
227 public override int Depth
230 int nodeTypeMod = currentToken.NodeType == XmlNodeType.Element ? 0 : -1;
231 if (currentAttributeValue >= 0)
232 return nodeTypeMod + elementDepth + 2; // inside attribute value.
233 else if (currentAttribute >= 0)
234 return nodeTypeMod + elementDepth + 1;
239 public Encoding Encoding
241 get { return parserContext.Encoding; }
244 public EntityHandling EntityHandling {
245 get { return entityHandling; }
246 set { entityHandling = value; }
250 public override bool EOF {
251 get { return readState == ReadState.EndOfFile; }
254 public override bool HasValue {
255 get { return cursorToken.Value != null; }
258 public override bool IsDefault {
259 // XmlTextReader does not expand default attributes.
260 get { return false; }
263 public override bool IsEmptyElement {
264 get { return cursorToken.IsEmptyElement; }
269 public override string this [int i] {
270 get { return GetAttribute (i); }
273 public override string this [string name] {
274 get { return GetAttribute (name); }
277 public override string this [string localName, string namespaceName] {
278 get { return GetAttribute (localName, namespaceName); }
282 public int LineNumber {
284 if (useProceedingLineInfo)
287 return cursorToken.LineNumber;
291 public int LinePosition {
293 if (useProceedingLineInfo)
296 return cursorToken.LinePosition;
300 public override string LocalName {
301 get { return cursorToken.LocalName; }
304 public override string Name {
305 get { return cursorToken.Name; }
308 public bool Namespaces {
309 get { return namespaces; }
311 if (readState != ReadState.Initial)
312 throw new InvalidOperationException ("Namespaces have to be set before reading.");
317 public override string NamespaceURI {
318 get { return cursorToken.NamespaceURI; }
321 public override XmlNameTable NameTable {
322 get { return parserContext.NameTable; }
325 public override XmlNodeType NodeType {
326 get { return cursorToken.NodeType; }
329 public bool Normalization {
330 get { return normalization; }
331 set { normalization = value; }
334 public override string Prefix {
335 get { return cursorToken.Prefix; }
339 public bool ProhibitDtd {
340 get { return prohibitDtd; }
341 set { prohibitDtd = value; }
345 public override char QuoteChar {
346 get { return cursorToken.QuoteChar; }
349 public override ReadState ReadState {
350 get { return readState; }
354 public override XmlReaderSettings Settings {
355 get { return base.Settings; }
359 public override string Value {
360 get { return cursorToken.Value != null ? cursorToken.Value : String.Empty; }
363 public WhitespaceHandling WhitespaceHandling {
364 get { return whitespaceHandling; }
365 set { whitespaceHandling = value; }
368 public override string XmlLang {
369 get { return parserContext.XmlLang; }
372 public XmlResolver XmlResolver {
373 set { resolver = value; }
376 public override XmlSpace XmlSpace {
377 get { return parserContext.XmlSpace; }
384 public override void Close ()
386 readState = ReadState.Closed;
388 cursorToken.Clear ();
389 currentToken.Clear ();
391 if (closeInput && reader != null)
// Returns the value of the attribute at the given zero-based index on the
// current node.
//   i - attribute index; must satisfy 0 <= i < AttributeCount.
// Throws ArgumentOutOfRangeException when the index is outside that range.
public override string GetAttribute (int i)
{
	// Reject negative indexes too: they would otherwise surface as a raw
	// array indexing failure instead of the documented argument exception.
	if (i < 0 || i >= attributeCount)
		throw new ArgumentOutOfRangeException ("i", i, "i must be non-negative and less than AttributeCount.");

	return attributeTokens [i].Value;
}
404 // MS.NET 1.0 msdn says that this method returns String.Empty
405 // for absent attribute, but in fact it returns null.
406 // This description is corrected in MS.NET 1.1 msdn.
407 public override string GetAttribute (string name)
409 for (int i = 0; i < attributeCount; i++)
410 if (attributeTokens [i].Name == name)
411 return attributeTokens [i].Value;
415 private int GetIndexOfQualifiedAttribute (string localName, string namespaceURI)
417 for (int i = 0; i < attributeCount; i++) {
418 XmlAttributeTokenInfo ti = attributeTokens [i];
419 if (ti.LocalName == localName && ti.NamespaceURI == namespaceURI)
425 XmlParserContext IHasXmlParserContext.ParserContext {
426 get { return parserContext; }
429 public override string GetAttribute (string localName, string namespaceURI)
431 int idx = this.GetIndexOfQualifiedAttribute (localName, namespaceURI);
434 return attributeTokens [idx].Value;
438 public IDictionary<string, string> GetNamespacesInScope (XmlNamespaceScope scope)
440 return parserContext.NamespaceManager.GetNamespacesInScope (scope);
443 IDictionary<string, string> IXmlNamespaceResolver.GetNamespacesInScope (XmlNamespaceScope scope)
445 return GetNamespacesInScope (scope);
449 public TextReader GetRemainder ()
451 if (peekCharsLength < 0)
453 return new StringReader (new string (peekChars, peekCharsIndex, peekCharsLength - peekCharsIndex) + reader.ReadToEnd ());
457 public bool HasLineInfo ()
459 bool IXmlLineInfo.HasLineInfo ()
465 public override string LookupNamespace (string prefix)
467 return LookupNamespace (prefix, false);
// Resolves a prefix through the parser context's namespace manager.
// An empty-string result from the manager is reported to the caller as null;
// any other result is returned unchanged.
private string LookupNamespace (string prefix, bool atomizedNames)
{
	string uri = parserContext.NamespaceManager.LookupNamespace (prefix, atomizedNames);
	if (uri == String.Empty)
		return null;
	return uri;
}
478 string IXmlNamespaceResolver.LookupPrefix (string ns)
480 return LookupPrefix (ns, false);
483 public string LookupPrefix (string ns, bool atomizedName)
485 return parserContext.NamespaceManager.LookupPrefix (ns, atomizedName);
// Moves the cursor onto the attribute at the given zero-based index.
//   i - attribute index; must satisfy 0 <= i < AttributeCount.
// Throws ArgumentOutOfRangeException when the index is outside that range;
// the cursor is left untouched in that case.
public override void MoveToAttribute (int i)
{
	// Validate before mutating any cursor state so that a bad index
	// cannot leave currentAttribute pointing at a non-existent slot.
	if (i < 0 || i >= attributeCount)
		throw new ArgumentOutOfRangeException ("i", i, "attribute index out of range.");

	currentAttribute = i;
	// Position on the attribute itself, not on one of its value tokens.
	currentAttributeValue = -1;
	cursorToken = attributeTokens [i];
}
499 public override bool MoveToAttribute (string name)
501 for (int i = 0; i < attributeCount; i++) {
502 XmlAttributeTokenInfo ti = attributeTokens [i];
503 if (ti.Name == name) {
511 public override bool MoveToAttribute (string localName, string namespaceName)
513 int idx = GetIndexOfQualifiedAttribute (localName, namespaceName);
516 MoveToAttribute (idx);
520 public override bool MoveToElement ()
522 if (currentToken == null) // for attribute .ctor()
525 if (cursorToken == currentToken)
528 if (currentAttribute >= 0) {
529 currentAttribute = -1;
530 currentAttributeValue = -1;
531 cursorToken = currentToken;
538 public override bool MoveToFirstAttribute ()
540 if (attributeCount == 0)
543 return MoveToNextAttribute ();
546 public override bool MoveToNextAttribute ()
548 if (currentAttribute == 0 && attributeCount == 0)
550 if (currentAttribute + 1 < attributeCount) {
552 currentAttributeValue = -1;
553 cursorToken = attributeTokens [currentAttribute];
560 public override bool Read ()
562 curNodePeekIndex = peekCharsIndex;
563 preserveCurrentTag = true;
565 if (startNodeType == XmlNodeType.Attribute) {
566 if (currentAttribute == 0)
567 return false; // already read.
568 SkipTextDeclaration ();
570 IncrementAttributeToken ();
571 ReadAttributeValueTokens ('"');
572 cursorToken = attributeTokens [0];
573 currentAttributeValue = -1;
574 readState = ReadState.Interactive;
577 if (readState == ReadState.Initial && currentState == XmlNodeType.Element)
578 SkipTextDeclaration ();
584 readState = ReadState.Interactive;
585 currentLinkedNodeLineNumber = line;
586 currentLinkedNodeLinePosition = column;
587 useProceedingLineInfo = true;
589 cursorToken = currentToken;
591 currentAttribute = currentAttributeValue = -1;
592 currentToken.Clear ();
594 // It was moved from end of ReadStartTag ().
600 if (shouldSkipUntilEndTag) {
601 shouldSkipUntilEndTag = false;
602 return ReadUntilEndTag ();
605 more = ReadContent ();
607 if (!more && startNodeType == XmlNodeType.Document && currentState != XmlNodeType.EndElement)
608 throw NotWFError ("Document element did not appear.");
610 useProceedingLineInfo = false;
614 public override bool ReadAttributeValue ()
616 if (readState == ReadState.Initial && startNodeType == XmlNodeType.Attribute) {
620 if (currentAttribute < 0)
622 XmlAttributeTokenInfo ti = attributeTokens [currentAttribute];
623 if (currentAttributeValue < 0)
624 currentAttributeValue = ti.ValueTokenStartIndex - 1;
626 if (currentAttributeValue < ti.ValueTokenEndIndex) {
627 currentAttributeValue++;
628 cursorToken = attributeValueTokens [currentAttributeValue];
635 public int ReadBase64 (byte [] buffer, int offset, int length)
637 BinaryCharGetter = binaryCharGetter;
639 return Binary.ReadBase64 (buffer, offset, length);
641 BinaryCharGetter = null;
645 public int ReadBinHex (byte [] buffer, int offset, int length)
647 BinaryCharGetter = binaryCharGetter;
649 return Binary.ReadBinHex (buffer, offset, length);
651 BinaryCharGetter = null;
655 public int ReadChars (char [] buffer, int offset, int length)
658 throw new ArgumentOutOfRangeException ("offset", offset, "Offset must be non-negative integer.");
660 throw new ArgumentOutOfRangeException ("length", length, "Length must be non-negative integer.");
661 else if (buffer.Length < offset + length)
662 throw new ArgumentOutOfRangeException ("buffer length is smaller than the sum of offset and length.");
664 if (IsEmptyElement) {
669 if (NodeType != XmlNodeType.Element)
672 return ReadCharsInternal (buffer, offset, length);
675 public void ResetState ()
677 throw new InvalidOperationException ("Cannot call ResetState when parsing an XML fragment.");
681 public override void ResolveEntity ()
683 // XmlTextReader does not resolve entities.
684 throw new InvalidOperationException ("XmlTextReader cannot resolve external entities.");
688 [MonoTODO] // FIXME: Implement, for performance improvement
689 public override void Skip ()
697 // Parsed DTD Objects
698 // Note that this property must be kept since dtd2xsd uses it.
699 internal DTDObjectModel DTD {
700 get { return parserContext.Dtd; }
703 internal XmlResolver Resolver {
704 get { return resolver; }
709 internal class XmlTokenInfo
711 public XmlTokenInfo (XmlTextReader xtr)
719 protected XmlTextReader Reader;
722 public string LocalName;
723 public string Prefix;
724 public string NamespaceURI;
725 public bool IsEmptyElement;
726 public char QuoteChar;
727 public int LineNumber;
728 public int LinePosition;
729 public int ValueBufferStart;
730 public int ValueBufferEnd;
732 public XmlNodeType NodeType;
734 public virtual string Value {
736 if (valueCache != null)
738 if (ValueBufferStart >= 0) {
739 //Console.WriteLine (NodeType + " / " + ValueBuffer.Length + " / " + ValueBufferStart + " / " + ValueBufferEnd);
740 valueCache = Reader.valueBuffer.ToString (ValueBufferStart, ValueBufferEnd - ValueBufferStart);
744 case XmlNodeType.Text:
745 case XmlNodeType.SignificantWhitespace:
746 case XmlNodeType.Whitespace:
747 case XmlNodeType.Comment:
748 case XmlNodeType.CDATA:
749 case XmlNodeType.ProcessingInstruction:
750 valueCache = Reader.CreateValueString ();
755 set { valueCache = value; }
758 public virtual void Clear ()
760 ValueBufferStart = -1;
762 NodeType = XmlNodeType.None;
763 Name = LocalName = Prefix = NamespaceURI = String.Empty;
764 IsEmptyElement = false;
766 LineNumber = LinePosition = 0;
770 internal class XmlAttributeTokenInfo : XmlTokenInfo
772 public XmlAttributeTokenInfo (XmlTextReader reader)
775 NodeType = XmlNodeType.Attribute;
778 public int ValueTokenStartIndex;
779 public int ValueTokenEndIndex;
781 StringBuilder tmpBuilder = new StringBuilder ();
783 public override string Value {
785 if (valueCache != null)
788 // An empty value should return String.Empty.
789 if (ValueTokenStartIndex == ValueTokenEndIndex) {
790 XmlTokenInfo ti = Reader.attributeValueTokens [ValueTokenStartIndex];
791 if (ti.NodeType == XmlNodeType.EntityReference)
792 valueCache = String.Concat ("&", ti.Name, ";");
794 valueCache = ti.Value;
798 tmpBuilder.Length = 0;
799 for (int i = ValueTokenStartIndex; i <= ValueTokenEndIndex; i++) {
800 XmlTokenInfo ti = Reader.attributeValueTokens [i];
801 if (ti.NodeType == XmlNodeType.Text)
802 tmpBuilder.Append (ti.Value);
804 tmpBuilder.Append ('&');
805 tmpBuilder.Append (ti.Name);
806 tmpBuilder.Append (';');
810 valueCache = tmpBuilder.ToString (0, tmpBuilder.Length);
814 set { valueCache = value; }
817 public override void Clear ()
821 NodeType = XmlNodeType.Attribute;
822 ValueTokenStartIndex = ValueTokenEndIndex = 0;
825 internal void FillXmlns ()
827 if (Object.ReferenceEquals (Prefix, XmlNamespaceManager.PrefixXmlns))
828 Reader.parserContext.NamespaceManager.AddNamespace (LocalName, Value);
829 else if (Object.ReferenceEquals (Name, XmlNamespaceManager.PrefixXmlns))
830 Reader.parserContext.NamespaceManager.AddNamespace (String.Empty, Value);
833 internal void FillNamespace ()
835 if (Object.ReferenceEquals (Prefix, XmlNamespaceManager.PrefixXmlns) ||
836 Object.ReferenceEquals (Name, XmlNamespaceManager.PrefixXmlns))
837 NamespaceURI = XmlNamespaceManager.XmlnsXmlns;
838 else if (Prefix.Length == 0)
839 NamespaceURI = string.Empty;
841 NamespaceURI = Reader.LookupNamespace (Prefix, true);
845 private XmlTokenInfo cursorToken;
846 private XmlTokenInfo currentToken;
847 private XmlAttributeTokenInfo currentAttributeToken;
848 private XmlTokenInfo currentAttributeValueToken;
849 private XmlAttributeTokenInfo [] attributeTokens = new XmlAttributeTokenInfo [10];
850 private XmlTokenInfo [] attributeValueTokens = new XmlTokenInfo [10];
851 private int currentAttribute;
852 private int currentAttributeValue;
853 private int attributeCount;
855 private XmlParserContext parserContext;
857 private ReadState readState;
860 private int elementDepth;
861 private bool depthUp;
863 private bool popScope;
867 public TagName (string n, string l, string p)
874 public readonly string Name;
875 public readonly string LocalName;
876 public readonly string Prefix;
879 private TagName [] elementNames;
880 int elementNameStackPos;
882 private bool allowMultipleRoot;
884 private bool isStandalone;
886 private bool returnEntityReference;
887 private string entityReferenceName;
890 private char [] nameBuffer;
891 private int nameLength;
892 private int nameCapacity;
893 private const int initialNameCapacity = 32;
896 private StringBuilder valueBuffer;
898 private TextReader reader;
899 private char [] peekChars;
900 private int peekCharsIndex;
901 private int peekCharsLength;
902 private int curNodePeekIndex;
903 private bool preserveCurrentTag;
904 private const int peekCharCapacity = 1024;
909 private int currentLinkedNodeLineNumber;
910 private int currentLinkedNodeLinePosition;
911 private bool useProceedingLineInfo;
913 private XmlNodeType startNodeType;
914 // State machine attribute.
915 // XmlDeclaration: after the first node.
916 // DocumentType: after doctypedecl
917 // Element: inside document element
918 // EndElement: after document element
919 private XmlNodeType currentState;
921 // For ReadChars()/ReadBase64()/ReadBinHex()
922 private bool shouldSkipUntilEndTag;
923 XmlReaderBinarySupport.CharGetter binaryCharGetter;
925 // These values are never re-initialized.
926 private bool namespaces = true;
927 private WhitespaceHandling whitespaceHandling = WhitespaceHandling.All;
928 private XmlResolver resolver = new XmlUrlResolver ();
929 private bool normalization = false;
931 private bool checkCharacters;
932 private bool prohibitDtd = false;
933 private bool closeInput = true;
934 private EntityHandling entityHandling; // 2.0
936 private NameTable whitespacePool;
937 private char [] whitespaceCache;
939 private XmlException NotWFError (string message)
941 return new XmlException (this as IXmlLineInfo, BaseURI, message);
946 currentToken = new XmlTokenInfo (this);
947 cursorToken = currentToken;
948 currentAttribute = -1;
949 currentAttributeValue = -1;
952 readState = ReadState.Initial;
953 allowMultipleRoot = false;
959 popScope = allowMultipleRoot = false;
960 elementNames = new TagName [10];
961 elementNameStackPos = 0;
963 isStandalone = false;
964 returnEntityReference = false;
965 entityReferenceName = String.Empty;
968 nameBuffer = new char [initialNameCapacity];
970 nameCapacity = initialNameCapacity;
973 valueBuffer = new StringBuilder ();
976 if (peekChars == null)
977 peekChars = new char [peekCharCapacity];
978 peekCharsLength = -1;
979 curNodePeekIndex = -1; // read from start
984 currentLinkedNodeLineNumber = currentLinkedNodeLinePosition = 0;
985 useProceedingLineInfo = false;
987 currentState = XmlNodeType.None;
989 shouldSkipUntilEndTag = false;
990 binaryCharGetter = new XmlReaderBinarySupport.CharGetter (ReadChars);
992 checkCharacters = true;
994 if (Settings != null)
995 checkCharacters = Settings.CheckCharacters;
999 entityHandling = EntityHandling.ExpandCharEntities;
// Binds this reader to its input text and parser context.
//   url      - base URI of the input; when non-empty it is converted to an
//              absolute URI string and stored in parserContext.BaseURI.
//   context  - parser context to use; a fresh one (new NameTable, new
//              XmlNamespaceManager) is created when null.
//   fragment - the text source to parse.
//   fragType - Attribute, Element or Document; any other node type is
//              rejected with an XmlException.
private void InitializeContext (string url, XmlParserContext context, TextReader fragment, XmlNodeType fragType)
{
	startNodeType = fragType;
	parserContext = context;
	if (context == null) {
		XmlNameTable nt = new NameTable ();
		parserContext = new XmlParserContext (nt,
			new XmlNamespaceManager (nt),
			String.Empty,
			XmlSpace.None);
	}

	if (url != null && url.Length > 0) {
		Uri uri = null;
		try {
			uri = new Uri (url);
		} catch (Exception) {
			// url is not an absolute URI: resolve it against a path
			// inside the current directory instead.
			string path = Path.GetFullPath ("./a");
			uri = new Uri (new Uri (path), url);
		}
		parserContext.BaseURI = uri.ToString ();
	}

	Init ();

	reader = fragment;

	switch (fragType) {
	case XmlNodeType.Attribute:
		// Read () parses an attribute fragment as a '"'-delimited
		// attribute value, so literal double quotes in the text have to
		// be turned into the &quot; entity or they would end the value.
		reader = new StringReader (fragment.ReadToEnd ().Replace ("\"", "&quot;"));
		break;
	case XmlNodeType.Element:
		// Element fragments start inside content and may hold several roots.
		currentState = XmlNodeType.Element;
		allowMultipleRoot = true;
		break;
	case XmlNodeType.Document:
		break;
	default:
		throw new XmlException (String.Format ("NodeType {0} is not allowed to create XmlTextReader.", fragType));
	}
}
1045 internal ConformanceLevel Conformance {
1046 get { return allowMultipleRoot ? ConformanceLevel.Fragment : ConformanceLevel.Document; }
1048 if (value == ConformanceLevel.Fragment) {
1049 currentState = XmlNodeType.Element;
1050 allowMultipleRoot = true;
1055 internal void AdjustLineInfoOffset (int lineNumberOffset, int linePositionOffset)
1057 line += lineNumberOffset;
1058 column += linePositionOffset;
1061 internal void SetNameTable (XmlNameTable nameTable)
1063 parserContext.NameTable = nameTable;
1067 // Use this method rather than setting the properties
1068 // directly so that all the necessary properties can
1069 // be changed in harmony with each other. Maybe the
1070 // fields should be in a separate class to help enforce
1073 // Namespace URI could not be provided here.
1074 private void SetProperties (
1075 XmlNodeType nodeType,
1079 bool isEmptyElement,
1081 bool clearAttributes)
1083 SetTokenProperties (currentToken, nodeType, name, prefix, localName, isEmptyElement, value, clearAttributes);
1084 currentToken.LineNumber = this.currentLinkedNodeLineNumber;
1085 currentToken.LinePosition = this.currentLinkedNodeLinePosition;
1088 private void SetTokenProperties (
1090 XmlNodeType nodeType,
1094 bool isEmptyElement,
1096 bool clearAttributes)
1098 token.NodeType = nodeType;
1100 token.Prefix = prefix;
1101 token.LocalName = localName;
1102 token.IsEmptyElement = isEmptyElement;
1103 token.Value = value;
1104 this.elementDepth = depth;
1106 if (clearAttributes)
1110 private void ClearAttributes ()
1112 //for (int i = 0; i < attributeCount; i++)
1113 // attributeTokens [i].Clear ();
1115 currentAttribute = -1;
1116 currentAttributeValue = -1;
1119 private int PeekSurrogate (int c)
1121 if (peekCharsLength <= peekCharsIndex + 1) {
1122 if (!ReadTextReader (c))
1123 //FIXME: copy MS.NET behaviour when unpaired surrogate found
1127 int highhalfChar = peekChars [peekCharsIndex];
1128 int lowhalfChar = peekChars [peekCharsIndex+1];
1130 if (((highhalfChar & 0xFC00) != 0xD800) || ((lowhalfChar & 0xFC00) != 0xDC00))
1131 //FIXME: copy MS.NET behaviour when unpaired surrogate found
1132 return highhalfChar;
1133 return 0x10000 + (highhalfChar-0xD800)*0x400 + (lowhalfChar-0xDC00);
1136 private int PeekChar ()
1138 if (peekCharsIndex < peekCharsLength) {
1139 int c = peekChars [peekCharsIndex];
1142 if (c < 0xD800 || c >= 0xDFFF)
1144 return PeekSurrogate (c);
1146 if (!ReadTextReader (-1))
1152 private int ReadChar ()
1154 int ch = PeekChar ();
1158 peekCharsIndex++; //Increment by 2 when a compound UCS-4 character was found
1163 } else if (ch != -1) {
1169 private void Advance (int ch) {
1173 peekCharsIndex++; //Increment by 2 when a compound UCS-4 character was found
1178 } else if (ch != -1) {
1183 private bool ReadTextReader (int remained)
1185 if (peekCharsLength < 0) { // initialized buffer
1186 peekCharsLength = reader.Read (peekChars, 0, peekChars.Length);
1187 return peekCharsLength > 0;
1189 int offset = remained >= 0 ? 1 : 0;
1190 int copysize = peekCharsLength - curNodePeekIndex;
1192 // It must assure that current tag content always exists
1194 if (!preserveCurrentTag) {
1195 curNodePeekIndex = 0;
1198 } else if (peekCharsLength < peekChars.Length) {
1199 // NonBlockingStreamReader returned less bytes
1200 // than the size of the buffer. In that case,
1201 // just refill the buffer.
1202 } else if (curNodePeekIndex <= (peekCharsLength >> 1)) {
1203 // extend the buffer
1204 char [] tmp = new char [peekChars.Length * 2];
1205 Array.Copy (peekChars, curNodePeekIndex,
1208 curNodePeekIndex = 0;
1209 peekCharsIndex = copysize;
1211 Array.Copy (peekChars, curNodePeekIndex,
1212 peekChars, 0, copysize);
1213 curNodePeekIndex = 0;
1214 peekCharsIndex = copysize;
1217 peekChars [peekCharsIndex] = (char) remained;
1218 int count = peekChars.Length - peekCharsIndex - offset;
1219 if (count > peekCharCapacity)
1220 count = peekCharCapacity;
1221 int read = reader.Read (
1222 peekChars, peekCharsIndex + offset, count);
1223 int remainingSize = offset + read;
1224 peekCharsLength = peekCharsIndex + remainingSize;
1226 return (remainingSize != 0);
1229 private bool ReadContent ()
1232 parserContext.NamespaceManager.PopScope ();
1233 parserContext.PopScope ();
1237 if (returnEntityReference)
1238 SetEntityReferenceProperties ();
1240 int c = PeekChar ();
1242 readState = ReadState.EndOfFile;
1243 ClearValueBuffer ();
1245 XmlNodeType.None, // nodeType
1246 String.Empty, // name
1247 String.Empty, // prefix
1248 String.Empty, // localName
1249 false, // isEmptyElement
1251 true // clearAttributes
1254 throw NotWFError ("unexpected end of file. Current depth is " + depth);
1261 switch (PeekChar ())
1269 ReadProcessingInstruction ();
1284 if (!ReadWhitespace ())
1286 return ReadContent ();
1294 return this.ReadState != ReadState.EndOfFile;
1297 private void SetEntityReferenceProperties ()
1299 DTDEntityDeclaration decl = DTD != null ? DTD.EntityDecls [entityReferenceName] : null;
1300 if (this.isStandalone)
1301 if (DTD == null || decl == null || !decl.IsInternalSubset)
1302 throw NotWFError ("Standalone document must not contain any references to an non-internally declared entity.");
1303 if (decl != null && decl.NotationName != null)
1304 throw NotWFError ("Reference to any unparsed entities is not allowed here.");
1306 ClearValueBuffer ();
1308 XmlNodeType.EntityReference, // nodeType
1309 entityReferenceName, // name
1310 String.Empty, // prefix
1311 entityReferenceName, // localName
1312 false, // isEmptyElement
1314 true // clearAttributes
1317 returnEntityReference = false;
1318 entityReferenceName = String.Empty;
1321 // The leading '<' has already been consumed.
1322 private void ReadStartTag ()
1324 if (currentState == XmlNodeType.EndElement)
1325 throw NotWFError ("Multiple document element was detected.");
1326 currentState = XmlNodeType.Element;
1328 parserContext.NamespaceManager.PushScope ();
1330 currentLinkedNodeLineNumber = line;
1331 currentLinkedNodeLinePosition = column;
1333 string prefix, localName;
1334 string name = ReadName (out prefix, out localName);
1335 if (currentState == XmlNodeType.EndElement)
1336 throw NotWFError ("document has terminated, cannot open new element");
1338 bool isEmptyElement = false;
1343 if (XmlChar.IsFirstNameChar (PeekChar ()))
1344 ReadAttributes (false);
1345 cursorToken = this.currentToken;
1348 for (int i = 0; i < attributeCount; i++)
1349 attributeTokens [i].FillXmlns ();
1350 for (int i = 0; i < attributeCount; i++)
1351 attributeTokens [i].FillNamespace ();
1355 for (int i = 0; i < attributeCount; i++)
1356 if (attributeTokens [i].Prefix == "xmlns" &&
1357 attributeTokens [i].Value == String.Empty)
1358 throw NotWFError ("Empty namespace URI cannot be mapped to non-empty prefix.");
1360 for (int i = 0; i < attributeCount; i++) {
1361 for (int j = i + 1; j < attributeCount; j++)
1362 if (Object.ReferenceEquals (attributeTokens [i].Name, attributeTokens [j].Name) ||
1363 (Object.ReferenceEquals (attributeTokens [i].LocalName, attributeTokens [j].LocalName) &&
1364 Object.ReferenceEquals (attributeTokens [i].NamespaceURI, attributeTokens [j].NamespaceURI)))
1365 throw NotWFError ("Attribute name and qualified name must be identical.");
1368 if (PeekChar () == '/') {
1370 isEmptyElement = true;
1375 PushElementName (name, localName, prefix);
1377 parserContext.PushScope ();
1382 XmlNodeType.Element, // nodeType
1386 isEmptyElement, // isEmptyElement
1388 false // clearAttributes
1390 if (prefix.Length > 0)
1391 currentToken.NamespaceURI = LookupNamespace (prefix, true);
1392 else if (namespaces)
1393 currentToken.NamespaceURI = parserContext.NamespaceManager.DefaultNamespace;
1396 if (NamespaceURI == null)
1397 throw NotWFError (String.Format ("'{0}' is undeclared namespace.", Prefix));
1399 for (int i = 0; i < attributeCount; i++) {
1400 MoveToAttribute (i);
1401 if (NamespaceURI == null)
1402 throw NotWFError (String.Format ("'{0}' is undeclared namespace.", Prefix));
1409 for (int i = 0; i < attributeCount; i++) {
1410 if (!Object.ReferenceEquals (attributeTokens [i].Prefix, XmlNamespaceManager.PrefixXml))
1412 string aname = attributeTokens [i].LocalName;
1413 string value = attributeTokens [i].Value;
1416 if (this.resolver != null) {
1418 BaseURI != String.Empty ?
1419 new Uri (BaseURI) : null;
1420 Uri uri = resolver.ResolveUri (
1422 parserContext.BaseURI =
1428 parserContext.BaseURI = value;
1431 parserContext.XmlLang = value;
1436 parserContext.XmlSpace = XmlSpace.Preserve;
1439 parserContext.XmlSpace = XmlSpace.Default;
1442 throw NotWFError (String.Format ("Invalid xml:space value: {0}", value));
1449 CheckCurrentStateUpdate ();
// Pushes the name triple of a newly opened element onto the element-name
// stack, doubling the backing array first when it is already full.
private void PushElementName (string name, string local, string prefix)
{
	int capacity = elementNames.Length;
	if (elementNameStackPos == capacity) {
		TagName [] grown = new TagName [capacity * 2];
		Array.Copy (elementNames, 0, grown, 0, elementNameStackPos);
		elementNames = grown;
	}

	elementNames [elementNameStackPos] = new TagName (name, local, prefix);
	elementNameStackPos++;
}
// Parses an end tag and exposes it as an EndElement node.
// Some statements of this method are not visible in this excerpt; the
// notes below describe only the lines that are shown.
1463 // The reader is positioned on the first character
1464 // of the element's name.
1465 private void ReadEndTag ()
// An end tag is only legal while the reader state is Element.
1467 if (currentState != XmlNodeType.Element)
1468 throw NotWFError ("End tag cannot appear in this state.");
// Remember the position at which this node starts.
1470 currentLinkedNodeLineNumber = line;
1471 currentLinkedNodeLinePosition = column;
// An end tag with no open element is a well-formedness error.
1473 if (elementNameStackPos == 0)
1474 throw NotWFError ("closing element without matching opening element");
// Pop the innermost open element and require exactly its qualified name,
// followed by optional whitespace and '>'.
1475 TagName expected = elementNames [--elementNameStackPos];
1476 Expect (expected.Name);
1478 ExpectAfterWhitespace ('>');
1483 XmlNodeType.EndElement, // nodeType
1484 expected.Name, // name
1485 expected.Prefix, // prefix
1486 expected.LocalName, // localName
1487 false, // isEmptyElement
1489 true // clearAttributes
// Resolve the namespace the same way the visible start-tag code does:
// a prefixed name is looked up, an unprefixed one takes the default
// namespace when namespace support is enabled.
1491 if (expected.Prefix.Length > 0)
1492 currentToken.NamespaceURI = LookupNamespace (expected.Prefix, true);
1493 else if (namespaces)
1494 currentToken.NamespaceURI = parserContext.NamespaceManager.DefaultNamespace;
// May switch the reader state to EndElement once the root is closed.
1498 CheckCurrentStateUpdate ();
// Marks the document element as finished: when the reader is at depth 0,
// multiple roots are not allowed, and the current node is either an empty
// element or an end tag, the reader state becomes EndElement.
private void CheckCurrentStateUpdate ()
{
	if (depth != 0 || allowMultipleRoot)
		return;

	if (IsEmptyElement || NodeType == XmlNodeType.EndElement)
		currentState = XmlNodeType.EndElement;
}
// Appends a code point above U+FFFF to nameBuffer as a UTF-16 surrogate
// pair. Room for the first code unit is assumed to exist already; the
// buffer is grown, if necessary, before the second one is stored.
private void AppendSurrogatePairNameChar (int ch)
{
	int offset = ch - 0x10000;
	char high = (char) (offset / 0x400 + 0xD800);
	char low = (char) (offset % 0x400 + 0xDC00);

	nameBuffer [nameLength++] = high;
	if (nameLength == nameCapacity)
		ExpandNameCapacity ();
	nameBuffer [nameLength++] = low;
}
// Doubles the capacity of nameBuffer, keeping the nameLength characters
// collected so far.
private void ExpandNameCapacity ()
{
	nameCapacity *= 2;
	char [] grown = new char [nameCapacity];
	Array.Copy (nameBuffer, grown, nameLength);
	nameBuffer = grown;
}
// Appends one character, given as an int code point, to valueBuffer.
// Values that fit in a single UTF-16 code unit (up to and including
// U+FFFF) are appended directly; larger values are split into a
// surrogate pair by AppendSurrogatePairValueChar.
//
// The comparison must be inclusive: with '<', U+FFFF itself was routed to
// the surrogate path, where (ch - 0x10000) is negative and two bogus code
// units were appended instead of the single character.
private void AppendValueChar (int ch)
{
	if (ch <= Char.MaxValue)
		valueBuffer.Append ((char) ch);
	else
		AppendSurrogatePairValueChar (ch);
}
// Appends a code point above U+FFFF to valueBuffer as a UTF-16 surrogate
// pair (high surrogate first, then low surrogate).
private void AppendSurrogatePairValueChar (int ch)
{
	int offset = ch - 0x10000;
	char high = (char) (offset / 0x400 + 0xD800);
	char low = (char) (offset % 0x400 + 0xDC00);

	valueBuffer.Append (high);
	valueBuffer.Append (low);
}
// Builds the string value of the current node from valueBuffer.
// Whitespace nodes are interned through a private NameTable; every other
// node type gets a plain copy of the buffer. Some lines (the switch header
// and what look like conditional-compilation directives) are not visible
// in this excerpt.
1539 private string CreateValueString ()
1541 // Since whitespace strings are mostly identical
1542 // depending on the Depth, we make use of NameTable
1543 // to atomize whitespace strings.
1545 case XmlNodeType.Whitespace:
1546 case XmlNodeType.SignificantWhitespace:
1547 int len = valueBuffer.Length;
// The scratch array and the pool are created lazily on first use.
1548 if (whitespaceCache == null)
1549 whitespaceCache = new char [32];
// Runs that do not fit in the scratch array are handled differently; the
// statement guarded by this test is not shown (presumably it leaves the
// switch and falls through to the plain copy below -- TODO confirm).
1550 if (len >= whitespaceCache.Length)
1552 if (whitespacePool == null)
1553 whitespacePool = new NameTable ();
// Two ways of copying the buffer into the scratch array appear here;
// presumably they are alternatives selected at compile time -- TODO confirm.
1555 valueBuffer.CopyTo (0, whitespaceCache, 0, len);
1557 for (int i = 0; i < len; i++)
1558 whitespaceCache [i] = valueBuffer [i];
// NameTable.Add returns the atomized string for these characters.
1560 return whitespacePool.Add (whitespaceCache, 0, valueBuffer.Length);
// Both arms of the conditional request the full buffer contents; only the
// overload that is called differs.
1562 return (valueBuffer.Capacity < 100) ?
1563 valueBuffer.ToString (0, valueBuffer.Length) :
1564 valueBuffer.ToString ();
// Empties valueBuffer by resetting its length, so the same StringBuilder
// instance is reused for the next value.
1567 private void ClearValueBuffer ()
1569 valueBuffer.Length = 0;
// Reads character data up to the next '<' or end of input and exposes it
// as a Text, Whitespace or SignificantWhitespace node (or as an entity
// reference when one is encountered before any text was collected).
// notWhitespace: true when the caller already knows the run contains a
// non-whitespace character.
// Several lines of this method are not visible in this excerpt.
1572 // The reader is positioned on the first character
1574 private void ReadText (bool notWhitespace)
// Text is only legal while the reader state is Element.
1576 if (currentState != XmlNodeType.Element)
1577 throw NotWFError ("Text node cannot appear in this state.");
1578 preserveCurrentTag = false;
1581 ClearValueBuffer ();
1583 int ch = PeekChar ();
// Tracks a preceding ']' so that the forbidden sequence "]]>" is detected.
1584 bool previousWasCloseBracket = false;
1586 while (ch != '<' && ch != -1) {
// Reference handling: entity references are not ignored here (argument is
// false), so ReadEntityReference may set returnEntityReference.
1589 ch = ReadReference (false);
1590 if (returnEntityReference) // Returns -1 if char validation should not be done
1592 } else if (normalization && ch == '\r') {
1596 // append '\n' instead of '\r'.
1597 AppendValueChar ('\n');
1598 // and in case of "\r\n", discard '\r'.
1600 if (CharacterChecking && XmlChar.IsInvalid (ch))
1601 throw NotWFError ("Not allowed character was found.");
1605 // FIXME: it might be optimized by the JIT later,
1606 // AppendValueChar (ch);
// Manually inlined copy of AppendValueChar.
// NOTE(review): with '<', ch == 0xFFFF takes the surrogate branch, where
// (ch - 0x10000) is negative; '<=' looks intended -- confirm.
1608 if (ch < Char.MaxValue)
1609 valueBuffer.Append ((char) ch);
1611 AppendSurrogatePairValueChar (ch);
1616 if (previousWasCloseBracket)
1617 if (PeekChar () == '>')
1618 throw NotWFError ("Inside text content, character sequence ']]>' is not allowed.");
1619 previousWasCloseBracket = true;
1621 else if (previousWasCloseBracket)
1622 previousWasCloseBracket = false;
1624 notWhitespace = true;
// An entity reference seen before any text was collected is reported as
// its own node.
1627 if (returnEntityReference && valueBuffer.Length == 0) {
1628 SetEntityReferenceProperties ();
// Otherwise choose the node type: Text when a non-whitespace character was
// seen, else SignificantWhitespace under xml:space="preserve", else
// Whitespace.
1630 XmlNodeType nodeType = notWhitespace ? XmlNodeType.Text :
1631 this.XmlSpace == XmlSpace.Preserve ? XmlNodeType.SignificantWhitespace : XmlNodeType.Whitespace;
1633 nodeType, // nodeType
1634 String.Empty, // name
1635 String.Empty, // prefix
1636 String.Empty, // localName
1637 false, // isEmptyElement
1638 null, // value: create only when required
1639 true // clearAttributes
1644 // The leading '&' has already been consumed.
1645 // Dispatches on the next character: '#' starts a character
1646 // reference, anything else is read as an entity reference.
1647 // The result is the int returned by ReadCharacterReference ()
1648 // or ReadEntityReference (); the return type is int, not bool.
// For a general entity reference that is not ignored, ReadEntityReference
// sets returnEntityReference; ReadText tests that flag so that the next
// call to Read can return the EntityReference node.
// ignoreEntityReferences is passed through to ReadEntityReference.
1649 private int ReadReference (bool ignoreEntityReferences)
1651 if (PeekChar () == '#') {
// (The statement between the test above and this return is not shown;
// presumably it consumes the '#' -- TODO confirm.)
1653 return ReadCharacterReference ();
1655 return ReadEntityReference (ignoreEntityReferences);
// Parses the numeric part of a character reference ("x" hex-digits ";" or
// decimal-digits ";") and returns the referenced code point.
// The method starts by peeking for 'x', so the caller is expected to have
// consumed the leading "&#" already.
//
// Throws the NotWFError exception when a digit is illegal, when the value
// exceeds #x10FFFF, or (only when both CharacterChecking and Normalization
// are on) when the value is not a legal XML character.
//
// The range check runs after every digit. Without it the accumulator
// silently overflowed int, so an over-long reference could wrap around to
// an arbitrary -- possibly valid -- character, or to -1.
private int ReadCharacterReference ()
{
	const int MaxCodePoint = 0x10FFFF;
	int value = 0;
	int ch;

	if (PeekChar () == 'x') {
		ReadChar (); // 'x'

		while ((ch = PeekChar ()) != ';' && ch != -1) {
			ReadChar ();

			if (ch >= '0' && ch <= '9')
				value = (value << 4) + ch - '0';
			else if (ch >= 'A' && ch <= 'F')
				value = (value << 4) + ch - 'A' + 10;
			else if (ch >= 'a' && ch <= 'f')
				value = (value << 4) + ch - 'a' + 10;
			else
				throw NotWFError (String.Format (CultureInfo.InvariantCulture,
					"invalid hexadecimal digit: {0} (#x{1:X})",
					(char) ch,
					ch));

			// value was <= MaxCodePoint before this digit, so the
			// shift above cannot have overflowed.
			if (value > MaxCodePoint)
				throw NotWFError ("Character reference is out of range; the largest legal value is #x10FFFF.");
		}
	} else {
		while ((ch = PeekChar ()) != ';' && ch != -1) {
			ReadChar ();

			if (ch >= '0' && ch <= '9')
				value = value * 10 + ch - '0';
			else
				throw NotWFError (String.Format (CultureInfo.InvariantCulture,
					"invalid decimal digit: {0} (#x{1:X})",
					(char) ch,
					ch));

			if (value > MaxCodePoint)
				throw NotWFError ("Character reference is out of range; the largest legal value is #x10FFFF.");
		}
	}

	ReadChar (); // ';'

	// There is no way to save surrogate pairs...
	if (CharacterChecking && Normalization &&
		XmlChar.IsInvalid (value))
		throw NotWFError ("Referenced character was not allowed in XML. Normalization is " + normalization + ", checkCharacters = " + checkCharacters);
	return value;
}
// Reads a named entity reference; the name is read first.
// ignoreEntityReferences: when true, a reference that is not one of the
// predefined entities is copied into valueBuffer in its literal "&name;"
// form; when false, it is recorded in returnEntityReference and
// entityReferenceName instead.
// Some statements (including the return statements) are not visible in
// this excerpt.
1704 // Returns -1 if it should not be validated.
1705 // Real EOF must not be detected here.
1706 private int ReadEntityReference (bool ignoreEntityReferences)
1708 string name = ReadName ();
// A non-negative result means the name is a predefined entity.
1711 int predefined = XmlChar.GetPredefinedEntity (name);
1712 if (predefined >= 0)
1715 if (ignoreEntityReferences) {
1716 AppendValueChar ('&');
1717 for (int i = 0; i < name.Length; i++)
1718 AppendValueChar (name [i]);
1719 AppendValueChar (';');
1721 returnEntityReference = true;
1722 entityReferenceName = name;
// Reads the attribute list of a start tag (or of the XML declaration when
// isXmlDecl is true) into attribute tokens, one token per attribute.
// Several lines, including the loop header, are not visible in this
// excerpt.
1728 // The reader is positioned on the first character of
1729 // the attribute name.
1730 private void ReadAttributes (bool isXmlDecl)
// Set after an attribute that was not followed by whitespace; a further
// attribute name is then an error.
1733 bool requireWhitespace = false;
// Start with no current attribute / attribute value token.
1734 currentAttribute = -1;
1735 currentAttributeValue = -1;
1738 if (!SkipWhitespace () && requireWhitespace)
1739 throw NotWFError ("Unexpected token. Name is required here.");
// Take the next attribute token and stamp it with the current position.
1741 IncrementAttributeToken ();
1742 currentAttributeToken.LineNumber = line;
1743 currentAttributeToken.LinePosition = column;
1745 string prefix, localName;
1746 currentAttributeToken.Name = ReadName (out prefix, out localName);
1747 currentAttributeToken.Prefix = prefix;
1748 currentAttributeToken.LocalName = localName;
1749 ExpectAfterWhitespace ('=');
// -1: no quote character supplied, so the value reader reads it itself.
1751 ReadAttributeValueTokens (-1);
1752 // This hack is required for xmldecl which has
1753 // both effective attributes and Value.
1756 dummyValue = currentAttributeToken.Value;
1760 if (!SkipWhitespace ())
1761 requireWhitespace = true;
1762 peekChar = PeekChar ();
// '?', '/' and '>' are tested as possible ends of the attribute list (the
// guarded statements are not shown); the loop also stops at end of input.
1764 if (peekChar == '?')
1767 else if (peekChar == '/' || peekChar == '>')
1769 } while (peekChar != -1);
// Leave the reader positioned on the owner node, not on an attribute.
1771 currentAttribute = -1;
1772 currentAttributeValue = -1;
// Adds a synthetic, namespace-less attribute with the given name, backed
// by one value token. The arguments passed to SetTokenProperties are not
// visible in this excerpt.
1775 private void AddAttributeWithValue (string name, string value)
1777 IncrementAttributeToken ();
1778 XmlAttributeTokenInfo ati = attributeTokens [currentAttribute];
// The name is atomized through the parser context's name table.
1779 ati.Name = parserContext.NameTable.Add (name);
1780 ati.Prefix = String.Empty;
1781 ati.NamespaceURI = String.Empty;
1782 IncrementAttributeValueToken ();
1783 XmlTokenInfo vti = attributeValueTokens [currentAttributeValue];
1784 SetTokenProperties (vti,
// Makes the attribute token at index currentAttribute the current one,
// growing the token array and creating the token object on demand, then
// clears it for reuse.
// NOTE(review): the statement that advances currentAttribute is not visible
// in this excerpt; presumably it precedes the capacity test, as in
// IncrementAttributeValueToken -- confirm.
1796 private void IncrementAttributeToken ()
// Double the array when the index has reached its length.
1799 if (attributeTokens.Length == currentAttribute) {
1800 XmlAttributeTokenInfo [] newArray =
1801 new XmlAttributeTokenInfo [attributeTokens.Length * 2];
1802 attributeTokens.CopyTo (newArray, 0);
1803 attributeTokens = newArray;
// Token objects are allocated lazily and recycled afterwards.
1805 if (attributeTokens [currentAttribute] == null)
1806 attributeTokens [currentAttribute] = new XmlAttributeTokenInfo (this);
1807 currentAttributeToken = attributeTokens [currentAttribute];
1808 currentAttributeToken.Clear ();
// Advances to the next attribute value token and makes it current.
// The token array is doubled when the new index reaches its length, token
// objects are created on first use, and the selected token is cleared so
// it can be reused.
private void IncrementAttributeValueToken ()
{
	int slot = ++currentAttributeValue;

	if (slot == attributeValueTokens.Length) {
		XmlTokenInfo [] grown = new XmlTokenInfo [attributeValueTokens.Length * 2];
		attributeValueTokens.CopyTo (grown, 0);
		attributeValueTokens = grown;
	}

	XmlTokenInfo token = attributeValueTokens [slot];
	if (token == null) {
		token = new XmlTokenInfo (this);
		attributeValueTokens [slot] = token;
	}

	currentAttributeValueToken = token;
	token.Clear ();
}
// Reads one quoted attribute value and splits it into value tokens: runs
// of text, plus one EntityReference token for each general entity reference
// that is not expanded.
// dummyQuoteChar: a negative value means the opening quote is read from
// the input; otherwise the given character is used as the quote.
// Many lines (the main loop header, case labels and several statements)
// are not visible in this excerpt.
1825 // LAMESPEC: Orthodox XML reader should normalize attribute values
1826 private void ReadAttributeValueTokens (int dummyQuoteChar)
1828 int quoteChar = (dummyQuoteChar < 0) ? ReadChar () : dummyQuoteChar;
1830 if (quoteChar != '\'' && quoteChar != '\"')
1831 throw NotWFError ("an attribute value was not quoted");
1832 currentAttributeToken.QuoteChar = (char) quoteChar;
// Open the first value token and record where the value starts.
1834 IncrementAttributeValueToken ();
1835 currentAttributeToken.ValueTokenStartIndex = currentAttributeValue;
1836 currentAttributeValueToken.LineNumber = line;
1837 currentAttributeValueToken.LinePosition = column;
// incrementToken: set after an EntityReference token was emitted, so that
// the next character opens a fresh text token.
1839 bool incrementToken = false;
1840 bool isNewToken = true;
// Each token's text is a [ValueBufferStart, ValueBufferEnd) range of the
// shared valueBuffer.
1843 currentAttributeValueToken.ValueBufferStart = valueBuffer.Length;
1846 if (ch == quoteChar)
1849 if (incrementToken) {
1850 IncrementAttributeValueToken ();
1851 currentAttributeValueToken.ValueBufferStart = valueBuffer.Length;
1852 currentAttributeValueToken.LineNumber = line;
1853 currentAttributeValueToken.LinePosition = column;
1854 incrementToken = false;
1861 throw NotWFError ("attribute values cannot contain '<'");
// End of input is an error only when the value comes from the document
// (negative dummyQuoteChar).
1863 if (dummyQuoteChar < 0)
1864 throw NotWFError ("unexpected end of file in an attribute value");
1865 else // Attribute value constructor.
1871 if (PeekChar () == '\n')
1872 continue; // skip '\r'.
1874 // The csc in MS.NET 2.0 beta 1 barfs on this goto, so work around that
1883 // When Normalize = true, then replace
1884 // all spaces to ' '
// Numeric character reference: the referenced character is appended.
1890 if (PeekChar () == '#') {
1892 ch = ReadCharacterReference ();
1893 AppendValueChar (ch);
1896 // Check XML 1.0 section 3.1 WFC.
1897 string entName = ReadName ();
1899 int predefined = XmlChar.GetPredefinedEntity (entName);
// Not a predefined entity: verify the well-formedness constraints, then
// either expand it in place or emit an EntityReference token.
1900 if (predefined < 0) {
1901 CheckAttributeEntityReferenceWFC (entName);
1903 if (entityHandling == EntityHandling.ExpandEntities) {
1904 string value = DTD.GenerateEntityAttributeText (entName);
1905 foreach (char c in value)
1906 AppendValueChar (c);
// Close the text token collected so far ...
1910 currentAttributeValueToken.ValueBufferEnd = valueBuffer.Length;
1911 currentAttributeValueToken.NodeType = XmlNodeType.Text;
// ... and add a token for the reference itself.
1913 IncrementAttributeValueToken ();
1914 currentAttributeValueToken.Name = entName;
1915 currentAttributeValueToken.Value = String.Empty;
1916 currentAttributeValueToken.NodeType = XmlNodeType.EntityReference;
1917 incrementToken = true;
1921 AppendValueChar (predefined);
1924 if (CharacterChecking && XmlChar.IsInvalid (ch))
1925 throw NotWFError ("Invalid character was found.");
1926 // FIXME: it might be optimized by the JIT later,
1927 // AppendValueChar (ch);
// Manually inlined copy of AppendValueChar.
// NOTE(review): with '<', ch == 0xFFFF takes the surrogate branch, where
// (ch - 0x10000) is negative; '<=' looks intended -- confirm.
1929 if (ch < Char.MaxValue)
1930 valueBuffer.Append ((char) ch);
1932 AppendSurrogatePairValueChar (ch);
// Close the trailing text token unless the value ended with an entity
// reference token.
1939 if (!incrementToken) {
1940 currentAttributeValueToken.ValueBufferEnd = valueBuffer.Length;
1941 currentAttributeValueToken.NodeType = XmlNodeType.Text;
1943 currentAttributeToken.ValueTokenEndIndex = currentAttributeValue;
// Enforces the well-formedness constraints on a general entity reference
// that appears inside an attribute value.
// entName: name of the referenced entity.
//
// An undeclared entity is an error when entities are being expanded, or
// when both a DTD and a resolver are present; otherwise it is tolerated.
// A declared entity must not be external, must come from the internal
// subset in a standalone document, and must not contain '<'.
private void CheckAttributeEntityReferenceWFC (string entName)
{
	DTDEntityDeclaration entDecl = null;
	if (DTD != null)
		entDecl = DTD.EntityDecls [entName];

	if (entDecl == null) {
		bool declarationRequired =
			entityHandling == EntityHandling.ExpandEntities
			|| (DTD != null && resolver != null);
		if (declarationRequired)
			throw NotWFError (String.Format ("Referenced entity '{0}' does not exist.", entName));
		return;
	}

	if (entDecl.HasExternalReference)
		throw NotWFError ("Reference to external entities is not allowed in the value of an attribute.");

	if (isStandalone && !entDecl.IsInternalSubset)
		throw NotWFError ("Reference to external entities is not allowed in the internal subset.");

	if (entDecl.EntityValue.IndexOf ('<') >= 0)
		throw NotWFError ("Attribute must not contain character '<' either directly or indirectly by way of entity references.");
}
// Reads a processing instruction; when the target is "xml" the content is
// handed to VerifyXmlDeclaration instead. Several lines are not visible in
// this excerpt.
1967 // The reader is positioned on the first character
1970 // It may be xml declaration or processing instruction.
1971 private void ReadProcessingInstruction ()
1973 string target = ReadName ();
// Targets that spell "xml" in any other letter case are rejected.
1974 if (target != "xml" && target.ToLower (CultureInfo.InvariantCulture) == "xml")
1975 throw NotWFError ("Not allowed processing instruction name which starts with 'X', 'M', 'L' was found.");
// The target must be followed by whitespace, or directly by '?'.
1977 if (!SkipWhitespace ())
1978 if (PeekChar () != '?')
1979 throw NotWFError ("Invalid processing instruction name was found.");
1981 ClearValueBuffer ();
// Collect the content until the closing "?>" is seen.
1984 while ((ch = PeekChar ()) != -1) {
1987 if (ch == '?' && PeekChar () == '>') {
1992 if (CharacterChecking && XmlChar.IsInvalid (ch))
1993 throw NotWFError ("Invalid character was found.");
1994 AppendValueChar (ch);
// Reference comparison: this only matches when target is the very same
// string instance as XmlNamespaceManager.PrefixXml (presumably guaranteed
// by name-table atomization -- TODO confirm).
1997 if (Object.ReferenceEquals (target, XmlNamespaceManager.PrefixXml))
1998 VerifyXmlDeclaration ();
// Seeing a PI before anything else moves the state on from None.
2000 if (currentState == XmlNodeType.None)
2001 currentState = XmlNodeType.XmlDeclaration;
2004 XmlNodeType.ProcessingInstruction, // nodeType
2006 String.Empty, // prefix
2007 target, // localName
2008 false, // isEmptyElement
2009 null, // value: create only when required
2010 true // clearAttributes
// Validates the content of an XML declaration (already collected in
// valueBuffer) and exposes it as an XmlDeclaration node whose pseudo
// attributes are version, encoding and standalone.
// Several lines are not visible in this excerpt, including the assignments
// to the 'encoding' and 'standalone' locals.
2015 void VerifyXmlDeclaration ()
// Unless multiple roots are allowed, the declaration must come first.
2017 if (!allowMultipleRoot && currentState != XmlNodeType.None)
2018 throw NotWFError ("XML declaration cannot appear in this state.");
2020 currentState = XmlNodeType.XmlDeclaration;
2022 string text = CreateValueString ();
2028 string encoding = null, standalone = null;
// The first pseudo attribute must be version="1.0".
2030 ParseAttributeFromString (text, ref idx, out name, out value);
2031 if (name != "version" || value != "1.0")
2032 throw NotWFError ("'version' is expected.");
// A further pseudo attribute is parsed only after whitespace; an empty
// name means that none was found.
2033 name = String.Empty;
2034 if (SkipWhitespaceInString (text, ref idx) && idx < text.Length)
2035 ParseAttributeFromString (text, ref idx, out name, out value);
2036 if (name == "encoding") {
2037 if (!XmlChar.IsValidIANAEncoding (value))
2038 throw NotWFError ("'encoding' must be a valid IANA encoding name.");
// The parser context takes the encoding of the underlying stream reader
// when there is one; the other assignment is Encoding.Unicode.
2039 if (reader is XmlStreamReader)
2040 parserContext.Encoding = ((XmlStreamReader) reader).Encoding;
2042 parserContext.Encoding = Encoding.Unicode;
2044 name = String.Empty;
2045 if (SkipWhitespaceInString (text, ref idx) && idx < text.Length)
2046 ParseAttributeFromString (text, ref idx, out name, out value);
2048 if (name == "standalone") {
2049 this.isStandalone = value == "yes";
2050 if (value != "yes" && value != "no")
2051 throw NotWFError ("Only 'yes' or 'no' is allow for 'standalone'");
2053 SkipWhitespaceInString (text, ref idx);
// Any other remaining pseudo attribute is an error.
2055 else if (name.Length != 0)
2056 throw NotWFError (String.Format ("Unexpected token: '{0}'", name));
// Nothing may follow the last pseudo attribute.
2058 if (idx < text.Length)
2059 throw NotWFError ("'?' is expected.");
// Expose the pseudo attributes that were present.
2061 AddAttributeWithValue ("version", "1.0");
2062 if (encoding != null)
2063 AddAttributeWithValue ("encoding", encoding);
2064 if (standalone != null)
2065 AddAttributeWithValue ("standalone", standalone);
2066 currentAttribute = currentAttributeValue = -1;
2069 XmlNodeType.XmlDeclaration, // nodeType
2071 String.Empty, // prefix
2073 false, // isEmptyElement
// The attributes that were just added must be kept.
2075 false // clearAttributes
// Advances idx past any XML whitespace in text, starting at idx.
// Returns true when at least one character was skipped.
bool SkipWhitespaceInString (string text, ref int idx)
{
	int origin = idx;
	for (; idx < text.Length; idx++) {
		if (!XmlChar.IsWhitespace (text [idx]))
			break;
	}
	return idx > origin;
}
// Parses one pseudo attribute of the form  name = "value"  (single or
// double quotes) from src, starting at idx.
// src:   text being parsed (the content of an XML declaration).
// idx:   in: position at which to start; out: position just after the
//        closing quote.
// name:  receives the attribute name (empty when no name characters are
//        found, in which case the '=' check below normally fails).
// value: receives the text between the quotes.
// Throws the NotWFError exception when '=' or the opening quote is missing,
// or when the value is not terminated by its closing quote.
//
// The last check fixes a silent acceptance: an unterminated value such as
//   version="1.0
// used to be returned as if it were closed, leaving idx one past the end
// of src.
private void ParseAttributeFromString (string src,
	ref int idx, out string name, out string value)
{
	SkipWhitespaceInString (src, ref idx);

	int start = idx;
	while (idx < src.Length && XmlChar.IsNameChar (src [idx]))
		idx++;
	name = src.Substring (start, idx - start);

	SkipWhitespaceInString (src, ref idx);
	if (idx == src.Length || src [idx] != '=')
		throw NotWFError (String.Format ("'=' is expected after {0}", name));
	idx++;

	SkipWhitespaceInString (src, ref idx);

	if (idx == src.Length || src [idx] != '"' && src [idx] != '\'')
		throw NotWFError ("'\"' or '\'' is expected.");

	char quote = src [idx];
	idx++;
	start = idx;

	while (idx < src.Length && src [idx] != quote)
		idx++;
	if (idx == src.Length)
		throw NotWFError (String.Format ("The value of '{0}' is not terminated by a closing quote.", name));

	value = src.Substring (start, idx - start);
	idx++; // step over the closing quote
}
// Skips a text declaration ("<?xml version=... encoding=... ?>") at the
// current position, if there is one, validating its version and encoding
// parts. Many lines are not visible in this excerpt.
2121 internal void SkipTextDeclaration ()
// No '<' (or, below, no '?') means there is no text declaration here.
2123 if (PeekChar () != '<')
2128 if (PeekChar () != '?') {
// Make sure enough characters are buffered to inspect the PI target.
2134 while (peekCharsIndex < 6) {
2135 if (PeekChar () < 0)
// The four characters at offset 2 must be exactly "xml "; any other letter
// case of that sequence is an error.
2140 if (new string (peekChars, 2, 4) != "xml ") {
2141 if (new string (peekChars, 2, 4).ToLower (CultureInfo.InvariantCulture) == "xml ") {
2142 throw NotWFError ("Processing instruction name must not be character sequence 'X' 'M' 'L' with case insensitivity.");
// Optional version part: at most three characters are collected, and a
// three-character value must be "1.0".
2151 if (PeekChar () == 'v') {
2153 ExpectAfterWhitespace ('=');
2155 int quoteChar = ReadChar ();
2156 char [] expect1_0 = new char [3];
2157 int versionLength = 0;
2158 switch (quoteChar) {
2161 while (PeekChar () != quoteChar) {
2162 if (PeekChar () == -1)
2163 throw NotWFError ("Invalid version declaration inside text declaration.");
2164 else if (versionLength == 3)
2165 throw NotWFError ("Invalid version number inside text declaration.");
2167 expect1_0 [versionLength] = (char) ReadChar ();
2169 if (versionLength == 3 && new String (expect1_0) != "1.0")
2170 throw NotWFError ("Invalid version number inside text declaration.");
2177 throw NotWFError ("Invalid version declaration inside text declaration.");
// Encoding part: the quoted value is skipped here without being checked.
2181 if (PeekChar () == 'e') {
2182 Expect ("encoding");
2183 ExpectAfterWhitespace ('=');
2185 int quoteChar = ReadChar ();
2186 switch (quoteChar) {
2189 while (PeekChar () != quoteChar)
2190 if (ReadChar () == -1)
2191 throw NotWFError ("Invalid encoding declaration inside text declaration.");
2196 throw NotWFError ("Invalid encoding declaration inside text declaration.");
2198 // Encoding value should be checked inside XmlInputStream.
2201 // this condition is to check if this instance is
2202 // not created by XmlReader.Create() (which just
2203 // omits strict text declaration check).
2204 else if (Conformance == ConformanceLevel.Auto)
2205 throw NotWFError ("Encoding declaration is mandatory in text declaration.");
2210 curNodePeekIndex = peekCharsIndex; // without this it causes incorrect value start indication.
// Handles markup that begins with "<!". Only the first statement and the
// final error are visible in this excerpt; the dispatch on the peeked
// character is not shown.
2213 // The reader is positioned on the first character after
2214 // the leading '<!'.
2215 private void ReadDeclaration ()
2217 int ch = PeekChar ();
// Raised for markup that is not recognised.
2235 throw NotWFError ("Unexpected declaration markup was found.");
// Reads a comment up to its closing "-->" and exposes it as a Comment
// node. Some lines are not visible in this excerpt.
2239 // The reader is positioned on the first character after
2240 // the leading '<!--'.
2241 private void ReadComment ()
// A comment before anything else moves the state on from None.
2243 if (currentState == XmlNodeType.None)
2244 currentState = XmlNodeType.XmlDeclaration;
2246 preserveCurrentTag = false;
2248 ClearValueBuffer ();
2251 while ((ch = PeekChar ()) != -1) {
// "--" may only appear as the start of the terminating "-->".
2254 if (ch == '-' && PeekChar () == '-') {
2257 if (PeekChar () != '>')
2258 throw NotWFError ("comments cannot contain '--'");
// NOTE(review): unlike the visible checks in the PI, CDATA and text
// readers, this one is not guarded by CharacterChecking -- confirm whether
// that is intentional.
2264 if (XmlChar.IsInvalid (ch))
2265 throw NotWFError ("Not allowed character was found.");
2267 AppendValueChar (ch);
2271 XmlNodeType.Comment, // nodeType
2272 String.Empty, // name
2273 String.Empty, // prefix
2274 String.Empty, // localName
2275 false, // isEmptyElement
2276 null, // value: create only when required
2277 true // clearAttributes
// Reads a CDATA section up to its closing "]]>" and exposes it as a CDATA
// node. Some lines are not visible in this excerpt.
2281 // The reader is positioned on the first character after
2282 // the leading '<![CDATA['.
2283 private void ReadCDATA ()
// CDATA is only legal while the reader state is Element.
2285 if (currentState != XmlNodeType.Element)
2286 throw NotWFError ("CDATA section cannot appear in this state.");
2287 preserveCurrentTag = false;
2289 ClearValueBuffer ();
2293 while (PeekChar () != -1) {
// Look for the "]]>" terminator.
2298 if (ch == ']' && PeekChar () == ']') {
2299 ch = ReadChar (); // ']'
2301 if (PeekChar () == '>') {
2308 if (normalization && ch == '\r') {
2311 // append '\n' instead of '\r'.
2312 AppendValueChar ('\n');
2313 // otherwise, discard '\r'.
2316 if (CharacterChecking && XmlChar.IsInvalid (ch))
2317 throw NotWFError ("Invalid character was found.");
2319 // FIXME: it might be optimized by the JIT later,
2320 // AppendValueChar (ch);
// Manually inlined copy of AppendValueChar.
// NOTE(review): with '<', ch == 0xFFFF takes the surrogate branch, where
// (ch - 0x10000) is negative; '<=' looks intended -- confirm.
2322 if (ch < Char.MaxValue)
2323 valueBuffer.Append ((char) ch);
2325 AppendSurrogatePairValueChar (ch);
2330 XmlNodeType.CDATA, // nodeType
2331 String.Empty, // name
2332 String.Empty, // prefix
2333 String.Empty, // localName
2334 false, // isEmptyElement
2335 null, // value: create only when required
2336 true // clearAttributes
// Reads a document type declaration: name, optional external ID, optional
// internal subset. It builds the DTD object model and exposes the
// declaration as a DocumentType node with PUBLIC / SYSTEM pseudo
// attributes. Several lines are not visible in this excerpt.
2340 // The reader is positioned on the first character after
2341 // the leading '<!DOCTYPE'.
2342 private void ReadDoctypeDecl ()
// (The condition guarding this throw is not shown.)
2345 throw NotWFError ("Document Type Declaration (DTD) is prohibited in this XML.");
// A DOCTYPE may not follow another DOCTYPE or any element content.
2346 switch (currentState) {
2347 case XmlNodeType.DocumentType:
2348 case XmlNodeType.Element:
2349 case XmlNodeType.EndElement:
2350 throw NotWFError ("Document type cannot appear in this state.");
2352 currentState = XmlNodeType.DocumentType;
2354 string doctypeName = null;
2355 string publicId = null;
2356 string systemId = null;
2357 int intSubsetStartLine = 0;
2358 int intSubsetStartColumn = 0;
2361 doctypeName = ReadName ();
// External ID: either a system literal alone, or a public literal followed
// by whitespace and a system literal.
2366 systemId = ReadSystemLiteral (true);
2369 publicId = ReadPubidLiteral ();
2370 if (!SkipWhitespace ())
2371 throw NotWFError ("Whitespace is required between PUBLIC id and SYSTEM id.");
2372 systemId = ReadSystemLiteral (false);
2378 if(PeekChar () == '[')
2380 // read markupdecl etc. or end of decl
// Remember where the internal subset starts; the position is passed on to
// GenerateDTDObjectModel below.
2382 intSubsetStartLine = this.LineNumber;
2383 intSubsetStartColumn = this.LinePosition;
2384 ClearValueBuffer ();
2385 ReadInternalSubset ();
2386 parserContext.InternalSubset = CreateValueString ();
2388 // end of DOCTYPE decl.
2389 ExpectAfterWhitespace ('>');
2391 GenerateDTDObjectModel (doctypeName, publicId,
2392 systemId, parserContext.InternalSubset,
2393 intSubsetStartLine, intSubsetStartColumn);
2395 // set properties for <!DOCTYPE> node
2397 XmlNodeType.DocumentType, // nodeType
2398 doctypeName, // name
2399 String.Empty, // prefix
2400 doctypeName, // localName
2401 false, // isEmptyElement
2402 parserContext.InternalSubset, // value
2403 true // clearAttributes
// The identifiers are exposed as attributes named PUBLIC and SYSTEM.
2406 if (publicId != null)
2407 AddAttributeWithValue ("PUBLIC", publicId);
2408 if (systemId != null)
2409 AddAttributeWithValue ("SYSTEM", systemId);
2410 currentAttribute = currentAttributeValue = -1;
// Convenience overload: builds the DTD object model without a known start
// position for the internal subset (line and column are passed as 0).
internal DTDObjectModel GenerateDTDObjectModel (string name, string publicId,
	string systemId, string internalSubset)
{
	const int unknownLine = 0;
	const int unknownColumn = 0;
	return GenerateDTDObjectModel (name, publicId, systemId, internalSubset,
		unknownLine, unknownColumn);
}
// Creates a new DTDObjectModel on the parser context, fills in its
// identifying properties, and has a DTDReader build the model.
// intSubsetStartLine / intSubsetStartColumn: position at which the internal
// subset starts, handed to the DTDReader.
// Returns the result of DTDReader.GenerateDTDObjectModel ().
// NOTE(review): no use of the 'name' parameter is visible in this excerpt;
// at least one line of the body is not shown -- confirm it is assigned there.
2419 internal DTDObjectModel GenerateDTDObjectModel (string name, string publicId,
2420 string systemId, string internalSubset, int intSubsetStartLine, int intSubsetStartColumn)
2423 parserContext.Dtd = new DTDObjectModel (this.NameTable); // merges both internal and external subsets in the meantime,
2424 DTD.BaseURI = BaseURI;
2426 DTD.PublicId = publicId;
2427 DTD.SystemId = systemId;
2428 DTD.InternalSubset = internalSubset;
2429 DTD.XmlResolver = resolver;
2430 DTD.IsStandalone = isStandalone;
// The DTD records the reader's current position.
2431 DTD.LineNumber = line;
2432 DTD.LinePosition = column;
2434 DTDReader dr = new DTDReader (DTD, intSubsetStartLine, intSubsetStartColumn);
// The DTD reader uses the same normalization setting as this reader.
2435 dr.Normalization = this.normalization;
2436 return dr.GenerateDTDObjectModel ();
// Lexical contexts tracked while scanning the internal DTD subset.
// The member list is not visible in this excerpt. The surrounding code
// refers to Free, ElementDecl, AttlistDecl, EntityDecl, NotationDecl, PI,
// Comment, InsideSingleQuoted and InsideDoubleQuoted.
2439 private enum DtdInputState
// Typed wrapper around a non-generic Stack holding DtdInputState values.
// A new stack starts with a single DtdInputState.Free entry.
private class DtdInputStateStack
{
	Stack states;

	public DtdInputStateStack ()
	{
		states = new Stack ();
		Push (DtdInputState.Free);
	}

	// Returns the innermost state without removing it.
	public DtdInputState Peek ()
	{
		object top = states.Peek ();
		return (DtdInputState) top;
	}

	// Removes and returns the innermost state.
	public DtdInputState Pop ()
	{
		object top = states.Pop ();
		return (DtdInputState) top;
	}

	// Makes val the innermost state.
	public void Push (DtdInputState val)
	{
		states.Push (val);
	}
}
// Stack of DTD lexical contexts used while scanning the internal subset.
DtdInputStateStack stateStack = new DtdInputStateStack ();

// The innermost DTD lexical context (top of stateStack).
DtdInputState State {
	get {
		DtdInputState innermost = stateStack.Peek ();
		return innermost;
	}
}
// Consumes one character from the input, appends it to valueBuffer, and
// returns it. The result of ReadChar () is appended as is, with no test
// for end of input here.
private int ReadValueChar ()
{
	int consumed = ReadChar ();
	AppendValueChar (consumed);
	return consumed;
}
// Appends the literal s to valueBuffer.
// NOTE(review): the first statement of the body is not visible in this
// excerpt; going by the method name it presumably consumes s from the input
// via Expect (s) before the append -- confirm.
2489 private void ExpectAndAppend (string s)
2492 valueBuffer.Append (s);
// Scans the internal DTD subset, copying every character read through
// ReadValueChar into valueBuffer and tracking the lexical context on
// stateStack. Only gross structure is checked; no declarations are built.
// Many lines (the case labels in particular) are not visible in this
// excerpt.
2495 // Simply read but not generate any result.
2496 private void ReadInternalSubset ()
2498 bool continueParse = true;
2500 while (continueParse) {
2501 switch (ReadValueChar ()) {
// In the Free state the terminating character is dropped from valueBuffer
// again and the scan ends.
2504 case DtdInputState.Free:
2506 valueBuffer.Remove (valueBuffer.Length - 1, 1);
2507 continueParse = false;
2509 case DtdInputState.InsideDoubleQuoted:
2511 case DtdInputState.InsideSingleQuoted:
2514 throw NotWFError ("unexpected end of file at DTD.");
2518 throw NotWFError ("unexpected end of file at DTD.");
// Inside quotes and comments the character needs no further handling.
2521 case DtdInputState.InsideDoubleQuoted:
2522 case DtdInputState.InsideSingleQuoted:
2523 case DtdInputState.Comment:
2524 continue; // well-formed
// Markup start: the following characters decide which context is pushed.
2526 int c = ReadValueChar ();
2529 stateStack.Push (DtdInputState.PI);
2532 switch (ReadValueChar ()) {
2534 switch (ReadValueChar ()) {
2536 ExpectAndAppend ("EMENT");
2537 stateStack.Push (DtdInputState.ElementDecl);
2540 ExpectAndAppend ("TITY");
2541 stateStack.Push (DtdInputState.EntityDecl);
2544 throw NotWFError ("unexpected token '<!E'.");
2548 ExpectAndAppend ("TTLIST");
2549 stateStack.Push (DtdInputState.AttlistDecl);
2552 ExpectAndAppend ("OTATION");
2553 stateStack.Push (DtdInputState.NotationDecl);
2556 ExpectAndAppend ("-");
2557 stateStack.Push (DtdInputState.Comment);
2562 throw NotWFError (String.Format ("unexpected '<{0}'.", (char) c));
// Quote handling: a quoted state is entered only when the scan is not
// already inside the other kind of quote or inside a comment.
2566 if (State == DtdInputState.InsideSingleQuoted)
2568 else if (State != DtdInputState.InsideDoubleQuoted && State != DtdInputState.Comment)
2569 stateStack.Push (DtdInputState.InsideSingleQuoted);
2572 if (State == DtdInputState.InsideDoubleQuoted)
2574 else if (State != DtdInputState.InsideSingleQuoted && State != DtdInputState.Comment)
2575 stateStack.Push (DtdInputState.InsideDoubleQuoted);
// All four declaration states share the NotationDecl handling.
2579 case DtdInputState.ElementDecl:
2580 goto case DtdInputState.NotationDecl;
2581 case DtdInputState.AttlistDecl:
2582 goto case DtdInputState.NotationDecl;
2583 case DtdInputState.EntityDecl:
2584 goto case DtdInputState.NotationDecl;
2585 case DtdInputState.NotationDecl:
2588 case DtdInputState.InsideDoubleQuoted:
2589 case DtdInputState.InsideSingleQuoted:
2590 case DtdInputState.Comment:
2593 throw NotWFError ("unexpected token '>'");
2597 if (State == DtdInputState.PI) {
2598 if (ReadValueChar () == '>')
2603 if (State == DtdInputState.Comment) {
2604 if (PeekChar () == '-') {
2606 ExpectAndAppend (">");
// A parameter entity reference is rejected inside a markup declaration
// other than an entity declaration.
2612 if (State != DtdInputState.Free && State != DtdInputState.EntityDecl && State != DtdInputState.Comment && State != DtdInputState.InsideDoubleQuoted && State != DtdInputState.InsideSingleQuoted)
2613 throw NotWFError ("Parameter Entity Reference cannot appear as a part of markupdecl (see XML spec 2.8).");
// Reads a quoted system literal and returns its content without the
// quotes. expectSYSTEM tells whether the "SYSTEM" keyword is expected
// first (the statements that use it are not visible in this excerpt).
2619 // The reader is positioned on the first 'S' of "SYSTEM".
2620 private string ReadSystemLiteral (bool expectSYSTEM)
2624 if (!SkipWhitespace ())
2625 throw NotWFError ("Whitespace is required after 'SYSTEM'.");
// NOTE(review): no visible line verifies that this character really is a
// quote (' or ") -- confirm whether that is checked elsewhere.
2629 int quoteChar = ReadChar (); // apos or quot
2631 ClearValueBuffer ();
// Collect characters until the matching quote is read.
2632 while (c != quoteChar) {
2635 throw NotWFError ("Unexpected end of stream in ExternalID.");
2637 AppendValueChar (c);
2639 return CreateValueString ();
// Reads a quoted public identifier literal and returns its content without
// the quotes. Every character before the closing quote must satisfy
// XmlChar.IsPubidChar. Some lines are not visible in this excerpt.
2642 private string ReadPubidLiteral()
2645 if (!SkipWhitespace ())
2646 throw NotWFError ("Whitespace is required after 'PUBLIC'.");
// NOTE(review): no visible line verifies that this character really is a
// quote (' or ") -- confirm whether that is checked elsewhere.
2647 int quoteChar = ReadChar ();
2649 ClearValueBuffer ();
// Collect characters until the matching quote is read.
2650 while(c != quoteChar)
2653 if(c < 0) throw NotWFError ("Unexpected end of stream in ExternalID.");
2654 if(c != quoteChar && !XmlChar.IsPubidChar (c))
2655 throw NotWFError (String.Format ("character '{0}' not allowed for PUBLIC ID", (char)c ));
2657 AppendValueChar (c);
2659 return CreateValueString ();
// The reader is positioned on the first character of the name.
// Reads an XML name and returns it; the prefix and local name that the
// two-out-parameter overload also produces are discarded.
private string ReadName ()
{
	string ignoredPrefix, ignoredLocalName;
	string qualifiedName = ReadName (out ignoredPrefix, out ignoredLocalName);
	return qualifiedName;
}
// Reads an XML name at the current position and returns it as a string
// obtained from the parser context's name table; prefix and localName
// receive its two parts.
// Two implementations are present, selected by USE_NAME_BUFFER: the first
// takes the name directly out of the peekChars buffer, the second collects
// it in nameBuffer. Several lines are not visible in this excerpt.
2670 private string ReadName (out string prefix, out string localName)
2672 #if !USE_NAME_BUFFER
// preserveCurrentTag is forced on while the name is read and restored
// afterwards.
2673 bool savePreserve = preserveCurrentTag;
2674 preserveCurrentTag = true;
// The start is kept as an offset from curNodePeekIndex and turned back
// into an index below.
2676 int startOffset = peekCharsIndex - curNodePeekIndex;
2677 int ch = PeekChar ();
2678 if (!XmlChar.IsFirstNameChar (ch))
2679 throw NotWFError (String.Format (CultureInfo.InvariantCulture, "a name did not start with a legal character {0} ({1})", ch, (char) ch));
2684 while (XmlChar.IsNameChar ((ch = PeekChar ()))) {
// Only the first ':' is considered, and only when namespace support is on.
2686 if (ch == ':' && namespaces && colonAt < 0)
2691 int start = curNodePeekIndex + startOffset;
2693 string name = parserContext.NameTable.Add (
2694 peekChars, start, length);
// Prefix = characters before the colon, local name = characters after it.
2697 prefix = parserContext.NameTable.Add (
2698 peekChars, start, colonAt);
2699 localName = parserContext.NameTable.Add (
2700 peekChars, start + colonAt + 1, length - colonAt - 1);
2702 prefix = String.Empty;
2706 preserveCurrentTag = savePreserve;
2710 int ch = PeekChar ();
2711 if (!XmlChar.IsFirstNameChar (ch))
2712 throw NotWFError (String.Format (CultureInfo.InvariantCulture, "a name did not start with a legal character {0} ({1})", ch, (char) ch));
2717 // AppendNameChar (ch);
2719 // nameBuffer.Length is always non-0 so no need to ExpandNameCapacity () here
// NOTE(review): with '<', ch == 0xFFFF takes the surrogate branch (here and
// in the loop below), where (ch - 0x10000) is negative; '<=' looks
// intended -- confirm.
2720 if (ch < Char.MaxValue)
2721 nameBuffer [nameLength++] = (char) ch;
2723 AppendSurrogatePairNameChar (ch);
2728 while (XmlChar.IsNameChar ((ch = PeekChar ()))) {
2731 if (ch == ':' && namespaces && colonAt < 0)
2732 colonAt = nameLength;
2733 // AppendNameChar (ch);
2735 if (nameLength == nameCapacity)
2736 ExpandNameCapacity ();
2737 if (ch < Char.MaxValue)
2738 nameBuffer [nameLength++] = (char) ch;
2740 AppendSurrogatePairNameChar (ch);
2744 string name = parserContext.NameTable.Add (nameBuffer, 0, nameLength);
2747 prefix = parserContext.NameTable.Add (nameBuffer, 0, colonAt);
2748 localName = parserContext.NameTable.Add (nameBuffer, colonAt + 1, nameLength - colonAt - 1);
2750 prefix = String.Empty;
// Consumes the next character and requires it to equal 'expected'.
// Throws the NotWFError exception otherwise; end of input is reported as
// "EOF" in the message.
private void Expect (int expected)
{
	int actual = ReadChar ();
	if (actual == expected)
		return;

	object found;
	if (actual < 0)
		found = "EOF";
	else
		found = (char) actual;

	throw NotWFError (String.Format (CultureInfo.InvariantCulture,
		"expected '{0}' ({1:X}) but found '{2}' ({3:X})",
		(char) expected,
		expected,
		found,
		actual));
}
// Consumes expected.Length characters and requires them to spell exactly
// 'expected'. Throws the NotWFError exception at the first mismatch.
private void Expect (string expected)
{
	foreach (char wanted in expected) {
		if (ReadChar () != wanted)
			throw NotWFError (String.Format (CultureInfo.InvariantCulture,
				"'{0}' is expected", expected));
	}
}
// Skips any whitespace, then consumes one more character and requires it
// to be c. Throws the NotWFError exception otherwise; end of input is
// reported as "EOF" in the message.
private void ExpectAfterWhitespace (char c)
{
	int i;
	do {
		i = ReadChar ();
		// The range test is a cheap filter in front of the full
		// whitespace check.
	} while (i < 0x21 && XmlChar.IsWhitespace (i));

	if (c != i)
		throw NotWFError (String.Format (CultureInfo.InvariantCulture, "Expected {0}, but found {1} [{2}]", c, i < 0 ? (object) "EOF" : (char) i, i));
}
// Skips space, tab, line feed and carriage return characters at the
// current position; 'skipped' records whether the first peeked character
// was one of them. The statements that consume characters and the return
// statements are not visible in this excerpt.
2794 // Does not consume the first non-whitespace character.
2795 private bool SkipWhitespace ()
2797 // FIXME: It should be inlined by the JIT.
2798 // bool skipped = XmlChar.IsWhitespace (PeekChar ());
2799 int ch = PeekChar ();
// Hand-inlined whitespace test (see the FIXME above).
2800 bool skipped = (ch == 0x20 || ch == 0x9 || ch == 0xA || ch == 0xD);
2804 // FIXME: It should be inlined by the JIT.
2805 // while (XmlChar.IsWhitespace (PeekChar ()))
2807 while ((ch = PeekChar ()) == 0x20 || ch == 0x9 || ch == 0xA || ch == 0xD)
// Reads a run of whitespace and exposes it as a Whitespace or
// SignificantWhitespace node. Some statements, including the returns, are
// not visible in this excerpt, so the meaning of the bool result cannot be
// stated from here.
2812 private bool ReadWhitespace ()
// Whitespace before anything else moves the state on from None.
2814 if (currentState == XmlNodeType.None)
2815 currentState = XmlNodeType.XmlDeclaration;
// preserveCurrentTag is forced on while scanning and restored below.
2817 bool savePreserve = preserveCurrentTag;
2818 preserveCurrentTag = true;
2819 int startOffset = peekCharsIndex - curNodePeekIndex; // it should be 0 for now though.
2821 int ch = PeekChar ();
2825 // FIXME: It should be inlined by the JIT.
2826 // } while ((ch = PeekChar ()) != -1 && XmlChar.IsWhitespace (ch));
2827 } while (ch == 0x20 || ch == 0x9 || ch == 0xA || ch == 0xD);
// Inside element content, whitespace followed by anything other than '<'
// or end of input is the start of a text node.
2829 bool isText = currentState == XmlNodeType.Element && ch != -1 && ch != '<';
// Applies to pure whitespace when handling is None, or when handling is
// Significant and xml:space is not "preserve" ('&&' binds tighter than
// '||'). The guarded statement is not shown.
2831 if (!isText && (whitespaceHandling == WhitespaceHandling.None ||
2832 whitespaceHandling == WhitespaceHandling.Significant && XmlSpace != XmlSpace.Preserve))
// Copy the scanned characters from the peek buffer into valueBuffer.
2835 ClearValueBuffer ();
2836 valueBuffer.Append (peekChars, curNodePeekIndex, peekCharsIndex - curNodePeekIndex - startOffset);
2837 preserveCurrentTag = savePreserve;
2842 XmlNodeType nodeType = (this.XmlSpace == XmlSpace.Preserve) ?
2843 XmlNodeType.SignificantWhitespace : XmlNodeType.Whitespace;
2844 SetProperties (nodeType,
2849 null, // value: create only when required
// Copies element content characters into buffer, starting at offset, for
// at most 'length' loop iterations. Several lines, including the return
// statements, are not visible in this excerpt.
2856 // Returns -1 if it should throw an error.
2857 private int ReadCharsInternal (char [] buffer, int offset, int length)
// Set while reading is in progress; cleared again once an end tag has been
// reached.
2859 shouldSkipUntilEndTag = true;
2861 int bufIndex = offset;
2862 for (int i = 0; i < length; i++) {
2863 int c = PeekChar ();
2866 throw NotWFError ("Unexpected end of xml.");
// A '<' that does not start an end tag is copied as ordinary content.
2869 if (PeekChar () != '/') {
2870 buffer [bufIndex++] = '<';
2873 // Seems to skip immediate EndElement
2880 shouldSkipUntilEndTag = false;
2881 Read (); // move to the next node
// NOTE(review): with '<', c == 0xFFFF takes the two-char branch, where
// (c - 0x10000) is negative; '<=' looks intended -- confirm.
2885 if (c < Char.MaxValue)
2886 buffer [bufIndex++] = (char) c;
// NOTE(review): this branch stores two chars in a single iteration, so more
// than 'length' chars can be written when the input contains characters
// above U+FFFF -- confirm that callers size the buffer for this.
2888 buffer [bufIndex++] = (char) ((c - 0x10000) / 0x400 + 0xD800);
2889 buffer [bufIndex++] = (char) ((c - 0x10000) % 0x400 + 0xDC00);
2897 private bool ReadUntilEndTag ()
2900 currentState = XmlNodeType.EndElement;
2906 throw NotWFError ("Unexpected end of xml.");
2908 if (PeekChar () != '/')
2911 string name = ReadName ();
2912 if (name != elementNames [elementNameStackPos - 1].Name)