// instructions, and comments are OK.
//
// It barfs on DOCTYPE declarations.
+// => It no longer barfs, but parsing is still incomplete.
+// DTD nodes are still not created.
//
// There's also no checking being done for either well-formedness
// or validity.
{
public class XmlTextReader : XmlReader, IXmlLineInfo
{
+ WhitespaceHandling whitespaceHandling = WhitespaceHandling.All;
#region Constructors
protected XmlTextReader ()
{
}
- [MonoTODO]
public XmlTextReader (Stream input)
+ : this (new StreamReader (input)) // wrap the stream in a StreamReader and chain to the TextReader overload
{
- // We can share some code in the constructors (at least for this one and next 2)
- XmlNameTable nt = new NameTable ();
- XmlNamespaceManager nsMgr = new XmlNamespaceManager (nt);
- parserContext = new XmlParserContext (null, nsMgr, null, XmlSpace.None);
- Init ();
- reader = new StreamReader (input);
}
- [MonoTODO]
public XmlTextReader (string url)
+ : this(url, new NameTable ()) // chain to the (url, XmlNameTable) overload with a fresh name table
{
- XmlNameTable nt = new NameTable ();
- XmlNamespaceManager nsMgr = new XmlNamespaceManager (nt);
- parserContext = new XmlParserContext (null, nsMgr, null, XmlSpace.None);
- Init ();
- reader = new StreamReader(url);
}
- [MonoTODO]
public XmlTextReader (TextReader input)
+ : this (input, new NameTable ()) // chain to the (TextReader, XmlNameTable) overload with a fresh name table
{
- XmlNameTable nt = new NameTable ();
- XmlNamespaceManager nsMgr = new XmlNamespaceManager (nt);
- parserContext = new XmlParserContext (null, nsMgr, null, XmlSpace.None);
- Init ();
- reader = input;
}
- [MonoTODO]
protected XmlTextReader (XmlNameTable nt)
+ // Build the parser context around the supplied name table (same shape as the
+ // (url, TextReader, XmlNameTable) overload). Passing a null context here would
+ // make SetReaderContext dereference null, and nt would be silently ignored.
+ // No input is attached: the fragment reader stays null.
+ : this (String.Empty, null, XmlNodeType.None, new XmlParserContext (nt, new XmlNamespaceManager (nt), null, XmlSpace.None))
{
- throw new NotImplementedException ();
}
- [MonoTODO]
public XmlTextReader (Stream input, XmlNameTable nt)
+ : this(new StreamReader (input), nt) // wrap the stream and chain to the (TextReader, XmlNameTable) overload
{
- throw new NotImplementedException ();
}
- [MonoTODO]
public XmlTextReader (string url, Stream input)
+ : this (url, new StreamReader (input)) // wrap the stream and chain to the (url, TextReader) overload
{
- throw new NotImplementedException ();
}
- [MonoTODO]
public XmlTextReader (string url, TextReader input)
+ : this (url, input, new NameTable ()) // chain to the (url, TextReader, XmlNameTable) overload with a fresh name table
{
- throw new NotImplementedException ();
}
- [MonoTODO]
+ [MonoTODO("Non-filename-url must be supported. Waiting for WebClient")]
public XmlTextReader (string url, XmlNameTable nt)
+ // : this(url, new StreamReader ((Stream)new XmlUrlResolver ().GetEntity (new Uri (url), null, typeof(Stream))), nt)
+ : this (url, new StreamReader (url), nt) // url is handed to StreamReader as a file path; the commented-out line above is the resolver-based alternative
{
- throw new NotImplementedException ();
}
- [MonoTODO]
public XmlTextReader (TextReader input, XmlNameTable nt)
+ : this(String.Empty, input, nt) // no url is known here, so an empty one is passed to the (url, TextReader, XmlNameTable) overload
{
- throw new NotImplementedException ();
}
- [MonoTODO]
public XmlTextReader (Stream xmlFragment, XmlNodeType fragType, XmlParserContext context)
+ : this (String.Empty, new StreamReader (xmlFragment), fragType, context) // wrap the stream and chain to the private four-argument constructor with an empty url
{
- throw new NotImplementedException ();
}
- [MonoTODO]
public XmlTextReader (string url, Stream input, XmlNameTable nt)
+ : this (url, new StreamReader (input), nt) // wrap the stream and chain to the (url, TextReader, XmlNameTable) overload
{
- throw new NotImplementedException ();
}
- [MonoTODO]
public XmlTextReader (string url, TextReader input, XmlNameTable nt)
+ : this (url, input, XmlNodeType.Document, new XmlParserContext (nt, new XmlNamespaceManager (nt), null, XmlSpace.None)) // build a document-level parser context around nt and chain to the private four-argument constructor
{
- throw new NotImplementedException ();
}
- [MonoTODO]
+ [MonoTODO("TODO as same as private XmlTextReader(TextReader, XmlNodeType, XmlParserContext)")]
public XmlTextReader (string xmlFragment, XmlNodeType fragType, XmlParserContext context)
+ : this (String.Empty, new StringReader (xmlFragment), fragType, context) // xmlFragment is XML text, not a url: read it through a StringReader with an empty url
{
- //Waiting for Validating reader for fragType rules.
- parserContext = context;
- Init ();
- reader = new StringReader(xmlFragment);
+ }
+
+ // TODO still remains as described at head of this file,
+ // but it might not be TODO of the constructors...
+		// Shared constructor body: installs the parser context (with url as
+		// its base URI) and then attaches the input reader.
+		XmlTextReader (string url, TextReader fragment, XmlNodeType fragType, XmlParserContext context)
+		{
+			SetReaderContext (url, context);
+			SetReaderFragment (fragment, fragType);
}
#endregion
get { return attributes.Count; }
}
- [MonoTODO]
public override string BaseURI
{
- get { throw new NotImplementedException (); }
+ get { return parserContext.BaseURI; } // the base URI is stored on the parser context (assigned in SetReaderContext)
}
public override int Depth
{
- get { return depth > 0 ? depth : 0; }
+ get {
+ return elementDepth; // copy of depth taken where the current node's name/value are assigned
+ }
}
- [MonoTODO]
public Encoding Encoding
{
- get { throw new NotImplementedException (); }
+ get { return parserContext.Encoding; } // reports whatever encoding the parser context carries
}
public override bool EOF
get { return value; }
}
- [MonoTODO]
public WhitespaceHandling WhitespaceHandling
{
- get { throw new NotImplementedException (); }
- set { throw new NotImplementedException (); }
+ get { return whitespaceHandling; } // backing field defaults to WhitespaceHandling.All
+ set { whitespaceHandling = value; } // consulted by ReadContent when it meets '\r', '\n' or ' '
}
[MonoTODO]
public override string GetAttribute (string name)
{
- return attributes [name] as string;
+ return attributes.ContainsKey (name) ? // an attribute that is not present yields String.Empty
+ attributes [name] as string : String.Empty;
}
public override string GetAttribute (string localName, string namespaceURI)
string thisNamespaceURI = LookupNamespace (thisPrefix);
if (namespaceURI == thisNamespaceURI)
- return attributes [thisName] as string;
+ return attributes.ContainsKey (thisName) ?
+ attributes [thisName] as string : String.Empty;
}
} else if (localName == "xmlns" && namespaceURI == "http://www.w3.org/2000/xmlns/" && thisName == "xmlns")
- return attributes [thisName] as string;
+ return attributes.ContainsKey (thisName) ?
+ attributes [thisName] as string : String.Empty;
}
return String.Empty;
return more;
}
- [MonoTODO]
public override bool ReadAttributeValue ()
{
- throw new NotImplementedException ();
+			// Steps through the current attribute value one node per call:
+			// literal text and the references this reader can expand come
+			// back as a Text node, any other entity reference comes back as
+			// an EntityReference node. Returns false when nothing is left.
+
+			// The value was consumed by an earlier call: reset and stop.
+			// A position at the end of the string (reached after a trailing
+			// entity reference) counts as consumed, so that no empty Text
+			// node is reported after it.
+			if (attributeStringCurrentPosition < 0 ||
+			    (attributeStringCurrentPosition > 0 &&
+			     attributeStringCurrentPosition >= attributeString.Length)) {
+				attributeStringCurrentPosition = 0;
+				attributeString = String.Empty;
+				return false;
+			}
+
+			// First call for this attribute: capture the raw value to scan.
+			if (attributeStringCurrentPosition == 0)
+				attributeString = value;
+
+			value = String.Empty;
+			int nextPosition = attributeString.IndexOf ('&',
+				attributeStringCurrentPosition);
+
+			// A reference sits exactly at the scan position. The comparison
+			// is against the scan position, not index 0, so that a reference
+			// met on a later call is handled here as well.
+			if (nextPosition == attributeStringCurrentPosition) {
+				// Remember where the reference starts: the helper moves the
+				// scan position past the terminating ';'.
+				int entityStart = attributeStringCurrentPosition;
+				string parsed = ReadAttributeValueEntityReference ();
+				if (parsed == null) {
+					// Not expandable here: report it as an entity reference.
+					int endEntityPosition = attributeString.IndexOf (';', entityStart);
+					SetProperties (XmlNodeType.EntityReference,
+						attributeString.Substring (entityStart + 1,
+							endEntityPosition - entityStart - 1),
+						false,
+						String.Empty,
+						false);
+					attributeStringCurrentPosition = endEntityPosition + 1;
+
+					return true;
+				}
+				value += parsed;
+			}
+
+			// Every other case produces a text node.
+			while (true) {
+				nextPosition = attributeString.IndexOf ('&', attributeStringCurrentPosition);
+				if (nextPosition < 0) {
+					// Reached the end of the value string.
+					value += attributeString.Substring (attributeStringCurrentPosition);
+					attributeStringCurrentPosition = -1;
+					break;
+				}
+				if (nextPosition != attributeStringCurrentPosition) {
+					// Plain text up to the next '&'; stop there.
+					value += attributeString.Substring (attributeStringCurrentPosition,
+						nextPosition - attributeStringCurrentPosition);
+					attributeStringCurrentPosition = nextPosition;
+					break;
+				}
+
+				int referenceStart = attributeStringCurrentPosition;
+				string resolved = ReadAttributeValueEntityReference ();
+				if (resolved == null) {
+					// An unexpandable reference starts here. Rewind to its
+					// '&' so the next call reports it, and return the text
+					// gathered so far.
+					attributeStringCurrentPosition = referenceStart;
+					break;
+				}
+				value += resolved;
+			}
+
+			SetProperties (XmlNodeType.Text,
+				"#text",
+				false,
+				value,
+				false);
+
+			return true;
}
[MonoTODO]
while (startname != endname) {
ReadContent ();
- endname = this.Name;
+ endname = this.Name;
}
- xmlBuffer.Replace(currentTag.ToString (), "");
+ xmlBuffer.Replace (currentTag.ToString (), "");
saveToXmlBuffer = false;
string InnerXml = xmlBuffer.ToString ();
xmlBuffer.Length = 0;
[MonoTODO]
public override string ReadOuterXml ()
{
- // Still need a Well Formedness check.
- // Will wait for Validating reader ;-)
if (NodeType == XmlNodeType.Attribute) {
- return Name+"=\""+Value+"\"";
+ return Name + "=\"" + Value + "\"";
} else {
saveToXmlBuffer = true;
- xmlBuffer.Append(currentTag.ToString ());
+ xmlBuffer.Append (currentTag.ToString ());
string startname = this.Name;
string endname = string.Empty;
readState = ReadState.Interactive;
while (startname != endname) {
ReadContent ();
- endname = this.Name;
+ endname = this.Name;
}
saveToXmlBuffer = false;
string OuterXml = xmlBuffer.ToString ();
#endregion
- // privates
+ #region Internals
+ internal string publicId;
+ internal string systemId;
+
+		// Installs the parser context for this reader and resets the reader
+		// state through Init ().
+		//
+		// url     - base URI of the input; when null or empty, a base URI
+		//           already carried by the context is kept.
+		// context - parser context to use; when null, a default context with
+		//           a fresh name table is created instead of failing with a
+		//           null dereference.
+		internal void SetReaderContext (string url, XmlParserContext context)
+		{
+			if (context == null) {
+				XmlNameTable nt = new NameTable ();
+				context = new XmlParserContext (nt, new XmlNamespaceManager (nt), null, XmlSpace.None);
+			}
+
+			parserContext = context;
+
+			// Only a real url replaces the context's base URI; the fragment
+			// constructors pass String.Empty and must not wipe a base URI
+			// the caller supplied with the context.
+			if (url != null && url.Length > 0)
+				parserContext.BaseURI = url;
+			else if (parserContext.BaseURI == null)
+				parserContext.BaseURI = String.Empty;
+
+			Init ();
+		}
+
+		// Attaches the input reader. can_seek records whether Peek () on the
+		// reader yields a character; PeekChar falls back to its own one
+		// character look-ahead when it does not. fragType is not used yet.
+		internal void SetReaderFragment (TextReader fragment, XmlNodeType fragType)
+		{
+			reader = fragment;
+			if (fragment == null)
+				can_seek = false;
+			else
+				can_seek = fragment.Peek () != -1;
+/* for future use
+			switch(fragType)
+			{
+			case XmlNodeType.Attribute:	// attribute content
+				parserContext.InputState = XmlParserInputState.AttributeValue;
+				break;
+			case XmlNodeType.DocumentFragment:	// element content
+				parserContext.InputState = XmlParserInputState.Content;
+				break;
+			case XmlNodeType.Element:	// one element
+				parserContext.InputState = XmlParserInputState.StartTag;
+				break;
+			case XmlNodeType.Document:	// document content
+				parserContext.InputState = XmlParserInputState.Start;
+				break;
+			default:
+				throw new InvalidOperationException("setting this xml node type not allowed.");
+			}
+*/
+		}
+ #endregion
+
+ #region Privates
private XmlParserContext parserContext;
private ReadState readState;
private int depth;
+ private int elementDepth;
private bool depthDown;
private bool popScope;
private bool saveToXmlBuffer;
private int line = 1;
private int column = 1;
+ private bool has_peek;
+ private bool can_seek;
+ private int peek_char;
+
+ private string attributeString = String.Empty;
+ private int attributeStringCurrentPosition;
private void Init ()
{
readState = ReadState.Initial;
- depth = -1;
+ depth = 0;
depthDown = false;
popScope = false;
this.name = name;
this.isEmptyElement = isEmptyElement;
this.value = value;
+ this.elementDepth = depth;
if (clearAttributes)
ClearAttributes ();
private int PeekChar ()
{
- return reader.Peek ();
+			// Readers that support Peek () are asked directly.
+			if (can_seek)
+				return reader.Peek ();
+
+			// Otherwise read one character ahead and keep it until
+			// ReadChar () picks it up.
+			if (!has_peek) {
+				peek_char = reader.Read ();
+				has_peek = true;
+			}
+			return peek_char;
}
private int ReadChar ()
{
- int ch = reader.Read ();
+ int ch;
+ if (has_peek) {
+ ch = peek_char;
+ has_peek = false;
+ } else {
+ ch = reader.Read ();
+ }
+
if (ch == '\n') {
line++;
column = 1;
// element or text outside of the document element.
private bool ReadContent ()
{
- bool more = false;
currentTag.Length = 0;
if (popScope) {
parserContext.NamespaceManager.PopScope ();
}
if (returnEntityReference) {
- ++depth;
SetEntityReferenceProperties ();
- more = true;
} else {
switch (PeekChar ())
{
case '<':
ReadChar ();
ReadTag ();
- more = true;
break;
case '\r':
+ if (whitespaceHandling == WhitespaceHandling.All ||
+ whitespaceHandling == WhitespaceHandling.Significant)
+ return ReadWhitespace ();
+
ReadChar ();
return ReadContent ();
case '\n':
+ if (whitespaceHandling == WhitespaceHandling.All ||
+ whitespaceHandling == WhitespaceHandling.Significant)
+ return ReadWhitespace ();
+
ReadChar ();
return ReadContent ();
case ' ':
+ if (whitespaceHandling == WhitespaceHandling.All ||
+ whitespaceHandling == WhitespaceHandling.Significant)
+ return ReadWhitespace ();
+
SkipWhitespace ();
return ReadContent ();
case -1:
String.Empty, // value
true // clearAttributes
);
- more = false;
break;
default:
- ReadText ();
- more = true;
+ ReadText (true);
break;
}
}
-
- return more;
+ return this.ReadState != ReadState.EndOfFile;
}
private void SetEntityReferenceProperties ()
Expect ('>');
- if (!depthDown)
- ++depth;
- else
- depthDown = false;
-
SetProperties (
XmlNodeType.Element, // nodeType
name, // name
String.Empty, // value
false // clearAttributes
);
+
+ if (!depthDown)
+ ++depth;
+ else
+ depthDown = false;
+
}
// The reader is positioned on the first character
// The reader is positioned on the first character
// of the text.
- private void ReadText ()
+ private void ReadText (bool cleanValue)
{
- valueLength = 0;
+ if (cleanValue)
+ valueLength = 0;
int ch = PeekChar ();
}
if (returnEntityReference && valueLength == 0) {
- ++depth;
SetEntityReferenceProperties ();
} else {
- if (depth >= 0) {
- ++depth;
- depthDown = true;
- }
-
SetProperties (
XmlNodeType.Text, // nodeType
String.Empty, // name
// The reader is positioned on the first character
// of the target.
+ //
+ // Now it also reads XmlDeclaration, this method name became improper...
private void ReadProcessingInstruction ()
{
string target = ReadName ();
AppendValueChar ((char)ch);
}
+/* for future use
+ if(target == "xml") && parserContext.InputState != XmlParserInputState.Start)
+ throw new XmlException("Xml declaration is not allowed here.");
+ else {
+ parserContext.InputState = XmlParserInputState.DTD; //for future use
+ }
+*/
SetProperties (
+ target == "xml" ?
+ XmlNodeType.XmlDeclaration :
XmlNodeType.ProcessingInstruction, // nodeType
target, // name
false, // isEmptyElement
switch (ch)
{
case '-':
- Expect ('-');
- Expect ('-');
+ Expect ("--");
ReadComment ();
break;
case '[':
ReadChar ();
- Expect ('C');
- Expect ('D');
- Expect ('A');
- Expect ('T');
- Expect ('A');
- Expect ('[');
+ Expect ("CDATA[");
ReadCDATA ();
break;
+ case 'D':
+ Expect ("DOCTYPE");
+ ReadDoctypeDecl ();
+ break;
}
}
AppendValueChar ((char)ch);
}
- ++depth;
-
SetProperties (
XmlNodeType.CDATA, // nodeType
String.Empty, // name
);
}
+		// The reader is positioned on the first character after
+		// the leading '<!DOCTYPE'.
+		//
+		// Reads the doctype name, the optional SYSTEM / PUBLIC external ID,
+		// the optional internal subset and the closing '>'. The public and
+		// system identifiers are stored in the publicId / systemId fields,
+		// the raw internal subset in parserContext.InternalSubset, and the
+		// current node becomes a DocumentType node whose value is that subset.
+		private void ReadDoctypeDecl ()
+		{
+			string doctypeName = null;
+			string internalSubset = String.Empty;
+
+			// Assign the fields directly; locals of the same name would
+			// shadow them and the identifiers would be lost.
+			publicId = String.Empty;
+			systemId = String.Empty;
+
+			SkipWhitespace ();
+			doctypeName = ReadName ();
+			SkipWhitespace ();
+			xmlBuffer.Length = 0;
+			switch (PeekChar ()) {
+			case 'S':
+				systemId = ReadSystemLiteral (true);
+				break;
+			case 'P':
+				publicId = ReadPubidLiteral ();
+				SkipWhitespace ();
+				systemId = ReadSystemLiteral (false);
+				break;
+			}
+			SkipWhitespace ();
+
+			if (PeekChar () == '[') {
+				// read markupdecl etc. up to the closing ']'
+				ReadChar ();
+				xmlBuffer.Length = 0;
+				saveToXmlBuffer = true;
+				do {
+					// The declaration readers do not assign nodeType, so
+					// prime it with a non-None value before each item; only
+					// the ']' branch of ReadDTDInternalSubset sets it back
+					// to None and ends the loop.
+					nodeType = XmlNodeType.DocumentType;
+					ReadDTDInternalSubset ();
+				} while (nodeType != XmlNodeType.None);
+				saveToXmlBuffer = false;
+				if (xmlBuffer.Length > 0)
+					xmlBuffer.Remove (xmlBuffer.Length - 1, 1); // cut off ']'
+				internalSubset = xmlBuffer.ToString ();
+			}
+
+			// The literal readers leave their text in xmlBuffer. Clear it so
+			// it is neither mistaken for an internal subset nor carried over
+			// to later users of the buffer.
+			xmlBuffer.Length = 0;
+
+			// end of DOCTYPE decl.
+			SkipWhitespace ();
+			Expect ('>');
+
+			parserContext.InternalSubset = internalSubset;
+
+			// set properties for <!DOCTYPE> node
+			SetProperties (
+				XmlNodeType.DocumentType, // nodeType
+				doctypeName, // name
+				false, // isEmptyElement
+				parserContext.InternalSubset, // value
+				true // clearAttributes
+			);
+		}
+
+
+		// Reads one item of the DTD internal subset: a markup declaration
+		// (ELEMENT, ATTLIST, ENTITY, NOTATION), a processing instruction,
+		// a comment, a parameter entity reference, or the terminating ']'.
+		//
+		// Nothing is returned; the outcome is reported through nodeType,
+		// which is XmlNodeType.None when ']' was read and a non-None value
+		// for every declaration branch handled here. Processing instructions
+		// and comments are delegated to their own readers.
+		// Throws XmlException on anything else, including end of input.
+		private void ReadDTDInternalSubset ()
+		{
+			SkipWhitespace ();
+			switch (ReadChar ()) {
+			case ']':
+				nodeType = XmlNodeType.None;
+				break;
+			case '%':
+				ReadName ();	// parameter entity name; the reference is not expanded
+				Expect (';');
+				nodeType = XmlNodeType.EntityReference; // It's cheating a bit;-)
+				break;
+			case '<':
+				switch (ReadChar ()) {
+				case '?':
+					ReadProcessingInstruction ();
+					break;
+				case '!':
+					switch (ReadChar ()) {
+					case '-':
+						Expect ('-');
+						ReadComment ();
+						break;
+					case 'E':
+						switch (ReadChar ()) {
+						case 'N':
+							Expect ("TITY");
+							ReadEntityDecl ();
+							nodeType = XmlNodeType.Entity;
+							break;
+						case 'L':
+							Expect ("EMENT");
+							ReadElementDecl ();
+							// Any non-None value works here; callers only
+							// compare against XmlNodeType.None.
+							nodeType = XmlNodeType.Element;
+							break;
+						default:
+							throw new XmlException ("Syntax Error after '<!E' (ELEMENT or ENTITY must be found)");
+						}
+						break;
+					case 'A':
+						Expect ("TTLIST");
+						ReadAttListDecl ();
+						nodeType = XmlNodeType.Attribute;
+						break;
+					case 'N':
+						Expect ("OTATION");
+						ReadNotationDecl ();
+						nodeType = XmlNodeType.Notation;
+						break;
+					default:
+						throw new XmlException ("Syntax Error after '<!' characters.");
+					}
+					break;
+				default:
+					throw new XmlException ("Syntax Error after '<' character.");
+				}
+				break;
+			default:
+				throw new XmlException ("Syntax Error inside doctypedecl markup.");
+			}
+		}
+
+		// The reader is positioned just after "<!ELEMENT".
+		// The declaration is not interpreted: everything up to and including
+		// the closing '>' is skipped.
+		// Throws XmlException when the input ends first; ReadChar () keeps
+		// returning -1 at end of input, so an unguarded loop would never end.
+		private void ReadElementDecl ()
+		{
+			int ch;
+			do {
+				ch = ReadChar ();
+				if (ch < 0)
+					throw new XmlException ("Unexpected end of stream in element declaration.");
+			} while (ch != '>');
+		}
+
+		// The reader is positioned just after "<!ENTITY".
+		// The declaration is not interpreted: everything up to and including
+		// the closing '>' is skipped. A '>' inside a quoted literal (entity
+		// value or external ID) does not end the declaration.
+		// Throws XmlException when the input ends first.
+		private void ReadEntityDecl ()
+		{
+			int quote = -1;	// the open quote character, or -1 outside a literal
+			while (true) {
+				int ch = ReadChar ();
+				if (ch < 0)
+					throw new XmlException ("Unexpected end of stream in entity declaration.");
+				if (quote >= 0) {
+					if (ch == quote)
+						quote = -1;
+				} else if (ch == '"' || ch == '\'') {
+					quote = ch;
+				} else if (ch == '>') {
+					break;
+				}
+			}
+		}
+
+		// The reader is positioned just after "<!ATTLIST".
+		// The declaration is not interpreted: everything up to and including
+		// the closing '>' is skipped. A '>' inside a quoted default value
+		// does not end the declaration.
+		// Throws XmlException when the input ends first.
+		private void ReadAttListDecl ()
+		{
+			int quote = -1;	// the open quote character, or -1 outside a literal
+			while (true) {
+				int ch = ReadChar ();
+				if (ch < 0)
+					throw new XmlException ("Unexpected end of stream in attribute list declaration.");
+				if (quote >= 0) {
+					if (ch == quote)
+						quote = -1;
+				} else if (ch == '"' || ch == '\'') {
+					quote = ch;
+				} else if (ch == '>') {
+					break;
+				}
+			}
+		}
+
+		// The reader is positioned just after "<!NOTATION".
+		// The declaration is not interpreted: everything up to and including
+		// the closing '>' is skipped. A '>' inside a quoted public or system
+		// identifier does not end the declaration.
+		// Throws XmlException when the input ends first.
+		private void ReadNotationDecl ()
+		{
+			int quote = -1;	// the open quote character, or -1 outside a literal
+			while (true) {
+				int ch = ReadChar ();
+				if (ch < 0)
+					throw new XmlException ("Unexpected end of stream in notation declaration.");
+				if (quote >= 0) {
+					if (ch == quote)
+						quote = -1;
+				} else if (ch == '"' || ch == '\'') {
+					quote = ch;
+				} else if (ch == '>') {
+					break;
+				}
+			}
+		}
+
+		// Reads a system literal and returns it without its quotes.
+		//
+		// expectSYSTEM - true when the reader is positioned on the first 'S'
+		//                of "SYSTEM", which is then consumed first; false
+		//                when only the quoted literal remains (after a
+		//                public identifier).
+		//
+		// The literal text is collected through xmlBuffer, which still holds
+		// it on return. Throws XmlException when the literal is not quoted
+		// or the input ends inside it.
+		private string ReadSystemLiteral (bool expectSYSTEM)
+		{
+			if (expectSYSTEM)
+				Expect ("SYSTEM");
+			SkipWhitespace ();
+			int quoteChar = ReadChar ();	// apos or quot
+			// Without this check any character would be taken as the
+			// delimiter and the scan would run to its next occurrence.
+			if (quoteChar != '"' && quoteChar != '\'')
+				throw new XmlException ("System literal must be enclosed in quotation marks.");
+
+			xmlBuffer.Length = 0;
+			saveToXmlBuffer = true;
+			try {
+				int c;
+				do {
+					c = ReadChar ();
+					if (c < 0)
+						throw new XmlException ("Unexpected end of stream in ExternalID.");
+				} while (c != quoteChar);
+			} finally {
+				// Never leave buffering switched on, not even on error.
+				saveToXmlBuffer = false;
+			}
+			xmlBuffer.Remove (xmlBuffer.Length - 1, 1);	// cut quoteChar
+			return xmlBuffer.ToString ();
+		}
+
+		// The reader is positioned on the 'P' of "PUBLIC".
+		// Consumes the keyword and the quoted public identifier and returns
+		// the identifier without its quotes. The reader is left directly
+		// after the closing quote.
+		//
+		// The text is collected through xmlBuffer, which still holds it on
+		// return. Throws XmlException when the literal is not quoted,
+		// contains a character that is not a PubidChar, or the input ends
+		// inside it.
+		private string ReadPubidLiteral ()
+		{
+			Expect ("PUBLIC");
+			SkipWhitespace ();
+			int quoteChar = ReadChar ();
+			if (quoteChar != '"' && quoteChar != '\'')
+				throw new XmlException ("Public identifier must be enclosed in quotation marks.");
+
+			xmlBuffer.Length = 0;
+			saveToXmlBuffer = true;
+			try {
+				int c;
+				do {
+					c = ReadChar ();
+					if (c < 0)
+						throw new XmlException ("Unexpected end of stream in ExternalID.");
+					if (c != quoteChar && !XmlChar.IsPubidChar (c))
+						throw new XmlException ("character '" + (char) c + "' not allowed for PUBLIC ID");
+				} while (c != quoteChar);
+			} finally {
+				// Never leave buffering switched on, not even on error.
+				saveToXmlBuffer = false;
+			}
+			// The loop has already consumed the closing quote, so nothing
+			// more is read here: one further ReadChar () would swallow the
+			// character after the literal and leave the quote in the result.
+			xmlBuffer.Remove (xmlBuffer.Length - 1, 1);	// cut quoteChar
+			return xmlBuffer.ToString ();
+		}
+
// The reader is positioned on the first character
// of the name.
private string ReadName ()
}
}
+		// Expects each character of the given string in turn, using the
+		// single-character Expect for every one of them.
+		private void Expect (string expected)
+		{
+			foreach (char c in expected)
+				Expect (c);
+		}
+
// Does not consume the first non-whitespace character.
private void SkipWhitespace ()
{
+ //FIXME: Should not skip if whitespaceHandling == WhitespaceHandling.None
while (XmlChar.IsWhitespace (PeekChar ()))
ReadChar ();
}
+
+		// The reader is positioned on a whitespace character.
+		// Collects the whole whitespace run into the value buffer. When the
+		// run is followed by character data the text reader continues with
+		// the same buffer; when it is followed by '<' or the end of input a
+		// Whitespace node is set. Returns whether more input is available.
+		private bool ReadWhitespace ()
+		{
+			valueLength = 0;
+			int next = PeekChar ();
+			while (true) {
+				AppendValueChar (ReadChar ());
+				next = PeekChar ();
+				if (next == -1 || !XmlChar.IsWhitespace (next))
+					break;
+			}
+
+			bool textFollows = next != -1 && next != '<';
+			if (textFollows) {
+				ReadText (false);
+			} else {
+				SetProperties (XmlNodeType.Whitespace,
+					String.Empty,
+					false,
+					CreateValueString (),
+					true);
+			}
+
+			return PeekChar () != -1;
+		}
+
+		// Reads the entity reference that starts at
+		// attributeStringCurrentPosition (which must index an '&' in
+		// attributeString) and moves that position past the terminating ';'.
+		//
+		// Returns the replacement text for a character reference or for one
+		// of the five predefined entities, and null for any other entity
+		// name. Throws XmlException when the reference is malformed: no ';',
+		// no name, or a character reference that is not a valid number.
+		private string ReadAttributeValueEntityReference ()
+		{
+			int endEntityPosition = attributeString.IndexOf (';',
+				attributeStringCurrentPosition);
+			if (endEntityPosition < 0)
+				throw new XmlException ("Entity reference in attribute value is not terminated with ';'.");
+
+			string entityName = attributeString.Substring (attributeStringCurrentPosition + 1,
+				endEntityPosition - attributeStringCurrentPosition - 1);
+			if (entityName.Length == 0)
+				throw new XmlException ("Entity reference in attribute value has no name.");
+
+			attributeStringCurrentPosition = endEntityPosition + 1;
+
+			if (entityName [0] != '#') {
+				switch (entityName) {
+				case "lt": return "<";
+				case "gt": return ">";
+				case "amp": return "&";
+				case "quot": return "\"";
+				case "apos": return "'";
+				default: return null;
+				}
+			}
+
+			// character reference: "#x" + hexadecimal digits, or "#" + decimal digits
+			bool hex = entityName.Length > 1 && entityName [1] == 'x';
+			string digits = entityName.Substring (hex ? 2 : 1);
+			int code;
+			try {
+				// Parse with the invariant culture: the digits are markup,
+				// not user-facing text.
+				code = int.Parse (digits,
+					hex ? System.Globalization.NumberStyles.AllowHexSpecifier
+					    : System.Globalization.NumberStyles.None,
+					System.Globalization.CultureInfo.InvariantCulture);
+			} catch (FormatException) {
+				throw new XmlException ("Invalid character reference '&" + entityName + ";'.");
+			} catch (OverflowException) {
+				throw new XmlException ("Invalid character reference '&" + entityName + ";'.");
+			}
+			if (code < 0 || code > 0x10FFFF)
+				throw new XmlException ("Invalid character reference '&" + entityName + ";'.");
+
+			if (code > 0xFFFF) {
+				// Outside the 16-bit range: encode as a UTF-16 surrogate
+				// pair instead of truncating with a cast to char.
+				code -= 0x10000;
+				return new string (new char [] {
+					(char) (0xD800 + (code >> 10)),
+					(char) (0xDC00 + (code & 0x3FF))
+				});
+			}
+			return ((char) code).ToString ();
+		}
+
+		// Returns the given raw attribute value with character references
+		// and the five predefined entities expanded. References to any other
+		// entity are copied through unchanged ("&name;"). A null argument
+		// yields null.
+		// Throws XmlException when a reference is malformed: no ';', no
+		// name, or a character reference that is not a valid number.
+		private string ResolveAttributeValue (string unresolved)
+		{
+			if (unresolved == null)
+				return null;
+
+			int next = unresolved.IndexOf ('&');
+			if (next < 0)
+				return unresolved;
+
+			StringBuilder resolved = new StringBuilder (unresolved.Length);
+			int pos = 0;
+			while (next >= 0) {
+				// literal text in front of the reference
+				resolved.Append (unresolved, pos, next - pos);
+
+				int endPos = unresolved.IndexOf (';', next + 1);
+				if (endPos < 0)
+					throw new XmlException ("Entity reference in attribute value is not terminated with ';'.");
+				string entityName = unresolved.Substring (next + 1, endPos - next - 1);
+				if (entityName.Length == 0)
+					throw new XmlException ("Entity reference in attribute value has no name.");
+
+				if (entityName [0] == '#') {
+					resolved.Append (CharacterReferenceToString (entityName));
+				} else {
+					switch (entityName) {
+					case "lt": resolved.Append ("<"); break;
+					case "gt": resolved.Append (">"); break;
+					case "amp": resolved.Append ("&"); break;
+					case "quot": resolved.Append ("\""); break;
+					case "apos": resolved.Append ("'"); break;
+					// With respect to "Value", the MS documentation is no help
+					// and the implementation returns an inconsistent value
+					// (e.g. XML: "&ent; &ent;" ---> Value: "&ent; &ent;".)
+					default: resolved.Append ("&" + entityName + ";"); break;
+					}
+				}
+
+				// endPos indexes a character of the string, so pos never
+				// exceeds its length.
+				pos = endPos + 1;
+				next = unresolved.IndexOf ('&', pos);
+			}
+			resolved.Append (unresolved, pos, unresolved.Length - pos);
+
+			return resolved.ToString ();
+		}
+
+		// Converts the body of a character reference ("#" + decimal digits or
+		// "#x" + hexadecimal digits) to the text it denotes. Code points
+		// above U+FFFF become a UTF-16 surrogate pair.
+		private static string CharacterReferenceToString (string entityName)
+		{
+			bool hex = entityName.Length > 1 && entityName [1] == 'x';
+			string digits = entityName.Substring (hex ? 2 : 1);
+			int code;
+			try {
+				code = int.Parse (digits,
+					hex ? System.Globalization.NumberStyles.AllowHexSpecifier
+					    : System.Globalization.NumberStyles.None,
+					System.Globalization.CultureInfo.InvariantCulture);
+			} catch (FormatException) {
+				throw new XmlException ("Invalid character reference '&" + entityName + ";'.");
+			} catch (OverflowException) {
+				throw new XmlException ("Invalid character reference '&" + entityName + ";'.");
+			}
+			if (code < 0 || code > 0x10FFFF)
+				throw new XmlException ("Invalid character reference '&" + entityName + ";'.");
+
+			if (code > 0xFFFF) {
+				code -= 0x10000;
+				return new string (new char [] {
+					(char) (0xD800 + (code >> 10)),
+					(char) (0xDC00 + (code & 0x3FF))
+				});
+			}
+			return ((char) code).ToString ();
+		}
+
+ #endregion
}
}