1 // HtmlAgilityPack V1.0 - Simon Mourier <simon underscore mourier at hotmail dot com>
\r
3 using System.Collections;
\r
4 using System.Collections.Generic;
\r
5 using System.Diagnostics;
\r
8 using System.Xml.XPath;
\r
10 namespace HtmlAgilityPack
\r
13 /// Represents an HTML node.
\r
15 [DebuggerDisplay("Name: {OriginalName}}")]
\r
16 public class HtmlNode : IXPathNavigable
\r
20 internal HtmlAttributeCollection _attributes;
\r
21 internal HtmlNodeCollection _childnodes;
\r
22 internal HtmlNode _endnode;
\r
24 internal bool _innerchanged;
\r
25 internal string _innerhtml;
\r
26 internal int _innerlength;
\r
27 internal int _innerstartindex;
\r
29 internal int _lineposition;
\r
30 private string _name;
\r
31 internal int _namelength;
\r
32 internal int _namestartindex;
\r
33 internal HtmlNode _nextnode;
\r
34 internal HtmlNodeType _nodetype;
\r
35 internal bool _outerchanged;
\r
36 internal string _outerhtml;
\r
37 internal int _outerlength;
\r
38 internal int _outerstartindex;
\r
39 internal HtmlDocument _ownerdocument;
\r
40 internal HtmlNode _parentnode;
\r
41 internal HtmlNode _prevnode;
\r
42 internal HtmlNode _prevwithsamename;
\r
43 internal bool _starttag;
\r
44 internal int _streamposition;
\r
48 #region Static Members
\r
51 /// Gets the name of a comment node. It is actually defined as '#comment'.
\r
53 public static readonly string HtmlNodeTypeNameComment = "#comment";
\r
56 /// Gets the name of the document node. It is actually defined as '#document'.
\r
58 public static readonly string HtmlNodeTypeNameDocument = "#document";
\r
61 /// Gets the name of a text node. It is actually defined as '#text'.
\r
63 public static readonly string HtmlNodeTypeNameText = "#text";
\r
66 /// Gets a collection of flags that define specific behaviors for specific element nodes.
\r
67 /// The table contains a DictionaryEntry list with the lowercase tag name as the Key, and a combination of HtmlElementFlags as the Value.
\r
69 public static Hashtable ElementsFlags;
\r
73 #region Constructors
\r
76 /// Initialize HtmlNode. Builds a list of all tags that have special allowances
\r
80 // tags whose content may be anything
\r
81 ElementsFlags = new Hashtable();
\r
82 ElementsFlags.Add("script", HtmlElementFlag.CData);
\r
83 ElementsFlags.Add("style", HtmlElementFlag.CData);
\r
84 ElementsFlags.Add("noxhtml", HtmlElementFlag.CData);
\r
86 // tags that can not contain other tags
\r
87 ElementsFlags.Add("base", HtmlElementFlag.Empty);
\r
88 ElementsFlags.Add("link", HtmlElementFlag.Empty);
\r
89 ElementsFlags.Add("meta", HtmlElementFlag.Empty);
\r
90 ElementsFlags.Add("isindex", HtmlElementFlag.Empty);
\r
91 ElementsFlags.Add("hr", HtmlElementFlag.Empty);
\r
92 ElementsFlags.Add("col", HtmlElementFlag.Empty);
\r
93 ElementsFlags.Add("img", HtmlElementFlag.Empty);
\r
94 ElementsFlags.Add("param", HtmlElementFlag.Empty);
\r
95 ElementsFlags.Add("embed", HtmlElementFlag.Empty);
\r
96 ElementsFlags.Add("frame", HtmlElementFlag.Empty);
\r
97 ElementsFlags.Add("wbr", HtmlElementFlag.Empty);
\r
98 ElementsFlags.Add("bgsound", HtmlElementFlag.Empty);
\r
99 ElementsFlags.Add("spacer", HtmlElementFlag.Empty);
\r
100 ElementsFlags.Add("keygen", HtmlElementFlag.Empty);
\r
101 ElementsFlags.Add("area", HtmlElementFlag.Empty);
\r
102 ElementsFlags.Add("input", HtmlElementFlag.Empty);
\r
103 ElementsFlags.Add("basefont", HtmlElementFlag.Empty);
\r
105 ElementsFlags.Add("form", HtmlElementFlag.CanOverlap | HtmlElementFlag.Empty);
\r
107 // they sometimes contain, and sometimes they don't...
\r
108 ElementsFlags.Add("option", HtmlElementFlag.Empty);
\r
110 // tag whose closing tag is equivalent to open tag:
\r
111 // <p>bla</p>bla will be transformed into <p>bla</p>bla
\r
112 // <p>bla<p>bla will be transformed into <p>bla<p>bla and not <p>bla</p><p>bla</p> or <p>bla<p>bla</p></p>
\r
114 ElementsFlags.Add("br", HtmlElementFlag.Empty | HtmlElementFlag.Closed);
\r
115 ElementsFlags.Add("p", HtmlElementFlag.Empty | HtmlElementFlag.Closed);
\r
119 /// Initializes HtmlNode, providing type, owner and where it exists in a collection
\r
121 /// <param name="type"></param>
\r
122 /// <param name="ownerdocument"></param>
\r
123 /// <param name="index"></param>
\r
124 public HtmlNode(HtmlNodeType type, HtmlDocument ownerdocument, int index)
\r
127 _ownerdocument = ownerdocument;
\r
128 _outerstartindex = index;
\r
132 case HtmlNodeType.Comment:
\r
133 Name = HtmlNodeTypeNameComment;
\r
137 case HtmlNodeType.Document:
\r
138 Name = HtmlNodeTypeNameDocument;
\r
142 case HtmlNodeType.Text:
\r
143 Name = HtmlNodeTypeNameText;
\r
148 if (_ownerdocument._openednodes != null)
\r
152 // we use the index as the key
\r
154 // -1 means the node comes from public
\r
157 _ownerdocument._openednodes.Add(index, this);
\r
162 if ((-1 != index) || (type == HtmlNodeType.Comment) || (type == HtmlNodeType.Text)) return;
\r
163 // innerhtml and outerhtml must be calculated
\r
164 _outerchanged = true;
\r
165 _innerchanged = true;
\r
/// <summary>
/// Gets the collection of HTML attributes for this node. Never returns <c>null</c>:
/// the collection is created on first access.
/// </summary>
public HtmlAttributeCollection Attributes
{
    get
    {
        // Allocate only when no collection exists yet. Testing HasAttributes here
        // would replace an existing-but-empty collection on every read, so a
        // reference handed out by an earlier read would silently stop being this
        // node's attribute collection. ChildNodes uses the same null test.
        if (_attributes == null)
        {
            _attributes = new HtmlAttributeCollection(this);
        }
        return _attributes;
    }
    internal set { _attributes = value; }
}
\r
/// <summary>
/// Gets the child nodes of this node. The collection is created on first access,
/// so the getter does not return <c>null</c>.
/// </summary>
public HtmlNodeCollection ChildNodes
{
    get { return _childnodes ?? (_childnodes = new HtmlNodeCollection(this)); }
    internal set { _childnodes = value; }
}
\r
205 /// Gets a value indicating if this node has been closed or not.
\r
209 get { return (_endnode != null); }
\r
/// <summary>
/// Gets the collection of HTML attributes carried by the closing tag.
/// When the closing tag has no attributes a new, empty collection is returned.
/// </summary>
public HtmlAttributeCollection ClosingAttributes
{
    get
    {
        return HasClosingAttributes
            ? _endnode.Attributes
            : new HtmlAttributeCollection(this);
    }
}
\r
/// <summary>
/// Gets the end node associated with this node (the value of <c>_endnode</c>).
/// </summary>
internal HtmlNode EndNode
{
    get
    {
        return _endnode;
    }
}
\r
/// <summary>
/// Gets the first child of the node, or <c>null</c> when the node has no children.
/// </summary>
public HtmlNode FirstChild
{
    get { return HasChildNodes ? _childnodes[0] : null; }
}
\r
/// <summary>
/// Gets a value indicating whether the current node has any attributes,
/// i.e. an attribute collection exists and contains at least one entry.
/// </summary>
public bool HasAttributes
{
    get { return _attributes != null && _attributes.Count > 0; }
}
\r
/// <summary>
/// Gets a value indicating whether this node has any child nodes,
/// i.e. a child collection exists and contains at least one node.
/// </summary>
public bool HasChildNodes
{
    get { return _childnodes != null && _childnodes.Count > 0; }
}
\r
/// <summary>
/// Gets a value indicating whether the current node has any attributes on the closing tag.
/// </summary>
public bool HasClosingAttributes
{
    get
    {
        // No end node, or an end node that is this node itself: nothing to report.
        if (_endnode == null || _endnode == this)
        {
            return false;
        }

        HtmlAttributeCollection closing = _endnode._attributes;
        return closing != null && closing.Count > 0;
    }
}
\r
313 /// Gets or sets the value of the 'id' HTML attribute. The document must have been parsed using the OptionUseIdAttribute set to true.
\r
319 if (_ownerdocument._nodesid == null)
\r
321 throw new Exception(HtmlDocument.HtmlExceptionUseIdAttributeFalse);
\r
327 if (_ownerdocument._nodesid == null)
\r
329 throw new Exception(HtmlDocument.HtmlExceptionUseIdAttributeFalse);
\r
334 throw new ArgumentNullException("value");
\r
341 /// Gets or Sets the HTML between the start and end tags of the object.
\r
343 public virtual string InnerHtml
\r
349 _innerhtml = WriteContentTo();
\r
350 _innerchanged = false;
\r
353 if (_innerhtml != null)
\r
358 if (_innerstartindex < 0)
\r
360 return string.Empty;
\r
363 return _ownerdocument._text.Substring(_innerstartindex, _innerlength);
\r
367 HtmlDocument doc = new HtmlDocument();
\r
368 doc.LoadHtml(value);
\r
370 RemoveAllChildren();
\r
371 AppendChildren(doc.DocumentNode.ChildNodes);
\r
/// <summary>
/// Gets the text between the start and end tags of the object.
/// For a text node this is its text, for a comment node its comment; for any
/// other node it is the concatenation of the InnerText of every child node.
/// </summary>
public virtual string InnerText
{
    get
    {
        if (_nodetype == HtmlNodeType.Text)
        {
            return ((HtmlTextNode) this).Text;
        }

        if (_nodetype == HtmlNodeType.Comment)
        {
            return ((HtmlCommentNode) this).Comment;
        }

        // note: the text is recomputed on every call; it is not cached like innerhtml
        if (!HasChildNodes)
        {
            return string.Empty;
        }

        // Accumulate in a StringBuilder rather than with repeated string
        // concatenation, which copies the whole partial result for every child.
        System.Text.StringBuilder text = new System.Text.StringBuilder();
        foreach (HtmlNode node in ChildNodes)
        {
            text.Append(node.InnerText);
        }
        return text.ToString();
    }
}
\r
/// <summary>
/// Gets the last child of the node, or <c>null</c> when the node has no children.
/// </summary>
public HtmlNode LastChild
{
    get
    {
        if (HasChildNodes)
        {
            return _childnodes[_childnodes.Count - 1];
        }
        return null;
    }
}
\r
420 /// Gets the line number of this node in the document.
\r
424 get { return _line; }
\r
425 internal set { _line = value; }
\r
/// <summary>
/// Gets the column number of this node in the document.
/// </summary>
public int LinePosition
{
    get
    {
        return _lineposition;
    }
    internal set
    {
        _lineposition = value;
    }
}
\r
438 /// Gets or sets this node's name.
\r
446 Name = _ownerdocument._text.Substring(_namestartindex, _namelength);
\r
448 return _name != null ? _name.ToLower() : string.Empty;
\r
450 set { _name = value; }
\r
/// <summary>
/// Gets the HTML node immediately following this element.
/// </summary>
public HtmlNode NextSibling
{
    get
    {
        return _nextnode;
    }
    internal set
    {
        _nextnode = value;
    }
}
\r
/// <summary>
/// Gets the type of this node.
/// </summary>
public HtmlNodeType NodeType
{
    get
    {
        return _nodetype;
    }
    internal set
    {
        _nodetype = value;
    }
}
\r
/// <summary>
/// Gets the name of the tag exactly as stored, without any case conversion.
/// </summary>
public string OriginalName
{
    get
    {
        return _name;
    }
}
\r
480 /// Gets or Sets the object and its content in HTML.
\r
482 public virtual string OuterHtml
\r
488 _outerhtml = WriteTo();
\r
489 _outerchanged = false;
\r
493 if (_outerhtml != null)
\r
498 if (_outerstartindex < 0)
\r
500 return string.Empty;
\r
503 return _ownerdocument._text.Substring(_outerstartindex, _outerlength);
\r
/// <summary>
/// Gets the <see cref="HtmlDocument"/> to which this node belongs.
/// </summary>
public HtmlDocument OwnerDocument
{
    get
    {
        return _ownerdocument;
    }
    internal set
    {
        _ownerdocument = value;
    }
}
\r
/// <summary>
/// Gets the parent of this node (for nodes that can have parents).
/// </summary>
public HtmlNode ParentNode
{
    get
    {
        return _parentnode;
    }
    internal set
    {
        _parentnode = value;
    }
}
\r
/// <summary>
/// Gets the node immediately preceding this node.
/// </summary>
public HtmlNode PreviousSibling
{
    get
    {
        return _prevnode;
    }
    internal set
    {
        _prevnode = value;
    }
}
\r
/// <summary>
/// Gets the stream position of this node in the document, relative to the start of the document.
/// </summary>
public int StreamPosition
{
    get
    {
        return _streamposition;
    }
}
\r
543 /// Gets a valid XPath string that points to this node
\r
545 public string XPath
\r
549 string basePath = (ParentNode == null || ParentNode.NodeType == HtmlNodeType.Document)
\r
551 : ParentNode.XPath + "/";
\r
552 return basePath + GetRelativeXpath();
\r
558 #region IXPathNavigable Members
\r
/// <summary>
/// Creates a new XPathNavigator object for navigating this HTML node.
/// </summary>
/// <returns>An XPathNavigator object positioned on the node from which the method was called, not on the root of the document.</returns>
public XPathNavigator CreateNavigator()
{
    HtmlNodeNavigator navigator = new HtmlNodeNavigator(_ownerdocument, this);
    return navigator;
}
\r
571 #region Public Methods
\r
/// <summary>
/// Determines if an element node can be kept overlapped.
/// </summary>
/// <param name="name">The name of the element node to check. May not be <c>null</c>.</param>
/// <returns>true if the name is the name of an element node that can be kept overlapped, <c>false</c> otherwise.</returns>
/// <exception cref="ArgumentNullException"><paramref name="name"/> is <c>null</c>.</exception>
public static bool CanOverlapElement(string name)
{
    if (name == null)
    {
        throw new ArgumentNullException("name");
    }

    // ElementsFlags is keyed by lowercase tag name. Lowercase with the invariant
    // culture so the lookup does not depend on the current thread culture
    // (under Turkish rules, for example, 'I' lowercases to a dotless 'ı').
    object flag = ElementsFlags[name.ToLowerInvariant()];
    if (flag == null)
    {
        // no entry for this tag: no special behavior
        return false;
    }

    return (((HtmlElementFlag) flag) & HtmlElementFlag.CanOverlap) != 0;
}
\r
/// <summary>
/// Creates an HTML node from a string representing literal HTML.
/// </summary>
/// <param name="html">The HTML text.</param>
/// <returns>The first child of the document node obtained by parsing <paramref name="html"/>.</returns>
public static HtmlNode CreateNode(string html)
{
    // REVIEW: a whole throw-away document is parsed just to obtain one node.
    HtmlDocument parsed = new HtmlDocument();
    parsed.LoadHtml(html);

    HtmlNode root = parsed.DocumentNode;
    return root.FirstChild;
}
\r
/// <summary>
/// Determines if an element node is a CDATA element node.
/// </summary>
/// <param name="name">The name of the element node to check. May not be null.</param>
/// <returns>true if the name is the name of a CDATA element node, false otherwise.</returns>
/// <exception cref="ArgumentNullException"><paramref name="name"/> is <c>null</c>.</exception>
public static bool IsCDataElement(string name)
{
    if (name == null)
    {
        throw new ArgumentNullException("name");
    }

    // ElementsFlags is keyed by lowercase tag name. Lowercase with the invariant
    // culture so the lookup does not depend on the current thread culture
    // (under Turkish rules, for example, 'I' lowercases to a dotless 'ı').
    object flag = ElementsFlags[name.ToLowerInvariant()];
    if (flag == null)
    {
        // no entry for this tag: no special behavior
        return false;
    }

    return (((HtmlElementFlag) flag) & HtmlElementFlag.CData) != 0;
}
\r
/// <summary>
/// Determines if an element node is closed.
/// </summary>
/// <param name="name">The name of the element node to check. May not be null.</param>
/// <returns>true if the name is the name of a closed element node, false otherwise.</returns>
/// <exception cref="ArgumentNullException"><paramref name="name"/> is <c>null</c>.</exception>
public static bool IsClosedElement(string name)
{
    if (name == null)
    {
        throw new ArgumentNullException("name");
    }

    // ElementsFlags is keyed by lowercase tag name. Lowercase with the invariant
    // culture so the lookup does not depend on the current thread culture
    // (under Turkish rules, for example, 'I' lowercases to a dotless 'ı').
    object flag = ElementsFlags[name.ToLowerInvariant()];
    if (flag == null)
    {
        // no entry for this tag: no special behavior
        return false;
    }

    return (((HtmlElementFlag) flag) & HtmlElementFlag.Closed) != 0;
}
\r
647 /// Determines if an element node is defined as empty.
\r
649 /// <param name="name">The name of the element node to check. May not be null.</param>
\r
650 /// <returns>true if the name is the name of an empty element node, false otherwise.</returns>
\r
651 public static bool IsEmptyElement(string name)
\r
655 throw new ArgumentNullException("name");
\r
658 if (name.Length == 0)
\r
664 if ('!' == name[0])
\r
670 if ('?' == name[0])
\r
675 object flag = ElementsFlags[name.ToLower()];
\r
680 return (((HtmlElementFlag) flag) & HtmlElementFlag.Empty) != 0;
\r
684 /// Determines if a text corresponds to the closing tag of an node that can be kept overlapped.
\r
686 /// <param name="text">The text to check. May not be null.</param>
\r
687 /// <returns>true or false.</returns>
\r
688 public static bool IsOverlappedClosingElement(string text)
\r
692 throw new ArgumentNullException("text");
\r
695 if (text.Length <= 4)
\r
698 if ((text[0] != '<') ||
\r
699 (text[text.Length - 1] != '>') ||
\r
703 string name = text.Substring(2, text.Length - 3);
\r
704 return CanOverlapElement(name);
\r
/// <summary>
/// Returns a collection of all ancestor nodes of this element, starting with the
/// parent and walking up until a node without a parent is reached.
/// </summary>
/// <returns>The ancestors, nearest first; empty when this node has no parent.</returns>
public IEnumerable<HtmlNode> Ancestors()
{
    // Start at the parent itself: the previous loop dereferenced ParentNode
    // without a null check and only ever yielded node.ParentNode, so a parentless
    // node threw and the immediate parent was never returned. This now matches
    // the walk used by Ancestors(string).
    for (HtmlNode node = ParentNode; node != null; node = node.ParentNode)
    {
        yield return node;
    }
}
\r
/// <summary>
/// Gets the ancestors of this node whose name equals the given name.
/// </summary>
/// <param name="name">The node name to match against <c>Name</c>.</param>
/// <returns>The matching ancestors, nearest first.</returns>
public IEnumerable<HtmlNode> Ancestors(string name)
{
    HtmlNode current = ParentNode;
    while (current != null)
    {
        if (current.Name == name)
        {
            yield return current;
        }
        current = current.ParentNode;
    }
}
\r
/// <summary>
/// Returns this node followed by all of its ancestor nodes.
/// </summary>
/// <returns>This node first, then each successive parent.</returns>
public IEnumerable<HtmlNode> AncestorsAndSelf()
{
    HtmlNode current = this;
    while (current != null)
    {
        yield return current;
        current = current.ParentNode;
    }
}
\r
/// <summary>
/// Gets the current node and all ancestor nodes whose name equals the given name.
/// </summary>
/// <param name="name">The node name to match against <c>Name</c>.</param>
/// <returns>The matching nodes, starting from this node and walking up.</returns>
public IEnumerable<HtmlNode> AncestorsAndSelf(string name)
{
    HtmlNode current = this;
    while (current != null)
    {
        if (current.Name == name)
        {
            yield return current;
        }
        current = current.ParentNode;
    }
}
\r
/// <summary>
/// Adds the specified node to the end of the list of children of this node.
/// </summary>
/// <param name="newChild">The node to add. May not be null.</param>
/// <returns>The node added.</returns>
public HtmlNode AppendChild(HtmlNode newChild)
{
    if (newChild == null) throw new ArgumentNullException("newChild");

    ChildNodes.Append(newChild);

    // register the new child under its id in the owner document
    string childId = newChild.GetId();
    _ownerdocument.SetIdForNode(newChild, childId);

    // cached inner/outer html no longer reflects the tree
    _innerchanged = true;
    _outerchanged = true;
    return newChild;
}
\r
/// <summary>
/// Adds each node of the specified list, in order, to the end of the list of children of this node.
/// </summary>
/// <param name="newChildren">The node list to add. May not be null.</param>
/// <exception cref="ArgumentNullException"><paramref name="newChildren"/> is <c>null</c>.</exception>
public void AppendChildren(HtmlNodeCollection newChildren)
{
    if (newChildren == null)
    {
        // the parameter name reported here must match the real parameter
        throw new ArgumentNullException("newChildren");
    }

    foreach (HtmlNode newChild in newChildren)
    {
        AppendChild(newChild);
    }
}
\r
/// <summary>
/// Gets all attributes of this node that have the given name.
/// </summary>
/// <param name="name">The attribute name passed to <c>AttributesWithName</c>.</param>
/// <returns>The attributes returned by the attribute collection for that name.</returns>
public IEnumerable<HtmlAttribute> ChildAttributes(string name)
{
    HtmlAttributeCollection own = Attributes;
    return own.AttributesWithName(name);
}
\r
/// <summary>
/// Creates a duplicate of the node, including the subtree under it.
/// </summary>
/// <returns>The cloned node.</returns>
public HtmlNode Clone()
{
    const bool deep = true;
    return CloneNode(deep);
}
\r
/// <summary>
/// Creates a deep duplicate of the node and changes its name at the same time.
/// </summary>
/// <param name="newName">The new name of the cloned node. May not be <c>null</c>.</param>
/// <returns>The cloned node.</returns>
public HtmlNode CloneNode(string newName)
{
    const bool deep = true;
    return CloneNode(newName, deep);
}
\r
/// <summary>
/// Creates a duplicate of the node and changes its name at the same time.
/// </summary>
/// <param name="newName">The new name of the cloned node. May not be null.</param>
/// <param name="deep">true to recursively clone the subtree under the specified node; false to clone only the node itself.</param>
/// <returns>The cloned node.</returns>
public HtmlNode CloneNode(string newName, bool deep)
{
    if (newName == null) throw new ArgumentNullException("newName");

    HtmlNode copy = CloneNode(deep);
    copy.Name = newName;
    return copy;
}
\r
837 /// Creates a duplicate of the node.
\r
839 /// <param name="deep">true to recursively clone the subtree under the specified node; false to clone only the node itself.</param>
\r
840 /// <returns>The cloned node.</returns>
\r
841 public HtmlNode CloneNode(bool deep)
\r
843 HtmlNode node = _ownerdocument.CreateNode(_nodetype);
\r
848 case HtmlNodeType.Comment:
\r
849 ((HtmlCommentNode) node).Comment = ((HtmlCommentNode) this).Comment;
\r
852 case HtmlNodeType.Text:
\r
853 ((HtmlTextNode) node).Text = ((HtmlTextNode) this).Text;
\r
860 foreach (HtmlAttribute att in _attributes)
\r
862 HtmlAttribute newatt = att.Clone();
\r
863 node.Attributes.Append(newatt);
\r
867 // closing attributes
\r
868 if (HasClosingAttributes)
\r
870 node._endnode = _endnode.CloneNode(false);
\r
871 foreach (HtmlAttribute att in _endnode._attributes)
\r
873 HtmlAttribute newatt = att.Clone();
\r
874 node._endnode._attributes.Append(newatt);
\r
882 if (!HasChildNodes)
\r
888 foreach (HtmlNode child in _childnodes)
\r
890 HtmlNode newchild = child.Clone();
\r
891 node.AppendChild(newchild);
\r
/// <summary>
/// Copies the given node, and the subtree under it, into this node.
/// </summary>
/// <param name="node">The node to duplicate. May not be <c>null</c>.</param>
public void CopyFrom(HtmlNode node)
{
    const bool deep = true;
    CopyFrom(node, deep);
}
\r
906 /// Creates a duplicate of the node.
\r
908 /// <param name="node">The node to duplicate. May not be <c>null</c>.</param>
\r
909 /// <param name="deep">true to recursively clone the subtree under the specified node, false to clone only the node itself.</param>
\r
910 public void CopyFrom(HtmlNode node, bool deep)
\r
914 throw new ArgumentNullException("node");
\r
917 Attributes.RemoveAll();
\r
918 if (node.HasAttributes)
\r
920 foreach (HtmlAttribute att in node.Attributes)
\r
922 SetAttributeValue(att.Name, att.Value);
\r
928 RemoveAllChildren();
\r
929 if (node.HasChildNodes)
\r
931 foreach (HtmlNode child in node.ChildNodes)
\r
933 AppendChild(child.CloneNode(true));
\r
/// <summary>
/// Creates an XPathNavigator using the root of this document.
/// </summary>
/// <returns>A navigator created on the owner document's DocumentNode.</returns>
public XPathNavigator CreateRootNavigator()
{
    HtmlNode root = _ownerdocument.DocumentNode;
    return new HtmlNodeNavigator(_ownerdocument, root);
}
\r
/// <summary>
/// Enumerates every descendant of this node: each child is returned,
/// immediately followed by that child's own descendants.
/// </summary>
/// <returns>The descendant nodes.</returns>
public IEnumerable<HtmlNode> DescendantNodes()
{
    foreach (HtmlNode child in ChildNodes)
    {
        yield return child;

        foreach (HtmlNode nested in child.DescendantNodes())
        {
            yield return nested;
        }
    }
}
\r
/// <summary>
/// Returns this node together with all of its descendant nodes.
/// Forwards to <c>DescendantsAndSelf()</c>.
/// </summary>
/// <returns>The sequence produced by <c>DescendantsAndSelf()</c>.</returns>
public IEnumerable<HtmlNode> DescendantNodesAndSelf()
{
    IEnumerable<HtmlNode> all = DescendantsAndSelf();
    return all;
}
\r
/// <summary>
/// Gets all descendant nodes as an enumerable sequence.
/// </summary>
/// <returns>The same nodes, in the same order, as <c>DescendantNodes()</c>.</returns>
public IEnumerable<HtmlNode> Descendants()
{
    IEnumerator<HtmlNode> walker = DescendantNodes().GetEnumerator();
    try
    {
        while (walker.MoveNext())
        {
            yield return walker.Current;
        }
    }
    finally
    {
        walker.Dispose();
    }
}
\r
/// <summary>
/// Gets all descendant nodes whose name equals the given name.
/// </summary>
/// <param name="name">The node name to match against <c>Name</c>.</param>
/// <returns>The matching descendants, in the order produced by <c>Descendants()</c>.</returns>
public IEnumerable<HtmlNode> Descendants(string name)
{
    foreach (HtmlNode candidate in Descendants())
    {
        if (candidate.Name != name)
        {
            continue;
        }
        yield return candidate;
    }
}
\r
996 /// Returns a collection of all descendant nodes of this element, in document order
\r
998 /// <returns></returns>
\r
999 public IEnumerable<HtmlNode> DescendantsAndSelf()
\r
1001 yield return this;
\r
1002 foreach (HtmlNode n in DescendantNodes())
\r
/// <summary>
/// Gets this node and all of its descendant nodes whose name equals the given name.
/// </summary>
/// <param name="name">The node name to match against <c>Name</c>.</param>
/// <returns>This node (when its name matches) followed by the matching descendants.</returns>
public IEnumerable<HtmlNode> DescendantsAndSelf(string name)
{
    // Apply the name filter to this node as well. It used to be yielded
    // unconditionally, so callers received a non-matching node; this mirrors
    // AncestorsAndSelf(string), which also filters the starting node.
    if (Name == name)
    {
        yield return this;
    }

    foreach (HtmlNode node in Descendants())
    {
        if (node.Name == name)
        {
            yield return node;
        }
    }
}
\r
/// <summary>
/// Gets the first direct child node whose name equals the given name.
/// </summary>
/// <param name="name">The node name to match against <c>Name</c>.</param>
/// <returns>The first matching child, or <c>null</c> when no child matches.</returns>
public HtmlNode Element(string name)
{
    foreach (HtmlNode child in ChildNodes)
    {
        if (child.Name != name)
        {
            continue;
        }
        return child;
    }
    return null;
}
\r
/// <summary>
/// Gets the direct child nodes whose name equals the given name.
/// </summary>
/// <param name="name">The node name to match against <c>Name</c>.</param>
/// <returns>The matching children, in child order.</returns>
public IEnumerable<HtmlNode> Elements(string name)
{
    foreach (HtmlNode child in ChildNodes)
    {
        if (child.Name != name)
        {
            continue;
        }
        yield return child;
    }
}
\r
1049 /// Helper method to get the value of an attribute of this node. If the attribute is not found, the default value will be returned.
\r
1051 /// <param name="name">The name of the attribute to get. May not be <c>null</c>.</param>
\r
1052 /// <param name="def">The default value to return if not found.</param>
\r
1053 /// <returns>The value of the attribute if found, the default value if not found.</returns>
\r
1054 public string GetAttributeValue(string name, string def)
\r
1058 throw new ArgumentNullException("name");
\r
1061 if (!HasAttributes)
\r
1065 HtmlAttribute att = Attributes[name];
\r
1074 /// Helper method to get the value of an attribute of this node. If the attribute is not found, the default value will be returned.
\r
1076 /// <param name="name">The name of the attribute to get. May not be <c>null</c>.</param>
\r
1077 /// <param name="def">The default value to return if not found.</param>
\r
1078 /// <returns>The value of the attribute if found, the default value if not found.</returns>
\r
1079 public int GetAttributeValue(string name, int def)
\r
1083 throw new ArgumentNullException("name");
\r
1086 if (!HasAttributes)
\r
1090 HtmlAttribute att = Attributes[name];
\r
1097 return Convert.ToInt32(att.Value);
\r
1106 /// Helper method to get the value of an attribute of this node. If the attribute is not found, the default value will be returned.
\r
1108 /// <param name="name">The name of the attribute to get. May not be <c>null</c>.</param>
\r
1109 /// <param name="def">The default value to return if not found.</param>
\r
1110 /// <returns>The value of the attribute if found, the default value if not found.</returns>
\r
1111 public bool GetAttributeValue(string name, bool def)
\r
1115 throw new ArgumentNullException("name");
\r
1118 if (!HasAttributes)
\r
1122 HtmlAttribute att = Attributes[name];
\r
1129 return Convert.ToBoolean(att.Value);
\r
1138 /// Inserts the specified node immediately after the specified reference node.
\r
1140 /// <param name="newChild">The node to insert. May not be <c>null</c>.</param>
\r
1141 /// <param name="refChild">The node that is the reference node. The newNode is placed after the refNode.</param>
\r
1142 /// <returns>The node being inserted.</returns>
\r
1143 public HtmlNode InsertAfter(HtmlNode newChild, HtmlNode refChild)
\r
1145 if (newChild == null)
\r
1147 throw new ArgumentNullException("newChild");
\r
1150 if (refChild == null)
\r
1152 return PrependChild(newChild);
\r
1155 if (newChild == refChild)
\r
1162 if (_childnodes != null)
\r
1164 index = _childnodes[refChild];
\r
1168 throw new ArgumentException(HtmlDocument.HtmlExceptionRefNotChild);
\r
1171 if (_childnodes != null) _childnodes.Insert(index + 1, newChild);
\r
1173 _ownerdocument.SetIdForNode(newChild, newChild.GetId());
\r
1174 _outerchanged = true;
\r
1175 _innerchanged = true;
\r
1180 /// Inserts the specified node immediately before the specified reference node.
\r
1182 /// <param name="newChild">The node to insert. May not be <c>null</c>.</param>
\r
1183 /// <param name="refChild">The node that is the reference node. The newChild is placed before this node.</param>
\r
1184 /// <returns>The node being inserted.</returns>
\r
1185 public HtmlNode InsertBefore(HtmlNode newChild, HtmlNode refChild)
\r
1187 if (newChild == null)
\r
1189 throw new ArgumentNullException("newChild");
\r
1192 if (refChild == null)
\r
1194 return AppendChild(newChild);
\r
1197 if (newChild == refChild)
\r
1204 if (_childnodes != null)
\r
1206 index = _childnodes[refChild];
\r
1211 throw new ArgumentException(HtmlDocument.HtmlExceptionRefNotChild);
\r
1214 if (_childnodes != null) _childnodes.Insert(index, newChild);
\r
1216 _ownerdocument.SetIdForNode(newChild, newChild.GetId());
\r
1217 _outerchanged = true;
\r
1218 _innerchanged = true;
\r
/// <summary>
/// Adds the specified node to the beginning of the list of children of this node.
/// </summary>
/// <param name="newChild">The node to add. May not be <c>null</c>.</param>
/// <returns>The node added.</returns>
public HtmlNode PrependChild(HtmlNode newChild)
{
    if (newChild == null) throw new ArgumentNullException("newChild");

    ChildNodes.Prepend(newChild);

    // register the new child under its id in the owner document
    string childId = newChild.GetId();
    _ownerdocument.SetIdForNode(newChild, childId);

    // cached inner/outer html no longer reflects the tree
    _innerchanged = true;
    _outerchanged = true;
    return newChild;
}
\r
/// <summary>
/// Adds the specified node list to the beginning of the list of children of this node,
/// keeping the nodes in the order they have in <paramref name="newChildren"/>.
/// </summary>
/// <param name="newChildren">The node list to add. May not be <c>null</c>.</param>
/// <exception cref="ArgumentNullException"><paramref name="newChildren"/> is <c>null</c>.</exception>
public void PrependChildren(HtmlNodeCollection newChildren)
{
    if (newChildren == null)
    {
        throw new ArgumentNullException("newChildren");
    }

    // Each PrependChild call puts its node in front of everything added so far,
    // so walk the list backwards; a forward walk would leave the new children
    // in reverse order.
    for (int i = newChildren.Count - 1; i >= 0; i--)
    {
        PrependChild(newChildren[i]);
    }
}
\r
/// <summary>
/// Removes this node from its parent's child collection. Does nothing when the node has no parent.
/// </summary>
public void Remove()
{
    HtmlNode parent = ParentNode;
    if (parent == null)
    {
        return;
    }
    parent.ChildNodes.Remove(this);
}
\r
/// <summary>
/// Removes all the children and attributes of the current node, including any
/// attributes held by a separate end node.
/// </summary>
public void RemoveAll()
{
    RemoveAllChildren();

    if (HasAttributes) _attributes.Clear();

    // a distinct end node carries the closing-tag attributes; clear those too
    bool hasSeparateEndNode = (_endnode != null) && (_endnode != this);
    if (hasSeparateEndNode && _endnode._attributes != null)
    {
        _endnode._attributes.Clear();
    }

    // cached inner/outer html no longer reflects the node
    _outerchanged = true;
    _innerchanged = true;
}
\r
/// <summary>
/// Removes all the children of the current node. Does nothing when there are none.
/// </summary>
public void RemoveAllChildren()
{
    if (!HasChildNodes) return;

    if (_ownerdocument.OptionUseIdAttribute)
    {
        // drop each child's id from the owner document's id list
        foreach (HtmlNode child in _childnodes)
        {
            string childId = child.GetId();
            _ownerdocument.SetIdForNode(null, childId);
        }
    }

    _childnodes.Clear();

    // cached inner/outer html no longer reflects the tree
    _outerchanged = true;
    _innerchanged = true;
}
\r
1313 /// Removes the specified child node.
\r
1315 /// <param name="oldChild">The node being removed. May not be <c>null</c>.</param>
\r
1316 /// <returns>The node removed.</returns>
\r
1317 public HtmlNode RemoveChild(HtmlNode oldChild)
\r
1319 if (oldChild == null)
\r
1321 throw new ArgumentNullException("oldChild");
\r
1326 if (_childnodes != null)
\r
1328 index = _childnodes[oldChild];
\r
1333 throw new ArgumentException(HtmlDocument.HtmlExceptionRefNotChild);
\r
1336 if (_childnodes != null)
\r
1337 _childnodes.Remove(index);
\r
1339 _ownerdocument.SetIdForNode(null, oldChild.GetId());
\r
1340 _outerchanged = true;
\r
1341 _innerchanged = true;
\r
/// <summary>
/// Removes the specified child node, optionally re-attaching its children to this
/// node at the position the removed child occupied.
/// </summary>
/// <param name="oldChild">The node being removed. May not be <c>null</c>.</param>
/// <param name="keepGrandChildren">true to keep grand children of the node, false otherwise.</param>
/// <returns>The node removed.</returns>
/// <exception cref="ArgumentNullException"><paramref name="oldChild"/> is <c>null</c>.</exception>
public HtmlNode RemoveChild(HtmlNode oldChild, bool keepGrandChildren)
{
    if (oldChild == null)
    {
        throw new ArgumentNullException("oldChild");
    }

    if ((oldChild._childnodes != null) && keepGrandChildren)
    {
        // insertion point: the sibling just before the child being removed
        // (null means the grand children go to the front of the list)
        HtmlNode anchor = oldChild.PreviousSibling;

        // Reroute grand children to ourselves. The anchor advances to the node
        // just inserted; inserting every grand child after the same fixed
        // sibling would leave them in reverse order.
        foreach (HtmlNode grandchild in oldChild._childnodes)
        {
            InsertAfter(grandchild, anchor);
            anchor = grandchild;
        }
    }

    RemoveChild(oldChild);
    _outerchanged = true;
    _innerchanged = true;
    return oldChild;
}
\r
1376 /// Replaces the child node oldChild with newChild node.
\r
1378 /// <param name="newChild">The new node to put in the child list.</param>
\r
1379 /// <param name="oldChild">The node being replaced in the list.</param>
\r
1380 /// <returns>The node replaced.</returns>
\r
1381 public HtmlNode ReplaceChild(HtmlNode newChild, HtmlNode oldChild)
\r
1383 if (newChild == null)
\r
1385 return RemoveChild(oldChild);
\r
1388 if (oldChild == null)
\r
1390 return AppendChild(newChild);
\r
1395 if (_childnodes != null)
\r
1397 index = _childnodes[oldChild];
\r
1402 throw new ArgumentException(HtmlDocument.HtmlExceptionRefNotChild);
\r
1405 if (_childnodes != null) _childnodes.Replace(index, newChild);
\r
1407 _ownerdocument.SetIdForNode(null, oldChild.GetId());
\r
1408 _ownerdocument.SetIdForNode(newChild, newChild.GetId());
\r
1409 _outerchanged = true;
\r
1410 _innerchanged = true;
\r
/// <summary>
/// Selects a list of nodes matching the <see cref="XPath"/> expression.
/// </summary>
/// <param name="xpath">The XPath expression.</param>
/// <returns>An <see cref="HtmlNodeCollection"/> containing a collection of nodes matching the <see cref="XPath"/> query, or <c>null</c> if no node matched the XPath expression.</returns>
public HtmlNodeCollection SelectNodes(string xpath)
{
    HtmlNodeCollection matches = new HtmlNodeCollection(null);

    HtmlNodeNavigator start = new HtmlNodeNavigator(_ownerdocument, this);
    XPathNodeIterator cursor = start.Select(xpath);
    while (cursor.MoveNext())
    {
        HtmlNodeNavigator hit = (HtmlNodeNavigator) cursor.Current;
        matches.Add(hit.CurrentNode);
    }

    // an empty result is reported as null, not as an empty collection
    return matches.Count == 0 ? null : matches;
}
\r
/// <summary>
/// Selects the first node that matches the XPath expression.
/// </summary>
/// <param name="xpath">The XPath expression. May not be null.</param>
/// <returns>The first <see cref="HtmlNode"/> that matches the XPath query or a null reference if no matching node was found.</returns>
public HtmlNode SelectSingleNode(string xpath)
{
    if (xpath == null) throw new ArgumentNullException("xpath");

    HtmlNodeNavigator start = new HtmlNodeNavigator(_ownerdocument, this);
    XPathNodeIterator cursor = start.Select(xpath);
    if (cursor.MoveNext())
    {
        HtmlNodeNavigator hit = (HtmlNodeNavigator) cursor.Current;
        return hit.CurrentNode;
    }
    return null;
}
\r
1461 /// Helper method to set the value of an attribute of this node. If the attribute is not found, it will be created automatically.
\r
1463 /// <param name="name">The name of the attribute to set. May not be null.</param>
\r
1464 /// <param name="value">The value for the attribute.</param>
\r
1465 /// <returns>The corresponding attribute instance.</returns>
\r
1466 public HtmlAttribute SetAttributeValue(string name, string value)
\r
1470 throw new ArgumentNullException("name");
\r
1472 HtmlAttribute att = Attributes[name];
\r
1475 return Attributes.Append(_ownerdocument.CreateAttribute(name, value));
\r
1477 att.Value = value;
\r
/// <summary>
/// Saves all the children of the node to the specified TextWriter by calling
/// <c>WriteTo</c> on each child in order. Writes nothing when there is no child collection.
/// </summary>
/// <param name="outText">The TextWriter to which you want to save.</param>
public void WriteContentTo(TextWriter outText)
{
    if (_childnodes != null)
    {
        foreach (HtmlNode child in _childnodes)
        {
            child.WriteTo(outText);
        }
    }
}
\r
/// <summary>
/// Saves all the children of the node to a string.
/// </summary>
/// <returns>The text produced by writing each child node, in collection order, to an in-memory writer.</returns>
public string WriteContentTo()
{
    // StringWriter is IDisposable; scope it with using, as the parameterless WriteTo() does,
    // so the writer is released even if serializing a child throws.
    using (StringWriter sw = new StringWriter())
    {
        WriteContentTo(sw);
        // Push any buffered output before taking the snapshot.
        sw.Flush();
        return sw.ToString();
    }
}
\r
1511 /// Saves the current node to the specified TextWriter.
\r
1513 /// <param name="outText">The TextWriter to which you want to save.</param>
\r
// Serializes this node to outText, dispatching on _nodetype (Comment, Document, Text, Element).
// The owner document's options read below (OptionOutputAsXml, OptionOutputUpperCase,
// OptionOutputOriginalCase, OptionWriteEmptyNodes) select the output form.
// NOTE(review): short lines (braces, else, break, small declarations such as the ones for
// `html` and `name`) are not visible in this excerpt, so the exact boundaries between the
// branches below should be confirmed against the full file.
1514 public void WriteTo(TextWriter outText)
\r
1517 switch (_nodetype)
\r
// Comment node: in XML mode the text returned by GetXmlComment is wrapped in "<!--" and " -->".
1519 case HtmlNodeType.Comment:
\r
1520 html = ((HtmlCommentNode) this).Comment;
\r
1521 if (_ownerdocument.OptionOutputAsXml)
\r
1523 outText.Write("<!--" + GetXmlComment((HtmlCommentNode) this) + " -->");
\r
// Presumably the non-XML branch: the stored comment text is written unchanged.
1527 outText.Write(html);
\r
// Document node: in XML mode an XML declaration naming the output encoding is written first.
1531 case HtmlNodeType.Document:
\r
1532 if (_ownerdocument.OptionOutputAsXml)
\r
// NOTE(review): the continuation of this Write call is on a line not visible here.
1534 outText.Write("<?xml version=\"1.0\" encoding=\"" + _ownerdocument.GetOutEncoding().BodyName +
\r
1537 // check there is a root element
\r
1538 if (_ownerdocument.DocumentNode.HasChildNodes)
\r
1540 int rootnodes = _ownerdocument.DocumentNode._childnodes.Count;
\r
1541 if (rootnodes > 0)
\r
// NOTE(review): how `xml` affects rootnodes is not visible; presumably an existing
// XML declaration node is excluded from the root count - confirm.
1543 HtmlNode xml = _ownerdocument.GetXmlDeclaration();
\r
// More than one root node: the content is wrapped in a single span element, whose
// case follows OptionOutputUpperCase - presumably so the XML output has one root.
1549 if (rootnodes > 1)
\r
1551 if (_ownerdocument.OptionOutputUpperCase)
\r
1553 outText.Write("<SPAN>");
\r
1554 WriteContentTo(outText);
\r
1555 outText.Write("</SPAN>");
\r
1559 outText.Write("<span>");
\r
1560 WriteContentTo(outText);
\r
1561 outText.Write("</span>");
\r
// Presumably the default document path: children are written with no wrapper.
1568 WriteContentTo(outText);
\r
// Text node: XML mode passes the text through HtmlDocument.HtmlEncode before writing.
1571 case HtmlNodeType.Text:
\r
1572 html = ((HtmlTextNode) this).Text;
\r
1573 if (_ownerdocument.OptionOutputAsXml)
\r
1575 outText.Write(HtmlDocument.HtmlEncode(html));
\r
// Presumably the non-XML branch: the text is written as stored.
1579 outText.Write(html);
\r
// Element node: first decide the tag name to emit.
1583 case HtmlNodeType.Element:
\r
1585 if (_ownerdocument.OptionOutputUpperCase)
\r
1587 name = Name.ToUpper();
\r
// OptionOutputOriginalCase is checked after the upper-case option and overwrites its result.
1594 if (_ownerdocument.OptionOutputOriginalCase)
\r
1595 name = OriginalName;
\r
// XML mode: names starting with '?' and blank names get special handling (the statements
// that act on these checks are not visible - TODO confirm they skip the element);
// other names are converted with HtmlDocument.GetXmlName.
1597 if (_ownerdocument.OptionOutputAsXml)
\r
1599 if (name.Length > 0)
\r
1601 if (name[0] == '?')
\r
1603 // forget this one, it's been done at the document level
\r
1607 if (name.Trim().Length == 0)
\r
1611 name = HtmlDocument.GetXmlName(name);
\r
// Opening tag: "<name" followed by the attributes of the start tag (closing == false).
1619 outText.Write("<" + name);
\r
1620 WriteAttributes(outText, false);
\r
// No children: choose between " />", "?>"/">" and "></name>" endings.
1622 if (!HasChildNodes)
\r
1624 if (IsEmptyElement(Name))
\r
// Self-closing form is used when empty nodes are requested or the output is XML.
1626 if ((_ownerdocument.OptionWriteEmptyNodes) || (_ownerdocument.OptionOutputAsXml))
\r
1628 outText.Write(" />");
\r
// A name starting with '?' gets a '?' written before the closing '>'.
1632 if (Name.Length > 0)
\r
1634 if (Name[0] == '?')
\r
1636 outText.Write("?");
\r
1640 outText.Write(">");
\r
// Presumably the branch for childless elements that are not "empty elements":
// an explicit closing tag is written right after the opening tag.
1645 outText.Write("></" + name + ">");
\r
// Presumably the has-children branch: finish the opening tag, then write the content.
1650 outText.Write(">");
\r
1651 bool cdata = false;
\r
1652 if (_ownerdocument.OptionOutputAsXml)
\r
1654 if (IsCDataElement(Name))
\r
1656 // this code and the following tries to output things as nicely as possible for old browsers.
\r
// The CDATA opener is written behind "//" on its own line.
// NOTE(review): the assignment that sets `cdata` is not visible here.
1658 outText.Write("\r\n//<![CDATA[\r\n");
\r
1664 if (HasChildNodes)
\r
1666 // child must be a text
\r
// Only the first child is written on this path.
1667 ChildNodes[0].WriteTo(outText);
\r
1669 outText.Write("\r\n//]]>//\r\n");
\r
// Presumably the non-CDATA path: all children are written normally.
1673 WriteContentTo(outText);
\r
// Closing tag: outside XML mode the attributes recorded for the closing tag are
// written as well (closing == true).
1676 outText.Write("</" + name);
\r
1677 if (!_ownerdocument.OptionOutputAsXml)
\r
1679 WriteAttributes(outText, true);
\r
1681 outText.Write(">");
\r
/// <summary>
/// Writes this node, and recursively its children, to the specified XmlWriter.
/// </summary>
/// <param name="writer">The XmlWriter to which you want to save.</param>
public void WriteTo(XmlWriter writer)
{
    switch (_nodetype)
    {
        case HtmlNodeType.Comment:
        {
            HtmlCommentNode commentNode = (HtmlCommentNode) this;
            writer.WriteComment(GetXmlComment(commentNode));
            break;
        }

        case HtmlNodeType.Document:
        {
            // The document node writes the "xml" processing instruction, naming the
            // output encoding, before any of its children.
            string declaration = "version=\"1.0\" encoding=\"" + _ownerdocument.GetOutEncoding().BodyName + "\"";
            writer.WriteProcessingInstruction("xml", declaration);
            if (HasChildNodes)
            {
                foreach (HtmlNode child in ChildNodes)
                {
                    child.WriteTo(writer);
                }
            }
            break;
        }

        case HtmlNodeType.Text:
        {
            HtmlTextNode textNode = (HtmlTextNode) this;
            writer.WriteString(textNode.Text);
            break;
        }

        case HtmlNodeType.Element:
        {
            string elementName;
            if (_ownerdocument.OptionOutputUpperCase)
            {
                elementName = Name.ToUpper();
            }
            else
            {
                elementName = Name;
            }

            // The original-case option is applied last, so it overrides upper-casing.
            if (_ownerdocument.OptionOutputOriginalCase)
            {
                elementName = OriginalName;
            }

            writer.WriteStartElement(elementName);
            WriteAttributes(writer, this);
            if (HasChildNodes)
            {
                foreach (HtmlNode child in ChildNodes)
                {
                    child.WriteTo(writer);
                }
            }
            writer.WriteEndElement();
            break;
        }
    }
}
\r
/// <summary>
/// Saves the current node to a string.
/// </summary>
/// <returns>The text produced by writing this node to an in-memory writer.</returns>
public string WriteTo()
{
    string saved;
    using (StringWriter buffer = new StringWriter())
    {
        WriteTo(buffer);
        buffer.Flush();
        saved = buffer.ToString();
    }
    return saved;
}
\r
1754 #region Internal Methods
\r
// Returns the text of a comment node with its first 4 and last 3 characters removed
// (presumably the "<!--" and "-->" delimiters) and every "--" replaced by " - -".
internal static string GetXmlComment(HtmlCommentNode comment)
{
    string raw = comment.Comment;
    int innerLength = raw.Length - 7;
    string inner = raw.Substring(4, innerLength);
    return inner.Replace("--", " - -");
}
\r
// Writes the attributes of the given node to an XmlWriter as name/value pairs.
internal static void WriteAttributes(XmlWriter writer, HtmlNode node)
{
    if (node.HasAttributes)
    {
        // Iterating Hashitems, rather than the attribute list, makes sure each
        // attribute is written only once.
        foreach (HtmlAttribute attribute in node.Attributes.Hashitems.Values)
        {
            writer.WriteAttributeString(attribute.XmlName, attribute.Value);
        }
    }
}
\r
// Records `endnode` as this node's end node and updates the owner document's bookkeeping
// (_openednodes, _lastnodes) and this node's inner/outer position fields.
// NOTE(review): several short guard lines (conditions, returns, braces) are not visible in
// this excerpt, so which of the statements below are conditional must be confirmed against
// the full file.
1775 internal void CloseNode(HtmlNode endnode)
\r
// Only when the document does not auto-close on end: children are closed first.
1777 if (!_ownerdocument.OptionAutoCloseOnEnd)
\r
1779 // close all children
\r
1780 if (_childnodes != null)
\r
1782 foreach (HtmlNode child in _childnodes)
\r
1787 // create a fake closer node
\r
// The synthetic closer has this node's NodeType, index -1, and is its own end node.
1788 HtmlNode close = new HtmlNode(NodeType, _ownerdocument, -1);
\r
1789 close._endnode = close;
\r
// Recursive call: the child is closed with the synthetic node.
1790 child.CloseNode(close);
\r
1797 _endnode = endnode;
\r
// The node is removed from the document's opened-nodes table, keyed by its outer start index.
1799 if (_ownerdocument._openednodes != null)
\r
1801 _ownerdocument._openednodes.Remove(_outerstartindex);
\r
// Looks up the entry stored under this node's Name in _lastnodes.
// NOTE(review): the condition guarding the removal below is not visible; presumably
// it applies only when that entry is this node - confirm.
1804 HtmlNode self = _ownerdocument._lastnodes[Name] as HtmlNode;
\r
1807 _ownerdocument._lastnodes.Remove(Name);
\r
1808 _ownerdocument.UpdateLastParentNode();
\r
// NOTE(review): the statement controlled by this check is not visible; presumably a
// self-closed node skips the position updates below - confirm.
1811 if (endnode == this)
\r
1814 // create an inner section
\r
// Inner section starts where this node's current outer span ends and runs to the start of endnode.
1815 _innerstartindex = _outerstartindex + _outerlength;
\r
1816 _innerlength = endnode._outerstartindex - _innerstartindex;
\r
1818 // update full length
\r
// Outer span is extended to cover everything up to the end of endnode.
1819 _outerlength = (endnode._outerstartindex + endnode._outerlength) - _outerstartindex;
\r
// Reads this node's "id" attribute through the Attributes indexer.
// NOTE(review): the lines that turn `att` into the returned string are not visible in this
// excerpt; confirm what is returned when the attribute is absent.
1823 internal string GetId()
\r
1825 HtmlAttribute att = Attributes["id"];
\r
// Stores the given id in this node's "id" attribute, registers the node with the owner
// document under that id, and flags the outer markup as changed.
internal void SetId(string id)
{
    HtmlAttribute idAttribute = Attributes["id"];
    if (idAttribute == null)
    {
        // No id attribute yet: have the owner document create one.
        idAttribute = _ownerdocument.CreateAttribute("id");
    }

    idAttribute.Value = id;

    // Registration uses the value read back from the attribute, not the raw argument.
    _ownerdocument.SetIdForNode(this, idAttribute.Value);
    _outerchanged = true;
}
\r
// Writes a single attribute to outText as a leading space, the attribute name and,
// on most paths, ="value". The form depends on the owner document's output options.
// NOTE(review): short lines (the declaration of `name`, else branches, returns, braces)
// are not visible in this excerpt; branch boundaries should be confirmed against the full file.
1845 internal void WriteAttribute(TextWriter outText, HtmlAttribute att)
\r
// Quote character follows the attribute's QuoteType: a double quote, otherwise a single quote.
1848 string quote = att.QuoteType == AttributeValueQuote.DoubleQuote ? "\"" : "'";
\r
// XML mode: the XML form of the name is used and the XML value is HTML-encoded and always quoted.
1849 if (_ownerdocument.OptionOutputAsXml)
\r
1851 if (_ownerdocument.OptionOutputUpperCase)
\r
1853 name = att.XmlName.ToUpper();
\r
1857 name = att.XmlName;
\r
// OptionOutputOriginalCase is checked after the assignments above and overwrites their result.
1859 if (_ownerdocument.OptionOutputOriginalCase)
\r
1860 name = att.OriginalName;
\r
1862 outText.Write(" " + name + "=" + quote + HtmlDocument.HtmlEncode(att.XmlValue) + quote);
\r
// Presumably the non-XML branch starts here: the plain attribute name is used.
1866 if (_ownerdocument.OptionOutputUpperCase)
\r
1868 name = att.Name.ToUpper();
\r
// A name of the form "<%...%>" is written on its own, with no "=" or value.
1875 if (att.Name.Length >= 4)
\r
1877 if ((att.Name[0] == '<') && (att.Name[1] == '%') &&
\r
1878 (att.Name[att.Name.Length - 1] == '>') && (att.Name[att.Name.Length - 2] == '%'))
\r
1880 outText.Write(" " + name);
\r
// With value optimization enabled, a value containing no LF (10), CR (13), tab (9)
// or space is written without quotes.
1884 if (_ownerdocument.OptionOutputOptimizeAttributeValues)
\r
1886 if (att.Value.IndexOfAny(new Char[] {(char) 10, (char) 13, (char) 9, ' '}) < 0)
\r
1888 outText.Write(" " + name + "=" + att.Value);
\r
// Presumably the else of the whitespace test: the value is quoted.
1892 outText.Write(" " + name + "=" + quote + att.Value + quote);
\r
// Presumably the branch without value optimization: the raw value is quoted, not encoded.
1897 outText.Write(" " + name + "=" + quote + att.Value + quote);
\r
// Writes attributes for this node to outText via WriteAttribute. `closing` is passed as
// false for the opening tag and true for the closing tag by WriteTo(TextWriter).
// NOTE(review): short lines (returns, the test on `closing`, else, braces, the counter `i`)
// are not visible in this excerpt; branch boundaries should be confirmed against the full file.
1902 internal void WriteAttributes(TextWriter outText, bool closing)
\r
// XML mode: attributes are taken from Hashitems.
1904 if (_ownerdocument.OptionOutputAsXml)
\r
1906 if (_attributes == null)
\r
1910 // we use Hashitems to make sure attributes are written only once
\r
1911 foreach (HtmlAttribute att in _attributes.Hashitems.Values)
\r
1913 WriteAttribute(outText, att);
\r
// Presumably the opening-tag path: this node's own attributes, in collection order.
1920 if (_attributes != null)
\r
1922 foreach (HtmlAttribute att in _attributes)
\r
1924 WriteAttribute(outText, att);
\r
// Debugging option: synthetic "_closed", "_children" and per-child "_child_<i>" attributes
// are created through the owner document and written too.
1927 if (_ownerdocument.OptionAddDebuggingAttributes)
\r
1929 WriteAttribute(outText, _ownerdocument.CreateAttribute("_closed", Closed.ToString()));
\r
1930 WriteAttribute(outText, _ownerdocument.CreateAttribute("_children", ChildNodes.Count.ToString()));
\r
1933 foreach (HtmlNode n in ChildNodes)
\r
// NOTE(review): the second argument of this CreateAttribute call is on a line not visible here.
1935 WriteAttribute(outText, _ownerdocument.CreateAttribute("_child_" + i,
\r
// Presumably the closing-tag path: the three checks below test for a missing end node,
// an end node without attributes, and a node that is its own end node (the statements
// they control are not visible - presumably early returns; confirm).
1943 if (_endnode == null)
\r
1948 if (_endnode._attributes == null)
\r
1953 if (_endnode == this)
\r
// Attributes recorded on the end node are written.
1958 foreach (HtmlAttribute att in _endnode._attributes)
\r
1960 WriteAttribute(outText, att);
\r
1962 if (_ownerdocument.OptionAddDebuggingAttributes)
\r
1964 WriteAttribute(outText, _ownerdocument.CreateAttribute("_closed", Closed.ToString()));
\r
1965 WriteAttribute(outText, _ownerdocument.CreateAttribute("_children", ChildNodes.Count.ToString()));
\r
1972 #region Private Methods
\r
1974 private string GetRelativeXpath()
\r
1976 if (ParentNode == null)
\r
1978 if (NodeType == HtmlNodeType.Document)
\r
1979 return string.Empty;
\r
1982 foreach (HtmlNode node in ParentNode.ChildNodes)
\r
1984 if (node.Name != Name) continue;
\r
1991 return Name + "[" + i + "]";
\r