1 // HtmlAgilityPack V1.0 - Simon Mourier <simon underscore mourier at hotmail dot com>
\r
7 using System.Xml.Serialization;
\r
8 using System.Xml.Xsl;
\r
9 using Microsoft.Win32;
\r
11 namespace HtmlAgilityPack
\r
14 /// A utility class to get HTML document from HTTP.
\r
16 public class HtmlWeb
\r
/// <summary>
/// Signature of the callback stored in the PostResponse member; it receives the
/// request that was issued together with the response obtained for it.
/// </summary>
/// <param name="request">The HTTP request that was executed.</param>
/// <param name="response">The HTTP response returned for <paramref name="request"/>.</param>
public delegate void PostResponseHandler(HttpWebRequest request, HttpWebResponse response);
\r
/// <summary>
/// Signature of the callback stored in the PreHandleDocument member; it receives
/// the document that has just been loaded.
/// </summary>
/// <param name="document">The loaded HTML document.</param>
public delegate void PreHandleDocumentHandler(HtmlDocument document);
\r
/// <summary>
/// Signature of the callback stored in the PreRequest member; it receives the
/// request before it is sent and may modify it.
/// </summary>
/// <param name="request">The HTTP request about to be executed.</param>
/// <returns>true to let the request proceed; false to cancel it.</returns>
public delegate bool PreRequestHandler(HttpWebRequest request);
\r
// --- configuration (backing fields of the public properties) ---
private bool _autoDetectEncoding = true;
private bool _useCookies;
private int _streamBufferSize = 1024;

// --- cache configuration ---
private string _cachePath;
private bool _usingCache;
private bool _cacheOnly;

// --- state describing the most recent request ---
private bool _fromCache;
private int _requestDuration;
private Uri _responseUri;
private HttpStatusCode _statusCode = HttpStatusCode.OK;
\r
/// <summary>
/// Callback invoked once an HTTP request has been executed and a response is available.
/// May be null, in which case nothing is invoked.
/// </summary>
public PostResponseHandler PostResponse;
\r
/// <summary>
/// Callback invoked with a freshly loaded HTML document before it is returned to the caller.
/// May be null, in which case nothing is invoked.
/// </summary>
public PreHandleDocumentHandler PreHandleDocument;
\r
/// <summary>
/// Callback invoked before an HTTP request is executed; returning false cancels the request.
/// May be null, in which case nothing is invoked.
/// </summary>
public PreRequestHandler PreRequest;
\r
/// <summary>
/// Gets or sets whether the encoding of a loaded document is detected automatically.
/// </summary>
public bool AutoDetectEncoding
{
    get
    {
        return _autoDetectEncoding;
    }
    set
    {
        _autoDetectEncoding = value;
    }
}
\r
/// <summary>
/// Gets or sets whether documents are taken from the cache only.
/// When true and a document is not in the cache, nothing is loaded.
/// </summary>
/// <exception cref="HtmlWebException">Set to true while <see cref="UsingCache"/> is false.</exception>
public bool CacheOnly
{
    get
    {
        return _cacheOnly;
    }
    set
    {
        // Turning the flag off is always allowed; turning it on requires an enabled cache.
        if (!value || UsingCache)
        {
            _cacheOnly = value;
            return;
        }

        throw new HtmlWebException("Cache is not enabled. Set UsingCache to true first.");
    }
}
\r
/// <summary>
/// Gets or sets the directory used for caching. While it is null, no caching takes place.
/// </summary>
public string CachePath
{
    get
    {
        return _cachePath;
    }
    set
    {
        _cachePath = value;
    }
}
\r
/// <summary>
/// Gets whether the most recently retrieved document came from the cache.
/// </summary>
public bool FromCache
{
    get
    {
        return _fromCache;
    }
}
\r
/// <summary>
/// Gets how long the most recent request took, in milliseconds.
/// </summary>
public int RequestDuration
{
    get
    {
        return _requestDuration;
    }
}
\r
/// <summary>
/// Gets the URI of the Internet resource that actually answered the most recent request.
/// </summary>
public Uri ResponseUri
{
    get
    {
        return _responseUri;
    }
}
\r
/// <summary>
/// Gets the HTTP status recorded for the most recent request.
/// </summary>
public HttpStatusCode StatusCode
{
    get
    {
        return _statusCode;
    }
}
\r
/// <summary>
/// Gets or sets the size, in bytes, of the buffer used when copying response streams.
/// </summary>
/// <exception cref="ArgumentException">The assigned value is zero or negative.</exception>
public int StreamBufferSize
{
    get
    {
        return _streamBufferSize;
    }
    set
    {
        // Validate the incoming value, not the current field: checking the field
        // (always positive after initialization) let invalid sizes through.
        if (value <= 0)
        {
            throw new ArgumentException("Size must be greater than zero.");
        }

        _streamBufferSize = value;
    }
}
\r
/// <summary>
/// Gets or sets whether cookies are stored for requests.
/// </summary>
public bool UseCookies
{
    get
    {
        return _useCookies;
    }
    set
    {
        _useCookies = value;
    }
}
\r
/// <summary>
/// Gets or sets whether the caching mechanism is used.
/// Always reads as false while no cache path is defined.
/// </summary>
/// <exception cref="HtmlWebException">Set to true before a cache path has been defined.</exception>
public bool UsingCache
{
    get
    {
        // Without a cache path the stored flag is ignored.
        return _cachePath != null && _usingCache;
    }
    set
    {
        bool pathMissing = _cachePath == null;
        if (value && pathMissing)
        {
            throw new HtmlWebException("You need to define a CachePath first.");
        }

        _usingCache = value;
    }
}
\r
187 #region Public Methods
\r
/// <summary>
/// Gets the MIME content type for a given path extension by looking the extension up
/// under the registry's classes root.
/// </summary>
/// <param name="extension">The input path extension.</param>
/// <param name="def">The value returned when the extension is null or empty, when the
/// registry value is absent, or when any error occurs.</param>
/// <returns>The registry value found for the extension, an empty string when the extension
/// has no registry key, or <paramref name="def"/> in the cases listed above.</returns>
public static string GetContentTypeForExtension(string extension, string def)
{
    if (string.IsNullOrEmpty(extension))
    {
        return def;
    }

    string contentType = "";
    try
    {
        // Registry.ClassesRoot is a shared key and must stay open; only the sub key
        // opened here is owned by this method, so it is the one being disposed.
        using (RegistryKey key = Registry.ClassesRoot.OpenSubKey(extension, false))
        {
            if (key != null)
            {
                // NOTE(review): this reads the key's default value; the MIME type may
                // instead live in the "Content Type" value - confirm before changing.
                contentType = (string)key.GetValue("", def);
            }
        }
    }
    catch (Exception)
    {
        // Best effort by contract: any failure yields the caller's default.
        contentType = def;
    }

    return contentType;
}
\r
/// <summary>
/// Gets the path extension for a given MIME content type by reading the "Extension"
/// value under <c>MIME\Database\Content Type\&lt;contentType&gt;</c> in the classes root.
/// </summary>
/// <param name="contentType">The input MIME content type.</param>
/// <param name="def">The value returned when the content type is null or empty, when the
/// registry value is absent, or when any error occurs.</param>
/// <returns>The registered extension, an empty string when the content type has no
/// registry key, or <paramref name="def"/> in the cases listed above.</returns>
public static string GetExtensionForContentType(string contentType, string def)
{
    if (string.IsNullOrEmpty(contentType))
    {
        return def;
    }

    string ext = "";
    try
    {
        // Only the sub key opened here is disposed; Registry.ClassesRoot is shared.
        using (RegistryKey key = Registry.ClassesRoot.OpenSubKey(@"MIME\Database\Content Type\" + contentType, false))
        {
            if (key != null)
            {
                ext = (string)key.GetValue("Extension", def);
            }
        }
    }
    catch (Exception)
    {
        // Best effort by contract: any failure yields the caller's default.
        ext = def;
    }

    return ext;
}
\r
/// <summary>
/// Builds an object of the requested type from the document found at the given URL,
/// without applying any XSLT stylesheet.
/// </summary>
/// <param name="url">The requested URL, such as "http://Myserver/Mypath/Myfile.asp".</param>
/// <param name="type">The requested type.</param>
/// <returns>A newly created instance.</returns>
public object CreateInstance(string url, Type type)
{
    const string noStylesheet = null;
    const XsltArgumentList noArguments = null;

    return CreateInstance(url, noStylesheet, noArguments, type);
}
\r
/// <summary>
/// Builds an object of the requested type from the document found at the given URL,
/// optionally transformed by an XSLT stylesheet, without saving the intermediate XML.
/// </summary>
/// <param name="htmlUrl">The requested URL, such as "http://Myserver/Mypath/Myfile.asp".</param>
/// <param name="xsltUrl">The URL that specifies the XSLT stylesheet to load.</param>
/// <param name="xsltArgs">An <see cref="XsltArgumentList"/> containing the namespace-qualified arguments used as input to the transform.</param>
/// <param name="type">The requested type.</param>
/// <returns>A newly created instance.</returns>
public object CreateInstance(string htmlUrl, string xsltUrl, XsltArgumentList xsltArgs, Type type)
{
    const string noDebugXmlPath = null;

    return CreateInstance(htmlUrl, xsltUrl, xsltArgs, type, noDebugXmlPath);
}
\r
/// <summary>
/// Builds an object of the requested type from the specified Internet resource: the
/// document is written as XML (optionally through an XSLT transformation) into memory
/// and that XML is then deserialized with an <see cref="XmlSerializer"/>.
/// </summary>
/// <param name="htmlUrl">The requested URL, such as "http://Myserver/Mypath/Myfile.asp".</param>
/// <param name="xsltUrl">The URL that specifies the XSLT stylesheet to load. When null, no transformation is applied.</param>
/// <param name="xsltArgs">An <see cref="XsltArgumentList"/> containing the namespace-qualified arguments used as input to the transform.</param>
/// <param name="type">The requested type.</param>
/// <param name="xmlPath">A file path where the temporary XML before transformation will be saved. Mostly used for debugging purposes.</param>
/// <returns>A newly created instance.</returns>
/// <exception cref="Exception">Deserialization failed; the message carries the generated XML
/// and the original failure is available as the inner exception.</exception>
public object CreateInstance(string htmlUrl, string xsltUrl, XsltArgumentList xsltArgs, Type type,
                             string xmlPath)
{
    string xml;
    StringWriter sw = new StringWriter();
    XmlTextWriter writer = new XmlTextWriter(sw);
    try
    {
        if (xsltUrl == null)
        {
            LoadHtmlAsXml(htmlUrl, writer);
        }
        else if (xmlPath == null)
        {
            LoadHtmlAsXml(htmlUrl, xsltUrl, xsltArgs, writer);
        }
        else
        {
            LoadHtmlAsXml(htmlUrl, xsltUrl, xsltArgs, writer, xmlPath);
        }

        // Push any buffered output into the StringWriter before reading it back.
        writer.Flush();
        xml = sw.ToString();
    }
    finally
    {
        // Closing the XML writer also closes the underlying StringWriter.
        writer.Close();
    }

    XmlSerializer serializer = new XmlSerializer(type);
    XmlTextReader reader = new XmlTextReader(new StringReader(xml));
    try
    {
        return serializer.Deserialize(reader);
    }
    catch (InvalidOperationException ex)
    {
        // Same exception type and message as before; the original failure is now also
        // attached as the inner exception so its stack trace is not lost.
        throw new Exception(ex + ", --- xml:" + xml, ex);
    }
    finally
    {
        reader.Close();
    }
}
\r
/// <summary>
/// Downloads the document at the given URL with an HTTP GET and stores it in a file.
/// </summary>
/// <param name="url">The requested URL, such as "http://Myserver/Mypath/Myfile.asp".</param>
/// <param name="path">The location of the file where you want to save the document.</param>
public void Get(string url, string path)
{
    const string httpMethod = "GET";

    Get(url, path, httpMethod);
}
\r
/// <summary>
/// Downloads the document at the given URL with an HTTP GET, going through the given
/// proxy, and stores it in a file.
/// </summary>
/// <param name="url">The requested URL, such as "http://Myserver/Mypath/Myfile.asp".</param>
/// <param name="path">The location of the file where you want to save the document.</param>
/// <param name="proxy">The proxy to use for the request.</param>
/// <param name="credentials">The credentials to use for the request.</param>
public void Get(string url, string path, WebProxy proxy, NetworkCredential credentials)
{
    const string httpMethod = "GET";

    Get(url, path, proxy, credentials, httpMethod);
}
\r
/// <summary>
/// Gets an HTML document from an Internet resource and saves it to the specified file.
/// </summary>
/// <param name="url">The requested URL, such as "http://Myserver/Mypath/Myfile.asp".</param>
/// <param name="path">The location of the file where you want to save the document.</param>
/// <param name="method">The HTTP method used to open the connection, such as GET, POST, PUT, or PROPFIND.</param>
/// <exception cref="HtmlWebException">The URL scheme is neither http nor https.</exception>
public void Get(string url, string path, string method)
{
    // Same work as the proxy-aware overload with no proxy and no credentials, so
    // delegate instead of keeping a second copy of the scheme check.
    Get(url, path, (WebProxy)null, (NetworkCredential)null, method);
}
\r
/// <summary>
/// Downloads the document at the given URL, optionally through a proxy, and stores it in a file.
/// Only the http and https schemes are accepted.
/// </summary>
/// <param name="url">The requested URL, such as "http://Myserver/Mypath/Myfile.asp".</param>
/// <param name="path">The location of the file where you want to save the document.</param>
/// <param name="proxy">The proxy to use for the request.</param>
/// <param name="credentials">The credentials to use for the request.</param>
/// <param name="method">The HTTP method used to open the connection, such as GET, POST, PUT, or PROPFIND.</param>
/// <exception cref="HtmlWebException">The URL scheme is neither http nor https.</exception>
public void Get(string url, string path, WebProxy proxy, NetworkCredential credentials, string method)
{
    Uri target = new Uri(url);
    bool isWebScheme = target.Scheme == Uri.UriSchemeHttps || target.Scheme == Uri.UriSchemeHttp;

    if (!isWebScheme)
    {
        throw new HtmlWebException("Unsupported uri scheme: '" + target.Scheme + "'.");
    }

    // No in-memory document is requested here; the response only goes to the file.
    Get(target, method, path, null, proxy, credentials);
}
\r
/// <summary>
/// Computes the file path under the cache directory that corresponds to a URL.
/// </summary>
/// <param name="uri">The URL for which to retrieve the cache path. May not be null.</param>
/// <returns>The cache file path.</returns>
/// <exception cref="ArgumentNullException"><paramref name="uri"/> is null.</exception>
/// <exception cref="HtmlWebException">The cache is not enabled.</exception>
public string GetCachePath(Uri uri)
{
    if (uri == null)
    {
        throw new ArgumentNullException("uri");
    }

    if (!UsingCache)
    {
        throw new HtmlWebException("Cache is not enabled. Set UsingCache to true first.");
    }

    // A root request maps to ".htm"; anything else maps to host + path with the
    // URL separators turned into backslashes.
    string relative = uri.AbsolutePath == "/"
        ? ".htm"
        : (uri.Host + uri.AbsolutePath).Replace('/', '\\');

    return Path.Combine(_cachePath, relative);
}
\r
/// <summary>
/// Loads an HTML document from the given URL using an HTTP GET.
/// </summary>
/// <param name="url">The requested URL, such as "http://Myserver/Mypath/Myfile.asp".</param>
/// <returns>A new HTML document.</returns>
public HtmlDocument Load(string url)
{
    const string httpMethod = "GET";

    return Load(url, httpMethod);
}
\r
/// <summary>
/// Gets an HTML document from an Internet resource through a proxy described by host and port.
/// </summary>
/// <param name="url">The requested URL, such as "http://Myserver/Mypath/Myfile.asp".</param>
/// <param name="proxyHost">Host to use for Proxy</param>
/// <param name="proxyPort">Port the Proxy is on</param>
/// <param name="userId">User Id for Authentication</param>
/// <param name="password">Password for Authentication</param>
/// <returns>A new HTML document.</returns>
public HtmlDocument Load(string url, string proxyHost, int proxyPort, string userId, string password)
{
    WebProxy myProxy = new WebProxy(proxyHost, proxyPort);
    myProxy.BypassProxyOnLocal = true;

    // Credentials are only built when both parts are supplied; otherwise null is
    // passed on to the proxy-aware Load overload.
    // The CredentialCache that used to be filled here was never handed to the proxy
    // or the request, so it has been removed as dead code.
    NetworkCredential myCreds = null;
    if ((userId != null) && (password != null))
    {
        myCreds = new NetworkCredential(userId, password);
    }

    return Load(url, "GET", myProxy, myCreds);
}
\r
/// <summary>
/// Loads an HTML document from an Internet resource.
/// </summary>
/// <param name="url">The requested URL, such as "http://Myserver/Mypath/Myfile.asp".</param>
/// <param name="method">The HTTP method used to open the connection, such as GET, POST, PUT, or PROPFIND.</param>
/// <returns>A new HTML document.</returns>
/// <exception cref="HtmlWebException">The URL scheme is not http, https or file.</exception>
public HtmlDocument Load(string url, string method)
{
    // This overload was a line-for-line copy of the proxy-aware one with a null proxy
    // and null credentials; delegating keeps the two from drifting apart.
    return Load(url, method, (WebProxy)null, (NetworkCredential)null);
}
\r
/// <summary>
/// Loads an HTML document from a URL. http and https URLs are fetched over the network
/// (optionally through a proxy); file URLs are read directly. The PreHandleDocument
/// callback, when set, is invoked with the result before it is returned.
/// </summary>
/// <param name="url">The requested URL, such as "http://Myserver/Mypath/Myfile.asp".</param>
/// <param name="method">The HTTP method used to open the connection, such as GET, POST, PUT, or PROPFIND.</param>
/// <param name="proxy">Proxy to use with this request</param>
/// <param name="credentials">Credentials to use when authenticating</param>
/// <returns>A new HTML document.</returns>
/// <exception cref="HtmlWebException">The URL scheme is not http, https or file.</exception>
public HtmlDocument Load(string url, string method, WebProxy proxy, NetworkCredential credentials)
{
    Uri target = new Uri(url);
    string scheme = target.Scheme;
    HtmlDocument result;

    if (scheme == Uri.UriSchemeHttps || scheme == Uri.UriSchemeHttp)
    {
        result = LoadUrl(target, method, proxy, credentials);
    }
    else if (scheme == Uri.UriSchemeFile)
    {
        result = new HtmlDocument();
        // NOTE(review): the same option is assigned twice, so it ends up true; the
        // second line may have been meant for a different option - confirm.
        result.OptionAutoCloseOnEnd = false;
        result.OptionAutoCloseOnEnd = true;
        result.DetectEncodingAndLoad(url, _autoDetectEncoding);
    }
    else
    {
        throw new HtmlWebException("Unsupported uri scheme: '" + scheme + "'.");
    }

    if (PreHandleDocument != null)
    {
        PreHandleDocument(result);
    }

    return result;
}
\r
/// <summary>
/// Loads the HTML document at the given URL and writes it to the supplied XmlTextWriter.
/// </summary>
/// <param name="htmlUrl">The requested URL, such as "http://Myserver/Mypath/Myfile.asp".</param>
/// <param name="writer">The XmlTextWriter to which you want to save.</param>
public void LoadHtmlAsXml(string htmlUrl, XmlTextWriter writer)
{
    Load(htmlUrl).Save(writer);
}
\r
/// <summary>
/// Loads the HTML document at the given URL, applies an XSLT transformation and writes the
/// result to the supplied XmlTextWriter, without saving the intermediate XML to disk.
/// </summary>
/// <param name="htmlUrl">The requested URL, such as "http://Myserver/Mypath/Myfile.asp".</param>
/// <param name="xsltUrl">The URL that specifies the XSLT stylesheet to load.</param>
/// <param name="xsltArgs">An XsltArgumentList containing the namespace-qualified arguments used as input to the transform.</param>
/// <param name="writer">The XmlTextWriter to which you want to save.</param>
public void LoadHtmlAsXml(string htmlUrl, string xsltUrl, XsltArgumentList xsltArgs, XmlTextWriter writer)
{
    const string noDebugXmlPath = null;

    LoadHtmlAsXml(htmlUrl, xsltUrl, xsltArgs, writer, noDebugXmlPath);
}
\r
/// <summary>
/// Loads an HTML document from an Internet resource and saves it to the specified XmlTextWriter, after an XSLT transformation.
/// The parameters "url", "requestDuration" and "fromCache" are made available to the stylesheet.
/// </summary>
/// <param name="htmlUrl">The requested URL, such as "http://Myserver/Mypath/Myfile.asp". May not be null.</param>
/// <param name="xsltUrl">The URL that specifies the XSLT stylesheet to load.</param>
/// <param name="xsltArgs">An XsltArgumentList containing the namespace-qualified arguments used as input to the transform.
/// When null a new list is created; a supplied list is modified by this call.</param>
/// <param name="writer">The XmlTextWriter to which you want to save.</param>
/// <param name="xmlPath">A file path where the temporary XML before transformation will be saved. Mostly used for debugging purposes.</param>
/// <exception cref="ArgumentNullException"><paramref name="htmlUrl"/> is null.</exception>
public void LoadHtmlAsXml(string htmlUrl, string xsltUrl, XsltArgumentList xsltArgs, XmlTextWriter writer,
                          string xmlPath)
{
    if (htmlUrl == null)
    {
        throw new ArgumentNullException("htmlUrl");
    }

    HtmlDocument doc = Load(htmlUrl);

    if (xmlPath != null)
    {
        XmlTextWriter w = new XmlTextWriter(xmlPath, doc.Encoding);
        try
        {
            doc.Save(w);
        }
        finally
        {
            // Release the file even when saving fails.
            w.Close();
        }
    }

    if (xsltArgs == null)
    {
        xsltArgs = new XsltArgumentList();
    }

    // add some useful variables to the xslt doc
    // AddParam throws when a parameter of the same name is already present, which
    // happened whenever a caller reused one argument list for several calls; removing
    // any previous value first makes the call repeatable.
    xsltArgs.RemoveParam("url", "");
    xsltArgs.AddParam("url", "", htmlUrl);
    xsltArgs.RemoveParam("requestDuration", "");
    xsltArgs.AddParam("requestDuration", "", RequestDuration);
    xsltArgs.RemoveParam("fromCache", "");
    xsltArgs.AddParam("fromCache", "", FromCache);

    XslCompiledTransform xslt = new XslCompiledTransform();
    xslt.Load(xsltUrl);
    xslt.Transform(doc, xsltArgs, writer);
}
\r
578 #region Private Methods
\r
/// <summary>
/// Makes sure a file can be written at the given path: an existing file has its
/// read-only attribute cleared, otherwise the containing directory is created when missing.
/// </summary>
/// <param name="target">The path of the file about to be written.</param>
private static void FilePreparePath(string target)
{
    if (File.Exists(target))
    {
        FileAttributes atts = File.GetAttributes(target);
        File.SetAttributes(target, atts & ~FileAttributes.ReadOnly);
        return;
    }

    string dir = Path.GetDirectoryName(target);

    // A bare file name (or a root path) has no directory part; there is nothing to
    // create and Directory.CreateDirectory would reject the empty/null path.
    if (string.IsNullOrEmpty(dir))
    {
        return;
    }

    if (!Directory.Exists(dir))
    {
        Directory.CreateDirectory(dir);
    }
}
\r
/// <summary>
/// Truncates a date/time to whole seconds.
/// </summary>
/// <param name="t">The value to truncate.</param>
/// <returns>The same instant with its sub-second part removed and its
/// <see cref="DateTime.Kind"/> unchanged.</returns>
private static DateTime RemoveMilliseconds(DateTime t)
{
    // Subtracting the sub-second ticks keeps the Kind (UTC/local); rebuilding the value
    // from its components silently turned every input into an "unspecified" time.
    return t.AddTicks(-(t.Ticks % TimeSpan.TicksPerSecond));
}
\r
// ReSharper disable UnusedMethodReturnValue.Local
/// <summary>
/// Copies a stream into a file (created or overwritten) in chunks of
/// <paramref name="streamBufferSize"/> bytes and then stamps the file's last write time.
/// The input stream is closed when the copy ends.
/// </summary>
/// <param name="stream">The stream to read from.</param>
/// <param name="path">The file to write to.</param>
/// <param name="touchDate">The last write time to set on the file afterwards.</param>
/// <param name="streamBufferSize">The number of bytes requested per read.</param>
/// <returns>The total number of bytes copied.</returns>
private static long SaveStream(Stream stream, string path, DateTime touchDate, int streamBufferSize)
// ReSharper restore UnusedMethodReturnValue.Local
{
    FilePreparePath(path);

    long total = 0;
    using (FileStream output = new FileStream(path, FileMode.Create, FileAccess.Write))
    using (BinaryReader source = new BinaryReader(stream))
    using (BinaryWriter sink = new BinaryWriter(output))
    {
        while (true)
        {
            byte[] chunk = source.ReadBytes(streamBufferSize);
            if (chunk.Length == 0)
            {
                // An empty read marks the end of the input.
                break;
            }

            total += chunk.Length;
            sink.Write(chunk);
        }

        sink.Flush();
    }

    File.SetLastWriteTime(path, touchDate);
    return total;
}
\r
// Core request routine shared by the public Get/Load entry points.
// From the statements visible here it: creates an HttpWebRequest for `uri` with the given
// method, applies credentials to the proxy and the request, resets the per-request state
// (_fromCache, _requestDuration), consults the file cache, lets PreRequest veto the call,
// executes the request, lets PostResponse inspect the result, and finally either stores
// the response in the cache / target file or loads it into `doc`.
// Returns the HTTP status of the response, NotModified when the cached copy was used, or
// ResetContent when PreRequest returned false.
// NOTE(review): several guarding conditions of this method are not visible in this listing;
// the comments below describe only the statements that are shown.
647 private HttpStatusCode Get(Uri uri, string method, string path, HtmlDocument doc, IWebProxy proxy,
\r
648 ICredentials creds)
\r
650 string cachePath = null;
\r
651 HttpWebRequest req;
\r
652 bool oldFile = false;
\r
// NOTE(review): an `as` cast yields null for non-HTTP requests; req is dereferenced right after.
654 req = WebRequest.Create(uri) as HttpWebRequest;
\r
655 req.Method = method;
\r
// Either the supplied credentials or the process default credentials are applied to both
// the proxy and the request.
661 proxy.Credentials = creds;
\r
662 req.Credentials = creds;
\r
666 proxy.Credentials = CredentialCache.DefaultCredentials;
\r
667 req.Credentials = CredentialCache.DefaultCredentials;
\r
// Reset the state describing the last request and start timing.
672 _fromCache = false;
\r
673 _requestDuration = 0;
\r
674 int tc = Environment.TickCount;
\r
677 cachePath = GetCachePath(req.RequestUri);
\r
678 if (File.Exists(cachePath))
\r
// NOTE(review): uses the cache file's last ACCESS time for If-Modified-Since, while the
// cache file is stamped through SetLastWriteTime elsewhere - confirm this is intended.
680 req.IfModifiedSince = File.GetLastAccessTime(cachePath);
\r
687 if (!File.Exists(cachePath))
\r
689 throw new HtmlWebException("File was not found at cache path: '" + cachePath + "'");
\r
// Serve the cached copy: copy it to the target path and give it the cache file's timestamp.
694 IOLibrary.CopyAlways(cachePath, path);
\r
696 File.SetLastWriteTime(path, File.GetLastWriteTime(cachePath));
\r
699 return HttpStatusCode.NotModified;
\r
704 req.CookieContainer = new CookieContainer();
\r
707 if (PreRequest != null)
\r
709 // allow our user to change the request at will
\r
710 if (!PreRequest(req))
\r
// The callback vetoed the request; ResetContent is the value reported for that case.
712 return HttpStatusCode.ResetContent;
\r
716 // if (_useCookies)
\r
718 // foreach(Cookie cookie in req.CookieContainer.GetCookies(req.RequestUri))
\r
720 // HtmlLibrary.Trace("Cookie " + cookie.Name + "=" + cookie.Value + " path=" + cookie.Path + " domain=" + cookie.Domain);
\r
725 HttpWebResponse resp;
\r
729 resp = req.GetResponse() as HttpWebResponse;
\r
// A WebException may still carry a response (for example an HTTP error status); it is
// taken from the exception so it can be processed below.
731 catch (WebException we)
\r
733 _requestDuration = Environment.TickCount - tc;
\r
734 resp = (HttpWebResponse)we.Response;
\r
// Fall back to the previously cached file.
741 IOLibrary.CopyAlways(cachePath, path);
\r
743 File.SetLastWriteTime(path, File.GetLastWriteTime(cachePath));
\r
745 return HttpStatusCode.NotModified;
\r
752 _requestDuration = Environment.TickCount - tc;
\r
756 // allow our user to get some info from the response
\r
757 if (PostResponse != null)
\r
759 PostResponse(req, resp);
\r
762 _requestDuration = Environment.TickCount - tc;
\r
763 _responseUri = resp.ResponseUri;
\r
765 bool html = IsHtmlContent(resp.ContentType);
\r
// NOTE(review): ContentEncoding is the Content-Encoding header (e.g. a compression
// method), not a character set; passing such a value to Encoding.GetEncoding may throw - confirm.
768 if ((resp.ContentEncoding != null) && (resp.ContentEncoding.Length > 0))
\r
770 respenc = Encoding.GetEncoding(resp.ContentEncoding);
\r
777 if (resp.StatusCode == HttpStatusCode.NotModified)
\r
784 IOLibrary.CopyAlways(cachePath, path);
\r
786 File.SetLastWriteTime(path, File.GetLastWriteTime(cachePath));
\r
788 return resp.StatusCode;
\r
792 // this should *never* happen...
\r
793 throw new HtmlWebException("Server has send a NotModifed code, without cache enabled.");
\r
796 Stream s = resp.GetResponseStream();
\r
801 // NOTE: LastModified does not contain milliseconds, so we remove them to the file
\r
802 SaveStream(s, cachePath, RemoveMilliseconds(resp.LastModified), _streamBufferSize);
\r
// The response headers are stored next to the cached document.
805 SaveCacheHeaders(req.RequestUri, resp);
\r
809 // copy and touch the file
\r
810 IOLibrary.CopyAlways(cachePath, path);
\r
811 File.SetLastWriteTime(path, File.GetLastWriteTime(cachePath));
\r
816 // try to work in-memory
\r
// Only HTML responses are parsed into the caller's document.
817 if ((doc != null) && (html))
\r
819 if (respenc != null)
\r
821 doc.Load(s, respenc);
\r
831 return resp.StatusCode;
\r
/// <summary>
/// Reads one response header from the headers file stored next to a cached document.
/// The file contains one <c>h</c> element per header, with the header name in attribute
/// <c>n</c> and its value in attribute <c>v</c>.
/// </summary>
/// <param name="requestUri">The URL whose cached headers are read.</param>
/// <param name="name">The header name; compared case-insensitively.</param>
/// <param name="def">The value returned when the header is not present.</param>
/// <returns>The cached header value, or <paramref name="def"/> when it is not found.</returns>
private string GetCacheHeader(Uri requestUri, string name, string def)
{
    // note: some headers are collection (ex: www-authenticate)
    // we don't handle that here
    XmlDocument doc = new XmlDocument();
    doc.Load(GetCacheHeadersPath(requestUri));

    // Compare in code rather than splicing the name into an XPath string: this is
    // culture-independent and cannot be broken by a quote inside the name.
    foreach (XmlNode node in doc.SelectNodes("//h"))
    {
        XmlAttribute headerName = node.Attributes["n"];
        if (headerName == null || !string.Equals(headerName.Value, name, StringComparison.OrdinalIgnoreCase))
        {
            continue;
        }

        // The value lives in the "v" attribute. Looking it up under the header's own
        // name, as was done before, never matched and ended in a null reference.
        XmlAttribute headerValue = node.Attributes["v"];
        return headerValue != null ? headerValue.Value : def;
    }

    return def;
}
\r
/// <summary>
/// Gets the path of the headers file that accompanies a cached document: the document's
/// cache path with ".h.xml" appended.
/// </summary>
/// <param name="uri">The URL of the cached document.</param>
/// <returns>The path of the headers file.</returns>
private string GetCacheHeadersPath(Uri uri)
{
    string documentPath = GetCachePath(uri);
    return string.Concat(documentPath, ".h.xml");
}
\r
/// <summary>
/// Tells whether a cached file holds HTML, judging by the content type registered for
/// the file's extension.
/// </summary>
/// <param name="path">The path of the cached file.</param>
/// <returns>true when the registered content type is HTML.</returns>
private bool IsCacheHtmlContent(string path)
{
    string extension = Path.GetExtension(path);
    string registeredType = GetContentTypeForExtension(extension, null);

    return IsHtmlContent(registeredType);
}
\r
/// <summary>
/// Tells whether a MIME content type denotes HTML, i.e. starts with "text/html"
/// (ignoring case; parameters such as a charset may follow).
/// </summary>
/// <param name="contentType">The content type to test. May be null.</param>
/// <returns>true for an HTML content type; false otherwise, including for null.</returns>
private bool IsHtmlContent(string contentType)
{
    // A missing content type is simply "not HTML" instead of a null reference error.
    if (contentType == null)
    {
        return false;
    }

    // Ordinal comparison: MIME types are protocol tokens, not culture-dependent text.
    return contentType.StartsWith("text/html", StringComparison.OrdinalIgnoreCase);
}
\r
/// <summary>
/// Fetches a document over HTTP into a new <c>HtmlDocument</c> and records the resulting
/// status code. When the status is NotModified the document is read from the cache file.
/// </summary>
/// <param name="uri">The URL to fetch.</param>
/// <param name="method">The HTTP method to use.</param>
/// <param name="proxy">The proxy to use, if any.</param>
/// <param name="creds">The credentials to use, if any.</param>
/// <returns>The loaded document.</returns>
private HtmlDocument LoadUrl(Uri uri, string method, WebProxy proxy, NetworkCredential creds)
{
    HtmlDocument result = new HtmlDocument();
    result.OptionAutoCloseOnEnd = false;
    result.OptionFixNestedTags = true;

    // No target file: the response is loaded into the document directly.
    _statusCode = Get(uri, method, null, result, proxy, creds);

    if (_statusCode != HttpStatusCode.NotModified)
    {
        return result;
    }

    // read cached encoding
    result.DetectEncodingAndLoad(GetCachePath(uri));
    return result;
}
\r
882 private void SaveCacheHeaders(Uri requestUri, HttpWebResponse resp)
\r
884 // we cache the original headers aside the cached document.
\r
885 string file = GetCacheHeadersPath(requestUri);
\r
886 XmlDocument doc = new XmlDocument();
\r
887 doc.LoadXml("<c></c>");
\r
888 XmlNode cache = doc.FirstChild;
\r
889 foreach (string header in resp.Headers)
\r
891 XmlNode entry = doc.CreateElement("h");
\r
892 XmlAttribute att = doc.CreateAttribute("n");
\r
893 att.Value = header;
\r
894 entry.Attributes.Append(att);
\r
896 att = doc.CreateAttribute("v");
\r
897 att.Value = resp.Headers[header];
\r
898 entry.Attributes.Append(att);
\r
900 cache.AppendChild(entry);
\r