New test.
[mono.git] / mcs / class / System.Web / Test / mainsoft / MainsoftWebTest / HtmlAgilityPack / HtmlWeb.cs
1 // HtmlAgilityPack V1.0 - Simon Mourier <simonm@microsoft.com>
2 using System;
3 using System.IO;
4 using System.Net;
5 using System.Xml;
6 using System.Xml.Serialization;
7 using System.Xml.Xsl;
8 using Microsoft.Win32;
9
10 #if !TARGET_JVM
11 namespace HtmlAgilityPack
12 {
13         /// <summary>
14         /// A utility class to get HTML document from HTTP.
15         /// </summary>
16         public class HtmlWeb
17         {
/// <summary>
/// Represents the method that will handle the PreRequest event.
/// </summary>
/// <param name="request">The request that is about to be sent. The handler may modify it.</param>
/// <returns>true to let the request proceed; false to cancel it before it is sent.</returns>
public delegate bool PreRequestHandler(HttpWebRequest request);

/// <summary>
/// Represents the method that will handle the PostResponse event.
/// </summary>
/// <param name="request">The request that was executed.</param>
/// <param name="response">The response received for that request.</param>
public delegate void PostResponseHandler(HttpWebRequest request, HttpWebResponse response);

/// <summary>
/// Represents the method that will handle the PreHandleDocument event.
/// </summary>
/// <param name="document">The document that was just loaded and is about to be returned to the caller.</param>
public delegate void PreHandleDocumentHandler(HtmlDocument document);
32
// Number of bytes read per chunk when a response stream is copied to the cache.
private int _streamBufferSize = 1024;
// Root directory under which cached documents are stored (see GetCachePath).
private string _cachePath;
// NOTE(review): presumably backs the UsingCache property, which is not visible in this part of the file.
private bool _usingCache;
// True when the last document was served from the cache.
private bool _fromCache;
// When true, documents are only read from the cache and no request is sent.
private bool _cacheOnly;
// When true, each request gets a fresh CookieContainer.
private bool _useCookies;
// Duration of the last request, as a difference of Environment.TickCount values.
private int _requestDuration;
// Passed to DetectEncodingAndLoad when a document is loaded from a file URL.
private bool _autoDetectEncoding = true;
// Status code of the last request made through LoadUrl.
private HttpStatusCode _statusCode = HttpStatusCode.OK;
// URI reported by the last response.
private Uri _responseUri;
43
/// <summary>
/// Occurs before an HTTP request is executed. The handler may change the
/// request, and cancels it by returning false.
/// </summary>
public PreRequestHandler PreRequest;

/// <summary>
/// Occurs after an HTTP request has been executed and a response is available.
/// </summary>
public PostResponseHandler PostResponse;

/// <summary>
/// Occurs before an HTML document is handled, that is, after it has been
/// loaded and before Load returns it.
/// </summary>
public PreHandleDocumentHandler PreHandleDocument;
58
/// <summary>
/// Creates an instance of an HtmlWeb class. All settings keep the
/// default values given by the field initializers.
/// </summary>
public HtmlWeb()
{
}
65
/// <summary>
/// Downloads an HTML document with an HTTP GET and stores it in the given file.
/// </summary>
/// <param name="url">The requested URL, such as "http://Myserver/Mypath/Myfile.asp".</param>
/// <param name="path">The location of the file where you want to save the document.</param>
public void Get(string url, string path)
{
    // Delegate to the general overload with the default HTTP method.
    const string defaultMethod = "GET";
    Get(url, path, defaultMethod);
}
75                         
/// <summary>
/// Downloads an HTML document with the given HTTP method and stores it in the given file.
/// </summary>
/// <param name="url">The requested URL, such as "http://Myserver/Mypath/Myfile.asp". Only http and https are accepted.</param>
/// <param name="path">The location of the file where you want to save the document.</param>
/// <param name="method">The HTTP method used to open the connection, such as GET, POST, PUT, or PROPFIND.</param>
public void Get(string url, string path, string method)
{
    Uri target = new Uri(url);
    bool secure = target.Scheme == Uri.UriSchemeHttps;
    bool plain = target.Scheme == Uri.UriSchemeHttp;

    // Reject anything that is not an HTTP(S) resource up front.
    if (!secure && !plain)
    {
        throw new HtmlWebException("Unsupported uri scheme: '" + target.Scheme + "'.");
    }

    Get(target, method, path, null);
}
95
/// <summary>
/// Loads an HTML document from an Internet resource using an HTTP GET.
/// </summary>
/// <param name="url">The requested URL, such as "http://Myserver/Mypath/Myfile.asp".</param>
/// <returns>A new HTML document.</returns>
public HtmlDocument Load(string url)
{
    // Delegate to the general overload with the default HTTP method.
    const string defaultMethod = "GET";
    return Load(url, defaultMethod);
}
105
/// <summary>
/// Loads an HTML document from an Internet resource or from a local file.
/// </summary>
/// <param name="url">The requested URL, such as "http://Myserver/Mypath/Myfile.asp". The http, https and file schemes are supported.</param>
/// <param name="method">The HTTP method used to open the connection, such as GET, POST, PUT, or PROPFIND. Not used for file URLs.</param>
/// <returns>A new HTML document.</returns>
/// <exception cref="HtmlWebException">The URL scheme is not http, https or file.</exception>
public HtmlDocument Load(string url, string method)
{
    Uri uri = new Uri(url);
    HtmlDocument doc;
    if ((uri.Scheme == Uri.UriSchemeHttps) ||
        (uri.Scheme == Uri.UriSchemeHttp))
    {
        doc = LoadUrl(uri, method);
    }
    else if (uri.Scheme == Uri.UriSchemeFile)
    {
        doc = new HtmlDocument();
        // Use the same parser options as LoadUrl. The second statement used
        // to assign OptionAutoCloseOnEnd again, which overwrote the first
        // assignment and never set OptionFixNestedTags.
        doc.OptionAutoCloseOnEnd = false;
        doc.OptionFixNestedTags = true;
        doc.DetectEncodingAndLoad(url, _autoDetectEncoding);
    }
    else
    {
        throw new HtmlWebException("Unsupported uri scheme: '" + uri.Scheme + "'.");
    }

    // Give the user a chance to inspect or alter the document before it is returned.
    if (PreHandleDocument != null)
    {
        PreHandleDocument(doc);
    }
    return doc;
}
142
/// <summary>
/// Tells whether the file at the given cache path is HTML, judging by the
/// content type registered for its extension.
/// </summary>
/// <param name="path">The cache file path whose extension is examined.</param>
/// <returns>true if the registered content type starts with "text/html"; false otherwise, including when no content type is known.</returns>
private bool IsCacheHtmlContent(string path)
{
    string ct = GetContentTypeForExtension(Path.GetExtension(path), null);
    // The lookup returns the null default when nothing is registered for
    // the extension; treat that as "not HTML" instead of dereferencing it.
    if (ct == null)
    {
        return false;
    }
    return IsHtmlContent(ct);
}
148
/// <summary>
/// Tells whether a MIME content type denotes HTML.
/// </summary>
/// <param name="contentType">The content type to test, such as "text/html; charset=utf-8". May be null.</param>
/// <returns>true if the content type starts with "text/html", ignoring case; false otherwise, including for null.</returns>
private bool IsHtmlContent(string contentType)
{
    const string htmlType = "text/html";

    // A missing content type cannot be HTML.
    if (contentType == null)
    {
        return false;
    }
    if (contentType.Length < htmlType.Length)
    {
        return false;
    }

    // Ordinal prefix test on an invariant lower-cased copy, so the result
    // does not depend on the current culture.
    string prefix = contentType.Substring(0, htmlType.Length);
    return prefix.ToLower(System.Globalization.CultureInfo.InvariantCulture) == htmlType;
}
153
/// <summary>
/// Gets the path of the file that stores the cached response headers for a url.
/// </summary>
/// <param name="uri">The url whose cached headers file is wanted.</param>
/// <returns>The cache file path for the url with ".h.xml" appended.</returns>
private string GetCacheHeadersPath(Uri uri)
{
    // The headers file sits right next to the cached document.
    string documentPath = GetCachePath(uri);
    return documentPath + ".h.xml";
}
159
/// <summary>
/// Gets the cache file path for a specified url.
/// </summary>
/// <param name="uri">The url for which to retrieve the cache path. May not be null.</param>
/// <returns>The cache file path.</returns>
/// <exception cref="ArgumentNullException">uri is null.</exception>
/// <exception cref="HtmlWebException">The cache is not enabled.</exception>
public string GetCachePath(Uri uri)
{
    if (uri == null)
    {
        throw new ArgumentNullException("uri");
    }
    if (!UsingCache)
    {
        throw new HtmlWebException("Cache is not enabled. Set UsingCache to true first.");
    }

    if (uri.AbsolutePath == "/")
    {
        // NOTE(review): the host is not part of this path, so the root pages
        // of all sites map to the same cache file. Left unchanged so that
        // existing caches stay valid.
        return Path.Combine(_cachePath, ".htm");
    }

    // Map the url path onto the platform's directory separator. This is
    // '\' on Windows (the previous hard-coded value) and '/' elsewhere.
    string relative = (uri.Host + uri.AbsolutePath).Replace('/', Path.DirectorySeparatorChar);
    return Path.Combine(_cachePath, relative);
}
186
/// <summary>
/// Indicates whether the most recently requested document came from the cache.
/// </summary>
public bool FromCache
{
    get { return _fromCache; }
}
197
/// <summary>
/// The URI of the Internet resource that actually answered the last request.
/// </summary>
public Uri ResponseUri
{
    get { return _responseUri; }
}
208
/// <summary>
/// Gets or Sets whether documents are taken from the cache only.
/// When true and the document is not in the cache, nothing is loaded.
/// Turning this on requires the cache to be enabled first.
/// </summary>
public bool CacheOnly
{
    get { return _cacheOnly; }
    set
    {
        // Only switching the mode on needs the cache; switching it off is always allowed.
        if (value)
        {
            if (!UsingCache)
            {
                throw new HtmlWebException("Cache is not enabled. Set UsingCache to true first.");
            }
        }
        _cacheOnly = value;
    }
}
228
/// <summary>
/// Gets or Sets whether requests are given a cookie container so that cookies are stored.
/// </summary>
public bool UseCookies
{
    get { return _useCookies; }
    set { _useCookies = value; }
}
243
/// <summary>
/// The duration of the last request, in milliseconds.
/// </summary>
public int RequestDuration
{
    get { return _requestDuration; }
}
254
/// <summary>
/// Gets or Sets whether the encoding of a document must be detected automatically.
/// </summary>
public bool AutoDetectEncoding
{
    get { return _autoDetectEncoding; }
    set { _autoDetectEncoding = value; }
}
269
/// <summary>
/// The status of the last request.
/// </summary>
public HttpStatusCode StatusCode
{
    get { return _statusCode; }
}
280
/// <summary>
/// Gets or Sets the size of the buffer used for memory operations.
/// </summary>
/// <exception cref="ArgumentException">The assigned size is zero or negative.</exception>
public int StreamBufferSize
{
    get
    {
        return _streamBufferSize;
    }
    set
    {
        // Validate the incoming value. The check used to test the current
        // field, which let zero and negative sizes through.
        if (value <= 0)
        {
            throw new ArgumentException("Size must be greater than zero.");
        }
        _streamBufferSize = value;
    }
}
299
/// <summary>
/// Requests an http or https resource and returns it as a document.
/// Records the resulting status code for the StatusCode property.
/// </summary>
/// <param name="uri">The resource to request.</param>
/// <param name="method">The HTTP method to use.</param>
/// <returns>The document handed to the request; reloaded from the cache file when the status is NotModified.</returns>
private HtmlDocument LoadUrl(Uri uri, string method)
{
    HtmlDocument document = new HtmlDocument();
    document.OptionAutoCloseOnEnd = false;
    document.OptionFixNestedTags = true;

    _statusCode = Get(uri, method, null, document);
    if (_statusCode != HttpStatusCode.NotModified)
    {
        return document;
    }

    // The content was not transferred: read it, and its encoding, from the cache.
    document.DetectEncodingAndLoad(GetCachePath(uri));
    return document;
}
313
/// <summary>
/// Sends the request for a resource and delivers the content to the cache,
/// to a file and/or to a document.
/// </summary>
/// <param name="uri">The http or https resource to request.</param>
/// <param name="method">The HTTP method, such as GET or POST.</param>
/// <param name="path">File that receives the content, or null when no file copy is wanted.</param>
/// <param name="doc">Document that receives HTML content, or null when no document is wanted.</param>
/// <returns>
/// The status code of the response. NotModified is returned when the content
/// comes from the cache (cache-only mode, a 304 answer, or a failed request
/// with a cached copy available). ResetContent is returned when the
/// PreRequest handler cancelled the request.
/// </returns>
/// <exception cref="HtmlWebException">
/// The uri does not produce an HTTP request, the cache file is missing in
/// cache-only mode, or the server answered NotModified while the cache is disabled.
/// </exception>
private HttpStatusCode Get(Uri uri, string method, string path, HtmlDocument doc)
{
    string cachePath = null;
    bool oldFile = false;

    HttpWebRequest req = WebRequest.Create(uri) as HttpWebRequest;
    if (req == null)
    {
        // Not an http/https uri: fail with a clear message instead of a null dereference.
        throw new HtmlWebException("Unsupported uri scheme: '" + uri.Scheme + "'.");
    }
    req.Method = method;

    _fromCache = false;
    _requestDuration = 0;
    int tc = Environment.TickCount;
    if (UsingCache)
    {
        cachePath = GetCachePath(req.RequestUri);
        if (File.Exists(cachePath))
        {
            // SaveStream stamps the cache file's last WRITE time with the
            // server's Last-Modified value, so that is the time to send back.
            // The last access time changes whenever the cache is read.
            req.IfModifiedSince = File.GetLastWriteTime(cachePath);
            oldFile = true;
        }
    }

    if (_cacheOnly)
    {
        if (!File.Exists(cachePath))
        {
            throw new HtmlWebException("File was not found at cache path: '" + cachePath + "'");
        }

        CopyCacheFileTo(cachePath, path);
        _fromCache = true;
        return HttpStatusCode.NotModified;
    }

    if (_useCookies)
    {
        req.CookieContainer = new CookieContainer();
    }

    if (PreRequest != null)
    {
        // allow our user to change the request at will
        if (!PreRequest(req))
        {
            return HttpStatusCode.ResetContent;
        }
    }

    HttpWebResponse resp;
    try
    {
        resp = req.GetResponse() as HttpWebResponse;
    }
    catch (WebException we)
    {
        _requestDuration = Environment.TickCount - tc;
        // Protocol errors (including 304) still carry a usable response.
        resp = (HttpWebResponse)we.Response;
        if (resp == null)
        {
            if (oldFile)
            {
                // No answer at all: fall back to the cached copy.
                CopyCacheFileTo(cachePath, path);
                _fromCache = true;
                return HttpStatusCode.NotModified;
            }
            throw;
        }
    }
    catch (Exception)
    {
        _requestDuration = Environment.TickCount - tc;
        throw;
    }

    try
    {
        // allow our user to get some info from the response
        if (PostResponse != null)
        {
            PostResponse(req, resp);
        }

        _requestDuration = Environment.TickCount - tc;
        _responseUri = resp.ResponseUri;

        // Read the status now; the response is closed before this method returns.
        HttpStatusCode status = resp.StatusCode;

        if (status == HttpStatusCode.NotModified)
        {
            if (!UsingCache)
            {
                // this should *never* happen...
                throw new HtmlWebException("Server has send a NotModifed code, without cache enabled.");
            }
            _fromCache = true;
            CopyCacheFileTo(cachePath, path);
            return status;
        }

        string contentType = resp.ContentType;
        bool html = (contentType != null) && IsHtmlContent(contentType);

        Stream s = resp.GetResponseStream();
        if (s != null)
        {
            if (UsingCache)
            {
                // NOTE: LastModified does not contain milliseconds, so we remove them to the file
                SaveStream(s, cachePath, RemoveMilliseconds(resp.LastModified), _streamBufferSize);

                // save headers
                SaveCacheHeaders(req.RequestUri, resp);

                CopyCacheFileTo(cachePath, path);

                // The stream has been consumed by the cache; fill the
                // document from the cached file so the caller does not get
                // an empty document on a fresh download.
                if ((doc != null) && html)
                {
                    doc.DetectEncodingAndLoad(cachePath);
                }
            }
            else if ((doc != null) && html)
            {
                // try to work in-memory
                System.Text.Encoding respenc = GetDeclaredEncoding(resp);
                if (respenc != null)
                {
                    doc.Load(s, respenc);
                }
                else
                {
                    doc.Load(s);
                }
            }
            else if (path != null)
            {
                // No cache and no document: write the content straight to the requested file.
                SaveStream(s, path, RemoveMilliseconds(resp.LastModified), _streamBufferSize);
            }
        }
        return status;
    }
    finally
    {
        // Release the connection on every path, including errors.
        resp.Close();
    }
}

/// <summary>
/// Copies the cached file to the given path and gives the copy the cache
/// file's last write time. Does nothing when path is null.
/// </summary>
private static void CopyCacheFileTo(string cachePath, string path)
{
    if (path == null)
    {
        return;
    }
    IOLibrary.CopyAlways(cachePath, path);
    // touch the file
    File.SetLastWriteTime(path, File.GetLastWriteTime(cachePath));
}

/// <summary>
/// Resolves the response's ContentEncoding value to a text encoding, or
/// returns null when the value is empty or is not an encoding name.
/// </summary>
private static System.Text.Encoding GetDeclaredEncoding(HttpWebResponse resp)
{
    string name = resp.ContentEncoding;
    if ((name == null) || (name.Length == 0))
    {
        return null;
    }
    try
    {
        return System.Text.Encoding.GetEncoding(name);
    }
    catch (ArgumentException)
    {
        // Content-Encoding usually names a compression scheme such as
        // "gzip", which is not a text encoding.
        // NOTE(review): the charset of the Content-Type header is likely the
        // intended source; confirm before switching.
        return null;
    }
    catch (NotSupportedException)
    {
        return null;
    }
}
484
/// <summary>
/// Reads one response header from the headers file cached for a url.
/// </summary>
/// <param name="requestUri">The url whose cached headers are read.</param>
/// <param name="name">The header name; compared ignoring case. May not be null.</param>
/// <param name="def">The value to return when the header is not present.</param>
/// <returns>The value of the first matching header, or def when there is none.</returns>
/// <exception cref="ArgumentNullException">name is null.</exception>
private string GetCacheHeader(Uri requestUri, string name, string def)
{
    if (name == null)
    {
        throw new ArgumentNullException("name");
    }

    // note: some headers are collection (ex: www-authenticate)
    // we don't handle that here
    XmlDocument doc = new XmlDocument();
    doc.Load(GetCacheHeadersPath(requestUri));

    System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;
    string wanted = name.ToUpper(invariant);

    // Compare names in code rather than inside the XPath expression, so a
    // quote in the name cannot break or alter the query.
    foreach (XmlNode node in doc.SelectNodes("//h"))
    {
        XmlAttribute nameAtt = node.Attributes["n"];
        if ((nameAtt == null) || (nameAtt.Value.ToUpper(invariant) != wanted))
        {
            continue;
        }

        // Entries store the header name in "n" and its value in "v". The
        // previous lookup indexed by the header name, which never exists.
        XmlAttribute valueAtt = node.Attributes["v"];
        return (valueAtt != null) ? valueAtt.Value : def;
    }
    return def;
}
499
/// <summary>
/// Writes the headers of a response to the headers file kept beside the
/// cached document, as a c element holding one h element per header
/// (attribute n is the name, attribute v the value).
/// </summary>
/// <param name="requestUri">The url whose headers file is written.</param>
/// <param name="resp">The response whose headers are stored.</param>
private void SaveCacheHeaders(Uri requestUri, HttpWebResponse resp)
{
    string target = GetCacheHeadersPath(requestUri);

    XmlDocument headersDoc = new XmlDocument();
    headersDoc.LoadXml("<c></c>");
    XmlElement root = headersDoc.DocumentElement;

    foreach (string headerName in resp.Headers)
    {
        XmlElement item = headersDoc.CreateElement("h");
        item.SetAttribute("n", headerName);
        item.SetAttribute("v", resp.Headers[headerName]);
        root.AppendChild(item);
    }

    headersDoc.Save(target);
}
522
/// <summary>
/// Copies a stream to a file, replacing any existing file, and then sets
/// the file's last write time. The source stream is closed afterwards.
/// </summary>
/// <param name="stream">The stream to read until its end.</param>
/// <param name="path">The file to create.</param>
/// <param name="touchDate">The last write time to give the file.</param>
/// <param name="streamBufferSize">The number of bytes to read per chunk.</param>
/// <returns>The number of bytes copied.</returns>
private static long SaveStream(Stream stream, string path, DateTime touchDate, int streamBufferSize)
{
    FilePreparePath(path);

    long total = 0;
    using (FileStream output = new FileStream(path, FileMode.Create, FileAccess.Write))
    using (BinaryReader reader = new BinaryReader(stream))
    using (BinaryWriter writer = new BinaryWriter(output))
    {
        // An empty chunk marks the end of the source stream.
        for (byte[] chunk = reader.ReadBytes(streamBufferSize);
             chunk.Length > 0;
             chunk = reader.ReadBytes(streamBufferSize))
        {
            total += chunk.Length;
            writer.Write(chunk);
        }
    }

    File.SetLastWriteTime(path, touchDate);
    return total;
}
566
/// <summary>
/// Makes a file path ready for writing: clears the read-only attribute of
/// an existing file, or creates the missing parent directory otherwise.
/// </summary>
/// <param name="target">The path of the file that is about to be written.</param>
private static void FilePreparePath(string target)
{
    if (File.Exists(target))
    {
        FileAttributes atts = File.GetAttributes(target);
        File.SetAttributes(target, atts & ~FileAttributes.ReadOnly);
        return;
    }

    // GetDirectoryName yields an empty string for a bare file name and null
    // for a root or null path; there is no directory to create in those
    // cases, and Directory.CreateDirectory would throw on them.
    string dir = Path.GetDirectoryName(target);
    if ((dir == null) || (dir.Length == 0))
    {
        return;
    }
    if (!Directory.Exists(dir))
    {
        Directory.CreateDirectory(dir);
    }
}
583
/// <summary>
/// Truncates a date to whole seconds.
/// </summary>
/// <param name="t">The date to truncate.</param>
/// <returns>A date with the same year, month, day, hour, minute and second, and no fraction of a second.</returns>
private static DateTime RemoveMilliseconds(DateTime t)
{
    // Drop everything below one second from the tick count.
    long wholeSecondTicks = t.Ticks - (t.Ticks % TimeSpan.TicksPerSecond);
    return new DateTime(wholeSecondTicks);
}
588
/// <summary>
/// Gets the path extension for a given MIME content type, as registered
/// under HKEY_CLASSES_ROOT\MIME\Database\Content Type.
/// </summary>
/// <param name="contentType">The input MIME content type.</param>
/// <param name="def">The default path extension to return if the content type is null, empty or not registered, or if any error occurs.</param>
/// <returns>The MIME content type's path extension.</returns>
public static string GetExtensionForContentType(string contentType, string def)
{
    if ((contentType == null) || (contentType.Length == 0))
    {
        return def;
    }
    try
    {
        RegistryKey key = Registry.ClassesRoot.OpenSubKey(@"MIME\Database\Content Type\" + contentType, false);
        if (key == null)
        {
            // Content type is not registered.
            return def;
        }
        try
        {
            string ext = key.GetValue("Extension", def) as string;
            return (ext != null) ? ext : def;
        }
        finally
        {
            // Release the registry handle.
            key.Close();
        }
    }
    catch (Exception)
    {
        // Documented contract: fall back to the default on any error.
        return def;
    }
}
614
/// <summary>
/// Gets the MIME content type for a given path extension, as registered
/// in the "Content Type" value of the extension's key under HKEY_CLASSES_ROOT.
/// </summary>
/// <param name="extension">The input path extension.</param>
/// <param name="def">The default content type to return if the extension is null, empty or not registered, or if any error occurs.</param>
/// <returns>The path extension's MIME content type.</returns>
public static string GetContentTypeForExtension(string extension, string def)
{
    if ((extension == null) || (extension.Length == 0))
    {
        return def;
    }
    try
    {
        RegistryKey key = Registry.ClassesRoot.OpenSubKey(extension, false);
        if (key == null)
        {
            // Extension is not registered.
            return def;
        }
        try
        {
            // The key's default value holds the ProgID (for example
            // "htmlfile"); the MIME type is in the "Content Type" value.
            string contentType = key.GetValue("Content Type", def) as string;
            return (contentType != null) ? contentType : def;
        }
        finally
        {
            // Release the registry handle.
            key.Close();
        }
    }
    catch (Exception)
    {
        // Documented contract: fall back to the default on any error.
        return def;
    }
}
640
/// <summary>
/// Loads an HTML document from an Internet resource and writes it to the given XmlTextWriter.
/// </summary>
/// <param name="htmlUrl">The requested URL, such as "http://Myserver/Mypath/Myfile.asp".</param>
/// <param name="writer">The XmlTextWriter to which you want to save.</param>
public void LoadHtmlAsXml(string htmlUrl, XmlTextWriter writer)
{
    Load(htmlUrl).Save(writer);
}
651
/// <summary>
/// Loads an HTML document from an Internet resource, applies an XSLT
/// transformation and writes the result to the given XmlTextWriter.
/// No intermediate XML file is kept.
/// </summary>
/// <param name="htmlUrl">The requested URL, such as "http://Myserver/Mypath/Myfile.asp".</param>
/// <param name="xsltUrl">The URL that specifies the XSLT stylesheet to load.</param>
/// <param name="xsltArgs">An XsltArgumentList containing the namespace-qualified arguments used as input to the transform.</param>
/// <param name="writer">The XmlTextWriter to which you want to save.</param>
public void LoadHtmlAsXml(string htmlUrl, string xsltUrl, XsltArgumentList xsltArgs, XmlTextWriter writer)
{
    // Delegate to the general overload without a path for the temporary XML.
    const string noXmlPath = null;
    LoadHtmlAsXml(htmlUrl, xsltUrl, xsltArgs, writer, noXmlPath);
}
663
664                 /// <summary>
665                 /// Loads an HTML document from an Internet resource and saves it to the specified XmlTextWriter, after an XSLT transformation.
666                 /// </summary>
667                 /// <param name="htmlUrl">The requested URL, such as "http://Myserver/Mypath/Myfile.asp". May not be null.</param>
668                 /// <param name="xsltUrl">The URL that specifies the XSLT stylesheet to load.</param>
669                 /// <param name="xsltArgs">An XsltArgumentList containing the namespace-qualified arguments used as input to the transform.</param>
670                 /// <param name="writer">The XmlTextWriter to which you want to save.</param>
671                 /// <param name="xmlPath">A file path where the temporary XML before transformation will be saved. Mostly used for debugging purposes.</param>
672                 public void LoadHtmlAsXml(string htmlUrl, string xsltUrl, XsltArgumentList xsltArgs, XmlTextWriter writer, string xmlPath)
673                 {
674                         if (htmlUrl == null)
675                         {
676                                 throw new ArgumentNullException("htmlUrl");
677                         }
678
679                         HtmlDocument doc = Load(htmlUrl);
680
681                         if (xmlPath != null)
682                         {
683                                 XmlTextWriter w = new XmlTextWriter(xmlPath, doc.Encoding);
684                                 doc.Save(w);
685                                 w.Close();
686                         }
687                         if (xsltArgs == null)
688                         {
689                                 xsltArgs = new XsltArgumentList();
690                         }
691                         
692                         // add some useful variables to the xslt doc
693                         xsltArgs.AddParam("url", "", htmlUrl);
694                         xsltArgs.AddParam("requestDuration", "", RequestDuration);
695                         xsltArgs.AddParam("fromCache", "", FromCache);
696
697                         XslTransform xslt = new XslTransform();
698                         xslt.Load(xsltUrl);
699                         xslt.Transform(doc, xsltArgs, writer, null);
700                 }
701
702                 /// <summary>
703                 /// Creates an instance of the given type from the specified Internet resource.
704                 /// </summary>
705                 /// <param name="url">The requested URL, such as "http://Myserver/Mypath/Myfile.asp".</param>
706                 /// <param name="type">The requested type.</param>
707                 /// <returns>An newly created instance.</returns>
708                 public object CreateInstance(string url, Type type)
709                 {
710                         return CreateInstance(url, null, null, type);
711                 }
712
713                 /// <summary>
714                 /// Creates an instance of the given type from the specified Internet resource.
715                 /// </summary>
716                 /// <param name="htmlUrl">The requested URL, such as "http://Myserver/Mypath/Myfile.asp".</param>
717                 /// <param name="xsltUrl">The URL that specifies the XSLT stylesheet to load.</param>
718                 /// <param name="xsltArgs">An XsltArgumentList containing the namespace-qualified arguments used as input to the transform.</param>
719                 /// <param name="type">The requested type.</param>
720                 /// <returns>An newly created instance.</returns>
721                 public object CreateInstance(string htmlUrl, string xsltUrl, XsltArgumentList xsltArgs, Type type)
722                 {
723                         return CreateInstance(htmlUrl, xsltUrl, xsltArgs, type, null);
724                 }
725
726                 /// <summary>
727                 /// Creates an instance of the given type from the specified Internet resource.
728                 /// </summary>
729                 /// <param name="htmlUrl">The requested URL, such as "http://Myserver/Mypath/Myfile.asp".</param>
730                 /// <param name="xsltUrl">The URL that specifies the XSLT stylesheet to load.</param>
731                 /// <param name="xsltArgs">An XsltArgumentList containing the namespace-qualified arguments used as input to the transform.</param>
732                 /// <param name="type">The requested type.</param>
733                 /// <param name="xmlPath">A file path where the temporary XML before transformation will be saved. Mostly used for debugging purposes.</param>
734                 /// <returns>An newly created instance.</returns>
735                 public object CreateInstance(string htmlUrl, string xsltUrl, XsltArgumentList xsltArgs, Type type, string xmlPath)
736                 {
737                         StringWriter sw = new StringWriter();
738                         XmlTextWriter writer = new XmlTextWriter(sw);
739                         if (xsltUrl == null)
740                         {
741                                 LoadHtmlAsXml(htmlUrl, writer);
742                         }
743                         else
744                         {
745                                 if (xmlPath == null)
746                                 {
747                                         LoadHtmlAsXml(htmlUrl, xsltUrl, xsltArgs, writer);
748                                 }
749                                 else
750                                 {
751                                         LoadHtmlAsXml(htmlUrl, xsltUrl, xsltArgs, writer, xmlPath);
752                                 }
753                         }
754                         writer.Flush();
755                         StringReader sr = new StringReader(sw.ToString());
756                         XmlTextReader reader = new XmlTextReader(sr);
757                         XmlSerializer serializer = new XmlSerializer(type);
758                         object o = null;
759                         try
760                         {
761                                 o = serializer.Deserialize(reader);
762                         }
763                         catch(InvalidOperationException ex)
764                         {
765                                 throw new Exception(ex.ToString() + ", --- xml:" + sw.ToString());
766                         }
767                         return o;
768                 }
769
770                 /// <summary>
771                 /// Gets or Sets the cache path. If null, no caching mechanism will be used.
772                 /// </summary>
773                 public string CachePath
774                 {
775                         get
776                         {
777                                 return _cachePath;
778                         }
779                         set
780                         {
781                                 _cachePath = value;
782                         }
783                 }
784
785                 /// <summary>
786                 /// Gets or Sets a value indicating whether the caching mechanisms should be used or not.
787                 /// </summary>
788                 public bool UsingCache
789                 {
790                         get
791                         {
792                                 if (_cachePath == null)
793                                 {
794                                         return false;
795                                 }
796                                 return _usingCache;
797                         }
798                         set
799                         {
800                                 if ((value) && (_cachePath == null))
801                                 {
802                                         throw new HtmlWebException("You need to define a CachePath first.");
803                                 }
804                                 _usingCache = value;
805                         }
806                 }
807         }
808
809         /// <summary>
810         /// Represents an exception thrown by the HtmlWeb utility class.
811         /// </summary>
812         public class HtmlWebException: Exception
813         {
814                 /// <summary>
815                 /// Creates an instance of the HtmlWebException.
816                 /// </summary>
817                 /// <param name="message">The exception's message.</param>
818                 public HtmlWebException(string message)
819                         :base(message)
820                 {
821                 }
822         }
823 }
824 #endif