1 // HtmlAgilityPack V1.0 - Simon Mourier <simon underscore mourier at hotmail dot com>
\r
6 namespace HtmlAgilityPack
\r
9 /// Represents a document with mixed code and text. ASP, ASPX, and JSP files are good examples of such documents.
\r
11 public class MixedCodeDocument
\r
// List returned by the CodeFragments property; cleared at the start of Load(TextReader).
16 internal MixedCodeDocumentFragmentList _codefragments;
\r
// Fragment currently being built by Parse(); its position fields are filled in by SetPosition().
17 private MixedCodeDocumentFragment _currentfragment;
\r
// List returned by the Fragments property.
18 internal MixedCodeDocumentFragmentList _fragments;
\r
// Position within the current line; copied into each fragment by SetPosition() and advanced past the end token in Parse().
21 private int _lineposition;
\r
// Current parser state (text or code), driven by Parse().
22 private ParseState _state;
\r
// Encoding taken from the StreamReader in Load(TextReader); consulted by GetOutEncoding().
23 private Encoding _streamencoding;
\r
// Entire document text, read with ReadToEnd() in Load(TextReader).
24 internal string _text;
\r
// List returned by the TextFragments property; cleared at the start of Load(TextReader).
25 internal MixedCodeDocumentFragmentList _textfragments;
\r
28 /// Gets or sets the token representing code end.
\r
30 public string TokenCodeEnd = "%>";
\r
33 /// Gets or sets the token representing code start.
\r
35 public string TokenCodeStart = "<%";
\r
38 /// Gets or sets the token representing code directive.
\r
40 public string TokenDirective = "@";
\r
43 /// Gets or sets the token representing response write directive.
\r
45 public string TokenResponseWrite = "Response.Write ";
\r
// Format string for the placeholder emitted for a text fragment; {0} receives a counter via string.Format.
48 private string TokenTextBlock = "TextBlock({0})";
\r
52 #region Constructors
\r
/// <summary>
/// Initializes a new, empty mixed code document.
/// </summary>
public MixedCodeDocument()
{
    // Each fragment list is constructed with this document as its argument.
    this._codefragments = new MixedCodeDocumentFragmentList(this);
    this._textfragments = new MixedCodeDocumentFragmentList(this);
    this._fragments = new MixedCodeDocumentFragmentList(this);
}
\r
69 /// Gets the code represented by the mixed code document seen as a template.
\r
77 foreach (MixedCodeDocumentFragment frag in _fragments)
\r
81 case MixedCodeDocumentFragmentType.Text:
\r
82 s += TokenResponseWrite + string.Format(TokenTextBlock, i) + "\n";
\r
86 case MixedCodeDocumentFragmentType.Code:
\r
87 s += ((MixedCodeDocumentCodeFragment) frag).Code + "\n";
\r
/// <summary>
/// Gets the list that holds the document's code fragments.
/// </summary>
public MixedCodeDocumentFragmentList CodeFragments
{
    get
    {
        return this._codefragments;
    }
}
\r
/// <summary>
/// Gets the list that holds every fragment of the document.
/// </summary>
public MixedCodeDocumentFragmentList Fragments
{
    get
    {
        return this._fragments;
    }
}
\r
/// <summary>
/// Gets the encoding recorded from the stream reader the document was loaded with.
/// </summary>
public Encoding StreamEncoding
{
    get
    {
        return this._streamencoding;
    }
}
\r
/// <summary>
/// Gets the list that holds the document's text fragments.
/// </summary>
public MixedCodeDocumentFragmentList TextFragments
{
    get
    {
        return this._textfragments;
    }
}
\r
129 #region Public Methods
\r
/// <summary>
/// Creates a code fragment instance.
/// </summary>
/// <returns>The newly created code fragment instance.</returns>
public MixedCodeDocumentCodeFragment CreateCodeFragment()
{
    // CreateFragment returns the base fragment type; narrow it to the code fragment type.
    MixedCodeDocumentFragment created = CreateFragment(MixedCodeDocumentFragmentType.Code);
    return (MixedCodeDocumentCodeFragment) created;
}
\r
/// <summary>
/// Creates a text fragment instance.
/// </summary>
/// <returns>The newly created text fragment instance.</returns>
public MixedCodeDocumentTextFragment CreateTextFragment()
{
    // CreateFragment returns the base fragment type; narrow it to the text fragment type.
    MixedCodeDocumentFragment created = CreateFragment(MixedCodeDocumentFragmentType.Text);
    return (MixedCodeDocumentTextFragment) created;
}
\r
/// <summary>
/// Loads a mixed code document from a stream.
/// </summary>
/// <param name="stream">The input stream.</param>
public void Load(Stream stream)
{
    // Wrap the stream in a reader and hand it to the TextReader overload.
    StreamReader reader = new StreamReader(stream);
    Load(reader);
}
\r
/// <summary>
/// Loads a mixed code document from a stream.
/// </summary>
/// <param name="stream">The input stream.</param>
/// <param name="detectEncodingFromByteOrderMarks">Indicates whether to look for byte order marks at the beginning of the file.</param>
public void Load(Stream stream, bool detectEncodingFromByteOrderMarks)
{
    // Wrap the stream in a reader and hand it to the TextReader overload.
    StreamReader reader = new StreamReader(stream, detectEncodingFromByteOrderMarks);
    Load(reader);
}
\r
/// <summary>
/// Loads a mixed code document from a stream.
/// </summary>
/// <param name="stream">The input stream.</param>
/// <param name="encoding">The character encoding to use.</param>
public void Load(Stream stream, Encoding encoding)
{
    // Wrap the stream in a reader and hand it to the TextReader overload.
    StreamReader reader = new StreamReader(stream, encoding);
    Load(reader);
}
\r
/// <summary>
/// Loads a mixed code document from a stream.
/// </summary>
/// <param name="stream">The input stream.</param>
/// <param name="encoding">The character encoding to use.</param>
/// <param name="detectEncodingFromByteOrderMarks">Indicates whether to look for byte order marks at the beginning of the file.</param>
public void Load(Stream stream, Encoding encoding, bool detectEncodingFromByteOrderMarks)
{
    // Wrap the stream in a reader and hand it to the TextReader overload.
    StreamReader reader = new StreamReader(stream, encoding, detectEncodingFromByteOrderMarks);
    Load(reader);
}
\r
/// <summary>
/// Loads a mixed code document from a stream.
/// </summary>
/// <param name="stream">The input stream.</param>
/// <param name="encoding">The character encoding to use.</param>
/// <param name="detectEncodingFromByteOrderMarks">Indicates whether to look for byte order marks at the beginning of the file.</param>
/// <param name="buffersize">The minimum buffer size.</param>
public void Load(Stream stream, Encoding encoding, bool detectEncodingFromByteOrderMarks, int buffersize)
{
    // Wrap the stream in a reader and hand it to the TextReader overload.
    StreamReader reader = new StreamReader(stream, encoding, detectEncodingFromByteOrderMarks, buffersize);
    Load(reader);
}
\r
/// <summary>
/// Loads a mixed code document from a file.
/// </summary>
/// <param name="path">The complete file path to be read.</param>
public void Load(string path)
{
    // This method opens the file, so it owns the reader: dispose it even when
    // loading throws, otherwise the file handle stays open until finalization.
    using (StreamReader reader = new StreamReader(path))
    {
        Load(reader);
    }
}
\r
/// <summary>
/// Loads a mixed code document from a file.
/// </summary>
/// <param name="path">The complete file path to be read.</param>
/// <param name="detectEncodingFromByteOrderMarks">Indicates whether to look for byte order marks at the beginning of the file.</param>
public void Load(string path, bool detectEncodingFromByteOrderMarks)
{
    // This method opens the file, so it owns the reader: dispose it even when
    // loading throws, otherwise the file handle stays open until finalization.
    using (StreamReader reader = new StreamReader(path, detectEncodingFromByteOrderMarks))
    {
        Load(reader);
    }
}
\r
/// <summary>
/// Loads a mixed code document from a file.
/// </summary>
/// <param name="path">The complete file path to be read.</param>
/// <param name="encoding">The character encoding to use.</param>
public void Load(string path, Encoding encoding)
{
    // This method opens the file, so it owns the reader: dispose it even when
    // loading throws, otherwise the file handle stays open until finalization.
    using (StreamReader reader = new StreamReader(path, encoding))
    {
        Load(reader);
    }
}
\r
/// <summary>
/// Loads a mixed code document from a file.
/// </summary>
/// <param name="path">The complete file path to be read.</param>
/// <param name="encoding">The character encoding to use.</param>
/// <param name="detectEncodingFromByteOrderMarks">Indicates whether to look for byte order marks at the beginning of the file.</param>
public void Load(string path, Encoding encoding, bool detectEncodingFromByteOrderMarks)
{
    // This method opens the file, so it owns the reader: dispose it even when
    // loading throws, otherwise the file handle stays open until finalization.
    using (StreamReader reader = new StreamReader(path, encoding, detectEncodingFromByteOrderMarks))
    {
        Load(reader);
    }
}
\r
/// <summary>
/// Loads a mixed code document from a file.
/// </summary>
/// <param name="path">The complete file path to be read.</param>
/// <param name="encoding">The character encoding to use.</param>
/// <param name="detectEncodingFromByteOrderMarks">Indicates whether to look for byte order marks at the beginning of the file.</param>
/// <param name="buffersize">The minimum buffer size.</param>
public void Load(string path, Encoding encoding, bool detectEncodingFromByteOrderMarks, int buffersize)
{
    // This method opens the file, so it owns the reader: dispose it even when
    // loading throws, otherwise the file handle stays open until finalization.
    using (StreamReader reader = new StreamReader(path, encoding, detectEncodingFromByteOrderMarks, buffersize))
    {
        Load(reader);
    }
}
\r
254 /// Loads the mixed code document from the specified TextReader.
\r
256 /// <param name="reader">The TextReader used to feed the HTML data into the document.</param>
\r
257 public void Load(TextReader reader)
\r
// Drop the code and text fragment lists left over from any earlier load.
259 _codefragments.Clear();
\r
260 _textfragments.Clear();
\r
262 // all pseudo constructors get down to this one
\r
// `as` yields null when the reader is not a StreamReader (LoadHtml passes a StringReader).
263 StreamReader sr = reader as StreamReader;
\r
// NOTE(review): a null check on `sr` is presumably on a line not visible in this listing; confirm, since this dereference would fail for non-stream readers.
266 _streamencoding = sr.CurrentEncoding;
\r
// Read the whole input into memory; Parse() works on this string.
269 _text = reader.ReadToEnd();
\r
/// <summary>
/// Loads a mixed document from a text.
/// </summary>
/// <param name="html">The text to load.</param>
public void LoadHtml(string html)
{
    // Expose the string through a reader so the TextReader overload can consume it.
    StringReader source = new StringReader(html);
    Load(source);
}
\r
284 /// Saves the mixed document to the specified stream.
\r
286 /// <param name="outStream">The stream to which you want to save.</param>
\r
287 public void Save(Stream outStream)
\r
// Wrap the caller's stream in a writer that uses the encoding chosen by GetOutEncoding().
// NOTE(review): what is done with `sw` afterwards is not visible in this listing; presumably it is passed to another Save overload. Confirm.
289 StreamWriter sw = new StreamWriter(outStream, GetOutEncoding());
\r
294 /// Saves the mixed document to the specified stream.
\r
296 /// <param name="outStream">The stream to which you want to save.</param>
\r
297 /// <param name="encoding">The character encoding to use.</param>
\r
298 public void Save(Stream outStream, Encoding encoding)
\r
// Wrap the caller's stream in a writer that uses the caller-supplied encoding.
// NOTE(review): what is done with `sw` afterwards is not visible in this listing; presumably it is passed to another Save overload. Confirm.
300 StreamWriter sw = new StreamWriter(outStream, encoding);
\r
305 /// Saves the mixed document to the specified file.
\r
307 /// <param name="filename">The location of the file where you want to save the document.</param>
\r
308 public void Save(string filename)
\r
// Open the file for overwrite (append = false) with the encoding chosen by GetOutEncoding().
// NOTE(review): no disposal of `sw` is visible in this listing; confirm the writer is closed so the file handle is released.
310 StreamWriter sw = new StreamWriter(filename, false, GetOutEncoding());
\r
315 /// Saves the mixed document to the specified file.
\r
317 /// <param name="filename">The location of the file where you want to save the document.</param>
\r
318 /// <param name="encoding">The character encoding to use.</param>
\r
319 public void Save(string filename, Encoding encoding)
\r
// Open the file for overwrite (append = false) with the caller-supplied encoding.
// NOTE(review): no disposal of `sw` is visible in this listing; confirm the writer is closed so the file handle is released.
321 StreamWriter sw = new StreamWriter(filename, false, encoding);
\r
/// <summary>
/// Saves the mixed document to the specified StreamWriter.
/// </summary>
/// <param name="writer">The StreamWriter to which you want to save.</param>
public void Save(StreamWriter writer)
{
    // Typing the local as TextWriter selects the TextWriter overload instead of recursing into this one.
    TextWriter target = writer;
    Save(target);
}
\r
335 /// Saves the mixed document to the specified TextWriter.
\r
337 /// <param name="writer">The TextWriter to which you want to save.</param>
\r
338 public void Save(TextWriter writer)
\r
345 #region Internal Methods
\r
// Factory for fragments owned by this document: returns a text or code fragment constructed with this document as its argument.
// NOTE(review): the switch header and any default label are not visible in this listing; presumably the switch is on `type`. Confirm.
347 internal MixedCodeDocumentFragment CreateFragment(MixedCodeDocumentFragmentType type)
\r
351 case MixedCodeDocumentFragmentType.Text:
\r
352 return new MixedCodeDocumentTextFragment(this);
\r
354 case MixedCodeDocumentFragmentType.Code:
\r
355 return new MixedCodeDocumentCodeFragment(this);
\r
// Reached for a fragment type that has no case above.
358 throw new NotSupportedException();
\r
/// <summary>
/// Chooses the encoding used when saving: the encoding recorded at load time
/// when there is one, otherwise <see cref="Encoding.Default"/>.
/// </summary>
internal Encoding GetOutEncoding()
{
    return _streamencoding ?? Encoding.Default;
}
\r
371 #region Private Methods
\r
373 private void IncrementPosition()
\r
// Scans _text character by character, alternating between text and code states
// when TokenCodeStart / TokenCodeEnd are found, and records each fragment's length.
// NOTE(review): several lines of this method (switch header, breaks, SetPosition calls,
// list insertion) are not visible in this listing; the comments below cover only what is shown.
385 private void Parse()
\r
// Parsing starts in text state with a fresh text fragment.
387 _state = ParseState.Text;
\r
389 _currentfragment = CreateFragment(MixedCodeDocumentFragmentType.Text);
\r
391 while (_index < _text.Length)
\r
393 _c = _text[_index];
\r
// Presumably advances _index past the character just read (body not visible), which is why the code below uses _index - 1. Confirm.
394 IncrementPosition();
\r
398 case ParseState.Text:
\r
// NOTE(review): with _index already advanced, this strict '<' looks stricter than Substring(_index - 1, Length) needs; a start token at or near the end of the text may go undetected. Confirm.
399 if (_index + TokenCodeStart.Length < _text.Length)
\r
401 if (_text.Substring(_index - 1, TokenCodeStart.Length) == TokenCodeStart)
\r
// Start token found: close the text fragment just before the token and begin a code fragment.
403 _state = ParseState.Code;
\r
404 _currentfragment.Length = _index - 1 - _currentfragment.Index;
\r
405 _currentfragment = CreateFragment(MixedCodeDocumentFragmentType.Code);
\r
412 case ParseState.Code:
\r
// NOTE(review): same strict bounds check as for the start token; an end token at or near the end of the text may go undetected. Confirm.
413 if (_index + TokenCodeEnd.Length < _text.Length)
\r
415 if (_text.Substring(_index - 1, TokenCodeEnd.Length) == TokenCodeEnd)
\r
// End token found: set the code fragment's length, advance past the token, and begin a text fragment.
417 _state = ParseState.Text;
\r
418 _currentfragment.Length = _index + TokenCodeEnd.Length - _currentfragment.Index;
\r
419 _index += TokenCodeEnd.Length;
\r
420 _lineposition += TokenCodeEnd.Length;
\r
421 _currentfragment = CreateFragment(MixedCodeDocumentFragmentType.Text);
\r
// Set the length of whichever fragment is still current from the final index.
430 _currentfragment.Length = _index - _currentfragment.Index;
\r
/// <summary>
/// Stamps the current fragment with the parser's current line, line position
/// and start index, and resets its length to zero.
/// </summary>
private void SetPosition()
{
    MixedCodeDocumentFragment fragment = _currentfragment;

    fragment.Line = _line;
    fragment._lineposition = _lineposition;
    // The fragment starts one position before the current index.
    fragment.Index = _index - 1;
    fragment.Length = 0;
}
\r
443 #region Nested type: ParseState
\r
445 private enum ParseState
\r