// HtmlAgilityPack V1.0 - Simon Mourier

using System;
using System.IO;
using System.Text;

namespace HtmlAgilityPack
{
    /// <summary>
    /// Represents a document with mixed code and text. ASP, ASPX and JSP are good examples of such documents.
    /// </summary>
    public class MixedCodeDocument
    {
        #region Fields

        // Character read by the current parser step (kept as an int so it can be compared to char codes).
        private int _c;
        internal MixedCodeDocumentFragmentList _codefragments;
        // Fragment currently being delimited by Parse().
        private MixedCodeDocumentFragment _currentfragment;
        internal MixedCodeDocumentFragmentList _fragments;
        // Parser cursor into _text; inside the parsing switch it is one past the character held in _c.
        private int _index;
        private int _line;
        private int _lineposition;
        private ParseState _state;
        private Encoding _streamencoding;
        internal string _text;
        internal MixedCodeDocumentFragmentList _textfragments;

        /// <summary>
        /// Gets or sets the token representing code end.
        /// </summary>
        public string TokenCodeEnd = "%>";

        /// <summary>
        /// Gets or sets the token representing code start.
        /// </summary>
        public string TokenCodeStart = "<%";

        /// <summary>
        /// Gets or sets the token representing code directive.
        /// </summary>
        public string TokenDirective = "@";

        /// <summary>
        /// Gets or sets the token representing response write directive.
        /// </summary>
        public string TokenResponseWrite = "Response.Write ";

        // Format used by Code to reference the n-th text fragment; never reassigned, hence const.
        private const string TokenTextBlock = "TextBlock({0})";

        #endregion

        #region Constructors

        /// <summary>
        /// Creates a mixed code document instance.
        /// </summary>
        public MixedCodeDocument()
        {
            _codefragments = new MixedCodeDocumentFragmentList(this);
            _textfragments = new MixedCodeDocumentFragmentList(this);
            _fragments = new MixedCodeDocumentFragmentList(this);
        }

        #endregion

        #region Properties

        /// <summary>
        /// Gets the code represented by the mixed code document seen as a template.
        /// </summary>
        /// <remarks>
        /// Each text fragment is emitted as the response-write token followed by a numbered
        /// text block reference; each code fragment contributes its own code. Every entry is
        /// terminated by a line feed.
        /// </remarks>
        public string Code
        {
            get
            {
                // StringBuilder avoids re-copying the whole result for every fragment.
                StringBuilder code = new StringBuilder();
                int textBlockIndex = 0;
                foreach (MixedCodeDocumentFragment frag in _fragments)
                {
                    switch (frag._type)
                    {
                        case MixedCodeDocumentFragmentType.Text:
                            code.Append(TokenResponseWrite);
                            code.Append(string.Format(TokenTextBlock, textBlockIndex));
                            code.Append('\n');
                            textBlockIndex++;
                            break;

                        case MixedCodeDocumentFragmentType.Code:
                            code.Append(((MixedCodeDocumentCodeFragment) frag).Code);
                            code.Append('\n');
                            break;
                    }
                }
                return code.ToString();
            }
        }

        /// <summary>
        /// Gets the list of code fragments in the document.
        /// </summary>
        public MixedCodeDocumentFragmentList CodeFragments
        {
            get { return _codefragments; }
        }

        /// <summary>
        /// Gets the list of all fragments in the document.
        /// </summary>
        public MixedCodeDocumentFragmentList Fragments
        {
            get { return _fragments; }
        }

        /// <summary>
        /// Gets the encoding of the stream used to read the document.
        /// This is only updated when the document is loaded through a <see cref="StreamReader"/>.
        /// </summary>
        public Encoding StreamEncoding
        {
            get { return _streamencoding; }
        }

        /// <summary>
        /// Gets the list of text fragments in the document.
        /// </summary>
        public MixedCodeDocumentFragmentList TextFragments
        {
            get { return _textfragments; }
        }

        #endregion

        #region Public Methods

        /// <summary>
        /// Creates a code fragment instance.
        /// </summary>
        /// <returns>The newly created code fragment instance.</returns>
        public MixedCodeDocumentCodeFragment CreateCodeFragment()
        {
            return (MixedCodeDocumentCodeFragment) CreateFragment(MixedCodeDocumentFragmentType.Code);
        }

        /// <summary>
        /// Creates a text fragment instance.
        /// </summary>
        /// <returns>The newly created text fragment instance.</returns>
        public MixedCodeDocumentTextFragment CreateTextFragment()
        {
            return (MixedCodeDocumentTextFragment) CreateFragment(MixedCodeDocumentFragmentType.Text);
        }

        /// <summary>
        /// Loads a mixed code document from a stream. The stream is closed once it has been read.
        /// </summary>
        /// <param name="stream">The input stream.</param>
        public void Load(Stream stream)
        {
            Load(new StreamReader(stream));
        }

        /// <summary>
        /// Loads a mixed code document from a stream. The stream is closed once it has been read.
        /// </summary>
        /// <param name="stream">The input stream.</param>
        /// <param name="detectEncodingFromByteOrderMarks">Indicates whether to look for byte order marks at the beginning of the file.</param>
        public void Load(Stream stream, bool detectEncodingFromByteOrderMarks)
        {
            Load(new StreamReader(stream, detectEncodingFromByteOrderMarks));
        }

        /// <summary>
        /// Loads a mixed code document from a stream. The stream is closed once it has been read.
        /// </summary>
        /// <param name="stream">The input stream.</param>
        /// <param name="encoding">The character encoding to use.</param>
        public void Load(Stream stream, Encoding encoding)
        {
            Load(new StreamReader(stream, encoding));
        }

        /// <summary>
        /// Loads a mixed code document from a stream. The stream is closed once it has been read.
        /// </summary>
        /// <param name="stream">The input stream.</param>
        /// <param name="encoding">The character encoding to use.</param>
        /// <param name="detectEncodingFromByteOrderMarks">Indicates whether to look for byte order marks at the beginning of the file.</param>
        public void Load(Stream stream, Encoding encoding, bool detectEncodingFromByteOrderMarks)
        {
            Load(new StreamReader(stream, encoding, detectEncodingFromByteOrderMarks));
        }

        /// <summary>
        /// Loads a mixed code document from a stream. The stream is closed once it has been read.
        /// </summary>
        /// <param name="stream">The input stream.</param>
        /// <param name="encoding">The character encoding to use.</param>
        /// <param name="detectEncodingFromByteOrderMarks">Indicates whether to look for byte order marks at the beginning of the file.</param>
        /// <param name="buffersize">The minimum buffer size.</param>
        public void Load(Stream stream, Encoding encoding, bool detectEncodingFromByteOrderMarks, int buffersize)
        {
            Load(new StreamReader(stream, encoding, detectEncodingFromByteOrderMarks, buffersize));
        }

        /// <summary>
        /// Loads a mixed code document from a file.
        /// </summary>
        /// <param name="path">The complete file path to be read.</param>
        public void Load(string path)
        {
            Load(new StreamReader(path));
        }

        /// <summary>
        /// Loads a mixed code document from a file.
        /// </summary>
        /// <param name="path">The complete file path to be read.</param>
        /// <param name="detectEncodingFromByteOrderMarks">Indicates whether to look for byte order marks at the beginning of the file.</param>
        public void Load(string path, bool detectEncodingFromByteOrderMarks)
        {
            Load(new StreamReader(path, detectEncodingFromByteOrderMarks));
        }

        /// <summary>
        /// Loads a mixed code document from a file.
        /// </summary>
        /// <param name="path">The complete file path to be read.</param>
        /// <param name="encoding">The character encoding to use.</param>
        public void Load(string path, Encoding encoding)
        {
            Load(new StreamReader(path, encoding));
        }

        /// <summary>
        /// Loads a mixed code document from a file.
        /// </summary>
        /// <param name="path">The complete file path to be read.</param>
        /// <param name="encoding">The character encoding to use.</param>
        /// <param name="detectEncodingFromByteOrderMarks">Indicates whether to look for byte order marks at the beginning of the file.</param>
        public void Load(string path, Encoding encoding, bool detectEncodingFromByteOrderMarks)
        {
            Load(new StreamReader(path, encoding, detectEncodingFromByteOrderMarks));
        }

        /// <summary>
        /// Loads a mixed code document from a file.
        /// </summary>
        /// <param name="path">The complete file path to be read.</param>
        /// <param name="encoding">The character encoding to use.</param>
        /// <param name="detectEncodingFromByteOrderMarks">Indicates whether to look for byte order marks at the beginning of the file.</param>
        /// <param name="buffersize">The minimum buffer size.</param>
        public void Load(string path, Encoding encoding, bool detectEncodingFromByteOrderMarks, int buffersize)
        {
            Load(new StreamReader(path, encoding, detectEncodingFromByteOrderMarks, buffersize));
        }

        /// <summary>
        /// Loads the mixed code document from the specified TextReader.
        /// All other Load overloads and LoadHtml end up here. The reader is always closed,
        /// even when reading fails.
        /// </summary>
        /// <param name="reader">The TextReader used to feed the HTML data into the document.</param>
        /// <exception cref="ArgumentNullException"><paramref name="reader"/> is null.</exception>
        public void Load(TextReader reader)
        {
            if (reader == null)
            {
                throw new ArgumentNullException("reader");
            }

            _codefragments.Clear();
            _textfragments.Clear();
            // Code enumerates _fragments, so it is reset together with the two typed lists.
            // NOTE(review): assumes fragments register themselves in these lists when they are
            // constructed - confirm in MixedCodeDocumentFragment.
            _fragments.Clear();

            try
            {
                _text = reader.ReadToEnd();

                // Capture the encoding only after reading: StreamReader performs its encoding
                // auto-detection on the first read, so CurrentEncoding may change at that point.
                StreamReader sr = reader as StreamReader;
                if (sr != null)
                {
                    _streamencoding = sr.CurrentEncoding;
                }
            }
            finally
            {
                reader.Close();
            }

            Parse();
        }

        /// <summary>
        /// Loads a mixed document from a text.
        /// </summary>
        /// <param name="html">The text to load.</param>
        public void LoadHtml(string html)
        {
            Load(new StringReader(html));
        }

        /// <summary>
        /// Saves the mixed document to the specified stream.
        /// </summary>
        /// <param name="outStream">The stream to which you want to save.</param>
        public void Save(Stream outStream)
        {
            // The writer is deliberately not disposed: disposing it would close the caller's
            // stream. Save(TextWriter) flushes it before returning.
            StreamWriter sw = new StreamWriter(outStream, GetOutEncoding());
            Save(sw);
        }

        /// <summary>
        /// Saves the mixed document to the specified stream.
        /// </summary>
        /// <param name="outStream">The stream to which you want to save.</param>
        /// <param name="encoding">The character encoding to use.</param>
        public void Save(Stream outStream, Encoding encoding)
        {
            // Not disposed on purpose, see Save(Stream).
            StreamWriter sw = new StreamWriter(outStream, encoding);
            Save(sw);
        }

        /// <summary>
        /// Saves the mixed document to the specified file.
        /// </summary>
        /// <param name="filename">The location of the file where you want to save the document.</param>
        public void Save(string filename)
        {
            // This method owns the file it opens, so the handle is released before returning.
            using (StreamWriter sw = new StreamWriter(filename, false, GetOutEncoding()))
            {
                Save(sw);
            }
        }

        /// <summary>
        /// Saves the mixed document to the specified file.
        /// </summary>
        /// <param name="filename">The location of the file where you want to save the document.</param>
        /// <param name="encoding">The character encoding to use.</param>
        public void Save(string filename, Encoding encoding)
        {
            // This method owns the file it opens, so the handle is released before returning.
            using (StreamWriter sw = new StreamWriter(filename, false, encoding))
            {
                Save(sw);
            }
        }

        /// <summary>
        /// Saves the mixed document to the specified StreamWriter.
        /// </summary>
        /// <param name="writer">The StreamWriter to which you want to save.</param>
        public void Save(StreamWriter writer)
        {
            Save((TextWriter) writer);
        }

        /// <summary>
        /// Saves the mixed document to the specified TextWriter.
        /// </summary>
        /// <param name="writer">The TextWriter to which you want to save.</param>
        public void Save(TextWriter writer)
        {
            // NOTE(review): no fragment content is written here; the writer is only flushed.
            // This looks unfinished - confirm the intended output before relying on Save.
            writer.Flush();
        }

        #endregion

        #region Internal Methods

        /// <summary>
        /// Creates a fragment of the requested type bound to this document.
        /// </summary>
        /// <param name="type">The kind of fragment to create.</param>
        /// <returns>The newly created fragment.</returns>
        /// <exception cref="NotSupportedException">The type is neither Text nor Code.</exception>
        internal MixedCodeDocumentFragment CreateFragment(MixedCodeDocumentFragmentType type)
        {
            switch (type)
            {
                case MixedCodeDocumentFragmentType.Text:
                    return new MixedCodeDocumentTextFragment(this);

                case MixedCodeDocumentFragmentType.Code:
                    return new MixedCodeDocumentCodeFragment(this);

                default:
                    throw new NotSupportedException();
            }
        }

        /// <summary>
        /// Gets the encoding used when saving without an explicit encoding: the encoding captured
        /// by the last StreamReader-based load, or <see cref="Encoding.Default"/> when there is none.
        /// </summary>
        internal Encoding GetOutEncoding()
        {
            return _streamencoding != null ? _streamencoding : Encoding.Default;
        }

        #endregion

        #region Private Methods

        // Advances the cursor past the character held in _c and updates the line bookkeeping.
        private void IncrementPosition()
        {
            _index++;
            if (_c == '\n')
            {
                _lineposition = 1;
                _line++;
            }
            else
            {
                _lineposition++;
            }
        }

        // Returns true when 'token' occurs in _text at 'position'. Ordinal comparison, i.e. the
        // same result as comparing a Substring with ==, but without allocating a string per
        // scanned character. Callers guarantee that position + token.Length is inside _text.
        private bool IsTokenAt(int position, string token)
        {
            return string.CompareOrdinal(_text, position, token, 0, token.Length) == 0;
        }

        // Splits _text into alternating text and code fragments delimited by
        // TokenCodeStart / TokenCodeEnd.
        private void Parse()
        {
            _state = ParseState.Text;
            _index = 0;
            // Restart line bookkeeping so that a second Load does not continue counting from the
            // previous document (0 is the value both counters have on the first Load).
            _line = 0;
            _lineposition = 0;
            _currentfragment = CreateFragment(MixedCodeDocumentFragmentType.Text);

            while (_index < _text.Length)
            {
                _c = _text[_index];
                IncrementPosition();

                // From here on _index is one past the character held in _c, so a token that
                // starts with that character starts at _index - 1.
                switch (_state)
                {
                    case ParseState.Text:
                        if (_index + TokenCodeStart.Length < _text.Length &&
                            IsTokenAt(_index - 1, TokenCodeStart))
                        {
                            _state = ParseState.Code;
                            // Close the text fragment right before the start token.
                            _currentfragment.Length = _index - 1 - _currentfragment.Index;
                            _currentfragment = CreateFragment(MixedCodeDocumentFragmentType.Code);
                            SetPosition();
                            continue;
                        }
                        break;

                    case ParseState.Code:
                        if (_index + TokenCodeEnd.Length < _text.Length &&
                            IsTokenAt(_index - 1, TokenCodeEnd))
                        {
                            _state = ParseState.Text;
                            // NOTE(review): because _index is already one past the first token
                            // character, this length reaches one character beyond the end token,
                            // and the cursor below skips the character that follows the token
                            // (it is never scanned for a start token or a line feed). Fragment
                            // Index/Length are consumed outside this file, so the arithmetic is
                            // intentionally left untouched - confirm with the fragment classes
                            // before changing it.
                            _currentfragment.Length = _index + TokenCodeEnd.Length - _currentfragment.Index;
                            _index += TokenCodeEnd.Length;
                            _lineposition += TokenCodeEnd.Length;
                            _currentfragment = CreateFragment(MixedCodeDocumentFragmentType.Text);
                            SetPosition();
                            continue;
                        }
                        break;
                }
            }

            // Whatever fragment is still open extends to the end of the text.
            _currentfragment.Length = _index - _currentfragment.Index;
        }

        // Stamps the current fragment with the parser position; the fragment starts at the
        // character just before the cursor.
        private void SetPosition()
        {
            _currentfragment.Line = _line;
            _currentfragment._lineposition = _lineposition;
            _currentfragment.Index = _index - 1;
            _currentfragment.Length = 0;
        }

        #endregion

        #region Nested type: ParseState

        // State of the fragment parser: inside literal text or inside a code block.
        private enum ParseState
        {
            Text,
            Code
        }

        #endregion
    }
}