* AspTokenizer.cs: Collect discarded characters that might be used
[mono.git] / mcs / class / System.Web / System.Web.Compilation / AspParser.cs
1 //
2 // System.Web.Compilation.AspParser
3 //
4 // Authors:
5 //      Gonzalo Paniagua Javier (gonzalo@ximian.com)
6 //
7 // (C) 2002,2003 Ximian, Inc (http://www.ximian.com)
8 //
9 using System;
10 using System.Collections;
11 using System.IO;
12 using System.Text;
13
14 namespace System.Web.Compilation
15 {
16         delegate void ParseErrorHandler (ILocation location, string message);
17         delegate void TextParsedHandler (ILocation location, string text);
18         delegate void TagParsedHandler (ILocation location, TagType tagtype, string id, TagAttributes attributes);
19
20         class AspParser : ILocation
21         {
22                 AspTokenizer tokenizer;
23                 int beginLine, endLine;
24                 int beginColumn, endColumn;
25                 int beginPosition, endPosition;
26                 string filename;
27                 string fileText;
28                 string verbatimID;
29
30                 public AspParser (string filename, TextReader input)
31                 {
32                         this.filename = filename;
33                         fileText = input.ReadToEnd ();
34                         StringReader reader = new StringReader (fileText);
35                         tokenizer = new AspTokenizer (reader);
36                 }
37
38                 public int BeginLine {
39                         get { return beginLine; }
40                 }
41
42                 public int BeginColumn {
43                         get { return beginColumn; }
44                 }
45
46                 public int EndLine {
47                         get { return endLine; }
48                 }
49
50                 public int EndColumn {
51                         get { return endColumn; }
52                 }
53
54                 public string PlainText {
55                         get {
56                                 if (beginPosition >= endPosition)
57                                         return null;
58
59                                 return fileText.Substring (beginPosition, endPosition - beginPosition);
60                         }
61                 }
62
63                 public string Filename {
64                         get { return filename; }
65                 }
66
67                 public string VerbatimID {
68                         set {
69                                 tokenizer.Verbatim = true;
70                                 verbatimID = value.ToUpper ();
71                         }
72                 }
73                 
74                 bool Eat (int expected_token)
75                 {
76                         if (tokenizer.get_token () != expected_token) {
77                                 tokenizer.put_back ();
78                                 return false;
79                         }
80
81                         endLine = tokenizer.EndLine;
82                         endColumn = tokenizer.EndColumn;
83                         return true;
84                 }
85
86                 void BeginElement ()
87                 {
88                         beginLine = tokenizer.BeginLine;
89                         beginColumn = tokenizer.BeginColumn;
90                         beginPosition = tokenizer.Position - 1;
91                 }
92
93                 void EndElement ()
94                 {
95                         endLine = tokenizer.EndLine;
96                         endColumn = tokenizer.EndColumn;
97                         endPosition = tokenizer.Position;
98                 }
99
100                 public void Parse ()
101                 {
102                         int token;
103                         string id;
104                         TagAttributes attributes;
105                         TagType tagtype;
106                         StringBuilder text =  new StringBuilder ();
107
108                         while ((token = tokenizer.get_token ()) != Token.EOF) {
109                                 BeginElement ();
110
111                                 if (tokenizer.Verbatim){
112                                         string end_verbatim = "</" + verbatimID + ">";
113                                         string verbatim_text = GetVerbatim (token, end_verbatim);
114
115                                         if (verbatim_text == null)
116                                                 OnError ("Unexpected EOF processing " + verbatimID);
117
118                                         tokenizer.Verbatim = false;
119
120                                         EndElement ();
121                                         endPosition -= end_verbatim.Length;
122                                         OnTextParsed (verbatim_text);
123                                         beginPosition = endPosition;
124                                         endPosition += end_verbatim.Length;
125                                         OnTagParsed (TagType.Close, verbatimID, null);
126                                         continue;
127                                 }
128                                 
129                                 if (token == '<') {
130                                         GetTag (out tagtype, out id, out attributes);
131                                         EndElement ();
132                                         if (tagtype == TagType.ServerComment)
133                                                 continue;
134
135                                         if (tagtype == TagType.Text)
136                                                 OnTextParsed (id);
137                                         else
138                                                 OnTagParsed (tagtype, id, attributes);
139
140                                         continue;
141                                 }
142
143                                 text.Length = 0;
144                                 do {
145                                         text.Append (tokenizer.Value);
146                                         token = tokenizer.get_token ();
147                                 } while (token != '<' && token != Token.EOF);
148
149                                 tokenizer.put_back ();
150                                 EndElement ();
151                                 OnTextParsed (text.ToString ());
152                         }
153                 }
154
155                 bool GetInclude (string str, out string pathType, out string filename)
156                 {
157                         pathType = null;
158                         filename = null;
159                         str = str.Substring (2).Trim ();
160                         int len = str.Length;
161                         int lastQuote = str.LastIndexOf ('"');
162                         if (len < 10 || lastQuote != len - 1 || !str.StartsWith ("#include "))
163                                 return false;
164
165                         str = str.Substring (9).Trim ();
166                         bool isfile = (str.StartsWith ("file"));
167                         if (!isfile && !str.StartsWith ("virtual"))
168                                 return false;
169
170                         pathType = (isfile) ? "file" : "virtual";
171                         if (str.Length < pathType.Length + 3)
172                                 return false;
173
174                         str = str.Substring (pathType.Length).Trim ();
175                         if (str.Length < 3 || str [0] != '=')
176                                 return false;
177
178                         int index = 1;
179                         for (; index < str.Length; index++) {
180                                 if (Char.IsWhiteSpace (str [index]))
181                                         index++;
182                                 else if (str [index] == '"')
183                                         break;
184                         }
185
186                         if (index == str.Length || index == lastQuote)
187                                 return false;
188
189                         str = str.Substring (index);
190                         if (str.Length == 2) { // only quotes
191                                 OnError ("Empty file name.");
192                                 return false;
193                         }
194
195                         filename = str.Trim ().Substring (index, str.Length - 2);
196                         if (filename.LastIndexOf  ('"') != -1)
197                                 return false; // file=""" -> no error
198
199                         return true;
200                 }
201
202                 void GetTag (out TagType tagtype, out string id, out TagAttributes attributes)
203                 {
204                         int token = tokenizer.get_token ();
205
206                         tagtype = TagType.ServerComment;
207                         id = null;
208                         attributes = null;
209                         switch (token){
210                         case '%':
211                                 GetServerTag (out tagtype, out id, out attributes);
212                                 break;
213                         case '/':
214                                 if (!Eat (Token.IDENTIFIER))
215                                         OnError ("expecting TAGNAME");
216
217                                 id = tokenizer.Value;
218                                 if (!Eat ('>'))
219                                         OnError ("expecting '>'. Got '" + id + "'");
220
221                                 tagtype = TagType.Close;
222                                 break;
223                         case '!':
224                                 bool double_dash = Eat (Token.DOUBLEDASH);
225                                 if (double_dash)
226                                         tokenizer.put_back ();
227
228                                 tokenizer.Verbatim = true;
229                                 string end = double_dash ? "-->" : ">";
230                                 string comment = GetVerbatim (tokenizer.get_token (), end);
231                                 tokenizer.Verbatim = false;
232                                 if (comment == null)
233                                         OnError ("Unfinished HTML comment/DTD");
234
235                                 string pathType, filename;
236                                 if (double_dash && GetInclude (comment, out pathType, out filename)) {
237                                         tagtype = TagType.Include;
238                                         attributes = new TagAttributes ();
239                                         attributes.Add (pathType, filename);
240                                 } else {
241                                         tagtype = TagType.Text;
242                                         id = "<!" + comment + end;
243                                 }
244                                 break;
245                         case Token.IDENTIFIER:
246                                 if (this.filename == "@@inner_string@@") {
247                                         // Actually not tag but "xxx < yyy" stuff in inner_string!
248                                         tagtype = TagType.Text;
249                                         tokenizer.InTag = false;
250                                         id = "<" + tokenizer.Odds + tokenizer.Value;
251                                 } else {
252                                 id = tokenizer.Value;
253                                 try {
254                                         attributes = GetAttributes ();
255                                 } catch (Exception e) {
256                                         OnError (e.Message);
257                                         break;
258                                 }
259                                 
260                                 tagtype = TagType.Tag;
261                                 if (Eat ('/') && Eat ('>'))
262                                         tagtype = TagType.SelfClosing;
263                                 else if (!Eat ('>'))
264                                         OnError ("expecting '>'. Got '" + tokenizer.Value + "'");
265                                 }
266
267                                 break;
268                         default:
269                                 tagtype = TagType.Text;
270                                 tokenizer.InTag = false;
271                                 id = "<" + tokenizer.Value;
272                                 break;
273                         }
274                 }
275
276                 TagAttributes GetAttributes ()
277                 {
278                         int token;
279                         TagAttributes attributes;
280                         string id;
281
282                         attributes = new TagAttributes ();
283                         while ((token = tokenizer.get_token ()) != Token.EOF){
284                                 if (token != Token.IDENTIFIER)
285                                         break;
286                                 id = tokenizer.Value;
287                                 if (Eat ('=')){
288                                         if (Eat (Token.ATTVALUE)){
289                                                 attributes.Add (id, tokenizer.Value);
290                                         } else if (Eat ('<') && Eat ('%')) {
291                                                 attributes.Add (id, "<%" +
292                                                                 GetVerbatim (tokenizer.get_token (), "%>"));
293                                         } else {
294                                                 OnError ("expected ATTVALUE");
295                                                 return null;
296                                         }
297                                         
298                                 } else {
299                                         attributes.Add (id, null);
300                                 }
301                         }
302
303                         tokenizer.put_back ();
304                         return attributes;
305                 }
306
307                 string GetVerbatim (int token, string end)
308                 {
309                         StringBuilder vb_text = new StringBuilder ();
310                         int i = 0;
311
312                         if (tokenizer.Value.Length > 1){
313                                 // May be we have a put_back token that is not a single character
314                                 vb_text.Append (tokenizer.Value);
315                                 token = tokenizer.get_token ();
316                         }
317
318                         while (token != Token.EOF){
319                                 if (Char.ToUpper ((char) token) == end [i]){
320                                         if (++i >= end.Length)
321                                                 break;
322                                         token = tokenizer.get_token ();
323                                         continue;
324                                 } else if (i > 0) {
325                                         for (int j = 0; j < i; j++)
326                                                 vb_text.Append (end [j]);
327                                         i = 0;
328                                 }
329
330                                 vb_text.Append ((char) token);
331                                 token = tokenizer.get_token ();
332                         } 
333
334                         return RemoveComments (vb_text.ToString ());
335                 }
336
337                 string RemoveComments (string text)
338                 {
339                         int end;
340                         int start = text.IndexOf ("<%--");
341
342                         while (start != -1) {
343                                 end = text.IndexOf ("--%>");
344                                 if (end == -1 || end <= start + 1)
345                                         break;
346
347                                 text = text.Remove (start, end - start + 4);
348                                 start = text.IndexOf ("<%--");
349                         }
350
351                         return text;
352                 }
353
354                 void GetServerTag (out TagType tagtype, out string id, out TagAttributes attributes)
355                 {
356                         string inside_tags;
357
358                         if (Eat ('@')){
359                                 tagtype = TagType.Directive;
360                                 id = "";
361                                 if (Eat (Token.DIRECTIVE))
362                                         id = tokenizer.Value;
363
364                                 attributes = GetAttributes ();
365                                 if (!Eat ('%') || !Eat ('>'))
366                                         OnError ("expecting '%>'");
367
368                                 return;
369                         }
370                         
371                         if (Eat (Token.DOUBLEDASH)) {
372                                 tokenizer.Verbatim = true;
373                                 inside_tags = GetVerbatim (tokenizer.get_token (), "--%>");
374                                 tokenizer.Verbatim = false;
375                                 id = null;
376                                 attributes = null;
377                                 tagtype = TagType.ServerComment;
378                                 return;
379                         }
380
381                         bool varname;
382                         bool databinding;
383                         varname = Eat ('=');
384                         databinding = !varname && Eat ('#');
385
386                         tokenizer.Verbatim = true;
387                         inside_tags = GetVerbatim (tokenizer.get_token (), "%>");
388                         tokenizer.Verbatim = false;
389                         id = inside_tags;
390                         attributes = null;
391                         tagtype = (databinding ? TagType.DataBinding :
392                                   (varname ? TagType.CodeRenderExpression : TagType.CodeRender));
393                 }
394
395                 public event ParseErrorHandler Error;
396                 public event TagParsedHandler TagParsed;
397                 public event TextParsedHandler TextParsed;
398
399                 void OnError (string msg)
400                 {
401                         if (Error != null)
402                                 Error (this, msg);
403                 }
404
405                 void OnTagParsed (TagType tagtype, string id, TagAttributes attributes)
406                 {
407                         if (TagParsed != null)
408                                 TagParsed (this, tagtype, id, attributes);
409                 }
410
411                 void OnTextParsed (string text)
412                 {
413                         if (TextParsed != null)
414                                 TextParsed (this, text);
415                 }
416         }
417
418 }
419