Do not skip comments, just pluck expressions/tags from within them
[mono.git] / mcs / class / System.Web / System.Web.Compilation / AspTokenizer.cs
//
// System.Web.Compilation.AspTokenizer
//
// Authors:
//      Gonzalo Paniagua Javier (gonzalo@ximian.com)
//
// (C) 2002,2003 Ximian, Inc (http://www.ximian.com)
//

//
// Permission is hereby granted, free of charge, to any person obtaining
// a copy of this software and associated documentation files (the
// "Software"), to deal in the Software without restriction, including
// without limitation the rights to use, copy, modify, merge, publish,
// distribute, sublicense, and/or sell copies of the Software, and to
// permit persons to whom the Software is furnished to do so, subject to
// the following conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
//
30
31 using System;
32 using System.Collections;
33 using System.IO;
34 using System.Text;
35 using System.Security.Cryptography;
36
37 namespace System.Web.Compilation
38 {
// Codes for the multi-character tokens produced by AspTokenizer.
//
// AspTokenizer reports a single-character token as the character's own
// code, so these constants are numbered from 0x0200000 upwards, which is
// beyond any value a char can hold.
class Token
{
        // First value of the reserved range; the codes below count up from it.
        const int Base = 0x0200000;

        // End of input.
        public const int EOF = Base;

        // A name read inside a tag.
        public const int IDENTIFIER = Base + 1;

        // A name following '@' that Directive.IsDirective () accepts.
        public const int DIRECTIVE = Base + 2;

        // An attribute value, quoted or not.
        public const int ATTVALUE = Base + 3;

        // A run of text outside any tag.
        public const int TEXT = Base + 4;

        // The two-character sequence "--" inside a tag.
        public const int DOUBLEDASH = Base + 5;

        // Not produced by the tokenizer in this file.
        public const int CLOSING = Base + 6;
}
49
// Splits ASP.NET markup read from a TextReader into tokens.
//
// get_token () returns either one of the Token.* codes or, for
// single-character tokens, the character itself. The text of the most
// recent token is available through Value. A token can be handed back with
// put_back () so that the next get_token () returns it again.
class AspTokenizer
{
#if NET_2_0
        // Number of chars collected before they are fed to the MD5 transform.
        const int CHECKSUM_BUF_SIZE = 8192;
#endif
        // Tokenizer state captured by put_back () and restored by get_token ().
        class PutBackItem
        {
                public readonly string Value;
                public readonly int Position;
                public readonly int CurrentToken;
                public readonly bool InTag;

                public PutBackItem (string value, int position, int currentToken, bool inTag)
                {
                        Value = value;
                        Position = position;
                        CurrentToken = currentToken;
                        InTag = inTag;
                }
        }

        // Line-break characters looked for in the skipped whitespace.
        static readonly char [] lfcr = new char [] { '\n', '\r' };

        // Source of characters.
        TextReader sr;
        // Token most recently returned by get_token ().
        int current_token;
        // sb holds the text of the current token; odds holds the whitespace
        // skipped inside a tag while looking for it.
        StringBuilder sb, odds;
        // Current column and line, maintained by read_char ().
        int col, line;
        // Column and line recorded when get_token () started scanning.
        int begcol, begline;
        // Count of characters consumed so far (a "\r\n" pair counts as two).
        int position;
        // True after '<' has been returned and until '>' is returned.
        bool inTag;
        // Set by the caller: a quote character inside a tag starts an attribute value.
        bool expectAttrValue;
        // Result of the last ReadAttValue (): false when the delimiting quote
        // also appeared inside a <% ... %> section of the value.
        bool alternatingQuotes;
        // True while putBackBuffer holds at least one token.
        bool hasPutBack;
        // When true every character is returned as its own token.
        bool verbatim;
        // True when val caches sb.ToString () for the current token.
        bool have_value;
        // One-character pushback used by ungetc () / read_char ().
        bool have_unget;
        int unget_value;
        string val;
        // Tokens handed back through put_back (), most recent on top.
        Stack putBackBuffer;
#if NET_2_0
        MD5 checksum;
        char[] checksum_buf = new char [CHECKSUM_BUF_SIZE];
        // Index of the last used slot in checksum_buf, -1 when it is empty.
        int checksum_buf_pos = -1;
        // Set once the final block has been hashed, so that further reads at
        // end of input do not finalise the hash a second time.
        bool checksum_final;

        // MD5 computed over the UTF-8 encoding of the characters read from the
        // reader. It is null until the first block is hashed and is finalised
        // when the reader reports end of input.
        public MD5 Checksum {
                get { return checksum; }
        }
#endif

        public AspTokenizer (TextReader reader)
        {
                this.sr = reader;
                sb = new StringBuilder ();
                odds = new StringBuilder ();
                col = line = 1;
                hasPutBack = inTag = false;
        }

        // When set, get_token () returns one character at a time and leaves
        // the tag state cleared.
        public bool Verbatim
        {
                get { return verbatim; }
                set { verbatim = value; }
        }

        // Hands the current token back so the next get_token () returns it again.
        // Throws HttpException when a token is already pending and the
        // tokenizer is not inside a tag.
        public void put_back ()
        {
                if (hasPutBack && !inTag)
                        throw new HttpException ("put_back called twice!");

                hasPutBack = true;
                if (putBackBuffer == null)
                        putBackBuffer = new Stack ();

                string text = Value;
                putBackBuffer.Push (new PutBackItem (text, position, current_token, inTag));
                // Report the position as it was before the token was read.
                position -= text.Length;
        }

        // Returns the next token: a Token.* code or a single character code.
        // Tokens handed back through put_back () are returned first, most
        // recent first, together with the position and tag state saved with them.
        public int get_token ()
        {
                if (hasPutBack) {
                        // Only PutBackItem instances are ever pushed, so a direct cast is safe.
                        PutBackItem pbi = (PutBackItem) putBackBuffer.Pop ();
                        hasPutBack = putBackBuffer.Count > 0;
                        position = pbi.Position;
                        have_value = false;
                        val = null;
                        sb = new StringBuilder (pbi.Value);
                        current_token = pbi.CurrentToken;
                        inTag = pbi.InTag;
                        return current_token;
                }

                begline = line;
                begcol = col;
                have_value = false;
                current_token = NextToken ();
                return current_token;
        }

        bool is_identifier_start_character (char c)
        {
                return (Char.IsLetter (c) || c == '_' );
        }

        bool is_identifier_part_character (char c)
        {
                return (Char.IsLetterOrDigit (c) || c == '_' || c == '-');
        }

        // Pushes one character back so the next read_char () returns it.
        void ungetc (int value)
        {
                have_unget = true;
                unget_value = value;

                // Only '/' passes through here now.
                // If we ever let \n here, update 'line'
                position--;
                col--;
        }

#if NET_2_0
        // Feeds the first 'count' buffered chars, UTF-8 encoded, to the hash
        // and empties the buffer. 'final' closes the hash.
        void TransformNextBlock (int count, bool final)
        {
                byte[] input = Encoding.UTF8.GetBytes (checksum_buf, 0, count);

                if (checksum == null)
                        checksum = MD5.Create ();

                if (final)
                        checksum.TransformFinalBlock (input, 0, input.Length);
                else
                        checksum.TransformBlock (input, 0, input.Length, input, 0);

                checksum_buf_pos = -1;
        }

        // Adds a character read from the reader to the checksum; -1 (end of
        // input) flushes the buffer and finalises the hash.
        void UpdateChecksum (int c)
        {
                // The reader keeps returning -1 once exhausted; finalising
                // again would disturb the finished hash.
                if (checksum_final)
                        return;

                if (c == -1) {
                        checksum_final = true;
                        TransformNextBlock (checksum_buf_pos + 1, true);
                        return;
                }

                if (checksum_buf_pos + 1 >= CHECKSUM_BUF_SIZE)
                        TransformNextBlock (checksum_buf_pos + 1, false);
                checksum_buf [++checksum_buf_pos] = (char) c;
        }
#endif
        // Reads the next character, honouring a pending ungetc (). A "\r\n"
        // pair is returned as a single '\n'. Updates col, line and position.
        // Returns -1 at end of input.
        int read_char ()
        {
                int c;
                if (have_unget) {
                        c = unget_value;
                        have_unget = false;
                } else {
                        c = sr.Read ();
#if NET_2_0
                        UpdateChecksum (c);
#endif
                }

                if (c == '\r' && sr.Peek () == '\n') {
                        c = sr.Read ();
#if NET_2_0
                        UpdateChecksum (c);
#endif
                        position++;
                }

                if (c == '\n'){
                        // The increment below brings the column to 0.
                        col = -1;
                        line++;
                }

                if (c != -1) {
                        col++;
                        position++;
                }

                return c;
        }

        // Reads an attribute value into sb and returns Token.ATTVALUE.
        //
        // 'start' is the character that introduced the value. If it is a
        // single or double quote the value runs to the matching quote that is
        // not preceded by a backslash; the quotes are not stored. Otherwise
        // 'start' is the first character of an unquoted value that runs to
        // whitespace, '>' or "/>". Text between <% and %> is copied without
        // looking for terminators.
        int ReadAttValue (int start)
        {
                int quoteChar = 0;
                bool quoted = false;

                if (start == '"' || start == '\'') {
                        quoteChar = start;
                        quoted = true;
                } else {
                        sb.Append ((char) start);
                }

                int c;
                int last = 0;
                bool inServerTag = false;
                alternatingQuotes = true;

                while ((c = sr.Peek ()) != -1) {
                        if (c == '%' && last == '<') {
                                inServerTag = true;
                        } else if (inServerTag && c == '>' && last == '%') {
                                inServerTag = false;
                        } else if (!inServerTag) {
                                if (!quoted && c == '/') {
                                        read_char ();
                                        if (sr.Peek () == '>') {
                                                // "/>" ends the tag: give the slash back
                                                // so it is returned as its own token.
                                                ungetc ('/');
                                                break;
                                        }
                                        // The slash belongs to the value. Keep it and
                                        // let the next iteration examine the following
                                        // character with the normal rules.
                                        sb.Append ('/');
                                        last = '/';
                                        continue;
                                } else if (!quoted && (c == '>' || Char.IsWhiteSpace ((char) c))) {
                                        break;
                                } else if (quoted && c == quoteChar && last != '\\') {
                                        // Consume the closing quote without storing it.
                                        read_char ();
                                        break;
                                }
                        } else if (quoted && c == quoteChar) {
                                // The delimiting quote is also used inside <% ... %>.
                                alternatingQuotes = false;
                        }

                        sb.Append ((char) c);
                        read_char ();
                        last = c;
                }

                return Token.ATTVALUE;
        }

        // Scans the next token from the reader. The token text is left in sb
        // and whitespace skipped inside a tag is left in odds.
        int NextToken ()
        {
                int c;

                sb.Length = 0;
                odds.Length = 0;
                while ((c = read_char ()) != -1) {
                        if (verbatim) {
                                inTag = false;
                                sb.Append ((char) c);
                                return c;
                        }

                        if (inTag && expectAttrValue && (c == '"' || c == '\''))
                                return ReadAttValue (c);

                        if (c == '<') {
                                inTag = true;
                                sb.Append ((char) c);
                                return c;
                        }

                        if (c == '>') {
                                inTag = false;
                                sb.Append ((char) c);
                                return c;
                        }

                        // Character directly following the '<' token.
                        if (current_token == '<' && "%/!".IndexOf ((char) c) != -1) {
                                sb.Append ((char) c);
                                return c;
                        }

                        // Character following the '%' token. It is returned as a
                        // token unless a line break was skipped on the way to it;
                        // in that case it is kept in the buffer and scanning goes on.
                        if (inTag && current_token == '%' && "@#=".IndexOf ((char) c) != -1) {
                                if (odds.Length == 0 || odds.ToString ().IndexOfAny (lfcr) < 0) {
                                        sb.Append ((char) c);
                                        return c;
                                }
                                sb.Append ((char) c);
                                continue;
                        }

                        if (inTag && c == '-' && sr.Peek () == '-') {
                                sb.Append ("--");
                                read_char ();
                                return Token.DOUBLEDASH;
                        }

                        if (!inTag) {
                                // Everything up to the next '<' (or end of input) is text.
                                sb.Append ((char) c);
                                while ((c = sr.Peek ()) != -1 && c != '<')
                                        sb.Append ((char) read_char ());

                                // sb holds at least the character appended above,
                                // so this is always a text token.
                                return Token.TEXT;
                        }

                        if (inTag && current_token == '=' && !Char.IsWhiteSpace ((char) c))
                                return ReadAttValue (c);

                        if (inTag && is_identifier_start_character ((char) c)) {
                                sb.Append ((char) c);
                                while ((c = sr.Peek ()) != -1) {
                                        if (!is_identifier_part_character ((char) c) && c != ':')
                                                break;
                                        sb.Append ((char) read_char ());
                                }

                                if (current_token == '@' && Directive.IsDirective (sb.ToString ()))
                                        return Token.DIRECTIVE;

                                return Token.IDENTIFIER;
                        }

                        if (!Char.IsWhiteSpace ((char) c)) {
                                sb.Append ((char) c);
                                return c;
                        }
                        // keep otherwise discarded characters in case we need.
                        odds.Append ((char) c);
                }

                return Token.EOF;
        }

        // Text of the current token. The string is built on first access and
        // cached until the next get_token ().
        public string Value {
                get {
                        if (have_value)
                                return val;

                        have_value = true;
                        val = sb.ToString ();
                        return val;
                }
        }

        // Whitespace skipped inside a tag while scanning for the current token.
        public string Odds {
                get {
                        return odds.ToString ();
                }
        }

        public bool InTag {
                get { return inTag; }
                set { inTag = value; }
        }

        // Hack for preventing confusion with VB comments (see bug #63451)
        public bool ExpectAttrValue {
                get { return expectAttrValue; }
                set { expectAttrValue = value; }
        }

        // False when the last attribute value used its delimiting quote inside
        // a <% ... %> section as well.
        public bool AlternatingQuotes {
                get { return alternatingQuotes; }
        }

        public int BeginLine {
                get { return begline; }
        }

        public int BeginColumn {
                get { return begcol; }
        }

        public int EndLine {
                get { return line; }
        }

        public int EndColumn {
                get { return col; }
        }

        public int Position {
                get { return position; }
        }
}
418 }
419