ace6ab9c2ba6b99598909f57b8fce95534905548
[mono.git] / mcs / class / System.Web / System.Web.Compilation / AspTokenizer.cs
1 //
2 // System.Web.Compilation.AspTokenizer
3 //
4 // Authors:
5 //      Gonzalo Paniagua Javier (gonzalo@ximian.com)
6 //
7 // (C) 2002,2003 Ximian, Inc (http://www.ximian.com)
8 //
9
10 //
11 // Permission is hereby granted, free of charge, to any person obtaining
12 // a copy of this software and associated documentation files (the
13 // "Software"), to deal in the Software without restriction, including
14 // without limitation the rights to use, copy, modify, merge, publish,
15 // distribute, sublicense, and/or sell copies of the Software, and to
16 // permit persons to whom the Software is furnished to do so, subject to
17 // the following conditions:
18 // 
19 // The above copyright notice and this permission notice shall be
20 // included in all copies or substantial portions of the Software.
21 // 
22 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
23 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
24 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
25 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
26 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
27 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
28 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
29 //
30
31 using System;
32 using System.Collections;
33 using System.IO;
34 using System.Text;
35 using System.Security.Cryptography;
36
37 namespace System.Web.Compilation
38 {
// Codes for the multi-character tokens produced by AspTokenizer.
//
// The tokenizer returns a single character as its own character code, so
// these codes start above 0xFFFF (the largest char value) and can never
// collide with a one-character token.
class Token
{
        // End of input.
        public const int EOF            = 0x0200000;
        // A name found inside a tag.
        public const int IDENTIFIER     = EOF + 1;
        // A name following '@' that Directive.IsDirective accepts.
        public const int DIRECTIVE      = EOF + 2;
        // An attribute value, quoted or not.
        public const int ATTVALUE       = EOF + 3;
        // A run of text outside any tag.
        public const int TEXT           = EOF + 4;
        // The "--" sequence inside a tag.
        public const int DOUBLEDASH     = EOF + 5;
        // Not produced by the tokenizer in this file; presumably used by callers.
        public const int CLOSING        = EOF + 6;
}
49
50         class AspTokenizer
51         {
52 #if NET_2_0
53                 const int CHECKSUM_BUF_SIZE = 8192;
54 #endif
// Immutable snapshot of the tokenizer state saved by put_back () so that
// get_token () can hand the same token out again later.
class PutBackItem
{
        // Text of the token that was put back.
        public readonly string Value;
        // Tokenizer position recorded when the token was put back.
        public readonly int Position;
        // Token code that was current when the token was put back.
        public readonly int CurrentToken;
        // Whether the tokenizer was inside a tag at that point.
        public readonly bool InTag;

        public PutBackItem (string value, int position, int currentToken, bool inTag)
        {
                this.Value = value;
                this.Position = position;
                this.CurrentToken = currentToken;
                this.InTag = inTag;
        }
}
70                 
// Line terminators searched for in the skipped whitespace kept in 'odds'.
// readonly: the array is shared by every tokenizer and must never be replaced.
static readonly char [] lfcr = new char [] { '\n', '\r' };
// Source of the characters being tokenized.
TextReader sr;
// Code of the token most recently returned by get_token ().
int current_token;
// sb holds the text of the current token; odds holds the whitespace that
// was skipped inside a tag while scanning for it.
StringBuilder sb, odds;
// Column and line of the reader after the last character consumed.
int col, line;
// Column and line at which the scan for the current token started.
int begcol, begline;
// Number of characters consumed so far (a "\r\n" pair counts as two).
int position;
// True between a '<' token and the matching '>' token.
bool inTag;
// When set, a quote character inside a tag starts an attribute value.
bool expectAttrValue;
// Set by ReadAttValue (); see the AlternatingQuotes property.
bool alternatingQuotes;
// True while putBackBuffer holds at least one token to be returned again.
bool hasPutBack;
// When set, every character is returned as a token of its own.
bool verbatim;
// True once 'val' caches sb.ToString () for the current token.
bool have_value;
// True when unget_value holds a character to be returned by the next read_char ().
bool have_unget;
int unget_value;
// Cached text of the current token; only meaningful while have_value is set.
string val;
// Stack of PutBackItem; created on the first call to put_back ().
Stack putBackBuffer;
#if NET_2_0
// MD5 over the UTF-8 encoding of the characters read from the reader.
// Stays null until the first block of characters has been hashed.
MD5 checksum;
// Characters waiting to be fed to 'checksum'.
char[] checksum_buf = new char [CHECKSUM_BUF_SIZE];
// Index of the last used slot in checksum_buf; -1 when the buffer is empty.
int checksum_buf_pos = -1;

// The hash object accumulating the checksum of the input read so far.
public MD5 Checksum
{
        get
        {
                return checksum;
        }
}
#endif
97                 
// Creates a tokenizer that pulls its characters from 'reader'.
// Scanning starts at line 1, column 1, outside any tag.
public AspTokenizer (TextReader reader)
{
        sr = reader;

        sb = new StringBuilder ();
        odds = new StringBuilder ();

        line = 1;
        col = 1;

        inTag = false;
        hasPutBack = false;
}
106
// While true, the tokenizer returns every character as a token of its
// own and treats the input as being outside any tag.
public bool Verbatim {
        get {
                return verbatim;
        }
        set {
                verbatim = value;
        }
}
112
// Pushes the current token back so the next get_token () returns it again.
//
// More than one token may be pending only while inside a tag; outside a
// tag a second put_back () throws HttpException.
public void put_back ()
{
        bool alreadyPending = hasPutBack;
        if (alreadyPending && !inTag) {
                throw new HttpException ("put_back called twice!");
        }

        if (putBackBuffer == null) {
                putBackBuffer = new Stack ();
        }
        hasPutBack = true;

        // Remember where we are, then rewind 'position' past the token text.
        string text = Value;
        PutBackItem saved = new PutBackItem (text, position, current_token, inTag);
        putBackBuffer.Push (saved);
        position -= text.Length;
}
126                 
// Returns the code of the next token: either a Token.* constant or, for
// single-character tokens, the character itself.
//
// Tokens saved by put_back () are handed out first (most recent first);
// only when none is pending is new input scanned.
public int get_token ()
{
        if (!hasPutBack) {
                // Normal path: remember where the token starts and scan it.
                begline = line;
                begcol = col;
                have_value = false;
                current_token = NextToken ();
                return current_token;
        }

        PutBackItem item = (PutBackItem) putBackBuffer.Pop ();

        // In verbatim mode tokens are single characters, so a non-empty
        // put-back token is re-issued as its first character, outside any
        // tag. A one-character token leaves no text behind; a longer one
        // keeps its whole text as the token value.
        if (verbatim && item.Value.Length > 0) {
                string text = item.Value;
                string remaining = (text.Length == 1) ? String.Empty : text;
                item = new PutBackItem (remaining, item.Position, (int) text [0], false);
        }

        hasPutBack = putBackBuffer.Count > 0;

        // Restore the state captured by put_back ().
        position = item.Position;
        inTag = item.InTag;
        sb = new StringBuilder (item.Value);
        val = null;
        have_value = false;
        current_token = item.CurrentToken;
        return current_token;
}
166
// True when 'c' may begin an identifier: a letter or an underscore.
bool is_identifier_start_character (char c)
{
        if (c == '_')
                return true;

        return Char.IsLetter (c);
}
171
// True when 'c' may appear after the first character of an identifier:
// a letter, a digit, an underscore or a dash.
bool is_identifier_part_character (char c)
{
        switch (c) {
        case '_':
        case '-':
                return true;
        default:
                return Char.IsLetterOrDigit (c);
        }
}
176
// Hands 'value' back so that the next read_char () returns it instead of
// reading from the reader. Only one character can be pending at a time.
void ungetc (int value)
{
        unget_value = value;
        have_unget = true;

        // Undo the bookkeeping read_char () did for this character.
        // Only '/' is ever passed in at the moment; if '\n' is ever allowed
        // through, 'line' has to be adjusted here as well.
        col--;
        position--;
}
187
#if NET_2_0
// Set once the final block has been hashed; later updates are ignored.
bool checksum_finalized;

// Feeds the first 'count' buffered characters (UTF-8 encoded) to the
// hash, creating the hash object on first use, and empties the buffer.
// 'final' marks the last block of the input.
void TransformNextBlock (int count, bool final)
{
        byte[] input = Encoding.UTF8.GetBytes (checksum_buf, 0, count);

        if (checksum == null)
                checksum = MD5.Create ();

        if (final)
                checksum.TransformFinalBlock (input, 0, input.Length);
        else
                checksum.TransformBlock (input, 0, input.Length, input, 0);

        // The buffer is empty again.
        checksum_buf_pos = -1;
}

// Adds the character 'c' to the running checksum; -1 (end of input)
// flushes what is buffered and completes the hash.
void UpdateChecksum (int c)
{
        // read_char () reports -1 on every call made after the input is
        // exhausted. Finalizing a second time may fail or replace the
        // finished digest with one computed over no data, depending on the
        // hash implementation, so only the first end-of-input counts.
        if (checksum_finalized)
                return;

        if (c == -1) {
                TransformNextBlock (checksum_buf_pos + 1, true);
                checksum_finalized = true;
                return;
        }

        // Flush a full buffer before storing the new character.
        if (checksum_buf_pos + 1 >= CHECKSUM_BUF_SIZE)
                TransformNextBlock (checksum_buf_pos + 1, false);
        checksum_buf [++checksum_buf_pos] = (char) c;
}
#endif
// Returns the next character, or -1 at end of input.
//
// A character handed back through ungetc () is returned first. A "\r\n"
// pair is delivered as a single '\n'. Line, column and position are kept
// up to date for every character delivered.
int read_char ()
{
        int ch;
        if (!have_unget) {
                ch = sr.Read ();
#if NET_2_0
                UpdateChecksum (ch);
#endif
        } else {
                // The pushed-back character was already checksummed when it
                // was first read.
                have_unget = false;
                ch = unget_value;
        }

        // Swallow the '\r' of a "\r\n" pair; it still counts as a consumed
        // character.
        if (ch == '\r' && sr.Peek () == '\n') {
                ch = sr.Read ();
#if NET_2_0
                UpdateChecksum (ch);
#endif
                position++;
        }

        if (ch == -1)
                return ch;

        if (ch == '\n') {
                // Columns restart at 0 after a line break.
                line++;
                col = 0;
        } else {
                col++;
        }
        position++;

        return ch;
}
250
// Reads an attribute value into 'sb' and returns Token.ATTVALUE.
//
// 'start' is the first character of the value, already consumed by the
// caller. If it is a single or double quote the value runs to the
// matching quote (a quote preceded by a backslash does not end it); the
// quotes themselves are consumed but not stored. Otherwise 'start' is the
// first character of an unquoted value, which ends before whitespace,
// '>' or "/>".
//
// Everything between "<%" and "%>" is copied as is. AlternatingQuotes is
// cleared when the value's own quote character shows up inside such a
// server block.
int ReadAttValue (int start)
{
        int quoteChar = 0;
        bool quoted = false;

        if (start == '"' || start == '\'') {
                quoteChar = start;
                quoted = true;
        } else {
                sb.Append ((char) start);
        }

        int c;
        int last = 0;
        bool inServerTag = false;
        alternatingQuotes = true;

        while ((c = sr.Peek ()) != -1) {
                if (c == '%' && last == '<') {
                        inServerTag = true;
                } else if (inServerTag && c == '>' && last == '%') {
                        inServerTag = false;
                } else if (!inServerTag) {
                        if (!quoted && c == '/') {
                                // A '/' ends the value only when it begins "/>".
                                read_char ();
                                if (sr.Peek () == '>') {
                                        // Leave the '/' for the caller to see.
                                        ungetc ('/');
                                        break;
                                }

                                // Otherwise the '/' belongs to the value. Store it
                                // and let the loop examine the following character,
                                // which may itself end the value.
                                sb.Append ('/');
                                last = '/';
                                continue;
                        } else if (!quoted && (c == '>' || Char.IsWhiteSpace ((char) c))) {
                                break;
                        } else if (quoted && c == quoteChar && last != '\\') {
                                // Closing quote: consume it, do not store it.
                                read_char ();
                                break;
                        }
                } else if (quoted && c == quoteChar) {
                        alternatingQuotes = false;
                }

                sb.Append ((char) c);
                read_char ();
                last = c;
        }

        return Token.ATTVALUE;
}
300
// Scans the next token from the reader.
//
// The token text is left in 'sb' and whitespace skipped inside a tag in
// 'odds'. Returns a Token.* code or, for a single-character token, the
// character itself. 'current_token' still holds the previous token while
// this runs, which is what the context checks below rely on.
int NextToken ()
{
        sb.Length = 0;
        odds.Length = 0;

        int ch;
        while ((ch = read_char ()) != -1) {
                char chr = (char) ch;

                // Verbatim mode: every character is a token of its own.
                if (verbatim) {
                        inTag = false;
                        sb.Append (chr);
                        return ch;
                }

                bool isQuote = ch == '"' || ch == '\'';
                if (inTag && expectAttrValue && isQuote)
                        return ReadAttValue (ch);

                // Tag delimiters switch the in-tag state.
                if (ch == '<' || ch == '>') {
                        inTag = ch == '<';
                        sb.Append (chr);
                        return ch;
                }

                // Markers that may directly follow '<'.
                if (current_token == '<' && (ch == '%' || ch == '/' || ch == '!')) {
                        sb.Append (chr);
                        return ch;
                }

                // Markers that may follow "<%". They form a token only when no
                // line break was skipped on the way here; otherwise the
                // character is kept and scanning goes on.
                if (inTag && current_token == '%' && (ch == '@' || ch == '#' || ch == '=')) {
                        sb.Append (chr);
                        bool noLineBreakSkipped = odds.Length == 0 || odds.ToString ().IndexOfAny (lfcr) < 0;
                        if (noLineBreakSkipped)
                                return ch;
                        continue;
                }

                if (inTag && ch == '-' && sr.Peek () == '-') {
                        sb.Append ("--");
                        read_char ();
                        return Token.DOUBLEDASH;
                }

                // Outside a tag: everything up to the next '<' is text.
                if (!inTag) {
                        sb.Append (chr);
                        int next;
                        while ((next = sr.Peek ()) != -1 && next != '<')
                                sb.Append ((char) read_char ());

                        return (next != -1 || sb.Length > 0) ? Token.TEXT : Token.EOF;
                }

                // From here on we are known to be inside a tag.

                // Unquoted attribute value right after '='.
                if (current_token == '=' && !Char.IsWhiteSpace (chr))
                        return ReadAttValue (ch);

                if (is_identifier_start_character (chr)) {
                        sb.Append (chr);

                        // ':' is accepted inside names as well.
                        int next = sr.Peek ();
                        while (next != -1 && (next == ':' || is_identifier_part_character ((char) next))) {
                                sb.Append ((char) read_char ());
                                next = sr.Peek ();
                        }

                        bool isDirective = current_token == '@' && Directive.IsDirective (sb.ToString ());
                        return isDirective ? Token.DIRECTIVE : Token.IDENTIFIER;
                }

                // Any other visible character is a token of its own.
                if (!Char.IsWhiteSpace (chr)) {
                        sb.Append (chr);
                        return ch;
                }

                // Whitespace inside a tag is skipped, but kept in case the
                // caller needs it.
                odds.Append (chr);
        }

        return Token.EOF;
}
384
// Text of the current token. The string is built from 'sb' on first
// access and cached until the next token is fetched.
public string Value
{
        get
        {
                if (!have_value) {
                        val = sb.ToString ();
                        have_value = true;
                }

                return val;
        }
}
395
// Whitespace that was skipped inside a tag while scanning the current token.
public string Odds
{
        get { return odds.ToString (); }
}
401
// Whether the tokenizer currently considers itself inside a tag.
// Callers may override the state.
public bool InTag
{
        get {
                return inTag;
        }
        set {
                inTag = value;
        }
}
406
// When set, a quote character met inside a tag starts an attribute value.
// This is a hack that keeps such quotes from being confused with VB
// comments (see bug #63451).
public bool ExpectAttrValue
{
        get {
                return expectAttrValue;
        }
        set {
                expectAttrValue = value;
        }
}
412                 
// Result of the last attribute value read: false when the value's own
// quote character also appeared inside a <% %> block within it.
public bool AlternatingQuotes
{
        get {
                return alternatingQuotes;
        }
}
416                 
// Line at which the scan for the current token started.
public int BeginLine
{
        get {
                return begline;
        }
}
420
// Column at which the scan for the current token started.
public int BeginColumn
{
        get {
                return begcol;
        }
}
424
// Line the reader is on after the last character consumed.
public int EndLine
{
        get {
                return line;
        }
}
428
// Column the reader is at after the last character consumed.
public int EndColumn
{
        get {
                return col;
        }
}
432
// Number of characters consumed so far, adjusted for tokens put back.
public int Position
{
        get {
                return position;
        }
}
436         }
437 }
438