Merge pull request #4670 from BrzVlad/fix-sgen-testing
[mono.git] / mcs / class / System.Web / System.Web.Compilation / AspTokenizer.cs
1 //
2 // System.Web.Compilation.AspTokenizer
3 //
4 // Authors:
5 //      Gonzalo Paniagua Javier (gonzalo@ximian.com)
6 //      Marek Habersack <mhabersack@novell.com>
7 //
8 // (C) 2002,2003 Ximian, Inc (http://www.ximian.com)
9 // (C) 2003-2009 Novell, Inc (http://novell.com)
10 //
11
12 //
13 // Permission is hereby granted, free of charge, to any person obtaining
14 // a copy of this software and associated documentation files (the
15 // "Software"), to deal in the Software without restriction, including
16 // without limitation the rights to use, copy, modify, merge, publish,
17 // distribute, sublicense, and/or sell copies of the Software, and to
18 // permit persons to whom the Software is furnished to do so, subject to
19 // the following conditions:
20 // 
21 // The above copyright notice and this permission notice shall be
22 // included in all copies or substantial portions of the Software.
23 // 
24 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
28 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
29 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
30 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
31 //
32
33 using System;
34 using System.Collections;
35 using System.IO;
36 using System.Text;
37 using System.Security.Cryptography;
38
39 namespace System.Web.Compilation
40 {
//
// Token codes returned by AspTokenizer for everything that is not a
// single character. Single-character tokens are returned as the
// character's own code, so these values start at 0x200000, which is
// above anything a char can hold.
//
class Token
{
        // The reader has no more input.
        public const int EOF = 0x200000;

        // A name read inside a tag.
        public const int IDENTIFIER = 0x200001;

        // A name following '@' that Directive.IsDirective accepts.
        public const int DIRECTIVE = 0x200002;

        // An attribute value, quoted or unquoted.
        public const int ATTVALUE = 0x200003;

        // A run of characters outside any tag.
        public const int TEXT = 0x200004;

        // The sequence "--" inside a tag.
        public const int DOUBLEDASH = 0x200005;

        // Not produced by the tokenizer code in this file.
        public const int CLOSING = 0x200006;
}
51
52         class AspTokenizer
53         {
// Capacity, in chars, of the buffer that collects input before it is
// handed to the MD5 checksum.
const int CHECKSUM_BUF_SIZE = 8 * 1024;
55
//
// Immutable snapshot of the tokenizer state. put_back () pushes one of
// these and get_token () restores the tokenizer from it.
//
class PutBackItem
{
        // Text of the token that was put back.
        public readonly string Value;

        // Value of the tokenizer's position counter when the token was put back.
        public readonly int Position;

        // Code of the token that was put back.
        public readonly int CurrentToken;

        // Whether the tokenizer was inside a tag at that point.
        public readonly bool InTag;

        public PutBackItem (string value, int position, int currentToken, bool inTag)
        {
                this.Value = value;
                this.Position = position;
                this.CurrentToken = currentToken;
                this.InTag = inTag;
        }
}
71                 
// Line-break characters looked for in the whitespace collected in 'odds'.
static char [] lfcr = { '\n', '\r' };

// Reader that supplies the input characters.
TextReader sr;

// Token most recently returned by get_token ().
int current_token;

// Text of the token being built.
StringBuilder sb;

// Whitespace skipped inside a tag while looking for the current token.
StringBuilder odds;

// Current column and line; maintained by read_char ().
int col;
int line;

// Column and line recorded when get_token () starts scanning a new token.
int begcol;
int begline;

// Running count of consumed characters; read_char () increments it,
// ungetc () and put_back () move it back.
int position;

// True after a '<' token and until the matching '>' token.
bool inTag;

// When true, a quote character met inside a tag starts an attribute value.
bool expectAttrValue;

// Outcome of the last ReadAttValue (): false when the delimiting quote
// also appeared inside a <% ... %> section of the value.
bool alternatingQuotes;

// True while putBackBuffer holds at least one item.
bool hasPutBack;

// When true, NextToken () returns every character as its own token.
bool verbatim;

// Cache for the Value property: 'val' is valid only while 'have_value' is set.
bool have_value;

// One-character pushback used by ungetc () / read_char ().
bool have_unget;
int unget_value;

string val;

// Stack of PutBackItem objects; created on the first put_back ().
Stack putBackBuffer;

// MD5 over the characters read so far; created on first use.
MD5 checksum;

// Characters waiting to be hashed, and the index of the last used slot
// (-1 when the buffer is empty).
char[] checksum_buf = new char [CHECKSUM_BUF_SIZE];
int checksum_buf_pos = -1;
92                 
// The MD5 object fed with the input read so far; null until the first
// block of input has been hashed.
public MD5 Checksum
{
        get {
                return this.checksum;
        }
}
96                 
// Creates a tokenizer that reads from 'reader'. Line and column
// counting start at 1; the tokenizer starts outside any tag with
// nothing put back.
public AspTokenizer (TextReader reader)
{
        sr = reader;

        sb = new StringBuilder ();
        odds = new StringBuilder ();

        line = 1;
        col = 1;

        inTag = false;
        hasPutBack = false;
}
105
// When set, get_token () hands back the input one character at a time
// instead of recognizing tags, identifiers and text runs.
public bool Verbatim {
        get {
                return this.verbatim;
        }
        set {
                this.verbatim = value;
        }
}
111
// Pushes the current token back so that the next get_token () returns it
// again. Outside a tag only one token may be pending; inside a tag the
// pending tokens stack up.
//
// Throws HttpException when a token is already pending and the tokenizer
// is not inside a tag.
public void put_back ()
{
        if (!inTag && hasPutBack)
                throw new HttpException ("put_back called twice!");

        hasPutBack = true;
        if (putBackBuffer == null) {
                // The stack is only allocated once something is actually put back.
                putBackBuffer = new Stack ();
        }

        string tokenText = Value;
        PutBackItem snapshot = new PutBackItem (tokenText, position, current_token, inTag);
        putBackBuffer.Push (snapshot);

        // Rewind the position counter over the token that was handed back.
        position -= tokenText.Length;
}
125                 
// Returns the next token: either the most recently put back one or, when
// nothing is pending, a freshly scanned one. The result is also stored
// in current_token.
public int get_token ()
{
        if (!hasPutBack) {
                // Remember where the new token starts, then scan it.
                begline = line;
                begcol = col;
                have_value = false;
                current_token = NextToken ();
                return current_token;
        }

        PutBackItem restored = putBackBuffer.Pop () as PutBackItem;
        if (verbatim) {
                // In verbatim mode a put back token is re-reported as its first
                // character and outside a tag. A one-character token keeps no
                // text, a longer one keeps all of it, and an empty one is
                // returned unchanged.
                string text = restored.Value;
                if (text.Length > 0) {
                        string kept = text.Length == 1 ? String.Empty : text;
                        restored = new PutBackItem (kept, restored.Position, (int) text [0], false);
                }
        }

        hasPutBack = putBackBuffer.Count > 0;

        // Restore the tokenizer state from the snapshot and drop the cached Value.
        position = restored.Position;
        have_value = false;
        val = null;
        sb = new StringBuilder (restored.Value);
        current_token = restored.CurrentToken;
        inTag = restored.InTag;
        return current_token;
}
165
// True when 'c' may begin an identifier: a letter or an underscore.
bool is_identifier_start_character (char c)
{
        if (c == '_')
                return true;

        return Char.IsLetter (c);
}
170
// True when 'c' may continue an identifier: a letter, a digit, an
// underscore or a dash.
bool is_identifier_part_character (char c)
{
        switch (c) {
        case '_':
        case '-':
                return true;
        default:
                return Char.IsLetterOrDigit (c);
        }
}
175
// Pushes a single character back so that the next read_char () returns
// it, and undoes the position/column advance made when it was read.
//
// Only '/' is pushed back at the moment. If '\n' is ever allowed here,
// 'line' has to be adjusted as well.
void ungetc (int value)
{
        unget_value = value;
        have_unget = true;

        col--;
        position--;
}
186
// Hashes the first 'count' characters of checksum_buf, encoded as UTF-8,
// and marks the buffer as empty. The MD5 object is created on first use.
// When 'final' is true the data is submitted as the last block of the
// hash.
void TransformNextBlock (int count, bool final)
{
        byte[] utf8 = Encoding.UTF8.GetBytes (checksum_buf, 0, count);

        if (checksum == null)
                checksum = MD5.Create ();

        if (!final)
                checksum.TransformBlock (utf8, 0, utf8.Length, utf8, 0);
        else
                checksum.TransformFinalBlock (utf8, 0, utf8.Length);

        // -1 means "no characters buffered".
        checksum_buf_pos = -1;
}
202                 
// Set once the end-of-input block has been submitted to the hash.
bool checksum_finalized;

// Feeds one value obtained from the reader into the running checksum.
//
// c: a character code, or -1 for end of input.
//
// Characters are collected in checksum_buf and hashed whenever the buffer
// is full. End of input flushes what is buffered as the final block.
//
// read_char () reports -1 every time it is called at end of input, so
// this method can see -1 more than once. Only the first one finalizes
// the hash: a further TransformFinalBlock call would hash an empty block
// and, on hash implementations that reset after the final block, replace
// the digest of the real input. NOTE(review): confirm the reset behaviour
// for the runtime in use.
void UpdateChecksum (int c)
{
        if (c != -1) {
                // Make room first when the buffer is already full.
                if (checksum_buf_pos + 1 >= CHECKSUM_BUF_SIZE)
                        TransformNextBlock (checksum_buf_pos + 1, false);
                checksum_buf [++checksum_buf_pos] = (char) c;
                return;
        }

        if (checksum_finalized)
                return;

        checksum_finalized = true;
        TransformNextBlock (checksum_buf_pos + 1, true);
}
214
// Consumes and returns the next input character, or -1 at end of input.
//
// A character pushed back with ungetc () is returned first and is not
// fed to the checksum again. A "\r\n" pair is returned as a single '\n'.
// The line, column and position counters are updated for every
// character returned.
int read_char ()
{
        int ch;
        if (!have_unget) {
                ch = sr.Read ();
                UpdateChecksum (ch);
        } else {
                have_unget = false;
                ch = unget_value;
        }

        // Fold "\r\n" into '\n'; the swallowed '\r' still counts as one position.
        if (ch == '\r' && sr.Peek () == '\n') {
                ch = sr.Read ();
                UpdateChecksum (ch);
                position++;
        }

        // Nothing was consumed, so no counter moves.
        if (ch == -1)
                return ch;

        if (ch == '\n') {
                line++;
                col = 0;
        } else {
                col++;
        }
        position++;

        return ch;
}
244
// Reads an attribute value into 'sb' and returns Token.ATTVALUE.
//
// start: the character that opened the value. When it is a single or
//        double quote the value is quoted and runs to the next
//        occurrence of that quote not preceded by a backslash; the
//        quotes are not stored. Otherwise 'start' is the first character
//        of an unquoted value, which runs up to whitespace, '>' or "/>".
//
// Text between "<%" and "%>" is copied without looking for terminators.
// alternatingQuotes is set to true on entry and cleared when the
// delimiting quote character also occurs inside such a section.
int ReadAttValue (int start)
{
        int quoteChar = 0;
        bool quoted = false;

        if (start == '"' || start == '\'') {
                quoteChar = start;
                quoted = true;
        } else {
                sb.Append ((char) start);
        }

        int c;
        int last = 0;
        bool inServerTag = false;
        alternatingQuotes = true;

        while ((c = sr.Peek ()) != -1) {
                if (c == '%' && last == '<') {
                        inServerTag = true;
                } else if (inServerTag && c == '>' && last == '%') {
                        inServerTag = false;
                } else if (!inServerTag) {
                        if (!quoted && c == '/') {
                                read_char ();
                                if (sr.Peek () == '>') {
                                        // "/>" closes the tag: give the slash back and stop.
                                        ungetc ('/');
                                        break;
                                }

                                // Any other slash belongs to the value. Keep it, and let
                                // the next pass examine the character after it, so that
                                // whitespace, '>' or "<%" following the slash are still
                                // recognized.
                                sb.Append ('/');
                                last = '/';
                                continue;
                        } else if (!quoted && (c == '>' || Char.IsWhiteSpace ((char) c))) {
                                break;
                        } else if (quoted && c == quoteChar && last != '\\') {
                                // Closing quote: consume it without storing it.
                                read_char ();
                                break;
                        }
                } else if (quoted && c == quoteChar) {
                        alternatingQuotes = false;
                }

                sb.Append ((char) c);
                read_char ();
                last = c;
        }

        return Token.ATTVALUE;
}
294
// Scans the next token from the reader.
//
// Returns a Token.* code, or the character's own code for
// single-character tokens. The token text is left in 'sb'; whitespace
// skipped inside a tag is collected in 'odds'.
int NextToken ()
{
        sb.Length = 0;
        odds.Length = 0;

        int code;
        while ((code = read_char ()) != -1) {
                char ch = (char) code;

                // Verbatim mode: every character is a token of its own.
                if (verbatim) {
                        inTag = false;
                        sb.Append (ch);
                        return code;
                }

                // A quote where the caller announced an attribute value.
                bool isQuote = code == '"' || code == '\'';
                if (isQuote && inTag && expectAttrValue)
                        return ReadAttValue (code);

                // '<' enters a tag, '>' leaves it.
                if (code == '<' || code == '>') {
                        inTag = code == '<';
                        sb.Append (ch);
                        return code;
                }

                // The character right after '<' in "<%", "</" and "<!".
                if (current_token == '<' && "%/!".IndexOf (ch) != -1) {
                        sb.Append (ch);
                        return code;
                }

                // '@', '#' or '=' after "<%" is a token of its own unless a
                // line break was skipped in between; in that case it stays in
                // 'sb' and scanning goes on.
                if (inTag && current_token == '%' && "@#=".IndexOf (ch) != -1) {
                        sb.Append (ch);
                        bool lineBreakSkipped = odds.Length != 0 && odds.ToString ().IndexOfAny (lfcr) >= 0;
                        if (lineBreakSkipped)
                                continue;
                        return code;
                }

                if (inTag && code == '-' && sr.Peek () == '-') {
                        sb.Append ("--");
                        read_char ();
                        return Token.DOUBLEDASH;
                }

                // Outside a tag everything up to the next '<' is one text run.
                if (!inTag) {
                        sb.Append (ch);
                        int next = sr.Peek ();
                        while (next != -1 && next != '<') {
                                sb.Append ((char) read_char ());
                                next = sr.Peek ();
                        }

                        return (next != -1 || sb.Length > 0) ? Token.TEXT : Token.EOF;
                }

                // From here on the tokenizer is known to be inside a tag.

                // Unquoted attribute value following '='.
                if (current_token == '=' && !Char.IsWhiteSpace (ch))
                        return ReadAttValue (code);

                if (is_identifier_start_character (ch)) {
                        sb.Append (ch);
                        for (int peeked = sr.Peek (); peeked != -1; peeked = sr.Peek ()) {
                                // ':' is accepted inside a name in addition to the
                                // regular identifier characters.
                                if (peeked != ':' && !is_identifier_part_character ((char) peeked))
                                        break;
                                sb.Append ((char) read_char ());
                        }

                        if (current_token == '@' && Directive.IsDirective (sb.ToString ()))
                                return Token.DIRECTIVE;

                        return Token.IDENTIFIER;
                }

                if (Char.IsWhiteSpace (ch)) {
                        // Keep the otherwise discarded whitespace in case it is needed.
                        odds.Append (ch);
                        continue;
                }

                // Any other character is returned as itself.
                sb.Append (ch);
                return code;
        }

        return Token.EOF;
}
378
// Text of the current token. The string is built from 'sb' on first
// access and cached until the next token is fetched.
public string Value {
        get {
                if (!have_value) {
                        val = sb.ToString ();
                        have_value = true;
                }

                return val;
        }
}
389
// Whitespace that was skipped inside a tag while scanning for the
// current token.
public string Odds {
        get {
                string skipped = odds.ToString ();
                return skipped;
        }
}
395
// Whether the tokenizer currently considers itself inside a tag. The
// tokenizer updates this itself on '<' and '>'; callers may override it.
public bool InTag {
        get {
                return this.inTag;
        }
        set {
                this.inTag = value;
        }
}
400
// Hack for preventing confusion with VB comments (see bug #63451).
// While this is set, a quote character met inside a tag is taken as the
// start of an attribute value.
public bool ExpectAttrValue {
        get {
                return this.expectAttrValue;
        }
        set {
                this.expectAttrValue = value;
        }
}
406                 
// Result of the most recent attribute value scan: false when the quote
// character delimiting the value also occurred inside a <% ... %>
// section of it.
public bool AlternatingQuotes {
        get {
                return this.alternatingQuotes;
        }
}
410                 
// Line recorded when get_token () started scanning the current token.
public int BeginLine {
        get {
                return this.begline;
        }
}
414
// Column recorded when get_token () started scanning the current token.
public int BeginColumn {
        get {
                return this.begcol;
        }
}
418
// Line the tokenizer has reached in the input.
public int EndLine {
        get {
                return this.line;
        }
}
422
// Column the tokenizer has reached in the input.
public int EndColumn {
        get {
                return this.col;
        }
}
426
// Running count of characters consumed so far, adjusted backwards by
// put_back () and ungetc ().
public int Position {
        get {
                return this.position;
        }
}
430         }
431 }
432