2005-09-13 Sureshkumar T <tsureshkumar@novell.com>
[mono.git] / mcs / class / System.Web / System.Web.Compilation / AspTokenizer.cs
1 //
2 // System.Web.Compilation.AspTokenizer
3 //
4 // Authors:
5 //      Gonzalo Paniagua Javier (gonzalo@ximian.com)
6 //
7 // (C) 2002,2003 Ximian, Inc (http://www.ximian.com)
8 //
9
10 //
11 // Permission is hereby granted, free of charge, to any person obtaining
12 // a copy of this software and associated documentation files (the
13 // "Software"), to deal in the Software without restriction, including
14 // without limitation the rights to use, copy, modify, merge, publish,
15 // distribute, sublicense, and/or sell copies of the Software, and to
16 // permit persons to whom the Software is furnished to do so, subject to
17 // the following conditions:
18 // 
19 // The above copyright notice and this permission notice shall be
20 // included in all copies or substantial portions of the Software.
21 // 
22 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
23 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
24 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
25 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
26 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
27 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
28 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
29 //
30
31 using System;
32 using System.Collections;
33 using System.IO;
34 using System.Text;
35
36 namespace System.Web.Compilation
37 {
// Integer codes for the multi-character token kinds produced by
// AspTokenizer. Single-character tokens are reported by the tokenizer
// as the character code itself, so every named kind is numbered from
// a base value well above any 16-bit character code.
class Token
{
        // First value of the named-token range.
        const int FirstKind = 0x0200000;

        // End of input.
        public const int EOF = FirstKind;

        // A name made of identifier characters.
        public const int IDENTIFIER = FirstKind + 1;

        // An identifier following '@' that Directive.IsDirective accepts.
        public const int DIRECTIVE = FirstKind + 2;

        // An attribute value, quoted or unquoted.
        public const int ATTVALUE = FirstKind + 3;

        // A run of text outside of any tag.
        public const int TEXT = FirstKind + 4;

        // The "--" sequence seen inside a tag.
        public const int DOUBLEDASH = FirstKind + 5;

        // Not produced by the tokenizer code in this file.
        public const int CLOSING = FirstKind + 6;
}
48
49         class AspTokenizer
50         {
// Character source being tokenized.
TextReader sr;

// Kind of the most recently returned token (a Token.* constant or a
// character code).
int current_token;

// Text of the token currently being built.
StringBuilder sb;

// Whitespace skipped inside a tag while looking for the next token.
StringBuilder odds;

// Current line and column of the read cursor.
int line;
int col;

// Line and column at which the current token started.
int begline;
int begcol;

// Count of characters consumed so far.
int position;

// True between a '<' and the matching '>'.
bool inTag;

// When set, a quote character inside a tag starts an attribute value.
bool expectAttrValue;

// True while the last token is waiting to be handed out again.
bool hasPutBack;

// When set, every character is returned as its own token.
bool verbatim;

// Cached string form of 'sb' and the flag saying whether it is valid.
bool have_value;
string val;

// One-character push-back buffer used by ungetc/read_char.
bool have_unget;
int unget_value;

// Creates a tokenizer that reads from 'reader', starting at line 1,
// column 1, outside of any tag and with nothing put back.
public AspTokenizer (TextReader reader)
{
        sr = reader;

        sb = new StringBuilder ();
        odds = new StringBuilder ();

        line = 1;
        col = 1;

        inTag = false;
        hasPutBack = false;
}
74
// Gets or sets verbatim mode. While it is on, the scanner hands back
// each character as a separate token and leaves tag state cleared.
public bool Verbatim {
        get {
                return verbatim;
        }
        set {
                verbatim = value;
        }
}
80
// Pushes the current token back so that the next get_token () call
// returns it again. Only one token can be pending at a time; a second
// call before the token is re-read throws HttpException.
public void put_back ()
{
        if (hasPutBack) {
                throw new HttpException ("put_back called twice!");
        }

        // Rewind the consumed-character count by the token's text length;
        // get_token () adds it back when the token is handed out again.
        int tokenLength = Value.Length;
        position = position - tokenLength;
        hasPutBack = true;
}
89                 
// Returns the kind of the next token: either a Token.* constant or the
// character code of a single-character token. A token that was put
// back is returned again without touching the reader.
public int get_token ()
{
        if (!hasPutBack) {
                // Remember where this token starts and drop the cached text
                // of the previous one before scanning.
                begcol = col;
                begline = line;
                have_value = false;

                int scanned = NextToken ();
                current_token = scanned;
                return scanned;
        }

        // Re-deliver the pending token and undo the rewind done by put_back ().
        hasPutBack = false;
        position = position + Value.Length;
        return current_token;
}
104
// True when 'c' may begin an identifier: an underscore or any letter.
bool is_identifier_start_character (char c)
{
        if (c == '_')
                return true;

        return Char.IsLetter (c);
}
109
// True when 'c' may appear after the first character of an identifier:
// an underscore, a dash, or any letter or digit.
bool is_identifier_part_character (char c)
{
        switch (c) {
        case '_':
        case '-':
                return true;
        default:
                return Char.IsLetterOrDigit (c);
        }
}
114
// Pushes one character back so that the next read_char () call returns
// it instead of reading from the reader. Only a single character can
// be held.
void ungetc (int value)
{
        // Undo the bookkeeping read_char () did for this character.
        // Only '/' is pushed back at present; if '\n' ever comes through
        // here, 'line' has to be adjusted as well.
        col--;
        position--;

        unget_value = value;
        have_unget = true;
}
125                 
// Reads the next character, preferring a character pushed back with
// ungetc (), and keeps 'position', 'line' and 'col' up to date.
//
// A "\r\n" pair is collapsed into a single '\n'; the swallowed '\r'
// still counts towards 'position'.
//
// Returns the character code, or -1 at end of input (in which case no
// counter is changed).
int read_char ()
{
        int c;
        if (have_unget) {
                c = unget_value;
                have_unget = false;
        } else {
                c = sr.Read ();
        }

        if (c == '\r' && sr.Peek () == '\n') {
                c = sr.Read ();
                position++;
        }

        if (c == -1)
                return c;

        position++;
        if (c == '\n') {
                line++;
                // The constructor starts the first line at column 1, so every
                // following line must start at column 1 too. Resetting to 0
                // here made columns on later lines one less than on line 1.
                col = 1;
        } else {
                col++;
        }

        return c;
}
153
// Reads an attribute value into 'sb' and returns Token.ATTVALUE.
//
// 'start' is the first character of the value, already consumed by the
// caller. If it is a single or double quote the value is quoted: the
// quotes are not stored and the value ends at the next matching quote
// that is not preceded by a backslash. Otherwise 'start' is the first
// character of an unquoted value, which ends at whitespace, at '>' or
// at a "/>" sequence (the '/' is pushed back for the next token).
//
// Text between "<%" and "%>" is copied through without looking for
// terminators.
int ReadAttValue (int start)
{
        int quoteChar = 0;
        bool quoted = false;

        if (start == '"' || start == '\'') {
                quoteChar = start;
                quoted = true;
        } else {
                sb.Append ((char) start);
        }

        int c;
        int last = 0;
        bool inServerTag = false;

        while ((c = sr.Peek ()) != -1) {
                if (c == '%' && last == '<') {
                        inServerTag = true;
                } else if (inServerTag && c == '>' && last == '%') {
                        inServerTag = false;
                } else if (!inServerTag) {
                        if (!quoted && c == '/') {
                                read_char ();
                                if (sr.Peek () == '>') {
                                        // "/>" closes the tag: hand the '/' back to the
                                        // scanner and end the value here.
                                        ungetc ('/');
                                        break;
                                }

                                // A '/' that does not close the tag belongs to the
                                // value (e.g. href=foo/bar). Keep it and let the next
                                // iteration examine the following character, so that
                                // whitespace or '>' right after it still ends the value.
                                sb.Append ('/');
                                last = '/';
                                continue;
                        } else if (!quoted && (c == '>' || Char.IsWhiteSpace ((char) c))) {
                                break;
                        } else if (quoted && c == quoteChar && last != '\\') {
                                // Consume the closing quote without storing it.
                                read_char ();
                                break;
                        }
                }

                sb.Append ((char) c);
                read_char ();
                last = c;
        }

        return Token.ATTVALUE;
}
200
// Scans the next token from the reader, storing its text in 'sb' and
// any whitespace skipped inside a tag in 'odds'.
//
// Returns a Token.* constant for multi-character tokens, the character
// code itself for single-character tokens, or Token.EOF when the input
// is exhausted. 'current_token' still holds the previous token's kind
// while this method runs and is used to decide how to read the next one.
int NextToken ()
{
        sb.Length = 0;
        odds.Length = 0;

        for (int ch = read_char (); ch != -1; ch = read_char ()) {
                char symbol = (char) ch;

                // Verbatim mode: each character is its own token and the
                // tag state is cleared.
                if (verbatim) {
                        inTag = false;
                        sb.Append (symbol);
                        return ch;
                }

                // The caller has asked for a quote to start an attribute value.
                bool isQuote = (ch == '"' || ch == '\'');
                if (inTag && expectAttrValue && isQuote)
                        return ReadAttValue (ch);

                // Angle brackets switch the tag state and are returned as is.
                if (ch == '<' || ch == '>') {
                        inTag = (ch == '<');
                        sb.Append (symbol);
                        return ch;
                }

                // Markers right after '<' ("<%", "</", "<!") or right after
                // '%' inside a tag ("%@", "%#", "%=").
                bool afterOpenBracket = current_token == '<' && "%/!".IndexOf (symbol) != -1;
                bool afterPercent = inTag && current_token == '%' && "@#=".IndexOf (symbol) != -1;
                if (afterOpenBracket || afterPercent) {
                        sb.Append (symbol);
                        return ch;
                }

                if (inTag && ch == '-' && sr.Peek () == '-') {
                        read_char ();
                        sb.Append ("--");
                        return Token.DOUBLEDASH;
                }

                if (!inTag) {
                        // Plain text runs up to, but not including, the next
                        // '<' or the end of input.
                        sb.Append (symbol);
                        for (int next = sr.Peek (); next != -1 && next != '<'; next = sr.Peek ())
                                sb.Append ((char) read_char ());

                        return Token.TEXT;
                }

                // From here on the scanner is known to be inside a tag.
                bool blank = Char.IsWhiteSpace (symbol);

                // Anything non-blank after '=' starts an attribute value.
                if (current_token == '=' && !blank)
                        return ReadAttValue (ch);

                if (is_identifier_start_character (symbol)) {
                        sb.Append (symbol);

                        int next;
                        while ((next = sr.Peek ()) != -1 &&
                               (next == ':' || is_identifier_part_character ((char) next)))
                                sb.Append ((char) read_char ());

                        bool isDirective = current_token == '@' && Directive.IsDirective (sb.ToString ());
                        return isDirective ? Token.DIRECTIVE : Token.IDENTIFIER;
                }

                if (!blank) {
                        sb.Append (symbol);
                        return ch;
                }

                // Keep the otherwise discarded whitespace in case the caller
                // needs it.
                odds.Append (symbol);
        }

        return Token.EOF;
}
280
// Text of the current token. The string is built from the token
// buffer on first access and cached until get_token () scans a new
// token.
public string Value {
        get {
                if (!have_value) {
                        val = sb.ToString ();
                        have_value = true;
                }

                return val;
        }
}
291
// Whitespace that was skipped inside a tag while scanning for the
// current token.
public string Odds {
        get { return odds.ToString (); }
}
297
// Gets or sets whether the scanner considers itself inside a tag.
// The scanner sets this on '<' and clears it on '>'; callers may
// override it.
public bool InTag {
        get {
                return inTag;
        }
        set {
                inTag = value;
        }
}
302
// Hack for preventing confusion with VB comments (see bug #63451).
// While this is set and the scanner is inside a tag, a single or
// double quote is read as the start of an attribute value.
public bool ExpectAttrValue {
        get {
                return expectAttrValue;
        }
        set {
                expectAttrValue = value;
        }
}
308                 
// Line on which the current token started, as recorded by get_token ().
public int BeginLine {
        get {
                return begline;
        }
}
312
// Column at which the current token started, as recorded by get_token ().
public int BeginColumn {
        get {
                return begcol;
        }
}
316
// Line the read cursor is currently on.
public int EndLine {
        get {
                return line;
        }
}
320
// Column the read cursor is currently at.
public int EndColumn {
        get {
                return col;
        }
}
324
// Number of characters consumed so far, adjusted for any token that
// has been put back.
public int Position {
        get {
                return position;
        }
}
328         }
329 }
330