* roottypes.cs: Rename from tree.cs.
[mono.git] / mcs / class / System.Web / System.Web.Compilation / AspTokenizer.cs
1 //
2 // System.Web.Compilation.AspTokenizer
3 //
4 // Authors:
5 //      Gonzalo Paniagua Javier (gonzalo@ximian.com)
6 //
7 // (C) 2002,2003 Ximian, Inc (http://www.ximian.com)
8 //
9
10 //
11 // Permission is hereby granted, free of charge, to any person obtaining
12 // a copy of this software and associated documentation files (the
13 // "Software"), to deal in the Software without restriction, including
14 // without limitation the rights to use, copy, modify, merge, publish,
15 // distribute, sublicense, and/or sell copies of the Software, and to
16 // permit persons to whom the Software is furnished to do so, subject to
17 // the following conditions:
18 // 
19 // The above copyright notice and this permission notice shall be
20 // included in all copies or substantial portions of the Software.
21 // 
22 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
23 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
24 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
25 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
26 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
27 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
28 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
29 //
30
31 using System;
32 using System.Collections;
33 using System.IO;
34 using System.Text;
35
36 namespace System.Web.Compilation
37 {
38         class Token
39         {
40                 public const int EOF            = 0x0200000;
41                 public const int IDENTIFIER     = 0x0200001;
42                 public const int DIRECTIVE      = 0x0200002;
43                 public const int ATTVALUE       = 0x0200003;
44                 public const int TEXT           = 0x0200004;
45                 public const int DOUBLEDASH     = 0x0200005;
46                 public const int CLOSING        = 0x0200006;
47         }
48
49         class AspTokenizer
50         {
51                 TextReader sr;
52                 int current_token;
53                 StringBuilder sb, odds;
54                 int col, line;
55                 int begcol, begline;
56                 int position;
57                 bool inTag;
58                 bool expectAttrValue;
59                 bool alternatingQuotes;
60                 bool hasPutBack;
61                 bool verbatim;
62                 bool have_value;
63                 bool have_unget;
64                 int unget_value;
65                 string val;
66                 
67                 public AspTokenizer (TextReader reader)
68                 {
69                         this.sr = reader;
70                         sb = new StringBuilder ();
71                         odds= new StringBuilder();
72                         col = line = 1;
73                         hasPutBack = inTag = false;
74                 }
75
76                 public bool Verbatim
77                 {
78                         get { return verbatim; }
79                         set { verbatim = value; }
80                 }
81
82                 public void put_back ()
83                 {
84                         if (hasPutBack)
85                                 throw new HttpException ("put_back called twice!");
86                         
87                         hasPutBack = true;
88                         position -= Value.Length;
89                 }
90                 
91                 public int get_token ()
92                 {
93                         if (hasPutBack){
94                                 hasPutBack = false;
95                                 position += Value.Length;
96                                 return current_token;
97                         }
98
99                         begline = line;
100                         begcol = col;
101                         have_value = false;
102                         current_token = NextToken ();
103                         return current_token;
104                 }
105
106                 bool is_identifier_start_character (char c)
107                 {
108                         return (Char.IsLetter (c) || c == '_' );
109                 }
110
111                 bool is_identifier_part_character (char c)
112                 {
113                         return (Char.IsLetterOrDigit (c) || c == '_' || c == '-');
114                 }
115
116                 void ungetc (int value)
117                 {
118                         have_unget = true;
119                         unget_value = value;
120
121                         // Only '/' passes through here now.
122                         // If we ever let \n here, update 'line'
123                         position--;
124                         col--;
125                 }
126                 
127                 int read_char ()
128                 {
129                         int c;
130                         if (have_unget) {
131                                 c = unget_value;
132                                 have_unget = false;
133                         } else {
134                                 c = sr.Read ();
135                         }
136
137                         if (c == '\r' && sr.Peek () == '\n') {
138                                 c = sr.Read ();
139                                 position++;
140                         }
141
142                         if (c == '\n'){
143                                 col = -1;
144                                 line++;
145                         }
146
147                         if (c != -1) {
148                                 col++;
149                                 position++;
150                         }
151
152                         return c;
153                 }
154
155                 int ReadAttValue (int start)
156                 {
157                         int quoteChar = 0;
158                         bool quoted = false;
159
160                         if (start == '"' || start == '\'') {
161                                 quoteChar = start;
162                                 quoted = true;
163                         } else {
164                                 sb.Append ((char) start);
165                         }
166
167                         int c;
168                         int last = 0;
169                         bool inServerTag = false;
170                         alternatingQuotes = true;
171                         
172                         while ((c = sr.Peek ()) != -1) {
173                                 if (c == '%' && last == '<') {
174                                         inServerTag = true;
175                                 } else if (inServerTag && c == '>' && last == '%') {
176                                         inServerTag = false;
177                                 } else if (!inServerTag) {
178                                         if (!quoted && c == '/') {
179                                                 read_char ();
180                                                 c = sr.Peek ();
181                                                 if (c == -1) {
182                                                         c = '/';
183                                                 } else if (c == '>') {
184                                                         ungetc ('/');
185                                                         break;
186                                                 }
187                                         } else if (!quoted && (c == '>' || Char.IsWhiteSpace ((char) c))) {
188                                                 break;
189                                         } else if (quoted && c == quoteChar && last != '\\') {
190                                                 read_char ();
191                                                 break;
192                                         }
193                                 } else if (quoted && c == quoteChar) {
194                                         alternatingQuotes = false;
195                                 }
196
197                                 sb.Append ((char) c);
198                                 read_char ();
199                                 last = c;
200                         }
201
202                         return Token.ATTVALUE;
203                 }
204
205                 int NextToken ()
206                 {
207                         int c;
208                         
209                         sb.Length = 0;
210                         odds.Length=0;
211                         while ((c = read_char ()) != -1){
212                                 if (verbatim){
213                                         inTag = false;
214                                         sb.Append  ((char) c);
215                                         return c;
216                                 }
217
218                                 if (inTag && expectAttrValue && (c == '"' || c == '\''))
219                                         return ReadAttValue (c);
220                                 
221                                 if (c == '<'){
222                                         inTag = true;
223                                         sb.Append ((char) c);
224                                         return c;
225                                 }
226
227                                 if (c == '>'){
228                                         inTag = false;
229                                         sb.Append ((char) c);
230                                         return c;
231                                 }
232
233                                 if (current_token == '<' && "%/!".IndexOf ((char) c) != -1){
234                                         sb.Append ((char) c);
235                                         return c;
236                                 }
237
238                                 if (inTag && current_token == '%' && "@#=".IndexOf ((char) c) != -1){
239                                         sb.Append ((char) c);
240                                         return c;
241                                 }
242
243                                 if (inTag && c == '-' && sr.Peek () == '-'){
244                                         sb.Append ("--");
245                                         read_char ();
246                                         return Token.DOUBLEDASH;
247                                 }
248
249                                 if (!inTag){
250                                         sb.Append ((char) c);
251                                         while ((c = sr.Peek ()) != -1 && c != '<')
252                                                 sb.Append ((char) read_char ());
253
254                                         return (c != -1 || sb.Length > 0) ? Token.TEXT : Token.EOF;
255                                 }
256
257                                 if (inTag && current_token == '=' && !Char.IsWhiteSpace ((char) c))
258                                         return ReadAttValue (c);
259
260                                 if (inTag && is_identifier_start_character ((char) c)){
261                                         sb.Append ((char) c);
262                                         while ((c = sr.Peek ()) != -1) {
263                                                 if (!is_identifier_part_character ((char) c) && c != ':')
264                                                         break;
265                                                 sb.Append ((char) read_char ());
266                                         }
267
268                                         if (current_token == '@' && Directive.IsDirective (sb.ToString ()))
269                                                 return Token.DIRECTIVE;
270                                         
271                                         return Token.IDENTIFIER;
272                                 }
273
274                                 if (!Char.IsWhiteSpace ((char) c)) {
275                                         sb.Append  ((char) c);
276                                         return c;
277                                 }
278                                 // keep otherwise discarded characters in case we need.
279                                 odds.Append((char) c);
280                         }
281
282                         return Token.EOF;
283                 }
284
285                 public string Value {
286                         get {
287                                 if (have_value)
288                                         return val;
289
290                                 have_value = true;
291                                 val = sb.ToString ();
292                                 return val;
293                         }
294                 }
295
296                 public string Odds {
297                         get {
298                                 return odds.ToString();
299                         }
300                 }
301
302                 public bool InTag {
303                         get { return inTag; }
304                         set { inTag = value; }
305                 }
306
307                 // Hack for preventing confusion with VB comments (see bug #63451)
308                 public bool ExpectAttrValue {
309                         get { return expectAttrValue; }
310                         set { expectAttrValue = value; }
311                 }
312                 
313                 public bool AlternatingQuotes {
314                         get { return alternatingQuotes; }
315                 }
316                 
317                 public int BeginLine {
318                         get { return begline; }
319                 }
320
321                 public int BeginColumn {
322                         get { return begcol; }
323                 }
324
325                 public int EndLine {
326                         get { return line; }
327                 }
328
329                 public int EndColumn {
330                         get { return col; }
331                 }
332
333                 public int Position {
334                         get { return position; }
335                 }
336         }
337 }
338