Add a more functional (i.e. fewer-stubs) implementation of System.Data.Linq.
[mono.git] / mcs / class / System.Web / System.Web.Compilation / AspTokenizer.cs
1 //
2 // System.Web.Compilation.AspTokenizer
3 //
4 // Authors:
5 //      Gonzalo Paniagua Javier (gonzalo@ximian.com)
6 //
7 // (C) 2002,2003 Ximian, Inc (http://www.ximian.com)
8 //
9
10 //
11 // Permission is hereby granted, free of charge, to any person obtaining
12 // a copy of this software and associated documentation files (the
13 // "Software"), to deal in the Software without restriction, including
14 // without limitation the rights to use, copy, modify, merge, publish,
15 // distribute, sublicense, and/or sell copies of the Software, and to
16 // permit persons to whom the Software is furnished to do so, subject to
17 // the following conditions:
18 // 
19 // The above copyright notice and this permission notice shall be
20 // included in all copies or substantial portions of the Software.
21 // 
22 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
23 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
24 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
25 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
26 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
27 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
28 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
29 //
30
31 using System;
32 using System.Collections;
33 using System.IO;
34 using System.Text;
35
36 namespace System.Web.Compilation
37 {
// Symbolic token codes used by AspTokenizer for everything that is not
// reported as a plain character code. The codes are consecutive integers
// starting at 0x0200000.
class Token
{
        // First symbolic token code; the remaining codes are offsets from it.
        const int Base = 0x0200000;

        public const int EOF = Base;
        public const int IDENTIFIER = Base + 1;
        public const int DIRECTIVE = Base + 2;
        public const int ATTVALUE = Base + 3;
        public const int TEXT = Base + 4;
        public const int DOUBLEDASH = Base + 5;
        public const int CLOSING = Base + 6;
}
48
// Tokenizer that splits the characters of a TextReader into tokens.
//
// get_token () returns either the code of a single character ('<', '>',
// '%', '=', ...) or one of the Token.* constants. The text that makes up
// the most recent token is exposed through Value, and the whitespace that
// was skipped while looking for it through Odds.
class AspTokenizer
{
        static readonly char [] lfcr = new char [] { '\n', '\r' };
        readonly TextReader sr;
        // Token most recently produced by NextToken ().
        int current_token;
        // sb collects the text of the current token, odds the whitespace
        // skipped while scanning for it. Both are reset by NextToken ().
        readonly StringBuilder sb, odds;
        // Current column/line, maintained by read_char ().
        int col, line;
        // Column/line captured by get_token () before scanning a new token.
        int begcol, begline;
        // Running character count, maintained by read_char (), ungetc ()
        // and put_back ().
        int position;
        bool inTag;
        bool expectAttrValue;
        bool alternatingQuotes;
        bool hasPutBack;
        bool verbatim;
        // have_value/val cache the string built from sb for Value.
        bool have_value;
        // have_unget/unget_value hold one character pushed back by ungetc ().
        bool have_unget;
        int unget_value;
        string val;

        // Creates a tokenizer reading from 'reader', starting at line 1,
        // column 1, outside of any tag.
        public AspTokenizer (TextReader reader)
        {
                this.sr = reader;
                sb = new StringBuilder ();
                odds = new StringBuilder ();
                col = line = 1;
                hasPutBack = inTag = false;
        }

        // When true, NextToken () returns every character as its own token.
        public bool Verbatim
        {
                get { return verbatim; }
                set { verbatim = value; }
        }

        // Makes the next get_token () call return the current token again.
        // Only one token can be put back at a time; a second call before
        // get_token () throws HttpException.
        public void put_back ()
        {
                if (hasPutBack)
                        throw new HttpException ("put_back called twice!");

                hasPutBack = true;
                position -= Value.Length;
        }

        // Returns the next token: the token that was put back, if any,
        // otherwise a freshly scanned one. Scanning a new token records the
        // begin line/column and invalidates the cached Value.
        public int get_token ()
        {
                if (hasPutBack){
                        hasPutBack = false;
                        position += Value.Length;
                        return current_token;
                }

                begline = line;
                begcol = col;
                have_value = false;
                current_token = NextToken ();
                return current_token;
        }

        // True for a letter or '_'.
        bool is_identifier_start_character (char c)
        {
                return (Char.IsLetter (c) || c == '_' );
        }

        // True for a letter, a digit, '_' or '-'.
        bool is_identifier_part_character (char c)
        {
                return (Char.IsLetterOrDigit (c) || c == '_' || c == '-');
        }

        // Pushes one character back so that the next read_char () returns it
        // instead of reading from the underlying reader.
        void ungetc (int value)
        {
                have_unget = true;
                unget_value = value;

                // Only '/' passes through here now.
                // If we ever let \n here, update 'line'
                position--;
                col--;
        }

        // Reads one character (the pushed-back one first, if any) and keeps
        // position, col and line up to date. Returns -1 at end of input.
        int read_char ()
        {
                int c;
                if (have_unget) {
                        c = unget_value;
                        have_unget = false;
                } else {
                        c = sr.Read ();
                }

                // A "\r\n" pair is returned as a single '\n', but both
                // characters are counted in position.
                if (c == '\r' && sr.Peek () == '\n') {
                        c = sr.Read ();
                        position++;
                }

                if (c == '\n'){
                        // -1 so that the increment below leaves col at 0.
                        col = -1;
                        line++;
                }

                if (c != -1) {
                        col++;
                        position++;
                }

                return c;
        }

        // Reads an attribute value into sb and returns Token.ATTVALUE.
        //
        // 'start' is the character already consumed by the caller: either
        // the opening quote (' or "), which is not stored, or the first
        // character of an unquoted value, which is.
        //
        // A quoted value ends at the matching quote that is not preceded by
        // a backslash; an unquoted value ends before whitespace, '>' or
        // "/>". Inside a <% ... %> block none of these terminators apply.
        // AlternatingQuotes is reset to true on entry and cleared when the
        // enclosing quote character shows up inside such a block.
        int ReadAttValue (int start)
        {
                int quoteChar = 0;
                bool quoted = false;

                if (start == '"' || start == '\'') {
                        quoteChar = start;
                        quoted = true;
                } else {
                        sb.Append ((char) start);
                }

                int c;
                int last = 0;
                bool inServerTag = false;
                alternatingQuotes = true;

                while ((c = sr.Peek ()) != -1) {
                        if (c == '%' && last == '<') {
                                inServerTag = true;
                        } else if (inServerTag && c == '>' && last == '%') {
                                inServerTag = false;
                        } else if (!inServerTag) {
                                if (!quoted && c == '/') {
                                        read_char ();
                                        if (sr.Peek () == '>') {
                                                // "/>" closes the tag: hand the slash
                                                // back so that it is tokenized next.
                                                ungetc ('/');
                                                break;
                                        }

                                        // Any other slash belongs to the value. Keep
                                        // it and let the following character go
                                        // through the regular checks above instead
                                        // of dropping the slash and appending that
                                        // character unconditionally.
                                        sb.Append ('/');
                                        last = '/';
                                        continue;
                                } else if (!quoted && (c == '>' || Char.IsWhiteSpace ((char) c))) {
                                        break;
                                } else if (quoted && c == quoteChar && last != '\\') {
                                        // Consume the closing quote without storing it.
                                        read_char ();
                                        break;
                                }
                        } else if (quoted && c == quoteChar) {
                                alternatingQuotes = false;
                        }

                        sb.Append ((char) c);
                        read_char ();
                        last = c;
                }

                return Token.ATTVALUE;
        }

        // Scans the next token, leaving its text in sb and any skipped
        // whitespace in odds. Returns a character code or a Token.* constant;
        // Token.EOF when the input is exhausted.
        int NextToken ()
        {
                int c;

                sb.Length = 0;
                odds.Length = 0;
                while ((c = read_char ()) != -1){
                        // Verbatim mode: every character is its own token.
                        if (verbatim){
                                inTag = false;
                                sb.Append  ((char) c);
                                return c;
                        }

                        // The caller announced an attribute value: a quote starts it.
                        if (inTag && expectAttrValue && (c == '"' || c == '\''))
                                return ReadAttValue (c);

                        if (c == '<'){
                                inTag = true;
                                sb.Append ((char) c);
                                return c;
                        }

                        if (c == '>'){
                                inTag = false;
                                sb.Append ((char) c);
                                return c;
                        }

                        // '%', '/' and '!' right after a '<' token are tokens of
                        // their own.
                        if (current_token == '<' && "%/!".IndexOf ((char) c) != -1){
                                sb.Append ((char) c);
                                return c;
                        }

                        // '@', '#' and '=' after a '%' token are tokens of their
                        // own unless the whitespace skipped so far contains a line
                        // break; in that case the character is kept in sb and
                        // scanning goes on.
                        if (inTag && current_token == '%' && "@#=".IndexOf ((char) c) != -1){
                                if (odds.Length == 0 || odds.ToString ().IndexOfAny (lfcr) < 0) {
                                        sb.Append ((char) c);
                                        return c;
                                }
                                sb.Append ((char) c);
                                continue;
                        }

                        if (inTag && c == '-' && sr.Peek () == '-'){
                                sb.Append ("--");
                                read_char ();
                                return Token.DOUBLEDASH;
                        }

                        // Outside of a tag everything up to the next '<' (or the
                        // end of input) is one TEXT token.
                        if (!inTag){
                                sb.Append ((char) c);
                                while ((c = sr.Peek ()) != -1 && c != '<')
                                        sb.Append ((char) read_char ());

                                return (c != -1 || sb.Length > 0) ? Token.TEXT : Token.EOF;
                        }

                        // A non-blank character after an '=' token starts an
                        // attribute value.
                        if (inTag && current_token == '=' && !Char.IsWhiteSpace ((char) c))
                                return ReadAttValue (c);

                        if (inTag && is_identifier_start_character ((char) c)){
                                sb.Append ((char) c);
                                // ':' is accepted inside identifiers as well.
                                while ((c = sr.Peek ()) != -1) {
                                        if (!is_identifier_part_character ((char) c) && c != ':')
                                                break;
                                        sb.Append ((char) read_char ());
                                }

                                if (current_token == '@' && Directive.IsDirective (sb.ToString ()))
                                        return Token.DIRECTIVE;

                                return Token.IDENTIFIER;
                        }

                        if (!Char.IsWhiteSpace ((char) c)) {
                                sb.Append  ((char) c);
                                return c;
                        }
                        // keep otherwise discarded characters in case we need.
                        odds.Append((char) c);
                }

                return Token.EOF;
        }

        // Text of the current token. The string is built from sb on first
        // access and cached until get_token () scans a new token.
        public string Value {
                get {
                        if (have_value)
                                return val;

                        have_value = true;
                        val = sb.ToString ();
                        return val;
                }
        }

        // Whitespace skipped while scanning the current token.
        public string Odds {
                get {
                        return odds.ToString();
                }
        }

        public bool InTag {
                get { return inTag; }
                set { inTag = value; }
        }

        // Hack for preventing confusion with VB comments (see bug #63451)
        public bool ExpectAttrValue {
                get { return expectAttrValue; }
                set { expectAttrValue = value; }
        }

        // False when the last attribute value read contained its own quote
        // character inside a <% ... %> block.
        public bool AlternatingQuotes {
                get { return alternatingQuotes; }
        }

        public int BeginLine {
                get { return begline; }
        }

        public int BeginColumn {
                get { return begcol; }
        }

        public int EndLine {
                get { return line; }
        }

        public int EndColumn {
                get { return col; }
        }

        public int Position {
                get { return position; }
        }
}
342 }
343