2003-07-16 Gonzalo Paniagua Javier <gonzalo@ximian.com>
[mono.git] / mcs / class / System.Web / System.Web.Compilation / AspTokenizer.cs
1 //
2 // System.Web.Compilation.AspTokenizer
3 //
4 // Authors:
5 //      Gonzalo Paniagua Javier (gonzalo@ximian.com)
6 //
7 // (C) 2002,2003 Ximian, Inc (http://www.ximian.com)
8 //
9
10 using System;
11 using System.Collections;
12 using System.IO;
13 using System.Text;
14
15 namespace System.Web.Compilation
16 {
17         class Token
18         {
19                 public const int EOF            = 0;
20                 public const int IDENTIFIER     = 1000;
21                 public const int DIRECTIVE      = 1001;
22                 public const int ATTVALUE       = 1002;
23                 public const int TEXT           = 1003;
24                 public const int DOUBLEDASH     = 1004;
25                 public const int CLOSING        = 1005;
26         }
27
28         class AspTokenizer
29         {
30                 TextReader sr;
31                 int current_token;
32                 StringBuilder sb;
33                 int col, line;
34                 int begcol, begline;
35                 int position;
36                 bool inTag;
37                 bool hasPutBack;
38                 bool verbatim;
39                 bool have_value;
40                 string val;
41                 
42                 public AspTokenizer (TextReader reader)
43                 {
44                         this.sr = reader;
45                         sb = new StringBuilder ();
46                         col = line = 1;
47                         hasPutBack = inTag = false;
48                 }
49
50                 public bool Verbatim
51                 {
52                         get { return verbatim; }
53                         set { verbatim = value; }
54                 }
55
56                 public void put_back ()
57                 {
58                         if (hasPutBack)
59                                 throw new HttpException ("put_back called twice!");
60                         
61                         hasPutBack = true;
62                         position -= Value.Length;
63                 }
64                 
65                 public int get_token ()
66                 {
67                         if (hasPutBack){
68                                 hasPutBack = false;
69                                 position += Value.Length;
70                                 return current_token;
71                         }
72
73                         begline = line;
74                         begcol = col;
75                         have_value = false;
76                         current_token = NextToken ();
77                         return current_token;
78                 }
79
80                 bool is_identifier_start_character (char c)
81                 {
82                         return (Char.IsLetter (c) || c == '_' );
83                 }
84
85                 bool is_identifier_part_character (char c)
86                 {
87                         return (Char.IsLetterOrDigit (c) || c == '_' || c == '-');
88                 }
89
90                 int read_char ()
91                 {
92                         int c = sr.Read ();
93
94                         if (c == '\r' && sr.Peek () == '\n') {
95                                 c = sr.Read ();
96                                 position++;
97                         }
98
99                         if (c == '\n'){
100                                 col = -1;
101                                 line++;
102                         }
103
104                         if (c != -1) {
105                                 col++;
106                                 position++;
107                         }
108
109                         return c;
110                 }
111
112                 int ReadAttValue (int start)
113                 {
114                         int quoteChar = 0;
115                         bool quoted = false;
116
117                         if (start == '"' || start == '\'') {
118                                 quoteChar = start;
119                                 quoted = true;
120                         } else {
121                                 sb.Append ((char) start);
122                         }
123
124                         int c;
125                         int last = 0;
126                         bool inServerTag = false;
127                         
128                         while ((c = sr.Peek ()) != -1) {
129                                 if (c == '%' && last == '<') {
130                                         inServerTag = true;
131                                 } else if (inServerTag && c == '>' && last == '%') {
132                                         inServerTag = false;
133                                 } else if (!inServerTag) {
134                                         if (!quoted && (c == '/' || c == '>' || Char.IsWhiteSpace ((char) c))) {
135                                                 break;
136                                         } else if (quoted && c == quoteChar && last != '\\') {
137                                                 read_char ();
138                                                 break;
139                                         }
140                                 }
141
142                                 sb.Append ((char) c);
143                                 read_char ();
144                                 last = c;
145                         }
146
147                         return Token.ATTVALUE;
148                 }
149
150                 int NextToken ()
151                 {
152                         int c;
153                         
154                         sb.Length = 0;
155                         while ((c = read_char ()) != -1){
156                                 if (verbatim){
157                                         inTag = false;
158                                         sb.Append  ((char) c);
159                                         return c;
160                                 }
161
162                                 if (inTag && (c == '"' || c == '\''))
163                                         return ReadAttValue (c);
164                                 
165                                 if (c == '<'){
166                                         inTag = true;
167                                         sb.Append ((char) c);
168                                         return c;
169                                 }
170
171                                 if (c == '>'){
172                                         inTag = false;
173                                         sb.Append ((char) c);
174                                         return c;
175                                 }
176
177                                 if (current_token == '<' && "%/!".IndexOf ((char) c) != -1){
178                                         sb.Append ((char) c);
179                                         return c;
180                                 }
181
182                                 if (inTag && current_token == '%' && "@#=".IndexOf ((char) c) != -1){
183                                         sb.Append ((char) c);
184                                         return c;
185                                 }
186
187                                 if (inTag && c == '-' && sr.Peek () == '-'){
188                                         sb.Append ("--");
189                                         read_char ();
190                                         return Token.DOUBLEDASH;
191                                 }
192
193                                 if (!inTag){
194                                         sb.Append ((char) c);
195                                         while ((c = sr.Peek ()) != -1 && c != '<')
196                                                 sb.Append ((char) read_char ());
197
198                                         return (c != -1 || sb.Length > 0) ? Token.TEXT : Token.EOF;
199                                 }
200
201                                 if (inTag && current_token == '=' && !Char.IsWhiteSpace ((char) c))
202                                         return ReadAttValue (c);
203
204                                 if (inTag && is_identifier_start_character ((char) c)){
205                                         sb.Append ((char) c);
206                                         while ((c = sr.Peek ()) != -1) {
207                                                 if (!is_identifier_part_character ((char) c) && c != ':')
208                                                         break;
209                                                 sb.Append ((char) read_char ());
210                                         }
211
212                                         if (current_token == '@' && Directive.IsDirective (sb.ToString ()))
213                                                 return Token.DIRECTIVE;
214                                         
215                                         return Token.IDENTIFIER;
216                                 }
217
218                                 if (!Char.IsWhiteSpace ((char) c)) {
219                                         sb.Append  ((char) c);
220                                         return c;
221                                 }
222                         }
223
224                         return Token.EOF;
225                 }
226
227                 public string Value {
228                         get {
229                                 if (have_value)
230                                         return val;
231
232                                 have_value = true;
233                                 val = sb.ToString ();
234                                 return val;
235                         }
236                 }
237
238                 public bool InTag {
239                         get { return inTag; }
240                         set { inTag = value; }
241                 }
242                 
243                 public int BeginLine {
244                         get { return begline; }
245                 }
246
247                 public int BeginColumn {
248                         get { return begcol; }
249                 }
250
251                 public int EndLine {
252                         get { return line; }
253                 }
254
255                 public int EndColumn {
256                         get { return col; }
257                 }
258
259                 public int Position {
260                         get { return position; }
261                 }
262         }
263 }
264