2 // MonoBASIC.Tokenizer.cs: The Tokenizer for the MonoBASIC compiler
\r
4 // Author: A Rafael D Teixeira (rafaelteixeirabr@hotmail.com)
\r
6 // Based on cs-tokenizer.cs by Miguel de Icaza (miguel@gnu.org)
\r
8 // Licensed under the terms of the GNU GPL
\r
10 // Copyright (C) 2001 A Rafael D Teixeira
\r
21 namespace Mono.Languages.MonoBASIC
\r
25 using System.Collections;
\r
27 using System.Globalization;
\r
28 using Mono.Languages.MonoBASIC;
\r
32 /// Tokenizer for MonoBASIC source code.
\r
35 public class Tokenizer : yyParser.yyInput
\r
38 public string ref_name;
\r
39 public int ref_line = 1;
\r
40 public int line = 1;
\r
42 public int current_token;
\r
43 bool handle_get_set = false;
\r
45 public string location {
\r
49 if (current_token == Token.ERROR)
\r
50 det = "detail: " + error_details;
\r
54 return "Line: "+line+" Col: "+col + "\n" +
\r
55 "VirtLine: "+ref_line +
\r
56 " Token: "+current_token + " " + det;
\r
/// <summary>
///   Reads or writes the handle_get_set flag. is_keyword consults that
///   flag when the scanned name is "get" or "set".
/// </summary>
public bool properties {
	get { return this.handle_get_set; }
	set { this.handle_get_set = value; }
}
\r
73 static Hashtable keywords;
\r
74 static NumberStyles styles;
\r
75 static NumberFormatInfo csharp_format_info;
\r
78 // Values for the associated token returned
\r
80 System.Text.StringBuilder number;
\r
85 // Details about the error encountered by the tokenizer
\r
87 string error_details;
\r
/// <summary>
///   Read-only view of error_details, the text the tokenizer stores
///   when it reports a lexical error.
/// </summary>
public string error {
	get { return this.error_details; }
}
\r
/// <summary>
///   Builds the static keyword table that maps each reserved word to
///   its Token code.
/// </summary>
/// <remarks>
///   Keys must be lower-case and must not contain whitespace: both
///   is_keyword and getKeyword look names up with name.ToLower (), and
///   a scanned name is made only of letters, digits and '_'.  The
///   original entries for "let", "lib" and "like" carried a trailing
///   space and therefore could never match; they are fixed here.
/// </remarks>
static void initTokens ()
{
	keywords = new Hashtable ();

	keywords.Add ("addhandler", Token.ADDHANDLER);
	keywords.Add ("addressof", Token.ADDRESSOF);
	keywords.Add ("alias", Token.ALIAS);
	keywords.Add ("and", Token.AND);
	keywords.Add ("andalso", Token.ANDALSO);
	keywords.Add ("ansi", Token.ANSI);
	keywords.Add ("as", Token.AS);
	keywords.Add ("assembly", Token.ASSEMBLY);
	keywords.Add ("auto", Token.AUTO);
	keywords.Add ("boolean", Token.BOOLEAN);
	keywords.Add ("byref", Token.BYREF);
	keywords.Add ("byte", Token.BYTE);
	keywords.Add ("byval", Token.BYVAL);
	keywords.Add ("call", Token.CALL);
	keywords.Add ("case", Token.CASE);
	keywords.Add ("catch", Token.CATCH);
	keywords.Add ("cbool", Token.CBOOL);
	keywords.Add ("cbyte", Token.CBYTE);
	keywords.Add ("cchar", Token.CCHAR);
	keywords.Add ("cdate", Token.CDATE);
	keywords.Add ("cdec", Token.CDEC);
	keywords.Add ("cdbl", Token.CDBL);
	keywords.Add ("char", Token.CHAR);
	keywords.Add ("cint", Token.CINT);
	keywords.Add ("class", Token.CLASS);
	keywords.Add ("clng", Token.CLNG);
	keywords.Add ("cobj", Token.COBJ);
	// Disabled in the original source; presumably so "compare" stays
	// usable as an identifier -- TODO confirm.
	//keywords.Add ("compare", Token.COMPARE);
	keywords.Add ("const", Token.CONST);
	keywords.Add ("cshort", Token.CSHORT);
	keywords.Add ("csng", Token.CSNG);
	keywords.Add ("cstr", Token.CSTR);
	keywords.Add ("ctype", Token.CTYPE);
	keywords.Add ("date", Token.DATE);
	keywords.Add ("decimal", Token.DECIMAL);
	keywords.Add ("declare", Token.DECLARE);
	keywords.Add ("default", Token.DEFAULT);
	keywords.Add ("delegate", Token.DELEGATE);
	keywords.Add ("dim", Token.DIM);
	keywords.Add ("do", Token.DO);
	keywords.Add ("double", Token.DOUBLE);
	keywords.Add ("each", Token.EACH);
	keywords.Add ("else", Token.ELSE);
	keywords.Add ("elseif", Token.ELSEIF);
	keywords.Add ("end", Token.END);
	keywords.Add ("enum", Token.ENUM);
	keywords.Add ("erase", Token.ERASE);
	// NOTE(review): Token.ERROR is also the code the scanner returns
	// for unrecognized input, so the "error" keyword and a lexical
	// error look the same to the parser -- confirm this is intended.
	keywords.Add ("error", Token.ERROR);
	keywords.Add ("event", Token.EVENT);
	keywords.Add ("exit", Token.EXIT);
	// Disabled in the original source; presumably so "explicit" stays
	// usable as an identifier -- TODO confirm.
	//keywords.Add ("explicit", Token.EXPLICIT);
	keywords.Add ("false", Token.FALSE);
	keywords.Add ("finally", Token.FINALLY);
	keywords.Add ("for", Token.FOR);
	keywords.Add ("friend", Token.FRIEND);
	keywords.Add ("function", Token.FUNCTION);
	keywords.Add ("get", Token.GET);
	keywords.Add ("gettype", Token.GETTYPE);
	keywords.Add ("goto", Token.GOTO);
	keywords.Add ("handles", Token.HANDLES);
	keywords.Add ("if", Token.IF);
	keywords.Add ("implements", Token.IMPLEMENTS);
	keywords.Add ("imports", Token.IMPORTS);
	keywords.Add ("in", Token.IN);
	keywords.Add ("inherits", Token.INHERITS);
	keywords.Add ("integer", Token.INTEGER);
	keywords.Add ("interface", Token.INTERFACE);
	keywords.Add ("is", Token.IS);
	// The next three keys used to end in a space ("let ", "lib ",
	// "like "), which made them unreachable from any lookup.
	keywords.Add ("let", Token.LET);
	keywords.Add ("lib", Token.LIB);
	keywords.Add ("like", Token.LIKE);
	keywords.Add ("long", Token.LONG);
	keywords.Add ("loop", Token.LOOP);
	keywords.Add ("me", Token.ME);
	keywords.Add ("mod", Token.MOD);
	keywords.Add ("module", Token.MODULE);
	keywords.Add ("mustinherit", Token.MUSTINHERIT);
	keywords.Add ("mustoverride", Token.MUSTOVERRIDE);
	keywords.Add ("mybase", Token.MYBASE);
	keywords.Add ("myclass", Token.MYCLASS);
	keywords.Add ("namespace", Token.NAMESPACE);
	keywords.Add ("new", Token.NEW);
	keywords.Add ("next", Token.NEXT);
	keywords.Add ("not", Token.NOT);
	keywords.Add ("nothing", Token.NOTHING);
	keywords.Add ("notinheritable", Token.NOTINHERITABLE);
	keywords.Add ("notoverridable", Token.NOTOVERRIDABLE);
	keywords.Add ("object", Token.OBJECT);
	keywords.Add ("on", Token.ON);
	keywords.Add ("option", Token.OPTION);
	keywords.Add ("optional", Token.OPTIONAL);
	keywords.Add ("or", Token.OR);
	keywords.Add ("orelse", Token.ORELSE);
	keywords.Add ("overloads", Token.OVERLOADS);
	keywords.Add ("overridable", Token.OVERRIDABLE);
	keywords.Add ("overrides", Token.OVERRIDES);
	keywords.Add ("paramarray", Token.PARAM_ARRAY);
	keywords.Add ("preserve", Token.PRESERVE);
	keywords.Add ("private", Token.PRIVATE);
	keywords.Add ("property", Token.PROPERTY);
	keywords.Add ("protected", Token.PROTECTED);
	keywords.Add ("public", Token.PUBLIC);
	keywords.Add ("raiseevent", Token.RAISEEVENT);
	keywords.Add ("readonly", Token.READONLY);
	keywords.Add ("redim", Token.REDIM);
	keywords.Add ("rem", Token.REM);
	keywords.Add ("removehandler", Token.REMOVEHANDLER);
	keywords.Add ("resume", Token.RESUME);
	keywords.Add ("return", Token.RETURN);
	keywords.Add ("select", Token.SELECT);
	keywords.Add ("set", Token.SET);
	keywords.Add ("shadows", Token.SHADOWS);
	keywords.Add ("shared", Token.SHARED);
	keywords.Add ("short", Token.SHORT);
	keywords.Add ("single", Token.SINGLE);
	keywords.Add ("sizeof", Token.SIZEOF);
	keywords.Add ("static", Token.STATIC);
	keywords.Add ("step", Token.STEP);
	keywords.Add ("stop", Token.STOP);
	keywords.Add ("string", Token.STRING);
	keywords.Add ("structure", Token.STRUCTURE);
	keywords.Add ("sub", Token.SUB);
	keywords.Add ("synclock", Token.SYNCLOCK);
	keywords.Add ("then", Token.THEN);
	keywords.Add ("throw", Token.THROW);
	keywords.Add ("to", Token.TO);
	keywords.Add ("true", Token.TRUE);
	keywords.Add ("try", Token.TRY);
	keywords.Add ("typeof", Token.TYPEOF);
	keywords.Add ("unicode", Token.UNICODE);
	keywords.Add ("until", Token.UNTIL);
	keywords.Add ("variant", Token.VARIANT);
	keywords.Add ("when", Token.WHEN);
	keywords.Add ("while", Token.WHILE);
	keywords.Add ("with", Token.WITH);
	keywords.Add ("withevents", Token.WITHEVENTS);
	keywords.Add ("writeonly", Token.WRITEONLY);
	keywords.Add ("xor", Token.XOR);
}
\r
252 // Class initializer
\r
254 static Tokenizer ()
\r
257 csharp_format_info = new NumberFormatInfo ();
\r
258 csharp_format_info.CurrencyDecimalSeparator = ".";
\r
259 styles = NumberStyles.AllowExponent | NumberStyles.AllowDecimalPoint;
\r
262 bool is_keyword (string name)
\r
266 res = keywords.Contains(name.ToLower());
\r
267 if ((name == "get" || name == "set") && handle_get_set == false)
\r
/// <summary>
///   Returns the token code stored in the keyword table for
///   <paramref name="name" />, compared case-insensitively.
/// </summary>
/// <remarks>
///   The unboxing cast fails if the name is not in the table, so the
///   caller is expected to have checked is_keyword first.
///   NOTE(review): ToLower () uses the current culture; is_keyword
///   does the same, so both should be changed together if
///   culture-independent folding is wanted.
/// </remarks>
int getKeyword (string name)
{
	string folded = name.ToLower();
	object code = keywords [folded];

	return (int) code;
}
\r
/// <summary>
///   Creates a new Location from the current value of ref_line.
/// </summary>
public Location Location {
	get { return new Location (this.ref_line); }
}
\r
283 public Tokenizer (System.IO.TextReader input, string fname)
\r
285 this.ref_name = fname;
\r
289 Location.Push (fname);
\r
/// <summary>
///   True when <paramref name="c" /> may begin an identifier: an
///   underscore or any character Char.IsLetter accepts.
/// </summary>
bool is_identifier_start_character (char c)
{
	if (c == '_')
		return true;

	return Char.IsLetter (c);
}
\r
/// <summary>
///   True when <paramref name="c" /> may appear after the first
///   character of an identifier: a letter, a decimal digit or an
///   underscore.
/// </summary>
bool is_identifier_part_character (char c)
{
	// Char.IsLetterOrDigit is exactly Char.IsLetter || Char.IsDigit.
	if (Char.IsLetterOrDigit (c))
		return true;

	return c == '_';
}
\r
302 int is_punct (char c, ref bool doread)
\r
304 int idx = "{}[](),:;~+-*/%&|^!=<>?".IndexOf (c);
\r
312 return Token.OPEN_BRACKET;
\r
314 return Token.CLOSE_BRACKET;
\r
316 return Token.OPEN_PARENS;
\r
318 return Token.CLOSE_PARENS;
\r
320 return Token.COMMA;
\r
322 return Token.COLON;
\r
324 return Token.INTERR;
\r
333 t = Token.OP_ADD_ASSIGN;
\r
341 t = Token.OP_SUB_ASSIGN;
\r
343 return Token.MINUS;
\r
351 return Token.OP_EQ;
\r
353 return Token.ASSIGN;
\r
359 return Token.OP_MULT_ASSIGN;
\r
367 return Token.OP_DIV_ASSIGN;
\r
375 return Token.OP_IDIV_ASSIGN;
\r
377 return Token.OP_IDIV;
\r
383 return Token.OP_EXP_ASSIGN;
\r
385 return Token.OP_EXP;
\r
392 return Token.OP_NE;
\r
396 return Token.OP_LE;
\r
398 return Token.OP_LT;
\r
404 return Token.OP_GE;
\r
406 return Token.OP_GT;
\r
408 return Token.ERROR;
\r
411 bool decimal_digits (int c)
\r
414 bool seen_digits = false;
\r
417 number.Append ((char) c);
\r
419 while ((d = peekChar ()) != -1){
\r
420 if (Char.IsDigit ((char)d)){
\r
421 number.Append ((char) d);
\r
423 seen_digits = true;
\r
427 return seen_digits;
\r
430 void hex_digits (int c)
\r
435 number.Append ((char) c);
\r
436 while ((d = peekChar ()) != -1){
\r
437 char e = Char.ToUpper ((char) d);
\r
439 if (Char.IsDigit (e) ||
\r
440 (e >= 'A' && e <= 'F')){
\r
441 number.Append ((char) e);
\r
448 int real_type_suffix (int c)
\r
453 case 'F': case 'f':
\r
454 t = Token.LITERAL_SINGLE;
\r
456 case 'D': case 'd':
\r
457 t = Token.LITERAL_DOUBLE;
\r
459 case 'M': case 'm':
\r
460 t= Token.LITERAL_DECIMAL;
\r
/// <summary>
///   Picks the token type for an integer literal.  The character
///   <paramref name="c" /> is currently not inspected; every integer
///   literal is reported as Token.LITERAL_INTEGER.
/// </summary>
int integer_type_suffix (int c)
{
	// FIXME: U and L suffixes are not handled yet.  According to the
	// spec we also have to work out which integer type the value
	// fits best.
	int literal_kind = Token.LITERAL_INTEGER;

	return literal_kind;
}
\r
/// <summary>
///   Converts the digits collected in the number buffer into a boxed
///   Int32 and stores it in val.
/// </summary>
/// <param name="t">Token type of the literal; not used here.</param>
/// <remarks>
///   Int32.Parse throws (FormatException / OverflowException) when the
///   buffer is not a plain run of digits or does not fit in 32 bits;
///   that exception is not handled here.
/// </remarks>
void adjust_int (int t)
{
	// NumberStyles.None is the named form of the literal 0 the original
	// passed: digits only, no sign, no whitespace, no separators.  The
	// placeholder "val = new System.Int32 ()" that used to precede this
	// line was overwritten immediately and has been dropped.
	val = System.Int32.Parse (number.ToString (), NumberStyles.None);
}
\r
483 int adjust_real (int t)
\r
485 string s = number.ToString ();
\r
487 Console.WriteLine (s);
\r
489 case Token.LITERAL_DECIMAL:
\r
490 val = new System.Decimal ();
\r
491 val = System.Decimal.Parse (
\r
492 s, styles, csharp_format_info);
\r
494 case Token.LITERAL_DOUBLE:
\r
495 val = new System.Double ();
\r
496 val = System.Double.Parse (
\r
497 s, styles, csharp_format_info);
\r
499 case Token.LITERAL_SINGLE:
\r
500 val = new System.Double ();
\r
501 val = (float) System.Double.Parse (
\r
502 s, styles, csharp_format_info);
\r
506 val = new System.Double ();
\r
507 val = System.Double.Parse (
\r
508 s, styles, csharp_format_info);
\r
509 t = Token.LITERAL_DOUBLE;
\r
516 // Invoked if we know we have .digits or digits
\r
518 int is_number (int c)
\r
520 bool is_real = false;
\r
521 number = new System.Text.StringBuilder ();
\r
526 if (Char.IsDigit ((char)c)){
\r
527 if (c == '0' && peekChar () == 'x' || peekChar () == 'X'){
\r
530 val = new System.Int32 ();
\r
531 val = System.Int32.Parse (number.ToString (), NumberStyles.HexNumber);
\r
532 return integer_type_suffix (peekChar ());
\r
534 decimal_digits (c);
\r
539 // We need to handle the case of
\r
540 // "1.1" vs "1.string" (LITERAL_SINGLE vs NUMBER DOT IDENTIFIER)
\r
543 if (decimal_digits ('.')){
\r
548 number.Length -= 1;
\r
549 adjust_int (Token.LITERAL_INTEGER);
\r
550 return Token.LITERAL_INTEGER;
\r
554 if (c == 'e' || c == 'E'){
\r
556 number.Append ("e");
\r
561 number.Append ((char) c);
\r
564 } else if (c == '-'){
\r
565 number.Append ((char) c);
\r
569 decimal_digits (-1);
\r
573 type = real_type_suffix (c);
\r
574 if (type == Token.NONE && !is_real){
\r
575 type = integer_type_suffix (c);
\r
583 return adjust_real (type);
\r
585 Console.WriteLine ("This should not be reached");
\r
586 throw new Exception ("Is Number should never reach this point");
\r
622 error_details = "cs1009: Unrecognized escape sequence " + (char)d;
\r
631 if (putback_char != -1){
\r
632 int x = putback_char;
\r
637 return reader.Read ();
\r
642 if (putback_char != -1)
\r
643 return putback_char;
\r
644 return reader.Peek ();
\r
647 void putback (int c)
\r
649 if (putback_char != -1)
\r
650 throw new Exception ("This should not happen putback on putback");
\r
/// <summary>
///   Reports whether more input remains: false once current_token is
///   Token.EOF, true otherwise.  This method does not itself read
///   any input.
/// </summary>
public bool advance ()
{
	bool at_end = (current_token == Token.EOF);

	return !at_end;
}
\r
659 public Object Value {
\r
665 public Object value ()
\r
670 public int token ()
\r
672 current_token = xtoken ();
\r
673 if (current_token == 0)
\r
675 return current_token;
\r
678 public int xtoken ()
\r
681 bool allow_keyword_as_ident = false;
\r
682 bool doread = false;
\r
686 for (;(c = getChar ()) != -1; col++) {
\r
688 // Handle line comments.
\r
690 int d = getChar ();
\r
691 while ((d = getChar ()) != -1 && (d != '\n'))
\r
696 if (current_token == Token.EOL) // if last token was also EOL keep skipping
\r
707 if (current_token == Token.EOL) // if last token was also EOL keep skipping
\r
712 // Handle identifiers
\r
713 if (is_identifier_start_character ((char) c)){
\r
714 System.Text.StringBuilder id = new System.Text.StringBuilder ();
\r
717 id.Append ((char) c);
\r
719 while ((c = peekChar ()) != -1) {
\r
720 if (is_identifier_part_character ((char) c)){
\r
721 id.Append ((char)getChar ());
\r
727 ids = id.ToString ();
\r
729 if (!is_keyword (ids) || allow_keyword_as_ident) {
\r
731 return Token.IDENTIFIER;
\r
734 // true, false and null are in the hash anyway.
\r
735 return getKeyword (ids);
\r
740 if (Char.IsDigit ((char) peekChar ()))
\r
741 return is_number (c);
\r
745 if (Char.IsDigit ((char) c))
\r
746 return is_number (c);
\r
748 /* For now, ignore pre-processor commands */
\r
749 if (col == 1 && c == '#'){
\r
750 System.Text.StringBuilder s = new System.Text.StringBuilder ();
\r
752 while ((c = getChar ()) != -1 && (c != '\n')){
\r
753 s.Append ((char) c);
\r
755 if (String.Compare (s.ToString (), 0, "line", 0, 4) == 0){
\r
756 string arg = s.ToString ().Substring (5);
\r
759 if ((pos = arg.IndexOf (' ')) != -1 && pos != 0){
\r
760 ref_line = System.Int32.Parse (arg.Substring (0, pos));
\r
763 char [] quotes = { '\"' };
\r
765 ref_name = arg.Substring (pos);
\r
766 ref_name.TrimStart (quotes);
\r
767 ref_name.TrimEnd (quotes);
\r
769 ref_line = System.Int32.Parse (arg);
\r
776 if ((t = is_punct ((char)c, ref doread)) != Token.ERROR){
\r
785 System.Text.StringBuilder s = new System.Text.StringBuilder ();
\r
787 while ((c = getChar ()) != -1){
\r
789 val = s.ToString ();
\r
790 return Token.LITERAL_STRING;
\r
795 return Token.ERROR;
\r
796 s.Append ((char) c);
\r
801 if (c == ' ' || c == '\t' || c == '\f' || c == '\v' || c == '\r'){
\r
803 col = (((col + 8) / 8) * 8) - 1;
\r
809 allow_keyword_as_ident = true;
\r
813 error_details = ((char)c).ToString ();
\r
815 return Token.ERROR;
\r
818 if (current_token != Token.EOL) // if last token wasn´t EOL send it before EOF
\r