2 // Mono.MonoBASIC.Tokenizer.cs: The Tokenizer for the MonoBASIC compiler
\r
4 // Author: A Rafael D Teixeira (rafaelteixeirabr@hotmail.com)
\r
6 // Based on cs-tokenizer.cs by Miguel de Icaza (miguel@gnu.org)
\r
8 // Licensed under the terms of the GNU GPL
\r
10 // Copyright (C) 2001 A Rafael D Teixeira
\r
13 namespace Mono.MonoBASIC
\r
17 using System.Collections;
\r
19 using System.Globalization;
\r
20 using Mono.Languages;
\r
24 /// Tokenizer for MonoBASIC source code.
\r
27 public class Tokenizer : yyParser.yyInput
\r
30 public string ref_name;
\r
31 public int ref_line = 1;
\r
32 public int line = 1;
\r
34 public int current_token;
\r
35 bool handle_get_set = false;
\r
37 public int ExpandedTabsSize = 4;
\r
39 public string location {
\r
43 if (current_token == Token.ERROR)
\r
44 det = "detail: " + error_details;
\r
48 return "Line: "+line+" Col: "+col + "\n" +
\r
49 "VirtLine: "+ref_line +
\r
50 " Token: "+current_token + " " + det;
\r
// Exposes the handle_get_set flag.  is_keyword consults that flag to
// decide whether "get" and "set" are reported as keywords.
public bool properties {
	get { return handle_get_set; }
	set { handle_get_set = value; }
}
\r
67 static Hashtable keywords;
\r
68 static NumberStyles styles;
\r
69 static NumberFormatInfo csharp_format_info;
\r
72 // Values for the associated token returned
\r
74 System.Text.StringBuilder number;
\r
79 // Details about the error encountered by the tokenizer
\r
81 string error_details;
\r
// Read-only access to the text stored in error_details.
public string error {
	get { return error_details; }
}
\r
//
// Fills the static keyword table, mapping each reserved word to its
// parser token.  is_keyword and getKeyword look names up after folding
// them to lower case, so every key must be lower case and must not
// carry any surrounding whitespace.
//
static void initTokens ()
{
	keywords = new Hashtable ();

	keywords.Add ("addhandler", Token.ADDHANDLER);
	keywords.Add ("addressof", Token.ADDRESSOF);
	keywords.Add ("alias", Token.ALIAS);
	keywords.Add ("and", Token.AND);
	keywords.Add ("andalso", Token.ANDALSO);
	keywords.Add ("ansi", Token.ANSI);
	keywords.Add ("as", Token.AS);
	keywords.Add ("assembly", Token.ASSEMBLY);
	keywords.Add ("auto", Token.AUTO);
	keywords.Add ("boolean", Token.BOOLEAN);
	keywords.Add ("byref", Token.BYREF);
	keywords.Add ("byte", Token.BYTE);
	keywords.Add ("byval", Token.BYVAL);
	keywords.Add ("call", Token.CALL);
	keywords.Add ("case", Token.CASE);
	keywords.Add ("catch", Token.CATCH);
	keywords.Add ("cbool", Token.CBOOL);
	keywords.Add ("cbyte", Token.CBYTE);
	keywords.Add ("cchar", Token.CCHAR);
	keywords.Add ("cdate", Token.CDATE);
	keywords.Add ("cdec", Token.CDEC);
	keywords.Add ("cdbl", Token.CDBL);
	keywords.Add ("char", Token.CHAR);
	keywords.Add ("cint", Token.CINT);
	keywords.Add ("class", Token.CLASS);
	keywords.Add ("clng", Token.CLNG);
	keywords.Add ("cobj", Token.COBJ);
	//keywords.Add ("compare", Token.COMPARE);
	keywords.Add ("const", Token.CONST);
	keywords.Add ("cshort", Token.CSHORT);
	keywords.Add ("csng", Token.CSNG);
	keywords.Add ("cstr", Token.CSTR);
	keywords.Add ("ctype", Token.CTYPE);
	keywords.Add ("date", Token.DATE);
	keywords.Add ("decimal", Token.DECIMAL);
	keywords.Add ("declare", Token.DECLARE);
	keywords.Add ("default", Token.DEFAULT);
	keywords.Add ("delegate", Token.DELEGATE);
	keywords.Add ("dim", Token.DIM);
	keywords.Add ("do", Token.DO);
	keywords.Add ("double", Token.DOUBLE);
	keywords.Add ("each", Token.EACH);
	keywords.Add ("else", Token.ELSE);
	keywords.Add ("elseif", Token.ELSEIF);
	keywords.Add ("end", Token.END);
	keywords.Add ("enum", Token.ENUM);
	keywords.Add ("erase", Token.ERASE);
	keywords.Add ("error", Token.ERROR);
	keywords.Add ("event", Token.EVENT);
	keywords.Add ("exit", Token.EXIT);
	//keywords.Add ("explicit", Token.EXPLICIT);
	keywords.Add ("false", Token.FALSE);
	keywords.Add ("finally", Token.FINALLY);
	keywords.Add ("for", Token.FOR);
	keywords.Add ("friend", Token.FRIEND);
	keywords.Add ("function", Token.FUNCTION);
	keywords.Add ("get", Token.GET);
	keywords.Add ("gettype", Token.GETTYPE);
	keywords.Add ("goto", Token.GOTO);
	keywords.Add ("handles", Token.HANDLES);
	keywords.Add ("if", Token.IF);
	keywords.Add ("implements", Token.IMPLEMENTS);
	keywords.Add ("imports", Token.IMPORTS);
	keywords.Add ("in", Token.IN);
	keywords.Add ("inherits", Token.INHERITS);
	keywords.Add ("integer", Token.INTEGER);
	keywords.Add ("interface", Token.INTERFACE);
	keywords.Add ("is", Token.IS);
	// These three keys used to end in a blank ("let ", "lib ", "like "),
	// which no identifier text can contain, so they never matched.
	keywords.Add ("let", Token.LET);
	keywords.Add ("lib", Token.LIB);
	keywords.Add ("like", Token.LIKE);
	keywords.Add ("long", Token.LONG);
	keywords.Add ("loop", Token.LOOP);
	keywords.Add ("me", Token.ME);
	keywords.Add ("mod", Token.MOD);
	keywords.Add ("module", Token.MODULE);
	keywords.Add ("mustinherit", Token.MUSTINHERIT);
	keywords.Add ("mustoverride", Token.MUSTOVERRIDE);
	keywords.Add ("mybase", Token.MYBASE);
	keywords.Add ("myclass", Token.MYCLASS);
	keywords.Add ("namespace", Token.NAMESPACE);
	keywords.Add ("new", Token.NEW);
	keywords.Add ("next", Token.NEXT);
	keywords.Add ("not", Token.NOT);
	keywords.Add ("nothing", Token.NOTHING);
	keywords.Add ("notinheritable", Token.NOTINHERITABLE);
	keywords.Add ("notoverridable", Token.NOTOVERRIDABLE);
	keywords.Add ("object", Token.OBJECT);
	keywords.Add ("on", Token.ON);
	keywords.Add ("option", Token.OPTION);
	keywords.Add ("optional", Token.OPTIONAL);
	keywords.Add ("or", Token.OR);
	keywords.Add ("orelse", Token.ORELSE);
	keywords.Add ("overloads", Token.OVERLOADS);
	keywords.Add ("overridable", Token.OVERRIDABLE);
	keywords.Add ("overrides", Token.OVERRIDES);
	keywords.Add ("paramarray", Token.PARAM_ARRAY);
	keywords.Add ("preserve", Token.PRESERVE);
	keywords.Add ("private", Token.PRIVATE);
	keywords.Add ("property", Token.PROPERTY);
	keywords.Add ("protected", Token.PROTECTED);
	keywords.Add ("public", Token.PUBLIC);
	keywords.Add ("raiseevent", Token.RAISEEVENT);
	keywords.Add ("readonly", Token.READONLY);
	keywords.Add ("redim", Token.REDIM);
	keywords.Add ("rem", Token.REM);
	keywords.Add ("removehandler", Token.REMOVEHANDLER);
	keywords.Add ("resume", Token.RESUME);
	keywords.Add ("return", Token.RETURN);
	keywords.Add ("select", Token.SELECT);
	keywords.Add ("set", Token.SET);
	keywords.Add ("shadows", Token.SHADOWS);
	keywords.Add ("shared", Token.SHARED);
	keywords.Add ("short", Token.SHORT);
	keywords.Add ("single", Token.SINGLE);
	keywords.Add ("sizeof", Token.SIZEOF);
	keywords.Add ("static", Token.STATIC);
	keywords.Add ("step", Token.STEP);
	keywords.Add ("stop", Token.STOP);
	keywords.Add ("string", Token.STRING);
	keywords.Add ("structure", Token.STRUCTURE);
	keywords.Add ("sub", Token.SUB);
	keywords.Add ("synclock", Token.SYNCLOCK);
	keywords.Add ("then", Token.THEN);
	keywords.Add ("throw", Token.THROW);
	keywords.Add ("to", Token.TO);
	keywords.Add ("true", Token.TRUE);
	keywords.Add ("try", Token.TRY);
	keywords.Add ("typeof", Token.TYPEOF);
	keywords.Add ("unicode", Token.UNICODE);
	keywords.Add ("until", Token.UNTIL);
	keywords.Add ("variant", Token.VARIANT);
	keywords.Add ("when", Token.WHEN);
	keywords.Add ("while", Token.WHILE);
	keywords.Add ("with", Token.WITH);
	keywords.Add ("withevents", Token.WITHEVENTS);
	keywords.Add ("writeonly", Token.WRITEONLY);
	keywords.Add ("xor", Token.XOR);
}
\r
246 // Class initializer
\r
248 static Tokenizer ()
\r
251 csharp_format_info = new NumberFormatInfo ();
\r
252 csharp_format_info.CurrencyDecimalSeparator = ".";
\r
253 styles = NumberStyles.AllowExponent | NumberStyles.AllowDecimalPoint;
\r
//
// Returns true when `name` is a reserved word, ignoring case.
// "get" and "set" are only reported as keywords while handle_get_set
// is true.
//
bool is_keyword (string name)
{
	// Fold with the invariant culture so the lookup does not depend on
	// the current locale (the Turkish dotted/dotless I would otherwise
	// break keywords such as "If" or "Integer").
	string lowered = name.ToLower (CultureInfo.InvariantCulture);

	// Compare the folded spelling: testing the raw text let "Get" or
	// "SET" through as keywords even when handle_get_set was false.
	if ((lowered == "get" || lowered == "set") && !handle_get_set)
		return false;

	return keywords.Contains (lowered);
}

//
// Returns the token stored in the keyword table for `name`, folded the
// same way is_keyword folds it.  A name that is not in the table yields
// a null entry, and the unboxing cast then throws.
//
int getKeyword (string name)
{
	return (int) keywords [name.ToLower (CultureInfo.InvariantCulture)];
}
\r
// Builds a new Location from the current ref_line on every read.
public Location Location {
	get { return new Location (ref_line); }
}
\r
// True when c may begin an identifier: an underscore or any character
// that Char.IsLetter accepts.
bool is_identifier_start_character (char c)
{
	if (c == '_')
		return true;

	return Char.IsLetter (c);
}
\r
// True when c may appear after the first character of an identifier:
// an underscore, a letter or a digit.
bool is_identifier_part_character (char c)
{
	if (c == '_')
		return true;

	if (Char.IsLetter (c))
		return true;

	return Char.IsDigit (c);
}
\r
287 int is_punct (char c, ref bool doread)
\r
289 int idx = "{}[](),:;~+-*/%&|^!=<>?".IndexOf (c);
\r
297 // return Token.OPEN_BRACKET;
\r
299 // return Token.CLOSE_BRACKET;
\r
301 return Token.OPEN_PARENS;
\r
303 return Token.CLOSE_PARENS;
\r
305 return Token.COMMA;
\r
307 return Token.COLON;
\r
309 return Token.INTERR;
\r
318 t = Token.OP_ADD_ASSIGN;
\r
326 t = Token.OP_SUB_ASSIGN;
\r
328 return Token.MINUS;
\r
336 return Token.OP_EQ;
\r
338 return Token.ASSIGN;
\r
344 return Token.OP_MULT_ASSIGN;
\r
352 return Token.OP_DIV_ASSIGN;
\r
360 return Token.OP_IDIV_ASSIGN;
\r
362 return Token.OP_IDIV;
\r
368 return Token.OP_EXP_ASSIGN;
\r
370 return Token.OP_EXP;
\r
377 return Token.OP_NE;
\r
381 return Token.OP_LE;
\r
383 return Token.OP_LT;
\r
389 return Token.OP_GE;
\r
391 return Token.OP_GT;
\r
393 return Token.ERROR;
\r
396 bool decimal_digits (int c)
\r
399 bool seen_digits = false;
\r
402 number.Append ((char) c);
\r
404 while ((d = peekChar ()) != -1){
\r
405 if (Char.IsDigit ((char)d)){
\r
406 number.Append ((char) d);
\r
408 seen_digits = true;
\r
412 return seen_digits;
\r
415 void hex_digits (int c)
\r
420 number.Append ((char) c);
\r
421 while ((d = peekChar ()) != -1){
\r
422 char e = Char.ToUpper ((char) d);
\r
424 if (Char.IsDigit (e) ||
\r
425 (e >= 'A' && e <= 'F')){
\r
426 number.Append ((char) e);
\r
433 int real_type_suffix (int c)
\r
438 case 'F': case 'f':
\r
439 t = Token.LITERAL_SINGLE;
\r
441 case 'D': case 'd':
\r
442 t = Token.LITERAL_DOUBLE;
\r
444 case 'M': case 'm':
\r
445 t= Token.LITERAL_DECIMAL;
\r
// Classifies an integer literal.  The argument c is currently ignored.
int integer_type_suffix (int c)
{
	// FIXME: the U and L suffixes are not handled yet, and the integer
	// kind should be picked according to what the spec says the value
	// fits in.  Until then every literal is a LITERAL_INTEGER.
	int kind = Token.LITERAL_INTEGER;

	return kind;
}
\r
// Parses the digits collected in `number` as a 32-bit integer and stores
// the boxed result in `val`.  The parameter t is not used.
// Int32.Parse throws FormatException / OverflowException when the text
// is not a plain digit string or does not fit in an Int32.
void adjust_int (int t)
{
	// The former `val = new System.Int32 ()` was a dead store, replaced on
	// the next line.  NumberStyles.None is the named form of the literal
	// 0 that used to be passed here.
	val = System.Int32.Parse (number.ToString (), NumberStyles.None);
}
\r
468 int adjust_real (int t)
\r
470 string s = number.ToString ();
\r
472 Console.WriteLine (s);
\r
474 case Token.LITERAL_DECIMAL:
\r
475 val = new System.Decimal ();
\r
476 val = System.Decimal.Parse (
\r
477 s, styles, csharp_format_info);
\r
479 case Token.LITERAL_DOUBLE:
\r
480 val = new System.Double ();
\r
481 val = System.Double.Parse (
\r
482 s, styles, csharp_format_info);
\r
484 case Token.LITERAL_SINGLE:
\r
485 val = new System.Double ();
\r
486 val = (float) System.Double.Parse (
\r
487 s, styles, csharp_format_info);
\r
491 val = new System.Double ();
\r
492 val = System.Double.Parse (
\r
493 s, styles, csharp_format_info);
\r
494 t = Token.LITERAL_DOUBLE;
\r
501 // Invoked if we know we have .digits or digits
\r
503 int is_number (int c)
\r
505 bool is_real = false;
\r
506 number = new System.Text.StringBuilder ();
\r
511 if (Char.IsDigit ((char)c)){
\r
// '&&' binds tighter than '||', so the prefix test must be grouped:
// without the parentheses any digit followed by 'X' (e.g. "5X") was
// treated as the start of a hexadecimal literal.
if (c == '0' && (peekChar () == 'x' || peekChar () == 'X')){
\r
515 val = new System.Int32 ();
\r
516 val = System.Int32.Parse (number.ToString (), NumberStyles.HexNumber);
\r
517 return integer_type_suffix (peekChar ());
\r
519 decimal_digits (c);
\r
524 // We need to handle the case of
\r
525 // "1.1" vs "1.string" (LITERAL_SINGLE vs NUMBER DOT IDENTIFIER)
\r
528 if (decimal_digits ('.')){
\r
533 number.Length -= 1;
\r
534 adjust_int (Token.LITERAL_INTEGER);
\r
535 return Token.LITERAL_INTEGER;
\r
539 if (c == 'e' || c == 'E'){
\r
541 number.Append ("e");
\r
546 number.Append ((char) c);
\r
549 } else if (c == '-'){
\r
550 number.Append ((char) c);
\r
554 decimal_digits (-1);
\r
558 type = real_type_suffix (c);
\r
559 if (type == Token.NONE && !is_real){
\r
560 type = integer_type_suffix (c);
\r
568 return adjust_real (type);
\r
570 Console.WriteLine ("This should not be reached");
\r
571 throw new Exception ("Is Number should never reach this point");
\r
607 error_details = "cs1009: Unrecognized escape sequence " + (char)d;
\r
616 if (putback_char != -1){
\r
617 int x = putback_char;
\r
622 return reader.Read ();
\r
627 if (putback_char != -1)
\r
628 return putback_char;
\r
629 return reader.Peek ();
\r
632 void putback (int c)
\r
634 if (putback_char != -1)
\r
635 throw new Exception ("This should not happen putback on putback");
\r
// Reports whether current_token is anything other than Token.EOF.
public bool advance ()
{
	bool at_end = (current_token == Token.EOF);

	return !at_end;
}
\r
644 public Object Value {
\r
650 public Object value ()
\r
655 private bool IsEOL(int currentChar)
\r
657 if (currentChar == 0x0D)
\r
659 if (peekChar() == 0x0A) // if it is a CR-LF pair consume LF also
\r
664 return (currentChar == -1 || currentChar == 0x0A || currentChar == 0x2028 || currentChar == 0x2029);
\r
667 private int DropComments()
\r
670 while (!IsEOL(d = getChar ()))
\r
679 public int token ()
\r
681 int lastToken = current_token;
\r
684 current_token = xtoken ();
\r
685 if (current_token == 0)
\r
687 if (current_token == Token.REM)
\r
688 current_token = DropComments();
\r
689 } while (lastToken == Token.EOL && current_token == Token.EOL);
\r
691 return current_token;
\r
694 private string GetIdentifier()
\r
697 if (is_identifier_start_character ((char) c))
\r
698 return GetIdentifier(c);
\r
703 private string GetIdentifier(int c)
\r
705 System.Text.StringBuilder id = new System.Text.StringBuilder ();
\r
707 id.Append ((char) c);
\r
709 while ((c = peekChar ()) != -1)
\r
711 if (is_identifier_part_character ((char) c))
\r
713 id.Append ((char)getChar ());
\r
720 return id.ToString ();
\r
723 public int xtoken ()
\r
726 bool doread = false;
\r
730 for (;(c = getChar ()) != -1; col++) {
\r
732 // Handle line comments.
\r
742 if (current_token == Token.EOL) // if last token was also EOL keep skipping
\r
747 // Handle escaped identifiers
\r
750 if ((val = GetIdentifier()) == null)
\r
752 if ((c = getChar()) != ']')
\r
754 return Token.IDENTIFIER;
\r
757 // Handle unescaped identifiers
\r
758 if (is_identifier_start_character ((char) c))
\r
761 if ((id = GetIdentifier(c)) == null)
\r
763 if (is_keyword(id))
\r
764 return getKeyword(id);
\r
766 return Token.IDENTIFIER;
\r
769 // handle numeric literals
\r
771 if (Char.IsDigit ((char) peekChar ()))
\r
772 return is_number (c);
\r
776 if (Char.IsDigit ((char) c))
\r
777 return is_number (c);
\r
779 /* For now, limited support for pre-processor commands */
\r
780 if (col == 1 && c == '#'){
\r
781 System.Text.StringBuilder s = new System.Text.StringBuilder ();
\r
783 while ((c = getChar ()) != -1 && (c != '\n')){
\r
784 s.Append ((char) c);
\r
786 if (String.Compare (s.ToString (), 0, "line", 0, 4) == 0){
\r
787 string arg = s.ToString ().Substring (5);
\r
790 if ((pos = arg.IndexOf (' ')) != -1 && pos != 0){
\r
791 ref_line = System.Int32.Parse (arg.Substring (0, pos));
\r
794 char [] quotes = { '\"' };
\r
// Strings are immutable: TrimStart/TrimEnd return the trimmed copy, and
// the old code threw that copy away, leaving the quotes in ref_name.
// The substring also begins at the separating blank found by IndexOf,
// so surrounding whitespace is stripped first, then the quotes.
ref_name = arg.Substring (pos).Trim ().Trim (quotes);
\r
800 ref_line = System.Int32.Parse (arg);
\r
807 if ((t = is_punct ((char)c, ref doread)) != Token.ERROR){
\r
815 // Treat string literals
\r
817 System.Text.StringBuilder s = new System.Text.StringBuilder ();
\r
819 while ((c = getChar ()) != -1){
\r
820 if (c == '"'){ // TODO: treat double-doublequotes
\r
821 val = s.ToString ();
\r
822 return Token.LITERAL_STRING;
\r
827 return Token.ERROR;
\r
828 s.Append ((char) c);
\r
832 // expand tabs for location and ignore it as whitespace
\r
835 col = (((col + ExpandedTabsSize) / ExpandedTabsSize) * ExpandedTabsSize) - 1;
\r
840 if (c == ' ' || c == '\f' || c == '\v')
\r
843 error_details = ((char)c).ToString ();
\r
845 return Token.ERROR;
\r
848 if (current_token != Token.EOL) // if last token wasn´t EOL send it before EOF
\r
854 public Tokenizer (System.IO.TextReader input, string fname, ArrayList defines)
\r
856 this.ref_name = fname;
\r
860 Location.Push (fname);
\r