2 // Mono.MonoBASIC.Tokenizer.cs: The Tokenizer for the MonoBASIC compiler
4 // Author: A Rafael D Teixeira (rafaelteixeirabr@hotmail.com)
6 // Based on cs-tokenizer.cs by Miguel de Icaza (miguel@gnu.org)
8 // Licensed under the terms of the GNU GPL
10 // Copyright (C) 2001 A Rafael D Teixeira
13 namespace Mono.MonoBASIC
17 using System.Collections;
19 using System.Globalization;
24 /// Tokenizer for MonoBASIC source code.
27 public class Tokenizer : yyParser.yyInput
30 public string ref_name;
31 public int ref_line = 1;
34 public int current_token;
35 bool handle_get_set = false;
37 public int ExpandedTabsSize = 4;
39 public string location {
43 if (current_token == Token.ERROR)
44 det = "detail: " + error_details;
48 return "Line: "+line+" Col: "+col + "\n" +
49 "VirtLine: "+ref_line +
50 " Token: "+current_token + " " + det;
// Exposes the handle_get_set flag: reading returns the current flag,
// assigning replaces it.
public bool properties {
	get { return this.handle_get_set; }
	set { this.handle_get_set = value; }
}
67 static Hashtable keywords;
68 static NumberStyles styles;
69 static NumberFormatInfo csharp_format_info;
72 // Values for the associated token returned
74 System.Text.StringBuilder number;
79 // Details about the error encountered by the tokenizer
// Builds the static keyword table that maps each keyword, spelled in lower
// case, to its Token value. Lookups lower-case the candidate name before
// indexing the table, so every key here must be exactly the lower-case
// keyword with no surrounding whitespace: a key such as "let " can never
// be matched.
static void initTokens ()
{
	keywords = new Hashtable ();

	keywords.Add ("addhandler", Token.ADDHANDLER);
	keywords.Add ("addressof", Token.ADDRESSOF);
	keywords.Add ("alias", Token.ALIAS);
	keywords.Add ("and", Token.AND);
	keywords.Add ("andalso", Token.ANDALSO);
	keywords.Add ("ansi", Token.ANSI);
	keywords.Add ("as", Token.AS);
	keywords.Add ("assembly", Token.ASSEMBLY);
	keywords.Add ("auto", Token.AUTO);
	keywords.Add ("binary", Token.BINARY);
	keywords.Add ("boolean", Token.BOOLEAN);
	keywords.Add ("byref", Token.BYREF);
	keywords.Add ("byte", Token.BYTE);
	keywords.Add ("byval", Token.BYVAL);
	keywords.Add ("call", Token.CALL);
	keywords.Add ("case", Token.CASE);
	keywords.Add ("catch", Token.CATCH);
	keywords.Add ("cbool", Token.CBOOL);
	keywords.Add ("cbyte", Token.CBYTE);
	keywords.Add ("cchar", Token.CCHAR);
	keywords.Add ("cdate", Token.CDATE);
	keywords.Add ("cdec", Token.CDEC);
	keywords.Add ("cdbl", Token.CDBL);
	keywords.Add ("char", Token.CHAR);
	keywords.Add ("cint", Token.CINT);
	keywords.Add ("class", Token.CLASS);
	keywords.Add ("clng", Token.CLNG);
	keywords.Add ("cobj", Token.COBJ);
	keywords.Add ("compare", Token.COMPARE);
	keywords.Add ("const", Token.CONST);
	keywords.Add ("cshort", Token.CSHORT);
	keywords.Add ("csng", Token.CSNG);
	keywords.Add ("cstr", Token.CSTR);
	keywords.Add ("ctype", Token.CTYPE);
	keywords.Add ("date", Token.DATE);
	keywords.Add ("decimal", Token.DECIMAL);
	keywords.Add ("declare", Token.DECLARE);
	keywords.Add ("default", Token.DEFAULT);
	keywords.Add ("delegate", Token.DELEGATE);
	keywords.Add ("dim", Token.DIM);
	keywords.Add ("do", Token.DO);
	keywords.Add ("double", Token.DOUBLE);
	keywords.Add ("each", Token.EACH);
	keywords.Add ("else", Token.ELSE);
	keywords.Add ("elseif", Token.ELSEIF);
	keywords.Add ("end", Token.END);
	keywords.Add ("enum", Token.ENUM);
	keywords.Add ("erase", Token.ERASE);
	keywords.Add ("error", Token.ERROR);
	keywords.Add ("event", Token.EVENT);
	keywords.Add ("exit", Token.EXIT);
	keywords.Add ("explicit", Token.EXPLICIT);
	keywords.Add ("false", Token.FALSE);
	keywords.Add ("finally", Token.FINALLY);
	keywords.Add ("for", Token.FOR);
	keywords.Add ("friend", Token.FRIEND);
	keywords.Add ("function", Token.FUNCTION);
	keywords.Add ("get", Token.GET);
	// "gettype" was left disabled in the original table and stays unregistered.
	//keywords.Add ("gettype", Token.GETTYPE);
	keywords.Add ("goto", Token.GOTO);
	keywords.Add ("handles", Token.HANDLES);
	keywords.Add ("if", Token.IF);
	keywords.Add ("implements", Token.IMPLEMENTS);
	keywords.Add ("imports", Token.IMPORTS);
	keywords.Add ("in", Token.IN);
	keywords.Add ("inherits", Token.INHERITS);
	keywords.Add ("integer", Token.INTEGER);
	keywords.Add ("interface", Token.INTERFACE);
	keywords.Add ("is", Token.IS);
	// These three keys previously carried a trailing space, which made the
	// LET, LIB and LIKE keywords unreachable through the lower-cased lookup.
	keywords.Add ("let", Token.LET);
	keywords.Add ("lib", Token.LIB);
	keywords.Add ("like", Token.LIKE);
	keywords.Add ("long", Token.LONG);
	keywords.Add ("loop", Token.LOOP);
	keywords.Add ("me", Token.ME);
	keywords.Add ("mod", Token.MOD);
	keywords.Add ("module", Token.MODULE);
	keywords.Add ("mustinherit", Token.MUSTINHERIT);
	keywords.Add ("mustoverride", Token.MUSTOVERRIDE);
	keywords.Add ("mybase", Token.MYBASE);
	keywords.Add ("myclass", Token.MYCLASS);
	keywords.Add ("namespace", Token.NAMESPACE);
	keywords.Add ("new", Token.NEW);
	keywords.Add ("next", Token.NEXT);
	keywords.Add ("not", Token.NOT);
	keywords.Add ("nothing", Token.NOTHING);
	keywords.Add ("notinheritable", Token.NOTINHERITABLE);
	keywords.Add ("notoverridable", Token.NOTOVERRIDABLE);
	keywords.Add ("object", Token.OBJECT);
	keywords.Add ("off", Token.OFF);
	keywords.Add ("on", Token.ON);
	keywords.Add ("option", Token.OPTION);
	keywords.Add ("optional", Token.OPTIONAL);
	keywords.Add ("or", Token.OR);
	keywords.Add ("orelse", Token.ORELSE);
	keywords.Add ("overloads", Token.OVERLOADS);
	keywords.Add ("overridable", Token.OVERRIDABLE);
	keywords.Add ("overrides", Token.OVERRIDES);
	keywords.Add ("paramarray", Token.PARAM_ARRAY);
	keywords.Add ("preserve", Token.PRESERVE);
	keywords.Add ("private", Token.PRIVATE);
	keywords.Add ("property", Token.PROPERTY);
	keywords.Add ("protected", Token.PROTECTED);
	keywords.Add ("public", Token.PUBLIC);
	keywords.Add ("raiseevent", Token.RAISEEVENT);
	keywords.Add ("readonly", Token.READONLY);
	keywords.Add ("redim", Token.REDIM);
	keywords.Add ("rem", Token.REM);
	keywords.Add ("removehandler", Token.REMOVEHANDLER);
	keywords.Add ("resume", Token.RESUME);
	keywords.Add ("return", Token.RETURN);
	keywords.Add ("select", Token.SELECT);
	keywords.Add ("set", Token.SET);
	keywords.Add ("shadows", Token.SHADOWS);
	keywords.Add ("shared", Token.SHARED);
	keywords.Add ("short", Token.SHORT);
	keywords.Add ("single", Token.SINGLE);
	keywords.Add ("sizeof", Token.SIZEOF);
	keywords.Add ("static", Token.STATIC);
	keywords.Add ("step", Token.STEP);
	keywords.Add ("stop", Token.STOP);
	keywords.Add ("strict", Token.STRICT);
	keywords.Add ("string", Token.STRING);
	keywords.Add ("structure", Token.STRUCTURE);
	keywords.Add ("sub", Token.SUB);
	keywords.Add ("synclock", Token.SYNCLOCK);
	keywords.Add ("text", Token.TEXT);
	keywords.Add ("then", Token.THEN);
	keywords.Add ("throw", Token.THROW);
	keywords.Add ("to", Token.TO);
	keywords.Add ("true", Token.TRUE);
	keywords.Add ("try", Token.TRY);
	keywords.Add ("typeof", Token.TYPEOF);
	keywords.Add ("unicode", Token.UNICODE);
	keywords.Add ("until", Token.UNTIL);
	keywords.Add ("variant", Token.VARIANT);
	keywords.Add ("when", Token.WHEN);
	keywords.Add ("while", Token.WHILE);
	keywords.Add ("with", Token.WITH);
	keywords.Add ("withevents", Token.WITHEVENTS);
	keywords.Add ("writeonly", Token.WRITEONLY);
	keywords.Add ("xor", Token.XOR);
}
255 csharp_format_info = new NumberFormatInfo ();
256 csharp_format_info.CurrencyDecimalSeparator = ".";
257 styles = NumberStyles.AllowExponent | NumberStyles.AllowDecimalPoint;
260 bool is_keyword (string name)
264 res = keywords.Contains(name.ToLower());
265 if ((name == "get" || name == "set") && handle_get_set == false)
// Returns the Token value stored in the keyword table for the given name.
// The name is lower-cased before the lookup. Callers are expected to pass
// only names present in the table; a missing entry yields null, and the
// cast below then fails.
int getKeyword (string name)
{
	string lowered = name.ToLower ();
	object token = keywords [lowered];
	return (int) token;
}
// Produces a fresh Location built from the current ref_line value.
public Location Location {
	get {
		int line_for_location = ref_line;
		return new Location (line_for_location);
	}
}
// Reads or replaces the handle_get_set flag.
public bool PropertyParsing {
	get { return this.handle_get_set; }
	set { this.handle_get_set = value; }
}
// True when c may begin an identifier: any letter, or an underscore.
bool is_identifier_start_character (char c)
{
	if (Char.IsLetter (c))
		return true;

	return c == '_';
}
// True when c may appear after the first character of an identifier:
// an underscore, a letter, or a decimal digit.
bool is_identifier_part_character (char c)
{
	if (c == '_')
		return true;

	// IsLetterOrDigit accepts exactly the characters accepted by
	// IsLetter or IsDigit.
	return Char.IsLetterOrDigit (c);
}
301 int is_punct (char c, ref bool doread)
303 int idx = "{}[](),:;~+-*/%&|^!=<>?".IndexOf (c);
311 return Token.OPEN_BRACKET;
313 return Token.CLOSE_BRACKET;
315 return Token.OPEN_BRACE;
317 return Token.CLOSE_BRACE;
319 return Token.OPEN_PARENS;
321 return Token.CLOSE_PARENS;
325 // return Token.COLON;
329 return Token.OP_CONCAT;
338 t = Token.OP_ADD_ASSIGN;
346 t = Token.OP_SUB_ASSIGN;
364 return Token.OP_MULT_ASSIGN;
372 return Token.OP_DIV_ASSIGN;
380 return Token.OP_IDIV_ASSIGN;
382 return Token.OP_IDIV;
388 return Token.OP_EXP_ASSIGN;
416 return Token.ATTR_ASSIGN;
423 bool decimal_digits (int c)
426 bool seen_digits = false;
429 number.Append ((char) c);
431 while ((d = peekChar ()) != -1){
432 if (Char.IsDigit ((char)d)){
433 number.Append ((char) d);
442 void hex_digits (int c)
447 number.Append ((char) c);
448 while ((d = peekChar ()) != -1){
449 char e = Char.ToUpper ((char) d);
451 if (Char.IsDigit (e) ||
452 (e >= 'A' && e <= 'F')){
453 number.Append ((char) e);
460 int real_type_suffix (int c)
466 t = Token.LITERAL_SINGLE;
469 t = Token.LITERAL_DOUBLE;
472 t= Token.LITERAL_DECIMAL;
// Classifies an integer literal from the character that follows it.
// The character is currently ignored and every literal is reported as
// Token.LITERAL_INTEGER.
int integer_type_suffix (int c)
{
	// FIXME: the U and L suffixes are not handled yet; the literal should
	// also be given whichever integer type it fits best according to the spec.
	int kind = Token.LITERAL_INTEGER;
	return kind;
}
// Converts the digits collected in `number` to an Int32 and stores the
// result in `val`. The parameter t is not consulted.
void adjust_int (int t)
{
	val = new System.Int32 ();

	// NumberStyles.None (numeric value 0) accepts plain decimal digits only.
	string digits = number.ToString ();
	val = System.Int32.Parse (digits, NumberStyles.None);
}
495 int adjust_real (int t)
497 string s = number.ToString ();
499 Console.WriteLine (s);
501 case Token.LITERAL_DECIMAL:
502 val = new System.Decimal ();
503 val = System.Decimal.Parse (
504 s, styles, csharp_format_info);
506 case Token.LITERAL_DOUBLE:
507 val = new System.Double ();
508 val = System.Double.Parse (
509 s, styles, csharp_format_info);
511 case Token.LITERAL_SINGLE:
512 val = new System.Double ();
513 val = (float) System.Double.Parse (
514 s, styles, csharp_format_info);
518 val = new System.Double ();
519 val = System.Double.Parse (
520 s, styles, csharp_format_info);
521 t = Token.LITERAL_DOUBLE;
528 // Invoked if we know we have .digits or digits
530 int is_number (int c)
532 bool is_real = false;
533 number = new System.Text.StringBuilder ();
538 if (Char.IsDigit ((char)c)){
// A hex literal needs a leading '0' followed by 'x' or 'X'. '&&' binds
// tighter than '||', so without the parentheses any digit followed by
// 'X' was also taken as a hex prefix.
if (c == '0' && (peekChar () == 'x' || peekChar () == 'X')){
542 val = new System.Int32 ();
543 val = System.Int32.Parse (number.ToString (), NumberStyles.HexNumber);
544 return integer_type_suffix (peekChar ());
551 // We need to handle the case of
552 // "1.1" vs "1.string" (LITERAL_SINGLE vs NUMBER DOT IDENTIFIER)
555 if (decimal_digits ('.')){
561 adjust_int (Token.LITERAL_INTEGER);
562 return Token.LITERAL_INTEGER;
566 if (c == 'e' || c == 'E'){
573 number.Append ((char) c);
576 } else if (c == '-'){
577 number.Append ((char) c);
585 type = real_type_suffix (c);
586 if (type == Token.NONE && !is_real){
587 type = integer_type_suffix (c);
595 return adjust_real (type);
597 Console.WriteLine ("This should not be reached");
598 throw new Exception ("Is Number should never reach this point");
634 error_details = "cs1009: Unrecognized escape sequence " + (char)d;
643 if (putback_char != -1){
644 int x = putback_char;
649 return reader.Read ();
654 if (putback_char != -1)
656 return reader.Peek ();
661 if (putback_char != -1)
662 throw new Exception ("This should not happen putback on putback");
// Reports whether the tokenizer can still advance: false once the
// current token is Token.EOF, true otherwise.
public bool advance ()
{
	if (current_token == Token.EOF)
		return false;

	return true;
}
671 public Object Value {
677 public Object value ()
682 private bool IsEOL(int currentChar)
684 if (currentChar == 0x0D)
686 if (peekChar() == 0x0A) // if it is a CR-LF pair consume LF also
691 return (currentChar == -1 || currentChar == 0x0A || currentChar == 0x2028 || currentChar == 0x2029);
694 private int DropComments()
697 while (!IsEOL(d = getChar ()))
708 int lastToken = current_token;
711 current_token = xtoken ();
712 if (current_token == 0)
714 if (current_token == Token.REM)
715 current_token = DropComments();
716 } while (lastToken == Token.EOL && current_token == Token.EOL);
718 return current_token;
721 private string GetIdentifier()
724 if (is_identifier_start_character ((char) c))
725 return GetIdentifier(c);
730 private string GetIdentifier(int c)
732 System.Text.StringBuilder id = new System.Text.StringBuilder ();
734 id.Append ((char) c);
736 while ((c = peekChar ()) != -1)
738 if (is_identifier_part_character ((char) c))
740 id.Append ((char)getChar ());
747 return id.ToString ();
757 for (;(c = getChar ()) != -1; col++) {
759 // Handle line comments.
763 // Handle line continuation character
765 while ((c = getChar ()) != -1 && (c != '\n')){}
774 if (current_token == Token.EOL) // if last token was also EOL keep skipping
779 // Handle escaped identifiers
782 if ((val = GetIdentifier()) == null)
784 if ((c = getChar()) != ']')
786 return Token.IDENTIFIER;
789 // Handle unescaped identifiers
790 if (is_identifier_start_character ((char) c))
793 if ((id = GetIdentifier(c)) == null)
797 return getKeyword(id);
798 return Token.IDENTIFIER;
801 // handle numeric literals
803 if (Char.IsDigit ((char) peekChar ()))
804 return is_number (c);
808 if (Char.IsDigit ((char) c))
809 return is_number (c);
811 /* For now, limited support for pre-processor commands */
812 if (col == 1 && c == '#'){
813 System.Text.StringBuilder s = new System.Text.StringBuilder ();
815 while ((c = getChar ()) != -1 && (c != '\n')){
818 if (String.Compare (s.ToString (), 0, "line", 0, 4) == 0){
819 string arg = s.ToString ().Substring (5);
822 if ((pos = arg.IndexOf (' ')) != -1 && pos != 0){
823 ref_line = System.Int32.Parse (arg.Substring (0, pos));
char [] quotes = { '\"' };

// Strings are immutable: TrimStart/TrimEnd return a new string, so the
// trimmed result must be assigned. Trim (quotes) strips the quote
// characters from both ends in one call.
ref_name = arg.Substring (pos).Trim (quotes);
832 ref_line = System.Int32.Parse (arg);
839 if ((t = is_punct ((char)c, ref doread)) != Token.ERROR){
847 // Treat string literals
849 System.Text.StringBuilder s = new System.Text.StringBuilder ();
851 while ((c = getChar ()) != -1){
852 if (c == '"'){ // TODO: treat double-doublequotes
854 return Token.LITERAL_STRING;
864 // expand tabs for location and ignore it as whitespace
867 col = (((col + ExpandedTabsSize) / ExpandedTabsSize) * ExpandedTabsSize) - 1;
872 if (c == ' ' || c == '\f' || c == '\v')
875 error_details = ((char)c).ToString ();
880 if (current_token != Token.EOL) // if last token wasn´t EOL send it before EOF
886 public void cleanup ()
888 /* borrowed from mcs - have to work it to have preprocessing in mbas
890 if (ifstack != null && ifstack.Count >= 1) {
891 int state = (int) ifstack.Pop ();
892 if ((state & REGION) != 0)
893 Report.Error (1038, "#endregion directive expected");
895 Report.Error (1027, "#endif directive expected");
900 public Tokenizer (System.IO.TextReader input, string fname, ArrayList defines)
902 this.ref_name = fname;
906 Location.Push (fname);