2 // Mono.MonoBASIC.Tokenizer.cs: The Tokenizer for the MonoBASIC compiler
4 // Author: A Rafael D Teixeira (rafaelteixeirabr@hotmail.com)
5 // : Manjula GHM (mmanjula@novell.com)
6 // Based on cs-tokenizer.cs by Miguel de Icaza (miguel@gnu.org)
8 // Licensed under the terms of the GNU GPL
10 // Copyright (C) 2001 A Rafael D Teixeira
13 namespace Mono.MonoBASIC
17 using System.Collections;
19 using System.Globalization;
24 /// Tokenizer for MonoBASIC source code.
27 public class Tokenizer : yyParser.yyInput
// Token codes tracked by the tokenizer; both start out as Token.ERROR.
35 public int current_token = Token.ERROR;
36 public int last_token = Token.ERROR;
// Backing flag for the `properties` and `PropertyParsing` accessors.
37 bool handle_get_set = false;
// Consulted by is_punct when it chooses between a type-character token
// (e.g. Token.SINGLETYPECHAR, Token.LONGTYPECHAR) and the plain punctuation
// token; the scanner sets and clears it as tokens are read.
38 bool cant_have_a_type_character = false;
// Tab width used when the scanner expands tabs to keep `col` accurate.
40 public int ExpandedTabsSize = 4;
// Human-readable description of the tokenizer position: line, column,
// the "VirtLine" value (ref_line) and the current token code, followed by
// `det`. When the current token is Token.ERROR, `det` carries the text
// "detail: " plus error_details.
// NOTE(review): the branch that sets `det` otherwise is not visible here.
42 public string location {
46 if (current_token == Token.ERROR)
47 det = "detail: " + error_details;
51 return "Line: "+line+" Col: "+col + "\n" +
52 "VirtLine: "+ref_line +
53 " Token: "+current_token + " " + det;
// Gets or sets the handle_get_set flag.
public bool properties {
    get { return handle_get_set; }
    set { handle_get_set = value; }
}
// Keyword text (lower-case) -> token code; filled by initTokens.
70 static Hashtable keywords;
// Number styles and format provider handed to the Parse calls in adjust_real.
71 static NumberStyles styles;
72 static NumberFormatInfo csharp_format_info;
75 // Values for the associated token returned
// One-character pushback slot; the readers treat -1 as "nothing put back".
78 int putback_char = -1;
83 // Details about the error encountered by the tokenizer
// Source-name property. The visible statement registers file_name as the
// current source via Location.SetCurrentSource.
// NOTE(review): the rest of the accessor bodies is not visible in this excerpt.
94 public string Source {
102 Location.SetCurrentSource(file_name);
// Effective-source-name property. The visible statement registers ref_name
// as the current source via Location.SetCurrentSource.
// NOTE(review): the rest of the accessor bodies is not visible in this excerpt.
106 public string EffectiveSource {
112 Location.SetCurrentSource(ref_name);
122 public int EffectiveLine {
// Builds the static keyword table: lower-case keyword text -> token code.
//
// Keys must be stored in lower case and without surrounding blanks: lookups
// lower-case the scanned identifier, and an identifier is made only of
// letters, digits and '_' (see is_identifier_part_character). The earlier
// keys "let " and "lib " carried a trailing blank and therefore could never
// match, so Let and Lib were silently tokenized as plain identifiers.
//
// "yield" is registered only when Parser.UseExtendedSyntax is set.
static void initTokens ()
{
    keywords = new Hashtable ();

    keywords.Add ("addhandler", Token.ADDHANDLER);
    keywords.Add ("addressof", Token.ADDRESSOF);
    keywords.Add ("alias", Token.ALIAS);
    keywords.Add ("and", Token.AND);
    keywords.Add ("andalso", Token.ANDALSO);
    keywords.Add ("ansi", Token.ANSI);
    keywords.Add ("as", Token.AS);
    keywords.Add ("assembly", Token.ASSEMBLY);
    keywords.Add ("auto", Token.AUTO);
    keywords.Add ("binary", Token.BINARY); // Not a VB.NET Keyword
    keywords.Add ("boolean", Token.BOOLEAN);
    keywords.Add ("byref", Token.BYREF);
    keywords.Add ("byte", Token.BYTE);
    keywords.Add ("byval", Token.BYVAL);
    keywords.Add ("call", Token.CALL);
    keywords.Add ("case", Token.CASE);
    keywords.Add ("catch", Token.CATCH);
    keywords.Add ("cbool", Token.CBOOL);
    keywords.Add ("cbyte", Token.CBYTE);
    keywords.Add ("cchar", Token.CCHAR);
    keywords.Add ("cdate", Token.CDATE);
    keywords.Add ("cdec", Token.CDEC);
    keywords.Add ("cdbl", Token.CDBL);
    keywords.Add ("char", Token.CHAR);
    keywords.Add ("cint", Token.CINT);
    keywords.Add ("class", Token.CLASS);
    keywords.Add ("clng", Token.CLNG);
    keywords.Add ("cobj", Token.COBJ);
    keywords.Add ("compare", Token.COMPARE); // Not a VB.NET Keyword
    keywords.Add ("const", Token.CONST);
    keywords.Add ("cshort", Token.CSHORT);
    keywords.Add ("csng", Token.CSNG);
    keywords.Add ("cstr", Token.CSTR);
    keywords.Add ("ctype", Token.CTYPE);
    keywords.Add ("date", Token.DATE);
    keywords.Add ("decimal", Token.DECIMAL);
    keywords.Add ("declare", Token.DECLARE);
    keywords.Add ("default", Token.DEFAULT);
    keywords.Add ("delegate", Token.DELEGATE);
    keywords.Add ("dim", Token.DIM);
    keywords.Add ("directcast", Token.DIRECTCAST);
    keywords.Add ("do", Token.DO);
    keywords.Add ("double", Token.DOUBLE);
    keywords.Add ("each", Token.EACH);
    keywords.Add ("else", Token.ELSE);
    keywords.Add ("elseif", Token.ELSEIF);
    keywords.Add ("end", Token.END);
    keywords.Add ("endif", Token.ENDIF); // An unused VB.NET keyword
    keywords.Add ("enum", Token.ENUM);
    keywords.Add ("erase", Token.ERASE);
    keywords.Add ("error", Token.ERROR);
    keywords.Add ("event", Token.EVENT);
    keywords.Add ("exit", Token.EXIT);
    keywords.Add ("explicit", Token.EXPLICIT); // Not a VB.NET keyword
    keywords.Add ("false", Token.FALSE);
    keywords.Add ("finally", Token.FINALLY);
    keywords.Add ("for", Token.FOR);
    keywords.Add ("friend", Token.FRIEND);
    keywords.Add ("function", Token.FUNCTION);
    keywords.Add ("get", Token.GET);
    keywords.Add ("gettype", Token.GETTYPE);
    keywords.Add ("gosub", Token.GOSUB); // An unused VB.NET keyword
    keywords.Add ("goto", Token.GOTO);
    keywords.Add ("handles", Token.HANDLES);
    keywords.Add ("if", Token.IF);
    keywords.Add ("implements", Token.IMPLEMENTS);
    keywords.Add ("imports", Token.IMPORTS);
    keywords.Add ("in", Token.IN);
    keywords.Add ("inherits", Token.INHERITS);
    keywords.Add ("integer", Token.INTEGER);
    keywords.Add ("interface", Token.INTERFACE);
    keywords.Add ("is", Token.IS);
    // Keys fixed: no trailing blank, otherwise the entry is unreachable.
    keywords.Add ("let", Token.LET); // An unused VB.NET keyword
    keywords.Add ("lib", Token.LIB);
    keywords.Add ("like", Token.LIKE);
    keywords.Add ("long", Token.LONG);
    keywords.Add ("loop", Token.LOOP);
    keywords.Add ("me", Token.ME);
    keywords.Add ("mod", Token.MOD);
    keywords.Add ("module", Token.MODULE);
    keywords.Add ("mustinherit", Token.MUSTINHERIT);
    keywords.Add ("mustoverride", Token.MUSTOVERRIDE);
    keywords.Add ("mybase", Token.MYBASE);
    keywords.Add ("myclass", Token.MYCLASS);
    keywords.Add ("namespace", Token.NAMESPACE);
    keywords.Add ("new", Token.NEW);
    keywords.Add ("next", Token.NEXT);
    keywords.Add ("not", Token.NOT);
    keywords.Add ("nothing", Token.NOTHING);
    keywords.Add ("notinheritable", Token.NOTINHERITABLE);
    keywords.Add ("notoverridable", Token.NOTOVERRIDABLE);
    keywords.Add ("object", Token.OBJECT);
    keywords.Add ("off", Token.OFF); // Not a VB.NET Keyword
    keywords.Add ("on", Token.ON);
    keywords.Add ("option", Token.OPTION);
    keywords.Add ("optional", Token.OPTIONAL);
    keywords.Add ("or", Token.OR);
    keywords.Add ("orelse", Token.ORELSE);
    keywords.Add ("overloads", Token.OVERLOADS);
    keywords.Add ("overridable", Token.OVERRIDABLE);
    keywords.Add ("overrides", Token.OVERRIDES);
    keywords.Add ("paramarray", Token.PARAM_ARRAY);
    keywords.Add ("preserve", Token.PRESERVE);
    keywords.Add ("private", Token.PRIVATE);
    keywords.Add ("property", Token.PROPERTY);
    keywords.Add ("protected", Token.PROTECTED);
    keywords.Add ("public", Token.PUBLIC);
    keywords.Add ("raiseevent", Token.RAISEEVENT);
    keywords.Add ("readonly", Token.READONLY);
    keywords.Add ("redim", Token.REDIM);
    keywords.Add ("rem", Token.REM);
    keywords.Add ("removehandler", Token.REMOVEHANDLER);
    keywords.Add ("resume", Token.RESUME);
    keywords.Add ("return", Token.RETURN);
    keywords.Add ("select", Token.SELECT);
    keywords.Add ("set", Token.SET);
    keywords.Add ("shadows", Token.SHADOWS);
    keywords.Add ("shared", Token.SHARED);
    keywords.Add ("short", Token.SHORT);
    keywords.Add ("single", Token.SINGLE);
    keywords.Add ("sizeof", Token.SIZEOF); // Not a VB.NET Keyword
    keywords.Add ("static", Token.STATIC);
    keywords.Add ("step", Token.STEP);
    keywords.Add ("stop", Token.STOP);
    keywords.Add ("strict", Token.STRICT); // Not a VB.NET Keyword
    keywords.Add ("string", Token.STRING);
    keywords.Add ("structure", Token.STRUCTURE);
    keywords.Add ("sub", Token.SUB);
    keywords.Add ("synclock", Token.SYNCLOCK);
    keywords.Add ("text", Token.TEXT); // Not a VB.NET Keyword
    keywords.Add ("then", Token.THEN);
    keywords.Add ("throw", Token.THROW);
    keywords.Add ("to", Token.TO);
    keywords.Add ("true", Token.TRUE);
    keywords.Add ("try", Token.TRY);
    keywords.Add ("typeof", Token.TYPEOF);
    keywords.Add ("unicode", Token.UNICODE);
    keywords.Add ("until", Token.UNTIL);
    keywords.Add ("variant", Token.VARIANT); // An unused VB.NET keyword
    keywords.Add ("wend", Token.WEND); // An unused VB.NET keyword
    keywords.Add ("when", Token.WHEN);
    keywords.Add ("while", Token.WHILE);
    keywords.Add ("with", Token.WITH);
    keywords.Add ("withevents", Token.WITHEVENTS);
    keywords.Add ("writeonly", Token.WRITEONLY);
    keywords.Add ("xor", Token.XOR);

    if (Parser.UseExtendedSyntax) {
        keywords.Add ("yield", Token.YIELD);
    }
}
297 csharp_format_info = new NumberFormatInfo ();
298 csharp_format_info.CurrencyDecimalSeparator = ".";
299 styles = NumberStyles.AllowExponent | NumberStyles.AllowDecimalPoint;
// Creates a tokenizer from a text reader, a file name and a list of defines.
// NOTE(review): only the signature and the comment below are visible here;
// the comment indicates an EOL is put back before the first real character.
302 public Tokenizer (System.IO.TextReader input, string fname, ArrayList defines)
308 // putback an EOL at the beginning of a stream. This is a convenience that
309 // allows pre-processor directives to be added to the beginning of a vb file.
// Returns true when `name` is a keyword, compared case-insensitively.
//
// The keyword table holds lower-case ASCII keys, so the name is folded with
// the invariant culture. A culture-sensitive ToLower() maps 'I' to a dotless
// 'i' under Turkish/Azeri locales, which makes keywords such as "If" or
// "Imports" miss the table and come back as identifiers.
//
// NOTE(review): the previous code also tested `name == "GET" || name == "SET"`
// together with handle_get_set, but only after `name` had been lower-cased,
// so the test could never be true and Get/Set were always reported as
// keywords. That observable behaviour is kept; the unreachable test is gone.
bool is_keyword (string name)
{
    return keywords.Contains (name.ToLower (CultureInfo.InvariantCulture));
}

// Returns the token code registered for keyword `name` (case-insensitive,
// folded with the invariant culture exactly like is_keyword).
// Call only after is_keyword returned true: a missing key yields null from
// the Hashtable and the unboxing cast then throws.
int getKeyword (string name)
{
    return (int) keywords [name.ToLower (CultureInfo.InvariantCulture)];
}
// Current position as a Location built from ref_line and col.
public Location Location {
    get {
        Location here = new Location (ref_line, col);
        return here;
    }
}
// Gets or sets the handle_get_set flag (same flag as `properties`).
public bool PropertyParsing {
    get { return handle_get_set; }
    set { handle_get_set = value; }
}
// True when c may begin an identifier: an underscore or any letter.
bool is_identifier_start_character (char c)
{
    if (c == '_')
        return true;
    return Char.IsLetter (c);
}
// True when c may appear inside an identifier: an underscore, a letter or a
// decimal digit.
bool is_identifier_part_character (char c)
{
    if (c == '_')
        return true;
    return Char.IsLetterOrDigit (c);
}
// Classifies punctuation character c and returns its token code; c is also
// recorded in error_details.
// NOTE(review): the switch/case labels are not visible in this excerpt, so
// the character behind each return below is not restated here.
355 int is_punct (char c, ref bool doread)
362 error_details = c.ToString();
368 return Token.OPEN_BRACKET;
370 return Token.CLOSE_BRACKET;
372 return Token.OPEN_BRACE;
374 return Token.CLOSE_BRACE;
376 return Token.OPEN_PARENS;
378 return Token.CLOSE_PARENS;
// EXCLAMATION is returned when an identifier start follows or when a type
// character is not allowed here; otherwise it is a type character.
384 if (is_identifier_start_character((char)d) || cant_have_a_type_character)
385 return Token.EXCLAMATION;
386 return Token.SINGLETYPECHAR;
// The next three checks consult cant_have_a_type_character; the statement
// chosen when the flag is set is not visible in this excerpt.
388 if (cant_have_a_type_character)
390 return Token.DOLAR_SIGN;
392 if (cant_have_a_type_character)
394 return Token.AT_SIGN;
396 if (cant_have_a_type_character)
398 return Token.PERCENT;
// With the flag set a date/time literal is extracted instead.
402 if (cant_have_a_type_character)
403 return ExtractDateTimeLiteral();
405 return Token.NUMBER_SIGN;
// When a type character is allowed this is the Long type character;
// otherwise try an integer literal written in another base.
413 if (!cant_have_a_type_character)
414 return Token.LONGTYPECHAR;
415 t = handle_integer_literal_in_other_bases(d);
416 if (t == Token.NONE) {
447 return Token.OP_IDIV;
467 return Token.OP_SHIFT_LEFT;
480 return Token.OP_SHIFT_RIGHT;
488 return Token.ATTR_ASSIGN;
// Collects decimal digits into `number`: c is appended, then the input is
// peeked and every character for which Char.IsDigit holds is appended too.
// NOTE(review): any guard around the first Append, the loop exit and the
// return value (presumably seen_digits) are not visible in this excerpt.
496 bool decimal_digits (int c)
499 bool seen_digits = false;
502 number.Append ((char) c);
503 while ((d = peekChar ()) != -1){
504 if (Char.IsDigit ((char)d)){
505 number.Append ((char) d);
// Chooses a real-literal token from suffix character c: one of
// Token.LITERAL_SINGLE, Token.LITERAL_DOUBLE or Token.LITERAL_DECIMAL.
// NOTE(review): the suffix characters tested are not visible in this excerpt.
515 int real_type_suffix (int c)
521 t = Token.LITERAL_SINGLE;
524 t = Token.LITERAL_DOUBLE;
527 t= Token.LITERAL_DECIMAL;
// Picks the token for an integer literal from suffix character c and narrows
// `val` to a matching integer width through IConvertible.
// Every visible branch yields Token.LITERAL_INTEGER; the "SHORT ?" / "LONG ?"
// remarks mark spots where a more specific token was being considered.
// NOTE(review): the suffix characters tested are not visible in this excerpt.
536 int integer_type_suffix (int c)
544 t = Token.LITERAL_INTEGER; // SHORT ?
546 // hexadecimal literals - like &H8000S is "-32768"
547 // and not an overflow exception
548 // Check for other literals ???
554 val = ((IConvertible)val).ToInt16(null);
557 t = Token.LITERAL_INTEGER;
558 val = ((IConvertible)val).ToInt32(null);
561 t= Token.LITERAL_INTEGER; // LONG ?
562 val = ((IConvertible)val).ToInt64(null);
// A value inside the Int32 range is stored as Int32 ...
565 if ((long)val <= System.Int32.MaxValue &&
566 (long)val >= System.Int32.MinValue) {
567 val = ((IConvertible)val).ToInt32(null);
568 return Token.LITERAL_INTEGER;
// ... presumably anything else is kept as Int64.
570 val = ((IConvertible)val).ToInt64(null);
571 return Token.LITERAL_INTEGER; // LONG ?
// Catch-all handler; its body is not visible in this excerpt.
576 } catch (Exception e) {
// Parses the text accumulated in `number` into `val` according to token t,
// using `styles` and `csharp_format_info` for every Parse call.
// NOTE(review): each `val = new System.X ()` below is overwritten by the
// Parse result in the very next statement, so those assignments have no effect.
582 int adjust_real (int t)
584 string s = number.ToString ();
587 case Token.LITERAL_DECIMAL:
588 val = new System.Decimal ();
589 val = System.Decimal.Parse (
590 s, styles, csharp_format_info);
592 case Token.LITERAL_DOUBLE:
593 val = new System.Double ();
594 val = System.Double.Parse (
595 s, styles, csharp_format_info);
// Single literals are parsed as Double and then cast down to float.
597 case Token.LITERAL_SINGLE:
598 val = new System.Double ();
599 val = (float) System.Double.Parse (
600 s, styles, csharp_format_info);
// Remaining case (its label is not visible here): parse as Double and
// report the literal as Token.LITERAL_DOUBLE.
604 val = new System.Double ();
605 val = System.Double.Parse (
606 s, styles, csharp_format_info);
607 t = Token.LITERAL_DOUBLE;
615 StringBuilder hexNumber = new StringBuilder ();
619 while ((d = peekChar ()) != -1){
620 char e = Char.ToUpper ((char) d);
622 if (Char.IsDigit (e) || (e >= 'A' && e <= 'F')){
623 hexNumber.Append (e);
628 lon = System.Int64.Parse (hexNumber.ToString(), NumberStyles.HexNumber);
634 long valueToReturn = 0;
638 while ((d = peekChar ()) != -1){
640 if (Char.IsDigit (e) && (e < '8')){
642 valueToReturn += (d - (int)'0');
648 return valueToReturn;
// Handles an integer literal written in another base, selected by `peek`:
// 'h'/'H' selects the hexadecimal path and 'o'/'O' the octal path. In both
// cases the token comes from integer_type_suffix applied to the next peeked
// character.
// NOTE(review): the hexadecimal digit reading and the fall-through return
// are not visible in this excerpt.
651 int handle_integer_literal_in_other_bases(int peek)
653 if (peek == 'h' || peek == 'H'){
656 return integer_type_suffix (peekChar ());
659 if (peek == 'o' || peek == 'O'){
661 val = octal_digits ();
662 return integer_type_suffix (peekChar ());
669 // Invoked if we know we have .digits or digits
// Scans a numeric literal beginning with character c into `number` and
// returns its token. Integer text is parsed with Int64.Parse and classified
// by integer_type_suffix; real text goes through real_type_suffix and
// adjust_real.
671 int is_number (int c)
673 bool is_real = false;
674 number = new StringBuilder ();
676 bool non_prefixdecimal = false; //To capture decimals like .50
680 if (Char.IsDigit ((char)c)){
683 non_prefixdecimal = true;
687 // We need to handle the case of
688 // "1.1" vs "1.ToString()" (LITERAL_SINGLE vs NUMBER DOT IDENTIFIER)
691 if (non_prefixdecimal == false)
693 if (decimal_digits (getChar())){
// Presumably the "1.ToString()" case: what was read so far is treated as an
// integer and '.' is passed on as the suffix character.
699 val = System.Int64.Parse(number.ToString());
700 return integer_type_suffix('.');
// Exponent part: 'e'/'E', with a '-' sign handled below.
704 if (c == 'e' || c == 'E'){
711 number.Append ((char) c);
714 } else if (c == '-'){
715 number.Append ((char) c);
// No real suffix and nothing real seen: the literal is an integer.
723 type = real_type_suffix (c);
724 if (type == Token.NONE && !is_real){
725 val = System.Int64.Parse(number.ToString());
726 return integer_type_suffix(c);
729 return adjust_real (type);
734 if (putback_char != -1){
735 int x = putback_char;
740 return reader.Read ();
745 if (putback_char != -1)
747 return reader.Peek ();
753 if (putback_char != -1)
754 throw new Exception ("This should not happen putback on putback");
// Reports whether more tokens are available: false once the current token
// is Token.EOF, true otherwise.
public bool advance ()
{
    if (current_token == Token.EOF)
        return false;
    return true;
}
763 public Object Value {
769 public Object value ()
// Tells whether currentChar ends a line. For CR (0x0D) the next character
// is peeked so that a CR-LF pair counts as a single line break. The visible
// test also accepts end of input (-1), LF (0x0A), U+2028 and U+2029.
// NOTE(review): the statements between these lines are not visible here.
774 private bool IsEOL(int currentChar)
778 if (currentChar == 0x0D) {
779 if (peekChar() == 0x0A) // if it is a CR-LF pair consume LF also
785 retVal = (currentChar == -1 || currentChar == 0x0A || currentChar == 0x2028 || currentChar == 0x2029);
// Skips the rest of a comment: characters are consumed with getChar until
// IsEOL reports an end of line. The caller stores the result as the current
// token.
// NOTE(review): the loop body and the returned value are not visible here.
795 private int DropComments()
798 while (!IsEOL(/*d =*/ getChar ()))
// putbacktoken: tested and cleared at the start of the low-level scanner,
// and cleared by the colon/EOL handling of the token reader.
804 public bool putbacktoken = false;
// NOTE(review): no use of `flag` is visible in this excerpt.
805 public bool flag = false;
810 int before_last_token = last_token;
811 last_token = current_token;
814 current_token = xtoken ();
815 if(current_token == Token.END) {
816 next_token = xtoken();
818 if (next_token == Token.EOL)
819 return Token.END_EOL;
823 if (current_token == Token.COLON) {
824 next_token = xtoken();
826 if (next_token == Token.EOL) {
827 if (last_token != Token.LABELNAME && last_token != Token.LITERAL_INTEGER) {
828 current_token = Token.EOL;
829 putbacktoken = false;
831 else if (before_last_token == Token.GOTO) {
832 current_token = Token.EOL;
833 putbacktoken = false;
837 if (current_token == 0)
839 if (current_token == Token.REM)
840 current_token = DropComments();
841 } while (last_token == Token.EOL && current_token == Token.EOL);
843 return current_token;
// Parameterless overload: when character c is an identifier start, the
// identifier is read by GetIdentifier(c). Callers treat a null result as
// "no identifier".
// NOTE(review): where c comes from and the fallback return are not visible here.
846 private string GetIdentifier()
849 if (is_identifier_start_character ((char) c))
850 return GetIdentifier(c);
// Decides whether the identifier just read is a label by looking at the
// next input character without consuming it.
// NOTE(review): the actual test on c is not visible in this excerpt.
855 private bool IsLabel ()
857 char c = (char) peekChar();
// Reads an identifier whose first character is c: c is appended, then input
// characters are peeked and each identifier-part character is consumed and
// appended. Returns the collected text.
862 private string GetIdentifier(int c)
864 StringBuilder id = new StringBuilder ();
866 id.Append ((char) c);
868 while ((c = peekChar ()) != -1)
870 if (is_identifier_part_character ((char) c))
872 id.Append ((char)getChar ());
// A type character may follow an identifier, so the flag is cleared here.
// NOTE(review): whether this runs on every path is not visible in this excerpt.
879 cant_have_a_type_character = false;
881 return id.ToString();
// True when currentChar is a double-quote: the ASCII quote or one of the
// typographic quotes U+201C / U+201D.
private bool is_doublequote(int currentChar)
{
    switch (currentChar) {
    case '"':
    case 0x201C: // unicode left double-quote character
    case 0x201D: // unicode right double-quote character
        return true;
    default:
        return false;
    }
}
// True when c is blank space inside a line: space, tab, vertical tab,
// carriage return or the no-break space (0xa0).
private bool is_whitespace(int c)
{
    switch (c) {
    case ' ':
    case '\t':
    case '\v':
    case '\r':
    case 0xa0:
        return true;
    }
    return false;
}
// The identifier branches of the scanner read !tokens_seen as "this is the
// first token in the line"; PositionCursorAtNextPreProcessorDirective
// resets it to false.
896 private bool tokens_seen = false;
// Line-advance helper; the visible statement sets
// cant_have_a_type_character to true.
// NOTE(review): the remaining statements are not visible in this excerpt.
898 private void nextLine()
900 cant_have_a_type_character = true;
913 if (putbacktoken == true) {
914 putbacktoken = false;
919 for (;(c = getChar ()) != -1; col++) {
921 // Handle line continuation character
925 if (!is_identifier_part_character((char)d)) {
926 while ((c = getChar ()) != -1 && !IsEOL(c)) {}
934 if (is_whitespace(c)) {
935 // expand tabs for location
937 col = (((col + ExpandedTabsSize) / ExpandedTabsSize) * ExpandedTabsSize) - 1;
938 cant_have_a_type_character = true;
942 // Handle line comments.
949 if (current_token == Token.EOL) // if last token was also EOL keep skipping
954 // Handle escaped identifiers
957 bool is_first_token_in_line = !tokens_seen;
958 if ((val = GetIdentifier()) == null)
960 if ((c = getChar()) != ']')
963 if (IsLabel() && is_first_token_in_line)
964 return Token.LABELNAME;
966 if (last_token == Token.GOTO)
967 return Token.LABELNAME;
968 return Token.IDENTIFIER;
971 // Handle unescaped identifiers
972 if (is_identifier_start_character ((char) c))
975 bool is_first_token_in_line = !tokens_seen;
976 if ((id = GetIdentifier(c)) == null)
980 if (is_keyword(id) && (current_token != Token.DOT))
981 return getKeyword(id);
983 if (IsLabel() && is_first_token_in_line)
984 return Token.LABELNAME;
986 if (last_token == Token.GOTO)
987 return Token.LABELNAME;
988 return Token.IDENTIFIER;
991 // Treat string literals
992 if (is_doublequote(c)) {
993 cant_have_a_type_character = true;
994 return ExtractStringOrCharLiteral(c);
997 // handle numeric literals
999 if (Char.IsDigit ((char) c))
1001 cant_have_a_type_character = false;
1003 return is_number (c);
1008 cant_have_a_type_character = true;
1010 if (Char.IsDigit ((char) peekChar ()))
1011 return is_number (c);
1014 if ((t = is_punct ((char)c, ref doread)) != Token.ERROR) {
1015 cant_have_a_type_character = true;
1017 if (t == Token.NONE)
1028 error_details = ((char)c).ToString ();
1032 if (current_token != Token.EOL) // if last token wasn't EOL send it before EOF
// Reads a date/time literal: characters are consumed with getChar (advancing
// col), the collected text is parsed by ParseDateLiteral into `val`, and
// Token.LITERAL_DATE is returned.
// NOTE(review): the terminating condition and what is appended to sb are not
// visible in this excerpt.
1038 private int ExtractDateTimeLiteral()
1042 StringBuilder sb = new StringBuilder();
1043 for (;(c = getChar ()) != -1; col++)
1046 val = ParseDateLiteral(sb);
1047 return Token.LITERAL_DATE;
// Reads a string or character literal into `val`. Characters are consumed
// until a double-quote is met. A following double-quote is checked first
// (presumably an escaped quote), then a 'C'/'c' suffix marks a character
// literal; otherwise the text is a string literal.
// NOTE(review): several statements between these lines are not visible here.
1059 private int ExtractStringOrCharLiteral(int c)
1061 StringBuilder s = new StringBuilder ();
1065 while ((c = getChar ()) != -1){
1066 if (is_doublequote(c)){
1067 if (is_doublequote(peekChar()))
1070 //handle Char Literals
1071 if (peekChar() == 'C' || peekChar() == 'c') {
// A character literal must hold exactly one character.
1073 if (s.Length == 1) {
1075 return Token.LITERAL_CHARACTER;
1077 val = "Incorrect length for a character literal";
1081 val = s.ToString ();
1082 return Token.LITERAL_STRING;
1091 s.Append ((char) c);
// en-US culture created once and shared.
// NOTE(review): presumably the format provider handed to DateTime.Parse in
// ParseDateLiteral; the argument line is not visible in this excerpt.
1097 static IFormatProvider enUSculture = new CultureInfo("en-US", true);
// Converts the text of a date literal to a DateTime with DateTime.Parse
// (NoCurrentDateDefault | AllowWhiteSpaces). A FormatException is reported
// through Report.Error with the literal text and the exception details; a
// second Report.Error call covers another failure path. After a reported
// error a default DateTime is returned.
1099 private DateTime ParseDateLiteral(StringBuilder value)
1103 return DateTime.Parse(value.ToString(),
1105 DateTimeStyles.NoCurrentDateDefault | DateTimeStyles.AllowWhiteSpaces);
1107 catch (FormatException ex)
1109 //TODO: What is the correct error number and message?
1110 Report.Error (1, Location, string.Format("Invalid date literal '{0}'", value.ToString())
1111 + Environment.NewLine + ex.ToString());
1115 Report.Error (1, Location, "Error parsing date literal"); //TODO: What is the correct error number and message?
1117 return new DateTime();
1120 public void PositionCursorAtNextPreProcessorDirective()
1124 for(t = token(); t != Token.HASH && t != Token.EOF ; t = token());
1127 throw new ApplicationException("Unexpected EOF while looking for a pre-processor directive");
1129 if(t == Token.HASH) {
1130 tokens_seen = false;