2002-10-25 Sebastien Pouliot <spouliot@videotron.ca>
[mono.git] / mcs / mbas / mb-tokenizer.cs
1 //\r
2 // Mono.MonoBASIC.Tokenizer.cs: The Tokenizer for the MonoBASIC compiler\r
3 //\r
4 // Author: A Rafael D Teixeira (rafaelteixeirabr@hotmail.com)\r
5 //         \r
6 // Based on cs-tokenizer.cs by Miguel de Icaza (miguel@gnu.org)\r
7 //\r
8 // Licensed under the terms of the GNU GPL\r
9 //\r
10 // Copyright (C) 2001 A Rafael D Teixeira\r
11 //\r
12 \r
13 namespace Mono.MonoBASIC\r
14 {\r
15         using System;\r
16         using System.Text;\r
17         using System.Collections;\r
18         using System.IO;\r
19         using System.Globalization;\r
20         using Mono.Languages;\r
21         using Mono.CSharp;\r
22         \r
23         /// <summary>\r
24         ///    Tokenizer for MonoBASIC source code. \r
25         /// </summary>\r
26         \r
27         public class Tokenizer : yyParser.yyInput\r
28         {\r
// Character source; getChar and peekChar read from it.
TextReader reader;

// Name associated with the current position. It is not read in the
// visible part of this file -- presumably a file name; confirm.
public string ref_name;

// Line counters, both starting at 1.  ref_line is what Location
// reports and what the location string labels "VirtLine".
public int ref_line = 1;
public int line = 1;

// Column counter, starting at 1.
public int col = 1;

// Most recent token code stored by token ().
public int current_token;

// When false, is_keyword refuses to report "get" and "set" as
// keywords.  Exposed through the properties accessor.
bool handle_get_set;

// Not used in the visible part of this file -- presumably the number
// of columns a tab character counts for; confirm.
public int ExpandedTabsSize = 4;
38 \r
//
// Human-readable description of the tokenizer position: physical
// line and column on the first line, then the reference line and the
// current token code.  The error details are appended only while the
// current token is Token.ERROR.
//
public string location {
        get {
                string detail = (current_token == Token.ERROR)
                        ? "detail: " + error_details
                        : "";

                string physical = "Line:     " + line + " Col: " + col + "\n";
                string logical = "VirtLine: " + ref_line +
                                 " Token: " + current_token + " " + detail;

                return physical + logical;
        }
}
53 \r
//
// Reads or changes the handle_get_set flag, which decides whether
// is_keyword reports "get" and "set" as keywords.
//
public bool properties {
        get { return handle_get_set; }
        set { handle_get_set = value; }
}
63                 \r
//
// Class variables, all set up once by the static constructor
//

// Format provider and styles handed to the Decimal/Double parsers
// in adjust_real.
static NumberFormatInfo csharp_format_info;
static NumberStyles styles;

// Lower-case keyword text -> token code, filled in by initTokens.
static Hashtable keywords;
70                 \r
//
// State behind the value of the token being returned
//

// Value of the last literal scanned; exposed through Value / value ().
object val;

// Text of the numeric literal currently being scanned.
System.Text.StringBuilder number;

// Single pushed-back character; getChar, peekChar and putback treat
// -1 as "nothing pending".
int putback_char;
77                 \r
//
// Details about the error encountered by the tokenizer
//
string error_details;

// Read-only view of the last error message recorded by the tokenizer.
public string error {
        get { return error_details; }
}
88                 \r
// Current value of the line counter.
public int Line {
        get { return line; }
}
94 \r
// Current value of the column counter.
public int Col {
        get { return col; }
}
100                 \r
//
// Builds the keyword table: lower-case keyword text -> token code.
//
// Keys must be the bare keyword text.  is_keyword and getKeyword look
// names up after lower-casing them, and identifiers never contain
// blanks, so a key with a stray trailing space can never be matched
// (this used to be the case for "let", "lib" and "like").
//
static void initTokens ()
{
        keywords = new Hashtable ();

        keywords.Add ("addhandler", Token.ADDHANDLER);
        keywords.Add ("addressof", Token.ADDRESSOF);
        keywords.Add ("alias", Token.ALIAS);
        keywords.Add ("and", Token.AND);
        keywords.Add ("andalso", Token.ANDALSO);
        keywords.Add ("ansi", Token.ANSI);
        keywords.Add ("as", Token.AS);
        keywords.Add ("assembly", Token.ASSEMBLY);
        keywords.Add ("auto", Token.AUTO);
        keywords.Add ("boolean", Token.BOOLEAN);
        keywords.Add ("byref", Token.BYREF);
        keywords.Add ("byte", Token.BYTE);
        keywords.Add ("byval", Token.BYVAL);
        keywords.Add ("call", Token.CALL);
        keywords.Add ("case", Token.CASE);
        keywords.Add ("catch", Token.CATCH);
        keywords.Add ("cbool", Token.CBOOL);
        keywords.Add ("cbyte", Token.CBYTE);
        keywords.Add ("cchar", Token.CCHAR);
        keywords.Add ("cdate", Token.CDATE);
        keywords.Add ("cdec", Token.CDEC);
        keywords.Add ("cdbl", Token.CDBL);
        keywords.Add ("char", Token.CHAR);
        keywords.Add ("cint", Token.CINT);
        keywords.Add ("class", Token.CLASS);
        keywords.Add ("clng", Token.CLNG);
        keywords.Add ("cobj", Token.COBJ);
        // Not registered as a keyword:
        //keywords.Add ("compare", Token.COMPARE);
        keywords.Add ("const", Token.CONST);
        keywords.Add ("cshort", Token.CSHORT);
        keywords.Add ("csng", Token.CSNG);
        keywords.Add ("cstr", Token.CSTR);
        keywords.Add ("ctype", Token.CTYPE);
        keywords.Add ("date", Token.DATE);
        keywords.Add ("decimal", Token.DECIMAL);
        keywords.Add ("declare", Token.DECLARE);
        keywords.Add ("default", Token.DEFAULT);
        keywords.Add ("delegate", Token.DELEGATE);
        keywords.Add ("dim", Token.DIM);
        keywords.Add ("do", Token.DO);
        keywords.Add ("double", Token.DOUBLE);
        keywords.Add ("each", Token.EACH);
        keywords.Add ("else", Token.ELSE);
        keywords.Add ("elseif", Token.ELSEIF);
        keywords.Add ("end", Token.END);
        keywords.Add ("enum", Token.ENUM);
        keywords.Add ("erase", Token.ERASE);
        keywords.Add ("error", Token.ERROR);
        keywords.Add ("event", Token.EVENT);
        keywords.Add ("exit", Token.EXIT);
        // Not registered as a keyword:
        //keywords.Add ("explicit", Token.EXPLICIT);
        keywords.Add ("false", Token.FALSE);
        keywords.Add ("finally", Token.FINALLY);
        keywords.Add ("for", Token.FOR);
        keywords.Add ("friend", Token.FRIEND);
        keywords.Add ("function", Token.FUNCTION);
        keywords.Add ("get", Token.GET);
        keywords.Add ("gettype", Token.GETTYPE);
        keywords.Add ("goto", Token.GOTO);
        keywords.Add ("handles", Token.HANDLES);
        keywords.Add ("if", Token.IF);
        keywords.Add ("implements", Token.IMPLEMENTS);
        keywords.Add ("imports", Token.IMPORTS);
        keywords.Add ("in", Token.IN);
        keywords.Add ("inherits", Token.INHERITS);
        keywords.Add ("integer", Token.INTEGER);
        keywords.Add ("interface", Token.INTERFACE);
        keywords.Add ("is", Token.IS);
        keywords.Add ("let", Token.LET);
        keywords.Add ("lib", Token.LIB);
        keywords.Add ("like", Token.LIKE);
        keywords.Add ("long", Token.LONG);
        keywords.Add ("loop", Token.LOOP);
        keywords.Add ("me", Token.ME);
        keywords.Add ("mod", Token.MOD);
        keywords.Add ("module", Token.MODULE);
        keywords.Add ("mustinherit", Token.MUSTINHERIT);
        keywords.Add ("mustoverride", Token.MUSTOVERRIDE);
        keywords.Add ("mybase", Token.MYBASE);
        keywords.Add ("myclass", Token.MYCLASS);
        keywords.Add ("namespace", Token.NAMESPACE);
        keywords.Add ("new", Token.NEW);
        keywords.Add ("next", Token.NEXT);
        keywords.Add ("not", Token.NOT);
        keywords.Add ("nothing", Token.NOTHING);
        keywords.Add ("notinheritable", Token.NOTINHERITABLE);
        keywords.Add ("notoverridable", Token.NOTOVERRIDABLE);
        keywords.Add ("object", Token.OBJECT);
        keywords.Add ("on", Token.ON);
        keywords.Add ("option", Token.OPTION);
        keywords.Add ("optional", Token.OPTIONAL);
        keywords.Add ("or", Token.OR);
        keywords.Add ("orelse", Token.ORELSE);
        keywords.Add ("overloads", Token.OVERLOADS);
        keywords.Add ("overridable", Token.OVERRIDABLE);
        keywords.Add ("overrides", Token.OVERRIDES);
        keywords.Add ("paramarray", Token.PARAM_ARRAY);
        keywords.Add ("preserve", Token.PRESERVE);
        keywords.Add ("private", Token.PRIVATE);
        keywords.Add ("property", Token.PROPERTY);
        keywords.Add ("protected", Token.PROTECTED);
        keywords.Add ("public", Token.PUBLIC);
        keywords.Add ("raiseevent", Token.RAISEEVENT);
        keywords.Add ("readonly", Token.READONLY);
        keywords.Add ("redim", Token.REDIM);
        keywords.Add ("rem", Token.REM);
        keywords.Add ("removehandler", Token.REMOVEHANDLER);
        keywords.Add ("resume", Token.RESUME);
        keywords.Add ("return", Token.RETURN);
        keywords.Add ("select", Token.SELECT);
        keywords.Add ("set", Token.SET);
        keywords.Add ("shadows", Token.SHADOWS);
        keywords.Add ("shared", Token.SHARED);
        keywords.Add ("short", Token.SHORT);
        keywords.Add ("single", Token.SINGLE);
        keywords.Add ("sizeof", Token.SIZEOF);
        keywords.Add ("static", Token.STATIC);
        keywords.Add ("step", Token.STEP);
        keywords.Add ("stop", Token.STOP);
        keywords.Add ("string", Token.STRING);
        keywords.Add ("structure", Token.STRUCTURE);
        keywords.Add ("sub", Token.SUB);
        keywords.Add ("synclock", Token.SYNCLOCK);
        keywords.Add ("then", Token.THEN);
        keywords.Add ("throw", Token.THROW);
        keywords.Add ("to", Token.TO);
        keywords.Add ("true", Token.TRUE);
        keywords.Add ("try", Token.TRY);
        keywords.Add ("typeof", Token.TYPEOF);
        keywords.Add ("unicode", Token.UNICODE);
        keywords.Add ("until", Token.UNTIL);
        keywords.Add ("variant", Token.VARIANT);
        keywords.Add ("when", Token.WHEN);
        keywords.Add ("while", Token.WHILE);
        keywords.Add ("with", Token.WITH);
        keywords.Add ("withevents", Token.WITHEVENTS);
        keywords.Add ("writeonly", Token.WRITEONLY);
        keywords.Add ("xor", Token.XOR);
}
244 \r
//
// Class initializer: fills the keyword table and prepares the number
// format shared by every real-literal parse in adjust_real.
//
static Tokenizer ()
{
        initTokens ();

        csharp_format_info = new NumberFormatInfo ();

        // 'styles' below carries no currency flag, so the parsers look
        // at NumberDecimalSeparator.  Pin it explicitly instead of
        // relying on the default of a fresh NumberFormatInfo.
        csharp_format_info.NumberDecimalSeparator = ".";
        csharp_format_info.CurrencyDecimalSeparator = ".";

        styles = NumberStyles.AllowExponent | NumberStyles.AllowDecimalPoint;
}
255 \r
//
// Returns true when `name' is a keyword, ignoring case.
//
// "get" and "set" only count as keywords while handle_get_set is on.
// That exclusion is checked on the lower-cased text, like the table
// lookup itself, so "Get" or "SET" are treated the same as "get" and
// "set".
//
bool is_keyword (string name)
{
        string lowered = name.ToLower ();

        if (!handle_get_set && (lowered == "get" || lowered == "set"))
                return false;

        return keywords.Contains (lowered);
}
265 \r
//
// Returns the token code registered for `name', ignoring case.
// The name must be present in the keyword table: the unboxing cast
// fails when the lookup yields null.
//
int getKeyword (string name)
{
        object code = keywords [name.ToLower ()];
        return (int) code;
}
270                 \r
// A new Location built from the current reference line (ref_line).
public Location Location {
        get {
                Location here = new Location (ref_line);
                return here;
        }
}
276                 \r
// True when `c' may begin an identifier: an underscore or any letter.
bool is_identifier_start_character (char c)
{
        if (c == '_')
                return true;

        return Char.IsLetter (c);
}
281 \r
// True when `c' may continue an identifier: an underscore, a letter
// or a decimal digit.
bool is_identifier_part_character (char c)
{
        if (c == '_')
                return true;

        return Char.IsLetterOrDigit (c);
}
286 \r
//
// Maps the punctuation character `c' to its token code.
//
// For two-character operators the second character is only peeked
// here, never consumed; `doread' is set to true in that case
// (presumably so the caller consumes it) and to false otherwise.
// Characters that are not handled yield Token.ERROR.
//
int is_punct (char c, ref bool doread)
{
        doread = false;

        // Tokens that never need a look at the following character.
        // ('[' and ']' are deliberately not handled here.)
        switch (c) {
        case '(':
                return Token.OPEN_PARENS;
        case ')':
                return Token.CLOSE_PARENS;
        case ',':
                return Token.COMMA;
        case ':':
                return Token.COLON;
        case '?':
                return Token.INTERR;
        }

        int next = peekChar ();

        switch (c) {
        case '+':
                if (next == '+') {
                        doread = true;
                        return Token.OP_INC;
                }
                if (next == '=') {
                        doread = true;
                        return Token.OP_ADD_ASSIGN;
                }
                return Token.PLUS;

        case '-':
                if (next == '=') {
                        doread = true;
                        return Token.OP_SUB_ASSIGN;
                }
                return Token.MINUS;

        case '=':
                // "==" is not an operator: '=' alone is always ASSIGN.
                return Token.ASSIGN;

        case '*':
                if (next == '=') {
                        doread = true;
                        return Token.OP_MULT_ASSIGN;
                }
                return Token.STAR;

        case '/':
                if (next == '=') {
                        doread = true;
                        return Token.OP_DIV_ASSIGN;
                }
                return Token.DIV;

        case '\\':
                if (next == '=') {
                        doread = true;
                        return Token.OP_IDIV_ASSIGN;
                }
                return Token.OP_IDIV;

        case '^':
                if (next == '=') {
                        doread = true;
                        return Token.OP_EXP_ASSIGN;
                }
                return Token.OP_EXP;

        case '<':
                if (next == '>') {
                        doread = true;
                        return Token.OP_NE;
                }
                if (next == '=') {
                        doread = true;
                        return Token.OP_LE;
                }
                return Token.OP_LT;

        case '>':
                if (next == '=') {
                        doread = true;
                        return Token.OP_GE;
                }
                return Token.OP_GT;
        }

        return Token.ERROR;
}
395 \r
//
// Appends `c' to the number buffer (unless it is -1) and then every
// digit that follows in the input, consuming those digits.
//
// Returns true when at least one digit was read from the input; the
// character passed in as `c' does not count.
//
bool decimal_digits (int c)
{
        bool found = false;

        if (c != -1)
                number.Append ((char) c);

        for (int next = peekChar ();
             next != -1 && Char.IsDigit ((char) next);
             next = peekChar ()) {
                number.Append ((char) next);
                getChar ();
                found = true;
        }

        return found;
}
414 \r
//
// Appends `c' to the number buffer (unless it is -1) and then every
// hexadecimal digit that follows in the input, consuming those digits.
// Letters are stored in upper case.
//
void hex_digits (int c)
{
        if (c != -1)
                number.Append ((char) c);

        for (;;) {
                int next = peekChar ();
                if (next == -1)
                        return;

                char upper = Char.ToUpper ((char) next);
                bool is_hex = Char.IsDigit (upper) || (upper >= 'A' && upper <= 'F');
                if (!is_hex)
                        return;

                number.Append (upper);
                getChar ();
        }
}
432                 \r
//
// Classifies `c' as a real-literal type suffix, in either case:
// F -> LITERAL_SINGLE, D -> LITERAL_DOUBLE, M -> LITERAL_DECIMAL.
//
// When `c' is a suffix, one character is consumed from the input and
// the literal token is returned; otherwise nothing is consumed and
// Token.NONE is returned.
//
// NOTE(review): these letters look like the C# suffixes inherited from
// cs-tokenizer; confirm they are the ones MonoBASIC should accept.
//
int real_type_suffix (int c)
{
        int kind;

        if (c == 'F' || c == 'f')
                kind = Token.LITERAL_SINGLE;
        else if (c == 'D' || c == 'd')
                kind = Token.LITERAL_DOUBLE;
        else if (c == 'M' || c == 'm')
                kind = Token.LITERAL_DECIMAL;
        else
                return Token.NONE;

        getChar ();
        return kind;
}
453 \r
//
// Returns the token for an integer literal.  `c' is currently ignored
// and the result is always Token.LITERAL_INTEGER.
//
// FIXME: Handle U and L suffixes, and pick the integer type the value
// fits best according to the spec.
//
int integer_type_suffix (int c)
{
        int kind = Token.LITERAL_INTEGER;
        return kind;
}
461                 \r
//
// Parses the number buffer as a 32-bit integer and stores the result
// in `val'.  No sign, blanks or separators are accepted
// (NumberStyles.None).  `t' is not used.
//
void adjust_int (int t)
{
        string digits = number.ToString ();
        val = System.Int32.Parse (digits, NumberStyles.None);
}
467 \r
//
// Parses the number buffer as the real type selected by `t', stores
// the boxed result in `val' and returns the literal token:
//
//   LITERAL_DECIMAL -> System.Decimal
//   LITERAL_DOUBLE  -> System.Double
//   LITERAL_SINGLE  -> parsed as a double, then narrowed to float
//   NONE            -> System.Double; LITERAL_DOUBLE is returned
//
// Any other `t' is returned as is and `val' is left alone.
//
// The text is no longer echoed to the console: that was debugging
// output that ended up on stdout for every real literal scanned.
//
int adjust_real (int t)
{
        string s = number.ToString ();

        switch (t) {
        case Token.LITERAL_DECIMAL:
                val = System.Decimal.Parse (s, styles, csharp_format_info);
                break;

        case Token.LITERAL_DOUBLE:
                val = System.Double.Parse (s, styles, csharp_format_info);
                break;

        case Token.LITERAL_SINGLE:
                val = (float) System.Double.Parse (s, styles, csharp_format_info);
                break;

        case Token.NONE:
                // No suffix: a real literal defaults to double.
                val = System.Double.Parse (s, styles, csharp_format_info);
                t = Token.LITERAL_DOUBLE;
                break;
        }

        return t;
}
499 \r
//
// Invoked if we know we have .digits or digits
//
// `c' is the first character of the literal (a digit or '.').  This
// code never pushes it back, so it is presumably already consumed by
// the caller.  Scans the rest of the literal, stores its value in
// `val' and returns the literal token.  The character following the
// literal is left in the input.
//
// Fixes over the previous version:
//  * the hexadecimal test was `c == '0' && peek == 'x' || peek == 'X'',
//    which treated any digit followed by 'X' as a hex prefix;
//  * the character after the integer part was consumed, but the
//    exponent and suffix handling consumed again, so "1e5" and "1F"
//    swallowed an extra character.  It is now only peeked.
//
int is_number (int c)
{
        bool is_real = false;
        int type;

        number = new System.Text.StringBuilder ();

        if (Char.IsDigit ((char) c)) {
                int next = peekChar ();

                if (c == '0' && (next == 'x' || next == 'X')) {
                        getChar ();
                        hex_digits (-1);
                        val = System.Int32.Parse (number.ToString (), NumberStyles.HexNumber);
                        return integer_type_suffix (peekChar ());
                }

                decimal_digits (c);

                // Peek at what follows the integer part.  Only a '.' is
                // consumed, which puts us in the same state as when
                // we are entered with '.' as the first character.
                c = peekChar ();
                if (c == '.')
                        getChar ();
        }

        //
        // We need to handle the case of
        // "1.1" vs "1.string" (LITERAL_SINGLE vs NUMBER DOT IDENTIFIER)
        //
        if (c == '.') {
                if (decimal_digits ('.')) {
                        is_real = true;
                        c = peekChar ();
                } else {
                        // No fraction: give the dot back and drop it
                        // from the buffer, leaving a plain integer.
                        putback ('.');
                        number.Length -= 1;
                        adjust_int (Token.LITERAL_INTEGER);
                        return Token.LITERAL_INTEGER;
                }
        }

        // From here on `c' is always the peeked, unconsumed next character.
        if (c == 'e' || c == 'E') {
                is_real = true;
                number.Append ("e");
                getChar ();

                c = peekChar ();
                if (c == '+' || c == '-') {
                        number.Append ((char) c);
                        getChar ();
                }
                decimal_digits (-1);
                c = peekChar ();
        }

        // real_type_suffix consumes the suffix character itself.
        type = real_type_suffix (c);
        if (type == Token.NONE && !is_real) {
                type = integer_type_suffix (c);
                adjust_int (type);
                return type;
        }

        return adjust_real (type);
}
573                         \r
//
// Translates a backslash escape sequence.
//
// `c' is the character just read.  When it is not a backslash it is
// returned unchanged.  Otherwise the next input character selects the
// escaped value, is consumed, and the value is returned.  For an
// unknown escape the error details are set, nothing is consumed and
// -1 is returned.
//
// "\r" now yields a carriage return; it used to yield the letter 'c'.
//
int escape (int c)
{
        int d;
        int v;

        d = peekChar ();
        if (c != '\\')
                return c;

        switch (d) {
        case 'a':
                v = '\a'; break;
        case 'b':
                v = '\b'; break;
        case 'n':
                v = '\n'; break;
        case 't':
                v = '\t'; break;
        case 'v':
                v = '\v'; break;
        case 'r':
                v = '\r'; break;
        case '\\':
                v = '\\'; break;
        case 'f':
                v = '\f'; break;
        case '0':
                v = 0; break;
        case '"':
                v = '"'; break;
        case '\'':
                v = '\''; break;
        default:
                error_details = "cs1009: Unrecognized escape sequence " + (char) d;
                return -1;
        }

        // Consume the character that selected the escape.
        getChar ();
        return v;
}
613 \r
//
// Consumes and returns the next character: the pushed-back one when
// there is one (clearing it), otherwise whatever reader.Read ()
// returns.
//
int getChar ()
{
        if (putback_char == -1)
                return reader.Read ();

        int pending = putback_char;
        putback_char = -1;
        return pending;
}
624 \r
//
// Returns the next character without consuming it: the pushed-back
// one when there is one, otherwise whatever reader.Peek () returns.
//
int peekChar ()
{
        return (putback_char == -1) ? reader.Peek () : putback_char;
}
631 \r
//
// Pushes `c' back so that the next getChar / peekChar returns it.
// Only one character can be pending; a second push-back throws.
//
void putback (int c)
{
        if (putback_char == -1) {
                putback_char = c;
                return;
        }

        throw new Exception ("This should not happen putback on putback");
}
638 \r
// True unless the current token is Token.EOF.
public bool advance ()
{
        bool at_end = (current_token == Token.EOF);
        return !at_end;
}
643 \r
// Value stored by the tokenizer for the last literal (see `val').
public Object Value {
        get { return val; }
}
649 \r
// Method form of Value: returns the same stored value.
public Object value ()
{
        Object current = val;
        return current;
}
654 \r
/// <summary>
///    Tells whether currentChar terminates a line.
/// </summary>
/// <remarks>
///    Recognized terminators are CR (0x0D), LF (0x0A), U+2028 and
///    U+2029; end of input (-1) is reported as a line end too.  When
///    the character is a CR immediately followed by LF, the LF is
///    consumed here so the pair counts as a single line end.
/// </remarks>
private bool IsEOL(int currentChar)
{
        switch (currentChar) {
        case 0x0D:
                // Swallow the LF of a CR-LF pair.
                if (peekChar () == 0x0A)
                        getChar ();
                return true;

        case -1:
        case 0x0A:
        case 0x2028:
        case 0x2029:
                return true;

        default:
                return false;
        }
}
666 \r
/// <summary>
///    Discards the rest of the current line (the body of a comment).
/// </summary>
/// <remarks>
///    Characters are consumed up to and including the line terminator
///    (or end of input, which IsEOL also accepts).  Afterwards the
///    physical and virtual line counters are advanced and the column
///    is reset.
/// </remarks>
/// <returns>Always Token.EOL.</returns>
private int DropComments()
{
        for (;;) {
                if (IsEOL (getChar ()))
                        break;
                col++;
        }

        col = 0;
        line++;
        ref_line++;

        return Token.EOL;
}
678                         \r
/// <summary>
///    Returns the next token for the parser and records it in
///    current_token.
/// </summary>
/// <remarks>
///    Wraps xtoken (): a raw result of 0 is reported as Token.EOF, a
///    Token.REM is replaced by the Token.EOL that DropComments ()
///    returns after skipping the comment, and if the token that was
///    current on entry was an EOL, further EOL tokens are skipped.
/// </remarks>
public int token ()
{
        int previous = current_token;

        while (true) {
                current_token = xtoken ();

                if (current_token == 0)
                        return Token.EOF;

                if (current_token == Token.REM)
                        current_token = DropComments ();

                // Keep scanning only while an EOL directly follows the EOL we started from.
                if (previous != Token.EOL || current_token != Token.EOL)
                        return current_token;
        }
}
693 \r
/// <summary>
///    Reads an identifier starting at the next input character.
/// </summary>
/// <returns>
///    The identifier text, or null when the character read is not a
///    valid identifier start (that character has been consumed).
/// </returns>
private string GetIdentifier()
{
        int first = getChar ();

        if (!is_identifier_start_character ((char) first))
                return null;

        return GetIdentifier (first);
}
702 \r
/// <summary>
///    Reads the remainder of an identifier whose first character, c,
///    has already been consumed by the caller.
/// </summary>
/// <remarks>
///    Characters are taken while is_identifier_part_character accepts
///    them; the first rejected character (or end of input) is left in
///    the stream.  col is advanced once per character taken here.
/// </remarks>
/// <returns>The identifier text, beginning with c.</returns>
private string GetIdentifier(int c)
{
        System.Text.StringBuilder text = new System.Text.StringBuilder ();

        text.Append ((char) c);

        for (;;) {
                int next = peekChar ();

                if (next == -1 || !is_identifier_part_character ((char) next))
                        break;

                text.Append ((char) getChar ());
                col++;
        }

        return text.ToString ();
}
722 \r
/// <summary>
///    Scans the input and returns the next raw token code.
/// </summary>
/// <remarks>
///    For each character read the checks are made in this order: line
///    comment marker ('), end of line, escaped identifier ([name]),
///    identifier or keyword, numeric literal, preprocessor line (# in
///    column 1), punctuation, string literal, tab, other white space.
///    Any other character yields Token.ERROR with that character in
///    error_details.  When the input is exhausted a Token.EOL is
///    returned first unless the previous token already was one, then
///    Token.EOF.  val is reset to null on entry and set for
///    identifiers and string literals.
/// </remarks>
public int xtoken ()
{
        int t;
        bool doread = false;
        int c;

        val = null;
        for (;(c = getChar ()) != -1; col++) {

                // Handle line comments.
                if (c == '\'')
                        return Token.REM;

                // Handle EOL.
                if (IsEOL (c)) {
                        line++;
                        ref_line++;
                        col = 0;
                        if (current_token == Token.EOL) // if last token was also EOL keep skipping
                                continue;
                        return Token.EOL;
                }

                // Handle escaped identifiers
                // NOTE(review): a malformed escaped identifier breaks out of the
                // loop, so it is reported through the end-of-input path below
                // (EOL/EOF) rather than as Token.ERROR -- confirm this is intended.
                if (c == '[') {
                        if ((val = GetIdentifier ()) == null)
                                break;
                        if ((c = getChar ()) != ']')
                                break;
                        return Token.IDENTIFIER;
                }

                // Handle unescaped identifiers
                if (is_identifier_start_character ((char) c)) {
                        string id;
                        if ((id = GetIdentifier (c)) == null)
                                break;
                        if (is_keyword (id))
                                return getKeyword (id);
                        val = id;
                        return Token.IDENTIFIER;
                }

                // handle numeric literals
                if (c == '.') {
                        if (Char.IsDigit ((char) peekChar ()))
                                return is_number (c);
                        return Token.DOT;
                }

                if (Char.IsDigit ((char) c))
                        return is_number (c);

                /* For now, limited support for pre-processor commands */
                if (col == 1 && c == '#') {
                        System.Text.StringBuilder s = new System.Text.StringBuilder ();

                        // Read to the end of the line with the same terminator rules as
                        // the rest of the tokenizer, so the CR of a CR-LF pair does not
                        // end up inside the directive text.
                        while (!IsEOL (c = getChar ()))
                                s.Append ((char) c);

                        string directive = s.ToString ();
                        if (String.Compare (directive, 0, "line", 0, 4) == 0 &&
                            !ApplyLineDirective (directive)) {
                                // Report on the directive's own line: the counters are
                                // deliberately not advanced before returning.
                                error_details = "Invalid #line directive: #" + directive;
                                return Token.ERROR;
                        }

                        // NOTE(review): ref_line is incremented right after being set from
                        // the directive, so the line following "#line N" is numbered N+1 --
                        // confirm this is the intended meaning.
                        line++;
                        ref_line++;
                        // The whole line was consumed: restart column counting so the next
                        // character is seen at column 1 (needed for consecutive # lines).
                        col = 0;
                        continue;
                }

                if ((t = is_punct ((char) c, ref doread)) != Token.ERROR) {
                        if (doread) {
                                getChar ();
                                col++;
                        }
                        return t;
                }

                // Treat string literals
                if (c == '"') {
                        System.Text.StringBuilder s = new System.Text.StringBuilder ();

                        while ((c = getChar ()) != -1) {
                                if (c == '"') { // TODO: treat double-doublequotes
                                        val = s.ToString ();
                                        return Token.LITERAL_STRING;
                                }

                                c = escape (c);
                                if (c == -1)
                                        return Token.ERROR;
                                s.Append ((char) c);
                        }
                        // Unterminated literal: c is -1 here and falls through to the
                        // generic error return at the bottom of the loop.
                }

                // expand tabs for location and ignore it as whitespace
                if (c == '\t') {
                        col = (((col + ExpandedTabsSize) / ExpandedTabsSize) * ExpandedTabsSize) - 1;
                        continue;
                }

                // white space
                if (c == ' ' || c == '\f' || c == '\v')
                        continue;

                error_details = ((char) c).ToString ();

                return Token.ERROR;
        }

        if (current_token != Token.EOL) // if last token wasn't EOL send it before EOF
                return Token.EOL;

        return Token.EOF;
}

/// <summary>
///    Applies the arguments of a "line &lt;number&gt; [&lt;file&gt;]" directive
///    (the text after the leading #) to ref_line and ref_name.
/// </summary>
/// <returns>
///    true on success; false, leaving ref_line and ref_name untouched,
///    when the argument is missing or the number cannot be parsed.
/// </returns>
private bool ApplyLineDirective (string directive)
{
        // "line" plus one separator character is five characters; anything
        // shorter or equal has no argument at all.
        if (directive.Length <= 5)
                return false;

        string arg = directive.Substring (5);
        string number = arg;
        string file = null;

        int pos = arg.IndexOf (' ');
        if (pos != -1 && pos != 0) {
                char [] quotes = { '"' };

                number = arg.Substring (0, pos);
                // Strings are immutable: keep the trimmed result.
                file = arg.Substring (pos + 1).Trim (quotes);
        }

        int parsed;
        try {
                parsed = System.Int32.Parse (number);
        } catch (FormatException) {
                return false;
        } catch (OverflowException) {
                return false;
        }

        ref_line = parsed;
        if (file != null)
                ref_name = file;
        return true;
}
853 \r
/// <summary>
///    Creates a tokenizer that reads from input.
/// </summary>
/// <param name="input">Source text to tokenize.</param>
/// <param name="fname">
///    File name; stored as the initial ref_name and passed to
///    Location.Push.
/// </param>
/// <param name="defines">Not used by this constructor.</param>
public Tokenizer (System.IO.TextReader input, string fname, ArrayList defines)
{
        // No character is pending at start-up.
        putback_char = -1;
        reader = input;
        ref_name = fname;

        Location.Push (fname);
}
862 \r
863         }\r
864 }\r