Merge pull request #3422 from xmcclure/tarjan-doublefan
[mono.git] / mcs / ilasm / scanner / ILTokenizer.cs
1 // ILTokenizer.cs\r
2 // Author: Sergey Chaban (serge@wildwestsoftware.com)\r
3 \r
4 using System;\r
5 using System.IO;\r
6 using System.Text;\r
7 using System.Collections;\r
8 using System.Globalization;\r
9 \r
10 namespace Mono.ILASM {\r
11 \r
12         public delegate void NewTokenEvent (object sender, NewTokenEventArgs args);\r
13 \r
14         public class NewTokenEventArgs : EventArgs {\r
15 \r
16                 public readonly ILToken Token;\r
17 \r
18                 public NewTokenEventArgs (ILToken token)\r
19                 {\r
20                         Token = token;\r
21                 }\r
22         }\r
23 \r
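        /// <summary>
        /// Token stream over IL assembly source: pulls characters from an ILReader
        /// and hands them back one ILToken at a time.
        /// </summary>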
26         public class ILTokenizer : ITokenStream {\r
27 \r
28                 private static readonly string idchars = "_$@?.`";\r
29 \r
30                 private static Hashtable keywords;\r
31                 private static Hashtable directives;\r
32 \r
33                 private ILToken lastToken;\r
34                 private ILReader reader;\r
35                 private StringHelper strBuilder;\r
36                 private NumberHelper numBuilder;\r
37                 private bool in_byte_array;\r
38                 \r
39                 public event NewTokenEvent NewTokenEvent;\r
40 \r
41                 static ILTokenizer()\r
42                 {\r
43                         keywords = ILTables.Keywords;\r
44                         directives = ILTables.Directives;\r
45                 }\r
46 \r
47                 /// <summary>\r
48                 /// </summary>\r
49                 /// <param name="reader"></param>\r
50                 public ILTokenizer (StreamReader reader)\r
51                 {\r
52                         this.reader = new ILReader (reader);\r
53                         strBuilder = new StringHelper (this);\r
54                         numBuilder = new NumberHelper (this);\r
55                         lastToken = ILToken.Invalid.Clone () as ILToken;\r
56                 }\r
57 \r
58                 public ILReader Reader {\r
59                         get {\r
60                                 return reader;\r
61                         }\r
62                 }\r
63 \r
64                 public Location Location {\r
65                         get {\r
66                                 return reader.Location;\r
67                         }\r
68                 }\r
69 \r
70                 public bool InByteArray {\r
71                         get { return in_byte_array; }\r
72                         set { in_byte_array = value; }\r
73                 }\r
74 \r
75                 public ILToken GetNextToken ()\r
76                 {\r
77                         if (lastToken == ILToken.EOF) return ILToken.EOF;\r
78 \r
79                         int ch;\r
80                         int next;\r
81                         ILToken res = ILToken.EOF.Clone () as ILToken;\r
82 \r
83                         \r
84                         while ((ch = reader.Read ()) != -1) {\r
85 \r
86                                 // Comments\r
87                                 if (ch == '/') {\r
88                                         next = reader.Peek ();\r
89                                         if (next == '/') {\r
90                                                 // double-slash comment, skip to the end of the line.\r
91                                                 for (reader.Read ();\r
92                                                         next != -1 && next != '\n';\r
93                                                         next = reader.Read ());\r
94                                                 continue;\r
95                                         } else if (next == '*') {\r
96                                                 reader.Read ();\r
97                                                 for (next = reader.Read (); next != -1; next = reader.Read ()) {\r
98                                                         if (next == '*' && reader.Peek () == '/') {\r
99                                                                 reader.Read ();\r
100                                                                 goto end;\r
101                                                         }\r
102                                                 }\r
103                                         end:\r
104                                                 continue;\r
105                                         }\r
106                                 }\r
107 \r
108                                 // HEXBYTES are flagged by the parser otherwise it is\r
109                                 // impossible to figure them out\r
110                                 if (in_byte_array) {\r
111                                         string hx = String.Empty;\r
112 \r
113                                         if (Char.IsWhiteSpace ((char) ch))\r
114                                                 continue;\r
115 \r
116                                         if (ch == ')') {\r
117                                                 res = ILToken.CloseParens;\r
118                                                 break;\r
119                                         }\r
120 \r
121                                         if (!is_hex (ch))\r
122                                                 throw new ILTokenizingException (reader.Location, ((char) ch).ToString ());\r
123                                         hx += (char) ch;\r
124                                         if (is_hex (reader.Peek ()))\r
125                                                 hx += (char) reader.Read ();\r
126                                         else if (!Char.IsWhiteSpace ((char) reader.Peek ()) && reader.Peek () != ')')\r
127                                                 throw new ILTokenizingException (reader.Location,\r
128                                                                 ((char) reader.Peek ()).ToString ());\r
129                                         res.token = Token.HEXBYTE;\r
130                                         res.val = Byte.Parse (hx, NumberStyles.HexNumber);\r
131 \r
132                                         while (Char.IsWhiteSpace ((char) reader.Peek ()))\r
133                                                 reader.Read ();\r
134                                         break;\r
135                                 }\r
136                                 \r
137                                 // Ellipsis\r
138                                 if (ch == '.' && reader.Peek () == '.') {\r
139                                         reader.MarkLocation ();\r
140                                         int ch2 = reader.Read ();\r
141                                         if (reader.Peek () == '.') {\r
142                                                 res = ILToken.Ellipsis;\r
143                                                 reader.Read ();\r
144                                                 break;\r
145                                         }\r
146                                         reader.Unread (ch2);\r
147                                         reader.RestoreLocation ();\r
148                                 }\r
149 \r
150                                 if (ch == '.' || ch == '#') {\r
151                                         next = reader.Peek ();\r
152                                         if (ch == '.' && Char.IsDigit((char) next)) {\r
153                                                 numBuilder.Start (ch);\r
154                                                 reader.Unread (ch);\r
155                                                 numBuilder.Build ();\r
156                                                 if (numBuilder.ResultToken != ILToken.Invalid) {\r
157                                                         res.CopyFrom (numBuilder.ResultToken);\r
158                                                         break;\r
159                                                 }\r
160                                         } else {\r
161                                                 if (strBuilder.Start (next) && strBuilder.TokenId == Token.ID) {\r
162                                                         reader.MarkLocation ();\r
163                                                         string dirBody = strBuilder.Build ();\r
164                                                         string dir = new string ((char) ch, 1) + dirBody;\r
165                                                         if (IsDirective (dir)) {\r
166                                                                 res = ILTables.Directives [dir] as ILToken;\r
167                                                         } else {\r
168                                                                 reader.Unread (dirBody.ToCharArray ());\r
169                                                                 reader.RestoreLocation ();\r
170                                                                 res = ILToken.Dot;\r
171                                                         }\r
172                                                 } else {\r
173                                                         res = ILToken.Dot;\r
174                                                 }\r
175                                                 break;\r
176                                         }\r
177                                 }\r
178 \r
179                                 // Numbers && Hexbytes\r
180                                 if (numBuilder.Start (ch)) {\r
181                                         if ((ch == '-') && !(Char.IsDigit ((char) reader.Peek ()))) {\r
182                                                 res = ILToken.Dash;\r
183                                                 break;\r
184                                         } else {\r
185                                                 reader.Unread (ch);\r
186                                                 numBuilder.Build ();\r
187                                                 if (numBuilder.ResultToken != ILToken.Invalid) {\r
188                                                         res.CopyFrom (numBuilder.ResultToken);\r
189                                                         break;\r
190                                                 }\r
191                                         }\r
192                                 }\r
193 \r
194                                 // Punctuation\r
195                                 ILToken punct = ILToken.GetPunctuation (ch);\r
196                                 if (punct != null) {\r
197                                         if (punct == ILToken.Colon && reader.Peek () == ':') {\r
198                                                 reader.Read ();\r
199                                                 res = ILToken.DoubleColon;\r
200                                         } else {\r
201                                                 res = punct;\r
202                                         }\r
203                                         break;\r
204                                 }\r
205 \r
206                                 // ID | QSTRING | SQSTRING | INSTR_* | KEYWORD\r
207                                 if (strBuilder.Start (ch)) {\r
208                                         reader.Unread (ch);\r
209                                         string val = strBuilder.Build ();\r
210                                         if (strBuilder.TokenId == Token.ID) {\r
211                                                 ILToken opcode;\r
212                                                 next = reader.Peek ();\r
213                                                 if (next == '.') {\r
214                                                         reader.MarkLocation ();\r
215                                                         reader.Read ();\r
216                                                         next = reader.Peek ();\r
217                                                         if (IsIdChar ((char) next)) {\r
218                                                                 string opTail = BuildId ();\r
219                                                                 string full_str = String.Format ("{0}.{1}", val, opTail);\r
220                                                                 opcode = InstrTable.GetToken (full_str);\r
221 \r
222                                                                 if (opcode == null) {\r
223                                                                         if (strBuilder.TokenId != Token.ID) {\r
224                                                                                 reader.Unread (opTail.ToCharArray ());\r
225                                                                                 reader.Unread ('.');\r
226                                                                                 reader.RestoreLocation ();\r
227                                                                                 res.val = val;\r
228                                                                         } else {\r
229                                                                                 res.token = Token.COMP_NAME;\r
230                                                                                 res.val = full_str;\r
231                                                                         }\r
232                                                                         break;\r
233                                                                 } else {\r
234                                                                         res = opcode;\r
235                                                                         break;\r
236                                                                 }\r
237 \r
238                                                         } else if (Char.IsWhiteSpace ((char) next)) {\r
239                                                                 // Handle 'tail.' and 'unaligned.'\r
240                                                                 opcode = InstrTable.GetToken (val + ".");\r
241                                                                 if (opcode != null) {\r
242                                                                         res = opcode;\r
243                                                                         break;\r
244                                                                 }\r
245                                                                 // Let the parser handle the dot\r
246                                                                 reader.Unread ('.');\r
247                                                         }\r
248                                                 }\r
249                                                 opcode = InstrTable.GetToken (val);\r
250                                                 if (opcode != null) {\r
251                                                         res = opcode;\r
252                                                         break;\r
253                                                 }\r
254                                                 if (IsKeyword (val)) {\r
255                                                         res = ILTables.Keywords [val] as ILToken;\r
256                                                         break;\r
257                                                 }\r
258                                         }\r
259 \r
260                                         res.token = strBuilder.TokenId;\r
261                                         res.val = val;\r
262                                         break;\r
263                                 }\r
264                         }\r
265 \r
266                         OnNewToken (res);\r
267                         lastToken.CopyFrom (res);\r
268                         return res;\r
269                 }\r
270 \r
271 \r
272                 /// <summary>\r
273                 /// </summary>\r
274                 public ILToken NextToken {\r
275                         get {\r
276                                 return GetNextToken ();\r
277                         }\r
278                 }\r
279 \r
280 \r
281                 /// <summary>\r
282                 /// </summary>\r
283                 public ILToken LastToken {\r
284                         get {\r
285                                 return lastToken;\r
286                         }\r
287                 }\r
288 \r
289                 bool is_hex (int e)\r
290                 {\r
291                         return (e >= '0' && e <= '9') || (e >= 'A' && e <= 'F') || (e >= 'a' && e <= 'f');\r
292                 }\r
293 \r
294                 private static bool IsIdStartChar (char ch)\r
295                 {\r
296                         return (Char.IsLetter (ch) || (idchars.IndexOf (ch) != -1));\r
297                 }\r
298 \r
299 \r
300                 private static bool IsIdChar (char ch)\r
301                 {\r
302                         return (Char.IsLetterOrDigit (ch) || (idchars.IndexOf (ch) != -1));\r
303                 }\r
304 \r
305                 /// <summary>\r
306                 /// </summary>\r
307                 /// <param name="name"></param>\r
308                 /// <returns></returns>\r
309                 public static bool IsOpcode (string name)\r
310                 {\r
311                         return InstrTable.IsInstr (name);\r
312                 }\r
313 \r
314 \r
315                 /// <summary>\r
316                 /// </summary>\r
317                 /// <param name="name"></param>\r
318                 /// <returns></returns>\r
319                 public static bool IsDirective (string name)\r
320                 {\r
321                         char ch = name [0];\r
322                         bool res = (ch == '.' || ch == '#');\r
323 \r
324                         if (res) {\r
325                                 res = directives.Contains (name);\r
326                         }\r
327 \r
328                         return res;\r
329                 }\r
330 \r
331                 private string BuildId ()\r
332                 {\r
333                         StringBuilder idsb = new StringBuilder ();\r
334                         int ch, last;\r
335 \r
336                         last = -1;\r
337                         while ((ch = reader.Read ()) != -1) {\r
338                                 if (IsIdChar ((char) ch) || ch == '.') {\r
339                                         idsb.Append ((char) ch);\r
340                                 } else {\r
341                                         reader.Unread (ch);\r
342                                         // Never end an id on a DOT\r
343                                         if (last == '.') {\r
344                                                 reader.Unread (last);\r
345                                                 idsb.Length -= 1;\r
346                                         }        \r
347                                         break;\r
348                                 }\r
349                                 last = ch;\r
350                         }\r
351 \r
352                         return idsb.ToString ();\r
353                 }\r
354 \r
355                 /// <summary>\r
356                 /// </summary>\r
357                 /// <param name="name"></param>\r
358                 /// <returns></returns>\r
359                 public static bool IsKeyword (string name)\r
360                 {\r
361                         return keywords.Contains (name);\r
362                 }\r
363 \r
364                 private void OnNewToken (ILToken token)\r
365                 {\r
366                         if (NewTokenEvent != null)\r
367                                 NewTokenEvent (this, new NewTokenEventArgs (token));\r
368                 }\r
369 \r
370         }\r
371 }\r