a4d726718a36ab21156b10287fe29a0e1a8a5ad7
[mono.git] / mcs / class / monodoc / Monodoc.Ecma / EcmaUrlTokenizer.cs
1 using System;
2 using System.Text;
3 using System.Globalization;
4
namespace Monodoc.Ecma
{
        /// <summary>
        /// Walks a url string one character at a time and hands the resulting tokens to the
        /// parser. Implements <c>yyParser.yyInput</c>.
        /// </summary>
        /// <remarks>
        /// <c>yyParser</c> and the <c>Token</c> constants are declared outside this file;
        /// presumably <c>advance</c>, <c>token</c> and <c>value</c> are the members that
        /// interface requires (TODO confirm against the parser source).
        /// </remarks>
        public class EcmaUrlTokenizer : yyParser.yyInput
        {
                // Sentinel returned by Read/Peek once the input is exhausted. A literal '\0'
                // inside the input is indistinguishable from it and therefore ends tokenizing.
                const char EndOfStream = (char)0;

                // The url being tokenized. May be null, in which case it behaves as empty.
                string input;
                // Value attached to the most recent token: a string for IDENTIFIER,
                // a boxed int for DIGIT, null for everything else.
                object val;
                // Last token successfully returned by token ().
                int current_token;
                // Count of non-whitespace characters consumed so far; used in error messages
                // and to recognise the very first token.
                int current_pos;
                // Index into input of the next character to read (whitespace included).
                int real_current_pos;
                // Length of the identifier most recently scanned.
                int identCount = 0;

                /// <summary>Creates a tokenizer over <paramref name="input"/>.</summary>
                /// <param name="input">The url to tokenize; null is treated like an empty string.</param>
                public EcmaUrlTokenizer (string input)
                {
                        this.input = input;
                }

                // True when c may begin an identifier: an underscore or any Unicode letter.
                // The explicit ASCII ranges are a fast path in front of Char.IsLetter.
                static bool is_identifier_start_character (char c)
                {
                        return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || Char.IsLetter (c);
                }

                // True when c may continue an identifier: ASCII letters, ASCII digits, '_',
                // or - outside the ASCII range - any letter or connector punctuation.
                static bool is_identifier_part_character (char c)
                {
                        if (c >= 'a' && c <= 'z')
                                return true;

                        if (c >= 'A' && c <= 'Z')
                                return true;

                        if (c == '_' || (c >= '0' && c <= '9'))
                                return true;

                        // Every remaining ASCII character is punctuation or a control character.
                        if (c < 0x80)
                                return false;

                        return Char.IsLetter (c) || Char.GetUnicodeCategory (c) == UnicodeCategory.ConnectorPunctuation;
                }

                /// <summary>Reports whether any unread character remains.</summary>
                /// <returns>
                /// <c>true</c> while the next unread character is not the end-of-stream sentinel.
                /// Whitespace counts as unread input here, so this can still return <c>true</c>
                /// when only whitespace is left.
                /// </returns>
                public bool advance ()
                {
                        return Peek () != EndOfStream;
                }

                /// <summary>Value attached to the most recently scanned token (may be null).</summary>
                public Object Value {
                        get {
                                return val;
                        }
                }

                /// <summary>Method form of <c>Value</c>; returns the same object.</summary>
                public Object value ()
                {
                        return val;
                }

                /// <summary>Scans and returns the next token.</summary>
                /// <returns>
                /// A <c>Token</c> constant, or the raw character code for the first token
                /// of the url (see TokenizeIdentifierOrNumber).
                /// </returns>
                /// <exception cref="Exception">The scanner produced <c>Token.ERROR</c>.</exception>
                public int token ()
                {
                        int token = xtoken ();
                        if (token == Token.ERROR) {
                                // {1} is the url itself; the format string used to repeat {0},
                                // which printed the position twice and never showed the url.
                                throw new Exception (string.Format ("Error at position {0} parsing url '{1}'", current_pos, input));
                        }
                        current_token = token;
                        return token;
                }

                // Reads one token: skips whitespace, then maps punctuation directly to its
                // Token constant and defers everything else to TokenizeIdentifierOrNumber.
                int xtoken ()
                {
                        char next = Read ();
                        while (char.IsWhiteSpace (next))
                                next = Read ();
                        // Skipped whitespace is deliberately not counted in current_pos.
                        current_pos++;
                        val = null;

                        switch (next) {
                        case ',':
                                return Token.COMMA;
                        case '.':
                                return Token.DOT;
                        // Both brace and angle-bracket spellings of generics are accepted.
                        case '{':
                        case '<':
                                return Token.OP_GENERICS_LT;
                        case '}':
                        case '>':
                                return Token.OP_GENERICS_GT;
                        case '`':
                                return Token.OP_GENERICS_BACKTICK;
                        case '(':
                                return Token.OP_OPEN_PAREN;
                        case ')':
                                return Token.OP_CLOSE_PAREN;
                        case '+':
                                return Token.INNER_TYPE_SEPARATOR;
                        case ':':
                                return Token.COLON;
                        case '/':
                                return Token.SLASH_SEPARATOR;
                        case '[':
                                return Token.OP_ARRAY_OPEN;
                        case ']':
                                return Token.OP_ARRAY_CLOSE;
                        case '*':
                                return Token.STAR;
                        case '&':
                                return Token.REF_ARG;
                        case '@':
                                return Token.OUT_ARG;
                        case '$':
                                return Token.EXPLICIT_IMPL_SEP;
                        default:
                                return TokenizeIdentifierOrNumber (next);
                        }
                }

                // Handles every character xtoken does not treat as punctuation.
                // Sets val and returns IDENTIFIER, DIGIT or ERROR - except for the first token
                // of the url, which is returned as its raw character code.
                int TokenizeIdentifierOrNumber (char current)
                {
                        // Per the original author's note, the url starts with the expression type
                        // (an uppercase letter followed by a colon), so that first character is
                        // passed through unchanged instead of being scanned as an identifier.
                        if (current_pos < 2) {
                                val = null;
                                return (int)current;
                        }

                        // xtoken already maps '*' to Token.STAR, so the '*' arm is not reached
                        // from there; it is kept so the accepted input stays exactly as before.
                        if (is_identifier_start_character (current) || current == '*') {
                                // A growable buffer replaces the former unchecked 512-char
                                // stackalloc, which overran the stack on longer identifiers.
                                StringBuilder ident = new StringBuilder ();
                                ident.Append (current);
                                identCount = 1;

                                char peek;
                                while ((peek = Peek ()) != EndOfStream && is_identifier_part_character (peek)) {
                                        ident.Append (Read ());
                                        ++current_pos;
                                        ++identCount;
                                }

                                val = ident.ToString ();
                                return Token.IDENTIFIER;
                        }

                        if (char.IsDigit (current)) {
                                // char.IsDigit accepts every Unicode decimal digit, for which
                                // "current - '0'" is only right in the ASCII range; ask the
                                // Unicode tables for the digit value instead (same result for 0-9).
                                val = CharUnicodeInfo.GetDecimalDigitValue (current);
                                return Token.DIGIT;
                        }

                        val = null;
                        return Token.ERROR;
                }

                // Consumes and returns the next character, or EndOfStream when none is left.
                char Read ()
                {
                        if (input == null || real_current_pos >= input.Length)
                                return EndOfStream;

                        return input[real_current_pos++];
                }

                // Returns the next character without consuming it, or EndOfStream when none is left.
                char Peek ()
                {
                        if (input == null || real_current_pos >= input.Length)
                                return EndOfStream;

                        return input[real_current_pos];
                }
        }
}