Merge pull request #496 from nicolas-raoul/unit-test-for-issue2907
[mono.git] / mcs / tools / monkeydoc / Monkeydoc.Ecma / EcmaUrlTokenizer.cs
1 using System;
2 using System.Text;
3 using System.Globalization;
4
5 namespace Monkeydoc.Ecma
6 {
/// <summary>
/// Hand-written lexer that feeds the generated parser through
/// <c>yyParser.yyInput</c>. It walks the input string one character at a
/// time and turns it into <c>Token</c> codes plus an optional semantic value.
/// </summary>
public class EcmaUrlTokenizer : yyParser.yyInput
{
        // Sentinel returned by Read/Peek once the input is exhausted.
        const char EndOfStream = (char)0;

        readonly string input;
        // Semantic value of the last token (identifier text, digit value) or null.
        object val;
        // Last token code handed out by token (), used only for diagnostics.
        int current_token;
        // Logical position: bumped once per token started and once per extra
        // identifier character. Skipped whitespace is not counted.
        int current_pos;
        // Index of the next unread character in input.
        int real_current_pos;

        /// <summary>Creates a tokenizer over <paramref name="input"/>.</summary>
        /// <param name="input">Text to tokenize; a null string behaves like an empty one.</param>
        public EcmaUrlTokenizer (string input)
        {
                this.input = input;
        }

        /// <summary>True when <paramref name="c"/> may begin an identifier: '_' or any letter.</summary>
        static bool is_identifier_start_character (char c)
        {
                return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || Char.IsLetter (c);
        }

        /// <summary>
        /// True when <paramref name="c"/> may continue an identifier: ASCII
        /// letters, ASCII digits, '_', non-ASCII letters and connector punctuation.
        /// </summary>
        static bool is_identifier_part_character (char c)
        {
                if (c >= 'a' && c <= 'z')
                        return true;

                if (c >= 'A' && c <= 'Z')
                        return true;

                if (c == '_' || (c >= '0' && c <= '9'))
                        return true;

                // Every remaining ASCII character (including EndOfStream) is rejected
                // without consulting the Unicode tables.
                if (c < 0x80)
                        return false;

                return Char.IsLetter (c) || Char.GetUnicodeCategory (c) == UnicodeCategory.ConnectorPunctuation;
        }

        /// <summary>Reports whether any unread character is left in the input.</summary>
        /// <returns>False once the read cursor has reached the end of the input.</returns>
        // NOTE(review): this is also true when only whitespace remains; the
        // following token () call then skips it, hits end of input and yields
        // Token.ERROR (or 0 while current_pos < 2). Confirm whether callers
        // rely on that before changing it.
        public bool advance ()
        {
                return Peek () != EndOfStream;
        }

        /// <summary>Semantic value attached to the most recently scanned token, or null.</summary>
        public Object Value {
                get {
                        return val;
                }
        }

        /// <summary>Method form of <see cref="Value"/>.</summary>
        public Object value ()
        {
                return val;
        }

        /// <summary>
        /// Scans and returns the next token code. When the scanner produces
        /// <c>Token.ERROR</c> a diagnostic line is written to the console.
        /// </summary>
        public int token ()
        {
                int token = xtoken ();
                //Console.WriteLine ("Current token {0} with value {1}", token, val == null ? "(none)" : val.ToString ());
                if (token == Token.ERROR)
                        Console.WriteLine ("Problem at pos {0} after token {1}", current_pos, current_token);
                current_token = token;
                return token;
        }

        /// <summary>
        /// Skips whitespace, consumes one character and maps it to a token.
        /// Single-character punctuation is resolved here; everything else is
        /// delegated to <see cref="TokenizeIdentifierOrNumber"/>.
        /// </summary>
        int xtoken ()
        {
                char next = Read ();
                while (char.IsWhiteSpace (next))
                        next = Read ();
                current_pos++;
                val = null;

                switch (next) {
                case ',':
                        return Token.COMMA;
                case '.':
                        return Token.DOT;
                case '<':
                        return Token.OP_GENERICS_LT;
                case '>':
                        return Token.OP_GENERICS_GT;
                case '`':
                        return Token.OP_GENERICS_BACKTICK;
                case '(':
                        return Token.OP_OPEN_PAREN;
                case ')':
                        return Token.OP_CLOSE_PAREN;
                case '+':
                        return Token.INNER_TYPE_SEPARATOR;
                case ':':
                        return Token.COLON;
                case '/':
                        return Token.SLASH_SEPARATOR;
                case '[':
                        return Token.OP_ARRAY_OPEN;
                case ']':
                        return Token.OP_ARRAY_CLOSE;
                case '*':
                        return Token.STAR;
                case '&':
                        return Token.REF_ARG;
                case '@':
                        return Token.OUT_ARG;
                case '$':
                        return Token.EXPLICIT_IMPL_SEP;
                default:
                        return TokenizeIdentifierOrNumber (next);
                }
        }

        /// <summary>
        /// Classifies a character that is not one of the punctuation tokens.
        /// </summary>
        /// <param name="current">Character that xtoken () has just consumed.</param>
        /// <returns>
        /// The raw character code while <c>current_pos</c> is below 2, otherwise
        /// <c>Token.IDENTIFIER</c> (value: the identifier text),
        /// <c>Token.DIGIT</c> (value: the digit as a boxed int) or
        /// <c>Token.ERROR</c>.
        /// </returns>
        int TokenizeIdentifierOrNumber (char current)
        {
                // We must first return the expression type which is a uppercase letter and a colon
                if (current_pos < 2) {
                        val = null;
                        return (int)current;
                }

                // '*' never reaches this method: xtoken () already returns Token.STAR for it.
                if (is_identifier_start_character (current)) {
                        // 'current' was consumed by the last Read (), so it sits right
                        // before the cursor. Slicing the identifier out of the input
                        // removes any limit on its length.
                        int start = real_current_pos - 1;

                        // Peek () yields EndOfStream at the end of the input, which is
                        // not an identifier character, so the loop always terminates.
                        while (is_identifier_part_character (Peek ())) {
                                Read ();
                                ++current_pos;
                        }

                        val = input.Substring (start, real_current_pos - start);
                        return Token.IDENTIFIER;
                }

                if (char.IsDigit (current)) {
                        // char.IsDigit accepts every Unicode decimal digit, so the value
                        // is looked up instead of being computed relative to '0'.
                        val = CharUnicodeInfo.GetDecimalDigitValue (current);
                        return Token.DIGIT;
                }

                val = null;
                return Token.ERROR;
        }

        /// <summary>Consumes and returns the next character, or EndOfStream when none is left.</summary>
        char Read ()
        {
                if (input == null || real_current_pos >= input.Length)
                        return EndOfStream;

                return input[real_current_pos++];
        }

        /// <summary>Returns the next character without consuming it, or EndOfStream when none is left.</summary>
        char Peek ()
        {
                if (input == null || real_current_pos >= input.Length)
                        return EndOfStream;

                return input[real_current_pos];
        }
}
168 }