2b1bdcf0ec7cc68e72054c6c194bb3eff73cd871
[mono.git] / mcs / class / System / System / UriHelper.cs
1 using System;
2 using System.Globalization;
3 using System.Text;
4 using System.Collections.Generic;
5
6 namespace System {
7         internal static class UriHelper {
8                 internal const UriFormat ToStringUnescape = (UriFormat) 0x7FFF;
9
10                 internal static bool IriParsing {
11                         get { return Uri.IriParsing; }
12                 }
13
14                 [Flags]
15                 internal enum FormatFlags {
16                         None = 0,
17                         HasComponentCharactersToNormalize = 1 << 0,
18                         HasUriCharactersToNormalize = 1 << 1,
19                         HasHost = 1 << 2,
20                         HasFragmentPercentage = 1 << 3,
21                         UserEscaped = 1 << 4,
22                         IPv6Host = 1 << 5,
23                         NoSlashReplace = 1 << 6,
24                         NoReduce = 1 << 7,
25                         HasWindowsPath = 1 << 8,
26                 }
27
28                 [Flags]
29                 internal enum UriSchemes {
30                         Http = 1 << 0,
31                         Https = 1 << 1,
32                         File = 1 << 2,
33                         Ftp = 1 << 3,
34                         Gopher = 1 << 4,
35                         Ldap = 1 << 5,
36                         Mailto = 1 << 6,
37                         NetPipe = 1 << 7,
38                         NetTcp = 1 << 8,
39                         News = 1 << 9,
40                         Nntp = 1 << 10,
41                         Telnet = 1 << 11,
42                         Uuid = 1 << 12,
43                         Custom = 1 << 13,
44                         CustomWithHost = 1 << 14,
45                         All = ~0,
46                         None = 0
47                 }
48
49                 private static UriSchemes GetScheme (string schemeName)
50                 {
51                         schemeName = schemeName.ToLowerInvariant ();
52
53                         if (schemeName == "")
54                                 return UriSchemes.None;
55                         if (schemeName == Uri.UriSchemeHttp)
56                                 return UriSchemes.Http;
57                         if (schemeName == Uri.UriSchemeHttps)
58                                 return UriSchemes.Https;
59                         if (schemeName == Uri.UriSchemeFile)
60                                 return UriSchemes.File;
61                         if (schemeName == Uri.UriSchemeFtp)
62                                 return UriSchemes.Ftp;
63                         if (schemeName == Uri.UriSchemeGopher)
64                                 return UriSchemes.Gopher;
65                         if (schemeName == Uri.UriSchemeLdap)
66                                 return UriSchemes.Ldap;
67                         if (schemeName == Uri.UriSchemeMailto)
68                                 return UriSchemes.Mailto;
69                         if (schemeName == Uri.UriSchemeNetPipe)
70                                 return UriSchemes.NetPipe;
71                         if (schemeName == Uri.UriSchemeNetTcp)
72                                 return UriSchemes.NetTcp;
73                         if (schemeName == Uri.UriSchemeNews)
74                                 return UriSchemes.News;
75                         if (schemeName == Uri.UriSchemeNntp)
76                                 return UriSchemes.Nntp;
77                         if (schemeName == Uri.UriSchemeTelnet)
78                                 return UriSchemes.Telnet;
79                         if (schemeName == Uri.UriSchemeUuid)
80                                 return UriSchemes.Uuid;
81
82                         return UriSchemes.Custom;
83                 }
84
85                 internal static bool SchemeContains (UriSchemes keys, UriSchemes flag)
86                 {
87                         return (keys & flag) != 0;
88                 }
89
90                 internal static bool IsKnownScheme(string scheme)
91                 {
92                         return GetScheme(scheme) != UriSchemes.Custom;
93                 }
94
95                 internal static string HexEscapeMultiByte (char character)
96                 {
97                         const string hex_upper_chars = "0123456789ABCDEF";
98
99                         var sb = new StringBuilder ();
100                         byte [] bytes = Encoding.UTF8.GetBytes (new [] {character});
101                         foreach (byte b in bytes) {
102                                 sb.Append ("%");
103                                 sb.Append (hex_upper_chars [(b & 0xf0) >> 4]);
104                                 sb.Append (hex_upper_chars [b & 0x0f]);
105                         }
106
107                         return sb.ToString ();
108                 }
109
110                 internal static bool SupportsQuery (string scheme)
111                 {
112                         return SupportsQuery (GetScheme (scheme));
113                 }
114
115                 internal static bool SupportsQuery(UriSchemes scheme)
116                 {
117                         if (SchemeContains (scheme, UriSchemes.File))
118                                 return IriParsing;
119
120                         return !SchemeContains (scheme, UriSchemes.Ftp | UriSchemes.Gopher | UriSchemes.Nntp | UriSchemes.Telnet | UriSchemes.News);
121                 }
122
123                 internal static bool HasCharactersToNormalize(string str)
124                 {
125                         int len = str.Length;
126                         for (int i = 0; i < len; i++) {
127                                 char c = str [i];
128                                 if (c != '%')
129                                         continue;
130
131                                 int iStart = i;
132                                 char surrogate;
133                                 char x = Uri.HexUnescapeMultiByte (str, ref i, out surrogate);
134
135                                 bool isEscaped = i - iStart > 1;
136                                 if (!isEscaped)
137                                         continue;
138
139                                 if ((x >= 'A' && x <= 'Z') || (x >= 'a' && x <= 'z') || (x >= '0' && x <= '9') || 
140                                          x == '-' || x == '.' || x == '_' || x == '~')
141                                         return true;
142
143                                 if (x > 0x7f)
144                                         return true;
145                         }
146
147                         return false;
148                 }
149
150                 internal static bool HasPercentage (string str)
151                 {
152                         int len = str.Length;
153                         for (int i = 0; i < len; i++) {
154                                 char c = str [i];
155                                 if (c != '%')
156                                         continue;
157
158                                 int iStart = i;
159                                 char surrogate;
160                                 char x = Uri.HexUnescapeMultiByte (str, ref i, out surrogate);
161
162                                 if (x == '%')
163                                         return true;
164
165                                 bool isEscaped = i - iStart > 1;
166                                 if (!isEscaped)
167                                         return true;
168                         }
169
170                         return false;
171                 }
172
173                 internal static string FormatAbsolute (string str, string schemeName,
174                         UriComponents component, UriFormat uriFormat, FormatFlags formatFlags = FormatFlags.None)
175                 {
176                         return Format (str, schemeName, UriKind.Absolute, component, uriFormat, formatFlags);
177                 }
178
179                 internal static string FormatRelative (string str, string schemeName, UriFormat uriFormat)
180                 {
181                         return Format (str, schemeName, UriKind.Relative, UriComponents.Path, uriFormat, FormatFlags.None);
182                 }
183
184                 private static string Format (string str, string schemeName, UriKind uriKind,
185                         UriComponents component, UriFormat uriFormat, FormatFlags formatFlags)
186                 {
187                         if (string.IsNullOrEmpty (str))
188                                 return "";
189
190                         if (UriHelper.HasCharactersToNormalize (str))
191                                 formatFlags |= UriHelper.FormatFlags.HasComponentCharactersToNormalize | FormatFlags.HasUriCharactersToNormalize;
192
193                         if (component == UriComponents.Fragment && UriHelper.HasPercentage (str))
194                                 formatFlags |= UriHelper.FormatFlags.HasFragmentPercentage;
195
196                         if (component == UriComponents.Host &&
197                                 str.Length > 1 && str [0] == '[' && str [str.Length - 1] == ']')
198                                  formatFlags |= UriHelper.FormatFlags.IPv6Host;
199
200                         if (component == UriComponents.Path &&
201                                 str.Length >= 2 && str [1] != ':' &&
202                                 ('a' <= str [0] && str [0] <= 'z') || ('A' <= str [0] && str [0] <= 'Z'))
203                                 formatFlags |= UriHelper.FormatFlags.HasWindowsPath;
204
205                         UriSchemes scheme = GetScheme (schemeName);
206
207                         if (scheme == UriSchemes.Custom && (formatFlags & FormatFlags.HasHost) != 0)
208                                 scheme = UriSchemes.CustomWithHost;
209
210                         var reduceAfter = UriSchemes.Http | UriSchemes.Https | UriSchemes.File | UriSchemes.NetPipe | UriSchemes.NetTcp;
211
212                         if (IriParsing) {
213                                 reduceAfter |= UriSchemes.Ftp;
214                         } else if (component == UriComponents.Path &&
215                                 (formatFlags & FormatFlags.NoSlashReplace) == 0) {
216                                 if(scheme == UriSchemes.Ftp)
217                                         str = Reduce (str.Replace ('\\', '/'), !IriParsing);
218                                 if (scheme == UriSchemes.CustomWithHost)
219                                         str = Reduce (str.Replace ('\\', '/'), false);
220                         }
221
222                         str = FormatString (str, scheme, uriKind, component, uriFormat, formatFlags);
223
224                         if (component == UriComponents.Path &&
225                                 (formatFlags & FormatFlags.NoReduce) == 0) {
226                                 if (SchemeContains (scheme, reduceAfter))
227                                         str = Reduce (str, !IriParsing);
228                                 if(IriParsing && scheme == UriSchemes.CustomWithHost)
229                                         str = Reduce (str, false);
230                         }
231
232                         return str;
233                 }
234
235                 private static string FormatString (string str, UriSchemes scheme, UriKind uriKind,
236                         UriComponents component, UriFormat uriFormat, FormatFlags formatFlags)
237                 {
238                         var s = new StringBuilder ();
239                         int len = str.Length;
240                         for (int i = 0; i < len; i++) {
241                                 char c = str [i];
242                                 if (c == '%') {
243                                         int iStart = i;
244                                         char surrogate;
245                                         bool invalidUnescape;
246                                         char x = Uri.HexUnescapeMultiByte (str, ref i, out surrogate, out invalidUnescape);
247
248
249                                         if (invalidUnescape
250 #if !NET_4_0
251                                                 && uriFormat == UriFormat.SafeUnescaped && char.IsControl (x)
252 #endif
253                                         ) {
254                                                 s.Append (c);
255                                                 i = iStart;
256                                                 continue;
257                                         }
258
259                                         string cStr = str.Substring(iStart, i-iStart);
260                                         s.Append (FormatChar (x, surrogate, cStr, scheme, uriKind, component, uriFormat, formatFlags));
261
262                                         i--;
263                                 } else
264                                         s.Append (FormatChar (c, char.MinValue, "" + c, scheme, uriKind, component, uriFormat, formatFlags));
265                         }
266                         
267                         return s.ToString();
268                 }
269
270                 private static string FormatChar (char c, char surrogate, string cStr, UriSchemes scheme, UriKind uriKind,
271                         UriComponents component, UriFormat uriFormat, FormatFlags formatFlags)
272                 {
273                         var isEscaped = cStr.Length != 1;
274
275                         var userEscaped = (formatFlags & FormatFlags.UserEscaped) != 0;
276                         if (!isEscaped && !userEscaped && NeedToEscape (c, scheme, component, uriKind, uriFormat, formatFlags))
277                                 return HexEscapeMultiByte (c);
278
279                         if (isEscaped && (
280 #if NET_4_0
281                                 (userEscaped && c < 0xFF) ||
282 #endif
283                                 !NeedToUnescape (c, scheme, component, uriKind, uriFormat, formatFlags))) {
284                                 if (IriParsing &&
285                                         (c == '<' || c == '>' || c == '^' || c == '{' || c == '|' || c ==  '}' || c > 0x7F) &&
286                                         (formatFlags & FormatFlags.HasUriCharactersToNormalize) != 0)
287                                         return cStr.ToUpperInvariant (); //Upper case escape
288
289                                 return cStr; //Keep original case
290                         }
291
292                         if ((formatFlags & FormatFlags.NoSlashReplace) == 0 &&
293                                 c == '\\' && component == UriComponents.Path) {
294                                 if (!IriParsing && uriFormat != UriFormat.UriEscaped &&
295                                         SchemeContains (scheme, UriSchemes.Http | UriSchemes.Https))
296                                         return "/";
297
298                                 if (SchemeContains (scheme, UriSchemes.Http | UriSchemes.Https | UriSchemes.Ftp | UriSchemes.CustomWithHost))
299                                         return (isEscaped && uriFormat != UriFormat.UriEscaped) ? "\\" : "/";
300
301                                 if (SchemeContains (scheme, UriSchemes.NetPipe | UriSchemes.NetTcp | UriSchemes.File))
302                                         return "/";
303
304                                 if (SchemeContains (scheme, UriSchemes.Custom) &&
305                                         (formatFlags & FormatFlags.HasWindowsPath) == 0)
306                                         return "/";
307                         }
308
309                         var ret = c.ToString (CultureInfo.InvariantCulture);
310                         if (surrogate != char.MinValue)
311                                 ret += surrogate.ToString (CultureInfo.InvariantCulture);
312
313                         return ret;
314                 }
315
316                 private static bool NeedToUnescape (char c, UriSchemes scheme, UriComponents component, UriKind uriKind,
317                         UriFormat uriFormat, FormatFlags formatFlags)
318                 {
319                         if ((formatFlags & FormatFlags.IPv6Host) != 0)
320                                 return false;
321
322                         if (uriFormat == UriFormat.Unescaped)
323                                 return true;
324
325                         UriSchemes sDecoders = UriSchemes.NetPipe | UriSchemes.NetTcp;
326
327                         if (!IriParsing)
328                                 sDecoders |= UriSchemes.Http | UriSchemes.Https;
329
330                         if (c == '/' || c == '\\') {
331                                 if (!IriParsing && uriKind == UriKind.Absolute && uriFormat != UriFormat.UriEscaped &&
332                                         uriFormat != UriFormat.SafeUnescaped)
333                                         return true;
334
335                                 if (SchemeContains (scheme, UriSchemes.File)) {
336                                         return component != UriComponents.Fragment &&
337                                                    (component != UriComponents.Query || !IriParsing);
338                                 }
339
340                                 return component != UriComponents.Query && component != UriComponents.Fragment &&
341                                            SchemeContains (scheme, sDecoders);
342                         }
343
344                         if (c == '?') {
345                                 //Avoid creating new query
346                                 if (SupportsQuery (scheme) && component == UriComponents.Path)
347                                         return false;
348
349                                 if (!IriParsing && uriFormat == ToStringUnescape) {
350                                         if (SupportsQuery (scheme))
351                                                 return component == UriComponents.Query || component == UriComponents.Fragment;
352
353                                         return component == UriComponents.Fragment;
354                                 }
355
356                                 return false;
357                         }
358
359                         if (c == '#')
360                                 return false;
361
362                         if (uriFormat == ToStringUnescape && !IriParsing) {
363                                 if (uriKind == UriKind.Relative)
364                                         return false;
365
366                                 if (c == '$' || c == '&' || c == '+' || c == ',' || c == ';' || c == '=' || c == '@')
367                                         return true;
368
369                                 if (c < 0x20 || c == 0x7f)
370                                         return true;
371                         }
372
373                         if (uriFormat == UriFormat.SafeUnescaped || uriFormat == ToStringUnescape) {
374                                 if (c == '-' || c == '.' || c == '_' || c == '~')
375                                         return true;
376
377                                 if (c == ' ' || c == '!' || c == '"' || c == '\'' || c == '(' || c == ')' || c == '*' ||
378                                         c == '<' || c == '>' || c == '^' || c == '`' || c == '{' || c == '}' || c == '|')
379                                         return uriKind != UriKind.Relative ||
380                                                 (IriParsing && (formatFlags & FormatFlags.HasUriCharactersToNormalize) != 0);
381
382                                 if (c == ':' || c == '[' || c == ']')
383                                         return uriKind != UriKind.Relative;
384
385                                 if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9'))
386                                         return true;
387
388                                 if (c > 0x7f)
389                                         return true;
390
391                                 return false;
392                         }
393
394                         if (uriFormat == UriFormat.UriEscaped) {
395                                 if (!IriParsing) {
396                                         if (c == '.') {
397                                                 if (SchemeContains (scheme, UriSchemes.File))
398                                                         return component != UriComponents.Fragment;
399
400                                                 return component != UriComponents.Query && component != UriComponents.Fragment &&
401                                                            SchemeContains (scheme, sDecoders);
402                                         }
403
404                                         return false;
405                                 }
406
407                                 if (c == '-' || c == '.' || c == '_' || c == '~')
408                                         return true;
409                                 
410                                 if ((formatFlags & FormatFlags.HasUriCharactersToNormalize) != 0 &&
411                                         (c == '!' || c == '\'' || c == '(' || c == ')' || c == '*' ||
412                                         c == ':' || c == '[' || c == ']'))
413                                         return true;
414
415                                 if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9'))
416                                         return true;
417
418                                 return false;
419                         }
420
421                         return false;
422                 }
423
424                 private static bool NeedToEscape (char c, UriSchemes scheme, UriComponents component, UriKind uriKind,
425                         UriFormat uriFormat, FormatFlags formatFlags)
426                 {
427                         if ((formatFlags & FormatFlags.IPv6Host) != 0)
428                                 return false;
429
430                         if (c == '?') {
431                                 if (uriFormat == UriFormat.Unescaped)
432                                         return false;
433
434                                 if (!SupportsQuery (scheme))
435                                         return component != UriComponents.Fragment;
436
437                                 return false;
438                         }
439
440                         if (c == '#') {
441                                 //Avoid removing fragment
442                                 if (component == UriComponents.Path || component == UriComponents.Query)
443                                         return false;
444
445                                 if (component == UriComponents.Fragment &&
446                                         (uriFormat == ToStringUnescape || uriFormat == UriFormat.SafeUnescaped) &&
447                                         (formatFlags & FormatFlags.HasFragmentPercentage) != 0)
448                                         return true;
449
450                                 if (IriParsing)
451                                         return false;
452
453                                 return uriFormat == UriFormat.UriEscaped ||
454                                         (uriFormat != UriFormat.Unescaped && (formatFlags & FormatFlags.HasComponentCharactersToNormalize) != 0);
455                         }
456
457                         if (uriFormat == UriFormat.SafeUnescaped || uriFormat == ToStringUnescape) {
458                                 if (c == '%')
459                                         return uriKind != UriKind.Relative;
460                         }
461
462                         if (uriFormat == UriFormat.SafeUnescaped) {
463                                 if (c < 0x20 || c == 0x7F)
464                                         return true;
465                         }
466
467                         if (uriFormat == UriFormat.UriEscaped) {
468                                 if (c < 0x20 || c >= 0x7F)
469                                         return component != UriComponents.Host;
470
471                                 if (c == ' ' || c == '"' || c == '%' || c == '<' || c == '>' || c == '^' ||
472                                         c == '`' || c == '{' || c == '}' || c == '|')
473                                         return true;
474
475                                 if (c == '[' || c == ']')
476                                         return !IriParsing;
477
478                                 if (c == '\\') {
479                                         return component != UriComponents.Path ||
480                                                    SchemeContains (scheme,
481                                                            UriSchemes.Gopher | UriSchemes.Ldap | UriSchemes.Mailto | UriSchemes.Nntp |
482                                                            UriSchemes.Telnet | UriSchemes.News | UriSchemes.Custom);
483                                 }
484                         }
485
486                         return false;
487                 }
488
489                 // This is called "compacting" in the MSDN documentation
490                 internal static string Reduce (string path, bool trimDots)
491                 {
492                         // quick out, allocation-free, for a common case
493                         if (path == "/")
494                                 return path;
495
496                         bool endWithSlash = false;
497
498                         List<string> result = new List<string> ();
499
500                         bool begin = true;
501                         for (int startpos = 0; startpos < path.Length; ) {
502                                 endWithSlash = true;
503
504                                 int endpos = path.IndexOf ('/', startpos);
505                                 if (endpos == -1)
506                                         endpos = path.Length;
507                                 string current = path.Substring (startpos, endpos-startpos);
508                                 startpos = endpos + 1;
509                                 if (begin && current.Length == 0) {
510                                         begin = false;
511                                         continue;
512                                 }
513
514                                 begin = false;
515                                 if (current == "..") {
516                                         int resultCount = result.Count;
517                                         // in 2.0 profile, skip leading ".." parts
518                                         if (resultCount == 0) {
519                                                 continue;
520                                         }
521
522                                         result.RemoveAt (resultCount - 1);
523                                         continue;
524                                 }
525
526                                 if (current == "." ||
527                                         (trimDots && current.EndsWith("."))) {
528                                         current = current.TrimEnd('.');
529                                         if (current == "" && endpos < path.Length)
530                                                 continue;
531                                 }
532
533                                 endWithSlash = false;
534
535                                 result.Add (current);
536                         }
537
538                         if (result.Count == 0)
539                                 return "/";
540
541                         StringBuilder res = new StringBuilder ();
542
543                         if (path [0] == '/')
544                                 res.Append ('/');
545
546                         bool first = true;
547                         foreach (string part in result) {
548                                 if (first) {
549                                         first = false;
550                                 } else {
551                                         res.Append ('/');
552                                 }
553                                 res.Append(part);
554                         }
555
556                         if (path [path.Length - 1] == '/' || endWithSlash)
557                                 res.Append ('/');
558                                 
559                         return res.ToString();
560                 }
561         }
562 }