Merge pull request #1304 from slluis/mac-proxy-autoconfig
[mono.git] / mcs / class / System / System / UriHelper.cs
1 using System;
2 using System.Globalization;
3 using System.Text;
4 using System.Collections.Generic;
5
6 namespace System {
7         internal static class UriHelper {
// Extra format value (0x7FFF) that the escaping rules in this class
// test for in addition to the regular UriFormat members.
// NOTE(review): presumably the format passed by Uri.ToString () - confirm against callers.
internal const UriFormat ToStringUnescape = (UriFormat) 0x7FFF;

// Shortcut for Uri.IriParsing; many of the escaping rules below branch on it.
internal static bool IriParsing {
        get {
                return Uri.IriParsing;
        }
}
13
// Options that tune how Format / FormatChar treat a single URI component.
[Flags]
internal enum FormatFlags {
        None = 0x000,
        // Set by Format when HasCharactersToNormalize reports a match;
        // read by NeedToEscape when deciding whether '#' is escaped.
        HasComponentCharactersToNormalize = 0x001,
        // Set together with the flag above; read by FormatChar and NeedToUnescape.
        HasUriCharactersToNormalize = 0x002,
        // Makes Format treat an unknown scheme as CustomWithHost.
        HasHost = 0x004,
        // Set by Format for a fragment for which HasPercentage returns true.
        HasFragmentPercentage = 0x008,
        // Read by FormatChar: unescaped characters are never escaped when set.
        UserEscaped = 0x010,
        // Set by Format for a host wrapped in '[' ... ']'; disables escaping and unescaping.
        IPv6Host = 0x020,
        // Disables the backslash handling in Format and FormatChar.
        NoSlashReplace = 0x040,
        // Disables the Reduce pass that Format runs after formatting a path.
        NoReduce = 0x080,
        // Set by Format; FormatChar keeps '\' in Custom scheme paths when set.
        HasWindowsPath = 0x100,
}
27
// Bit set of the schemes this helper distinguishes. GetScheme maps a scheme
// name to exactly one member; the members are combined into masks for SchemeContains.
[Flags]
internal enum UriSchemes {
        // Returned by GetScheme for an empty scheme name.
        None = 0x0000,
        Http = 0x0001,
        Https = 0x0002,
        File = 0x0004,
        Ftp = 0x0008,
        Gopher = 0x0010,
        Ldap = 0x0020,
        Mailto = 0x0040,
        NetPipe = 0x0080,
        NetTcp = 0x0100,
        News = 0x0200,
        Nntp = 0x0400,
        Telnet = 0x0800,
        Uuid = 0x1000,
        // Any scheme name GetScheme does not recognize.
        Custom = 0x2000,
        // Used by Format in place of Custom when FormatFlags.HasHost is set.
        CustomWithHost = 0x4000,
        // Every bit set.
        All = ~0
}
48
// Maps a scheme name (compared after invariant lower-casing) to its
// UriSchemes member. An empty name yields None; any name that is not one
// of the Uri.UriScheme* constants tested below yields Custom.
private static UriSchemes GetScheme (string schemeName)
{
        string name = schemeName.ToLowerInvariant ();

        if (name.Length == 0)
                return UriSchemes.None;

        if (string.Equals (name, Uri.UriSchemeHttp, StringComparison.Ordinal))
                return UriSchemes.Http;

        if (string.Equals (name, Uri.UriSchemeHttps, StringComparison.Ordinal))
                return UriSchemes.Https;

        if (string.Equals (name, Uri.UriSchemeFile, StringComparison.Ordinal))
                return UriSchemes.File;

        if (string.Equals (name, Uri.UriSchemeFtp, StringComparison.Ordinal))
                return UriSchemes.Ftp;

        if (string.Equals (name, Uri.UriSchemeGopher, StringComparison.Ordinal))
                return UriSchemes.Gopher;

        if (string.Equals (name, Uri.UriSchemeLdap, StringComparison.Ordinal))
                return UriSchemes.Ldap;

        if (string.Equals (name, Uri.UriSchemeMailto, StringComparison.Ordinal))
                return UriSchemes.Mailto;

        if (string.Equals (name, Uri.UriSchemeNetPipe, StringComparison.Ordinal))
                return UriSchemes.NetPipe;

        if (string.Equals (name, Uri.UriSchemeNetTcp, StringComparison.Ordinal))
                return UriSchemes.NetTcp;

        if (string.Equals (name, Uri.UriSchemeNews, StringComparison.Ordinal))
                return UriSchemes.News;

        if (string.Equals (name, Uri.UriSchemeNntp, StringComparison.Ordinal))
                return UriSchemes.Nntp;

        if (string.Equals (name, Uri.UriSchemeTelnet, StringComparison.Ordinal))
                return UriSchemes.Telnet;

        if (string.Equals (name, Uri.UriSchemeUuid, StringComparison.Ordinal))
                return UriSchemes.Uuid;

        // Everything else is an application-defined scheme.
        return UriSchemes.Custom;
}
84
// Returns true when 'keys' and 'flag' share at least one bit, so 'flag'
// may be a mask of several schemes and any single match is enough.
internal static bool SchemeContains (UriSchemes keys, UriSchemes flag)
{
        UriSchemes shared = keys & flag;
        return shared != UriSchemes.None;
}
89
// Returns false only when GetScheme classifies the name as Custom.
// An empty name maps to UriSchemes.None and therefore counts as known.
internal static bool IsKnownScheme (string scheme)
{
        UriSchemes resolved = GetScheme (scheme);
        return resolved != UriSchemes.Custom;
}
94
// Percent-encodes one UTF-16 code unit: the character is encoded as UTF-8
// and every resulting byte is written as '%' followed by two upper-case
// hexadecimal digits.
internal static string HexEscapeMultiByte (char character)
{
        byte [] utf8 = Encoding.UTF8.GetBytes (character.ToString ());

        // Each byte becomes exactly three characters ("%XX").
        var escaped = new StringBuilder (utf8.Length * 3);
        for (int i = 0; i < utf8.Length; i++) {
                escaped.Append ('%');
                escaped.Append (utf8 [i].ToString ("X2", CultureInfo.InvariantCulture));
        }

        return escaped.ToString ();
}
109
// Name-based convenience overload: resolves the scheme name with GetScheme
// and defers to SupportsQuery (UriSchemes).
internal static bool SupportsQuery (string scheme)
{
        UriSchemes resolved = GetScheme (scheme);
        return SupportsQuery (resolved);
}
114
// Tells whether URIs of the given scheme are treated as having a query part.
// For File the answer is the current IriParsing value. Ftp, Gopher, Nntp,
// Telnet and News never do; every other scheme does.
internal static bool SupportsQuery (UriSchemes scheme)
{
        const UriSchemes withoutQuery = UriSchemes.Ftp | UriSchemes.Gopher | UriSchemes.Nntp |
                UriSchemes.Telnet | UriSchemes.News;

        bool isFile = SchemeContains (scheme, UriSchemes.File);
        if (isFile)
                return IriParsing;

        return !SchemeContains (scheme, withoutQuery);
}
122
// Returns true when 'str' contains a percent-escape that decodes either to
// an ASCII letter, digit, '-', '.', '_' or '~', or to a character above 0x7F.
// A '%' that does not start a valid escape is ignored.
internal static bool HasCharactersToNormalize (string str)
{
        int len = str.Length;
        int i = 0;
        while (i < len) {
                if (str [i] != '%') {
                        i++;
                        continue;
                }

                int iStart = i;
                char surrogate;
                // Advances 'i' past whatever it consumed, so 'i' already points
                // at the next unread character when the call returns. The loop
                // must not advance again: the previous for-loop did, and so
                // skipped the character right after every '%' (for example the
                // second escape in "%2F%41" was never inspected).
                char x = Uri.HexUnescapeMultiByte (str, ref i, out surrogate);

                // Defensive: guarantee forward progress even if nothing was consumed.
                if (i <= iStart)
                        i = iStart + 1;

                // A lone '%' only advances by one character; it is not an escape.
                bool isEscaped = i - iStart > 1;
                if (!isEscaped)
                        continue;

                if ((x >= 'A' && x <= 'Z') || (x >= 'a' && x <= 'z') || (x >= '0' && x <= '9') ||
                        x == '-' || x == '.' || x == '_' || x == '~')
                        return true;

                if (x > 0x7f)
                        return true;
        }

        return false;
}
149
// Returns true when 'str' contains a percent sign that is significant on
// its own: either an escape that decodes to '%', or a '%' that does not
// start a valid escape sequence.
internal static bool HasPercentage (string str)
{
        int len = str.Length;
        int i = 0;
        while (i < len) {
                if (str [i] != '%') {
                        i++;
                        continue;
                }

                int iStart = i;
                char surrogate;
                // Advances 'i' past whatever it consumed, so the loop must not
                // advance again. The previous for-loop did, and therefore never
                // looked at a '%' that directly followed an escape.
                char x = Uri.HexUnescapeMultiByte (str, ref i, out surrogate);

                // Defensive: guarantee forward progress even if nothing was consumed.
                if (i <= iStart)
                        i = iStart + 1;

                if (x == '%')
                        return true;

                // Only one character consumed: the '%' was not a valid escape.
                bool isEscaped = i - iStart > 1;
                if (!isEscaped)
                        return true;
        }

        return false;
}
172
// Formats one component of an absolute URI: forwards to Format with
// UriKind.Absolute. 'formatFlags' defaults to FormatFlags.None.
internal static string FormatAbsolute (string str, string schemeName,
        UriComponents component, UriFormat uriFormat, FormatFlags formatFlags = FormatFlags.None)
{
        const UriKind kind = UriKind.Absolute;

        return Format (str, schemeName, kind, component, uriFormat, formatFlags);
}
178
// Formats a relative URI: forwards to Format with UriKind.Relative,
// always as a Path component and without any format flags.
internal static string FormatRelative (string str, string schemeName, UriFormat uriFormat)
{
        const UriKind kind = UriKind.Relative;
        const UriComponents component = UriComponents.Path;

        return Format (str, schemeName, kind, component, uriFormat, FormatFlags.None);
}
183
// Formats a single URI component.
//
// Steps, in order:
//   1. derive extra FormatFlags from the content of 'str';
//   2. resolve the scheme (Custom becomes CustomWithHost when HasHost is set);
//   3. without IRI parsing, pre-reduce Ftp / CustomWithHost paths after
//      turning '\' into '/' (unless NoSlashReplace is set);
//   4. escape / unescape every character through FormatString;
//   5. reduce the resulting path for the schemes that call for it
//      (unless NoReduce is set).
//
// Returns "" for a null or empty 'str'.
private static string Format (string str, string schemeName, UriKind uriKind,
        UriComponents component, UriFormat uriFormat, FormatFlags formatFlags)
{
        if (string.IsNullOrEmpty (str))
                return "";

        if (HasCharactersToNormalize (str))
                formatFlags |= FormatFlags.HasComponentCharactersToNormalize | FormatFlags.HasUriCharactersToNormalize;

        if (component == UriComponents.Fragment && HasPercentage (str))
                formatFlags |= FormatFlags.HasFragmentPercentage;

        if (component == UriComponents.Host &&
                str.Length > 1 && str [0] == '[' && str [str.Length - 1] == ']')
                formatFlags |= FormatFlags.IPv6Host;

        // The letter test used to be written as "... && lower || upper". Because
        // && binds tighter than ||, any string starting with an upper-case letter
        // got the flag regardless of component, length and second character.
        // Both cases are now gated by the same checks.
        // NOTE(review): the flag name suggests a drive-letter path ("c:..."), yet
        // the second character is required NOT to be ':'. Kept as found - confirm.
        char first = str [0];
        bool startsWithLetter = ('a' <= first && first <= 'z') || ('A' <= first && first <= 'Z');
        if (component == UriComponents.Path &&
                str.Length >= 2 && str [1] != ':' && startsWithLetter)
                formatFlags |= FormatFlags.HasWindowsPath;

        UriSchemes scheme = GetScheme (schemeName);

        if (scheme == UriSchemes.Custom && (formatFlags & FormatFlags.HasHost) != 0)
                scheme = UriSchemes.CustomWithHost;

        // Schemes whose path is reduced after the per-character pass.
        var reduceAfter = UriSchemes.Http | UriSchemes.Https | UriSchemes.File | UriSchemes.NetPipe | UriSchemes.NetTcp;

        if (IriParsing) {
                reduceAfter |= UriSchemes.Ftp;
        } else if (component == UriComponents.Path &&
                (formatFlags & FormatFlags.NoSlashReplace) == 0) {
                // Without IRI parsing these two schemes are reduced before the
                // per-character pass instead.
                if (scheme == UriSchemes.Ftp)
                        str = Reduce (str.Replace ('\\', '/'), !IriParsing);
                if (scheme == UriSchemes.CustomWithHost)
                        str = Reduce (str.Replace ('\\', '/'), false);
        }

        str = FormatString (str, scheme, uriKind, component, uriFormat, formatFlags);

        if (component == UriComponents.Path &&
                (formatFlags & FormatFlags.NoReduce) == 0) {
                if (SchemeContains (scheme, reduceAfter))
                        str = Reduce (str, !IriParsing);
                if (IriParsing && scheme == UriSchemes.CustomWithHost)
                        str = Reduce (str, false);
        }

        return str;
}
234
// Walks 'str' once and rebuilds it through FormatChar. Plain characters are
// passed as-is; a '%' is first decoded with Uri.HexUnescapeMultiByte and the
// decoded character is passed together with the original escape text.
private static string FormatString (string str, UriSchemes scheme, UriKind uriKind,
        UriComponents component, UriFormat uriFormat, FormatFlags formatFlags)
{
        var output = new StringBuilder ();
        int length = str.Length;
        int pos = 0;

        while (pos < length) {
                char current = str [pos];

                if (current != '%') {
                        output.Append (FormatChar (current, char.MinValue, current.ToString (), scheme, uriKind,
                                component, uriFormat, formatFlags));
                        pos++;
                        continue;
                }

                int escapeStart = pos;
                char surrogate;
                bool invalidUnescape;
                // Leaves 'pos' just past the text it consumed.
                char decoded = Uri.HexUnescapeMultiByte (str, ref pos, out surrogate, out invalidUnescape);

                if (invalidUnescape
#if !NET_4_0
                        && uriFormat == UriFormat.SafeUnescaped && char.IsControl (decoded)
#endif
                ) {
                        // Emit the '%' untouched and resume right after it.
                        output.Append (current);
                        pos = escapeStart + 1;
                        continue;
                }

                string escapeText = str.Substring (escapeStart, pos - escapeStart);
                output.Append (FormatChar (decoded, surrogate, escapeText, scheme, uriKind,
                        component, uriFormat, formatFlags));
        }

        return output.ToString ();
}
269
// Produces the output text for one character of a component.
//
// 'c' is the (decoded) character, 'surrogate' an optional second UTF-16
// unit (char.MinValue when absent) and 'cStr' the text it came from: a
// single character when it was not escaped, the escape sequence otherwise.
//
// The result is one of: the percent-encoded form of 'c', the original escape
// text (optionally upper-cased), a replacement for a path backslash, or the
// character(s) themselves.
private static string FormatChar (char c, char surrogate, string cStr, UriSchemes scheme, UriKind uriKind,
        UriComponents component, UriFormat uriFormat, FormatFlags formatFlags)
{
        bool isEscaped = cStr.Length != 1;
        bool userEscaped = (formatFlags & FormatFlags.UserEscaped) != 0;

        // Unescaped input that has to be escaped.
        if (!isEscaped && !userEscaped && NeedToEscape (c, scheme, component, uriKind, uriFormat, formatFlags))
                return HexEscapeMultiByte (c);

        // Escaped input that has to stay escaped.
        bool keepEscape = false;
        if (isEscaped) {
                bool forcedByUser = false;
#if NET_4_0
                forcedByUser = userEscaped && c < 0xFF;
#endif
                keepEscape = forcedByUser || !NeedToUnescape (c, scheme, component, uriKind, uriFormat, formatFlags);
        }

        if (keepEscape) {
                bool upperCaseCandidate = c == '<' || c == '>' || c == '^' || c == '{' || c == '|' || c == '}' || c > 0x7F;
                bool normalizing = (formatFlags & FormatFlags.HasUriCharactersToNormalize) != 0;

                if (IriParsing && upperCaseCandidate && normalizing)
                        return cStr.ToUpperInvariant (); // Upper case escape

                return cStr; // Keep original case
        }

        bool slashReplaceAllowed = (formatFlags & FormatFlags.NoSlashReplace) == 0;
        if (slashReplaceAllowed && c == '\\' && component == UriComponents.Path) {
                bool uriEscaped = uriFormat == UriFormat.UriEscaped;

                if (!IriParsing && !uriEscaped &&
                        SchemeContains (scheme, UriSchemes.Http | UriSchemes.Https))
                        return "/";

                if (SchemeContains (scheme, UriSchemes.Http | UriSchemes.Https | UriSchemes.Ftp | UriSchemes.CustomWithHost)) {
                        if (isEscaped && !uriEscaped)
                                return "\\";

                        return "/";
                }

                if (SchemeContains (scheme, UriSchemes.NetPipe | UriSchemes.NetTcp | UriSchemes.File))
                        return "/";

                bool windowsPath = (formatFlags & FormatFlags.HasWindowsPath) != 0;
                if (SchemeContains (scheme, UriSchemes.Custom) && !windowsPath)
                        return "/";
        }

        // Plain output: the character, followed by its surrogate partner if any.
        if (surrogate == char.MinValue)
                return c.ToString ();

        return new string (new [] { c, surrogate });
}
315
// Decides whether an escaped character 'c' should be written decoded (true)
// or left as its original escape sequence (false).
//
// The answer depends on the requested UriFormat, on whether IRI parsing is
// active, on the scheme and component, and on FormatFlags. IPv6 hosts are
// never decoded; UriFormat.Unescaped decodes everything else.
private static bool NeedToUnescape (char c, UriSchemes scheme, UriComponents component, UriKind uriKind,
        UriFormat uriFormat, FormatFlags formatFlags)
{
        if ((formatFlags & FormatFlags.IPv6Host) != 0)
                return false;

        if (uriFormat == UriFormat.Unescaped)
                return true;

        bool inQuery = component == UriComponents.Query;
        bool inFragment = component == UriComponents.Fragment;
        bool relative = uriKind == UriKind.Relative;
        bool normalizing = (formatFlags & FormatFlags.HasUriCharactersToNormalize) != 0;
        bool alphaNumeric = (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9');

        // Schemes that decode '/', '\' and (when UriEscaped) '.' outside of
        // the query and fragment.
        UriSchemes decodingSchemes = UriSchemes.NetPipe | UriSchemes.NetTcp;
        if (!IriParsing)
                decodingSchemes |= UriSchemes.Http | UriSchemes.Https;

        bool schemeDecodesHere = !inQuery && !inFragment && SchemeContains (scheme, decodingSchemes);

        switch (c) {
        case '/':
        case '\\':
                if (!IriParsing && !relative && uriFormat != UriFormat.UriEscaped &&
                        uriFormat != UriFormat.SafeUnescaped)
                        return true;

                if (SchemeContains (scheme, UriSchemes.File))
                        return !inFragment && (!inQuery || !IriParsing);

                return schemeDecodesHere;

        case '?':
                bool queryCapable = SupportsQuery (scheme);

                // Avoid creating new query
                if (queryCapable && component == UriComponents.Path)
                        return false;

                if (IriParsing || uriFormat != ToStringUnescape)
                        return false;

                return inFragment || (queryCapable && inQuery);

        case '#':
                return false;
        }

        if (uriFormat == ToStringUnescape && !IriParsing) {
                if (relative)
                        return false;

                if ("$&+,;=@".IndexOf (c) >= 0)
                        return true;

                // ASCII control characters.
                if (c < 0x20 || c == 0x7f)
                        return true;
        }

        if (uriFormat == UriFormat.SafeUnescaped || uriFormat == ToStringUnescape) {
                if ("-._~".IndexOf (c) >= 0)
                        return true;

                if (" !\"'()*<>^`{}|".IndexOf (c) >= 0)
                        return !relative || (IriParsing && normalizing);

                if (":[]".IndexOf (c) >= 0)
                        return !relative;

                return alphaNumeric || c > 0x7f;
        }

        if (uriFormat != UriFormat.UriEscaped)
                return false;

        if (!IriParsing) {
                // Only '.' is ever decoded here.
                if (c != '.')
                        return false;

                if (SchemeContains (scheme, UriSchemes.File))
                        return !inFragment;

                return schemeDecodesHere;
        }

        if ("-._~".IndexOf (c) >= 0)
                return true;

        if (normalizing && "!'()*:[]".IndexOf (c) >= 0)
                return true;

        return alphaNumeric;
}
462
// Decides whether an unescaped character 'c' has to be percent-encoded in
// the output (true) or can be written as-is (false).
//
// The answer depends on the requested UriFormat, the scheme, the component
// and FormatFlags. Nothing is escaped inside an IPv6 host.
private static bool NeedToEscape (char c, UriSchemes scheme, UriComponents component, UriKind uriKind,
        UriFormat uriFormat, FormatFlags formatFlags)
{
        if ((formatFlags & FormatFlags.IPv6Host) != 0)
                return false;

        bool safeUnescaped = uriFormat == UriFormat.SafeUnescaped;
        bool toStringUnescape = uriFormat == ToStringUnescape;

        switch (c) {
        case '?':
                // Only escaped for schemes without a query part, outside of the fragment.
                return uriFormat != UriFormat.Unescaped &&
                        !SupportsQuery (scheme) &&
                        component != UriComponents.Fragment;

        case '#':
                // Avoid removing fragment
                if (component == UriComponents.Path || component == UriComponents.Query)
                        return false;

                if (component == UriComponents.Fragment &&
                        (toStringUnescape || safeUnescaped) &&
                        (formatFlags & FormatFlags.HasFragmentPercentage) != 0)
                        return true;

#if NET_4_5
                return false;
#else
                return uriFormat == UriFormat.UriEscaped ||
                        (uriFormat != UriFormat.Unescaped && (formatFlags & FormatFlags.HasComponentCharactersToNormalize) != 0);
#endif
        }

        if (c == '%' && (safeUnescaped || toStringUnescape))
                return uriKind != UriKind.Relative;

        // ASCII control characters.
        if (safeUnescaped && (c < 0x20 || c == 0x7F))
                return true;

        if (uriFormat != UriFormat.UriEscaped)
                return false;

        if (c < 0x20 || c >= 0x7F)
                return component != UriComponents.Host;

        if (" \"%<>^`{}|".IndexOf (c) >= 0)
                return true;

        if (c == '[' || c == ']')
                return !IriParsing;

        if (c == '\\') {
                const UriSchemes escapingSchemes = UriSchemes.Gopher | UriSchemes.Ldap | UriSchemes.Mailto |
                        UriSchemes.Nntp | UriSchemes.Telnet | UriSchemes.News | UriSchemes.Custom;

                return component != UriComponents.Path || SchemeContains (scheme, escapingSchemes);
        }

        return false;
}
536
// Removes "." and ".." segments from a '/'-separated path (this is called
// "compacting" in the MSDN documentation).
//
// path:     the path to reduce.
// trimDots: when true, trailing dots are also stripped from every segment.
//
// A ".." with nothing left to remove is dropped. The result is "/" when no
// segment survives. A leading '/' is kept, and a trailing '/' is written when
// the input ended with one or its last segment was "." or "..".
internal static string Reduce (string path, bool trimDots)
{
        // quick out, allocation-free, for a common case
        if (path == "/")
                return path;

        string [] parts = path.Split ('/');
        int last = parts.Length - 1;
        var kept = new List<string> ();
        bool trailingSlash = false;

        for (int index = 0; index <= last; index++) {
                string part = parts [index];
                bool isLast = index == last;

                if (isLast && (part.Length == 0 || part == ".." || part == "."))
                        trailingSlash = true;

                // Empty first/last parts only reflect a leading/trailing '/'.
                if (part.Length == 0 && (index == 0 || isLast))
                        continue;

                if (part == "..") {
                        // in 2.0 profile, skip leading ".." parts
                        if (kept.Count > 0)
                                kept.RemoveAt (kept.Count - 1);
                        continue;
                }

                if (part == "." || (trimDots && part.EndsWith (".", StringComparison.Ordinal))) {
                        part = part.TrimEnd ('.');
                        if (part.Length == 0 && !isLast)
                                continue;
                }

                trailingSlash = false;
                kept.Add (part);
        }

        if (kept.Count == 0)
                return "/";

        var reduced = new StringBuilder ();

        if (path [0] == '/')
                reduced.Append ('/');

        reduced.Append (string.Join ("/", kept.ToArray ()));

        if (trailingSlash || path [path.Length - 1] == '/')
                reduced.Append ('/');

        return reduced.ToString ();
}
605         }
606 }