2007-10-24 Marek Habersack <mhabersack@novell.com>
[mono.git] / mcs / class / System.Web / System.Web / HttpUtility.cs
index 66afaba230dab65606b1f50890f191103cf28559..ac2f36e375a9b0fcb74147007a7a05ce71032866 100644 (file)
@@ -3,7 +3,7 @@
 //
 // Authors:
 //   Patrik Torstensson (Patrik.Torstensson@labs2.com)
-//   Wictor Wilén (decode/encode functions) (wictor@ibizkit.se)
+//   Wictor Wilén (decode/encode functions) (wictor@ibizkit.se)
 //   Tim Coleman (tim@timcoleman.com)
 //   Gonzalo Paniagua Javier (gonzalo@ximian.com)
 //
@@ -42,7 +42,6 @@ namespace System.Web {
        // CAS - no InheritanceDemand here as the class is sealed
        [AspNetHostingPermission (SecurityAction.LinkDemand, Level = AspNetHostingPermissionLevel.Minimal)]
        public sealed class HttpUtility {
-
                #region Fields
        
                static Hashtable entities;
@@ -340,20 +339,32 @@ namespace System.Web {
                        if (null == s) 
                                return null;
        
-                       if (s.IndexOf ('&') == -1 && s.IndexOf ('"') == -1)
+                       bool needEncode = false;
+                       for (int i = 0; i < s.Length; i++) {
+                               if (s [i] == '&' || s [i] == '"' || s [i] == '<') {
+                                       needEncode = true;
+                                       break;
+                               }
+                       }
+
+                       if (!needEncode)
                                return s;
 
                        StringBuilder output = new StringBuilder ();
-                       foreach (char c in s) 
-                               switch (c) {
+                       int len = s.Length;
+                       for (int i = 0; i < len; i++)
+                               switch (s [i]) {
                                case '&' : 
                                        output.Append ("&amp;");
                                        break;
                                case '"' :
                                        output.Append ("&quot;");
                                        break;
+                               case '<':
+                                       output.Append ("&lt;");
+                                       break;
                                default:
-                                       output.Append (c);
+                                       output.Append (s [i]);
                                        break;
                                }
        
@@ -369,7 +380,7 @@ namespace System.Web {
                {
                        return e.GetChars (b.GetBuffer (), 0, (int) b.Length);
                }
-               
+
                public static string UrlDecode (string s, Encoding e)
                {
                        if (null == s) 
@@ -383,21 +394,36 @@ namespace System.Web {
        
                        StringBuilder output = new StringBuilder ();
                        long len = s.Length;
-                       NumberStyles hexa = NumberStyles.HexNumber;
                        MemoryStream bytes = new MemoryStream ();
+                       int xchar;
        
                        for (int i = 0; i < len; i++) {
-                               if (s [i] == '%' && i + 2 < len) {
+                               if (s [i] == '%' && i + 2 < len && s [i + 1] != '%') {
                                        if (s [i + 1] == 'u' && i + 5 < len) {
                                                if (bytes.Length > 0) {
                                                        output.Append (GetChars (bytes, e));
                                                        bytes.SetLength (0);
                                                }
-                                               output.Append ((char) Int32.Parse (s.Substring (i + 2, 4), hexa));
-                                               i += 5;
-                                       } else {
-                                               bytes.WriteByte ((byte) Int32.Parse (s.Substring (i + 1, 2), hexa));
+
+                                               xchar = GetChar (s, i + 2, 4);
+                                               if (xchar != -1) {
+                                                       output.Append ((char) xchar);
+                                                       i += 5;
+                                               } else {
+                                                       output.Append ('%');
+                                               }
+                                       } else if ((xchar = GetChar (s, i + 1, 2)) != -1) {
+                                               bytes.WriteByte ((byte) xchar);
                                                i += 2;
+                                       } else {
+                                               // Not a valid %XX escape: emit '%' literally, but only
+                                               // after flushing the bytes decoded from earlier escapes,
+                                               // so the output keeps the order of the input.
+                                               if (bytes.Length > 0) {
+                                                       output.Append (GetChars (bytes, e));
+                                                       bytes.SetLength (0);
+                                               }
+                                               output.Append ('%');
                                        }
                                        continue;
                                }
@@ -432,24 +451,49 @@ namespace System.Web {
 
                private static int GetInt (byte b)
                {
-                       char c = Char.ToUpper ((char) b);
+                       char c = (char) b;
                        if (c >= '0' && c <= '9')
                                return c - '0';
 
-                       if (c < 'A' || c > 'F')
-                               return 0;
+                       if (c >= 'a' && c <= 'f')
+                               return c - 'a' + 10;
 
-                       return (c - 'A' + 10);
+                       if (c >= 'A' && c <= 'F')
+                               return c - 'A' + 10;
+
+                       return -1;
                }
 
-               private static char GetChar (byte [] bytes, int offset, int length)
+               private static int GetChar (byte [] bytes, int offset, int length)
                {
                        int value = 0;
                        int end = length + offset;
-                       for (int i = offset; i < end; i++)
-                               value = (value << 4) + GetInt (bytes [i]);
+                       for (int i = offset; i < end; i++) {
+                               int current = GetInt (bytes [i]);
+                               if (current == -1)
+                                       return -1;
+                               value = (value << 4) + current;
+                       }
 
-                       return (char) value;
+                       return value;
+               }
+
+               private static int GetChar (string str, int offset, int length)
+               {
+                       int val = 0;
+                       int end = length + offset;
+                       for (int i = offset; i < end; i++) {
+                               char c = str [i];
+                               if (c > 127)
+                                       return -1;
+
+                               int current = GetInt ((byte) c);
+                               if (current == -1)
+                                       return -1;
+                               val = (val << 4) + current;
+                       }
+
+                       return val;
                }
                
                public static string UrlDecode (byte [] bytes, int offset, int count, Encoding e)
@@ -472,20 +516,27 @@ namespace System.Web {
                        MemoryStream acc = new MemoryStream ();
 
                        int end = count + offset;
+                       int xchar;
                        for (int i = offset; i < end; i++) {
-                               if (bytes [i] == '%' && i + 2 < count) {
+                               // i is an absolute index into bytes, so the look-ahead has to
+                               // be bounded by end (offset + count), not by count alone.
+                               if (bytes [i] == '%' && i + 2 < end && bytes [i + 1] != '%') {
                                        if (bytes [i + 1] == (byte) 'u' && i + 5 < end) {
                                                if (acc.Length > 0) {
                                                        output.Append (GetChars (acc, e));
                                                        acc.SetLength (0);
                                                }
-                                               output.Append (GetChar (bytes, i + 2, 4));
-                                               i += 5;
-                                       } else {
-                                               acc.WriteByte ((byte) GetChar (bytes, i + 1, 2));
+                                               xchar = GetChar (bytes, i + 2, 4);
+                                               if (xchar != -1) {
+                                                       output.Append ((char) xchar);
+                                                       i += 5;
+                                                       continue;
+                                               }
+                                       } else if ((xchar = GetChar (bytes, i + 1, 2)) != -1) {
+                                               acc.WriteByte ((byte) xchar);
                                                i += 2;
+                                               continue;
                                        }
-                                       continue;
                                }
 
                                if (acc.Length > 0) {
@@ -550,11 +599,14 @@ namespace System.Web {
                        int end = offset + count;
                        for (int i = offset; i < end; i++){
                                char c = (char) bytes [i];
-                               if (c == '+')
+                               if (c == '+') {
                                        c = ' ';
-                               else if (c == '%' && i < end - 2) {
-                                       c = GetChar (bytes, i + 1, 2);
-                                       i += 2;
+                               } else if (c == '%' && i < end - 2) {
+                                       int xchar = GetChar (bytes, i + 1, 2);
+                                       if (xchar != -1) {
+                                               c = (char) xchar;
+                                               i += 2;
+                                       }
                                }
                                result.WriteByte ((byte) c);
                        }
@@ -575,8 +627,26 @@ namespace System.Web {
                        if (s == "")
                                return "";
 
-                       byte [] bytes = Enc.GetBytes (s);
-                       return Encoding.ASCII.GetString (UrlEncodeToBytes (bytes, 0, bytes.Length));
+                       bool needEncode = false;
+                       int len = s.Length;
+                       for (int i = 0; i < len; i++) {
+                               char c = s [i];
+                               if ((c < '0') || (c < 'A' && c > '9') || (c > 'Z' && c < 'a') || (c > 'z')) {
+                                       if (NotEncoded (c))
+                                               continue;
+
+                                       needEncode = true;
+                                       break;
+                               }
+                       }
+
+                       if (!needEncode)
+                               return s;
+
+                       // avoided GetByteCount call
+                       byte [] bytes = new byte[Enc.GetMaxByteCount(s.Length)];
+                       int realLen = Enc.GetBytes (s, 0, s.Length, bytes, 0);
+                       return Encoding.ASCII.GetString (UrlEncodeToBytes (bytes, 0, realLen));
                }
          
                public static string UrlEncode (byte [] bytes)
@@ -631,6 +701,62 @@ namespace System.Web {
 
                static char [] hexChars = "0123456789abcdef".ToCharArray ();
 
+               static bool NotEncoded (char c)
+               {
+                       return (c == '!' || c == '\'' || c == '(' || c == ')' || c == '*' || c == '-' || c == '.' || c == '_');
+               }
+
+               static void UrlEncodeChar (char c, Stream result, bool isUnicode) {
+                       if (c > 255) {
+                               //FIXME: what happens when there is an internal error?
+                               //if (!isUnicode)
+                               //      throw new ArgumentOutOfRangeException ("c", c, "c must be less than 256");
+                               int idx;
+                               int i = (int) c;
+
+                               result.WriteByte ((byte)'%');
+                               result.WriteByte ((byte)'u');
+                               idx = i >> 12;
+                               result.WriteByte ((byte)hexChars [idx]);
+                               idx = (i >> 8) & 0x0F;
+                               result.WriteByte ((byte)hexChars [idx]);
+                               idx = (i >> 4) & 0x0F;
+                               result.WriteByte ((byte)hexChars [idx]);
+                               idx = i & 0x0F;
+                               result.WriteByte ((byte)hexChars [idx]);
+                               return;
+                       }
+                       
+                       if (c > ' ' && NotEncoded (c)) {
+                               result.WriteByte ((byte)c);
+                               return;
+                       }
+                       if (c==' ') {
+                               result.WriteByte ((byte)'+');
+                               return;
+                       }
+                       if (    (c < '0') ||
+                               (c < 'A' && c > '9') ||
+                               (c > 'Z' && c < 'a') ||
+                               (c > 'z')) {
+                               if (isUnicode && c > 127) {
+                                       result.WriteByte ((byte)'%');
+                                       result.WriteByte ((byte)'u');
+                                       result.WriteByte ((byte)'0');
+                                       result.WriteByte ((byte)'0');
+                               }
+                               else
+                                       result.WriteByte ((byte)'%');
+                               
+                               int idx = ((int) c) >> 4;
+                               result.WriteByte ((byte)hexChars [idx]);
+                               idx = ((int) c) & 0x0F;
+                               result.WriteByte ((byte)hexChars [idx]);
+                       }
+                       else
+                               result.WriteByte ((byte)c);
+               }
+
                public static byte [] UrlEncodeToBytes (byte [] bytes, int offset, int count)
                {
                        if (bytes == null)
@@ -646,25 +772,12 @@ namespace System.Web {
                        if (count < 0 || count > len - offset)
                                throw new ArgumentOutOfRangeException("count");
 
-                       MemoryStream result = new MemoryStream ();
+                       MemoryStream result = new MemoryStream (count);
                        int end = offset + count;
-                       for (int i = offset; i < end; i++) {
-                               char c = (char) bytes [i];
-                               if ((c == ' ') || (c < '0' && c != '-' && c != '.') ||
-                                   (c < 'A' && c > '9') ||
-                                   (c > 'Z' && c < 'a' && c != '_') ||
-                                   (c > 'z')) {
-                                       result.WriteByte ((byte) '%');
-                                       int idx = ((int) c) >> 4;
-                                       result.WriteByte ((byte) hexChars [idx]);
-                                       idx = ((int) c) & 0x0F;
-                                       result.WriteByte ((byte) hexChars [idx]);
-                               } else {
-                                       result.WriteByte ((byte) c);
-                               }
-                       }
+                       for (int i = offset; i < end; i++)
+                               UrlEncodeChar ((char)bytes [i], result, false);
 
-                       return result.ToArray ();
+                       return result.ToArray();
                }
 
                public static string UrlEncodeUnicode (string str)
@@ -672,43 +785,7 @@ namespace System.Web {
                        if (str == null)
                                return null;
 
-                       StringBuilder result = new StringBuilder ();
-                       foreach (char c in str){
-                               int idx;
-
-                               if (c > 255) {
-                                       result.Append ("%u");
-                                       idx = ((int) c) >> 24;
-                                       result.Append (hexChars [idx]);
-                                       idx = (((int) c) >> 16) & 0x0F;
-                                       result.Append (hexChars [idx]);
-                                       idx = (((int) c) >> 8) & 0x0F;
-                                       result.Append (hexChars [idx]);
-                                       idx = ((int) c) & 0x0F;
-                                       result.Append (hexChars [idx]);
-                                       continue;
-                               }
-                               
-                               if ((c == ' ') || (c < '0' && c != '-' && c != '.') ||
-                                   (c < 'A' && c > '9') ||
-                                   (c > 'Z' && c < 'a' && c != '_') ||
-                                   (c > 'z')) {
-                                       if (c > 127)
-                                               result.Append ("%u00");
-                                       else
-                                               result.Append ("%");
-                                       
-                                       idx = ((int) c) >> 4;
-                                       result.Append (hexChars [idx]);
-                                       idx = ((int) c) & 0x0F;
-                                       result.Append (hexChars [idx]);
-                                       continue;
-                               }
-
-                               result.Append (c);
-                       }
-
-                       return result.ToString ();
+                       return Encoding.ASCII.GetString (UrlEncodeUnicodeToBytes (str));
                }
 
                public static byte [] UrlEncodeUnicodeToBytes (string str)
@@ -719,7 +796,11 @@ namespace System.Web {
                        if (str == "")
                                return new byte [0];
 
-                       return Encoding.ASCII.GetBytes (UrlEncodeUnicode (str));
+                       MemoryStream result = new MemoryStream (str.Length);
+                       foreach (char c in str){
+                               UrlEncodeChar (c, result, true);
+                       }
+                       return result.ToArray ();
                }
 
                /// <summary>
@@ -852,10 +933,23 @@ namespace System.Web {
                        if (s == null)
                                return null;
 
+                       bool needEncode = false;
+                       for (int i = 0; i < s.Length; i++) {
+                               char c = s [i];
+                               if (c == '&' || c == '"' || c == '<' || c == '>' || c > 159) {
+                                       needEncode = true;
+                                       break;
+                               }
+                       }
+
+                       if (!needEncode)
+                               return s;
+
                        StringBuilder output = new StringBuilder ();
                        
-                       foreach (char c in s) 
-                               switch (c) {
+                       int len = s.Length;
+                       for (int i = 0; i < len; i++) 
+                               switch (s [i]) {
                                case '&' :
                                        output.Append ("&amp;");
                                        break;
@@ -872,12 +966,12 @@ namespace System.Web {
                                        // MS starts encoding with &# from 160 and stops at 255.
                                        // We don't do that. One reason is the 65308/65310 unicode
                                        // characters that look like '<' and '>'.
-                                       if (c > 159) {
+                                       if (s [i] > 159) {
                                                output.Append ("&#");
-                                               output.Append (((int) c).ToString (CultureInfo.InvariantCulture));
+                                               output.Append (((int) s [i]).ToString (CultureInfo.InvariantCulture));
                                                output.Append (";");
                                        } else {
-                                               output.Append (c);
+                                               output.Append (s [i]);
                                        }
                                        break;
                                }
@@ -898,36 +992,123 @@ namespace System.Web {
 #if NET_1_1
                public static string UrlPathEncode (string s)
                {
-                       if (s == null)
-                               return null;
+                       if (s == null || s.Length == 0)
+                               return s;
 
-                       int idx = s.IndexOf ("?");
-                       string s2 = null;
-                       if (idx != -1) {
-                               s2 = s.Substring (0, idx-1);
-                               s2 = UrlEncode (s2) + s.Substring (idx);
-                       } else {
-                               s2 = UrlEncode (s);
+                       MemoryStream result = new MemoryStream ();
+                       int length = s.Length;
+                       for (int i = 0; i < length; i++) {
+                               char c = s [i];
+                               // A surrogate pair is a single code point: encode both halves
+                               // together, otherwise the UTF-8 encoder sees two lone
+                               // surrogates and cannot produce the proper 4-byte sequence.
+                               if (c >= 0xD800 && c <= 0xDBFF && i + 1 < length &&
+                                   s [i + 1] >= 0xDC00 && s [i + 1] <= 0xDFFF) {
+                                       UrlPathEncodeBytes (Encoding.UTF8.GetBytes (s.Substring (i, 2)), result);
+                                       i++;
+                                       continue;
+                               }
+                               UrlPathEncodeChar (c, result);
                        }
-
-                       return s2;
+                       return Encoding.ASCII.GetString (result.ToArray ());
+               }
+               
+               // Writes every byte of data to result as a lowercase %XX escape.
+               static void UrlPathEncodeBytes (byte [] data, Stream result) {
+                       for (int i = 0; i < data.Length; i++) {
+                               result.WriteByte ((byte) '%');
+                               int idx = ((int) data [i]) >> 4;
+                               result.WriteByte ((byte) hexChars [idx]);
+                               idx = ((int) data [i]) & 0x0F;
+                               result.WriteByte ((byte) hexChars [idx]);
+                       }
+               }
+
+               // Path-encodes a single UTF-16 unit: a non-ASCII char becomes the %XX
+               // escapes of its UTF-8 bytes, a space becomes %20, anything else is copied.
+               static void UrlPathEncodeChar (char c, Stream result) {
+                       if (c > 127) {
+                               UrlPathEncodeBytes (Encoding.UTF8.GetBytes (c.ToString ()), result);
+                       }
+                       else if (c == ' ') {
+                               result.WriteByte ((byte) '%');
+                               result.WriteByte ((byte) '2');
+                               result.WriteByte ((byte) '0');
+                       }
+                       else
+                               result.WriteByte ((byte) c);
                }
 #endif
 
 #if NET_2_0
-               [MonoTODO]
                public static NameValueCollection ParseQueryString (string query)
                {
-                       // LAMESPEC: default encoding not specified
-                       throw new NotImplementedException ();
+                       return ParseQueryString (query, Encoding.UTF8);
                }
 
-               [MonoTODO]
                public static NameValueCollection ParseQueryString (string query, Encoding encoding)
                {
-                       throw new NotImplementedException ();
-               }
+                       if (query == null)
+                               throw new ArgumentNullException ("query");
+                       if (encoding == null)
+                               throw new ArgumentNullException ("encoding");
+                       if (query.Length == 0 || (query.Length == 1 && query[0] == '?'))
+                               return new NameValueCollection ();
+                       if (query[0] == '?')
+                               query = query.Substring (1);
+                               
+                       NameValueCollection result = new NameValueCollection ();
+                       ParseQueryString (query, encoding, result);
+                       return result;
+               }                               
 #endif
+
+               // Splits query on '&' into name=value pairs, URL-decodes both parts with
+               // encoding and adds them to result; a pair without '=' gets a null name.
+               internal static void ParseQueryString (string query, Encoding encoding, NameValueCollection result)
+               {
+                       if (query.Length == 0)
+                               return;
+
+                       int namePos = 0;
+                       bool first = true;
+                       while (namePos <= query.Length) {
+                               int valuePos = -1, valueEnd = -1;
+                               for (int q = namePos; q < query.Length; q++) {
+                                       if (valuePos == -1 && query[q] == '=') {
+                                               valuePos = q + 1;
+                                       } else if (query[q] == '&') {
+                                               valueEnd = q;
+                                               break;
+                                       }
+                               }
+
+                               if (first) {
+                                       first = false;
+                                       if (query [namePos] == '?')
+                                               namePos++;
+                               }
+                               
+                               string name, value;
+                               if (valuePos == -1) {
+                                       name = null;
+                                       valuePos = namePos;
+                               } else {
+                                       name = UrlDecode (query.Substring (namePos, valuePos - namePos - 1), encoding);
+                               }
+                               if (valueEnd < 0) {
+                                       namePos = -1;
+                                       valueEnd = query.Length;
+                               } else {
+                                       namePos = valueEnd + 1;
+                               }
+                               value = UrlDecode (query.Substring (valuePos, valueEnd - valuePos), encoding);
+
+                               result.Add (name, value);
+                               if (namePos == -1)
+                                       break;
+                       }
+               }
                #endregion // Methods
        }
 }