Fix handling of strings in System.Json
authorSteffen Kieß <Steffen.Kiess@ipvs.uni-stuttgart.de>
Fri, 11 Jul 2014 13:46:25 +0000 (15:46 +0200)
committerSteffen Kieß <Steffen.Kiess@ipvs.uni-stuttgart.de>
Fri, 11 Jul 2014 13:46:25 +0000 (15:46 +0200)
- Escape control characters, as required by JSON spec
- Escape invalid surrogate pairs
- Escape characters invalid in JavaScript strings
- Escape "</" for HTML script tags
- Fix serialization of chars
- Remove unused variable in JavaScriptReader.cs
- Add tests

mcs/class/System.Json/System.Json/JsonPrimitive.cs
mcs/class/System.Json/System.Json/JsonValue.cs
mcs/class/System.Json/Test/System.Json/JsonValueTest.cs
mcs/class/System.ServiceModel.Web/System.Runtime.Serialization.Json/JavaScriptReader.cs

index 5d47eb4a4e6b6875a8cc103d914b56ee10cac2f4..24f51ed560a688f52a42d43cd34f96be030369d8 100644 (file)
@@ -161,6 +161,8 @@ namespace System.Json
                        case JsonType.String:
                                if (value is string || value == null)
                                        return (string) value;
+                               if (value is char)
+                                       return value.ToString ();
                                throw new NotImplementedException ("GetFormattedString from value type " + value.GetType ());
                        case JsonType.Number:
                                string s;
index d703edd5512c5c6427f43f78a2975f8600f5edd5..ac3f723e2adec73af42a5a1f22e3c03f8d7a7ff3 100644 (file)
@@ -197,13 +197,37 @@ namespace System.Json
                        throw new InvalidOperationException ();
                }
 
+               // Characters which have to be escaped:
+               // - Required by JSON Spec: Control characters, '"' and '\\'
+               // - Broken surrogates to make sure the JSON string is valid Unicode
+               //   (and can be encoded as UTF8)
+               // - JSON does not require U+2028 and U+2029 to be escaped, but
+               //   JavaScript does require this:
+               //   http://stackoverflow.com/questions/2965293/javascript-parse-error-on-u2028-unicode-character/9168133#9168133
+               // - '/' also does not have to be escaped, but escaping it when
+               //   preceeded by a '<' avoids problems with JSON in HTML <script> tags
+               bool NeedEscape (string src, int i) {
+                       char c = src [i];
+                       return c < 32 || c == '"' || c == '\\'
+                               // Broken lead surrogate
+                               || (c >= '\uD800' && c <= '\uDBFF' &&
+                                       (i == src.Length - 1 || src [i + 1] < '\uDC00' || src [i + 1] > '\uDFFF'))
+                               // Broken tail surrogate
+                               || (c >= '\uDC00' && c <= '\uDFFF' &&
+                                       (i == 0 || src [i - 1] < '\uD800' || src [i - 1] > '\uDBFF'))
+                               // To produce valid JavaScript
+                               || c == '\u2028' || c == '\u2029'
+                               // Escape "</" for <script> tags
+                               || (c == '/' && i > 0 && src [i - 1] == '<');
+               }
+               
                internal string EscapeString (string src)
                {
                        if (src == null)
                                return null;
 
                        for (int i = 0; i < src.Length; i++)
-                               if (src [i] == '"' || src [i] == '\\') {
+                               if (NeedEscape (src, i)) {
                                        var sb = new StringBuilder ();
                                        if (i > 0)
                                                sb.Append (src, 0, i);
@@ -216,10 +240,22 @@ namespace System.Json
                {
                        int start = cur;
                        for (int i = cur; i < src.Length; i++)
-                               if (src [i] == '"' || src [i] == '\\') {
+                               if (NeedEscape (src, i)) {
                                        sb.Append (src, start, i - start);
-                                       sb.Append ('\\');
-                                       sb.Append (src [i]);
+                                       switch (src [i]) {
+                                       case '\b': sb.Append ("\\b"); break;
+                                       case '\f': sb.Append ("\\f"); break;
+                                       case '\n': sb.Append ("\\n"); break;
+                                       case '\r': sb.Append ("\\r"); break;
+                                       case '\t': sb.Append ("\\t"); break;
+                                       case '\"': sb.Append ("\\\""); break;
+                                       case '\\': sb.Append ("\\\\"); break;
+                                       case '/': sb.Append ("\\/"); break;
+                                       default:
+                                               sb.Append ("\\u");
+                                               sb.Append (((int) src [i]).ToString ("x04"));
+                                               break;
+                                       }
                                        start = i + 1;
                                }
                        sb.Append (src, start, src.Length - start);
index 02dd106dfd8b82a5b3f3f9bf2ea71663d7aead57..bc04e2d403fd3ecd7a985b3259664f569440aa7d 100644 (file)
@@ -181,5 +181,64 @@ namespace MonoTests.System
                                Thread.CurrentThread.CurrentCulture = old;
                        }
                }
+
+               // Convert a string to json and parse the string, then compare the result to the original value
+               void CheckString (string str)
+               {
+                       var json = new JsonPrimitive (str).ToString ();
+                       // Check whether the string is valid Unicode (will throw for broken surrogate pairs)
+                       new UTF8Encoding (false, true).GetBytes (json);
+                       string jvalue = (string) JsonValue.Parse (json);
+                       Assert.AreEqual (str, jvalue);
+               }
+               
+               // String handling: http://tools.ietf.org/html/rfc7159#section-7
+               [Test]
+               public void CheckStrings () 
+               {
+                       Assert.AreEqual ("\"test\"", new JsonPrimitive ("test").ToString ());
+                       // Handling of characters
+                       Assert.AreEqual ("\"f\"", new JsonPrimitive ('f').ToString ());
+                       Assert.AreEqual ('f', (char) JsonValue.Parse ("\"f\""));
+
+                       // Control characters with special escape sequence
+                       Assert.AreEqual ("\"\\b\\f\\n\\r\\t\"", new JsonPrimitive ("\b\f\n\r\t").ToString ());
+                       // Other characters which must be escaped
+                       Assert.AreEqual (@"""\""\\""", new JsonPrimitive ("\"\\").ToString ());
+                       // Control characters without special escape sequence
+                       for (int i = 0; i < 32; i++)
+                               if (i != '\b' && i != '\f' && i != '\n' && i != '\r' && i != '\t')
+                                       Assert.AreEqual ("\"\\u" + i.ToString ("x04") + "\"", new JsonPrimitive ("" + (char) i).ToString ());
+
+                       // JSON does not require U+2028 and U+2029 to be escaped, but
+                       // JavaScript does require this:
+                       // http://stackoverflow.com/questions/2965293/javascript-parse-error-on-u2028-unicode-character/9168133#9168133
+                       Assert.AreEqual ("\"\\u2028\\u2029\"", new JsonPrimitive ("\u2028\u2029").ToString ());
+
+                       // '/' also does not have to be escaped, but escaping it when
+                       // preceeded by a '<' avoids problems with JSON in HTML <script> tags
+                       Assert.AreEqual ("\"<\\/\"", new JsonPrimitive ("</").ToString ());
+                       // Don't escape '/' in other cases as this makes the JSON hard to read
+                       Assert.AreEqual ("\"/bar\"", new JsonPrimitive ("/bar").ToString ());
+                       Assert.AreEqual ("\"foo/bar\"", new JsonPrimitive ("foo/bar").ToString ());
+
+                       CheckString ("test\b\f\n\r\t\"\\/</\0x");
+                       for (int i = 0; i < 65536; i++)
+                               CheckString ("x" + ((char) i));
+
+                       // Check broken surrogate pairs
+                       CheckString ("\ud800");
+                       CheckString ("x\ud800");
+                       CheckString ("\udfff\ud800");
+                       CheckString ("\ude03\ud912");
+                       CheckString ("\uc000\ubfff");
+                       CheckString ("\udfffx");
+                       // Valid strings should not be escaped:
+                       Assert.AreEqual ("\"\ud7ff\"", new JsonPrimitive ("\ud7ff").ToString ());
+                       Assert.AreEqual ("\"\ue000\"", new JsonPrimitive ("\ue000").ToString ());
+                       Assert.AreEqual ("\"\ud800\udc00\"", new JsonPrimitive ("\ud800\udc00").ToString ());
+                       Assert.AreEqual ("\"\ud912\ude03\"", new JsonPrimitive ("\ud912\ude03").ToString ());
+                       Assert.AreEqual ("\"\udbff\udfff\"", new JsonPrimitive ("\udbff\udfff").ToString ());
+               }
        }
 }
index 2d11b1727aec81a2f5f1b1296e1b4336c19296f4..3e97c0bc8edef1e31a74e7c8715710810b3f5c37 100644 (file)
@@ -164,9 +164,7 @@ namespace System.Runtime.Serialization.Json
                {
                        var sb = new StringBuilder ();
                        
-                       bool negative = false;
                        if (PeekChar () == '-') {
-                               negative = true;
                                sb.Append ((char) ReadChar ());
                        }