Revert "Merge pull request #5330 from alexanderkyte/dedup_mkbundle"
[mono.git] / mcs / class / corlib / Test / System.Text / UTF8EncodingTest.cs
index 54460afd4e3b835e58c8794a4bb358f41414c54d..12de999dc5b6372937898c66fdfa1c8d7805b088 100644 (file)
@@ -11,6 +11,7 @@
 
 using NUnit.Framework;
 using System;
+using System.Reflection;
 using System.IO;
 using System.Text;
 
@@ -175,7 +176,14 @@ namespace MonoTests.System.Text
                        Assert.AreEqual (0x32, (int) s [5], "#B7");
                }
 
-               // UTF8 decoding tests from http://www.cl.cam.ac.uk/~mgk25/
+               //
+               // UTF8 decoding tests are based on the test file from http://www.cl.cam.ac.uk/~mgk25/
+               // The test file is: https://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt
+               // which is licensed under CC-by-4.0: https://creativecommons.org/licenses/by/4.0/
+               //
+               // The file is not copied verbatim; instead, individual
+               // tests are based on individual portions of that file.
+               //
 
                [Test]
                public void T1_Correct_GreekWord_kosme () 
@@ -1035,7 +1043,7 @@ namespace MonoTests.System.Text
                [Category ("MobileNotWorking")]
                public void Bug415628 ()
                {
-                       using (var f = File.Open ("Test/resources/415628.bin", FileMode.Open)) {
+                       using (var f = File.Open (Path.Combine (Path.GetDirectoryName (Assembly.GetExecutingAssembly ().Location), "Test/resources/415628.bin"), FileMode.Open)) {
                                BinaryReader br = new BinaryReader (f);
                                byte [] buf = br.ReadBytes (8000);
                                Encoding.UTF8.GetString(buf);
@@ -1156,5 +1164,46 @@ namespace MonoTests.System.Text
                        data = new byte [] { 0xF4, 0x8F, 0xB0, 0xC0 };
                        t.TestDecoderFallback (data, "??", new byte [] { 0xF4, 0x8F, 0xB0 }, new byte [] { 0xC0 });
                }
+
+               [Test]
+               public void DecoderBug23771 ()
+               {
+                       var input = "\u733F"; // 'mono' (monkey) in Japanese; 3 bytes in UTF-8.
+                       var encoded = Encoding.UTF8.GetBytes (input);
+                       var decoder = Encoding.UTF8.GetDecoder ();
+                       var chars = new char [10]; // Ample space for the decoded output.
+                       var result = new StringBuilder ();
+                       var bytes = new byte [1]; // Single-byte buffer simulating chunked input.
+                       // Feed the encoded bytes to the decoder one at a time (flush is false on every call).
+                       foreach (var b in encoded) {
+                               bytes [0] = b;
+                               int bytesUsed, charsUsed;
+                               bool completed;
+                               decoder.Convert (bytes, 0, bytes.Length, chars, 0, chars.Length, false, out bytesUsed, out charsUsed, out completed);
+                               result.Append (chars, 0, charsUsed);
+                               // The expected per-call outputs are listed in the comment at the bottom of this method.
+                               //Debug.Print ("bytesUsed:{0}, charsUsed:{1}, completed:{2}, result:'{3}'", bytesUsed, charsUsed, completed, result);
+                       }
+
+                       // The chunked decode must reproduce the input exactly (expected: NO assertion error).
+                       Assert.AreEqual (input, result.ToString (), "#1");
+
+                       /*
+                        * The expected Debug outputs are:
+                        * bytesUsed:1, charsUsed:0, completed:True, result:''
+                        * bytesUsed:1, charsUsed:0, completed:True, result:''
+                        * bytesUsed:1, charsUsed:1, completed:True, result:'猿'
+                        * 
+                        * -- Note: '猿' is U+733F (1 char in UTF-16)
+                        * 
+                        * The actual Debug outputs are:
+                        * bytesUsed:3, charsUsed:1, completed:False, result:'�'
+                        * bytesUsed:3, charsUsed:1, completed:False, result:'��'
+                        * bytesUsed:3, charsUsed:1, completed:False, result:'���'
+                        * 
+                        * None of the output parameters match.
+                        * -- Note: '�' is the decoder fallback char (U+FFFD)
+                        */
+               }
        }
 }