UTF8 Marshaling APIs (#3736)
authorMiguel de Icaza <miguel@gnome.org>
Fri, 14 Oct 2016 18:23:23 +0000 (14:23 -0400)
committerGitHub <noreply@github.com>
Fri, 14 Oct 2016 18:23:23 +0000 (14:23 -0400)
Adds support for the new UTF8 marshaling functionality in .NET: there is support for the new MarshalAs(UnmanagedType.LPUTF8Str) as well as a handful of convenience methods in Marshal.cs.

In Mono, this is mostly a pass-through as we have historically only done
UTF8, so this merely adds the constant processing to the runtime.

This fixes a few bugs that the new test suite exhibited: when we marshaled UTF8 strings, we assumed Chars() == Bytes() and we would end up chopping when marshaling out, and marshaling in.

There is also an additional behavioral bug that was fixed in Marshal.cs: unlike the rest of the runtime, which treated Ansi as UTF8, StringToCoTaskMemAnsi behaved as ASCII. This brings the API in line with the rest of the runtime.

The equivalent CoreCLR changes were:

dotnet/coreclr#4793
dotnet/coreclr#6561

This has also surfaced two limitations in Mono's marshaling code, which is why two sets of tests are disabled: StringBuilder return types are not handled specially (char * needs to be turned into a new StringBuilder when present) and StringBuilders with an Out attribute should update the StringBuilder in place, not make a new copy of it.

13 files changed:
mcs/class/corlib/System.Runtime.InteropServices/Marshal.cs
mcs/class/corlib/Test/System.Runtime.InteropServices/MarshalTest.cs
mcs/class/referencesource/mscorlib/system/runtime/interopservices/attributes.cs
mcs/class/referencesource/mscorlib/system/string.cs
mono/metadata/icall-def.h
mono/metadata/marshal.c
mono/metadata/marshal.h
mono/metadata/metadata.c
mono/metadata/metadata.h
mono/tests/Makefile.am
mono/tests/libtest.c
mono/tests/pinvoke-utf8.cs [new file with mode: 0644]
scripts/mono-package-runtime [changed mode: 0644->0755]

index 2def9ddba50828744ab6ae26d6e37d1870df033b..f1d0829ff99fcfd89b8a6cc7fb9d4503d163276c 100644 (file)
@@ -38,6 +38,7 @@ using System.Security;
 using System.Reflection;
 using System.Threading;
 using System.Runtime.InteropServices.ComTypes;
+using System.Text;
 
 using System.Runtime.ConstrainedExecution;
 #if !FULL_AOT_RUNTIME
@@ -76,6 +77,9 @@ namespace System.Runtime.InteropServices
 
                [MethodImplAttribute(MethodImplOptions.InternalCall)]
                public extern static IntPtr AllocCoTaskMem (int cb);
+               
+               [MethodImplAttribute(MethodImplOptions.InternalCall)]
+               public extern static IntPtr AllocCoTaskMemSize (UIntPtr sizet);
 
                [MethodImplAttribute(MethodImplOptions.InternalCall)]
                [ReliabilityContractAttribute (Consistency.WillNotCorruptState, Cer.MayFail)]
@@ -278,6 +282,12 @@ namespace System.Runtime.InteropServices
                        FreeCoTaskMem (s);
                }
 
+               public static void ZeroFreeCoTaskMemUTF8 (IntPtr s) // UTF8 variant of the ZeroFree* helpers: runs ClearAnsi on s, then frees it.
+               {
+                       ClearAnsi (s); // same clearing helper the Ansi variants in this class call before freeing
+                       FreeCoTaskMem (s); // release with the CoTaskMem free routine
+               }
+               
                public static void ZeroFreeGlobalAllocAnsi (IntPtr s)
                {
                        ClearAnsi (s);
@@ -751,6 +761,16 @@ namespace System.Runtime.InteropServices
                [MethodImplAttribute(MethodImplOptions.InternalCall)]
                public extern static string PtrToStringAnsi (IntPtr ptr, int len);
 
+               public static string PtrToStringUTF8 (IntPtr ptr) // Builds a managed string from the native string at ptr by forwarding to PtrToStringAnsi.
+               {
+                       return PtrToStringAnsi (ptr); // per this change's description, Mono's Ansi marshaling has historically been UTF8, so the Ansi path is reused
+               }
+               
+               public static string PtrToStringUTF8 (IntPtr ptr, int byteLen) // Length-limited overload: forwards ptr and byteLen to PtrToStringAnsi (ptr, len).
+               {
+                       return PtrToStringAnsi (ptr, byteLen); // byteLen is passed through unchanged as the length argument
+               }
+               
                public static string PtrToStringAuto (IntPtr ptr)
                {
                        return SystemDefaultCharSize == 2
@@ -1053,23 +1073,9 @@ namespace System.Runtime.InteropServices
                [MethodImplAttribute(MethodImplOptions.InternalCall)]
                public extern static IntPtr StringToBSTR (string s);
 
-               //
-               // I believe this is wrong, because in Mono and in P/Invoke
-               // we treat "Ansi" conversions as UTF-8 conversions, while
-               // this one does not do this
-               //
                public static IntPtr StringToCoTaskMemAnsi (string s)
                {
-                       int length = s.Length + 1;
-                       IntPtr ctm = AllocCoTaskMem (length);
-
-                       byte[] asBytes = new byte[length];
-                       for (int i = 0; i < s.Length; i++)
-                               asBytes[i] = (byte)s[i];
-                       asBytes[s.Length] = 0;
-
-                       copy_to_unmanaged (asBytes, 0, ctm, length);
-                       return ctm;
+                       return StringToAllocatedMemoryUTF8 (s);
                }
 
                public static IntPtr StringToCoTaskMemAuto (string s)
@@ -1094,6 +1100,29 @@ namespace System.Runtime.InteropServices
                [MethodImplAttribute(MethodImplOptions.InternalCall)]
                public extern static IntPtr StringToHGlobalAnsi (string s);
 
+               unsafe public static IntPtr StringToAllocatedMemoryUTF8(String s)
+               {
+                       // Number of UTF8 bytes reserved for each UTF16 char of the input.
+                       const int BytesPerChar = 3;
+
+                       // A null string marshals to a null pointer; nothing is allocated.
+                       if (s == null)
+                               return IntPtr.Zero;
+
+                       // Upper bound for the encoded payload, sized from Length + 1 chars.
+                       int capacity = (s.Length + 1) * BytesPerChar;
+
+                       // Overflow checking: a bound smaller than the input length means the
+                       // multiplication above wrapped around.
+                       if (capacity < s.Length)
+                               throw new ArgumentOutOfRangeException("s");
+
+                       // One byte more than the bound, so the terminator written below always fits.
+                       UIntPtr allocSize = new UIntPtr((uint)capacity +1);
+                       IntPtr buffer = AllocCoTaskMemSize(allocSize);
+                       if (buffer == IntPtr.Zero)
+                               throw new OutOfMemoryException();
+
+                       byte* bytes = (byte*)buffer;
+                       int written = s.GetBytesFromEncoding(bytes, capacity, Encoding.UTF8);
+                       bytes[written] = 0; // NUL-terminate the native string
+                       return buffer;
+               }
+               
                public static IntPtr StringToHGlobalAuto (string s)
                {
                        return SystemDefaultCharSize == 2
index d4ca72dc0a94857730af44a9635cffa80624a605..0fcaa9847de0e276744c9ca410f65b508fa5f59e 100644 (file)
@@ -155,6 +155,58 @@ namespace MonoTests.System.Runtime.InteropServices
                        }
                }
 
+               readonly String[] TestStrings = new String[] {
+                       "", //Empty String
+                       "Test String",
+                       "A", //Single character string
+                       "This is a very long string as it repeats itself. " +
+                       "This is a very long string as it repeats itself. " +
+                       "This is a very long string as it repeats itself. " +
+                       "This is a very long string as it repeats itself. " +
+                       "This is a very long string as it repeats itself. " +
+                       "This is a very long string as it repeats itself. " +
+                       "This is a very long string as it repeats itself. " +
+                       "This is a very long string as it repeats itself. " +
+                       "This is a very long string as it repeats itself. " +
+                       "This is a very long string as it repeats itself. " +
+                       "This is a very long string as it repeats itself. " +
+                       "This is a very long string as it repeats itself. " +
+                       "This is a very long string as it repeats itself.",
+                       "This \n is \n a \n multiline \n string",
+                       "This \0 is \0 a \0 string \0 with \0 nulls",
+                       "\0string",
+                       "string\0",
+                       "\0\0\0\0\0\0\0\0"
+               };
+
+               [Test]
+               public unsafe void PtrToStringUTF8_Test ()
+               {
+                       // Round-trips each test string through native UTF8 memory and back,
+                       // both in full and with an explicit byte length.
+                       foreach (String srcString in TestStrings)
+                       {
+                               // we assume string null terminated
+                               if (srcString.Contains("\0"))
+                                       continue;
+
+                               IntPtr ptrString = Marshal.StringToAllocatedMemoryUTF8(srcString);
+                               try
+                               {
+                                       string retString = Marshal.PtrToStringUTF8(ptrString);
+
+                                       if (!srcString.Equals(retString))
+                                       {
+                                               throw new Exception("Round triped strings do not match...");
+                                       }
+                                       if (srcString.Length > 0)
+                                       {
+                                               // The strings that reach this point are ASCII-only, so the
+                                               // char count can be used directly as the byte length.
+                                               string retString2 = Marshal.PtrToStringUTF8(ptrString, srcString.Length - 1);
+                                               if (!retString2.Equals(srcString.Substring(0, srcString.Length - 1)))
+                                               {
+                                                       throw new Exception("Round triped strings do not match...");
+                                               }
+                                       }
+                               }
+                               finally
+                               {
+                                       // The buffer comes from the CoTaskMem allocator, so it must be
+                                       // released with FreeCoTaskMem (not FreeHGlobal); the finally block
+                                       // also releases it when one of the checks above throws.
+                                       Marshal.FreeCoTaskMem(ptrString);
+                               }
+                       }
+               }
+               
                [Test]
                public unsafe void UnsafeAddrOfPinnedArrayElement ()
                {
index 17068f63bc9e4ad9cabf831cfc9067ba21bac739..83a6c0b161dc06d981fb41e8c181b99d62e96dfb 100644 (file)
@@ -522,6 +522,9 @@ namespace System.Runtime.InteropServices{
         
         [System.Runtime.InteropServices.ComVisible(false)]
         HString          = 0x2f,        // Windows Runtime HSTRING
+
+       [System.Runtime.InteropServices.ComVisible(false)]
+        LPUTF8Str        = 0x30,        // UTF8 string
     }
 
 #if !MONO
index 3f96ac1c00e42170e0153dea36793ab74d142992..e8fe32e54d119a9dffc79131c9a1106c06ab56e2 100644 (file)
@@ -1392,6 +1392,16 @@ namespace System {
 
             return s;
         }
+
+        // Encodes this string's characters into the caller-supplied native buffer with the
+        // given encoding and returns whatever Encoding.GetBytes reports.
+        //   pbNativeBuffer / cbNativeBuffer: destination pointer and its capacity in bytes.
+        unsafe internal int GetBytesFromEncoding(byte* pbNativeBuffer, int cbNativeBuffer,Encoding encoding)
+        {
+            int bytesWritten;
+
+            // encoding == Encoding.UTF8
+            // Pin the character data so a raw pointer can be handed to the encoder.
+            fixed (char* chars = &this.m_firstChar)
+            {
+                bytesWritten = encoding.GetBytes(chars, m_stringLength, pbNativeBuffer, cbNativeBuffer);
+            }
+
+            return bytesWritten;
+        }
+
 #if !MONO
         [System.Security.SecuritySafeCritical]  // auto-generated
         unsafe internal int ConvertToAnsi(byte *pbNativeBuffer, int cbNativeBuffer, bool fBestFit, bool fThrowOnUnmappableChar)
index a4bd93e0260f9fe86e566fc37db7d0dfad996bbb..46a2bedb529dec03dc6a2b62581acbce19f3ead1 100644 (file)
@@ -907,6 +907,7 @@ ICALL(MARSHAL_1, "AddRefInternal", ves_icall_System_Runtime_InteropServices_Mars
 ICALL_TYPE(MARSHAL, "System.Runtime.InteropServices.Marshal", MARSHAL_2)
 #endif
 ICALL(MARSHAL_2, "AllocCoTaskMem", ves_icall_System_Runtime_InteropServices_Marshal_AllocCoTaskMem)
+ICALL(MARSHAL_51,"AllocCoTaskMemSize(uintptr)", ves_icall_System_Runtime_InteropServices_Marshal_AllocCoTaskMemSize)
 ICALL(MARSHAL_3, "AllocHGlobal", ves_icall_System_Runtime_InteropServices_Marshal_AllocHGlobal)
 ICALL(MARSHAL_50, "BufferToBSTR", ves_icall_System_Runtime_InteropServices_Marshal_BufferToBSTR)
 ICALL(MARSHAL_4, "DestroyStructure", ves_icall_System_Runtime_InteropServices_Marshal_DestroyStructure)
index a725148ad78c8b9aef410fb93b77ee9bca0e011c..154b5a6a83b704e411f69cbd020fc205b13d76cf 100644 (file)
@@ -107,10 +107,8 @@ mono_marshal_string_to_utf16 (MonoString *s);
 static void *
 mono_marshal_string_to_utf16_copy (MonoString *s);
 
-#ifndef HOST_WIN32
 static gpointer
-mono_string_to_lpstr (MonoString *string_obj);
-#endif
+mono_string_to_utf8str (MonoString *string_obj);
 
 static MonoStringBuilder *
 mono_string_utf8_to_builder2 (char *text);
@@ -306,7 +304,7 @@ mono_marshal_init (void)
                register_icall (mono_string_new_wrapper, "mono_string_new_wrapper", "obj ptr", FALSE);
                register_icall (mono_string_new_len_wrapper, "mono_string_new_len_wrapper", "obj ptr int", FALSE);
                register_icall (ves_icall_mono_string_to_utf8, "ves_icall_mono_string_to_utf8", "ptr obj", FALSE);
-               register_icall (mono_string_to_lpstr, "mono_string_to_lpstr", "ptr obj", FALSE);
+               register_icall (mono_string_to_utf8str, "mono_string_to_utf8str", "ptr obj", FALSE);
                register_icall (mono_string_to_ansibstr, "mono_string_to_ansibstr", "ptr object", FALSE);
                register_icall (mono_string_builder_to_utf8, "mono_string_builder_to_utf8", "ptr object", FALSE);
                register_icall (mono_string_builder_to_utf16, "mono_string_builder_to_utf16", "ptr object", FALSE);
@@ -922,13 +920,13 @@ mono_string_utf8_to_builder (MonoStringBuilder *sb, char *text)
        if (!sb || !text)
                return;
 
-       int len = strlen (text);
-       if (len > mono_string_builder_capacity (sb))
-               len = mono_string_builder_capacity (sb);
-
        GError *error = NULL;
        glong copied;
-       gunichar2* ut = g_utf8_to_utf16 (text, len, NULL, &copied, &error);
+       gunichar2* ut = g_utf8_to_utf16 (text, strlen (text), NULL, &copied, &error);
+       int capacity = mono_string_builder_capacity (sb);
+       
+       if (copied > capacity)
+               copied = capacity;
 
        if (!error) {
                MONO_OBJECT_SETREF (sb, chunkPrevious, NULL);
@@ -952,7 +950,6 @@ mono_string_utf8_to_builder2 (char *text)
        return sb;
 }
 
-
 void
 mono_string_utf16_to_builder (MonoStringBuilder *sb, gunichar2 *text)
 {
@@ -985,7 +982,8 @@ mono_string_builder_to_utf8 (MonoStringBuilder *sb)
 {
        MonoError error;
        GError *gerror = NULL;
-
+       glong byte_count;
+       
        if (!sb)
                return NULL;
 
@@ -993,7 +991,7 @@ mono_string_builder_to_utf8 (MonoStringBuilder *sb)
 
        guint str_len = mono_string_builder_string_length (sb);
 
-       gchar *tmp = g_utf16_to_utf8 (str_utf16, str_len, NULL, NULL, &gerror);
+       gchar *tmp = g_utf16_to_utf8 (str_utf16, str_len, NULL, &byte_count, &gerror);
 
        if (gerror) {
                g_error_free (gerror);
@@ -1001,8 +999,7 @@ mono_string_builder_to_utf8 (MonoStringBuilder *sb)
                mono_set_pending_exception (mono_get_exception_execution_engine ("Failed to convert StringBuilder from utf16 to utf8"));
                return NULL;
        } else {
-               guint len = mono_string_builder_capacity (sb) + 1;
-               gchar *res = (gchar *)mono_marshal_alloc (len * sizeof (gchar), &error);
+               gchar *res = (gchar *)mono_marshal_alloc (byte_count+1, &error);
                if (!mono_error_ok (&error)) {
                        mono_marshal_free (str_utf16);
                        g_free (tmp);
@@ -1010,9 +1007,8 @@ mono_string_builder_to_utf8 (MonoStringBuilder *sb)
                        return NULL;
                }
 
-               g_assert (str_len < len);
-               memcpy (res, tmp, str_len * sizeof (gchar));
-               res[str_len] = '\0';
+               memcpy (res, tmp, byte_count);
+               res[byte_count] = '\0';
 
                mono_marshal_free (str_utf16);
                g_free (tmp);
@@ -1081,7 +1077,7 @@ mono_string_builder_to_utf16 (MonoStringBuilder *sb)
 /* This is a JIT icall, it sets the pending exception and returns NULL on error. */
 #ifndef HOST_WIN32
 static gpointer
-mono_string_to_lpstr (MonoString *s)
+mono_string_to_utf8str (MonoString *s)
 {
        MonoError error;
        char *result = mono_string_to_utf8_checked (s, &error);
@@ -1476,7 +1472,10 @@ emit_ptr_to_object_conv (MonoMethodBuilder *mb, MonoType *type, MonoMarshalConv
 #endif
                mono_mb_emit_byte (mb, CEE_STIND_REF);  
                break;
+
+               // In Mono historically LPSTR was treated as a UTF8STR
        case MONO_MARSHAL_CONV_STR_LPSTR:
+       case MONO_MARSHAL_CONV_STR_UTF8STR:
                mono_mb_emit_ldloc (mb, 1);
                mono_mb_emit_ldloc (mb, 0);
                mono_mb_emit_byte (mb, CEE_LDIND_I);
@@ -1604,6 +1603,7 @@ conv_to_icall (MonoMarshalConv conv, int *ind_store_type)
        case MONO_MARSHAL_CONV_LPTSTR_STR:
                *ind_store_type = CEE_STIND_REF;
                return mono_string_new_wrapper;
+       case MONO_MARSHAL_CONV_UTF8STR_STR:
        case MONO_MARSHAL_CONV_LPSTR_STR:
                *ind_store_type = CEE_STIND_REF;
                return mono_string_new_wrapper;
@@ -1611,10 +1611,12 @@ conv_to_icall (MonoMarshalConv conv, int *ind_store_type)
 #ifdef TARGET_WIN32
                return mono_marshal_string_to_utf16;
 #else
-               return mono_string_to_lpstr;
+               return mono_string_to_utf8str;
 #endif
+               // In Mono historically LPSTR was treated as a UTF8STR
+       case MONO_MARSHAL_CONV_STR_UTF8STR:
        case MONO_MARSHAL_CONV_STR_LPSTR:
-               return mono_string_to_lpstr;
+               return mono_string_to_utf8str;
        case MONO_MARSHAL_CONV_STR_BSTR:
                return mono_string_to_bstr;
        case MONO_MARSHAL_CONV_BSTR_STR:
@@ -1623,6 +1625,7 @@ conv_to_icall (MonoMarshalConv conv, int *ind_store_type)
        case MONO_MARSHAL_CONV_STR_TBSTR:
        case MONO_MARSHAL_CONV_STR_ANSIBSTR:
                return mono_string_to_ansibstr;
+       case MONO_MARSHAL_CONV_SB_UTF8STR:
        case MONO_MARSHAL_CONV_SB_LPSTR:
                return mono_string_builder_to_utf8;
        case MONO_MARSHAL_CONV_SB_LPTSTR:
@@ -1644,6 +1647,7 @@ conv_to_icall (MonoMarshalConv conv, int *ind_store_type)
        case MONO_MARSHAL_CONV_FTN_DEL:
                *ind_store_type = CEE_STIND_REF;
                return mono_ftnptr_to_delegate;
+       case MONO_MARSHAL_CONV_UTF8STR_SB:
        case MONO_MARSHAL_CONV_LPSTR_SB:
                *ind_store_type = CEE_STIND_REF;
                return mono_string_utf8_to_builder;
@@ -1690,6 +1694,8 @@ emit_object_to_ptr_conv (MonoMethodBuilder *mb, MonoType *type, MonoMarshalConv
                mono_mb_emit_byte (mb, CEE_NEG);
                mono_mb_emit_byte (mb, CEE_STIND_I2);
                break;
+       // In Mono historically LPSTR was treated as a UTF8STR
+       case MONO_MARSHAL_CONV_STR_UTF8STR:
        case MONO_MARSHAL_CONV_STR_LPWSTR:
        case MONO_MARSHAL_CONV_STR_LPSTR:
        case MONO_MARSHAL_CONV_STR_LPTSTR:
@@ -2418,6 +2424,8 @@ mono_marshal_get_string_to_ptr_conv (MonoMethodPInvoke *piinfo, MonoMarshalSpec
                return MONO_MARSHAL_CONV_STR_LPTSTR;
        case MONO_NATIVE_BSTR:
                return MONO_MARSHAL_CONV_STR_BSTR;
+       case MONO_NATIVE_UTF8STR:
+               return MONO_MARSHAL_CONV_STR_UTF8STR;
        default:
                return MONO_MARSHAL_CONV_INVALID;
        }
@@ -2431,13 +2439,12 @@ mono_marshal_get_stringbuilder_to_ptr_conv (MonoMethodPInvoke *piinfo, MonoMarsh
        switch (encoding) {
        case MONO_NATIVE_LPWSTR:
                return MONO_MARSHAL_CONV_SB_LPWSTR;
-               break;
        case MONO_NATIVE_LPSTR:
                return MONO_MARSHAL_CONV_SB_LPSTR;
-               break;
+       case MONO_NATIVE_UTF8STR:
+               return MONO_MARSHAL_CONV_SB_UTF8STR;
        case MONO_NATIVE_LPTSTR:
                return MONO_MARSHAL_CONV_SB_LPTSTR;
-               break;
        default:
                return MONO_MARSHAL_CONV_INVALID;
        }
@@ -2454,6 +2461,8 @@ mono_marshal_get_ptr_to_string_conv (MonoMethodPInvoke *piinfo, MonoMarshalSpec
        case MONO_NATIVE_LPWSTR:
                *need_free = FALSE;
                return MONO_MARSHAL_CONV_LPWSTR_STR;
+       case MONO_NATIVE_UTF8STR:
+               return MONO_MARSHAL_CONV_UTF8STR_STR;
        case MONO_NATIVE_LPSTR:
        case MONO_NATIVE_VBBYREFSTR:
                return MONO_MARSHAL_CONV_LPSTR_STR;
@@ -2481,6 +2490,8 @@ mono_marshal_get_ptr_to_stringbuilder_conv (MonoMethodPInvoke *piinfo, MonoMarsh
                 */
                *need_free = FALSE;
                return MONO_MARSHAL_CONV_LPWSTR_SB;
+       case MONO_NATIVE_UTF8STR:
+               return MONO_MARSHAL_CONV_UTF8STR_SB;
        case MONO_NATIVE_LPSTR:
                return MONO_MARSHAL_CONV_LPSTR_SB;
                break;
@@ -5729,6 +5740,9 @@ emit_marshal_object (EmitMarshalContext *m, int argnum, MonoType *t,
                                case MONO_NATIVE_LPSTR:
                                        mono_mb_emit_icall (mb, mono_string_utf8_to_builder2);
                                        break;
+                               case MONO_NATIVE_UTF8STR:
+                                       mono_mb_emit_icall (mb, mono_string_utf8_to_builder2);
+                                       break;
                                default:
                                        g_assert_not_reached ();
                                }
@@ -5839,6 +5853,8 @@ emit_marshal_object (EmitMarshalContext *m, int argnum, MonoType *t,
                        mono_mb_emit_ldloc (mb, 0);
                        mono_mb_emit_icall (mb, conv_to_icall (MONO_MARSHAL_CONV_FTN_DEL, NULL));
                        mono_mb_emit_stloc (mb, 3);
+               } else if (klass == mono_defaults.stringbuilder_class){
+                       // FIXME: implement
                } else {
                        /* set src */
                        mono_mb_emit_stloc (mb, 0);
@@ -5898,7 +5914,7 @@ emit_marshal_object (EmitMarshalContext *m, int argnum, MonoType *t,
                        encoding = mono_marshal_get_string_encoding (m->piinfo, spec);
 
                        // FIXME:
-                       g_assert (encoding == MONO_NATIVE_LPSTR);
+                       g_assert (encoding == MONO_NATIVE_LPSTR || encoding == MONO_NATIVE_UTF8STR);
 
                        g_assert (!t->byref);
                        g_assert (encoding != -1);
@@ -11014,6 +11030,7 @@ mono_struct_delete_old (MonoClass *klass, char *ptr)
                case MONO_MARSHAL_CONV_STR_BSTR:
                case MONO_MARSHAL_CONV_STR_ANSIBSTR:
                case MONO_MARSHAL_CONV_STR_TBSTR:
+               case MONO_MARSHAL_CONV_STR_UTF8STR:
                        mono_marshal_free (*(gpointer *)cpos);
                        break;
 
@@ -11124,6 +11141,27 @@ ves_icall_System_Runtime_InteropServices_Marshal_AllocCoTaskMem (int size)
        return res;
 }
 
+void*
+ves_icall_System_Runtime_InteropServices_Marshal_AllocCoTaskMemSize (gulong size)
+{
+       /*
+        * Allocates `size` bytes (4 bytes when size is 0 on non-Windows hosts).
+        * On failure the domain's out-of-memory exception is set as pending
+        * and NULL is returned.
+        * NOTE(review): the managed declaration passes a UIntPtr; confirm that
+        * gulong has the same width on every supported target.
+        */
+       void *mem;
+
+#ifdef HOST_WIN32
+       mem = CoTaskMemAlloc (size);
+#else
+       /* This returns a valid pointer for size 0 on MS.NET */
+       mem = g_try_malloc (size == 0 ? 4 : size);
+#endif
+       if (mem)
+               return mem;
+
+       mono_set_pending_exception (mono_domain_get ()->out_of_memory_ex);
+       return NULL;
+}
+
 void
 ves_icall_System_Runtime_InteropServices_Marshal_FreeCoTaskMem (void *ptr)
 {
@@ -11503,6 +11541,7 @@ mono_marshal_type_size (MonoType *type, MonoMarshalSpec *mspec, guint32 *align,
        case MONO_NATIVE_BSTR:
        case MONO_NATIVE_ANSIBSTR:
        case MONO_NATIVE_TBSTR:
+       case MONO_NATIVE_UTF8STR:
        case MONO_NATIVE_LPARRAY:
        case MONO_NATIVE_SAFEARRAY:
        case MONO_NATIVE_IUNKNOWN:
@@ -11586,10 +11625,10 @@ mono_marshal_asany (MonoObject *o, MonoMarshalNative string_encoding, int param_
                switch (string_encoding) {
                case MONO_NATIVE_LPWSTR:
                        return mono_marshal_string_to_utf16_copy ((MonoString*)o);
-                       break;
                case MONO_NATIVE_LPSTR:
-                       return mono_string_to_lpstr ((MonoString*)o);
-                       break;
+               case MONO_NATIVE_UTF8STR:
+                       // Same code path, because in Mono, we treated strings as Utf8
+                       return mono_string_to_utf8str ((MonoString*)o);
                default:
                        g_warning ("marshaling conversion %d not implemented", string_encoding);
                        g_assert_not_reached ();
@@ -11657,6 +11696,7 @@ mono_marshal_free_asany (MonoObject *o, gpointer ptr, MonoMarshalNative string_e
                switch (string_encoding) {
                case MONO_NATIVE_LPWSTR:
                case MONO_NATIVE_LPSTR:
+               case MONO_NATIVE_UTF8STR:
                        mono_marshal_free (ptr);
                        break;
                default:
index 166db1478ce52312ca113fd954646df022247a7e..e22653f2d5bd2f1538e4c02e1f451bca53a067b2 100644 (file)
@@ -490,6 +490,9 @@ ves_icall_System_Runtime_InteropServices_Marshal_DestroyStructure (gpointer src,
 void*
 ves_icall_System_Runtime_InteropServices_Marshal_AllocCoTaskMem (int size);
 
+void*
+ves_icall_System_Runtime_InteropServices_Marshal_AllocCoTaskMemSize (gulong size);
+
 void
 ves_icall_System_Runtime_InteropServices_Marshal_FreeCoTaskMem (void *ptr);
 
index f9e2bc514889b5f1fb78f1898467f7bbc2408ce1..3a4c22860f7c7b022e10743442037436c061c718 100644 (file)
@@ -5812,6 +5812,9 @@ handle_enum:
                        case MONO_NATIVE_TBSTR:
                                *conv = MONO_MARSHAL_CONV_STR_TBSTR;
                                return MONO_NATIVE_TBSTR;
+                       case MONO_NATIVE_UTF8STR:
+                               *conv = MONO_MARSHAL_CONV_STR_LPTSTR;
+                               return MONO_NATIVE_UTF8STR;
                        case MONO_NATIVE_BYVALTSTR:
                                if (unicode)
                                        *conv = MONO_MARSHAL_CONV_STR_BYVALWSTR;
index 046491943d5e7d5493b9a225d2a28f7e275162c5..6036fe3ae7dcf7e576e50f2af39f30d8ede9410e 100644 (file)
@@ -76,6 +76,9 @@ typedef enum {
        MONO_NATIVE_LPSTRUCT = 0x2b,
        MONO_NATIVE_CUSTOM = 0x2c,
        MONO_NATIVE_ERROR = 0x2d,
+       // TODO: MONO_NATIVE_IINSPECTABLE = 0x2e
+       // TODO: MONO_NATIVE_HSTRING = 0x2f
+       MONO_NATIVE_UTF8STR = 0x30,
        MONO_NATIVE_MAX = 0x50 /* no info */
 } MonoMarshalNative;
 
@@ -162,7 +165,11 @@ typedef enum {
        MONO_MARSHAL_FREE_ARRAY,
        MONO_MARSHAL_CONV_BSTR_STR,
        MONO_MARSHAL_CONV_SAFEHANDLE,
-       MONO_MARSHAL_CONV_HANDLEREF
+       MONO_MARSHAL_CONV_HANDLEREF,
+       MONO_MARSHAL_CONV_STR_UTF8STR,
+       MONO_MARSHAL_CONV_SB_UTF8STR,
+       MONO_MARSHAL_CONV_UTF8STR_STR,
+       MONO_MARSHAL_CONV_UTF8STR_SB,
 } MonoMarshalConv;
 
 #define MONO_MARSHAL_CONV_INVALID ((MonoMarshalConv)-1)
index b33cb19ef982f9117fc10dc76898692456dda1f1..4f824fb6a415c4c9b59180300d036a9a965f2990 100644 (file)
@@ -194,6 +194,7 @@ BASE_TEST_CS_SRC_UNIVERSAL=         \
        typeof-ptr.cs           \
        static-constructor.cs   \
        pinvoke.cs              \
+       pinvoke-utf8.cs         \
        pinvoke3.cs             \
        pinvoke11.cs            \
        pinvoke13.cs            \
index d2a09a6d068a6290ca4045289e175b872061f7f8..f65b779e2838eeb9176ce9847f90802e268cbdf1 100644 (file)
@@ -7230,6 +7230,184 @@ mono_test_marshal_fixed_array (FixedArrayStruct s)
        return s.array [0] + s.array [1] + s.array [2];
 }
 
+const int NSTRINGS = 6;
+//test strings
+const char  *utf8Strings[] = {  
+                                "Managed",
+                                 "Sîne klâwen durh die wolken sint geslagen" ,
+                                 "काचं शक्नोम्यत्तुम् । नोपहिनस्ति माम्",
+                                 "我能吞下玻璃而不伤身体",
+                                 "ღმერთსი შემვედრე,შემვედრე, ნუთუ კვლა დამხსნას შემვედრე,სოფლისა შემვედრე, შემვედრე,შემვედრე,შემვედრე,შრომასა, ცეცხლს, წყალსა და მიწასა, ჰაერთა თანა მრომასა; მომცნეს ფრთენი და აღვფრინდე, მივჰხვდე მას ჩემსა ნდომასა, დღისით და ღამით ვჰხედვიდე მზისა ელვათა კრთომაასაშემვედრე,შემვედრე,",
+                                 "Τη γλώσσα μου έδωσαν ελληνική",
+"\0"
+};
+
+/*
+ * Returns a newly malloc'ed, NUL-terminated copy of pReturn.
+ * Returns NULL when pReturn is NULL or empty, or when the allocation fails.
+ * This function keeps no reference to the buffer it returns.
+ */
+LIBTEST_API char *
+build_return_string(const char* pReturn)
+{
+       if (pReturn == 0 || *pReturn == 0)
+               return 0;
+
+       size_t strLength = strlen(pReturn);
+       char *ret = (char *)malloc(strLength + 1);
+       if (ret == 0)
+               return 0; /* do not write through a failed allocation */
+
+       /* strLength + 1 also copies the terminating NUL */
+       memcpy(ret, pReturn, strLength + 1);
+       return ret;
+}
+
+LIBTEST_API char *
+StringParameterInOut(/*[In,Out]*/ char *s, int index) /* index is not used by this function */
+{
+       // return a freshly allocated copy of s (build_return_string yields NULL for a NULL or empty s)
+       return build_return_string(s);
+}
+
+/*
+ * Stores in *s a newly malloc'ed copy of utf8Strings[index].
+ * *s is set to NULL when the allocation fails. index is not range-checked.
+ */
+LIBTEST_API void
+StringParameterRefOut(/*out*/ char **s, int index)
+{
+       char *pszTextutf8 = (char*)utf8Strings[index];
+       size_t strLength = strlen(pszTextutf8);
+       char *copy = (char *)malloc(strLength + 1);
+
+       /* only write through the buffer when malloc succeeded; +1 copies the NUL */
+       if (copy != 0)
+               memcpy(copy, pszTextutf8, strLength + 1);
+       *s = copy;
+}
+
+/*
+ * Validates the incoming string *s against utf8Strings[index] (aborting the
+ * process on a mismatch), frees it, and replaces it with a newly malloc'ed
+ * copy of utf8Strings[index]. A NULL *s skips validation and free.
+ * *s is set to NULL when the allocation fails. index is not range-checked.
+ */
+LIBTEST_API void
+StringParameterRef(/*ref*/ char **s, int index)
+{
+    char *pszTextutf8 = (char*)utf8Strings[index];
+    size_t strLength = strlen(pszTextutf8);
+
+    // check for NULL before touching the incoming string: strlen(NULL) is undefined
+    if (*s)
+    {
+        // do byte by byte validation of in string
+        size_t szLen = strlen(*s);
+        for (size_t i = 0; i < szLen; i++)
+        {
+            if ((*s)[i] != pszTextutf8[i])
+            {
+                printf("[in] managed string do not match native string\n");
+                abort ();
+            }
+        }
+
+        free(*s);
+    }
+
+    // overwrite the original
+    char *replacement = (char *)malloc(strLength + 1);
+    if (replacement != 0)
+        memcpy(replacement, pszTextutf8, strLength + 1); // +1 copies the NUL
+    *s = replacement;
+}
+
+LIBTEST_API void
+StringBuilderParameterInOut(/*[In,Out] StringBuilder*/ char *s, int index)
+{
+    // Nothing to validate or write for a NULL or empty buffer.
+    if (s == 0 || *s == 0)
+        return;
+
+    const char *expected = utf8Strings[index];
+
+    // The incoming bytes must match the start of the native string;
+    // strncmp over strlen(s) bytes performs the same byte-by-byte check.
+    if (strncmp(s, expected, strlen(s)) != 0)
+    {
+        printf("[in] managed string do not match native string\n");
+        abort ();
+    }
+
+    // Overwrite the buffer in place with the whole native string; running the
+    // index up to and including the length copies the terminating NUL as well.
+    // NOTE(review): nothing here checks the buffer's capacity - confirm callers
+    // always pass a buffer large enough for utf8Strings[index].
+    size_t total = strlen(expected);
+    for (size_t pos = 0; pos <= total; pos++)
+        s[pos] = expected[pos];
+}
+
+//out string builder
+LIBTEST_API void
+StringBuilderParameterOut(/*[Out] StringBuilder*/ char *s, int index)
+{
+    const char *source = utf8Strings[index];
+
+    // Log whatever the buffer holds on entry.
+    // NOTE(review): assumes s is non-NULL and NUL-terminated on entry - confirm.
+    printf ("SBPO: Receiving %s\n", s);
+
+    // Fill the buffer in place; running the index up to and including the
+    // length copies the terminating NUL as well.
+    size_t total = strlen(source);
+    for (size_t pos = 0; pos <= total; pos++)
+        s[pos] = source[pos];
+}
+
+LIBTEST_API char *
+StringParameterOut(/*[Out]*/ char *s, int index) /* index is not used by this function */
+{
+    // return a freshly allocated copy of s (build_return_string yields NULL for a NULL or empty s)
+    return build_return_string(s);
+}
+
+// Utf8 field
+typedef struct FieldWithUtf8
+{
+    char *pFirst;
+    int index;
+}FieldWithUtf8;
+
+//utf8 struct field
+LIBTEST_API void
+TestStructWithUtf8Field(struct FieldWithUtf8 fieldStruct)
+{
+    const char *managed = fieldStruct.pFirst;
+
+    // A NULL or empty field is accepted without any comparison.
+    if (managed == 0 || *managed == 0)
+        return;
+
+    const char *native = utf8Strings[fieldStruct.index];
+
+    // Compare over the native string's length; strncmp stops at the first
+    // differing byte, like a byte-by-byte loop would.
+    if (strncmp(native, managed, strlen(native)) != 0)
+    {
+        printf("Native and managed string do not match.\n");
+        abort ();
+    }
+}
+
+typedef void (* Callback2)(char *text, int index);
+
+LIBTEST_API void
+Utf8DelegateAsParameter(Callback2 managedCallback)
+{
+    // Hand each of the first NSTRINGS entries of utf8Strings to the callback,
+    // together with its index.
+    int index = 0;
+    while (index < NSTRINGS)
+    {
+        managedCallback((char*)utf8Strings[index], index);
+        ++index;
+    }
+}
+
+
+/*
+ * Returns a newly malloc'ed copy of utf8Strings[index], or NULL when the
+ * allocation fails. index is not range-checked.
+ */
+LIBTEST_API char*
+StringBuilderParameterReturn(int index)
+{
+    char *pszTextutf8 = (char*)utf8Strings[index];
+    size_t strLength = strlen(pszTextutf8);
+    char * ret = (char *)malloc(strLength + 1);
+
+    // only write through the buffer when malloc succeeded; +1 copies the NUL
+    if (ret != 0)
+        memcpy(ret, pszTextutf8, strLength + 1);
+
+    return  ret;
+}
+
 LIBTEST_API int STDCALL
 mono_test_marshal_pointer_array (int *arr[])
 {
diff --git a/mono/tests/pinvoke-utf8.cs b/mono/tests/pinvoke-utf8.cs
new file mode 100644 (file)
index 0000000..b13e249
--- /dev/null
@@ -0,0 +1,259 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System;
+using System.Runtime.InteropServices;
+using System.Text;
+using System.Collections.Generic;
+
+
+// P/Invoke tests for System.String parameters and return values that are
+// marshaled as UnmanagedType.LPUTF8Str.
+class UTF8StringTests
+{
+       [DllImport("libtest", CallingConvention = CallingConvention.Cdecl)]
+       [return: MarshalAs(UnmanagedType.LPUTF8Str)]
+       public static extern string StringParameterInOut([In, Out][MarshalAs(UnmanagedType.LPUTF8Str)]string s, int index);
+
+       // Passes orgString as an [In, Out] string and checks that the string
+       // returned by the native call equals it. Returns false on mismatch.
+       public static bool TestInOutStringParameter(string orgString, int index)
+       {
+               string passedString = orgString;
+               string expectedNativeString = passedString;
+
+               string nativeString = StringParameterInOut(passedString, index);
+               if (nativeString != expectedNativeString)
+               {
+                       Console.WriteLine("StringParameterInOut: nativeString != expecedNativeString ");
+                       return false;
+               }
+               return true;
+       }
+
+       [DllImport("libtest", CallingConvention = CallingConvention.Cdecl)]
+       [return: MarshalAs(UnmanagedType.LPUTF8Str)]
+       public static extern string StringParameterOut([Out][MarshalAs(UnmanagedType.LPUTF8Str)]string s, int index);
+
+       // Passes orgString as an [Out] string and checks that the string
+       // returned by the native call equals it. Returns false on mismatch.
+       public static bool TestOutStringParameter(string orgString, int index)
+       {
+               string passedString = orgString;
+               string expectedNativeString = passedString;
+
+               // Call the [Out] import declared above; calling StringParameterInOut
+               // here would leave StringParameterOut's result unchecked.
+               string nativeString = StringParameterOut(passedString, index);
+               if (nativeString != expectedNativeString)
+               {
+                       Console.WriteLine("StringParameterOut: nativeString != expecedNativeString ");
+                       return false;
+               }
+               return true;
+       }
+
+       [DllImport("libtest", CallingConvention = CallingConvention.Cdecl)]
+       public static extern void StringParameterRefOut([MarshalAs(UnmanagedType.LPUTF8Str)]out string s, int index);
+
+       // Lets the native call fill an out string for the given index and
+       // checks that it equals orgString. Returns false on mismatch.
+       public static bool TestStringPassByOut(string orgString, int index)
+       {
+               // out string
+               string outString = string.Empty;
+               StringParameterRefOut(out outString, index);
+               if (orgString != outString)
+               {
+                       Console.WriteLine ("TestStringPassByOut : expectedNative != outString");
+                       return false;
+               }
+               return true;
+       }
+
+       [DllImport("libtest", CallingConvention = CallingConvention.Cdecl)]
+       public static extern void StringParameterRef([MarshalAs(UnmanagedType.LPUTF8Str)]ref string s, int index);
+
+       // Passes orgString by ref and checks that the value present after the
+       // call equals a copy taken before it. Returns false on mismatch.
+       public static bool TestStringPassByRef(string orgString, int index)
+       {
+               string orgCopy = new string(orgString.ToCharArray());
+               StringParameterRef(ref orgString, index);
+               if (orgString != orgCopy)
+               {
+                       Console.WriteLine("TestStringPassByRef : string mismatch");
+                       return false;
+               }
+               return true;
+       }
+
+       // Calls both by-value imports with string.Empty. The results are
+       // discarded, so this only checks that the calls complete.
+       public static bool EmptyStringTest()
+       {
+               StringParameterInOut(string.Empty, 0);
+               StringParameterOut(string.Empty, 0);
+               return true;
+       }
+}
+
+// StringBuilder parameters and return values marshaled as LPUTF8Str
+class UTF8StringBuilderTests
+{
+       [DllImport("libtest", CallingConvention = CallingConvention.Cdecl)]
+       public static extern void StringBuilderParameterInOut([In,Out][MarshalAs(UnmanagedType.LPUTF8Str)]StringBuilder s, int index);
+
+       // True when a builder seeded with expectedString still reads as
+       // expectedString after the [In,Out] native call; logs otherwise.
+       public static bool TestInOutStringBuilderParameter(string expectedString, int index)
+       {
+               var builder = new StringBuilder(expectedString);
+               StringBuilderParameterInOut(builder, index);
+
+               if (builder.ToString().Equals(expectedString))
+                       return true;
+
+               Console.WriteLine($"TestInOutStringBuilderParameter: nativeString != expecedNativeString index={index} got={builder} and expected={expectedString} ");
+               return false;
+       }
+
+       [DllImport("libtest", CallingConvention = CallingConvention.Cdecl)]
+       public static extern void StringBuilderParameterOut([Out][MarshalAs(UnmanagedType.LPUTF8Str)]StringBuilder s, int index);
+
+       // True when an empty builder (capacity = expectedString.Length) reads as
+       // expectedString after the [Out] native call; logs otherwise.
+       public static bool TestOutStringBuilderParameter(string expectedString, int index)
+       {
+               var builder = new StringBuilder(expectedString.Length);
+               StringBuilderParameterOut(builder, index);
+
+               bool matches = builder.ToString().Equals(expectedString);
+               if (!matches)
+                       Console.WriteLine("TestOutStringBuilderParameter: string != expecedString ");
+               return matches;
+       }
+
+       [DllImport("libtest", CallingConvention = CallingConvention.Cdecl)]
+       [return: MarshalAs(UnmanagedType.LPUTF8Str,SizeConst = 512)]
+       public static extern StringBuilder StringBuilderParameterReturn(int index);
+
+       // True when the builder returned by the native call reads as
+       // expectedReturn; logs both values otherwise.
+       public static bool TestReturnStringBuilder(string expectedReturn, int index)
+       {
+               StringBuilder returned = StringBuilderParameterReturn(index);
+               if (expectedReturn.Equals(returned.ToString()))
+                       return true;
+
+               Console.WriteLine("TestReturnStringBuilder: nativeString {0} != expecedNativeString {1}", returned.ToString(), expectedReturn);
+               return false;
+       }
+}
+
+// Struct field marshaled as a UTF-8 string
+class UTF8StructMarshalling
+{
+       // Managed struct passed by value to TestStructWithUtf8Field.
+       public struct Utf8Struct
+       {
+               [MarshalAs(UnmanagedType.LPUTF8Str)]
+               public string FirstName;
+               public int index;
+       }
+
+       [DllImport("libtest", CallingConvention = CallingConvention.Cdecl)]
+       public static extern void TestStructWithUtf8Field(Utf8Struct utfStruct);
+
+       // Sends every entry of utf8Strings, paired with its position, through
+       // the native entry point. Always returns true once all calls return.
+       public static bool  TestUTF8StructMarshalling(string[] utf8Strings)
+       {
+               int position = 0;
+               foreach (string text in utf8Strings)
+               {
+                       var payload = new Utf8Struct { FirstName = text, index = position };
+                       TestStructWithUtf8Field(payload);
+                       position++;
+               }
+               return true;
+       }
+}
+
+// Delegate taking a UTF-8 string parameter, invoked from native code
+class UTF8DelegateMarshalling
+{
+       [UnmanagedFunctionPointer(CallingConvention.Cdecl)]
+       public delegate void DelegateUTF8Parameter([MarshalAs(UnmanagedType.LPUTF8Str)]string utf8String, int index);
+
+       [DllImport("libtest", CallingConvention = CallingConvention.Cdecl)]
+       public static extern void Utf8DelegateAsParameter(DelegateUTF8Parameter param);
+
+       // Set by Utf8StringCallback when a received string differs from the table entry.
+       static bool sawMismatch;
+
+       // Hands Utf8StringCallback to the native side and reports whether every
+       // callback invocation saw the expected string.
+       public static bool TestUTF8DelegateMarshalling()
+       {
+               sawMismatch = false;
+               Utf8DelegateAsParameter(Utf8StringCallback);
+
+               return !sawMismatch;
+       }
+
+       // Compares the received string ordinally with Test.utf8Strings[index];
+       // logs and records a failure when they differ.
+       public static void Utf8StringCallback(string nativeString, int index)
+       {
+               if (string.Equals(nativeString, Test.utf8Strings[index], StringComparison.Ordinal))
+                       return;
+
+               Console.WriteLine("Utf8StringCallback string do not match");
+               sawMismatch = true;
+       }
+}
+
+class Test
+{
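+       // Test strings. Each one is passed to the tests together with its index.
+       // The trailing null entry is why several loops in Main stop at Length - 1.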
+       public static string[] utf8Strings = {
+               "Managed",
+               "Sîne klâwen durh die wolken sint geslagen" ,
+               "काचं शक्नोम्यत्तुम् । नोपहिनस्ति माम्",
+               "我能吞下玻璃而不伤身体",
+               "ღმერთსი შემვედრე,შემვედრე, ნუთუ კვლა დამხსნას შემვედრე,სოფლისა შემვედრე, შემვედრე,შემვედრე,შემვედრე,შრომასა, ცეცხლს, წყალსა და მიწასა, ჰაერთა თანა მრომასა; მომცნეს ფრთენი და აღვფრინდე, მივჰხვდე მას ჩემსა ნდომასა, დღისით და ღამით ვჰხედვიდე მზისა ელვათა კრთომაასაშემვედრე,შემვედრე,",
+               "Τη γλώσσα μου έδωσαν ελληνική",
+               null,
+       };
+       
+       // Runs every UTF-8 marshaling scenario in order and stops at the first
+       // failure. Returns 0 on success; otherwise the failing group's base code
+       // (1, 100, 200, 300, 400, 500, 600, 650, 700, 800), plus the index of the
+       // offending string for the groups that loop over utf8Strings.
+       public static int Main(string[] args)
+       {
+               // Test string as [In,Out] parameter
+               for (int i = 0; i < utf8Strings.Length; i++)
+                       if (!UTF8StringTests.TestInOutStringParameter(utf8Strings[i], i))
+                               return i+1;
+
+               // Test string as [Out] parameter
+               for (int i = 0; i < utf8Strings.Length; i++)
+                       if (!UTF8StringTests.TestOutStringParameter(utf8Strings[i], i))
+                               return i+100;
+
+               // Test string as out parameter (the trailing null entry is skipped)
+               for (int i = 0; i < utf8Strings.Length - 1; i++)
+                       if (!UTF8StringTests.TestStringPassByOut(utf8Strings[i], i))
+                               return i+200;
+
+               // Test string as ref parameter (the trailing null entry is skipped)
+               for (int i = 0; i < utf8Strings.Length - 1; i++)
+                       if (!UTF8StringTests.TestStringPassByRef(utf8Strings[i], i))
+                               return i+300;
+
+
+               // Test StringBuilder as [In,Out] parameter
+               for (int i = 0; i < utf8Strings.Length - 1; i++)
+                       if (!UTF8StringBuilderTests.TestInOutStringBuilderParameter(utf8Strings[i], i))
+                               return i+400;
+
+#if NOT_YET
+               // This requires support for [Out] in StringBuilder
+
+               // Test StringBuilder as [Out] parameter
+               for (int i = 0; i < utf8Strings.Length - 1; i++){
+                       if (!UTF8StringBuilderTests.TestOutStringBuilderParameter(utf8Strings[i], i))
+                               return i+500;
+               }
+
+#endif
+
+               // utf8 string as struct fields
+               if (!UTF8StructMarshalling.TestUTF8StructMarshalling(utf8Strings))
+                       return 600;
+
+               // delegate: the result must feed the exit code, otherwise a
+               // mismatch seen in the callback would only be printed
+               if (!UTF8DelegateMarshalling.TestUTF8DelegateMarshalling())
+                       return 650;
+
+#if NOT_YET
+               // This requires special support for StringBuilder return values
+               // Test StringBuilder as return value
+               for (int i = 0; i < utf8Strings.Length - 1; i++)
+                       if (!UTF8StringBuilderTests.TestReturnStringBuilder(utf8Strings[i], i))
+                               return 700+i;
+#endif
+               // String.Empty tests
+               if (!UTF8StringTests.EmptyStringTest())
+                       return 800;
+
+               return 0;
+       }
+}
old mode 100644 (file)
new mode 100755 (executable)