Merge pull request #5714 from alexischr/update_bockbuild
[mono.git] / mono / mini / simd-intrinsics.c
index 091f6b4073cb90cd768495ae34c8052eb7846348..d55055f9f4763bf2833553eb5d40c3aceb338cce 100644 (file)
@@ -1,5 +1,6 @@
-/*
- * simd-instrisics.c: simd support for intrinsics
+/**
+ * \file
+ * simd support for intrinsics
  *
  * Author:
  *   Rodrigo Kumpera (rkumpera@novell.com)
@@ -14,6 +15,7 @@
 #include "ir-emit.h"
 #include "mono/utils/bsearch.h"
 #include <mono/metadata/abi-details.h>
+#include <mono/metadata/reflection-internals.h>
 
 /*
 General notes on SIMD intrinsics
@@ -827,7 +829,7 @@ get_simd_vreg (MonoCompile *cfg, MonoMethod *cmethod, MonoInst *src)
                return src->sreg1;
        } else if (spec [MONO_INST_DEST] == 'x') {
                return src->dreg;
-       } else if (src->opcode == OP_VCALL) {
+       } else if (src->opcode == OP_VCALL || src->opcode == OP_VCALL_MEMBASE) {
                return src->dreg;
        }
 
@@ -1028,13 +1030,15 @@ type_to_pmul_op (MonoType *t)
        case MONO_TYPE_U4:
        case MONO_TYPE_I4:
                return OP_PMULD;
-       case MONO_TYPE_U8:
-       case MONO_TYPE_I8:
-               return OP_PMULQ;
        case MONO_TYPE_R4:
                return OP_MULPS;
        case MONO_TYPE_R8:
                return OP_MULPD;
+       case MONO_TYPE_U8:
+               /* PMULQ multiplies two 32 bit numbers into a 64 bit one */
+               return -1;
+       case MONO_TYPE_I8:
+               return -1;
        default:
                break;
        }
@@ -1055,6 +1059,99 @@ type_to_pdiv_op (MonoType *t)
        return -1;
 }
 
+static int
+type_to_pxor_op (MonoType *t)
+{
+       /*
+        * These opcodes have the same semantics, but using the
+        * correctly typed version is better for performance.
+        */
+       switch (t->type) {
+       case MONO_TYPE_R4:
+               return OP_XORPS;
+       case MONO_TYPE_R8:
+               return OP_XORPD;
+       default:
+               return OP_PXOR;
+       }
+}
+
+static int
+type_to_pand_op (MonoType *t)
+{
+       switch (t->type) {
+       case MONO_TYPE_R4:
+               return OP_ANDPS;
+       case MONO_TYPE_R8:
+               return OP_ANDPD;
+       default:
+               return OP_PAND;
+       }
+}
+
+static int
+type_to_por_op (MonoType *t)
+{
+       switch (t->type) {
+       case MONO_TYPE_R4:
+               return OP_ORPS;
+       case MONO_TYPE_R8:
+               return OP_ORPD;
+       default:
+               return OP_POR;
+       }
+}
+
+static int
+type_to_pmin_op (MonoType *t)
+{
+       switch (t->type) {
+       case MONO_TYPE_R4:
+               return OP_MINPS;
+       case MONO_TYPE_R8:
+               return OP_MINPD;
+       case MONO_TYPE_I1:
+               return OP_PMINB;
+       case MONO_TYPE_U1:
+               return OP_PMINB_UN;
+       case MONO_TYPE_I2:
+               return OP_PMINW;
+       case MONO_TYPE_U2:
+               return OP_PMINW_UN;
+       case MONO_TYPE_I4:
+               return OP_PMIND;
+       case MONO_TYPE_U4:
+               return OP_PMIND_UN;
+       default:
+               return -1;
+       }
+}
+
+static int
+type_to_pmax_op (MonoType *t)
+{
+       switch (t->type) {
+       case MONO_TYPE_R4:
+               return OP_MAXPS;
+       case MONO_TYPE_R8:
+               return OP_MAXPD;
+       case MONO_TYPE_I1:
+               return OP_PMAXB;
+       case MONO_TYPE_U1:
+               return OP_PMAXB_UN;
+       case MONO_TYPE_I2:
+               return OP_PMAXW;
+       case MONO_TYPE_U2:
+               return OP_PMAXW_UN;
+       case MONO_TYPE_I4:
+               return OP_PMAXD;
+       case MONO_TYPE_U4:
+               return OP_PMAXD_UN;
+       default:
+               return -1;
+       }
+}
+
 static int
 get_simd_vreg_or_expanded_scalar (MonoCompile *cfg, MonoClass *klass, MonoType *param_type, MonoInst *src)
 {
@@ -1938,6 +2035,30 @@ mono_emit_simd_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSign
        return NULL;
 }
 
+static void
+assert_handled (MonoCompile *cfg, MonoMethod *method)
+{
+       MonoCustomAttrInfo *cattr;
+       MonoError error;
+
+       if (cfg->verbose_level > 1) {
+               cattr = mono_custom_attrs_from_method_checked (method, &error);
+
+               if (cattr) {
+                       gboolean has_attr = FALSE;
+                       for (int i = 0; i < cattr->num_attrs; ++i)
+                               if (cattr->attrs [i].ctor && (!strcmp (cattr->attrs [i].ctor->klass->name, "JitIntrinsicAttribute")))
+                                       has_attr = TRUE;
+                       if (has_attr) {
+                               printf ("SIMD intrinsic unhandled: %s\n", mono_method_get_name_full (method, TRUE, TRUE, MONO_TYPE_NAME_FORMAT_IL));
+                               fflush (stdout);
+                               //g_assert_not_reached ();
+                       }
+                       mono_custom_attrs_free (cattr);
+               }
+       }
+}
+
 // The entries should be ordered by name
 // System.Numerics.Vector2/Vector3/Vector4
 static const SimdIntrinsic vector2_intrinsics[] = {
@@ -1959,13 +2080,14 @@ emit_vector_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignatu
 {
        const SimdIntrinsic *intrins;
        MonoMethodSignature *sig = mono_method_signature (cmethod);
+       MonoType *type = &cmethod->klass->byval_arg;
 
        /*
         * Vector2/3/4 are handled the same way, since the underlying SIMD type is the same (4 * r4).
         */
        intrins = (const SimdIntrinsic*)mono_binary_search (cmethod->name, vector2_intrinsics, sizeof (vector2_intrinsics) / sizeof (SimdIntrinsic), sizeof (SimdIntrinsic), &simd_intrinsic_compare_by_name);
        if (!intrins) {
-               //printf ("%s\n", mono_method_full_name (cmethod, 1));
+               assert_handled (cfg, cmethod);
                return NULL;
        }
 
@@ -1976,16 +2098,26 @@ emit_vector_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignatu
        }
 
        switch (intrins->name) {
-       case SN_ctor:
+       case SN_ctor: {
+               gboolean match = TRUE;
+               for (int i = 0; i < fsig->param_count; ++i)
+                       if (fsig->params [i]->type != MONO_TYPE_R4)
+                               match = FALSE;
+               if (!match)
+                       break;
                return simd_intrinsic_emit_ctor (intrins, cfg, cmethod, args);
-               break;
+       }
        case SN_Equals:
+               if (!(fsig->param_count == 1 && fsig->ret->type == MONO_TYPE_BOOLEAN && fsig->params [0] == type))
+                       break;
                return simd_intrinsic_emit_equality (intrins, cfg, cmethod, args);
-               break;
        case SN_SquareRoot:
+               if (!(fsig->param_count == 1 && fsig->ret == type && fsig->params [0] == type))
+                       break;
                return simd_intrinsic_emit_unary (intrins, cfg, cmethod, args);
-               break;
        case SN_Dot:
+               if (!(fsig->param_count == 2 && fsig->ret->type == MONO_TYPE_R4 && fsig->params [0] == type && fsig->params [1] == type))
+                       break;
                if (COMPILE_LLVM (cfg)) {
                        MonoInst *ins;
 
@@ -1999,6 +2131,9 @@ emit_vector_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignatu
                MonoInst *sub;
                MonoInst *zero;
 
+               if (!(fsig->param_count == 1 && fsig->ret == type && fsig->params [0] == type))
+                       break;
+
                MONO_INST_NEW (cfg, zero, OP_XZERO);
                zero->dreg = alloc_xreg (cfg);
                zero->klass = cmethod->klass;
@@ -2013,14 +2148,37 @@ emit_vector_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignatu
        case SN_op_Division:
        case SN_op_Multiply:
        case SN_op_Subtraction:
+               if (!(fsig->param_count == 2 && fsig->ret == type && (fsig->params [0] == type || fsig->params [0]->type == MONO_TYPE_R4) && (fsig->params [1] == type || fsig->params [1]->type == MONO_TYPE_R4)))
+                       break;
                return simd_intrinsic_emit_binary (intrins, cfg, cmethod, args);
        default:
                break;
        }
 
+       assert_handled (cfg, cmethod);
+
+       if (cfg->verbose_level > 1) {
+               char *name = mono_method_full_name (cmethod, TRUE);
+               printf ("  SIMD method %s not handled.\n", name);
+               g_free (name);
+       }
        return NULL;
 }
 
+static MonoInst*
+emit_vector_is_hardware_accelerated_intrinsic (MonoCompile *cfg)
+{
+       MonoInst *ins;
+
+       if (simd_supported_versions)
+               EMIT_NEW_ICONST (cfg, ins, 1);
+       else
+               EMIT_NEW_ICONST (cfg, ins, 0);
+       ins->type = STACK_I4;
+       return ins;
+}
+
+/* These should be ordered by name */
 static const SimdIntrinsic vector_t_intrinsics[] = {
        { SN_ctor },
        { SN_Abs },
@@ -2030,12 +2188,17 @@ static const SimdIntrinsic vector_t_intrinsics[] = {
        { SN_GreaterThanOrEqual },
        { SN_LessThan },
        { SN_LessThanOrEqual },
+       { SN_Max },
+       { SN_Min },
        { SN_get_AllOnes, OP_XONES },
        { SN_get_Count },
        { SN_get_Item },
        { SN_get_Zero, OP_XZERO },
        { SN_op_Addition },
+       { SN_op_BitwiseAnd },
+       { SN_op_BitwiseOr },
        { SN_op_Division },
+       { SN_op_ExclusiveOr },
        { SN_op_Explicit },
        { SN_op_Multiply },
        { SN_op_Subtraction }
@@ -2045,16 +2208,17 @@ static MonoInst*
 emit_vector_t_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
 {
        const SimdIntrinsic *intrins;
-       MonoType *etype;
+       MonoType *type, *etype;
        MonoInst *ins;
        int size, len, index;
 
        intrins = (const SimdIntrinsic*)mono_binary_search (cmethod->name, vector_t_intrinsics, sizeof (vector_t_intrinsics) / sizeof (SimdIntrinsic), sizeof (SimdIntrinsic), &simd_intrinsic_compare_by_name);
        if (!intrins) {
-               //printf ("%s\n", mono_method_full_name (cmethod, 1));
+               assert_handled (cfg, cmethod);
                return NULL;
        }
 
+       type = &cmethod->klass->byval_arg;
        etype = mono_class_get_context (cmethod->klass)->class_inst->type_argv [0];
        size = mono_class_value_size (mono_class_from_mono_type (etype), NULL);
        g_assert (size);
@@ -2071,10 +2235,14 @@ emit_vector_t_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSigna
 
        switch (intrins->name) {
        case SN_get_Count:
+               if (!(fsig->param_count == 0 && fsig->ret->type == MONO_TYPE_I4))
+                       break;
                EMIT_NEW_ICONST (cfg, ins, len);
                return ins;
        case SN_get_AllOnes:
        case SN_get_Zero:
+               if (!(fsig->param_count == 0 && mono_metadata_type_equal (fsig->ret, type)))
+                       break;
                return simd_intrinsic_emit_const (intrins, cfg, cmethod, args);
        case SN_get_Item:
                g_assert (fsig->param_count == 1);
@@ -2122,15 +2290,16 @@ emit_vector_t_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSigna
        case SN_op_Explicit:
                return simd_intrinsic_emit_cast (intrins, cfg, cmethod, args);
        case SN_Equals:
-               if (fsig->param_count == 1)
+               if (fsig->param_count == 1 && fsig->ret->type == MONO_TYPE_BOOLEAN && mono_metadata_type_equal (fsig->params [0], type))
                        return simd_intrinsic_emit_equality_op (cfg, cmethod, args, type_to_comp_op (etype), SIMD_COMP_EQ);
-               if (fsig->param_count == 2)
+               if (fsig->param_count == 2 && mono_metadata_type_equal (fsig->ret, type) && mono_metadata_type_equal (fsig->params [0], type) && mono_metadata_type_equal (fsig->params [1], type))
                        return simd_intrinsic_emit_binary_op (cfg, type_to_comp_op (etype), 0, cmethod->klass, fsig->params [0], fsig->params [1], args [0], args [1]);
                break;
 
        case SN_GreaterThan:
        case SN_GreaterThanOrEqual:
-       case SN_LessThan: {
+       case SN_LessThan:
+       case SN_LessThanOrEqual: {
                MonoInst *cmp1, *cmp2;
                int eq_op, gt_op;
 
@@ -2150,16 +2319,16 @@ emit_vector_t_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSigna
                switch (intrins->name) {
                case SN_GreaterThan:
                        return simd_intrinsic_emit_binary_op (cfg, gt_op, 0, cmethod->klass, fsig->params [0], fsig->params [1], args [0], args [1]);
-               case SN_LessThanOrEqual:
+               case SN_LessThan:
                        return simd_intrinsic_emit_binary_op (cfg, gt_op, 0, cmethod->klass, fsig->params [0], fsig->params [1], args [1], args [0]);
+               case SN_LessThanOrEqual:
+                       cmp1 = simd_intrinsic_emit_binary_op (cfg, eq_op, 0, cmethod->klass, fsig->params [0], fsig->params [1], args [1], args [0]);
+                       cmp2 = simd_intrinsic_emit_binary_op (cfg, gt_op, 0, cmethod->klass, fsig->params [0], fsig->params [1], args [1], args [0]);
+                       return simd_intrinsic_emit_binary_op (cfg, OP_POR, 0, cmethod->klass, fsig->params [0], fsig->params [1], cmp1, cmp2);
                case SN_GreaterThanOrEqual:
                        cmp1 = simd_intrinsic_emit_binary_op (cfg, eq_op, 0, cmethod->klass, fsig->params [0], fsig->params [1], args [0], args [1]);
                        cmp2 = simd_intrinsic_emit_binary_op (cfg, gt_op, 0, cmethod->klass, fsig->params [0], fsig->params [1], args [0], args [1]);
                        return simd_intrinsic_emit_binary_op (cfg, OP_POR, 0, cmethod->klass, fsig->params [0], fsig->params [1], cmp1, cmp2);
-               case SN_LessThan:
-                       cmp1 = simd_intrinsic_emit_binary_op (cfg, eq_op, 0, cmethod->klass, fsig->params [0], fsig->params [1], args [1], args [0]);
-                       cmp2 = simd_intrinsic_emit_binary_op (cfg, gt_op, 0, cmethod->klass, fsig->params [0], fsig->params [1], args [1], args [0]);
-                       return simd_intrinsic_emit_binary_op (cfg, OP_POR, 0, cmethod->klass, fsig->params [0], fsig->params [1], cmp1, cmp2);
                default:
                        g_assert_not_reached ();
                        break;
@@ -2187,26 +2356,49 @@ emit_vector_t_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSigna
                        break;
                }
                break;
-       case SN_op_Addition: {
-               int op = type_to_padd_op (etype);
-               if (op != -1)
-                       return simd_intrinsic_emit_binary_op (cfg, op, 0, cmethod->klass, fsig->params [0], fsig->params [0], args [0], args [1]);
-               break;
-       }
-       case SN_op_Subtraction: {
-               int op = type_to_psub_op (etype);
-               if (op != -1)
-                       return simd_intrinsic_emit_binary_op (cfg, op, 0, cmethod->klass, fsig->params [0], fsig->params [0], args [0], args [1]);
-               break;
-       }
-       case SN_op_Multiply: {
-               int op = type_to_pmul_op (etype);
-               if (op != -1)
-                       return simd_intrinsic_emit_binary_op (cfg, op, 0, cmethod->klass, fsig->params [0], fsig->params [0], args [0], args [1]);
-               break;
-       }
-       case SN_op_Division: {
-               int op = type_to_pdiv_op (etype);
+       case SN_op_Addition:
+       case SN_op_Subtraction:
+       case SN_op_Multiply:
+       case SN_op_Division:
+       case SN_op_ExclusiveOr:
+       case SN_op_BitwiseAnd:
+       case SN_op_BitwiseOr:
+       case SN_Max:
+       case SN_Min: {
+               if (!(fsig->param_count == 2 && mono_metadata_type_equal (fsig->ret, fsig->params [0]) && mono_metadata_type_equal (fsig->params [0], fsig->params [1])))
+                       break;
+               int op = 0;
+               switch (intrins->name) {
+               case SN_op_Addition:
+                       op = type_to_padd_op (etype);
+                       break;
+               case SN_op_Subtraction:
+                       op = type_to_psub_op (etype);
+                       break;
+               case SN_op_Multiply:
+                       op = type_to_pmul_op (etype);
+                       break;
+               case SN_op_Division:
+                       op = type_to_pdiv_op (etype);
+                       break;
+               case SN_op_ExclusiveOr:
+                       op = type_to_pxor_op (etype);
+                       break;
+               case SN_op_BitwiseAnd:
+                       op = type_to_pand_op (etype);
+                       break;
+               case SN_op_BitwiseOr:
+                       op = type_to_por_op (etype);
+                       break;
+               case SN_Min:
+                       op = type_to_pmin_op (etype);
+                       break;
+               case SN_Max:
+                       op = type_to_pmax_op (etype);
+                       break;
+               default:
+                       g_assert_not_reached ();
+               }
                if (op != -1)
                        return simd_intrinsic_emit_binary_op (cfg, op, 0, cmethod->klass, fsig->params [0], fsig->params [0], args [0], args [1]);
                break;
@@ -2243,6 +2435,14 @@ emit_vector_t_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSigna
                break;
        }
 
+       assert_handled (cfg, cmethod);
+
+       if (cfg->verbose_level > 1) {
+               char *name = mono_method_full_name (cmethod, TRUE);
+               printf ("  SIMD method %s not handled.\n", name);
+               g_free (name);
+       }
+
        return NULL;
 }
 
@@ -2260,20 +2460,9 @@ emit_sys_numerics_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodS
        if (!strcmp ("Vector2", class_name) || !strcmp ("Vector4", class_name) || !strcmp ("Vector3", class_name))
                return emit_vector_intrinsics (cfg, cmethod, fsig, args);
 
-       if (!strcmp ("Vector`1", class_name))
-               return emit_vector_t_intrinsics (cfg, cmethod, fsig, args);
-
        if (!strcmp ("System.Numerics", nspace) && !strcmp ("Vector", class_name)) {
-               if (!strcmp (cmethod->name, "get_IsHardwareAccelerated")) {
-                       MonoInst *ins;
-
-                       if (simd_supported_versions)
-                               EMIT_NEW_ICONST (cfg, ins, 1);
-                       else
-                               EMIT_NEW_ICONST (cfg, ins, 0);
-                       ins->type = STACK_I4;
-                       return ins;
-               }
+               if (!strcmp (cmethod->name, "get_IsHardwareAccelerated"))
+                       return emit_vector_is_hardware_accelerated_intrinsic (cfg);
        }
 
        return NULL;
@@ -2282,10 +2471,17 @@ emit_sys_numerics_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodS
 static MonoInst*
 emit_sys_numerics_vectors_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
 {
+       const char *nspace = cmethod->klass->name_space;
        const char *class_name = cmethod->klass->name;
 
        if (!strcmp (class_name, "Vector`1"))
                return emit_vector_t_intrinsics (cfg, cmethod, fsig, args);
+
+       if (!strcmp ("System.Numerics", nspace) && !strcmp ("Vector", class_name)) {
+               if (!strcmp (cmethod->name, "get_IsHardwareAccelerated"))
+                       return emit_vector_is_hardware_accelerated_intrinsic (cfg);
+       }
+
        return NULL;
 }