Implement Shuffle for 64bits types.
author Rodrigo Kumpera <kumpera@gmail.com>
Thu, 6 Jan 2011 20:36:31 +0000 (21:36 +0100)
committer Rodrigo Kumpera <kumpera@gmail.com>
Thu, 6 Jan 2011 20:52:12 +0000 (21:52 +0100)
* x86-codegen.h: Add macro and define to emit shufpd.

* mini-ops.h: Add OP_SHUFPD.

* cpu-x86.md:
* mini-x86.c: Implement x86 support.

* simd-intrinsics.c: Handle shuffle on 64bit types.

* VectorOperations.cs: Add new methods.

mcs/class/Mono.Simd/Mono.Simd/VectorOperations.cs
mono/arch/x86/x86-codegen.h
mono/mini/cpu-x86.md
mono/mini/mini-ops.h
mono/mini/mini-x86.c
mono/mini/simd-intrinsics.c

index e0940cf7cd41a271c8ce5c5083b5106bc480f095..4e80db5a1068b74b08e49c0a73aea855d92050ef 100644 (file)
@@ -988,6 +988,29 @@ namespace Mono.Simd
                        return new Vector4ui (*(p1 + ((idx >> 0) & 0x3)), *(p1 + ((idx >> 2) & 0x3)), *(p2 + ((idx >> 4) & 0x3)), *(p2 + ((idx >> 6) & 0x3))); 
                }
 
+               // Managed fallback for the two-element double shuffle (the JIT maps it to SHUFPD).
+               // Only the two low bits of sel are meaningful: bit 0 picks which element of v1
+               // becomes the first element of the result, bit 1 picks which element of v2
+               // becomes the second. Masking with 0x1 keeps every read inside the vector.
+               [Acceleration (AccelMode.SSE2)]
+               public static unsafe Vector2d Shuffle (this Vector2d v1, Vector2d v2, int sel)
+               {
+                       double *p1 = (double*)&v1;
+                       double *p2 = (double*)&v2;
+                       return new Vector2d (*(p1 + ((sel >> 0) & 0x1)), *(p2 + ((sel >> 1) & 0x1)));
+               }
+
+               // Managed fallback for the two-element signed 64-bit shuffle (the JIT maps it to SHUFPD).
+               // Only the two low bits of sel are meaningful: bit 0 picks which element of v1
+               // becomes the first element of the result, bit 1 picks which element of v2
+               // becomes the second. Masking with 0x1 keeps every read inside the vector.
+               [Acceleration (AccelMode.SSE2)]
+               public static unsafe Vector2l Shuffle (this Vector2l v1, Vector2l v2, int sel)
+               {
+                       long *p1 = (long*)&v1;
+                       long *p2 = (long*)&v2;
+                       return new Vector2l (*(p1 + ((sel >> 0) & 0x1)), *(p2 + ((sel >> 1) & 0x1)));
+               }
+
+               // Managed fallback for the two-element unsigned 64-bit shuffle (the JIT maps it to SHUFPD).
+               // Only the two low bits of sel are meaningful: bit 0 picks which element of v1
+               // becomes the first element of the result, bit 1 picks which element of v2
+               // becomes the second. Masking with 0x1 keeps every read inside the vector.
+               [Acceleration (AccelMode.SSE2)]
+               public static unsafe Vector2ul Shuffle (this Vector2ul v1, Vector2ul v2, int sel)
+               {
+                       ulong *p1 = (ulong*)&v1;
+                       ulong *p2 = (ulong*)&v2;
+                       return new Vector2ul (*(p1 + ((sel >> 0) & 0x1)), *(p2 + ((sel >> 1) & 0x1)));
+               }
 
                [Acceleration (AccelMode.SSE2)]
                public static unsafe Vector4f Shuffle (this Vector4f v1, ShuffleSel sel)
index 0a5fca14a99fddd1d0264f479a2a5c9181dd3bd5..0c67b45d56879658e25e3f15acc3be429b1e4597 100644 (file)
@@ -2381,7 +2381,7 @@ typedef enum {
        X86_SSE_PEXTRW = 0xC5,
        X86_SSE_PEXTRD = 0x16,/*sse41*/
 
-       X86_SSE_SHUFPS = 0xC6,  
+       X86_SSE_SHUFP = 0xC6,   
        
 } X86_SSE_Opcode;
 
@@ -2438,6 +2438,13 @@ typedef enum {
                *(inst)++ = (unsigned char)(imm8);      \
        } while (0)
 
+#define x86_sse_alu_pd_reg_reg_imm8(inst,opc,dreg,reg, imm8)       \
+       do {    /* packed-double variant: emit a 0x66 prefix byte, then the plain reg,reg,imm8 SSE encoding */ \
+               x86_codegen_pre(&(inst), 5); /* 5 = the prefix byte + presumably 4 bytes from x86_sse_alu_reg_reg_imm8 -- confirm */ \
+               *(inst)++ = (unsigned char)0x66;        /* the prefix must come before the opcode bytes emitted below */ \
+               x86_sse_alu_reg_reg_imm8 ((inst), (opc), (dreg), (reg), (imm8)); \
+       } while (0)
+
 #define x86_sse_alu_pd_reg_reg(inst,opc,dreg,reg)       \
        do {    \
                x86_codegen_pre(&(inst), 4); \
index b477700a1fd722abb11b125623e96619113f7c5b..32a1607983484ee92ce17dea5444aa1321dde59f 100644 (file)
@@ -456,6 +456,7 @@ pshufflew_high: dest:x src1:x len:5
 pshufflew_low: dest:x src1:x len:5
 pshuffled: dest:x src1:x len:5
 shufps: dest:x src1:x src2:x len:4 clob:1
+shufpd: dest:x src1:x src2:x len:5 clob:1
 
 extract_mask: dest:i src1:x len:4
 
index 612f4f45ad21a3d6d50cbcccc580383f14a9aad5..f2e180877406ef1a32f4539e75179e072b9e95b0 100644 (file)
@@ -628,6 +628,7 @@ MINI_OP(OP_PSHUFLEW_HIGH, "pshufflew_high", XREG, XREG, NONE)
 MINI_OP(OP_PSHUFLEW_LOW, "pshufflew_low", XREG, XREG, NONE)
 MINI_OP(OP_PSHUFLED, "pshuffled", XREG, XREG, NONE)
 MINI_OP(OP_SHUFPS, "shufps", XREG, XREG, XREG)
+MINI_OP(OP_SHUFPD, "shufpd", XREG, XREG, XREG)
 
 MINI_OP(OP_ADDPD, "addpd", XREG, XREG, XREG)
 MINI_OP(OP_DIVPD, "divpd", XREG, XREG, XREG)
index 5ed19b7b95abc80b0fd7cd22dae90f18d87aea27..3b6fac3fe492d406f78741b7a89c2cbbeac7e18d 100644 (file)
@@ -4202,7 +4202,11 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        break;
                case OP_SHUFPS:
                        g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 0xFF);
-                       x86_sse_alu_reg_reg_imm8 (code, X86_SSE_SHUFPS, ins->sreg1, ins->sreg2, ins->inst_c0);
+                       x86_sse_alu_reg_reg_imm8 (code, X86_SSE_SHUFP, ins->sreg1, ins->sreg2, ins->inst_c0);
+                       break; 
+               case OP_SHUFPD:
+                       g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 0x3);
+                       x86_sse_alu_pd_reg_reg_imm8 (code, X86_SSE_SHUFP, ins->sreg1, ins->sreg2, ins->inst_c0);
                        break; 
 
                case OP_ADDPD:
index 28c632248c362079b2db34583514f8c37f47c703..4d9e69f99180118deef2b361bc0d904646163879 100644 (file)
@@ -201,6 +201,7 @@ static const SimdIntrinsc vector2d_intrinsics[] = {
        { SN_PrefetchTemporal1stLevelCache, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_1 },
        { SN_PrefetchTemporal2ndLevelCache, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_2 },
        { SN_PrefetchNonTemporal, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_NTA },
+       { SN_Shuffle, OP_SHUFPD, SIMD_VERSION_SSE1, SIMD_EMIT_SHUFFLE },
        { SN_Sqrt, OP_SQRTPD, SIMD_VERSION_SSE1, SIMD_EMIT_UNARY },
        { SN_StoreAligned, OP_STOREX_ALIGNED_MEMBASE_REG, SIMD_VERSION_SSE1, SIMD_EMIT_STORE },
        { SN_get_X, 0, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER_QWORD },
@@ -225,6 +226,7 @@ static const SimdIntrinsc vector2ul_intrinsics[] = {
        { SN_PrefetchTemporal1stLevelCache, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_1 },
        { SN_PrefetchTemporal2ndLevelCache, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_2 },
        { SN_PrefetchNonTemporal, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_NTA },
+       { SN_Shuffle, OP_SHUFPD, SIMD_VERSION_SSE1, SIMD_EMIT_SHUFFLE },
        { SN_StoreAligned, OP_STOREX_ALIGNED_MEMBASE_REG, SIMD_VERSION_SSE1, SIMD_EMIT_STORE },
        { SN_UnpackHigh, OP_UNPACK_HIGHQ, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
        { SN_UnpackLow, OP_UNPACK_LOWQ, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
@@ -253,6 +255,7 @@ static const SimdIntrinsc vector2l_intrinsics[] = {
        { SN_PrefetchTemporal1stLevelCache, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_1 },
        { SN_PrefetchTemporal2ndLevelCache, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_2 },
        { SN_PrefetchNonTemporal, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_NTA },
+       { SN_Shuffle, OP_SHUFPD, SIMD_VERSION_SSE1, SIMD_EMIT_SHUFFLE },
        { SN_StoreAligned, OP_STOREX_ALIGNED_MEMBASE_REG, SIMD_VERSION_SSE1, SIMD_EMIT_STORE },
        { SN_UnpackHigh, OP_UNPACK_HIGHQ, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
        { SN_UnpackLow, OP_UNPACK_LOWQ, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
@@ -1317,10 +1320,13 @@ simd_intrinsic_emit_shuffle (const SimdIntrinsc *intrinsic, MonoCompile *cfg, Mo
        int vreg, vreg2 = -1;
        int param_count = mono_method_signature (cmethod)->param_count;
 
+       /* Only a literal selector can be handled here; presumably it ends up as the instruction's immediate -- confirm. */
        if (args [param_count - 1]->opcode != OP_ICONST) {
                /*TODO Shuffle with non literals is not yet supported */
                return NULL;
        }
+
        vreg = get_simd_vreg (cfg, cmethod, args [0]);
        if (param_count == 3)
                vreg2 = get_simd_vreg (cfg, cmethod, args [1]);
@@ -1337,10 +1343,8 @@ simd_intrinsic_emit_shuffle (const SimdIntrinsc *intrinsic, MonoCompile *cfg, Mo
        ins->dreg = alloc_ireg (cfg);
        MONO_ADD_INS (cfg->cbb, ins);
 
-       if (param_count == 3) {
-               g_assert (intrinsic->opcode == OP_PSHUFLED);
+       if (param_count == 3 && ins->opcode == OP_PSHUFLED)
                ins->opcode = OP_SHUFPS;
-       }
        return ins;
 }