* mini-ops.h: Add packed signed min, max and compare greater than.
* cpu-x86.md: Same.
* mini-x86.c (mono_arch_output_basic_block): Same. Add packed add/sub with
saturation.
* simd-methods.h: Add CompareGreaterThan.
* simd-methods.h: Remove CompareEquals.
* simd-intrinsics.c: Add new TODO entry and some cosmetic changes.
* simd-intrinsics.c (vector16sb_intrinsics): New intrinsics table for the Vector16sb type.
* simd-intrinsics.c (vector4f_intrinsics): Rename CompareEquals to
CompareEqual.
svn path=/trunk/mono/; revision=116899
+2008-10-23 Rodrigo Kumpera <rkumpera@novell.com>
+
+ * mini-ops.h: Add packed signed min, max and compare greater than.
+
+ * cpu-x86.md: Same.
+
+ * mini-x86.c (mono_arch_output_basic_block): Same. Add packed add/sub with
+ saturation.
+
+ * simd-methods.h: Add CompareGreaterThan.
+
+ * simd-methods.h: Remove CompareEquals.
+
+ * simd-intrinsics.c: Add new TODO entry and some cosmetic changes.
+
+ * simd-intrinsics.c (vector16sb_intrinsics): New intrinsics table for the Vector16sb type.
+
+ * simd-intrinsics.c (vector4f_intrinsics): Rename CompareEquals to
+ CompareEqual.
+
2008-10-23 Rodrigo Kumpera <rkumpera@novell.com>
* basic-simd.cs: Fix tests due to change in the API.
pmaxw_un: dest:x src1:x src2:x len:5 clob:1
pmaxd_un: dest:x src1:x src2:x len:5 clob:1
+pmaxb: dest:x src1:x src2:x len:5 clob:1
+pmaxw: dest:x src1:x src2:x len:4 clob:1
+pmaxd: dest:x src1:x src2:x len:5 clob:1
+
pavgb_un: dest:x src1:x src2:x len:4 clob:1
pavgw_un: dest:x src1:x src2:x len:4 clob:1
pminw_un: dest:x src1:x src2:x len:5 clob:1
pmind_un: dest:x src1:x src2:x len:5 clob:1
+pminb: dest:x src1:x src2:x len:5 clob:1
+pminw: dest:x src1:x src2:x len:4 clob:1
+pmind: dest:x src1:x src2:x len:5 clob:1
+
pcmpeqb: dest:x src1:x src2:x len:4 clob:1
pcmpeqw: dest:x src1:x src2:x len:4 clob:1
pcmpeqd: dest:x src1:x src2:x len:4 clob:1
+pcmpgtb: dest:x src1:x src2:x len:4 clob:1
+pcmpgtw: dest:x src1:x src2:x len:4 clob:1
+pcmpgtd: dest:x src1:x src2:x len:4 clob:1
+
psumabsdiff: dest:x src1:x src2:x len:4 clob:1
unpack_lowb: dest:x src1:x src2:x len:4 clob:1
MINI_OP(OP_PMAXW_UN, "pmaxw_un", XREG, XREG, XREG)
MINI_OP(OP_PMAXD_UN, "pmaxd_un", XREG, XREG, XREG)
+MINI_OP(OP_PMAXB, "pmaxb", XREG, XREG, XREG)
+MINI_OP(OP_PMAXW, "pmaxw", XREG, XREG, XREG)
+MINI_OP(OP_PMAXD, "pmaxd", XREG, XREG, XREG)
+
MINI_OP(OP_PAVGB_UN, "pavgb_un", XREG, XREG, XREG)
MINI_OP(OP_PAVGW_UN, "pavgw_un", XREG, XREG, XREG)
MINI_OP(OP_PMINW_UN, "pminw_un", XREG, XREG, XREG)
MINI_OP(OP_PMIND_UN, "pmind_un", XREG, XREG, XREG)
+MINI_OP(OP_PMINB, "pminb", XREG, XREG, XREG)
+MINI_OP(OP_PMINW, "pminw", XREG, XREG, XREG)
+MINI_OP(OP_PMIND, "pmind", XREG, XREG, XREG)
+
MINI_OP(OP_PCMPEQB, "pcmpeqb", XREG, XREG, XREG)
MINI_OP(OP_PCMPEQW, "pcmpeqw", XREG, XREG, XREG)
MINI_OP(OP_PCMPEQD, "pcmpeqd", XREG, XREG, XREG)
+MINI_OP(OP_PCMPGTB, "pcmpgtb", XREG, XREG, XREG)
+MINI_OP(OP_PCMPGTW, "pcmpgtw", XREG, XREG, XREG)
+MINI_OP(OP_PCMPGTD, "pcmpgtd", XREG, XREG, XREG)
+
MINI_OP(OP_PSUM_ABS_DIFF, "psumabsdiff", XREG, XREG, XREG)
MINI_OP(OP_UNPACK_LOWB, "unpack_lowb", XREG, XREG, XREG)
case OP_PMAXD_UN:
x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMAXUD, ins->sreg1, ins->sreg2);
break;
+
+ case OP_PMAXB:
+ x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMAXSB, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PMAXW:
+ x86_sse_alu_pd_reg_reg (code, X86_SSE_PMAXSW, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PMAXD:
+ x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMAXSD, ins->sreg1, ins->sreg2);
+ break;
case OP_PAVGB_UN:
x86_sse_alu_pd_reg_reg (code, X86_SSE_PAVGB, ins->sreg1, ins->sreg2);
x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMINUD, ins->sreg1, ins->sreg2);
break;
+ case OP_PMINB:
+ x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMINSB, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PMINW:
+ x86_sse_alu_pd_reg_reg (code, X86_SSE_PMINSW, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PMIND:
+ x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMINSD, ins->sreg1, ins->sreg2);
+ break;
+
case OP_PCMPEQB:
x86_sse_alu_pd_reg_reg (code, X86_SSE_PCMPEQB, ins->sreg1, ins->sreg2);
break;
x86_sse_alu_pd_reg_reg (code, X86_SSE_PCMPEQD, ins->sreg1, ins->sreg2);
break;
+ case OP_PCMPGTB:
+ x86_sse_alu_pd_reg_reg (code, X86_SSE_PCMPGTB, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PCMPGTW:
+ x86_sse_alu_pd_reg_reg (code, X86_SSE_PCMPGTW, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PCMPGTD:
+ x86_sse_alu_pd_reg_reg (code, X86_SSE_PCMPGTD, ins->sreg1, ins->sreg2);
+ break;
+
case OP_PSUM_ABS_DIFF:
x86_sse_alu_pd_reg_reg (code, X86_SSE_PSADBW, ins->sreg1, ins->sreg2);
break;
case OP_PSUBW_SAT_UN:
x86_sse_alu_pd_reg_reg (code, X86_SSE_PSUBUSW, ins->sreg1, ins->sreg2);
break;
+
+ case OP_PADDB_SAT:
+ x86_sse_alu_pd_reg_reg (code, X86_SSE_PADDSB, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PSUBB_SAT:
+ x86_sse_alu_pd_reg_reg (code, X86_SSE_PSUBSB, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PADDW_SAT:
+ x86_sse_alu_pd_reg_reg (code, X86_SSE_PADDSW, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PSUBW_SAT:
+ x86_sse_alu_pd_reg_reg (code, X86_SSE_PSUBSW, ins->sreg1, ins->sreg2);
+ break;
case OP_PMULW:
x86_sse_alu_pd_reg_reg (code, X86_SSE_PMULLW, ins->sreg1, ins->sreg2);
TODO maybe add SSE3 emulation on top of SSE2, or just implement the corresponding functions using SSE2 intrinsics.
TODO pass simd arguments in registers or, at least, add SSE support for pushing large (>=16) valuetypes
TODO passing simd args byval to a non-intrinsic method causes some useless local var load/store to happen.
-TODO check if we need to init the SSE control word with better precision.
+TODO check if we need to init the SSE control word with better precision.
+TODO add support for 3 reg sources in mini without slowing the common path. Or find a way to make MASKMOVDQU work.
General notes for SIMD intrinsics.
{ SN_ctor, 0, SIMD_EMIT_CTOR },
{ SN_AndNot, OP_ANDNPS, SIMD_EMIT_BINARY },
{ SN_AddSub, OP_ADDSUBPS, SIMD_EMIT_BINARY, SIMD_VERSION_SSE3 },
- { SN_CompareEquals, OP_COMPPS, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1, SIMD_COMP_EQ },
+ { SN_CompareEqual, OP_COMPPS, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1, SIMD_COMP_EQ },
{ SN_CompareLessEqual, OP_COMPPS, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1, SIMD_COMP_LE },
{ SN_CompareLessThan, OP_COMPPS, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1, SIMD_COMP_LT },
{ SN_CompareNotEqual, OP_COMPPS, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1, SIMD_COMP_NEQ },
{ SN_CompareEqual, OP_PCMPEQW, SIMD_EMIT_BINARY },
{ SN_ExtractByteMask, 0, SIMD_EMIT_EXTRACT_MASK },
{ SN_LoadAligned, 0, SIMD_EMIT_LOAD_ALIGNED },
- { SN_Max, OP_PMAXW_UN, SIMD_EMIT_BINARY, SIMD_VERSION_SSE41},
+ { SN_Max, OP_PMAXW_UN, SIMD_EMIT_BINARY, SIMD_VERSION_SSE41 },
{ SN_Min, OP_PMINW_UN, SIMD_EMIT_BINARY, SIMD_VERSION_SSE41 },
{ SN_MultiplyStoreHigh, OP_PMULW_HIGH_UN, SIMD_EMIT_BINARY },
{ SN_ShiftRightArithmetic, OP_PSARW, SIMD_EMIT_SHIFT },
{ SN_op_Subtraction, OP_PSUBB, SIMD_EMIT_BINARY },
};
+/*
+Intrinsics table used for methods of the class named "Vector16sb".
+Rows appear to be { method name id, opcode, emit kind [, required SIMD version] };
+see the SimdIntrinsc definition for the authoritative layout.
+NOTE(review): rows are listed in ascending method-name order; presumably the
+lookup relies on that ordering - confirm before inserting new entries.
+
+Missing (not yet present in this table):
+.ctor
+getters
+setters
+ */
+static const SimdIntrinsc vector16sb_intrinsics[] = {
+ { SN_AddWithSaturation, OP_PADDB_SAT, SIMD_EMIT_BINARY },
+ { SN_CompareEqual, OP_PCMPEQB, SIMD_EMIT_BINARY },
+ { SN_CompareGreaterThan, OP_PCMPGTB, SIMD_EMIT_BINARY },
+ { SN_ExtractByteMask, 0, SIMD_EMIT_EXTRACT_MASK },
+ { SN_LoadAligned, 0, SIMD_EMIT_LOAD_ALIGNED },
+ { SN_Max, OP_PMAXB, SIMD_EMIT_BINARY, SIMD_VERSION_SSE41 },
+ { SN_Min, OP_PMINB, SIMD_EMIT_BINARY, SIMD_VERSION_SSE41 },
+ { SN_StoreAligned, 0, SIMD_EMIT_STORE_ALIGNED },
+ { SN_SubWithSaturation, OP_PSUBB_SAT, SIMD_EMIT_BINARY },
+ { SN_UnpackHigh, OP_UNPACK_HIGHB, SIMD_EMIT_BINARY },
+ { SN_UnpackLow, OP_UNPACK_LOWB, SIMD_EMIT_BINARY },
+ { SN_op_Addition, OP_PADDB, SIMD_EMIT_BINARY },
+ { SN_op_BitwiseAnd, OP_PAND, SIMD_EMIT_BINARY },
+ { SN_op_BitwiseOr, OP_POR, SIMD_EMIT_BINARY },
+ { SN_op_BitwiseXor, OP_PXOR, SIMD_EMIT_BINARY },
+ { SN_op_Explicit, 0, SIMD_EMIT_CAST },
+ { SN_op_Subtraction, OP_PSUBB, SIMD_EMIT_BINARY },
+};
+
static guint32 simd_supported_versions;
/*TODO match using number of parameters as well*/
return emit_intrinsics (cfg, cmethod, fsig, args, vector8us_intrinsics, sizeof (vector8us_intrinsics) / sizeof (SimdIntrinsc));
if (!strcmp ("Vector16b", cmethod->klass->name))
return emit_intrinsics (cfg, cmethod, fsig, args, vector16b_intrinsics, sizeof (vector16b_intrinsics) / sizeof (SimdIntrinsc));
+ if (!strcmp ("Vector16sb", cmethod->klass->name))
+ return emit_intrinsics (cfg, cmethod, fsig, args, vector16sb_intrinsics, sizeof (vector16sb_intrinsics) / sizeof (SimdIntrinsc));
return NULL;
}
SIMD_METHOD("AndNot", SN_AndNot)
SIMD_METHOD("Average", SN_Average)
SIMD_METHOD("CompareEqual", SN_CompareEqual)
-SIMD_METHOD("CompareEquals", SN_CompareEquals)
+SIMD_METHOD("CompareGreaterThan", SN_CompareGreaterThan)
SIMD_METHOD("CompareLessEqual", SN_CompareLessEqual)
SIMD_METHOD("CompareLessThan", SN_CompareLessThan)
SIMD_METHOD("CompareNotEqual", SN_CompareNotEqual)