* mini-ops.h: Add remaining sse1 fp ops.
* cpu-x86.md: Add remaining sse1 fp ops.
* mini-x86.c (mono_arch_output_basic_block): Same.
* mini.h: Add enum for simd FP compare conditions.
* simd-intrinsics.c (vector4f_intrinsics): Add all new ops.
* simd-intrinsics.c (simd_intrinsic_emit_binary): Set inst_c0 to flags
so the backend can generate the appropriate op.
svn path=/trunk/mono/; revision=115701
+2008-10-13 Rodrigo Kumpera <rkumpera@novell.com>
+
+ * mini-ops.h: Add remaining sse1 fp ops.
+
+ * cpu-x86.md: Add remaining sse1 fp ops.
+
+ * mini-x86.c (mono_arch_output_basic_block): Same.
+
+ * mini.h: Add enum for simd FP compare conditions.
+
+ * simd-intrinsics.c (vector4f_intrinsics): Add all new ops.
+
+ * simd-intrinsics.c (simd_intrinsic_emit_binary): Set inst_c0 to flags
+ so the backend can generate the appropriate op.
+
2008-10-13 Rodrigo Kumpera <rkumpera@novell.com>
This patch squeezes one more byte from the SimdIntrinsc struct.
subps: dest:x src1:x src2:x len:3 clob:1
maxps: dest:x src1:x src2:x len:3 clob:1
minps: dest:x src1:x src2:x len:3 clob:1
+compps: dest:x src1:x src2:x len:4 clob:1
+andps: dest:x src1:x src2:x len:3 clob:1
+andnps: dest:x src1:x src2:x len:3 clob:1
+orps: dest:x src1:x src2:x len:3 clob:1
+xorps: dest:x src1:x src2:x len:3 clob:1
haddps: dest:x src1:x src2:x len:4 clob:1
hsubps: dest:x src1:x src2:x len:4 clob:1
sqrtps: dest:x src1:x len:4
rsqrtps: dest:x src1:x len:4
+rcpps: dest:x src1:x len:4
shuffleps: dest:x src1:x len:5
paddb: dest:x src1:x src2:x len:4 clob:1
unpack_loww: dest:x src1:x src2:x len:4 clob:1
unpack_lowd: dest:x src1:x src2:x len:4 clob:1
unpack_lowq: dest:x src1:x src2:x len:4 clob:1
+unpack_lowps: dest:x src1:x src2:x len:3 clob:1
unpack_highb: dest:x src1:x src2:x len:4 clob:1
unpack_highw: dest:x src1:x src2:x len:4 clob:1
unpack_highd: dest:x src1:x src2:x len:4 clob:1
unpack_highq: dest:x src1:x src2:x len:4 clob:1
+unpack_highps: dest:x src1:x src2:x len:3 clob:1
packw: dest:x src1:x src2:x len:4 clob:1
packd: dest:x src1:x src2:x len:5 clob:1
MINI_OP(OP_SUBPS, "subps", XREG, XREG, XREG)
MINI_OP(OP_MAXPS, "maxps", XREG, XREG, XREG)
MINI_OP(OP_MINPS, "minps", XREG, XREG, XREG)
+MINI_OP(OP_COMPPS, "compps", XREG, XREG, XREG)
+MINI_OP(OP_ANDPS, "andps", XREG, XREG, XREG)
+MINI_OP(OP_ANDNPS, "andnps", XREG, XREG, XREG)
+MINI_OP(OP_ORPS, "orps", XREG, XREG, XREG)
+MINI_OP(OP_XORPS, "xorps", XREG, XREG, XREG)
MINI_OP(OP_HADDPS, "haddps", XREG, XREG, XREG)
MINI_OP(OP_HSUBPS, "hsubps", XREG, XREG, XREG)
MINI_OP(OP_ADDSUBPS, "addsubps", XREG, XREG, XREG)
MINI_OP(OP_RSQRTPS, "rsqrtps", XREG, XREG, NONE)
MINI_OP(OP_SQRTPS, "sqrtps", XREG, XREG, NONE)
+MINI_OP(OP_RCPPS, "rcpps", XREG, XREG, NONE)
MINI_OP(OP_SHUFLEPS, "shuffleps", XREG, XREG, NONE)
MINI_OP(OP_PAND, "pand", XREG, XREG, XREG)
MINI_OP(OP_UNPACK_LOWW, "unpack_loww", XREG, XREG, XREG)
MINI_OP(OP_UNPACK_LOWD, "unpack_lowd", XREG, XREG, XREG)
MINI_OP(OP_UNPACK_LOWQ, "unpack_lowq", XREG, XREG, XREG)
+MINI_OP(OP_UNPACK_LOWPS, "unpack_lowps", XREG, XREG, XREG)
MINI_OP(OP_UNPACK_HIGHB, "unpack_highb", XREG, XREG, XREG)
MINI_OP(OP_UNPACK_HIGHW, "unpack_highw", XREG, XREG, XREG)
MINI_OP(OP_UNPACK_HIGHD, "unpack_highd", XREG, XREG, XREG)
MINI_OP(OP_UNPACK_HIGHQ, "unpack_highq", XREG, XREG, XREG)
+MINI_OP(OP_UNPACK_HIGHPS, "unpack_highps", XREG, XREG, XREG)
MINI_OP(OP_PACKW, "packw", XREG, XREG, XREG)
MINI_OP(OP_PACKD, "packd", XREG, XREG, XREG)
case OP_MINPS:
x86_sse_alu_ps_reg_reg (code, X86_SSE_MIN, ins->sreg1, ins->sreg2);
break;
+ case OP_COMPPS:
+ g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 7);
+ x86_sse_alu_ps_reg_reg_imm (code, X86_SSE_COMP, ins->sreg1, ins->sreg2, ins->inst_c0);
+ break;
+ case OP_ANDPS:
+ x86_sse_alu_ps_reg_reg (code, X86_SSE_AND, ins->sreg1, ins->sreg2);
+ break;
+ case OP_ANDNPS:
+ x86_sse_alu_ps_reg_reg (code, X86_SSE_ANDN, ins->sreg1, ins->sreg2);
+ break;
+ case OP_ORPS:
+ x86_sse_alu_ps_reg_reg (code, X86_SSE_OR, ins->sreg1, ins->sreg2);
+ break;
+ case OP_XORPS:
+ x86_sse_alu_ps_reg_reg (code, X86_SSE_XOR, ins->sreg1, ins->sreg2);
+ break;
case OP_SQRTPS:
x86_sse_alu_ps_reg_reg (code, X86_SSE_SQRT, ins->dreg, ins->sreg1);
break;
case OP_RSQRTPS:
x86_sse_alu_ps_reg_reg (code, X86_SSE_RSQRT, ins->dreg, ins->sreg1);
break;
+ case OP_RCPPS:
+ x86_sse_alu_ps_reg_reg (code, X86_SSE_RCP, ins->dreg, ins->sreg1);
+ break;
case OP_ADDSUBPS:
x86_sse_alu_sd_reg_reg (code, X86_SSE_ADDSUB, ins->sreg1, ins->sreg2);
break;
case OP_UNPACK_LOWD:
x86_sse_alu_pd_reg_reg (code, X86_SSE_PUNPCKLDQ, ins->sreg1, ins->sreg2);
break;
+ case OP_UNPACK_LOWPS:
+ x86_sse_alu_ps_reg_reg (code, X86_SSE_UNPCKL, ins->sreg1, ins->sreg2);
+ break;
case OP_UNPACK_HIGHB:
x86_sse_alu_pd_reg_reg (code, X86_SSE_PUNPCKHBW, ins->sreg1, ins->sreg2);
case OP_UNPACK_HIGHD:
x86_sse_alu_pd_reg_reg (code, X86_SSE_PUNPCKHDQ, ins->sreg1, ins->sreg2);
break;
+ case OP_UNPACK_HIGHPS:
+ x86_sse_alu_ps_reg_reg (code, X86_SSE_UNPCKH, ins->sreg1, ins->sreg2);
+ break;
case OP_PACKW:
x86_sse_alu_pd_reg_reg (code, X86_SSE_PACKUSWB, ins->sreg1, ins->sreg2);
SIMD_VERSION_SSE4a = 6,
};
+enum { /* SIMD FP compare conditions. Values are implicit (0..7), which is exactly the range the OP_COMPPS emitter asserts for inst_c0 before passing it on as the instruction immediate. */
+ SIMD_COMP_EQ, /* 0 - selected by the "CompareEquals" intrinsic */
+ SIMD_COMP_LT, /* 1 - selected by "CompareLessThan" */
+ SIMD_COMP_LE, /* 2 - selected by "CompareLessEqual" */
+ SIMD_COMP_UNORD, /* 3 - selected by "CompareUnordered" */
+ SIMD_COMP_NEQ, /* 4 - selected by "CompareNotEqual" */
+ SIMD_COMP_NLT, /* 5 - selected by "CompareNotLessThan" */
+ SIMD_COMP_NLE, /* 6 - selected by "CompareNotLessEqual" */
+ SIMD_COMP_ORD /* 7 - selected by "CompareOrdered". NOTE(review): the order appears to mirror the SSE CMPPS imm8 predicate encoding; confirm before reordering or inserting entries. */
+};
+
const char *mono_arch_xregname (int reg) MONO_INTERNAL;
void mono_simd_simplify_indirection (MonoCompile *cfg) MONO_INTERNAL;
MonoInst* mono_emit_simd_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args) MONO_INTERNAL;
*/
static const SimdIntrinsc vector4f_intrinsics[] = {
{ ".ctor", 0, SIMD_EMIT_CTOR },
+ { "AndNot", OP_ANDNPS, SIMD_EMIT_BINARY },
{ "AddSub", OP_ADDSUBPS, SIMD_EMIT_BINARY, SIMD_VERSION_SSE3 },
+ { "CompareEquals", OP_COMPPS, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1, SIMD_COMP_EQ },
+ { "CompareLessEqual", OP_COMPPS, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1, SIMD_COMP_LE },
+ { "CompareLessThan", OP_COMPPS, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1, SIMD_COMP_LT },
+ { "CompareNotEqual", OP_COMPPS, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1, SIMD_COMP_NEQ },
+ { "CompareNotLessEqual", OP_COMPPS, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1, SIMD_COMP_NLE },
+ { "CompareNotLessThan", OP_COMPPS, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1, SIMD_COMP_NLT },
+ { "CompareOrdered", OP_COMPPS, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1, SIMD_COMP_ORD },
+ { "CompareUnordered", OP_COMPPS, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1, SIMD_COMP_UNORD },
{ "HorizontalAdd", OP_HADDPS, SIMD_EMIT_BINARY, SIMD_VERSION_SSE3 },
{ "HorizontalSub", OP_HSUBPS, SIMD_EMIT_BINARY, SIMD_VERSION_SSE3 },
+ { "InterleaveHigh", OP_UNPACK_HIGHPS, SIMD_EMIT_BINARY },
+ { "InterleaveLow", OP_UNPACK_LOWPS, SIMD_EMIT_BINARY },
{ "InvSqrt", OP_RSQRTPS, SIMD_EMIT_UNARY },
{ "LoadAligned", 0, SIMD_EMIT_LOAD_ALIGNED },
{ "Max", OP_MAXPS, SIMD_EMIT_BINARY },
{ "Min", OP_MINPS, SIMD_EMIT_BINARY },
+ { "Reciprocal", OP_RCPPS, SIMD_EMIT_UNARY },
{ "Shuffle", 0, SIMD_EMIT_SHUFFLE },
{ "Sqrt", OP_SQRTPS, SIMD_EMIT_UNARY },
{ "StoreAligned", 0, SIMD_EMIT_STORE_ALIGNED },
{ "get_Y", 1, SIMD_EMIT_GETTER },
{ "get_Z", 2, SIMD_EMIT_GETTER },
{ "op_Addition", OP_ADDPS, SIMD_EMIT_BINARY },
+ { "op_BitwiseAnd", OP_ANDPS, SIMD_EMIT_BINARY },
+ { "op_BitwiseOr", OP_ORPS, SIMD_EMIT_BINARY },
{ "op_Division", OP_DIVPS, SIMD_EMIT_BINARY },
+ { "op_ExclusiveOr", OP_XORPS, SIMD_EMIT_BINARY },
{ "op_Explicit", 0, SIMD_EMIT_CAST },
{ "op_Multiply", OP_MULPS, SIMD_EMIT_BINARY },
{ "op_Subtraction", OP_SUBPS, SIMD_EMIT_BINARY },
ins->type = STACK_VTYPE;
ins->klass = cmethod->klass;
ins->dreg = alloc_ireg (cfg);
+ ins->inst_c0 = intrinsic->flags;
MONO_ADD_INS (cfg->cbb, ins);
return ins;
}