* cpu-x86.md: Add long (64-bit) versions of most packed int ops.
* mini-ops.h: Same.
* mini-x86.h: Same.
* simd-intrinsics.c: Add new vector type Vector2l.
svn path=/trunk/mono/; revision=117294
+2008-10-28 Rodrigo Kumpera <rkumpera@novell.com>
+
+ * cpu-x86.md: Add long (64-bit) versions of most packed int ops.
+
+ * mini-ops.h: Same.
+
+ * mini-x86.h: Same.
+
+ * simd-intrinsics.c: Add new vector type Vector2l.
+
2008-10-28 Rodrigo Kumpera <rkumpera@novell.com>
* simd-intrinsics.c: Replace SN_op_BitwiseXor with SN_op_ExclusiveOr.
paddb: dest:x src1:x src2:x len:4 clob:1
paddw: dest:x src1:x src2:x len:4 clob:1
paddd: dest:x src1:x src2:x len:4 clob:1
+paddq: dest:x src1:x src2:x len:4 clob:1
psubb: dest:x src1:x src2:x len:4 clob:1
psubw: dest:x src1:x src2:x len:4 clob:1
psubd: dest:x src1:x src2:x len:4 clob:1
+psubq: dest:x src1:x src2:x len:4 clob:1
pmaxb_un: dest:x src1:x src2:x len:4 clob:1
pmaxw_un: dest:x src1:x src2:x len:5 clob:1
pcmpeqb: dest:x src1:x src2:x len:4 clob:1
pcmpeqw: dest:x src1:x src2:x len:4 clob:1
pcmpeqd: dest:x src1:x src2:x len:4 clob:1
+pcmpeqq: dest:x src1:x src2:x len:5 clob:1
pcmpgtb: dest:x src1:x src2:x len:4 clob:1
pcmpgtw: dest:x src1:x src2:x len:4 clob:1
pcmpgtd: dest:x src1:x src2:x len:4 clob:1
+pcmpgtq: dest:x src1:x src2:x len:5 clob:1
psumabsdiff: dest:x src1:x src2:x len:4 clob:1
pmulw: dest:x src1:x src2:x len:4 clob:1
pmuld: dest:x src1:x src2:x len:5 clob:1
+pmulq: dest:x src1:x src2:x len:4 clob:1
+
pmul_high_un: dest:x src1:x src2:x len:4 clob:1
pmul_high: dest:x src1:x src2:x len:4 clob:1
-pshrw: dest:x src1:x len:8 clob:1
-pshrw_reg: dest:x src1:x src2:x len:8 clob:1
+pshrw: dest:x src1:x len:5 clob:1
+pshrw_reg: dest:x src1:x src2:x len:4 clob:1
+
+psarw: dest:x src1:x len:5 clob:1
+psarw_reg: dest:x src1:x src2:x len:4 clob:1
+
+pshlw: dest:x src1:x len:5 clob:1
+pshlw_reg: dest:x src1:x src2:x len:4 clob:1
-psarw: dest:x src1:x len:8 clob:1
-psarw_reg: dest:x src1:x src2:x len:8 clob:1
+pshrd: dest:x src1:x len:5 clob:1
+pshrd_reg: dest:x src1:x src2:x len:4 clob:1
-pshlw: dest:x src1:x len:8 clob:1
-pshlw_reg: dest:x src1:x src2:x len:8 clob:1
+psard: dest:x src1:x len:5 clob:1
+psard_reg: dest:x src1:x src2:x len:4 clob:1
-pshrd: dest:x src1:x len:8 clob:1
-pshrd_reg: dest:x src1:x src2:x len:8 clob:1
+pshld: dest:x src1:x len:5 clob:1
+pshld_reg: dest:x src1:x src2:x len:4 clob:1
-psard: dest:x src1:x len:8 clob:1
-psard_reg: dest:x src1:x src2:x len:8 clob:1
+pshrq: dest:x src1:x len:5 clob:1
+pshrq_reg: dest:x src1:x src2:x len:4 clob:1
-pshld: dest:x src1:x len:8 clob:1
-pshld_reg: dest:x src1:x src2:x len:8 clob:1
+pshlq: dest:x src1:x len:5 clob:1
+pshlq_reg: dest:x src1:x src2:x len:4 clob:1
xmove: dest:x src1:x len:4
xzero: dest:x len:4
MINI_OP(OP_PADDB, "paddb", XREG, XREG, XREG)
MINI_OP(OP_PADDW, "paddw", XREG, XREG, XREG)
MINI_OP(OP_PADDD, "paddd", XREG, XREG, XREG)
+MINI_OP(OP_PADDQ, "paddq", XREG, XREG, XREG)
MINI_OP(OP_PSUBB, "psubb", XREG, XREG, XREG)
MINI_OP(OP_PSUBW, "psubw", XREG, XREG, XREG)
MINI_OP(OP_PSUBD, "psubd", XREG, XREG, XREG)
+MINI_OP(OP_PSUBQ, "psubq", XREG, XREG, XREG)
MINI_OP(OP_PMAXB_UN, "pmaxb_un", XREG, XREG, XREG)
MINI_OP(OP_PMAXW_UN, "pmaxw_un", XREG, XREG, XREG)
MINI_OP(OP_PCMPEQB, "pcmpeqb", XREG, XREG, XREG)
MINI_OP(OP_PCMPEQW, "pcmpeqw", XREG, XREG, XREG)
MINI_OP(OP_PCMPEQD, "pcmpeqd", XREG, XREG, XREG)
+MINI_OP(OP_PCMPEQQ, "pcmpeqq", XREG, XREG, XREG)
MINI_OP(OP_PCMPGTB, "pcmpgtb", XREG, XREG, XREG)
MINI_OP(OP_PCMPGTW, "pcmpgtw", XREG, XREG, XREG)
MINI_OP(OP_PCMPGTD, "pcmpgtd", XREG, XREG, XREG)
+MINI_OP(OP_PCMPGTQ, "pcmpgtq", XREG, XREG, XREG)
MINI_OP(OP_PSUM_ABS_DIFF, "psumabsdiff", XREG, XREG, XREG)
MINI_OP(OP_PMULW, "pmulw", XREG, XREG, XREG)
MINI_OP(OP_PMULD, "pmuld", XREG, XREG, XREG)
+MINI_OP(OP_PMULQ, "pmulq", XREG, XREG, XREG)
MINI_OP(OP_PMULW_HIGH_UN, "pmul_high_un", XREG, XREG, XREG)
MINI_OP(OP_PMULW_HIGH, "pmul_high", XREG, XREG, XREG)
MINI_OP(OP_PSHRD, "pshrd", XREG, XREG, NONE)
MINI_OP(OP_PSHRD_REG, "pshrd_reg", XREG, XREG, XREG)
+MINI_OP(OP_PSHRQ, "pshrq", XREG, XREG, NONE)
+MINI_OP(OP_PSHRQ_REG, "pshrq_reg", XREG, XREG, XREG)
+
MINI_OP(OP_PSARD, "psard", XREG, XREG, NONE)
MINI_OP(OP_PSARD_REG, "psard_reg", XREG, XREG, XREG)
MINI_OP(OP_PSHLD, "pshld", XREG, XREG, NONE)
MINI_OP(OP_PSHLD_REG, "pshld_reg", XREG, XREG, XREG)
+MINI_OP(OP_PSHLQ, "pshlq", XREG, XREG, NONE)
+MINI_OP(OP_PSHLQ_REG, "pshlq_reg", XREG, XREG, XREG)
+
MINI_OP(OP_EXTRACT_I4, "extract_i4", IREG, XREG, NONE)
MINI_OP(OP_ICONV_TO_R8_RAW, "iconv_to_r8_raw", FREG, IREG, NONE)
MINI_OP(OP_LOADX_R4, "loadx_r4", FREG, IREG, NONE)
case OP_PADDD:
x86_sse_alu_pd_reg_reg (code, X86_SSE_PADDD, ins->sreg1, ins->sreg2);
break;
+ case OP_PADDQ:
+ x86_sse_alu_pd_reg_reg (code, X86_SSE_PADDQ, ins->sreg1, ins->sreg2);
+ break;
case OP_PSUBB:
x86_sse_alu_pd_reg_reg (code, X86_SSE_PSUBB, ins->sreg1, ins->sreg2);
case OP_PSUBD:
x86_sse_alu_pd_reg_reg (code, X86_SSE_PSUBD, ins->sreg1, ins->sreg2);
break;
+ case OP_PSUBQ:
+ x86_sse_alu_pd_reg_reg (code, X86_SSE_PSUBQ, ins->sreg1, ins->sreg2);
+ break;
case OP_PMAXB_UN:
x86_sse_alu_pd_reg_reg (code, X86_SSE_PMAXUB, ins->sreg1, ins->sreg2);
case OP_PCMPEQD:
x86_sse_alu_pd_reg_reg (code, X86_SSE_PCMPEQD, ins->sreg1, ins->sreg2);
break;
+ case OP_PCMPEQQ:
+ x86_sse_alu_sse41_reg_reg (code, X86_SSE_PCMPEQQ, ins->sreg1, ins->sreg2);
+ break;
case OP_PCMPGTB:
x86_sse_alu_pd_reg_reg (code, X86_SSE_PCMPGTB, ins->sreg1, ins->sreg2);
case OP_PCMPGTD:
x86_sse_alu_pd_reg_reg (code, X86_SSE_PCMPGTD, ins->sreg1, ins->sreg2);
break;
+ case OP_PCMPGTQ:
+ x86_sse_alu_sse41_reg_reg (code, X86_SSE_PCMPGTQ, ins->sreg1, ins->sreg2);
+ break;
case OP_PSUM_ABS_DIFF:
x86_sse_alu_pd_reg_reg (code, X86_SSE_PSADBW, ins->sreg1, ins->sreg2);
case OP_UNPACK_LOWD:
x86_sse_alu_pd_reg_reg (code, X86_SSE_PUNPCKLDQ, ins->sreg1, ins->sreg2);
break;
+ case OP_UNPACK_LOWQ:
+ x86_sse_alu_pd_reg_reg (code, X86_SSE_PUNPCKLQDQ, ins->sreg1, ins->sreg2);
+ break;
case OP_UNPACK_LOWPS:
x86_sse_alu_ps_reg_reg (code, X86_SSE_UNPCKL, ins->sreg1, ins->sreg2);
break;
case OP_UNPACK_HIGHD:
x86_sse_alu_pd_reg_reg (code, X86_SSE_PUNPCKHDQ, ins->sreg1, ins->sreg2);
break;
+ case OP_UNPACK_HIGHQ:
+ x86_sse_alu_pd_reg_reg (code, X86_SSE_PUNPCKHQDQ, ins->sreg1, ins->sreg2);
+ break;
case OP_UNPACK_HIGHPS:
x86_sse_alu_ps_reg_reg (code, X86_SSE_UNPCKH, ins->sreg1, ins->sreg2);
break;
case OP_PMULD:
x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMULLD, ins->sreg1, ins->sreg2);
break;
+ case OP_PMULQ:
+ x86_sse_alu_pd_reg_reg (code, X86_SSE_PMULUDQ, ins->sreg1, ins->sreg2);
+ break;
case OP_PMULW_HIGH_UN:
x86_sse_alu_pd_reg_reg (code, X86_SSE_PMULHUW, ins->sreg1, ins->sreg2);
break;
x86_sse_shift_reg_reg (code, X86_SSE_PSLLD_REG, ins->dreg, ins->sreg2);
break;
+ case OP_PSHRQ:
+ x86_sse_shift_reg_imm (code, X86_SSE_PSHIFTQ, X86_SSE_SHR, ins->dreg, ins->inst_imm);
+ break;
+ case OP_PSHRQ_REG:
+ x86_sse_shift_reg_reg (code, X86_SSE_PSRLQ_REG, ins->dreg, ins->sreg2);
+ break;
+
+ case OP_PSHLQ:
+ x86_sse_shift_reg_imm (code, X86_SSE_PSHIFTQ, X86_SSE_SHL, ins->dreg, ins->inst_imm);
+ break;
+ case OP_PSHLQ_REG:
+ x86_sse_shift_reg_reg (code, X86_SSE_PSLLQ_REG, ins->dreg, ins->sreg2);
+ break;
+
case OP_ICONV_TO_X:
x86_movd_xreg_reg (code, ins->dreg, ins->sreg1);
break;
{ SN_op_Subtraction, OP_SUBPD, SIMD_EMIT_BINARY },
};
+/*
+Intrinsics table for the Vector2l type: each entry pairs a method name
+(SN_*) with the opcode to emit and the emitter kind (SIMD_EMIT_*) that
+handles it. Entries whose opcode is 0 carry no opcode in the table;
+presumably the emitter chooses the instruction itself - confirm against
+emit_intrinsics. The two compare entries additionally name a required
+SIMD version.
+
+Entries are listed in ascending SN_* name order; presumably the lookup
+depends on that ordering - verify before inserting new entries.
+
+NOTE(review): CompareGreaterThan is gated on SIMD_VERSION_SSE41, but the
+Intel SDM lists PCMPGTQ as an SSE4.2 instruction - confirm the gating.
+NOTE(review): OP_PMULQ is emitted as PMULUDQ, which per the Intel SDM
+multiplies only the low unsigned 32 bits of each 64-bit lane - confirm
+this is the intended semantics for op_Multiply.
+
+Missing:
+.ctor
+getters
+setters
+ */
+static const SimdIntrinsc vector2l_intrinsics[] = {
+ { SN_CompareEqual, OP_PCMPEQQ, SIMD_EMIT_BINARY, SIMD_VERSION_SSE41 },
+ { SN_CompareGreaterThan, OP_PCMPGTQ, SIMD_EMIT_BINARY, SIMD_VERSION_SSE41 },
+ { SN_ExtractByteMask, 0, SIMD_EMIT_EXTRACT_MASK },
+ { SN_LoadAligned, 0, SIMD_EMIT_LOAD_ALIGNED },
+ { SN_ShiftRightLogic, OP_PSHRQ, SIMD_EMIT_SHIFT },
+ { SN_StoreAligned, 0, SIMD_EMIT_STORE_ALIGNED },
+ { SN_UnpackHigh, OP_UNPACK_HIGHQ, SIMD_EMIT_BINARY },
+ { SN_UnpackLow, OP_UNPACK_LOWQ, SIMD_EMIT_BINARY },
+ { SN_op_Addition, OP_PADDQ, SIMD_EMIT_BINARY },
+ { SN_op_BitwiseAnd, OP_PAND, SIMD_EMIT_BINARY },
+ { SN_op_BitwiseOr, OP_POR, SIMD_EMIT_BINARY },
+ { SN_op_ExclusiveOr, OP_PXOR, SIMD_EMIT_BINARY },
+ { SN_op_Explicit, 0, SIMD_EMIT_CAST },
+ { SN_op_LeftShift, OP_PSHLQ, SIMD_EMIT_SHIFT },
+ { SN_op_Multiply, OP_PMULQ, SIMD_EMIT_BINARY },
+ { SN_op_Subtraction, OP_PSUBQ, SIMD_EMIT_BINARY },
+};
+
/*
Missing:
.ctor
return emit_intrinsics (cfg, cmethod, fsig, args, vector2d_intrinsics, sizeof (vector2d_intrinsics) / sizeof (SimdIntrinsc));
if (!strcmp ("Vector4f", cmethod->klass->name))
return emit_intrinsics (cfg, cmethod, fsig, args, vector4f_intrinsics, sizeof (vector4f_intrinsics) / sizeof (SimdIntrinsc));
+ if (!strcmp ("Vector2l", cmethod->klass->name))
+ return emit_intrinsics (cfg, cmethod, fsig, args, vector2l_intrinsics, sizeof (vector2l_intrinsics) / sizeof (SimdIntrinsc));
if (!strcmp ("Vector4ui", cmethod->klass->name))
return emit_intrinsics (cfg, cmethod, fsig, args, vector4ui_intrinsics, sizeof (vector4ui_intrinsics) / sizeof (SimdIntrinsc));
if (!strcmp ("Vector4i", cmethod->klass->name))