From 6c5925ffda97fb01d3477856c76aad1e85f1f293 Mon Sep 17 00:00:00 2001 From: Rodrigo Kumpera Date: Tue, 28 Oct 2008 19:24:40 +0000 Subject: [PATCH] 2008-10-28 Rodrigo Kumpera * cpu-x86.md: Add long version of most packed int ops. * mini-ops.h: Same. * mini-x86.c: Same. * simd-intrinsics.c: Add new vector type Vector2l. svn path=/trunk/mono/; revision=117294 --- mono/mini/ChangeLog | 10 ++++++++++ mono/mini/cpu-x86.md | 36 ++++++++++++++++++++++++------------ mono/mini/mini-ops.h | 11 +++++++++++ mono/mini/mini-x86.c | 35 +++++++++++++++++++++++++++++++++++ mono/mini/simd-intrinsics.c | 27 +++++++++++++++++++++++++++ 5 files changed, 107 insertions(+), 12 deletions(-) diff --git a/mono/mini/ChangeLog b/mono/mini/ChangeLog index 9ead8bdb58d..76d46ffa39f 100644 --- a/mono/mini/ChangeLog +++ b/mono/mini/ChangeLog @@ -1,3 +1,13 @@ +2008-10-28 Rodrigo Kumpera + + * cpu-x86.md: Add long version of most packed int ops. + + * mini-ops.h: Same. + + * mini-x86.c: Same. + + * simd-intrinsics.c: Add new vector type Vector2l. + 2008-10-28 Rodrigo Kumpera * simd-intrinsics.c: Replace SN_op_BitwiseXor with SN_op_ExclusiveOr. 
diff --git a/mono/mini/cpu-x86.md b/mono/mini/cpu-x86.md index 05f558a9c23..79ce25a81f0 100644 --- a/mono/mini/cpu-x86.md +++ b/mono/mini/cpu-x86.md @@ -462,10 +462,12 @@ extract_mask: dest:i src1:x len:4 paddb: dest:x src1:x src2:x len:4 clob:1 paddw: dest:x src1:x src2:x len:4 clob:1 paddd: dest:x src1:x src2:x len:4 clob:1 +paddq: dest:x src1:x src2:x len:4 clob:1 psubb: dest:x src1:x src2:x len:4 clob:1 psubw: dest:x src1:x src2:x len:4 clob:1 psubd: dest:x src1:x src2:x len:4 clob:1 +psubq: dest:x src1:x src2:x len:4 clob:1 pmaxb_un: dest:x src1:x src2:x len:4 clob:1 pmaxw_un: dest:x src1:x src2:x len:5 clob:1 @@ -489,10 +491,12 @@ pmind: dest:x src1:x src2:x len:5 clob:1 pcmpeqb: dest:x src1:x src2:x len:4 clob:1 pcmpeqw: dest:x src1:x src2:x len:4 clob:1 pcmpeqd: dest:x src1:x src2:x len:4 clob:1 +pcmpeqq: dest:x src1:x src2:x len:5 clob:1 pcmpgtb: dest:x src1:x src2:x len:4 clob:1 pcmpgtw: dest:x src1:x src2:x len:4 clob:1 pcmpgtd: dest:x src1:x src2:x len:4 clob:1 +pcmpgtq: dest:x src1:x src2:x len:5 clob:1 psumabsdiff: dest:x src1:x src2:x len:4 clob:1 @@ -530,26 +534,34 @@ psubw_sat_un: dest:x src1:x src2:x len:4 clob:1 pmulw: dest:x src1:x src2:x len:4 clob:1 pmuld: dest:x src1:x src2:x len:5 clob:1 +pmulq: dest:x src1:x src2:x len:4 clob:1 + pmul_high_un: dest:x src1:x src2:x len:4 clob:1 pmul_high: dest:x src1:x src2:x len:4 clob:1 -pshrw: dest:x src1:x len:8 clob:1 -pshrw_reg: dest:x src1:x src2:x len:8 clob:1 +pshrw: dest:x src1:x len:5 clob:1 +pshrw_reg: dest:x src1:x src2:x len:4 clob:1 + +psarw: dest:x src1:x len:5 clob:1 +psarw_reg: dest:x src1:x src2:x len:4 clob:1 + +pshlw: dest:x src1:x len:5 clob:1 +pshlw_reg: dest:x src1:x src2:x len:4 clob:1 -psarw: dest:x src1:x len:8 clob:1 -psarw_reg: dest:x src1:x src2:x len:8 clob:1 +pshrd: dest:x src1:x len:5 clob:1 +pshrd_reg: dest:x src1:x src2:x len:4 clob:1 -pshlw: dest:x src1:x len:8 clob:1 -pshlw_reg: dest:x src1:x src2:x len:8 clob:1 +psard: dest:x src1:x len:5 clob:1 +psard_reg: dest:x src1:x 
src2:x len:4 clob:1 -pshrd: dest:x src1:x len:8 clob:1 -pshrd_reg: dest:x src1:x src2:x len:8 clob:1 +pshld: dest:x src1:x len:5 clob:1 +pshld_reg: dest:x src1:x src2:x len:4 clob:1 -psard: dest:x src1:x len:8 clob:1 -psard_reg: dest:x src1:x src2:x len:8 clob:1 +pshrq: dest:x src1:x len:5 clob:1 +pshrq_reg: dest:x src1:x src2:x len:4 clob:1 -pshld: dest:x src1:x len:8 clob:1 -pshld_reg: dest:x src1:x src2:x len:8 clob:1 +pshlq: dest:x src1:x len:5 clob:1 +pshlq_reg: dest:x src1:x src2:x len:4 clob:1 xmove: dest:x src1:x len:4 xzero: dest:x len:4 diff --git a/mono/mini/mini-ops.h b/mono/mini/mini-ops.h index 92b5245970c..1ef60a64e1e 100644 --- a/mono/mini/mini-ops.h +++ b/mono/mini/mini-ops.h @@ -684,10 +684,12 @@ MINI_OP(OP_PXOR, "pxor", XREG, XREG, XREG) MINI_OP(OP_PADDB, "paddb", XREG, XREG, XREG) MINI_OP(OP_PADDW, "paddw", XREG, XREG, XREG) MINI_OP(OP_PADDD, "paddd", XREG, XREG, XREG) +MINI_OP(OP_PADDQ, "paddq", XREG, XREG, XREG) MINI_OP(OP_PSUBB, "psubb", XREG, XREG, XREG) MINI_OP(OP_PSUBW, "psubw", XREG, XREG, XREG) MINI_OP(OP_PSUBD, "psubd", XREG, XREG, XREG) +MINI_OP(OP_PSUBQ, "psubq", XREG, XREG, XREG) MINI_OP(OP_PMAXB_UN, "pmaxb_un", XREG, XREG, XREG) MINI_OP(OP_PMAXW_UN, "pmaxw_un", XREG, XREG, XREG) @@ -711,10 +713,12 @@ MINI_OP(OP_PMIND, "pmind", XREG, XREG, XREG) MINI_OP(OP_PCMPEQB, "pcmpeqb", XREG, XREG, XREG) MINI_OP(OP_PCMPEQW, "pcmpeqw", XREG, XREG, XREG) MINI_OP(OP_PCMPEQD, "pcmpeqd", XREG, XREG, XREG) +MINI_OP(OP_PCMPEQQ, "pcmpeqq", XREG, XREG, XREG) MINI_OP(OP_PCMPGTB, "pcmpgtb", XREG, XREG, XREG) MINI_OP(OP_PCMPGTW, "pcmpgtw", XREG, XREG, XREG) MINI_OP(OP_PCMPGTD, "pcmpgtd", XREG, XREG, XREG) +MINI_OP(OP_PCMPGTQ, "pcmpgtq", XREG, XREG, XREG) MINI_OP(OP_PSUM_ABS_DIFF, "psumabsdiff", XREG, XREG, XREG) @@ -752,6 +756,7 @@ MINI_OP(OP_PSUBW_SAT_UN, "psubw_sat_un", XREG, XREG, XREG) MINI_OP(OP_PMULW, "pmulw", XREG, XREG, XREG) MINI_OP(OP_PMULD, "pmuld", XREG, XREG, XREG) +MINI_OP(OP_PMULQ, "pmulq", XREG, XREG, XREG) MINI_OP(OP_PMULW_HIGH_UN, 
"pmul_high_un", XREG, XREG, XREG) MINI_OP(OP_PMULW_HIGH, "pmul_high", XREG, XREG, XREG) @@ -769,12 +774,18 @@ MINI_OP(OP_PSHLW_REG, "pshlw_reg", XREG, XREG, XREG) MINI_OP(OP_PSHRD, "pshrd", XREG, XREG, NONE) MINI_OP(OP_PSHRD_REG, "pshrd_reg", XREG, XREG, XREG) +MINI_OP(OP_PSHRQ, "pshrq", XREG, XREG, NONE) +MINI_OP(OP_PSHRQ_REG, "pshrq_reg", XREG, XREG, XREG) + MINI_OP(OP_PSARD, "psard", XREG, XREG, NONE) MINI_OP(OP_PSARD_REG, "psard_reg", XREG, XREG, XREG) MINI_OP(OP_PSHLD, "pshld", XREG, XREG, NONE) MINI_OP(OP_PSHLD_REG, "pshld_reg", XREG, XREG, XREG) +MINI_OP(OP_PSHLQ, "pshlq", XREG, XREG, NONE) +MINI_OP(OP_PSHLQ_REG, "pshlq_reg", XREG, XREG, XREG) + MINI_OP(OP_EXTRACT_I4, "extract_i4", IREG, XREG, NONE) MINI_OP(OP_ICONV_TO_R8_RAW, "iconv_to_r8_raw", FREG, IREG, NONE) MINI_OP(OP_LOADX_R4, "loadx_r4", FREG, IREG, NONE) diff --git a/mono/mini/mini-x86.c b/mono/mini/mini-x86.c index ffb3666e3a0..d72fb410191 100644 --- a/mono/mini/mini-x86.c +++ b/mono/mini/mini-x86.c @@ -3949,6 +3949,9 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) case OP_PADDD: x86_sse_alu_pd_reg_reg (code, X86_SSE_PADDD, ins->sreg1, ins->sreg2); break; + case OP_PADDQ: + x86_sse_alu_pd_reg_reg (code, X86_SSE_PADDQ, ins->sreg1, ins->sreg2); + break; case OP_PSUBB: x86_sse_alu_pd_reg_reg (code, X86_SSE_PSUBB, ins->sreg1, ins->sreg2); @@ -3959,6 +3962,9 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) case OP_PSUBD: x86_sse_alu_pd_reg_reg (code, X86_SSE_PSUBD, ins->sreg1, ins->sreg2); break; + case OP_PSUBQ: + x86_sse_alu_pd_reg_reg (code, X86_SSE_PSUBQ, ins->sreg1, ins->sreg2); + break; case OP_PMAXB_UN: x86_sse_alu_pd_reg_reg (code, X86_SSE_PMAXUB, ins->sreg1, ins->sreg2); @@ -4016,6 +4022,9 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) case OP_PCMPEQD: x86_sse_alu_pd_reg_reg (code, X86_SSE_PCMPEQD, ins->sreg1, ins->sreg2); break; + case OP_PCMPEQQ: + x86_sse_alu_sse41_reg_reg (code, X86_SSE_PCMPEQQ, ins->sreg1, ins->sreg2); + 
break; case OP_PCMPGTB: x86_sse_alu_pd_reg_reg (code, X86_SSE_PCMPGTB, ins->sreg1, ins->sreg2); @@ -4026,6 +4035,9 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) case OP_PCMPGTD: x86_sse_alu_pd_reg_reg (code, X86_SSE_PCMPGTD, ins->sreg1, ins->sreg2); break; + case OP_PCMPGTQ: + x86_sse_alu_sse41_reg_reg (code, X86_SSE_PCMPGTQ, ins->sreg1, ins->sreg2); + break; case OP_PSUM_ABS_DIFF: x86_sse_alu_pd_reg_reg (code, X86_SSE_PSADBW, ins->sreg1, ins->sreg2); @@ -4040,6 +4052,9 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) case OP_UNPACK_LOWD: x86_sse_alu_pd_reg_reg (code, X86_SSE_PUNPCKLDQ, ins->sreg1, ins->sreg2); break; + case OP_UNPACK_LOWQ: + x86_sse_alu_pd_reg_reg (code, X86_SSE_PUNPCKLQDQ, ins->sreg1, ins->sreg2); + break; case OP_UNPACK_LOWPS: x86_sse_alu_ps_reg_reg (code, X86_SSE_UNPCKL, ins->sreg1, ins->sreg2); break; @@ -4056,6 +4071,9 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) case OP_UNPACK_HIGHD: x86_sse_alu_pd_reg_reg (code, X86_SSE_PUNPCKHDQ, ins->sreg1, ins->sreg2); break; + case OP_UNPACK_HIGHQ: + x86_sse_alu_pd_reg_reg (code, X86_SSE_PUNPCKHQDQ, ins->sreg1, ins->sreg2); + break; case OP_UNPACK_HIGHPS: x86_sse_alu_ps_reg_reg (code, X86_SSE_UNPCKH, ins->sreg1, ins->sreg2); break; @@ -4108,6 +4126,9 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) case OP_PMULD: x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMULLD, ins->sreg1, ins->sreg2); break; + case OP_PMULQ: + x86_sse_alu_pd_reg_reg (code, X86_SSE_PMULUDQ, ins->sreg1, ins->sreg2); + break; case OP_PMULW_HIGH_UN: x86_sse_alu_pd_reg_reg (code, X86_SSE_PMULHUW, ins->sreg1, ins->sreg2); break; @@ -4157,6 +4178,20 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) x86_sse_shift_reg_reg (code, X86_SSE_PSLLD_REG, ins->dreg, ins->sreg2); break; + case OP_PSHRQ: + x86_sse_shift_reg_imm (code, X86_SSE_PSHIFTQ, X86_SSE_SHR, ins->dreg, ins->inst_imm); + break; + case OP_PSHRQ_REG: + 
x86_sse_shift_reg_reg (code, X86_SSE_PSRLQ_REG, ins->dreg, ins->sreg2); + break; + + case OP_PSHLQ: + x86_sse_shift_reg_imm (code, X86_SSE_PSHIFTQ, X86_SSE_SHL, ins->dreg, ins->inst_imm); + break; + case OP_PSHLQ_REG: + x86_sse_shift_reg_reg (code, X86_SSE_PSLLQ_REG, ins->dreg, ins->sreg2); + break; + case OP_ICONV_TO_X: x86_movd_xreg_reg (code, ins->dreg, ins->sreg1); break; diff --git a/mono/mini/simd-intrinsics.c b/mono/mini/simd-intrinsics.c index 613b6da602c..7d2eef53ee4 100644 --- a/mono/mini/simd-intrinsics.c +++ b/mono/mini/simd-intrinsics.c @@ -200,6 +200,31 @@ static const SimdIntrinsc vector2d_intrinsics[] = { { SN_op_Subtraction, OP_SUBPD, SIMD_EMIT_BINARY }, }; +/* +Missing: +.ctor +getters +setters + */ +static const SimdIntrinsc vector2l_intrinsics[] = { + { SN_CompareEqual, OP_PCMPEQQ, SIMD_EMIT_BINARY, SIMD_VERSION_SSE41 }, + { SN_CompareGreaterThan, OP_PCMPGTQ, SIMD_EMIT_BINARY, SIMD_VERSION_SSE41 }, + { SN_ExtractByteMask, 0, SIMD_EMIT_EXTRACT_MASK }, + { SN_LoadAligned, 0, SIMD_EMIT_LOAD_ALIGNED }, + { SN_ShiftRightLogic, OP_PSHRQ, SIMD_EMIT_SHIFT }, + { SN_StoreAligned, 0, SIMD_EMIT_STORE_ALIGNED }, + { SN_UnpackHigh, OP_UNPACK_HIGHQ, SIMD_EMIT_BINARY }, + { SN_UnpackLow, OP_UNPACK_LOWQ, SIMD_EMIT_BINARY }, + { SN_op_Addition, OP_PADDQ, SIMD_EMIT_BINARY }, + { SN_op_BitwiseAnd, OP_PAND, SIMD_EMIT_BINARY }, + { SN_op_BitwiseOr, OP_POR, SIMD_EMIT_BINARY }, + { SN_op_ExclusiveOr, OP_PXOR, SIMD_EMIT_BINARY }, + { SN_op_Explicit, 0, SIMD_EMIT_CAST }, + { SN_op_LeftShift, OP_PSHLQ, SIMD_EMIT_SHIFT }, + { SN_op_Multiply, OP_PMULQ, SIMD_EMIT_BINARY }, + { SN_op_Subtraction, OP_PSUBQ, SIMD_EMIT_BINARY }, +}; + /* Missing: .ctor @@ -960,6 +985,8 @@ mono_emit_simd_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSign return emit_intrinsics (cfg, cmethod, fsig, args, vector2d_intrinsics, sizeof (vector2d_intrinsics) / sizeof (SimdIntrinsc)); if (!strcmp ("Vector4f", cmethod->klass->name)) return emit_intrinsics (cfg, cmethod, fsig, args, 
vector4f_intrinsics, sizeof (vector4f_intrinsics) / sizeof (SimdIntrinsc)); + if (!strcmp ("Vector2l", cmethod->klass->name)) + return emit_intrinsics (cfg, cmethod, fsig, args, vector2l_intrinsics, sizeof (vector2l_intrinsics) / sizeof (SimdIntrinsc)); if (!strcmp ("Vector4ui", cmethod->klass->name)) return emit_intrinsics (cfg, cmethod, fsig, args, vector4ui_intrinsics, sizeof (vector4ui_intrinsics) / sizeof (SimdIntrinsc)); if (!strcmp ("Vector4i", cmethod->klass->name)) -- 2.25.1