From 69d64688edbaabb6d12aa460b3f85e3fbad394c2 Mon Sep 17 00:00:00 2001 From: Rodrigo Kumpera Date: Fri, 21 Nov 2008 00:49:19 +0000 Subject: [PATCH] 2008-11-20 Rodrigo Kumpera * mini-ops.h: Added OP_INSERTX_I8_SLOW. * mini-x86.c (mono_arch_decompose_long_opts): Decompose OP_INSERTX_I8_SLOW. * simd-intrinsics.c: Add support for Vector2l and Vector2ul. svn path=/trunk/mono/; revision=119563 --- mono/mini/ChangeLog | 8 ++++ mono/mini/mini-ops.h | 1 + mono/mini/mini-x86.c | 79 +++++++++++++++++++++++-------------- mono/mini/simd-intrinsics.c | 15 ++++--- 4 files changed, 66 insertions(+), 37 deletions(-) diff --git a/mono/mini/ChangeLog b/mono/mini/ChangeLog index 1dc96ee1b84..9ff060553f4 100644 --- a/mono/mini/ChangeLog +++ b/mono/mini/ChangeLog @@ -1,3 +1,11 @@ +2008-11-20 Rodrigo Kumpera + + * mini-ops.h: Added OP_INSERTX_I8_SLOW. + + * mini-x86.c (mono_arch_decompose_long_opts): Decompose OP_INSERTX_I8_SLOW. + + * simd-intrinsics.c: Add support for Vector2l and Vector2ul. + 2008-11-21 Mark Probst * mini-ppc64.c, mini-ppc64.h, cpu-ppc64.md: Several fixes. 
Now diff --git a/mono/mini/mini-ops.h b/mono/mini/mini-ops.h index a2343d6d3fd..ee15a481b43 100644 --- a/mono/mini/mini-ops.h +++ b/mono/mini/mini-ops.h @@ -754,6 +754,7 @@ MINI_OP(OP_INSERTX_I4_SLOW, "insertx_i4_slow", XREG, XREG, IREG) MINI_OP(OP_INSERTX_R4_SLOW, "insertx_r4_slow", XREG, XREG, FREG) MINI_OP(OP_INSERTX_R8_SLOW, "insertx_r8_slow", XREG, XREG, FREG) +MINI_OP(OP_INSERTX_I8_SLOW, "insertx_i8_slow", XREG, XREG, IREG) MINI_OP(OP_FCONV_TO_R8_X, "fconv_to_r8_x", XREG, FREG, NONE) MINI_OP(OP_XCONV_R8_TO_I4, "xconv_r8_to_i4", IREG, XREG, NONE) diff --git a/mono/mini/mini-x86.c b/mono/mini/mini-x86.c index b4e4cdb5bea..c1c6eaae1b2 100644 --- a/mono/mini/mini-x86.c +++ b/mono/mini/mini-x86.c @@ -5379,45 +5379,66 @@ mono_arch_decompose_long_opts (MonoCompile *cfg, MonoInst *long_ins) { MonoInst *ins; int vreg; - if (!(cfg->opt & MONO_OPT_SIMD) || long_ins->opcode != OP_EXTRACT_I8) + if (!(cfg->opt & MONO_OPT_SIMD)) return; + /*TODO move this to simd-intrinsic.c once we support sse 4.1 dword extractors since we need the runtime caps info */ - - vreg = long_ins->sreg1; - - if (long_ins->inst_c0) { + switch (long_ins->opcode) { + case OP_EXTRACT_I8: + vreg = long_ins->sreg1; + + if (long_ins->inst_c0) { + MONO_INST_NEW (cfg, ins, OP_PSHUFLED); + ins->klass = long_ins->klass; + ins->sreg1 = long_ins->sreg1; + ins->inst_c0 = 2; + ins->type = STACK_VTYPE; + ins->dreg = vreg = alloc_ireg (cfg); + MONO_ADD_INS (cfg->cbb, ins); + } + + MONO_INST_NEW (cfg, ins, OP_EXTRACT_I4); + ins->klass = mono_defaults.int32_class; + ins->sreg1 = vreg; + ins->type = STACK_I4; + ins->dreg = long_ins->dreg + 1; + MONO_ADD_INS (cfg->cbb, ins); + MONO_INST_NEW (cfg, ins, OP_PSHUFLED); ins->klass = long_ins->klass; ins->sreg1 = long_ins->sreg1; - ins->inst_c0 = 2; + ins->inst_c0 = long_ins->inst_c0 ? 
3 : 1; ins->type = STACK_VTYPE; ins->dreg = vreg = alloc_ireg (cfg); MONO_ADD_INS (cfg->cbb, ins); - } + + MONO_INST_NEW (cfg, ins, OP_EXTRACT_I4); + ins->klass = mono_defaults.int32_class; + ins->sreg1 = vreg; + ins->type = STACK_I4; + ins->dreg = long_ins->dreg + 2; + MONO_ADD_INS (cfg->cbb, ins); + + long_ins->opcode = OP_NOP; + break; + case OP_INSERTX_I8_SLOW: + MONO_INST_NEW (cfg, ins, OP_INSERTX_I4_SLOW); + ins->dreg = long_ins->dreg; + ins->sreg1 = long_ins->dreg; + ins->sreg2 = long_ins->sreg2 + 1; + ins->inst_c0 = long_ins->inst_c0 * 2; + MONO_ADD_INS (cfg->cbb, ins); - MONO_INST_NEW (cfg, ins, OP_EXTRACT_I4); - ins->klass = mono_defaults.int32_class; - ins->sreg1 = vreg; - ins->type = STACK_I4; - ins->dreg = long_ins->dreg + 1; - MONO_ADD_INS (cfg->cbb, ins); - - MONO_INST_NEW (cfg, ins, OP_PSHUFLED); - ins->klass = long_ins->klass; - ins->sreg1 = long_ins->sreg1; - ins->inst_c0 = long_ins->inst_c0 ? 3 : 1; - ins->type = STACK_VTYPE; - ins->dreg = vreg = alloc_ireg (cfg); - MONO_ADD_INS (cfg->cbb, ins); - - MONO_INST_NEW (cfg, ins, OP_EXTRACT_I4); - ins->klass = mono_defaults.int32_class; - ins->sreg1 = vreg; - ins->type = STACK_I4; - ins->dreg = long_ins->dreg + 2; - MONO_ADD_INS (cfg->cbb, ins); + MONO_INST_NEW (cfg, ins, OP_INSERTX_I4_SLOW); + ins->dreg = long_ins->dreg; + ins->sreg1 = long_ins->dreg; + ins->sreg2 = long_ins->sreg2 + 2; + ins->inst_c0 = long_ins->inst_c0 * 2 + 1; + MONO_ADD_INS (cfg->cbb, ins); - long_ins->opcode = OP_NOP; + long_ins->opcode = OP_NOP; + break; + } } #endif diff --git a/mono/mini/simd-intrinsics.c b/mono/mini/simd-intrinsics.c index 64fe6eb88b5..1d446bc474f 100644 --- a/mono/mini/simd-intrinsics.c +++ b/mono/mini/simd-intrinsics.c @@ -212,10 +212,6 @@ static const SimdIntrinsc vector2d_intrinsics[] = { { SN_set_Y, 1, SIMD_EMIT_SETTER }, }; -/* -Missing: -setters - */ static const SimdIntrinsc vector2ul_intrinsics[] = { { SN_ctor, 0, SIMD_EMIT_CTOR }, { SN_CompareEqual, OP_PCMPEQQ, SIMD_EMIT_BINARY, SIMD_VERSION_SSE41 
}, @@ -239,12 +235,10 @@ static const SimdIntrinsc vector2ul_intrinsics[] = { { SN_op_Multiply, OP_PMULQ, SIMD_EMIT_BINARY }, { SN_op_RightShift, OP_PSHRQ, SIMD_EMIT_SHIFT }, { SN_op_Subtraction, OP_PSUBQ, SIMD_EMIT_BINARY }, + { SN_set_X, 0, SIMD_EMIT_SETTER }, + { SN_set_Y, 1, SIMD_EMIT_SETTER }, }; -/* -Missing: -setters - */ static const SimdIntrinsc vector2l_intrinsics[] = { { SN_ctor, 0, SIMD_EMIT_CTOR }, { SN_CompareEqual, OP_PCMPEQQ, SIMD_EMIT_BINARY, SIMD_VERSION_SSE41 }, @@ -269,6 +263,8 @@ static const SimdIntrinsc vector2l_intrinsics[] = { { SN_op_LeftShift, OP_PSHLQ, SIMD_EMIT_SHIFT }, { SN_op_Multiply, OP_PMULQ, SIMD_EMIT_BINARY }, { SN_op_Subtraction, OP_PSUBQ, SIMD_EMIT_BINARY }, + { SN_set_X, 0, SIMD_EMIT_SETTER }, + { SN_set_Y, 1, SIMD_EMIT_SETTER }, }; static const SimdIntrinsc vector4ui_intrinsics[] = { @@ -930,6 +926,9 @@ mono_type_to_slow_insert_op (MonoType *type) case MONO_TYPE_I4: case MONO_TYPE_U4: return OP_INSERTX_I4_SLOW; + case MONO_TYPE_I8: + case MONO_TYPE_U8: + return OP_INSERTX_I8_SLOW; case MONO_TYPE_R4: return OP_INSERTX_R4_SLOW; case MONO_TYPE_R8: -- 2.25.1