From 8580919c1b3cd347ee4632593f2a827d3de98953 Mon Sep 17 00:00:00 2001 From: Mark Probst Date: Tue, 24 Jun 2014 16:04:26 -0700 Subject: [PATCH] [amd64] Fast OP_LREM_IMM for power of two operands. This was the reason for one of our bad performance results in the paper "Clash of the Lambdas" by Biboudis, Palladinos, Smaragdakis: http://cgi.di.uoa.gr/~biboudis/clashofthelambdas.pdf --- mono/mini/basic-long.cs | 42 ++++++++++++++++++++++++++++++++++++++++ mono/mini/cpu-amd64.md | 1 + mono/mini/method-to-ir.c | 6 +++++- mono/mini/mini-amd64.c | 29 ++++++++++++++++++++++++++- mono/mini/mini.c | 2 ++ 5 files changed, 78 insertions(+), 2 deletions(-) diff --git a/mono/mini/basic-long.cs b/mono/mini/basic-long.cs index 96db6ca37fb..e0c5129052a 100644 --- a/mono/mini/basic-long.cs +++ b/mono/mini/basic-long.cs @@ -1211,5 +1211,47 @@ class Tests return (int)res; } + + public static int test_0_lrem_imm_2 () + { + long x = 245345634L; + return (int)(x % 2L); + } + + public static int test_1_lrem_imm_2 () + { + long x = 24534553245L; + return (int)(x % 2L); + } + + public static int test_1_lrem_imm_2_neg () + { + long x = -24534553245L; + return -(int)(x % 2L); + } + + public static int test_13_lrem_imm_32 () + { + long x = 17389L; + return (int)(x % 32L); + } + + public static int test_27_lrem_imm_32_neg () + { + long x = -2435323L; + return -(int)(x % 32L); + } + + public static int test_5_lrem_imm_large () + { + long x = 0x1000000005L; + return (int)(x % 0x40000000L); + } + + public static int test_5_lrem_imm_too_large () + { + long x = 0x1000000005L; + return (int)(x % 0x80000000L); + } } diff --git a/mono/mini/cpu-amd64.md b/mono/mini/cpu-amd64.md index 5107e999fa3..cf472d6e500 100644 --- a/mono/mini/cpu-amd64.md +++ b/mono/mini/cpu-amd64.md @@ -98,6 +98,7 @@ long_conv_to_u1: dest:i src1:i len:4 zext_i4: dest:i src1:i len:4 long_mul_imm: dest:i src1:i clob:1 len:12 +long_rem_imm: dest:a src1:a len:32 clob:d long_min: dest:i src1:i src2:i len:16 clob:1 long_min_un: dest:i 
src1:i src2:i len:16 clob:1 long_max: dest:i src1:i src2:i len:16 clob:1 diff --git a/mono/mini/method-to-ir.c b/mono/mini/method-to-ir.c index e667acb1664..8d9ac98d2ba 100644 --- a/mono/mini/method-to-ir.c +++ b/mono/mini/method-to-ir.c @@ -12364,7 +12364,11 @@ mono_op_to_op_imm (int opcode) case OP_LSHR: return OP_LSHR_IMM; case OP_LSHR_UN: - return OP_LSHR_UN_IMM; + return OP_LSHR_UN_IMM; +#ifdef TARGET_AMD64 + case OP_LREM: + return OP_LREM_IMM; +#endif case OP_COMPARE: return OP_COMPARE_IMM; diff --git a/mono/mini/mini-amd64.c b/mono/mini/mini-amd64.c index b5fed14d8d2..944f5557386 100644 --- a/mono/mini/mini-amd64.c +++ b/mono/mini/mini-amd64.c @@ -3306,9 +3306,10 @@ mono_arch_lowering_pass (MonoCompile *cfg, MonoBasicBlock *bb) case OP_IREM_UN_IMM: mono_decompose_op_imm (cfg, bb, ins); break; + case OP_LREM_IMM: case OP_IREM_IMM: /* Keep the opcode if we can implement it efficiently */ - if (!((ins->inst_imm > 0) && (mono_is_power_of_two (ins->inst_imm) != -1))) + if (!(amd64_is_imm32 (ins->inst_imm) && (ins->inst_imm > 0) && (mono_is_power_of_two (ins->inst_imm) != -1))) mono_decompose_op_imm (cfg, bb, ins); break; case OP_COMPARE_IMM: @@ -4503,6 +4504,32 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) amd64_div_reg (code, ins->sreg2, FALSE); } break; + case OP_LREM_IMM: { + int power = mono_is_power_of_two (ins->inst_imm); + + g_assert (ins->sreg1 == AMD64_RAX); + g_assert (ins->dreg == AMD64_RAX); + g_assert (power >= 0); + + if (power == 0) { + amd64_mov_reg_imm (code, ins->dreg, 0); + break; + } + + /* Based on gcc code */ + + /* Add compensation for negative dividends */ + amd64_mov_reg_reg_size (code, AMD64_RDX, AMD64_RAX, 8); + if (power > 1) + amd64_shift_reg_imm_size (code, X86_SAR, AMD64_RDX, 63, 8); + amd64_shift_reg_imm_size (code, X86_SHR, AMD64_RDX, 64 - power, 8); + amd64_alu_reg_reg_size (code, X86_ADD, AMD64_RAX, AMD64_RDX, 8); + /* Compute remainder */ + amd64_alu_reg_imm_size (code, X86_AND, AMD64_RAX, (1 << power) 
- 1, 8); + /* Remove compensation */ + amd64_alu_reg_reg_size (code, X86_SUB, AMD64_RAX, AMD64_RDX, 8); + break; + } case OP_IDIV: case OP_IREM: #if defined( __native_client_codegen__ ) diff --git a/mono/mini/mini.c b/mono/mini/mini.c index 679fdcda71a..4d4c39c2f2a 100644 --- a/mono/mini/mini.c +++ b/mono/mini/mini.c @@ -1135,6 +1135,8 @@ mono_op_imm_to_op (int opcode) return OP_IREM_UN; case OP_IREM_IMM: return OP_IREM; + case OP_LREM_IMM: + return OP_LREM; case OP_DIV_IMM: #if SIZEOF_REGISTER == 4 return OP_IDIV; -- 2.25.1