X-Git-Url: http://wien.tomnetworks.com/gitweb/?a=blobdiff_plain;f=mono%2Fmini%2Fmini-x86.c;h=65805930dbf92bff541191c0e61daba78e32f061;hb=2fd0ba2813d04d6ddf46e9d85398a7622a45a9da;hp=9603ab841dd21393343477225917d27053162932;hpb=d0e63e6351e0c4531c588786881c327f40eea9e0;p=mono.git diff --git a/mono/mini/mini-x86.c b/mono/mini/mini-x86.c index 9603ab841dd..65805930dbf 100644 --- a/mono/mini/mini-x86.c +++ b/mono/mini/mini-x86.c @@ -1961,6 +1961,8 @@ x86_pop_reg (code, X86_EAX); #define LOOP_ALIGNMENT 8 #define bb_is_loop_start(bb) ((bb)->loop_body_start && (bb)->nesting) +#ifndef DISABLE_JIT + void mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) { @@ -3171,6 +3173,9 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) case OP_SQRT: x86_fsqrt (code); break; + case OP_ROUND: + x86_frndint (code); + break; case OP_IMIN: g_assert (cfg->opt & MONO_OPT_CMOV); g_assert (ins->dreg == ins->sreg1); @@ -4028,6 +4033,53 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) x86_shift_reg_imm (code, X86_SHR, ins->dreg, 16); x86_widen_reg (code, ins->dreg, ins->dreg, ins->opcode == OP_EXTRACT_I2, TRUE); break; + case OP_EXTRACT_R8: + if (ins->inst_c0) + x86_sse_alu_pd_membase_reg (code, X86_SSE_MOVHPD_MEMBASE_REG, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset, ins->sreg1); + else + x86_sse_alu_sd_membase_reg (code, X86_SSE_MOVSD_MEMBASE_REG, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset, ins->sreg1); + x86_fld_membase (code, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset, TRUE); + break; + + case OP_INSERT_I2: + x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_PINSRW, ins->sreg1, ins->sreg2, ins->inst_c0); + break; + case OP_EXTRACTX_U2: + x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_PEXTRW, ins->dreg, ins->sreg1, ins->inst_c0); + break; + case OP_INSERTX_U1_SLOW: + /*sreg1 is the extracted ireg (scratch) + /sreg2 is the to be inserted ireg (scratch) + /dreg is the xreg to receive the value*/ + + /*clear the bits from the extracted word*/ + x86_alu_reg_imm (code, X86_AND, ins->sreg1, ins->inst_c0 & 1 ? 0x00FF : 0xFF00); + /*shift the value to insert if needed*/ + if (ins->inst_c0 & 1) + x86_shift_reg_imm (code, X86_SHL, ins->sreg2, 8); + /*join them together*/ + x86_alu_reg_reg (code, X86_OR, ins->sreg1, ins->sreg2); + x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_PINSRW, ins->dreg, ins->sreg1, ins->inst_c0 / 2); + break; + case OP_INSERTX_I4_SLOW: + x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_PINSRW, ins->dreg, ins->sreg2, ins->inst_c0 * 2); + x86_shift_reg_imm (code, X86_SHR, ins->sreg2, 16); + x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_PINSRW, ins->dreg, ins->sreg2, ins->inst_c0 * 2 + 1); + break; + + case OP_INSERTX_R4_SLOW: + x86_fst_membase (code, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset, FALSE, TRUE); + /*TODO if inst_c0 == 0 use movss*/ + x86_sse_alu_pd_reg_membase_imm (code, X86_SSE_PINSRW, ins->dreg, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset + 0, ins->inst_c0 * 2); + x86_sse_alu_pd_reg_membase_imm (code, X86_SSE_PINSRW, ins->dreg, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset + 2, ins->inst_c0 * 2 + 1); + break; + case OP_INSERTX_R8_SLOW: + x86_fst_membase (code, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset, TRUE, TRUE); + if (ins->inst_c0) + x86_sse_alu_pd_reg_membase (code, X86_SSE_MOVHPD_REG_MEMBASE, ins->dreg, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset); + else + x86_sse_alu_pd_reg_membase (code, X86_SSE_MOVSD_REG_MEMBASE, ins->dreg, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset); + break; case OP_STOREX_MEMBASE_REG: case OP_STOREX_MEMBASE: @@ -4084,6 +4136,34 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) break; } break; + + case OP_EXPAND_I1: + /*FIXME this causes a partial register stall, maybe it would not be that bad to use shift + mask + or*/ + /*The +4 is to get a mov ?h, ?l over the same reg.*/ + x86_mov_reg_reg (code, ins->dreg + 4, ins->dreg, 1); + x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_PINSRW, ins->dreg, ins->sreg1, 0); + x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_PINSRW, ins->dreg, ins->sreg1, 1); + x86_sse_shift_reg_imm (code, X86_SSE_PSHUFD, ins->dreg, ins->dreg, 0); + break; + case OP_EXPAND_I2: + x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_PINSRW, ins->dreg, ins->sreg1, 0); + x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_PINSRW, ins->dreg, ins->sreg1, 1); + x86_sse_shift_reg_imm (code, X86_SSE_PSHUFD, ins->dreg, ins->dreg, 0); + break; + case OP_EXPAND_I4: + x86_movd_xreg_reg (code, ins->dreg, ins->sreg1); + x86_sse_shift_reg_imm (code, X86_SSE_PSHUFD, ins->dreg, ins->dreg, 0); + break; + case OP_EXPAND_R4: + x86_fst_membase (code, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset, FALSE, TRUE); + x86_movd_xreg_membase (code, ins->dreg, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset); + x86_sse_shift_reg_imm (code, X86_SSE_PSHUFD, ins->dreg, ins->dreg, 0); + break; + case OP_EXPAND_R8: + x86_fst_membase (code, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset, TRUE, TRUE); + x86_movsd_reg_membase (code, ins->dreg, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset); + x86_sse_shift_reg_imm (code, X86_SSE_PSHUFD, ins->dreg, ins->dreg, 0x44); + break; #endif default: g_warning ("unknown opcode %s\n", mono_inst_name (ins->opcode)); @@ -4102,6 +4182,8 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) cfg->code_len = code - cfg->native_code; } +#endif /* DISABLE_JIT */ + void mono_arch_register_lowlevel_calls (void) { @@ -4900,6 +4982,8 @@ mono_arch_emit_inst_for_method (MonoCompile *cfg, MonoMethod *cmethod, MonoMetho opcode = OP_SQRT; } else if (strcmp (cmethod->name, "Abs") == 0 && fsig->params [0]->type == MONO_TYPE_R8) { opcode = OP_ABS; + } else if (strcmp (cmethod->name, "Round") == 0 && fsig->param_count == 1 && fsig->params [0]->type == MONO_TYPE_R8) { + opcode = OP_ROUND; } if (opcode) { @@ -5320,8 +5404,100 @@ mono_arch_decompose_opts (MonoCompile *cfg, MonoInst *ins) ins->dreg = dreg; ins->type = STACK_I4; ins->backend.source_opcode = src_opcode; +} - +void +mono_arch_decompose_long_opts (MonoCompile *cfg, MonoInst *long_ins) +{ + MonoInst *ins; + int vreg; + if (!(cfg->opt & MONO_OPT_SIMD)) + return; + + /*TODO move this to simd-intrinsic.c once we support sse 4.1 dword extractors since we need the runtime caps info */ + switch (long_ins->opcode) { + case OP_EXTRACT_I8: + vreg = long_ins->sreg1; + + if (long_ins->inst_c0) { + MONO_INST_NEW (cfg, ins, OP_PSHUFLED); + ins->klass = long_ins->klass; + ins->sreg1 = long_ins->sreg1; + ins->inst_c0 = 2; + ins->type = STACK_VTYPE; + ins->dreg = vreg = alloc_ireg (cfg); + MONO_ADD_INS (cfg->cbb, ins); + } + + MONO_INST_NEW (cfg, ins, OP_EXTRACT_I4); + ins->klass = mono_defaults.int32_class; + ins->sreg1 = vreg; + ins->type = STACK_I4; + ins->dreg = long_ins->dreg + 1; + MONO_ADD_INS (cfg->cbb, ins); + + MONO_INST_NEW (cfg, ins, OP_PSHUFLED); + ins->klass = long_ins->klass; + ins->sreg1 = long_ins->sreg1; + ins->inst_c0 = long_ins->inst_c0 ? 3 : 1; + ins->type = STACK_VTYPE; + ins->dreg = vreg = alloc_ireg (cfg); + MONO_ADD_INS (cfg->cbb, ins); + + MONO_INST_NEW (cfg, ins, OP_EXTRACT_I4); + ins->klass = mono_defaults.int32_class; + ins->sreg1 = vreg; + ins->type = STACK_I4; + ins->dreg = long_ins->dreg + 2; + MONO_ADD_INS (cfg->cbb, ins); + + long_ins->opcode = OP_NOP; + break; + case OP_INSERTX_I8_SLOW: + MONO_INST_NEW (cfg, ins, OP_INSERTX_I4_SLOW); + ins->dreg = long_ins->dreg; + ins->sreg1 = long_ins->dreg; + ins->sreg2 = long_ins->sreg2 + 1; + ins->inst_c0 = long_ins->inst_c0 * 2; + MONO_ADD_INS (cfg->cbb, ins); + + MONO_INST_NEW (cfg, ins, OP_INSERTX_I4_SLOW); + ins->dreg = long_ins->dreg; + ins->sreg1 = long_ins->dreg; + ins->sreg2 = long_ins->sreg2 + 2; + ins->inst_c0 = long_ins->inst_c0 * 2 + 1; + MONO_ADD_INS (cfg->cbb, ins); + + long_ins->opcode = OP_NOP; + break; + case OP_EXPAND_I8: + MONO_INST_NEW (cfg, ins, OP_ICONV_TO_X); + ins->dreg = long_ins->dreg; + ins->sreg1 = long_ins->sreg1 + 1; + ins->klass = long_ins->klass; + ins->type = STACK_VTYPE; + MONO_ADD_INS (cfg->cbb, ins); + + MONO_INST_NEW (cfg, ins, OP_INSERTX_I4_SLOW); + ins->dreg = long_ins->dreg; + ins->sreg1 = long_ins->dreg; + ins->sreg2 = long_ins->sreg1 + 2; + ins->inst_c0 = 1; + ins->klass = long_ins->klass; + ins->type = STACK_VTYPE; + MONO_ADD_INS (cfg->cbb, ins); + + MONO_INST_NEW (cfg, ins, OP_PSHUFLED); + ins->dreg = long_ins->dreg; + ins->sreg1 = long_ins->dreg;; + ins->inst_c0 = 0x44; /*Magic number for swizzling (X,Y,X,Y)*/ + ins->klass = long_ins->klass; + ins->type = STACK_VTYPE; + MONO_ADD_INS (cfg->cbb, ins); + + long_ins->opcode = OP_NOP; + break; + } } #endif