#define LOOP_ALIGNMENT 8
#define bb_is_loop_start(bb) ((bb)->loop_body_start && (bb)->nesting)
+#ifndef DISABLE_JIT
+
void
mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
{
x86_alu_reg_reg (code, X86_OR, ins->sreg1, ins->sreg2);
x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_PINSRW, ins->dreg, ins->sreg1, ins->inst_c0 / 2);
break;
+ case OP_INSERTX_I4_SLOW:
+ x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_PINSRW, ins->dreg, ins->sreg2, ins->inst_c0 * 2);
+ x86_shift_reg_imm (code, X86_SHR, ins->sreg2, 16);
+ x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_PINSRW, ins->dreg, ins->sreg2, ins->inst_c0 * 2 + 1);
+ break;
+
+ case OP_INSERTX_R4_SLOW:
+ x86_fst_membase (code, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset, FALSE, TRUE);
+ /*TODO if inst_c0 == 0 use movss*/
+ x86_sse_alu_pd_reg_membase_imm (code, X86_SSE_PINSRW, ins->dreg, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset + 0, ins->inst_c0 * 2);
+ x86_sse_alu_pd_reg_membase_imm (code, X86_SSE_PINSRW, ins->dreg, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset + 2, ins->inst_c0 * 2 + 1);
+ break;
+ case OP_INSERTX_R8_SLOW:
+ x86_fst_membase (code, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset, TRUE, TRUE);
+ if (ins->inst_c0)
+ x86_sse_alu_pd_reg_membase (code, X86_SSE_MOVHPD_REG_MEMBASE, ins->dreg, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset);
+ else
+ x86_sse_alu_pd_reg_membase (code, X86_SSE_MOVSD_REG_MEMBASE, ins->dreg, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset);
+ break;
case OP_STOREX_MEMBASE_REG:
case OP_STOREX_MEMBASE:
break;
}
break;
+
+ case OP_EXPAND_I1:
+ /* FIXME: this causes a partial register stall; using shift + mask + or instead might not be that bad. */
+ /*The +4 is to get a mov ?h, ?l over the same reg.*/
+ x86_mov_reg_reg (code, ins->dreg + 4, ins->dreg, 1);
+ x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_PINSRW, ins->dreg, ins->sreg1, 0);
+ x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_PINSRW, ins->dreg, ins->sreg1, 1);
+ x86_sse_shift_reg_imm (code, X86_SSE_PSHUFD, ins->dreg, ins->dreg, 0);
+ break;
+ case OP_EXPAND_I2:
+ x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_PINSRW, ins->dreg, ins->sreg1, 0);
+ x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_PINSRW, ins->dreg, ins->sreg1, 1);
+ x86_sse_shift_reg_imm (code, X86_SSE_PSHUFD, ins->dreg, ins->dreg, 0);
+ break;
+ case OP_EXPAND_I4:
+ x86_movd_xreg_reg (code, ins->dreg, ins->sreg1);
+ x86_sse_shift_reg_imm (code, X86_SSE_PSHUFD, ins->dreg, ins->dreg, 0);
+ break;
+ case OP_EXPAND_R4:
+ x86_fst_membase (code, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset, FALSE, TRUE);
+ x86_movd_xreg_membase (code, ins->dreg, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset);
+ x86_sse_shift_reg_imm (code, X86_SSE_PSHUFD, ins->dreg, ins->dreg, 0);
+ break;
+ case OP_EXPAND_R8:
+ x86_fst_membase (code, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset, TRUE, TRUE);
+ x86_movsd_reg_membase (code, ins->dreg, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset);
+ x86_sse_shift_reg_imm (code, X86_SSE_PSHUFD, ins->dreg, ins->dreg, 0x44);
+ break;
#endif
default:
g_warning ("unknown opcode %s\n", mono_inst_name (ins->opcode));
cfg->code_len = code - cfg->native_code;
}
+#endif /* DISABLE_JIT */
+
void
mono_arch_register_lowlevel_calls (void)
{
{
MonoInst *ins;
int vreg;
- if (!(cfg->opt & MONO_OPT_SIMD) || long_ins->opcode != OP_EXTRACT_I8)
+ if (!(cfg->opt & MONO_OPT_SIMD))
return;
+
 /* TODO: move this to simd-intrinsic.c once we support SSE 4.1 dword extractors, since we need the runtime caps info. */
-
- vreg = long_ins->sreg1;
-
- if (long_ins->inst_c0) {
+ switch (long_ins->opcode) {
+ case OP_EXTRACT_I8:
+ vreg = long_ins->sreg1;
+
+ if (long_ins->inst_c0) {
+ MONO_INST_NEW (cfg, ins, OP_PSHUFLED);
+ ins->klass = long_ins->klass;
+ ins->sreg1 = long_ins->sreg1;
+ ins->inst_c0 = 2;
+ ins->type = STACK_VTYPE;
+ ins->dreg = vreg = alloc_ireg (cfg);
+ MONO_ADD_INS (cfg->cbb, ins);
+ }
+
+ MONO_INST_NEW (cfg, ins, OP_EXTRACT_I4);
+ ins->klass = mono_defaults.int32_class;
+ ins->sreg1 = vreg;
+ ins->type = STACK_I4;
+ ins->dreg = long_ins->dreg + 1;
+ MONO_ADD_INS (cfg->cbb, ins);
+
MONO_INST_NEW (cfg, ins, OP_PSHUFLED);
ins->klass = long_ins->klass;
ins->sreg1 = long_ins->sreg1;
- ins->inst_c0 = 2;
+ ins->inst_c0 = long_ins->inst_c0 ? 3 : 1;
ins->type = STACK_VTYPE;
ins->dreg = vreg = alloc_ireg (cfg);
MONO_ADD_INS (cfg->cbb, ins);
- }
+
+ MONO_INST_NEW (cfg, ins, OP_EXTRACT_I4);
+ ins->klass = mono_defaults.int32_class;
+ ins->sreg1 = vreg;
+ ins->type = STACK_I4;
+ ins->dreg = long_ins->dreg + 2;
+ MONO_ADD_INS (cfg->cbb, ins);
+
+ long_ins->opcode = OP_NOP;
+ break;
+ case OP_INSERTX_I8_SLOW:
+ MONO_INST_NEW (cfg, ins, OP_INSERTX_I4_SLOW);
+ ins->dreg = long_ins->dreg;
+ ins->sreg1 = long_ins->dreg;
+ ins->sreg2 = long_ins->sreg2 + 1;
+ ins->inst_c0 = long_ins->inst_c0 * 2;
+ MONO_ADD_INS (cfg->cbb, ins);
- MONO_INST_NEW (cfg, ins, OP_EXTRACT_I4);
- ins->klass = mono_defaults.int32_class;
- ins->sreg1 = vreg;
- ins->type = STACK_I4;
- ins->dreg = long_ins->dreg + 1;
- MONO_ADD_INS (cfg->cbb, ins);
-
- MONO_INST_NEW (cfg, ins, OP_PSHUFLED);
- ins->klass = long_ins->klass;
- ins->sreg1 = long_ins->sreg1;
- ins->inst_c0 = long_ins->inst_c0 ? 3 : 1;
- ins->type = STACK_VTYPE;
- ins->dreg = vreg = alloc_ireg (cfg);
- MONO_ADD_INS (cfg->cbb, ins);
-
- MONO_INST_NEW (cfg, ins, OP_EXTRACT_I4);
- ins->klass = mono_defaults.int32_class;
- ins->sreg1 = vreg;
- ins->type = STACK_I4;
- ins->dreg = long_ins->dreg + 2;
- MONO_ADD_INS (cfg->cbb, ins);
+ MONO_INST_NEW (cfg, ins, OP_INSERTX_I4_SLOW);
+ ins->dreg = long_ins->dreg;
+ ins->sreg1 = long_ins->dreg;
+ ins->sreg2 = long_ins->sreg2 + 2;
+ ins->inst_c0 = long_ins->inst_c0 * 2 + 1;
+ MONO_ADD_INS (cfg->cbb, ins);
- long_ins->opcode = OP_NOP;
+ long_ins->opcode = OP_NOP;
+ break;
+ case OP_EXPAND_I8:
+ MONO_INST_NEW (cfg, ins, OP_ICONV_TO_X);
+ ins->dreg = long_ins->dreg;
+ ins->sreg1 = long_ins->sreg1 + 1;
+ ins->klass = long_ins->klass;
+ ins->type = STACK_VTYPE;
+ MONO_ADD_INS (cfg->cbb, ins);
+
+ MONO_INST_NEW (cfg, ins, OP_INSERTX_I4_SLOW);
+ ins->dreg = long_ins->dreg;
+ ins->sreg1 = long_ins->dreg;
+ ins->sreg2 = long_ins->sreg1 + 2;
+ ins->inst_c0 = 1;
+ ins->klass = long_ins->klass;
+ ins->type = STACK_VTYPE;
+ MONO_ADD_INS (cfg->cbb, ins);
+
+ MONO_INST_NEW (cfg, ins, OP_PSHUFLED);
+ ins->dreg = long_ins->dreg;
+ ins->sreg1 = long_ins->dreg;;
+ ins->inst_c0 = 0x44; /*Magic number for swizzling (X,Y,X,Y)*/
+ ins->klass = long_ins->klass;
+ ins->type = STACK_VTYPE;
+ MONO_ADD_INS (cfg->cbb, ins);
+
+ long_ins->opcode = OP_NOP;
+ break;
+ }
}
#endif