loadu1_mem: dest:i len:16
loadu2_mem: dest:i len:16
+
+#SIMD
+#TODO: Some of these IR opcodes are marked as no-clobber when they actually do clobber.
+
+addps: dest:x src1:x src2:x len:4 clob:1
+divps: dest:x src1:x src2:x len:4 clob:1
+mulps: dest:x src1:x src2:x len:4 clob:1
+subps: dest:x src1:x src2:x len:4 clob:1
+maxps: dest:x src1:x src2:x len:4 clob:1
+minps: dest:x src1:x src2:x len:4 clob:1
+compps: dest:x src1:x src2:x len:5 clob:1
+andps: dest:x src1:x src2:x len:4 clob:1
+andnps: dest:x src1:x src2:x len:4 clob:1
+orps: dest:x src1:x src2:x len:4 clob:1
+xorps: dest:x src1:x src2:x len:4 clob:1
+
+haddps: dest:x src1:x src2:x len:5 clob:1
+hsubps: dest:x src1:x src2:x len:5 clob:1
+addsubps: dest:x src1:x src2:x len:5 clob:1
+dupps_low: dest:x src1:x len:5
+dupps_high: dest:x src1:x len:5
+
+addpd: dest:x src1:x src2:x len:5 clob:1
+divpd: dest:x src1:x src2:x len:5 clob:1
+mulpd: dest:x src1:x src2:x len:5 clob:1
+subpd: dest:x src1:x src2:x len:5 clob:1
+maxpd: dest:x src1:x src2:x len:5 clob:1
+minpd: dest:x src1:x src2:x len:5 clob:1
+comppd: dest:x src1:x src2:x len:6 clob:1
+andpd: dest:x src1:x src2:x len:5 clob:1
+andnpd: dest:x src1:x src2:x len:5 clob:1
+orpd: dest:x src1:x src2:x len:5 clob:1
+xorpd: dest:x src1:x src2:x len:5 clob:1
+
+haddpd: dest:x src1:x src2:x len:6 clob:1
+hsubpd: dest:x src1:x src2:x len:6 clob:1
+addsubpd: dest:x src1:x src2:x len:6 clob:1
+duppd: dest:x src1:x len:6
+
+pand: dest:x src1:x src2:x len:5 clob:1
+por: dest:x src1:x src2:x len:5 clob:1
+pxor: dest:x src1:x src2:x len:5 clob:1
+
+sqrtps: dest:x src1:x len:5
+rsqrtps: dest:x src1:x len:5
+rcpps: dest:x src1:x len:5
+
+pshufflew_high: dest:x src1:x len:6
+pshufflew_low: dest:x src1:x len:6
+pshuffled: dest:x src1:x len:6
+
+extract_mask: dest:i src1:x len:6
+
+paddb: dest:x src1:x src2:x len:5 clob:1
+paddw: dest:x src1:x src2:x len:5 clob:1
+paddd: dest:x src1:x src2:x len:5 clob:1
+paddq: dest:x src1:x src2:x len:5 clob:1
+
+psubb: dest:x src1:x src2:x len:5 clob:1
+psubw: dest:x src1:x src2:x len:5 clob:1
+psubd: dest:x src1:x src2:x len:5 clob:1
+psubq: dest:x src1:x src2:x len:5 clob:1
+
+pmaxb_un: dest:x src1:x src2:x len:5 clob:1
+pmaxw_un: dest:x src1:x src2:x len:6 clob:1
+pmaxd_un: dest:x src1:x src2:x len:6 clob:1
+
+pmaxb: dest:x src1:x src2:x len:6 clob:1
+pmaxw: dest:x src1:x src2:x len:5 clob:1
+pmaxd: dest:x src1:x src2:x len:6 clob:1
+
+pavgb_un: dest:x src1:x src2:x len:5 clob:1
+pavgw_un: dest:x src1:x src2:x len:5 clob:1
+
+pminb_un: dest:x src1:x src2:x len:5 clob:1
+pminw_un: dest:x src1:x src2:x len:6 clob:1
+pmind_un: dest:x src1:x src2:x len:6 clob:1
+
+pminb: dest:x src1:x src2:x len:6 clob:1
+pminw: dest:x src1:x src2:x len:5 clob:1
+pmind: dest:x src1:x src2:x len:6 clob:1
+
+pcmpeqb: dest:x src1:x src2:x len:5 clob:1
+pcmpeqw: dest:x src1:x src2:x len:5 clob:1
+pcmpeqd: dest:x src1:x src2:x len:5 clob:1
+pcmpeqq: dest:x src1:x src2:x len:6 clob:1
+
+pcmpgtb: dest:x src1:x src2:x len:5 clob:1
+pcmpgtw: dest:x src1:x src2:x len:5 clob:1
+pcmpgtd: dest:x src1:x src2:x len:5 clob:1
+pcmpgtq: dest:x src1:x src2:x len:6 clob:1
+
+psumabsdiff: dest:x src1:x src2:x len:5 clob:1
+
+unpack_lowb: dest:x src1:x src2:x len:5 clob:1
+unpack_loww: dest:x src1:x src2:x len:5 clob:1
+unpack_lowd: dest:x src1:x src2:x len:5 clob:1
+unpack_lowq: dest:x src1:x src2:x len:5 clob:1
+unpack_lowps: dest:x src1:x src2:x len:5 clob:1
+unpack_lowpd: dest:x src1:x src2:x len:5 clob:1
+
+unpack_highb: dest:x src1:x src2:x len:5 clob:1
+unpack_highw: dest:x src1:x src2:x len:5 clob:1
+unpack_highd: dest:x src1:x src2:x len:5 clob:1
+unpack_highq: dest:x src1:x src2:x len:5 clob:1
+unpack_highps: dest:x src1:x src2:x len:5 clob:1
+unpack_highpd: dest:x src1:x src2:x len:5 clob:1
+
+packw: dest:x src1:x src2:x len:5 clob:1
+packd: dest:x src1:x src2:x len:5 clob:1
+
+packw_un: dest:x src1:x src2:x len:5 clob:1
+packd_un: dest:x src1:x src2:x len:6 clob:1
+
+paddb_sat: dest:x src1:x src2:x len:5 clob:1
+paddb_sat_un: dest:x src1:x src2:x len:5 clob:1
+
+paddw_sat: dest:x src1:x src2:x len:5 clob:1
+paddw_sat_un: dest:x src1:x src2:x len:5 clob:1
+
+psubb_sat: dest:x src1:x src2:x len:5 clob:1
+psubb_sat_un: dest:x src1:x src2:x len:5 clob:1
+
+psubw_sat: dest:x src1:x src2:x len:5 clob:1
+psubw_sat_un: dest:x src1:x src2:x len:5 clob:1
+
+pmulw: dest:x src1:x src2:x len:5 clob:1
+pmuld: dest:x src1:x src2:x len:6 clob:1
+pmulq: dest:x src1:x src2:x len:5 clob:1
+
+pmul_high_un: dest:x src1:x src2:x len:5 clob:1
+pmul_high: dest:x src1:x src2:x len:5 clob:1
+
+pshrw: dest:x src1:x len:6 clob:1
+pshrw_reg: dest:x src1:x src2:x len:5 clob:1
+
+psarw: dest:x src1:x len:6 clob:1
+psarw_reg: dest:x src1:x src2:x len:5 clob:1
+
+pshlw: dest:x src1:x len:6 clob:1
+pshlw_reg: dest:x src1:x src2:x len:5 clob:1
+
+pshrd: dest:x src1:x len:6 clob:1
+pshrd_reg: dest:x src1:x src2:x len:5 clob:1
+
+psard: dest:x src1:x len:6 clob:1
+psard_reg: dest:x src1:x src2:x len:5 clob:1
+
+pshld: dest:x src1:x len:6 clob:1
+pshld_reg: dest:x src1:x src2:x len:5 clob:1
+
+pshrq: dest:x src1:x len:6 clob:1
+pshrq_reg: dest:x src1:x src2:x len:5 clob:1
+
+pshlq: dest:x src1:x len:6 clob:1
+pshlq_reg: dest:x src1:x src2:x len:5 clob:1
+
+xmove: dest:x src1:x len:5
+xzero: dest:x len:5
+
+iconv_to_x: dest:x src1:i len:5
+extract_i4: dest:i src1:x len:5
+
+extract_i8: dest:i src1:x len:9
+
+extract_i2: dest:i src1:x len:13
+extract_u2: dest:i src1:x len:13
+extract_i1: dest:i src1:x len:13
+extract_u1: dest:i src1:x len:13
+extract_r8: dest:f src1:x len:5
+
+iconv_to_r8_raw: dest:f src1:i len:9
+
+insert_i2: dest:x src1:x src2:i len:6 clob:1
+
+extractx_u2: dest:i src1:x len:6
+insertx_u1_slow: dest:x src1:i src2:i len:18 clob:x
+
+insertx_i4_slow: dest:x src1:x src2:i len:16 clob:x
+insertx_i8_slow: dest:x src1:x src2:i len:13
+insertx_r4_slow: dest:x src1:x src2:f len:24
+insertx_r8_slow: dest:x src1:x src2:f len:24
+
+loadx_membase: dest:x src1:b len:9
+storex_membase: dest:b src1:x len:9
+storex_membase_reg: dest:b src1:x len:9
+
+loadx_aligned_membase: dest:x src1:b len:7
+storex_aligned_membase_reg: dest:b src1:x len:7
+storex_nta_membase_reg: dest:b src1:x len:7
+
+fconv_to_r8_x: dest:x src1:f len:4
+xconv_r8_to_i4: dest:y src1:x len:7
+
+prefetch_membase: src1:b len:4
+
+expand_i2: dest:x src1:i len:18
+expand_i4: dest:x src1:i len:11
+expand_i8: dest:x src1:i len:11
+expand_r4: dest:x src1:f len:16
+expand_r8: dest:x src1:f len:13
+
liverange_start: len:0
liverange_end: len:0
return "unknown";
}
+/*
+ * mono_arch_xregname:
+ *
+ *   Return the name of the SIMD (xmm) register REG. This simply forwards to
+ * mono_arch_fregname (), since xregs and fregs use the same register names here.
+ *
+ * TODO: Figure out a way of telling this and the one above apart if things get confusing.
+ */
+const char *
+mono_arch_xregname (int reg)
+{
+	return mono_arch_fregname (reg);
+}
+
G_GNUC_UNUSED static void
break_count (void)
{
ins->sreg1 = temp->dreg;
}
break;
+#ifdef MONO_ARCH_SIMD_INTRINSICS
+ case OP_EXPAND_I1: {
+ int temp_reg1 = mono_alloc_ireg (cfg);
+ int temp_reg2 = mono_alloc_ireg (cfg);
+ int original_reg = ins->sreg1;
+
+ NEW_INS (cfg, ins, temp, OP_ICONV_TO_U1);
+ temp->sreg1 = original_reg;
+ temp->dreg = temp_reg1;
+
+ NEW_INS (cfg, ins, temp, OP_SHL_IMM);
+ temp->sreg1 = temp_reg1;
+ temp->dreg = temp_reg2;
+ temp->inst_imm = 8;
+
+ NEW_INS (cfg, ins, temp, OP_LOR);
+ temp->sreg1 = temp->dreg = temp_reg2;
+ temp->sreg2 = temp_reg1;
+
+ ins->opcode = OP_EXPAND_I2;
+ ins->sreg1 = temp_reg2;
+ }
+ break;
+#endif
default:
break;
}
break;
}
#ifdef MONO_ARCH_SIMD_INTRINSICS
+	/* TODO: Some of these IR opcodes are marked as no-clobber when they actually do clobber. */
case OP_ADDPS:
amd64_sse_addps_reg_reg (code, ins->sreg1, ins->sreg2);
break;
case OP_PADDQ:
amd64_sse_paddq_reg_reg (code, ins->sreg1, ins->sreg2);
break;
+
+ case OP_PSUBB:
+ amd64_sse_psubb_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PSUBW:
+ amd64_sse_psubw_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PSUBD:
+ amd64_sse_psubd_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PSUBQ:
+ amd64_sse_psubq_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+
+ case OP_PMAXB_UN:
+ amd64_sse_pmaxub_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PMAXW_UN:
+ amd64_sse_pmaxuw_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PMAXD_UN:
+ amd64_sse_pmaxud_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+
+ case OP_PMAXB:
+ amd64_sse_pmaxsb_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PMAXW:
+ amd64_sse_pmaxsw_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PMAXD:
+ amd64_sse_pmaxsd_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+
+ case OP_PAVGB_UN:
+ amd64_sse_pavgb_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PAVGW_UN:
+ amd64_sse_pavgw_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+
+ case OP_PMINB_UN:
+ amd64_sse_pminub_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PMINW_UN:
+ amd64_sse_pminuw_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PMIND_UN:
+ amd64_sse_pminud_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+
+ case OP_PMINB:
+ amd64_sse_pminsb_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PMINW:
+ amd64_sse_pminsw_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PMIND:
+ amd64_sse_pminsd_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+
+ case OP_PCMPEQB:
+ amd64_sse_pcmpeqb_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PCMPEQW:
+ amd64_sse_pcmpeqw_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PCMPEQD:
+ amd64_sse_pcmpeqd_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PCMPEQQ:
+ amd64_sse_pcmpeqq_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+
+ case OP_PCMPGTB:
+ amd64_sse_pcmpgtb_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PCMPGTW:
+ amd64_sse_pcmpgtw_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PCMPGTD:
+ amd64_sse_pcmpgtd_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PCMPGTQ:
+ amd64_sse_pcmpgtq_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+
+ case OP_PSUM_ABS_DIFF:
+ amd64_sse_psadbw_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+
+ case OP_UNPACK_LOWB:
+ amd64_sse_punpcklbw_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_UNPACK_LOWW:
+ amd64_sse_punpcklwd_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_UNPACK_LOWD:
+ amd64_sse_punpckldq_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_UNPACK_LOWQ:
+ amd64_sse_punpcklqdq_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_UNPACK_LOWPS:
+ amd64_sse_unpcklps_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_UNPACK_LOWPD:
+ amd64_sse_unpcklpd_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+
+ case OP_UNPACK_HIGHB:
+ amd64_sse_punpckhbw_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_UNPACK_HIGHW:
+ amd64_sse_punpckhwd_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_UNPACK_HIGHD:
+ amd64_sse_punpckhdq_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_UNPACK_HIGHQ:
+ amd64_sse_punpckhqdq_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_UNPACK_HIGHPS:
+ amd64_sse_unpckhps_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_UNPACK_HIGHPD:
+ amd64_sse_unpckhpd_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+
+ case OP_PACKW:
+ amd64_sse_packsswb_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PACKD:
+ amd64_sse_packssdw_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PACKW_UN:
+ amd64_sse_packuswb_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PACKD_UN:
+ amd64_sse_packusdw_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+
+ case OP_PADDB_SAT_UN:
+ amd64_sse_paddusb_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PSUBB_SAT_UN:
+ amd64_sse_psubusb_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PADDW_SAT_UN:
+ amd64_sse_paddusw_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PSUBW_SAT_UN:
+ amd64_sse_psubusw_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+
+ case OP_PADDB_SAT:
+ amd64_sse_paddsb_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PSUBB_SAT:
+ amd64_sse_psubsb_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PADDW_SAT:
+ amd64_sse_paddsw_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PSUBW_SAT:
+ amd64_sse_psubsw_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+
+ case OP_PMULW:
+ amd64_sse_pmullw_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PMULD:
+ amd64_sse_pmulld_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PMULQ:
+ amd64_sse_pmuludq_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PMULW_HIGH_UN:
+ amd64_sse_pmulhuw_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PMULW_HIGH:
+ amd64_sse_pmulhw_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+
+ case OP_PSHRW:
+ amd64_sse_psrlw_reg_imm (code, ins->dreg, ins->inst_imm);
+ break;
+ case OP_PSHRW_REG:
+ amd64_sse_psrlw_reg_reg (code, ins->dreg, ins->sreg2);
+ break;
+
+ case OP_PSARW:
+ amd64_sse_psraw_reg_imm (code, ins->dreg, ins->inst_imm);
+ break;
+ case OP_PSARW_REG:
+ amd64_sse_psraw_reg_reg (code, ins->dreg, ins->sreg2);
+ break;
+
+ case OP_PSHLW:
+ amd64_sse_psllw_reg_imm (code, ins->dreg, ins->inst_imm);
+ break;
+ case OP_PSHLW_REG:
+ amd64_sse_psllw_reg_reg (code, ins->dreg, ins->sreg2);
+ break;
+
+ case OP_PSHRD:
+ amd64_sse_psrld_reg_imm (code, ins->dreg, ins->inst_imm);
+ break;
+ case OP_PSHRD_REG:
+ amd64_sse_psrld_reg_reg (code, ins->dreg, ins->sreg2);
+ break;
+
+ case OP_PSARD:
+ amd64_sse_psrad_reg_imm (code, ins->dreg, ins->inst_imm);
+ break;
+ case OP_PSARD_REG:
+ amd64_sse_psrad_reg_reg (code, ins->dreg, ins->sreg2);
+ break;
+
+ case OP_PSHLD:
+ amd64_sse_pslld_reg_imm (code, ins->dreg, ins->inst_imm);
+ break;
+ case OP_PSHLD_REG:
+ amd64_sse_pslld_reg_reg (code, ins->dreg, ins->sreg2);
+ break;
+
+ case OP_PSHRQ:
+ amd64_sse_psrlq_reg_imm (code, ins->dreg, ins->inst_imm);
+ break;
+ case OP_PSHRQ_REG:
+ amd64_sse_psrlq_reg_reg (code, ins->dreg, ins->sreg2);
+ break;
+
+	/* TODO: This is part of the SSE spec but not added:
+ case OP_PSARQ:
+ amd64_sse_psraq_reg_imm (code, ins->dreg, ins->inst_imm);
+ break;
+ case OP_PSARQ_REG:
+ amd64_sse_psraq_reg_reg (code, ins->dreg, ins->sreg2);
+ break;
+ */
+
+ case OP_PSHLQ:
+ amd64_sse_psllq_reg_imm (code, ins->dreg, ins->inst_imm);
+ break;
+ case OP_PSHLQ_REG:
+ amd64_sse_psllq_reg_reg (code, ins->dreg, ins->sreg2);
+ break;
+
+ case OP_ICONV_TO_X:
+ amd64_movd_xreg_reg_size (code, ins->dreg, ins->sreg1, 4);
+ break;
+ case OP_EXTRACT_I4:
+ amd64_movd_reg_xreg_size (code, ins->dreg, ins->sreg1, 4);
+ break;
+ case OP_EXTRACT_I8:
+ if (ins->inst_c0) {
+ amd64_movhlps_reg_reg (code, AMD64_XMM15, ins->sreg1);
+ amd64_movd_reg_xreg_size (code, ins->dreg, AMD64_XMM15, 8);
+ } else {
+ amd64_movd_reg_xreg_size (code, ins->dreg, ins->sreg1, 8);
+ }
+ break;
+ case OP_EXTRACT_I1:
+ case OP_EXTRACT_U1:
+ amd64_movd_reg_xreg_size (code, ins->dreg, ins->sreg1, 4);
+ if (ins->inst_c0)
+ amd64_shift_reg_imm (code, X86_SHR, ins->dreg, ins->inst_c0 * 8);
+ amd64_widen_reg (code, ins->dreg, ins->dreg, ins->opcode == OP_EXTRACT_I1, FALSE);
+ break;
+ case OP_EXTRACT_I2:
+ case OP_EXTRACT_U2:
+ /*amd64_movd_reg_xreg_size (code, ins->dreg, ins->sreg1, 4);
+ if (ins->inst_c0)
+ amd64_shift_reg_imm_size (code, X86_SHR, ins->dreg, 16, 4);*/
+ amd64_sse_pextrw_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_c0);
+ amd64_widen_reg_size (code, ins->dreg, ins->dreg, ins->opcode == OP_EXTRACT_I2, TRUE, 4);
+ break;
+ case OP_EXTRACT_R8:
+ if (ins->inst_c0)
+ amd64_movhlps_reg_reg (code, ins->dreg, ins->sreg1);
+ else
+ amd64_sse_movsd_reg_reg (code, ins->dreg, ins->sreg1);
+ break;
+ case OP_INSERT_I2:
+ amd64_sse_pinsrw_reg_reg_imm (code, ins->sreg1, ins->sreg2, ins->inst_c0);
+ break;
+ case OP_EXTRACTX_U2:
+ amd64_sse_pextrw_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_c0);
+ break;
+ case OP_INSERTX_U1_SLOW:
+		/* sreg1 is the extracted ireg (scratch)
+		   sreg2 is the ireg holding the value to be inserted (scratch)
+		   dreg is the xreg that receives the value */
+
+ /*clear the bits from the extracted word*/
+ amd64_alu_reg_imm (code, X86_AND, ins->sreg1, ins->inst_c0 & 1 ? 0x00FF : 0xFF00);
+ /*shift the value to insert if needed*/
+ if (ins->inst_c0 & 1)
+ amd64_shift_reg_imm_size (code, X86_SHL, ins->sreg2, 8, 4);
+ /*join them together*/
+ amd64_alu_reg_reg (code, X86_OR, ins->sreg1, ins->sreg2);
+ amd64_sse_pinsrw_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_c0 / 2);
+ break;
+ case OP_INSERTX_I4_SLOW:
+ amd64_sse_pinsrw_reg_reg_imm (code, ins->dreg, ins->sreg2, ins->inst_c0 * 2);
+ amd64_shift_reg_imm (code, X86_SHR, ins->sreg2, 16);
+ amd64_sse_pinsrw_reg_reg_imm (code, ins->dreg, ins->sreg2, ins->inst_c0 * 2 + 1);
+ break;
+ case OP_INSERTX_I8_SLOW:
+ amd64_movd_xreg_reg_size(code, AMD64_XMM15, ins->sreg2, 8);
+ if (ins->inst_c0)
+ amd64_movlhps_reg_reg (code, ins->dreg, AMD64_XMM15);
+ else
+ amd64_sse_movsd_reg_reg (code, ins->dreg, AMD64_XMM15);
+ break;
+
+ case OP_INSERTX_R4_SLOW:
+ switch (ins->inst_c0) {
+ case 0:
+ amd64_sse_cvtsd2ss_reg_reg (code, ins->dreg, ins->sreg2);
+ break;
+ case 1:
+ amd64_sse_pshufd_reg_reg_imm (code, ins->dreg, ins->dreg, mono_simd_shuffle_mask(1, 0, 2, 3));
+ amd64_sse_cvtsd2ss_reg_reg (code, ins->dreg, ins->sreg2);
+ amd64_sse_pshufd_reg_reg_imm (code, ins->dreg, ins->dreg, mono_simd_shuffle_mask(1, 0, 2, 3));
+ break;
+ case 2:
+ amd64_sse_pshufd_reg_reg_imm (code, ins->dreg, ins->dreg, mono_simd_shuffle_mask(2, 1, 0, 3));
+ amd64_sse_cvtsd2ss_reg_reg (code, ins->dreg, ins->sreg2);
+ amd64_sse_pshufd_reg_reg_imm (code, ins->dreg, ins->dreg, mono_simd_shuffle_mask(2, 1, 0, 3));
+ break;
+ case 3:
+ amd64_sse_pshufd_reg_reg_imm (code, ins->dreg, ins->dreg, mono_simd_shuffle_mask(3, 1, 2, 0));
+ amd64_sse_cvtsd2ss_reg_reg (code, ins->dreg, ins->sreg2);
+ amd64_sse_pshufd_reg_reg_imm (code, ins->dreg, ins->dreg, mono_simd_shuffle_mask(3, 1, 2, 0));
+ break;
+ }
+ break;
+ case OP_INSERTX_R8_SLOW:
+ if (ins->inst_c0)
+ amd64_movlhps_reg_reg (code, ins->dreg, ins->sreg2);
+ else
+ amd64_sse_movsd_reg_reg (code, ins->dreg, ins->sreg2);
+ break;
+ case OP_STOREX_MEMBASE_REG:
+ case OP_STOREX_MEMBASE:
+ amd64_sse_movups_membase_reg (code, ins->dreg, ins->inst_offset, ins->sreg1);
+ break;
+ case OP_LOADX_MEMBASE:
+ amd64_sse_movups_reg_membase (code, ins->dreg, ins->sreg1, ins->inst_offset);
+ break;
+ case OP_LOADX_ALIGNED_MEMBASE:
+ amd64_sse_movaps_reg_membase (code, ins->dreg, ins->sreg1, ins->inst_offset);
+ break;
+ case OP_STOREX_ALIGNED_MEMBASE_REG:
+ amd64_sse_movaps_membase_reg (code, ins->dreg, ins->inst_offset, ins->sreg1);
+ break;
+ case OP_STOREX_NTA_MEMBASE_REG:
+ amd64_sse_movntps_reg_membase (code, ins->dreg, ins->sreg1, ins->inst_offset);
+ break;
+ case OP_PREFETCH_MEMBASE:
+ amd64_sse_prefetch_reg_membase (code, ins->backend.arg_info, ins->sreg1, ins->inst_offset);
+ break;
+
+ case OP_XMOVE:
+ /*FIXME the peephole pass should have killed this*/
+ if (ins->dreg != ins->sreg1)
+ amd64_sse_movaps_reg_reg (code, ins->dreg, ins->sreg1);
+ break;
+ case OP_XZERO:
+ amd64_sse_pxor_reg_reg (code, ins->dreg, ins->dreg);
+ break;
+ case OP_ICONV_TO_R8_RAW:
+ amd64_movd_xreg_reg_size (code, ins->dreg, ins->sreg1, 4);
+ amd64_sse_cvtss2sd_reg_reg (code, ins->dreg, ins->dreg);
+ break;
+
+ case OP_FCONV_TO_R8_X:
+ amd64_sse_movsd_reg_reg (code, ins->dreg, ins->sreg1);
+ break;
+
+ case OP_XCONV_R8_TO_I4:
+ amd64_sse_cvttsd2si_reg_xreg_size (code, ins->dreg, ins->sreg1, 4);
+ switch (ins->backend.source_opcode) {
+ case OP_FCONV_TO_I1:
+ amd64_widen_reg (code, ins->dreg, ins->dreg, TRUE, FALSE);
+ break;
+ case OP_FCONV_TO_U1:
+ amd64_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
+ break;
+ case OP_FCONV_TO_I2:
+ amd64_widen_reg (code, ins->dreg, ins->dreg, TRUE, TRUE);
+ break;
+ case OP_FCONV_TO_U2:
+ amd64_widen_reg (code, ins->dreg, ins->dreg, FALSE, TRUE);
+ break;
+ }
+ break;
+
+ case OP_EXPAND_I2:
+ amd64_sse_pinsrw_reg_reg_imm (code, ins->dreg, ins->sreg1, 0);
+ amd64_sse_pinsrw_reg_reg_imm (code, ins->dreg, ins->sreg1, 1);
+ amd64_sse_pshufd_reg_reg_imm (code, ins->dreg, ins->dreg, 0);
+ break;
+ case OP_EXPAND_I4:
+ amd64_movd_xreg_reg_size (code, ins->dreg, ins->sreg1, 4);
+ amd64_sse_pshufd_reg_reg_imm (code, ins->dreg, ins->dreg, 0);
+ break;
+ case OP_EXPAND_I8:
+ amd64_movd_xreg_reg_size (code, ins->dreg, ins->sreg1, 8);
+ amd64_sse_pshufd_reg_reg_imm (code, ins->dreg, ins->dreg, 0x44);
+ break;
+ case OP_EXPAND_R4:
+ amd64_sse_movsd_reg_reg (code, ins->dreg, ins->sreg1);
+ amd64_sse_cvtsd2ss_reg_reg (code, ins->dreg, ins->dreg);
+ amd64_sse_pshufd_reg_reg_imm (code, ins->dreg, ins->dreg, 0);
+ break;
+ case OP_EXPAND_R8:
+ amd64_sse_movsd_reg_reg (code, ins->dreg, ins->sreg1);
+ amd64_sse_pshufd_reg_reg_imm (code, ins->dreg, ins->dreg, 0x44);
+ break;
#endif
case OP_LIVERANGE_START: {
if (cfg->verbose_level > 1)