+ amd64_mov_reg_reg (code, dreg, ins->sreg2, size);
+ amd64_prefix (code, X86_LOCK_PREFIX);
+ amd64_xadd_membase_reg (code, ins->inst_basereg, ins->inst_offset, dreg, size);
+ /* dreg contains the old value, add with sreg2 value */
+ amd64_alu_reg_reg_size (code, X86_ADD, dreg, ins->sreg2, size);
+
+ if (ins->dreg != dreg)
+ amd64_mov_reg_reg (code, ins->dreg, dreg, size);
+
+ break;
+ }
+ case OP_ATOMIC_EXCHANGE_I4:
+ case OP_ATOMIC_EXCHANGE_I8: {
+ /* Atomic exchange: swap sreg2 into [inst_basereg + inst_offset] and
+  * leave the old memory value in dreg. Implemented as a lock-cmpxchg
+  * retry loop (cmpxchg hardwires RAX as the comparand/result register).
+  */
+ guchar *br[2];
+ int sreg2 = ins->sreg2;
+ int breg = ins->inst_basereg;
+ guint32 size;
+ gboolean need_push = FALSE, rdx_pushed = FALSE;
+
+ if (ins->opcode == OP_ATOMIC_EXCHANGE_I8)
+ size = 8;
+ else
+ size = 4;
+
+ /*
+ * See http://msdn.microsoft.com/en-us/magazine/cc302329.aspx for
+ * an explanation of how this works.
+ */
+
+ /* cmpxchg uses eax as comparand, need to make sure we can use it
+ * hack to overcome limits in x86 reg allocator
+ * (req: dreg == eax and sreg2 != eax and breg != eax)
+ */
+ g_assert (ins->dreg == AMD64_RAX);
+
+ if (breg == AMD64_RAX && ins->sreg2 == AMD64_RAX)
+ /* Highly unlikely, but possible */
+ need_push = TRUE;
+
+ /* The pushes invalidate rsp */
+ if ((breg == AMD64_RAX) || need_push) {
+ /* Move the base pointer out of RAX: R11 is a scratch register here. */
+ amd64_mov_reg_reg (code, AMD64_R11, breg, 8);
+ breg = AMD64_R11;
+ }
+
+ /* We need the EAX reg for the comparand */
+ if (ins->sreg2 == AMD64_RAX) {
+ if (breg != AMD64_R11) {
+ amd64_mov_reg_reg (code, AMD64_R11, AMD64_RAX, 8);
+ sreg2 = AMD64_R11;
+ } else {
+ /* Both breg and sreg2 collided with RAX; R11 is taken, so
+  * spill RDX to the stack and use it for the new value. */
+ g_assert (need_push);
+ amd64_push_reg (code, AMD64_RDX);
+ amd64_mov_reg_reg (code, AMD64_RDX, AMD64_RAX, size);
+ sreg2 = AMD64_RDX;
+ rdx_pushed = TRUE;
+ }
+ }
+
+ /* Seed RAX with the current memory value for the first cmpxchg. */
+ amd64_mov_reg_membase (code, AMD64_RAX, breg, ins->inst_offset, size);
+
+ /* Retry loop: lock cmpxchg stores sreg2 only if memory still equals
+  * RAX; on failure RAX is reloaded with the observed value and we
+  * branch back (br[1] is patched to br[0]) to try again. On success
+  * RAX (== dreg) holds the old value. */
+ br [0] = code; amd64_prefix (code, X86_LOCK_PREFIX);
+ amd64_cmpxchg_membase_reg_size (code, breg, ins->inst_offset, sreg2, size);
+ br [1] = code; amd64_branch8 (code, X86_CC_NE, -1, FALSE);
+ amd64_patch (br [1], br [0]);
+
+ if (rdx_pushed)
+ amd64_pop_reg (code, AMD64_RDX);
+
+ break;
+ }
+ case OP_ATOMIC_CAS_I4:
+ case OP_ATOMIC_CAS_I8: {
+ /* Compare-and-swap: if [sreg1 + inst_offset] equals the comparand in
+  * sreg3 (fixed to RAX by the cmpxchg encoding), store sreg2 there.
+  * Either way RAX receives the old memory value, forwarded to dreg.
+  */
+ guint32 size;
+
+ if (ins->opcode == OP_ATOMIC_CAS_I8)
+ size = 8;
+ else
+ size = 4;
+
+ /*
+ * See http://msdn.microsoft.com/en-us/magazine/cc302329.aspx for
+ * an explanation of how this works.
+ */
+ /* Register constraints enforced by the allocator: comparand in RAX,
+  * base and new-value registers distinct from it and each other. */
+ g_assert (ins->sreg3 == AMD64_RAX);
+ g_assert (ins->sreg1 != AMD64_RAX);
+ g_assert (ins->sreg1 != ins->sreg2);
+
+ amd64_prefix (code, X86_LOCK_PREFIX);
+ amd64_cmpxchg_membase_reg_size (code, ins->sreg1, ins->inst_offset, ins->sreg2, size);
+
+ /* cmpxchg left the old value in RAX; copy it out if dreg differs. */
+ if (ins->dreg != AMD64_RAX)
+ amd64_mov_reg_reg (code, ins->dreg, AMD64_RAX, size);
+ break;
+ }
+#ifdef MONO_ARCH_SIMD_INTRINSICS
+ /* TODO: Some of these IR opcodes are marked as no clobber when they indeed do. */
+ /* Packed single-precision float ops. The (sreg1, sreg2) emitters below
+  * use the first register as both source and destination in the SSE
+  * encoding, so presumably the allocator constrains dreg == sreg1 for
+  * these — TODO(review) confirm against the machine description. */
+ case OP_ADDPS:
+ amd64_sse_addps_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_DIVPS:
+ amd64_sse_divps_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_MULPS:
+ amd64_sse_mulps_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_SUBPS:
+ amd64_sse_subps_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_MAXPS:
+ amd64_sse_maxps_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_MINPS:
+ amd64_sse_minps_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_COMPPS:
+ /* inst_c0 selects the CMPPS predicate (0..7). */
+ g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 7);
+ amd64_sse_cmpps_reg_reg_imm (code, ins->sreg1, ins->sreg2, ins->inst_c0);
+ break;
+ case OP_ANDPS:
+ amd64_sse_andps_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_ANDNPS:
+ amd64_sse_andnps_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_ORPS:
+ amd64_sse_orps_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_XORPS:
+ amd64_sse_xorps_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ /* Unary ops below take explicit (dreg, sreg1) operands. */
+ case OP_SQRTPS:
+ amd64_sse_sqrtps_reg_reg (code, ins->dreg, ins->sreg1);
+ break;
+ case OP_RSQRTPS:
+ amd64_sse_rsqrtps_reg_reg (code, ins->dreg, ins->sreg1);
+ break;
+ case OP_RCPPS:
+ amd64_sse_rcpps_reg_reg (code, ins->dreg, ins->sreg1);
+ break;
+ case OP_ADDSUBPS:
+ amd64_sse_addsubps_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_HADDPS:
+ amd64_sse_haddps_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_HSUBPS:
+ amd64_sse_hsubps_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_DUPPS_HIGH:
+ amd64_sse_movshdup_reg_reg (code, ins->dreg, ins->sreg1);
+ break;
+ case OP_DUPPS_LOW:
+ amd64_sse_movsldup_reg_reg (code, ins->dreg, ins->sreg1);
+ break;
+
+ /* Word/dword shuffles: inst_c0 is the 8-bit immediate shuffle mask. */
+ case OP_PSHUFLEW_HIGH:
+ g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 0xFF);
+ amd64_sse_pshufhw_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_c0);
+ break;
+ case OP_PSHUFLEW_LOW:
+ g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 0xFF);
+ amd64_sse_pshuflw_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_c0);
+ break;
+ case OP_PSHUFLED:
+ g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 0xFF);
+ amd64_sse_pshufd_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_c0);
+ break;
+
+ /* Packed double-precision float ops; same two-operand convention as
+  * the PS cases above (first register operand is also the destination). */
+ case OP_ADDPD:
+ amd64_sse_addpd_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_DIVPD:
+ amd64_sse_divpd_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_MULPD:
+ amd64_sse_mulpd_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_SUBPD:
+ amd64_sse_subpd_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_MAXPD:
+ amd64_sse_maxpd_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_MINPD:
+ amd64_sse_minpd_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_COMPPD:
+ /* inst_c0 selects the CMPPD predicate (0..7). */
+ g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 7);
+ amd64_sse_cmppd_reg_reg_imm (code, ins->sreg1, ins->sreg2, ins->inst_c0);
+ break;
+ case OP_ANDPD:
+ amd64_sse_andpd_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_ANDNPD:
+ amd64_sse_andnpd_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_ORPD:
+ amd64_sse_orpd_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_XORPD:
+ amd64_sse_xorpd_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_SQRTPD:
+ amd64_sse_sqrtpd_reg_reg (code, ins->dreg, ins->sreg1);
+ break;
+ case OP_ADDSUBPD:
+ amd64_sse_addsubpd_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_HADDPD:
+ amd64_sse_haddpd_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_HSUBPD:
+ amd64_sse_hsubpd_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_DUPPD:
+ amd64_sse_movddup_reg_reg (code, ins->dreg, ins->sreg1);
+ break;
+
+ /* Collect the top bit of each byte of sreg1 into an integer mask in dreg. */
+ case OP_EXTRACT_MASK:
+ amd64_sse_pmovmskb_reg_reg (code, ins->dreg, ins->sreg1);
+ break;
+
+ /* Full-width bitwise ops on the integer SSE unit. */
+ case OP_PAND:
+ amd64_sse_pand_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_POR:
+ amd64_sse_por_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PXOR:
+ amd64_sse_pxor_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+
+ /* Packed integer add/sub, grouped by element width (B/W/D/Q). */
+ case OP_PADDB:
+ amd64_sse_paddb_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PADDW:
+ amd64_sse_paddw_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PADDD:
+ amd64_sse_paddd_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PADDQ:
+ amd64_sse_paddq_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+
+ case OP_PSUBB:
+ amd64_sse_psubb_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PSUBW:
+ amd64_sse_psubw_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PSUBD:
+ amd64_sse_psubd_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PSUBQ:
+ amd64_sse_psubq_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+
+ /* Unsigned (_UN) and signed packed min/max and average. */
+ case OP_PMAXB_UN:
+ amd64_sse_pmaxub_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PMAXW_UN:
+ amd64_sse_pmaxuw_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PMAXD_UN:
+ amd64_sse_pmaxud_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+
+ case OP_PMAXB:
+ amd64_sse_pmaxsb_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PMAXW:
+ amd64_sse_pmaxsw_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PMAXD:
+ amd64_sse_pmaxsd_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+
+ case OP_PAVGB_UN:
+ amd64_sse_pavgb_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PAVGW_UN:
+ amd64_sse_pavgw_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+
+ case OP_PMINB_UN:
+ amd64_sse_pminub_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PMINW_UN:
+ amd64_sse_pminuw_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PMIND_UN:
+ amd64_sse_pminud_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+
+ case OP_PMINB:
+ amd64_sse_pminsb_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PMINW:
+ amd64_sse_pminsw_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PMIND:
+ amd64_sse_pminsd_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+
+ /* Packed element-wise compares; results are all-ones/all-zero masks. */
+ case OP_PCMPEQB:
+ amd64_sse_pcmpeqb_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PCMPEQW:
+ amd64_sse_pcmpeqw_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PCMPEQD:
+ amd64_sse_pcmpeqd_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PCMPEQQ:
+ amd64_sse_pcmpeqq_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+
+ case OP_PCMPGTB:
+ amd64_sse_pcmpgtb_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PCMPGTW:
+ amd64_sse_pcmpgtw_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PCMPGTD:
+ amd64_sse_pcmpgtd_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PCMPGTQ:
+ amd64_sse_pcmpgtq_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+
+ case OP_PSUM_ABS_DIFF:
+ amd64_sse_psadbw_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+
+ /* Interleave low halves of sreg1/sreg2 by element width. */
+ case OP_UNPACK_LOWB:
+ amd64_sse_punpcklbw_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_UNPACK_LOWW:
+ amd64_sse_punpcklwd_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_UNPACK_LOWD:
+ amd64_sse_punpckldq_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_UNPACK_LOWQ:
+ amd64_sse_punpcklqdq_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_UNPACK_LOWPS:
+ amd64_sse_unpcklps_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_UNPACK_LOWPD:
+ amd64_sse_unpcklpd_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+
+ /* Interleave high halves. */
+ case OP_UNPACK_HIGHB:
+ amd64_sse_punpckhbw_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_UNPACK_HIGHW:
+ amd64_sse_punpckhwd_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_UNPACK_HIGHD:
+ amd64_sse_punpckhdq_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_UNPACK_HIGHQ:
+ amd64_sse_punpckhqdq_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_UNPACK_HIGHPS:
+ amd64_sse_unpckhps_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_UNPACK_HIGHPD:
+ amd64_sse_unpckhpd_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+
+ /* Narrowing packs: signed (packss*) vs unsigned-saturating (packus*). */
+ case OP_PACKW:
+ amd64_sse_packsswb_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PACKD:
+ amd64_sse_packssdw_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PACKW_UN:
+ amd64_sse_packuswb_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PACKD_UN:
+ amd64_sse_packusdw_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+
+ /* Saturating add/sub, unsigned then signed variants. */
+ case OP_PADDB_SAT_UN:
+ amd64_sse_paddusb_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PSUBB_SAT_UN:
+ amd64_sse_psubusb_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PADDW_SAT_UN:
+ amd64_sse_paddusw_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PSUBW_SAT_UN:
+ amd64_sse_psubusw_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+
+ case OP_PADDB_SAT:
+ amd64_sse_paddsb_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PSUBB_SAT:
+ amd64_sse_psubsb_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PADDW_SAT:
+ amd64_sse_paddsw_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PSUBW_SAT:
+ amd64_sse_psubsw_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+
+ /* Packed multiplies: low halves (pmull*), unsigned 32x32->64 (pmuludq),
+  * and high halves (pmulh*w). */
+ case OP_PMULW:
+ amd64_sse_pmullw_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PMULD:
+ amd64_sse_pmulld_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PMULQ:
+ amd64_sse_pmuludq_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PMULW_HIGH_UN:
+ amd64_sse_pmulhuw_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PMULW_HIGH:
+ amd64_sse_pmulhw_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+
+ /* Shifts operate on dreg in place; the count comes from inst_imm
+  * (immediate form) or sreg2 (register form). */
+ case OP_PSHRW:
+ amd64_sse_psrlw_reg_imm (code, ins->dreg, ins->inst_imm);
+ break;
+ case OP_PSHRW_REG:
+ amd64_sse_psrlw_reg_reg (code, ins->dreg, ins->sreg2);
+ break;
+
+ case OP_PSARW:
+ amd64_sse_psraw_reg_imm (code, ins->dreg, ins->inst_imm);
+ break;
+ case OP_PSARW_REG:
+ amd64_sse_psraw_reg_reg (code, ins->dreg, ins->sreg2);
+ break;