+2008-10-15 Rodrigo Kumpera <rkumpera@novell.com>
+
+ * mini-ops.h: Add ops for packed shuffle/max/avg and
+ extract mask.
+
+ * cpu-x86.md: Same.
+
+ * mini-x86.c (mono_arch_output_basic_block): Same.
+
+	* simd-intrinsics.c (vector8us_intrinsics): Add Average, ShuffleHigh,
+	ShuffleLow and ExtractByteMask.
+
+	* simd-intrinsics.c (simd_intrinsic_emit_extract_mask): New function
+	to emit the extract mask op.
+
+	* simd-intrinsics.c (simd_intrinsic_emit_shuffle): Retrofit this function
+ to emit word shuffles.
+
2008-10-15 Mark Probst <mark.probst@gmail.com>

	* mini.c (mono_allocate_stack_slots_full): Align stack frame to
rcpps: dest:x src1:x len:4
shuffleps: dest:x src1:x len:5
+pshufflew_high: dest:x src1:x len:5
+pshufflew_low: dest:x src1:x len:5
+extract_mask: dest:i src1:x len:4
+
paddb: dest:x src1:x src2:x len:4 clob:1
paddw: dest:x src1:x src2:x len:4 clob:1
paddd: dest:x src1:x src2:x len:4 clob:1
psubw: dest:x src1:x src2:x len:4 clob:1
psubd: dest:x src1:x src2:x len:4 clob:1
+pmaxb_un: dest:x src1:x src2:x len:4 clob:1
+pmaxw_un: dest:x src1:x src2:x len:5 clob:1
+pmaxd_un: dest:x src1:x src2:x len:5 clob:1
+
+pavgb_un: dest:x src1:x src2:x len:4 clob:1
+pavgw_un: dest:x src1:x src2:x len:4 clob:1
+
unpack_lowb: dest:x src1:x src2:x len:4 clob:1
unpack_loww: dest:x src1:x src2:x len:4 clob:1
unpack_lowd: dest:x src1:x src2:x len:4 clob:1
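Aside: decoding the machine description fields, per the format documented in cpu-x86.md's own header comment (a hedged annotation, not part of the patch):

	# pmaxw_un: dest:x src1:x src2:x len:5 clob:1
	#
	#   dest:x / src1:x / src2:x -> operands live in XMM registers
	#   len:5                    -> worst-case encoded length in bytes
	#                               (PMAXUW is the SSE4.1 66 0F 38 3E /r form)
	#   clob:1                   -> the instruction overwrites its first source,
	#                               so the allocator must give dest == src1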
MINI_OP(OP_RCPPS, "rcpps", XREG, XREG, NONE)
MINI_OP(OP_SHUFLEPS, "shuffleps", XREG, XREG, NONE)
+MINI_OP(OP_PSHUFLEW_HIGH, "pshufflew_high", XREG, XREG, NONE)
+MINI_OP(OP_PSHUFLEW_LOW, "pshufflew_low", XREG, XREG, NONE)
+
+MINI_OP(OP_EXTRACT_MASK, "extract_mask", IREG, XREG, NONE)
+
MINI_OP(OP_PAND, "pand", XREG, XREG, XREG)
MINI_OP(OP_POR, "por", XREG, XREG, XREG)
MINI_OP(OP_PXOR, "pxor", XREG, XREG, XREG)
MINI_OP(OP_PSUBW, "psubw", XREG, XREG, XREG)
MINI_OP(OP_PSUBD, "psubd", XREG, XREG, XREG)
+MINI_OP(OP_PMAXB_UN, "pmaxb_un", XREG, XREG, XREG)
+MINI_OP(OP_PMAXW_UN, "pmaxw_un", XREG, XREG, XREG)
+MINI_OP(OP_PMAXD_UN, "pmaxd_un", XREG, XREG, XREG)
+
+MINI_OP(OP_PAVGB_UN, "pavgb_un", XREG, XREG, XREG)
+MINI_OP(OP_PAVGW_UN, "pavgw_un", XREG, XREG, XREG)
+
MINI_OP(OP_UNPACK_LOWB, "unpack_lowb", XREG, XREG, XREG)
MINI_OP(OP_UNPACK_LOWW, "unpack_loww", XREG, XREG, XREG)
MINI_OP(OP_UNPACK_LOWD, "unpack_lowd", XREG, XREG, XREG)
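OP_EXTRACT_MASK is the only new op with an integer destination (IREG): it lowers to PMOVMSKB, which packs the most significant bit of each byte of the source into a scalar bitmask. A minimal scalar model of that semantics (illustrative C only; extract_byte_mask is a made-up name, not runtime code):

	static unsigned int
	extract_byte_mask (const unsigned char bytes [16])
	{
		unsigned int mask = 0;
		int i;
		/* bit i of the result is the sign bit of byte i */
		for (i = 0; i < 16; ++i)
			mask |= (unsigned int)(bytes [i] >> 7) << i;
		return mask;
	}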
g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 0xFF);
x86_pshufd_reg_reg (code, ins->dreg, ins->sreg1, ins->inst_c0);
break;
+
+ case OP_PSHUFLEW_HIGH:
+ g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 0xFF);
+ x86_pshufw_reg_reg (code, ins->dreg, ins->sreg1, ins->inst_c0, 1);
+ break;
+ case OP_PSHUFLEW_LOW:
+ g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 0xFF);
+ x86_pshufw_reg_reg (code, ins->dreg, ins->sreg1, ins->inst_c0, 0);
+ break;
+
+ case OP_EXTRACT_MASK:
+ x86_sse_alu_pd_reg_reg (code, X86_SSE_PMOVMSKB, ins->dreg, ins->sreg1);
+ break;
+
case OP_PAND:
x86_sse_alu_pd_reg_reg (code, X86_SSE_PAND, ins->sreg1, ins->sreg2);
break;
x86_sse_alu_pd_reg_reg (code, X86_SSE_PSUBD, ins->sreg1, ins->sreg2);
break;
+ case OP_PMAXB_UN:
+ x86_sse_alu_pd_reg_reg (code, X86_SSE_PMAXUB, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PMAXW_UN:
+ x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMAXUW, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PMAXD_UN:
+ x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMAXUD, ins->sreg1, ins->sreg2);
+ break;
+
+ case OP_PAVGB_UN:
+ x86_sse_alu_pd_reg_reg (code, X86_SSE_PAVGB, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PAVGW_UN:
+ x86_sse_alu_pd_reg_reg (code, X86_SSE_PAVGW, ins->sreg1, ins->sreg2);
+ break;
+
case OP_UNPACK_LOWB:
x86_sse_alu_pd_reg_reg (code, X86_SSE_PUNPCKLBW, ins->sreg1, ins->sreg2);
break;
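Note the emitter split above: PMAXUB, PAVGB and PAVGW have plain 66 0F encodings, while unsigned word/dword max only exists in the SSE4.1 66 0F 38 forms, hence x86_sse_alu_sse41_reg_reg and the len:5 entries in cpu-x86.md. The averaging ops round upward; a scalar sketch of one PAVGW lane (illustrative only; pavgw_lane is a made-up helper):

	static unsigned short
	pavgw_lane (unsigned short a, unsigned short b)
	{
		/* unsigned average with rounding: widen first so the +1
		   cannot overflow, then shift back down */
		return (unsigned short)(((unsigned int)a + b + 1) >> 1);
	}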
SIMD_EMIT_SHUFFLE,
SIMD_EMIT_SHIFT,
SIMD_EMIT_LOAD_ALIGNED,
- SIMD_EMIT_STORE_ALIGNED
+ SIMD_EMIT_STORE_ALIGNED,
+ SIMD_EMIT_EXTRACT_MASK
};
/*This is the size of the largest method name + 1 (to fit the ending \0). Align to 4 as well.*/
{ "Max", OP_MAXPS, SIMD_EMIT_BINARY },
{ "Min", OP_MINPS, SIMD_EMIT_BINARY },
{ "Reciprocal", OP_RCPPS, SIMD_EMIT_UNARY },
- { "Shuffle", 0, SIMD_EMIT_SHUFFLE },
+ { "Shuffle", OP_SHUFLEPS, SIMD_EMIT_SHUFFLE },
{ "Sqrt", OP_SQRTPS, SIMD_EMIT_UNARY },
{ "StoreAligned", 0, SIMD_EMIT_STORE_ALIGNED },
{ "get_W", 3, SIMD_EMIT_GETTER },
*/
static const SimdIntrinsc vector8us_intrinsics[] = {
{ "AddWithSaturation", OP_PADDW_SAT_UN, SIMD_EMIT_BINARY },
+ { "Average", OP_PAVGW_UN, SIMD_EMIT_BINARY },
+ { "ExtractByteMask", 0, SIMD_EMIT_EXTRACT_MASK },
{ "LoadAligned", 0, SIMD_EMIT_LOAD_ALIGNED },
{ "ShiftRightArithmetic", OP_PSARW, SIMD_EMIT_SHIFT },
+ { "ShuffleHigh", OP_PSHUFLEW_HIGH, SIMD_EMIT_SHUFFLE },
+ { "ShuffleLow", OP_PSHUFLEW_LOW, SIMD_EMIT_SHUFFLE },
{ "StoreAligned", 0, SIMD_EMIT_STORE_ALIGNED },
{ "SubWithSaturation", OP_PSUBW_SAT_UN, SIMD_EMIT_BINARY },
{ "UnpackHigh", OP_UNPACK_HIGHW, SIMD_EMIT_BINARY },
/*TODO Exposing shuffle is not a good thing as it's non obvious. We should come up with better abstractions*/
if (args [1]->opcode != OP_ICONST) {
- g_warning ("Vector4f:Shuffle with non literals is not yet supported");
+ g_warning ("Shuffle with non literals is not yet supported");
g_assert_not_reached ();
}
vreg = get_simd_vreg (cfg, cmethod, args [0]);
NULLIFY_INS (args [1]);
- MONO_INST_NEW (cfg, ins, OP_SHUFLEPS);
+ MONO_INST_NEW (cfg, ins, intrinsic->opcode);
ins->klass = cmethod->klass;
ins->sreg1 = vreg;
ins->inst_c0 = args [1]->inst_c0;
return ins;
}
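With the opcode now taken from the table entry, this single emitter covers SHUFPS as well as both word shuffles; only the hardware's reading of inst_c0 differs. For PSHUFLW/PSHUFHW the imm8 holds four two-bit word selectors and the other quadword passes through untouched. A scalar model of the low-word case (illustrative only; pshuflw_model is a made-up name, and dst/src are assumed not to alias):

	static void
	pshuflw_model (unsigned short dst [8], const unsigned short src [8], unsigned char imm)
	{
		int i;
		/* each two-bit field of imm selects one of the four low words */
		for (i = 0; i < 4; ++i)
			dst [i] = src [(imm >> (2 * i)) & 3];
		/* the high quadword is copied through unchanged */
		for (i = 4; i < 8; ++i)
			dst [i] = src [i];
	}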
+static MonoInst*
+simd_intrinsic_emit_extract_mask (const SimdIntrinsc *intrinsic, MonoCompile *cfg, MonoMethod *cmethod, MonoInst **args)
+{
+ MonoInst *ins;
+ int vreg;
+
+ vreg = get_simd_vreg (cfg, cmethod, args [0]);
+
+ MONO_INST_NEW (cfg, ins, OP_EXTRACT_MASK);
+ ins->klass = cmethod->klass;
+ ins->sreg1 = vreg;
+ ins->type = STACK_I4;
+ ins->dreg = alloc_ireg (cfg);
+ MONO_ADD_INS (cfg->cbb, ins);
+
+ return ins;
+}
+
static const char *
simd_version_name (guint32 version)
{
return simd_intrinsic_emit_load_aligned (result, cfg, cmethod, args);
case SIMD_EMIT_STORE_ALIGNED:
return simd_intrinsic_emit_store_aligned (result, cfg, cmethod, args);
+ case SIMD_EMIT_EXTRACT_MASK:
+ return simd_intrinsic_emit_extract_mask (result, cfg, cmethod, args);
}
g_assert_not_reached ();
}