2008-10-23 Rodrigo Kumpera <rkumpera@novell.com>
author Rodrigo Kumpera <kumpera@gmail.com>
Fri, 24 Oct 2008 00:36:18 +0000 (00:36 -0000)
committer Rodrigo Kumpera <kumpera@gmail.com>
Fri, 24 Oct 2008 00:36:18 +0000 (00:36 -0000)
* mini-ops.h: Add packed signed min, max and compare greater.

* cpu-x86.md: Same.

* mini-x86.c (mono_arch_output_basic_block): Same. Add packed add/sub with
saturation.

* simd-methods.h: Add CompareGreaterThan.

* simd-methods.h: Remove CompareEquals.

* simd-intrinsics.c: Add new TODO entry and some cosmetic changes.

* simd-intrinsics.c (vector16sb_intrinsics): New table of intrinsic type.

* simd-intrinsics.c (vector4f_intrinsics): Rename CompareEquals to
CompareEqual.

svn path=/trunk/mono/; revision=116899

mono/mini/ChangeLog
mono/mini/cpu-x86.md
mono/mini/mini-ops.h
mono/mini/mini-x86.c
mono/mini/simd-intrinsics.c
mono/mini/simd-methods.h

index 791a62ad327544483613f2a1d48ed96eb01b1ff2..11f53144e8e3789e97e6e74160e0215179a8b71b 100644 (file)
@@ -1,3 +1,23 @@
+2008-10-23  Rodrigo Kumpera  <rkumpera@novell.com>
+
+       * mini-ops.h: Add packed signed min, max and compare greater.
+       
+       * cpu-x86.md: Same.
+
+       * mini-x86.c (mono_arch_output_basic_block): Same. Add packed add/sub with
+       saturation.
+
+       * simd-methods.h: Add CompareGreaterThan.
+
+       * simd-methods.h: Remove CompareEquals.
+
+       * simd-intrinsics.c: Add new TODO entry and some cosmetic changes.
+
+       * simd-intrinsics.c (vector16sb_intrinsics): New table of intrinsic type.
+
+       * simd-intrinsics.c (vector4f_intrinsics): Rename CompareEquals to
+       CompareEqual.
+
 2008-10-23  Rodrigo Kumpera  <rkumpera@novell.com>
 
        * basic-simd.cs: Fix tests due to change in the API.
index 4b02e206b0375f95a4398eb176c323155b355b90..d1aef605b464be3f9c876b7393f542d14288d427 100644 (file)
@@ -454,6 +454,10 @@ pmaxb_un: dest:x src1:x src2:x len:4 clob:1
 pmaxw_un: dest:x src1:x src2:x len:5 clob:1
 pmaxd_un: dest:x src1:x src2:x len:5 clob:1
 
+pmaxb: dest:x src1:x src2:x len:5 clob:1
+pmaxw: dest:x src1:x src2:x len:4 clob:1
+pmaxd: dest:x src1:x src2:x len:5 clob:1
+
 pavgb_un: dest:x src1:x src2:x len:4 clob:1
 pavgw_un: dest:x src1:x src2:x len:4 clob:1
 
@@ -461,10 +465,18 @@ pminb_un: dest:x src1:x src2:x len:4 clob:1
 pminw_un: dest:x src1:x src2:x len:5 clob:1
 pmind_un: dest:x src1:x src2:x len:5 clob:1
 
+pminb: dest:x src1:x src2:x len:5 clob:1
+pminw: dest:x src1:x src2:x len:4 clob:1
+pmind: dest:x src1:x src2:x len:5 clob:1
+
 pcmpeqb: dest:x src1:x src2:x len:4 clob:1
 pcmpeqw: dest:x src1:x src2:x len:4 clob:1
 pcmpeqd: dest:x src1:x src2:x len:4 clob:1
 
+pcmpgtb: dest:x src1:x src2:x len:4 clob:1
+pcmpgtw: dest:x src1:x src2:x len:4 clob:1
+pcmpgtd: dest:x src1:x src2:x len:4 clob:1
+
 psumabsdiff: dest:x src1:x src2:x len:4 clob:1
 
 unpack_lowb: dest:x src1:x src2:x len:4 clob:1
index b92db98c18d7e362d2ca52fc5f59dd504774f5b3..dca84e8f4dcbd742a8690e71e1e66ffdb2a6781a 100644 (file)
@@ -677,6 +677,10 @@ MINI_OP(OP_PMAXB_UN, "pmaxb_un", XREG, XREG, XREG)
 MINI_OP(OP_PMAXW_UN, "pmaxw_un", XREG, XREG, XREG)
 MINI_OP(OP_PMAXD_UN, "pmaxd_un", XREG, XREG, XREG)
 
+MINI_OP(OP_PMAXB, "pmaxb", XREG, XREG, XREG)
+MINI_OP(OP_PMAXW, "pmaxw", XREG, XREG, XREG)
+MINI_OP(OP_PMAXD, "pmaxd", XREG, XREG, XREG)
+
 MINI_OP(OP_PAVGB_UN, "pavgb_un", XREG, XREG, XREG)
 MINI_OP(OP_PAVGW_UN, "pavgw_un", XREG, XREG, XREG)
 
@@ -684,10 +688,18 @@ MINI_OP(OP_PMINB_UN, "pminb_un", XREG, XREG, XREG)
 MINI_OP(OP_PMINW_UN, "pminw_un", XREG, XREG, XREG)
 MINI_OP(OP_PMIND_UN, "pmind_un", XREG, XREG, XREG)
 
+MINI_OP(OP_PMINB, "pminb", XREG, XREG, XREG)
+MINI_OP(OP_PMINW, "pminw", XREG, XREG, XREG)
+MINI_OP(OP_PMIND, "pmind", XREG, XREG, XREG)
+
 MINI_OP(OP_PCMPEQB, "pcmpeqb", XREG, XREG, XREG)
 MINI_OP(OP_PCMPEQW, "pcmpeqw", XREG, XREG, XREG)
 MINI_OP(OP_PCMPEQD, "pcmpeqd", XREG, XREG, XREG)
 
+MINI_OP(OP_PCMPGTB, "pcmpgtb", XREG, XREG, XREG)
+MINI_OP(OP_PCMPGTW, "pcmpgtw", XREG, XREG, XREG)
+MINI_OP(OP_PCMPGTD, "pcmpgtd", XREG, XREG, XREG)
+
 MINI_OP(OP_PSUM_ABS_DIFF, "psumabsdiff", XREG, XREG, XREG)
 
 MINI_OP(OP_UNPACK_LOWB, "unpack_lowb", XREG, XREG, XREG)
index cda4097893e0e4399df99e30dff013b482333050..75b014eda972407a259d82a11255ac9540494243 100644 (file)
@@ -3922,6 +3922,16 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                case OP_PMAXD_UN:
                        x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMAXUD, ins->sreg1, ins->sreg2);
                        break;
+               
+               case OP_PMAXB:
+                       x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMAXSB, ins->sreg1, ins->sreg2);
+                       break;
+               case OP_PMAXW:
+                       x86_sse_alu_pd_reg_reg (code, X86_SSE_PMAXSW, ins->sreg1, ins->sreg2);
+                       break;
+               case OP_PMAXD:
+                       x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMAXSD, ins->sreg1, ins->sreg2);
+                       break;
 
                case OP_PAVGB_UN:
                        x86_sse_alu_pd_reg_reg (code, X86_SSE_PAVGB, ins->sreg1, ins->sreg2);
@@ -3940,6 +3950,16 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMINUD, ins->sreg1, ins->sreg2);
                        break;
 
+               case OP_PMINB:
+                       x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMINSB, ins->sreg1, ins->sreg2);
+                       break;
+               case OP_PMINW:
+                       x86_sse_alu_pd_reg_reg (code, X86_SSE_PMINSW, ins->sreg1, ins->sreg2);
+                       break;
+               case OP_PMIND:
+                       x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMINSD, ins->sreg1, ins->sreg2);
+                       break;
+
                case OP_PCMPEQB:
                        x86_sse_alu_pd_reg_reg (code, X86_SSE_PCMPEQB, ins->sreg1, ins->sreg2);
                        break;
@@ -3950,6 +3970,16 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        x86_sse_alu_pd_reg_reg (code, X86_SSE_PCMPEQD, ins->sreg1, ins->sreg2);
                        break;
 
+               case OP_PCMPGTB:
+                       x86_sse_alu_pd_reg_reg (code, X86_SSE_PCMPGTB, ins->sreg1, ins->sreg2);
+                       break;
+               case OP_PCMPGTW:
+                       x86_sse_alu_pd_reg_reg (code, X86_SSE_PCMPGTW, ins->sreg1, ins->sreg2);
+                       break;
+               case OP_PCMPGTD:
+                       x86_sse_alu_pd_reg_reg (code, X86_SSE_PCMPGTD, ins->sreg1, ins->sreg2);
+                       break;
+
                case OP_PSUM_ABS_DIFF:
                        x86_sse_alu_pd_reg_reg (code, X86_SSE_PSADBW, ins->sreg1, ins->sreg2);
                        break;
@@ -3999,6 +4029,19 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                case OP_PSUBW_SAT_UN:
                        x86_sse_alu_pd_reg_reg (code, X86_SSE_PSUBUSW, ins->sreg1, ins->sreg2);
                        break;
+
+               case OP_PADDB_SAT:
+                       x86_sse_alu_pd_reg_reg (code, X86_SSE_PADDSB, ins->sreg1, ins->sreg2);
+                       break;
+               case OP_PSUBB_SAT:
+                       x86_sse_alu_pd_reg_reg (code, X86_SSE_PSUBSB, ins->sreg1, ins->sreg2);
+                       break;
+               case OP_PADDW_SAT:
+                       x86_sse_alu_pd_reg_reg (code, X86_SSE_PADDSW, ins->sreg1, ins->sreg2);
+                       break;
+               case OP_PSUBW_SAT:
+                       x86_sse_alu_pd_reg_reg (code, X86_SSE_PSUBSW, ins->sreg1, ins->sreg2);
+                       break;
                        
                case OP_PMULW:
                        x86_sse_alu_pd_reg_reg (code, X86_SSE_PMULLW, ins->sreg1, ins->sreg2);
index 141a80c54795661f23e40a7a2f3ca2eb283f6993..3208dedb6c0dd254cac990a31423922b62156c2d 100644 (file)
@@ -32,7 +32,8 @@ TODO figure out what's wrong with OP_STOREX_MEMBASE_REG and OP_STOREX_MEMBASE (t
 TODO maybe add SSE3 emulation on top of SSE2, or just implement the corresponding functions using SSE2 intrinsics.
 TODO pass simd arguments in registers or, at least, add SSE support for pushing large (>=16) valuetypes 
 TODO pass simd args byval to a non-intrinsic method cause some useless local var load/store to happen.
-TODO check if we need to init the SSE control word with better precision. 
+TODO check if we need to init the SSE control word with better precision.
+TODO add support for 3 reg sources in mini without slowing the common path. Or find a way to make MASKMOVDQU work.  
 
 General notes for SIMD intrinsics.
 
@@ -127,7 +128,7 @@ static const SimdIntrinsc vector4f_intrinsics[] = {
        { SN_ctor, 0, SIMD_EMIT_CTOR },
        { SN_AndNot, OP_ANDNPS, SIMD_EMIT_BINARY },
        { SN_AddSub, OP_ADDSUBPS, SIMD_EMIT_BINARY, SIMD_VERSION_SSE3 },
-       { SN_CompareEquals, OP_COMPPS, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1, SIMD_COMP_EQ },
+       { SN_CompareEqual, OP_COMPPS, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1, SIMD_COMP_EQ },
        { SN_CompareLessEqual, OP_COMPPS, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1, SIMD_COMP_LE },
        { SN_CompareLessThan, OP_COMPPS, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1, SIMD_COMP_LT },
        { SN_CompareNotEqual, OP_COMPPS, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1, SIMD_COMP_NEQ },
@@ -206,7 +207,7 @@ static const SimdIntrinsc vector8us_intrinsics[] = {
        { SN_CompareEqual, OP_PCMPEQW, SIMD_EMIT_BINARY },
        { SN_ExtractByteMask, 0, SIMD_EMIT_EXTRACT_MASK },
        { SN_LoadAligned, 0, SIMD_EMIT_LOAD_ALIGNED },
-       { SN_Max, OP_PMAXW_UN, SIMD_EMIT_BINARY, SIMD_VERSION_SSE41},
+       { SN_Max, OP_PMAXW_UN, SIMD_EMIT_BINARY, SIMD_VERSION_SSE41 },
        { SN_Min, OP_PMINW_UN, SIMD_EMIT_BINARY, SIMD_VERSION_SSE41 },
        { SN_MultiplyStoreHigh, OP_PMULW_HIGH_UN, SIMD_EMIT_BINARY },
        { SN_ShiftRightArithmetic, OP_PSARW, SIMD_EMIT_SHIFT },
@@ -255,6 +256,32 @@ static const SimdIntrinsc vector16b_intrinsics[] = {
        { SN_op_Subtraction, OP_PSUBB, SIMD_EMIT_BINARY },
 };
 
+/*
+Missing:
+.ctor
+getters
+setters
+ */
+static const SimdIntrinsc vector16sb_intrinsics[] = {
+       { SN_AddWithSaturation, OP_PADDB_SAT, SIMD_EMIT_BINARY },
+       { SN_CompareEqual, OP_PCMPEQB, SIMD_EMIT_BINARY },
+       { SN_CompareGreaterThan, OP_PCMPGTB, SIMD_EMIT_BINARY },
+       { SN_ExtractByteMask, 0, SIMD_EMIT_EXTRACT_MASK },
+       { SN_LoadAligned, 0, SIMD_EMIT_LOAD_ALIGNED },
+       { SN_Max, OP_PMAXB, SIMD_EMIT_BINARY, SIMD_VERSION_SSE41 },
+       { SN_Min, OP_PMINB, SIMD_EMIT_BINARY, SIMD_VERSION_SSE41 },
+       { SN_StoreAligned, 0, SIMD_EMIT_STORE_ALIGNED },
+       { SN_SubWithSaturation, OP_PSUBB_SAT, SIMD_EMIT_BINARY },
+       { SN_UnpackHigh, OP_UNPACK_HIGHB, SIMD_EMIT_BINARY },
+       { SN_UnpackLow, OP_UNPACK_LOWB, SIMD_EMIT_BINARY },
+       { SN_op_Addition, OP_PADDB, SIMD_EMIT_BINARY },
+       { SN_op_BitwiseAnd, OP_PAND, SIMD_EMIT_BINARY },
+       { SN_op_BitwiseOr, OP_POR, SIMD_EMIT_BINARY },
+       { SN_op_BitwiseXor, OP_PXOR, SIMD_EMIT_BINARY },
+       { SN_op_Explicit, 0, SIMD_EMIT_CAST },
+       { SN_op_Subtraction, OP_PSUBB, SIMD_EMIT_BINARY },
+};
+
 static guint32 simd_supported_versions;
 
 /*TODO match using number of parameters as well*/
@@ -835,6 +862,8 @@ mono_emit_simd_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSign
                return emit_intrinsics (cfg, cmethod, fsig, args, vector8us_intrinsics, sizeof (vector8us_intrinsics) / sizeof (SimdIntrinsc));
        if (!strcmp ("Vector16b", cmethod->klass->name))
                return emit_intrinsics (cfg, cmethod, fsig, args, vector16b_intrinsics, sizeof (vector16b_intrinsics) / sizeof (SimdIntrinsc));
+       if (!strcmp ("Vector16sb", cmethod->klass->name))
+               return emit_intrinsics (cfg, cmethod, fsig, args, vector16sb_intrinsics, sizeof (vector16sb_intrinsics) / sizeof (SimdIntrinsc));
        return NULL;
 }
 
index fa988e7329eeceb551bd7be2a5eb6bd38c0879a2..fdbfde3fc477a89a69f901477ad8898843195c69 100644 (file)
@@ -3,7 +3,7 @@ SIMD_METHOD("AddWithSaturation", SN_AddWithSaturation)
 SIMD_METHOD("AndNot", SN_AndNot)
 SIMD_METHOD("Average", SN_Average)
 SIMD_METHOD("CompareEqual", SN_CompareEqual)
-SIMD_METHOD("CompareEquals", SN_CompareEquals)
+SIMD_METHOD("CompareGreaterThan", SN_CompareGreaterThan)
 SIMD_METHOD("CompareLessEqual", SN_CompareLessEqual)
 SIMD_METHOD("CompareLessThan", SN_CompareLessThan)
 SIMD_METHOD("CompareNotEqual", SN_CompareNotEqual)