* cpu-x86.md: Added new instructions to handle float to int using SSE regs.

author Rodrigo Kumpera <kumpera@gmail.com>

Mon, 6 Oct 2008 21:42:27 +0000 (21:42 -0000)

committer Rodrigo Kumpera <kumpera@gmail.com>

Mon, 6 Oct 2008 21:42:27 +0000 (21:42 -0000)
author Rodrigo Kumpera <kumpera@gmail.com>
Mon, 6 Oct 2008 21:42:27 +0000 (21:42 -0000)
committer Rodrigo Kumpera <kumpera@gmail.com>
Mon, 6 Oct 2008 21:42:27 +0000 (21:42 -0000)
diff --git a/mono/mini/cpu-x86.md b/mono/mini/cpu-x86.md

index 2ff80e7f4ebafd8bf51ca25cec65a8882a8370e9..e3cb5afe52c7b4bb1d715d64cff92de1c15a07d7 100644 (file)
--- a/mono/mini/cpu-x86.md
+++ b/mono/mini/cpu-x86.md
@@ -443,4 +443,6 @@ storex_aligned_membase_reg: dest:b src1:x len:7
  push_r4: src1:f len:13
  loadx_stack: dest:x len: 13
  
+fconv_to_r8_x: dest:x src1:f len:60 
+xconv_r8_to_i4: dest:y src1:x len:60
  
diff --git a/mono/mini/mini-ops.h b/mono/mini/mini-ops.h

index 8d3331e81879dab240fc0bc5ac58cf68197740b7..6b0e5a7d9312976ae56b7cbb9644b65ca4387cbe 100644 (file)
--- a/mono/mini/mini-ops.h
+++ b/mono/mini/mini-ops.h
@@ -657,6 +657,9 @@ MINI_OP(OP_LOADX_R4, "loadx_r4", FREG, IREG, NONE)
  MINI_OP(OP_PUSH_R4, "push_r4", NONE, FREG, NONE)
  MINI_OP(OP_LOADX_STACK, "loadx_stack", XREG, NONE, NONE)
  
+MINI_OP(OP_FCONV_TO_R8_X, "fconv_to_r8_x", XREG, FREG, NONE)
+MINI_OP(OP_XCONV_R8_TO_I4, "xconv_r8_to_i4", IREG, XREG, NONE)
+
  #endif
  
  MINI_OP(OP_XMOVE,   "xmove", XREG, XREG, NONE)
diff --git a/mono/mini/mini-x86.c b/mono/mini/mini-x86.c

index 793bf3c66c4c38246d9df05d6849b07d72746dcd..7d37ae4eac01125584b670be2912beca314cc0de 100644 (file)
--- a/mono/mini/mini-x86.c
+++ b/mono/mini/mini-x86.c
@@ -1800,7 +1800,9 @@ static unsigned char*
  emit_float_to_int (MonoCompile *cfg, guchar *code, int dreg, int size, gboolean is_signed)
  {
  #define XMM_TEMP_REG 0
-       if (cfg->opt & MONO_OPT_SSE2 && size < 8) {
+       /*This SSE2 optimization must not be done with OPT_SIMD in place as it clobbers xmm0.*/
+       /*The xmm pass decomposes OP_FCONV_ ops anyway.*/
+       if (cfg->opt & MONO_OPT_SSE2 && size < 8 && !(cfg->opt & MONO_OPT_SIMD)) {
                 /* optimize by assigning a local var for this use so we avoid
                  * the stack manipulations */
                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
@@ -3860,6 +3862,29 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                         x86_movups_reg_membase (code, ins->dreg, X86_ESP, 0);
                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 16);
                         break;
+
+               case OP_FCONV_TO_R8_X:
+                       x86_fst_membase (code, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset, TRUE, TRUE);
+                       x86_movsd_reg_membase (code, ins->dreg, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset);
+                       break;
+
+               case OP_XCONV_R8_TO_I4:
+                       x86_cvttsd2si (code, ins->dreg, ins->sreg1);
+                       switch (ins->backend.source_opcode) {
+                       case OP_FCONV_TO_I1:
+                               x86_widen_reg (code, ins->dreg, ins->dreg, TRUE, FALSE);
+                               break;
+                       case OP_FCONV_TO_U1:
+                               x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
+                               break;
+                       case OP_FCONV_TO_I2:
+                               x86_widen_reg (code, ins->dreg, ins->dreg, TRUE, TRUE);
+                               break;
+                       case OP_FCONV_TO_U2:
+                               x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, TRUE);
+                               break;
+                       }                       
+                       break;
  #endif
                 default:
                         g_warning ("unknown opcode %s\n", mono_inst_name (ins->opcode));
@@ -5159,3 +5184,66 @@ mono_arch_context_get_int_reg (MonoContext *ctx, int reg)
         default: return ((gpointer)(&ctx->eax)[reg]);
         }
  }
+
+#ifdef MONO_ARCH_SIMD_INTRINSICS
+/*
+ * get_float_to_x_spill_area:
+ *
+ *   Return the variable cached in cfg->fconv_to_r8_x_var, creating it on the
+ * first call. The variable is created as an OP_LOCAL of type double and is
+ * flagged MONO_INST_VOLATILE. mono_arch_decompose_opts () attaches it to
+ * OP_FCONV_TO_R8_X as backend.spill_var.
+ */
+static MonoInst*
+get_float_to_x_spill_area (MonoCompile *cfg)
+{
+       if (!cfg->fconv_to_r8_x_var) { /* first request for this cfg: create and cache the variable */
+               cfg->fconv_to_r8_x_var = mono_compile_create_var (cfg, &mono_defaults.double_class->byval_arg, OP_LOCAL);
+               cfg->fconv_to_r8_x_var->flags |= MONO_INST_VOLATILE; /*FIXME, use the don't regalloc flag*/
+       }       
+       return cfg->fconv_to_r8_x_var;
+}
+
+/*
+ * mono_arch_decompose_opts:
+ *
+ *   Convert all fconv ops that MONO_OPT_SSE2 would get wrong.
+ *
+ * Nothing is changed unless both MONO_OPT_SSE2 and MONO_OPT_SIMD are set in
+ * cfg->opt and INS is one of OP_FCONV_TO_I1/U1/I2/U2/I4/I.
+ *
+ * For those opcodes a new OP_FCONV_TO_R8_X is inserted before INS. It reads
+ * the original sreg1 of INS and writes a freshly allocated vreg. INS is then
+ * rewritten in place into OP_XCONV_R8_TO_I4, which reads that vreg and writes
+ * the original dreg of INS. The original opcode is recorded in
+ * backend.source_opcode.
+ */
+void
+mono_arch_decompose_opts (MonoCompile *cfg, MonoInst *ins)
+{
+       MonoInst *fconv;
+
+       int dreg, src_opcode;
+       g_assert (cfg->new_ir);
+       /* Only applies when SSE2 and SIMD optimizations are both enabled. */
+       if (!(cfg->opt & MONO_OPT_SSE2) || !(cfg->opt & MONO_OPT_SIMD))
+               return;
+       /* Remember the original opcode; every other opcode is left untouched. */
+       switch (src_opcode = ins->opcode) {
+       case OP_FCONV_TO_I1:
+       case OP_FCONV_TO_U1:
+       case OP_FCONV_TO_I2:
+       case OP_FCONV_TO_U2:
+       case OP_FCONV_TO_I4:
+       case OP_FCONV_TO_I:
+               break;
+       default:
+               return;
+       }
+
+       /* dreg is the IREG and sreg1 is the FREG */
+       MONO_INST_NEW (cfg, fconv, OP_FCONV_TO_R8_X);
+       fconv->klass = NULL; /*FIXME, what can I use here as the Mono.Simd lib might not be loaded yet*/
+       fconv->sreg1 = ins->sreg1;
+       fconv->dreg = mono_alloc_ireg (cfg);
+       fconv->type = STACK_VTYPE;
+       fconv->backend.spill_var = get_float_to_x_spill_area (cfg);
+
+       mono_bblock_insert_before_ins (cfg->cbb, ins, fconv);
+       /* dreg is saved first; presumably NULLIFY_INS resets the register fields of ins (TODO confirm). */
+       dreg = ins->dreg;
+       NULLIFY_INS (ins);
+       ins->opcode = OP_XCONV_R8_TO_I4;
+       /* Rebuild ins so it reads the new vreg and writes the original destination. */
+       ins->klass = mono_defaults.int32_class;
+       ins->sreg1 = fconv->dreg;
+       ins->dreg = dreg;
+       ins->type = STACK_I4;
+       ins->backend.source_opcode = src_opcode;
+
+
+}
+#endif
+
diff --git a/mono/mini/mini-x86.h b/mono/mini/mini-x86.h

index 434758e3aab86d4b0c5bd1840ab2c5d5266fb0cf..e714eb7b58b0171c2764b714aef3102f1eaf55bd 100644 (file)
--- a/mono/mini/mini-x86.h
+++ b/mono/mini/mini-x86.h
@@ -291,6 +291,10 @@ typedef struct {
  
  #define MONO_ARCH_HAVE_CMOV_OPS 1
  
+#ifdef MONO_ARCH_SIMD_INTRINSICS
+#define MONO_ARCH_HAVE_DECOMPOSE_OPTS 1
+#endif
+
  #if !defined(__APPLE__)
  #define MONO_ARCH_AOT_SUPPORTED 1
  #endif
diff --git a/mono/mini/mini.h b/mono/mini/mini.h

index b7db9c71ddc76fa8bc3f5f1f343afa1a10809c31..4f10bea39450d65b79203b82ae2fac0d6db9e731 100644 (file)
--- a/mono/mini/mini.h
+++ b/mono/mini/mini.h
@@ -559,7 +559,8 @@ struct MonoInst {
                 gint shift_amount;
                 gboolean is_pinvoke; /* for variables in the unmanaged marshal format */
                 gboolean record_cast_details; /* For CEE_CASTCLASS */
-               MonoInst *spill_var; /* for OP_ICONV_TO_R8_RAW */
+               MonoInst *spill_var; /* for OP_ICONV_TO_R8_RAW and OP_FCONV_TO_R8_X */
+               guint16 source_opcode; /*OP_XCONV_R8_TO_I4 needs to know which op was used to do proper widening*/
         } backend;
         
         MonoClass *klass;
@@ -978,8 +979,11 @@ typedef struct {
         MonoInst **tailcall_valuetype_addrs;
  
         /* Used to implement iconv_to_r8_raw on archs that can't do raw
-       copy between an ireg and a freg*/
+       copy between an ireg and a freg. This is an int32 var.*/
         MonoInst *iconv_raw_var;
+
+       /* Used to implement fconv_to_r8_x. This is a double (8 bytes) var.*/
+       MonoInst *fconv_to_r8_x_var;
  } MonoCompile;
  
  typedef enum {
author	Rodrigo Kumpera <kumpera@gmail.com>
	Mon, 6 Oct 2008 21:42:27 +0000 (21:42 -0000)
committer	Rodrigo Kumpera <kumpera@gmail.com>
	Mon, 6 Oct 2008 21:42:27 +0000 (21:42 -0000)
mono/mini/cpu-x86.md		patch \| blob \| history
mono/mini/mini-ops.h		patch \| blob \| history
mono/mini/mini-x86.c		patch \| blob \| history
mono/mini/mini-x86.h		patch \| blob \| history
mono/mini/mini.h		patch \| blob \| history