From 7471d303d3ee4f58446b2e5856b4d136dc15827a Mon Sep 17 00:00:00 2001 From: Neale Ferguson Date: Thu, 29 Oct 2015 10:31:24 -0400 Subject: [PATCH] s390x-codegen.h - Initial SIMD support cpu-s390x.md - Add gc-safe-point instruction mini-ops.h - Add TARGET_S390X for gc-safe-point mini-s390x.c - Initial SIMD support; Fix floating point conversion mini-s390x.h - Initial SIMD support --- mono/arch/s390x/s390x-codegen.h | 540 +++++++++++++++++++++-- mono/mini/cpu-s390x.md | 5 +- mono/mini/mini-ops.h | 2 +- mono/mini/mini-s390x.c | 747 ++++++++++++++++++++++++++++++-- mono/mini/mini-s390x.h | 8 + 5 files changed, 1214 insertions(+), 88 deletions(-) diff --git a/mono/arch/s390x/s390x-codegen.h b/mono/arch/s390x/s390x-codegen.h index 9e219a3b61f..4c3cd243520 100644 --- a/mono/arch/s390x/s390x-codegen.h +++ b/mono/arch/s390x/s390x-codegen.h @@ -137,6 +137,42 @@ typedef enum { s390_fpc = 256, } S390SpecialRegister; +typedef enum { + s390_VR0 = 0, + s390_VR1 = 1, + s390_VR2 = 2, + s390_VR3 = 3, + s390_VR4 = 4, + s390_VR5 = 5, + s390_VR6 = 6, + s390_VR7 = 7, + s390_VR8 = 8, + s390_VR9 = 9, + s390_VR10 = 10, + s390_VR11 = 11, + s390_VR12 = 12, + s390_VR13 = 13, + s390_VR14 = 14, + s390_VR15 = 15, + s390_VR16 = 16, + s390_VR17 = 17, + s390_VR18 = 18, + s390_VR19 = 19, + s390_VR20 = 20, + s390_VR21 = 21, + s390_VR22 = 22, + s390_VR23 = 23, + s390_VR24 = 24, + s390_VR25 = 25, + s390_VR26 = 26, + s390_VR27 = 27, + s390_VR28 = 28, + s390_VR29 = 29, + s390_VR30 = 30, + s390_VR31 = 31, + s390_VR_NREG = 32, +} s390_VR_Reg_No; + #define s390_is_imm16(val) ((glong)val >= (glong) SHRT_MIN && \ (glong)val <= (glong) SHRT_MAX) #define s390_is_imm32(val) ((glong)val >= (glong) INT_MIN && \ @@ -205,12 +241,34 @@ typedef struct { int im; } I_Format; +typedef struct { + short op; + char xx; + char ri1 : 4; + char ri2 : 4; +} IE_Format; + +typedef struct { + short op; + short m1 : 4; + short ri2 : 12; + short i3; +} MII_Format; + typedef struct { char op; char r1 : 4; char r2 : 4; } RR_Format; 
+typedef struct { + short op; + char r1 : 4; + char xx : 4; + char r3 : 4; + char r4 : 4; +} __attribute__ ((packed)) RRD_Format; + typedef struct { short op; char xx; @@ -251,29 +309,30 @@ typedef struct { } RRF_Format_4; typedef struct { - char op; + char op1; char r1 : 4; - char x2 : 4; - char b2 : 4; - short d2 : 12; -} RX_Format; + char r2 : 4; + short b4 : 4; + short d4 : 12; + char m3 : 4; + char xx : 4; + char op2; +} RRS_Format; typedef struct { - char op1; + char op; char r1 : 4; char x2 : 4; - char b2 : 4; - int d2 : 12; - char xx; - char op2; -} RXE_Format; + short b2 : 4; + short d2 : 12; +} RX_Format; typedef struct { char op1; char r3 : 4; char x2 : 4; - char b2 : 4; - int d2 : 12; + short b2 : 4; + short d2 : 12; char r1 : 4; char xx : 4; char op2; @@ -283,7 +342,7 @@ typedef struct { char op1; char r1 : 4; char x2 : 4; - char b2 : 4; + int b2 : 4; int d2 : 20; char op2; } __attribute__ ((packed)) RXY_Format; @@ -292,32 +351,34 @@ typedef struct { char op; char r1 : 4; char r3 : 4; - char b2 : 4; - int d2 : 12; + short b2 : 4; + short d2 : 12; } RS_Format_1; typedef struct { char op; char r1 : 4; char m3 : 4; - char b2 : 4; - int d2 : 12; + short b2 : 4; + short d2 : 12; } RS_Format_2; typedef struct { char op; char r1 : 4; char xx : 4; - char b2 : 4; - int d2 : 12; + short b2 : 4; + short dl2 : 12; + char dh2; } RS_Format_3; typedef struct { char op1; char r1 : 4; char r3 : 4; - char b2 : 4; - int d2 : 20; + short b2 : 4; + short dl2 : 12; + char dh2; char op2; } __attribute__ ((packed)) RSY_Format_1; @@ -325,8 +386,9 @@ typedef struct { char op1; char r1 : 4; char m3 : 4; - char b2 : 4; - int d2 : 20; + short b2 : 4; + short dl2 : 12; + char dh2; char op2; } __attribute__ ((packed)) RSY_Format_2; @@ -334,25 +396,25 @@ typedef struct { char op1; char l1 : 4; char xx : 4; - char b1 : 4; - int d1 : 12; + short b1 : 4; + short d1 : 12; char yy; char op2; -} RSL_Format; +} __attribute__ ((packed)) RSL_Format; typedef struct { char op; char r1 : 4; char 
r3 : 4; short i2; -} RSI_Format; +} __attribute__ ((packed)) RSI_Format; typedef struct { char op1; char m1 : 4; char op2 : 4; short i2; -} RI_Format; +} __attribute__ ((packed)) RI_Format; typedef struct { char op1; @@ -361,7 +423,7 @@ typedef struct { short i2; char xx; char op2; -} RIE_Format_1; +} __attribute__ ((packed)) RIE_Format_1; typedef struct { char op1; @@ -371,7 +433,7 @@ typedef struct { char m2 : 4; char xx : 4; char op2; -} RIE_Format_2; +} __attribute__ ((packed)) RIE_Format_2; typedef struct { char op1; @@ -380,7 +442,7 @@ typedef struct { short d; char i; char op2; -} RIE_Format_3; +} __attribute__ ((packed)) RIE_Format_3; typedef struct { char op1; @@ -390,7 +452,45 @@ typedef struct { char m3 : 4; char xx : 4; char op2; -} RIE_Format_4; +} __attribute__ ((packed)) RIE_Format_4; + +typedef struct { + char op1; + char r1 : 4; + char r3 : 4; + short ri2; + char xx; + char op2; +} __attribute__ ((packed)) RIE_Format_5; + +typedef struct { + char op1; + char r1 : 4; + char r2 : 4; + char i3; + char i4; + char i5; + char op2; +} __attribute__ ((packed)) RIE_Format_6; + +typedef struct { + char op1; + char r1 : 4; + char m3 : 4; + short i2; + char xx; + char op2; +} __attribute__ ((packed)) RIE_Format_7; + +typedef struct { + char op1; + char r1 : 4; + char m3 : 4; + int b4 : 4; + int d4 : 12; + char i2; + char op2; +} __attribute__ ((packed)) RIS_Format; typedef struct { char op1; @@ -406,12 +506,30 @@ typedef struct { int i2; } __attribute__ ((packed)) RIL_Format_2; +typedef struct { + char op1; /* 1st opcode byte: 8 bits, so the packed layout is 6 bytes */ + char r1 : 4; + char x2 : 4; + short b2 : 4; + short d1 : 12; + char m3 : 4; + char xx : 4; + char op2; +} __attribute__ ((packed)) RXE_Format; + typedef struct { char op; char i2; + short b1 : 4; + short d1 : 12; +} __attribute__ ((packed)) SI_Format; + +typedef struct { + short op; char b1 : 4; short d1 : 12; -} SI_Format; + short i2; +} __attribute__ ((packed)) SIL_Format; typedef struct { char op1; @@ -421,50 +539,59 @@ typedef struct { char op2; } 
__attribute__ ((packed)) SIY_Format; +typedef struct { + char op1; + char m1 : 4; + char xx : 4; + short b3 : 4; + short d3 : 12; + short ri2; +} __attribute__ ((packed)) SMI_Format; + typedef struct { short op; - char b2 : 4; + short b2 : 4; short d2 : 12; -} S_Format; +} __attribute__ ((packed)) S_Format; typedef struct { char op; char ll; - char b1 : 4; + short b1 : 4; short d1 : 12; - char b2 : 4; + short b2 : 4; short d2 : 12; -} SS_Format_1; +} __attribute__ ((packed)) SS_Format_1; typedef struct { char op; char l1 : 4; char l2 : 4; - char b1 : 4; + short b1 : 4; short d1 : 12; - char b2 : 4; + short b2 : 4; short d2 : 12; -} SS_Format_2; +} __attribute__ ((packed)) SS_Format_2; typedef struct { char op; char r1 : 4; char r3 : 4; - char b1 : 4; + short b1 : 4; short d1 : 12; - char b2 : 4; + short b2 : 4; short d2 : 12; -} SS_Format_3; +} __attribute__ ((packed)) SS_Format_3; typedef struct { char op; char r1 : 4; char r3 : 4; - char b2 : 4; + short b2 : 4; short d2 : 12; - char b4 : 4; + short b4 : 4; short d4 : 12; -} SS_Format_4; +} __attribute__ ((packed)) SS_Format_4; typedef struct { short op; @@ -484,6 +611,189 @@ typedef struct { short d2 : 12; } __attribute__ ((packed)) SSF_Format; +typedef struct { + char op1; /* 1st opcode byte */ + char v1 : 4; + char xx : 4; + short i2; + char m3 : 4; + char rxb : 4; /* fifth (high) bit of each vector register field */ + char op2; +} __attribute__ ((packed)) VRIa_Format; + +typedef struct { + char op1; /* 1st opcode byte */ + char v1 : 4; + char xx : 4; + char i2; + char i3; + char m4 : 4; + char rxb : 4; + char op2; +} __attribute__ ((packed)) VRIb_Format; + +typedef struct { + char op1; /* 1st opcode byte */ + char v1 : 4; + char v3 : 4; + short i2; + char m4 : 4; + char rxb : 4; + char op2; +} __attribute__ ((packed)) VRIc_Format; + +typedef struct { + char op1; /* 1st opcode byte */ + char v1 : 4; + char v2 : 4; + char v3 : 4; + char xx : 4; + char i4; + char m5 : 4; + char rxb : 4; + char op2; +} __attribute__ ((packed)) VRId_Format; + +typedef struct { + char op1; /* 1st opcode byte */ + char v1 : 4; + char v2 : 4; + short i3 : 12; + char m5 : 4; + char m4 : 4; 
+ char rxb : 4; + char op2; +} __attribute__ ((packed)) VRIe_Format; + +typedef struct { + char op1; /* 1st opcode byte */ + char v1 : 4; + char v2 : 4; + char xx; + char m5 : 4; + char m4 : 4; + char m3 : 4; + char rxb : 4; + char op2; +} __attribute__ ((packed)) VRRa_Format; + +typedef struct { + char op1; /* 1st opcode byte */ + char v1 : 4; + char v2 : 4; + char v3 : 4; + char xx : 4; + char m5 : 4; + char yy : 4; + char m4 : 4; + char rxb : 4; + char op2; +} __attribute__ ((packed)) VRRb_Format; + +typedef struct { + char op1; /* 1st opcode byte */ + char v1 : 4; + char v2 : 4; + char v3 : 4; + char xx : 4; + char m5 : 4; + char m4 : 4; + char m3 : 4; + char rxb : 4; + char op2; +} __attribute__ ((packed)) VRRc_Format; + +typedef struct { + char op1; /* 1st opcode byte */ + char v1 : 4; + char v2 : 4; + char v3 : 4; + char m5 : 4; + char m6 : 4; + char xx : 4; + char v4 : 4; + char rxb : 4; + char op2; +} __attribute__ ((packed)) VRRd_Format; + +typedef struct { + char op1; /* 1st opcode byte */ + char v1 : 4; + char v2 : 4; + char v3 : 4; + char m6 : 4; + char xx : 4; + char m5 : 4; + char v4 : 4; + char rxb : 4; + char op2; +} __attribute__ ((packed)) VRRe_Format; + +typedef struct { + char op1; /* 1st opcode byte */ + char v1 : 4; + char r2 : 4; + char r3 : 4; + short xx; + char rxb : 4; + char op2; +} __attribute__ ((packed)) VRRf_Format; + +typedef struct { + char op1; /* 1st opcode byte */ + char v1 : 4; + char v3 : 4; + char b2 : 4; + short d2 : 12; + char m4 : 4; + char rxb : 4; + char op2; +} __attribute__ ((packed)) VRSa_Format; + +typedef struct { + char op1; /* 1st opcode byte */ + char v1 : 4; + char r3 : 4; + char b2 : 4; + short d2 : 12; + char m4 : 4; + char rxb : 4; + char op2; +} __attribute__ ((packed)) VRSb_Format; + +typedef struct { + char op1; /* 1st opcode byte */ + char r1 : 4; + char v3 : 4; + char b2 : 4; + short d2 : 12; + char m4 : 4; + char rxb : 4; + char op2; +} __attribute__ ((packed)) VRSc_Format; + +typedef struct { + char op1; /* 1st opcode byte */ + char v1 : 4; + char v2 : 4; + char b2 : 4; + short d2 : 12; + char m3 : 4; + char rxb : 4; + char op2; +} __attribute__ ((packed)) VRV_Format; + +typedef struct { + char op1; /* 1st opcode byte */ + char 
v1 : 4; + char x2 : 4; + char b2 : 4; + short d2 : 12; + char m3 : 4; + char rxb : 4; + char op2; +} __attribute__ ((packed)) VRX_Format; + #define s390_emit16(c, x) do \ { \ *((guint16 *) c) = (guint16) x; \ @@ -677,6 +987,148 @@ typedef struct { s390_emit16(c, ((s2) << 12 | ((p2) & 0xfff))); \ } while (0) +/* + * Emitters for the vector facility instruction formats. Every vector + * instruction is three halfwords: opcode byte 1 plus two 4-bit fields, a + * format specific halfword, then a 4-bit mask/register field, the RXB + * nibble and opcode byte 2. A register field holds the low four bits of + * a vector register number; RXB holds the fifth bit of each one, by field + * position (8: bits 8-11, 4: bits 12-15, 2: bits 16-19, 1: bits 32-35). + */ +#define S390_VRIa(c,opc,v1,i2,m3) do \ +{ \ + int rxb = (((v1) > 15) << 3); \ + s390_emit16(c, (((opc) & 0xff00) | (((v1) & 0xf) << 4))); \ + s390_emit16(c, ((i2) & 0xffff)); \ + s390_emit16(c, ((((m3) & 0xf) << 12) | (rxb << 8) | ((opc) & 0xff))); \ +} while (0) + +#define S390_VRIb(c,opc,v1,i2,i3,m4) do \ +{ \ + int rxb = (((v1) > 15) << 3); \ + s390_emit16(c, (((opc) & 0xff00) | (((v1) & 0xf) << 4))); \ + s390_emit16(c, ((((i2) & 0xff) << 8) | ((i3) & 0xff))); \ + s390_emit16(c, ((((m4) & 0xf) << 12) | (rxb << 8) | ((opc) & 0xff))); \ +} while (0) + +#define S390_VRIc(c,opc,v1,v3,i2,m4) do \ +{ \ + int rxb = (((v1) > 15) << 3) | \ + (((v3) > 15) << 2); \ + s390_emit16(c, (((opc) & 0xff00) | (((v1) & 0xf) << 4) | ((v3) & 0xf))); \ + s390_emit16(c, ((i2) & 0xffff)); \ + s390_emit16(c, ((((m4) & 0xf) << 12) | (rxb << 8) | ((opc) & 0xff))); \ +} while (0) + +#define S390_VRId(c,opc,v1,v2,v3,i4,m5) do \ +{ \ + int rxb = (((v1) > 15) << 3) | (((v2) > 15) << 2) | \ + (((v3) > 15) << 1); \ + s390_emit16(c, (((opc) & 0xff00) | (((v1) & 0xf) << 4) | ((v2) & 0xf))); \ + s390_emit16(c, ((((v3) & 0xf) << 12) | ((i4) & 0xff))); \ + s390_emit16(c, ((((m5) & 0xf) << 12) | (rxb << 8) | ((opc) & 0xff))); \ +} while (0) + +#define S390_VRIe(c,opc,v1,v2,i3,m4,m5) do \ +{ \ + int rxb = (((v1) > 15) << 3) | (((v2) > 15) << 2); \ + s390_emit16(c, (((opc) & 0xff00) | (((v1) & 0xf) << 4) | ((v2) & 0xf))); \ + s390_emit16(c, ((((i3) & 0xfff) << 4) | ((m5) & 0xf))); \ + s390_emit16(c, ((((m4) & 0xf) << 12) | (rxb << 8) | ((opc) & 0xff))); \ +} while (0) + 
+#define S390_VRRa(c,opc,v1,v2,m3,m4,m5) do \ +{ \ + int rxb = (((v1) > 15) << 3) | (((v2) > 15) << 2); \ + s390_emit16(c, (((opc) & 0xff00) | (((v1) & 0xf) << 4) | ((v2) & 0xf))); \ + s390_emit16(c, ((((m5) & 0xf) << 4) | ((m4) & 0xf))); \ + s390_emit16(c, ((((m3) & 0xf) << 12) | (rxb << 8) | ((opc) & 0xff))); \ +} while (0) + +#define S390_VRRb(c,opc,v1,v2,v3,m4,m5) do \ +{ \ + int rxb = (((v1) > 15) << 3) | (((v2) > 15) << 2) | \ + (((v3) > 15) << 1); \ + s390_emit16(c, (((opc) & 0xff00) | (((v1) & 0xf) << 4) | ((v2) & 0xf))); \ + s390_emit16(c, ((((v3) & 0xf) << 12) | (((m5) & 0xf) << 4))); \ + s390_emit16(c, ((((m4) & 0xf) << 12) | (rxb << 8) | ((opc) & 0xff))); \ +} while (0) + +#define S390_VRRc(c,opc,v1,v2,v3,m4,m5,m6) do \ +{ \ + int rxb = (((v1) > 15) << 3) | (((v2) > 15) << 2) | \ + (((v3) > 15) << 1); \ + s390_emit16(c, (((opc) & 0xff00) | (((v1) & 0xf) << 4) | ((v2) & 0xf))); \ + s390_emit16(c, ((((v3) & 0xf) << 12) | (((m6) & 0xf) << 4) | ((m5) & 0xf))); \ + s390_emit16(c, ((((m4) & 0xf) << 12) | (rxb << 8) | ((opc) & 0xff))); \ +} while (0) + +#define S390_VRRd(c,opc,v1,v2,v3,v4,m5,m6) do \ +{ \ + int rxb = (((v1) > 15) << 3) | (((v2) > 15) << 2) | \ + (((v3) > 15) << 1) | ((v4) > 15); \ + s390_emit16(c, (((opc) & 0xff00) | (((v1) & 0xf) << 4) | ((v2) & 0xf))); \ + s390_emit16(c, ((((v3) & 0xf) << 12) | (((m5) & 0xf) << 8) | (((m6) & 0xf) << 4))); \ + s390_emit16(c, ((((v4) & 0xf) << 12) | (rxb << 8) | ((opc) & 0xff))); \ +} while (0) + +#define S390_VRRe(c,opc,v1,v2,v3,v4,m5,m6) do \ +{ \ + int rxb = (((v1) > 15) << 3) | (((v2) > 15) << 2) | \ + (((v3) > 15) << 1) | ((v4) > 15); \ + s390_emit16(c, (((opc) & 0xff00) | (((v1) & 0xf) << 4) | ((v2) & 0xf))); \ + s390_emit16(c, ((((v3) & 0xf) << 12) | (((m6) & 0xf) << 8) | ((m5) & 0xf))); \ + s390_emit16(c, ((((v4) & 0xf) << 12) | (rxb << 8) | ((opc) & 0xff))); \ +} while (0) + +#define S390_VRRf(c,opc,v1,r2,r3) do \ +{ \ + int rxb = (((v1) > 15) << 3); \ + s390_emit16(c, (((opc) & 0xff00) | (((v1) & 0xf) << 4) | ((r2) & 0xf))); \ + s390_emit16(c, (((r3) & 0xf) << 12)); \ + s390_emit16(c, ((rxb << 8) | ((opc) & 0xff))); \ +} while (0) + 
+#define S390_VRSa(c,opc,v1,v3,b2,d2,m4) do \ +{ \ + int rxb = (((v1) > 15) << 3) | (((v3) > 15) << 2); \ + s390_emit16(c, (((opc) & 0xff00) | (((v1) & 0xf) << 4) | ((v3) & 0xf))); \ + s390_emit16(c, ((((b2) & 0xf) << 12) | ((d2) & 0xfff))); \ + s390_emit16(c, ((((m4) & 0xf) << 12) | (rxb << 8) | ((opc) & 0xff))); \ +} while (0) + +#define S390_VRSb(c,opc,v1,r3,b2,d2,m4) do \ +{ \ + int rxb = (((v1) > 15) << 3); \ + s390_emit16(c, (((opc) & 0xff00) | (((v1) & 0xf) << 4) | ((r3) & 0xf))); \ + s390_emit16(c, ((((b2) & 0xf) << 12) | ((d2) & 0xfff))); \ + s390_emit16(c, ((((m4) & 0xf) << 12) | (rxb << 8) | ((opc) & 0xff))); \ +} while (0) + +#define S390_VRSc(c,opc,r1,v3,b2,d2,m4) do \ +{ \ + int rxb = (((v3) > 15) << 2); \ + s390_emit16(c, (((opc) & 0xff00) | (((r1) & 0xf) << 4) | ((v3) & 0xf))); \ + s390_emit16(c, ((((b2) & 0xf) << 12) | ((d2) & 0xfff))); \ + s390_emit16(c, ((((m4) & 0xf) << 12) | (rxb << 8) | ((opc) & 0xff))); \ +} while (0) + +#define S390_VRV(c,opc,v1,v2,b2,d2,m3) do \ +{ \ + int rxb = (((v1) > 15) << 3) | (((v2) > 15) << 2); \ + s390_emit16(c, (((opc) & 0xff00) | (((v1) & 0xf) << 4) | ((v2) & 0xf))); \ + s390_emit16(c, ((((b2) & 0xf) << 12) | ((d2) & 0xfff))); \ + s390_emit16(c, ((((m3) & 0xf) << 12) | (rxb << 8) | ((opc) & 0xff))); \ +} while (0) + +#define S390_VRX(c,opc,v1,x2,b2,d2,m3) do \ +{ \ + int rxb = (((v1) > 15) << 3); \ + s390_emit16(c, (((opc) & 0xff00) | (((v1) & 0xf) << 4) | ((x2) & 0xf))); \ + s390_emit16(c, ((((b2) & 0xf) << 12) | ((d2) & 0xfff))); \ + s390_emit16(c, ((((m3) & 0xf) << 12) | (rxb << 8) | ((opc) & 0xff))); \ +} while (0) + #define s390_a(c, r, x, b, d) S390_RX(c, 0x5a, r, x, b, d) #define s390_adb(c, r, x, b, d) S390_RXE(c, 0xed1a, r, x, b, d) #define s390_adbr(c, r1, r2) S390_RRE(c, 0xb31a, r1, r2) diff --git a/mono/mini/cpu-s390x.md b/mono/mini/cpu-s390x.md index cbb566b6676..75d4d95d44d 100644 --- a/mono/mini/cpu-s390x.md +++ b/mono/mini/cpu-s390x.md @@ -131,8 +131,8 @@ float_rem: dest:f src1:f src2:f len:16 float_rem_un: dest:f src1:f src2:f len:16 float_sub: dest:f src1:f src2:f len:6 fmove: dest:f src1:f len:4 -move_f_to_i4: dest:i src1:f len:4 -move_i4_to_f: dest:f src1:i len:8 +move_f_to_i4: dest:i src1:f len:14 +move_i4_to_f: dest:f src1:i len:14 move_f_to_i8: dest:i src1:f len:4 move_i8_to_f: dest:f src1:i len:8 i8const: dest:i len:20 @@ -389,5 +389,6 @@ gc_liveness_def: len:0 gc_liveness_use: len:0 gc_spill_slot_liveness_def: len:0 
gc_param_slot_liveness_def: len:0 +gc_safe_point: clob:c src1:i len:32 generic_class_init: src1:A len:32 clob:c diff --git a/mono/mini/mini-ops.h b/mono/mini/mini-ops.h index 4ff795aeb6c..bab1e3e7aa4 100644 --- a/mono/mini/mini-ops.h +++ b/mono/mini/mini-ops.h @@ -1076,7 +1076,7 @@ MINI_OP(OP_GC_PARAM_SLOT_LIVENESS_DEF, "gc_param_slot_liveness_def", NONE, NONE, MINI_OP(OP_GENERIC_CLASS_INIT, "generic_class_init", NONE, IREG, NONE) /* Arch specific opcodes */ -#if defined(TARGET_AMD64) || defined(TARGET_X86) || defined(TARGET_ARM) || defined(TARGET_ARM64) +#if defined(TARGET_AMD64) || defined(TARGET_X86) || defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_S390X) MINI_OP(OP_GC_SAFE_POINT, "gc_safe_point", NONE, IREG, NONE) #endif diff --git a/mono/mini/mini-s390x.c b/mono/mini/mini-s390x.c index 7072d6cb389..1c775f80341 100644 --- a/mono/mini/mini-s390x.c +++ b/mono/mini/mini-s390x.c @@ -413,6 +413,27 @@ breakpoint_t breakpointCode; static mono_mutex_t mini_arch_mutex; +static const char * grNames[] = { + "s390_r0", "s390_sp", "s390_r2", "s390_r3", "s390_r4", + "s390_r5", "s390_r6", "s390_r7", "s390_r8", "s390_r9", + "s390_r10", "s390_r11", "s390_r12", "s390_r13", "s390_r14", + "s390_r15" +}; + +static const char * fpNames[] = { + "s390_f0", "s390_f1", "s390_f2", "s390_f3", "s390_f4", + "s390_f5", "s390_f6", "s390_f7", "s390_f8", "s390_f9", + "s390_f10", "s390_f11", "s390_f12", "s390_f13", "s390_f14", + "s390_f15" +}; + +static const char * vrNames[] = { + "vr0", "vr1", "vr2", "vr3", "vr4", "vr5", "vr6", "vr7", + "vr8", "vr9", "vr10", "vr11", "vr12", "vr13", "vr14", "vr15", + "vr16", "vr17", "vr18", "vr19", "vr20", "vr21", "vr22", "vr23", + "vr24", "vr25", "vr26", "vr27", "vr28", "vr29", "vr30", "vr31" +}; + /*====================== End of Global Variables ===================*/ /*------------------------------------------------------------------*/ @@ -425,16 +446,10 @@ static mono_mutex_t mini_arch_mutex; 
/*------------------------------------------------------------------*/ const char* -mono_arch_regname (int reg) { - static const char * rnames[] = { - "s390_r0", "s390_sp", "s390_r2", "s390_r3", "s390_r4", - "s390_r5", "s390_r6", "s390_r7", "s390_r8", "s390_r9", - "s390_r10", "s390_r11", "s390_r12", "s390_r13", "s390_r14", - "s390_r15" - }; - +mono_arch_regname (int reg) +{ if (reg >= 0 && reg < 16) - return rnames [reg]; + return grNames [reg]; else return "unknown"; } @@ -451,16 +466,30 @@ mono_arch_regname (int reg) { /*------------------------------------------------------------------*/ const char* -mono_arch_fregname (int reg) { - static const char * rnames[] = { - "s390_f0", "s390_f1", "s390_f2", "s390_f3", "s390_f4", - "s390_f5", "s390_f6", "s390_f7", "s390_f8", "s390_f9", - "s390_f10", "s390_f11", "s390_f12", "s390_f13", "s390_f14", - "s390_f15" - }; - +mono_arch_fregname (int reg) +{ if (reg >= 0 && reg < 16) - return rnames [reg]; + return fpNames [reg]; + else + return "unknown"; +} + +/*========================= End of Function ========================*/ + +/*------------------------------------------------------------------*/ +/* */ +/* Name - mono_arch_xregname */ +/* */ +/* Function - Returns the name of the register specified by */ +/* the input parameter. 
*/ +/* */ +/*------------------------------------------------------------------*/ + +const char * +mono_arch_xregname (int reg) +{ + if (reg >= 0 && reg < s390_VR_NREG) /* reg indexes vrNames: reject negatives as well as values past the last VR */ + return vrNames [reg]; else return "unknown"; } @@ -1339,21 +1368,6 @@ mono_arch_cpu_optimizations (guint32 *exclude_mask) /*========================= End of Function ========================*/ -/*------------------------------------------------------------------*/ -/* */ -/* Name - mono_arch_cpu_enumerate_simd_versions */ -/* */ -/* Function - Returns the SIMD instruction sets on this CPU */ -/* */ -/*------------------------------------------------------------------*/ -guint32 -mono_arch_cpu_enumerate_simd_versions (void) -{ - /* SIMD is currently unimplemented */ - return 0; -} -/*========================= End of Function ========================*/ - /*------------------------------------------------------------------*/ /* */ /* Name - mono_arch_get_allocatable_int_vars */ @@ -3859,11 +3873,14 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) s390_ldgr (code, ins->dreg, ins->sreg1); break; case OP_MOVE_F_TO_I4: - s390_lgdr (code, ins->dreg, ins->sreg1); + s390_ledbr (code, s390_f0, ins->sreg1); + s390_lgdr (code, ins->dreg, s390_f0); + s390_srag (code, ins->dreg, ins->dreg, 0, 32); break; case OP_MOVE_I4_TO_F: - s390_lgfr (code, s390_r0, ins->sreg1); + s390_slag (code, s390_r0, ins->sreg1, 0, 32); s390_ldgr (code, ins->dreg, s390_r0); + s390_ldebr (code, ins->dreg, ins->dreg); break; case OP_FCONV_TO_R4: s390_ledbr (code, ins->dreg, ins->sreg1); @@ -4327,7 +4344,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) } break; case OP_LOADR4_MEMBASE: { - S390_LONG (code, ldy, ld, s390_f15, 0, + S390_LONG (code, ley, le, s390_f15, 0, ins->inst_basereg, ins->inst_offset); s390_ldebr (code, ins->dreg, s390_f15); } @@ -4699,6 +4716,625 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) ins->backend.pc_offset = code - cfg->native_code; bb->spill_slot_defs 
= g_slist_prepend_mempool (cfg->mempool, bb->spill_slot_defs, ins); break; +#ifdef MONO_ARCH_SIMD_INTRINSICS + case OP_ADDPS: + s390x_addps (code, ins->sreg1, ins->sreg2); + break; + case OP_DIVPS: + s390x_divps (code, ins->sreg1, ins->sreg2); + break; + case OP_MULPS: + s390x_mulps (code, ins->sreg1, ins->sreg2); + break; + case OP_SUBPS: + s390x_subps (code, ins->sreg1, ins->sreg2); + break; + case OP_MAXPS: + s390x_maxps (code, ins->sreg1, ins->sreg2); + break; + case OP_MINPS: + s390x_minps (code, ins->sreg1, ins->sreg2); + break; + case OP_COMPPS: + g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 7); + s390x_cmpps_imm (code, ins->sreg1, ins->sreg2, ins->inst_c0); + break; + case OP_ANDPS: + s390x_andps (code, ins->sreg1, ins->sreg2); + break; + case OP_ANDNPS: + s390x_andnps (code, ins->sreg1, ins->sreg2); + break; + case OP_ORPS: + s390x_orps (code, ins->sreg1, ins->sreg2); + break; + case OP_XORPS: + s390x_xorps (code, ins->sreg1, ins->sreg2); + break; + case OP_SQRTPS: + s390x_sqrtps (code, ins->dreg, ins->sreg1); + break; + case OP_RSQRTPS: + s390x_rsqrtps (code, ins->dreg, ins->sreg1); + break; + case OP_RCPPS: + s390x_rcpps (code, ins->dreg, ins->sreg1); + break; + case OP_ADDSUBPS: + s390x_addsubps (code, ins->sreg1, ins->sreg2); + break; + case OP_HADDPS: + s390x_haddps (code, ins->sreg1, ins->sreg2); + break; + case OP_HSUBPS: + s390x_hsubps (code, ins->sreg1, ins->sreg2); + break; + case OP_DUPPS_HIGH: + s390x_movshdup (code, ins->dreg, ins->sreg1); + break; + case OP_DUPPS_LOW: + s390x_movsldup (code, ins->dreg, ins->sreg1); + break; + + case OP_PSHUFLEW_HIGH: + g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 0xFF); + s390x_pshufhw_imm (code, ins->dreg, ins->sreg1, ins->inst_c0); + break; + case OP_PSHUFLEW_LOW: + g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 0xFF); + s390x_pshuflw_imm (code, ins->dreg, ins->sreg1, ins->inst_c0); + break; + case OP_PSHUFLED: + g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 0xFF); + s390x_pshufd_imm (code, ins->dreg, 
ins->sreg1, ins->inst_c0); + break; + case OP_SHUFPS: + g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 0xFF); + s390x_shufps_imm (code, ins->sreg1, ins->sreg2, ins->inst_c0); + break; + case OP_SHUFPD: + g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 0x3); + s390x_shufpd_imm (code, ins->sreg1, ins->sreg2, ins->inst_c0); + break; + + case OP_ADDPD: + s390x_addpd (code, ins->sreg1, ins->sreg2); + break; + case OP_DIVPD: + s390x_divpd (code, ins->sreg1, ins->sreg2); + break; + case OP_MULPD: + s390x_mulpd (code, ins->sreg1, ins->sreg2); + break; + case OP_SUBPD: + s390x_subpd (code, ins->sreg1, ins->sreg2); + break; + case OP_MAXPD: + s390x_maxpd (code, ins->sreg1, ins->sreg2); + break; + case OP_MINPD: + s390x_minpd (code, ins->sreg1, ins->sreg2); + break; + case OP_COMPPD: + g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 7); + s390x_cmppd_imm (code, ins->sreg1, ins->sreg2, ins->inst_c0); + break; + case OP_ANDPD: + s390x_andpd (code, ins->sreg1, ins->sreg2); + break; + case OP_ANDNPD: + s390x_andnpd (code, ins->sreg1, ins->sreg2); + break; + case OP_ORPD: + s390x_orpd (code, ins->sreg1, ins->sreg2); + break; + case OP_XORPD: + s390x_xorpd (code, ins->sreg1, ins->sreg2); + break; + case OP_SQRTPD: + s390x_sqrtpd (code, ins->dreg, ins->sreg1); + break; + case OP_ADDSUBPD: + s390x_addsubpd (code, ins->sreg1, ins->sreg2); + break; + case OP_HADDPD: + s390x_haddpd (code, ins->sreg1, ins->sreg2); + break; + case OP_HSUBPD: + s390x_hsubpd (code, ins->sreg1, ins->sreg2); + break; + case OP_DUPPD: + s390x_movddup (code, ins->dreg, ins->sreg1); + break; + + case OP_EXTRACT_MASK: + s390x_pmovmskb (code, ins->dreg, ins->sreg1); + break; + + case OP_PAND: + s390x_pand (code, ins->sreg1, ins->sreg2); + break; + case OP_POR: + s390x_por (code, ins->sreg1, ins->sreg2); + break; + case OP_PXOR: + s390x_pxor (code, ins->sreg1, ins->sreg2); + break; + + case OP_PADDB: + s390x_paddb (code, ins->sreg1, ins->sreg2); + break; + case OP_PADDW: + s390x_paddw (code, ins->sreg1, ins->sreg2); 
+ break; + case OP_PADDD: + s390x_paddd (code, ins->sreg1, ins->sreg2); + break; + case OP_PADDQ: + s390x_paddq (code, ins->sreg1, ins->sreg2); + break; + + case OP_PSUBB: + s390x_psubb (code, ins->sreg1, ins->sreg2); + break; + case OP_PSUBW: + s390x_psubw (code, ins->sreg1, ins->sreg2); + break; + case OP_PSUBD: + s390x_psubd (code, ins->sreg1, ins->sreg2); + break; + case OP_PSUBQ: + s390x_psubq (code, ins->sreg1, ins->sreg2); + break; + + case OP_PMAXB_UN: + s390x_pmaxub (code, ins->sreg1, ins->sreg2); + break; + case OP_PMAXW_UN: + s390x_pmaxuw (code, ins->sreg1, ins->sreg2); + break; + case OP_PMAXD_UN: + s390x_pmaxud (code, ins->sreg1, ins->sreg2); + break; + + case OP_PMAXB: + s390x_pmaxsb (code, ins->sreg1, ins->sreg2); + break; + case OP_PMAXW: + s390x_pmaxsw (code, ins->sreg1, ins->sreg2); + break; + case OP_PMAXD: + s390x_pmaxsd (code, ins->sreg1, ins->sreg2); + break; + + case OP_PAVGB_UN: + s390x_pavgb (code, ins->sreg1, ins->sreg2); + break; + case OP_PAVGW_UN: + s390x_pavgw (code, ins->sreg1, ins->sreg2); + break; + + case OP_PMINB_UN: + s390x_pminub (code, ins->sreg1, ins->sreg2); + break; + case OP_PMINW_UN: + s390x_pminuw (code, ins->sreg1, ins->sreg2); + break; + case OP_PMIND_UN: + s390x_pminud (code, ins->sreg1, ins->sreg2); + break; + + case OP_PMINB: + s390x_pminsb (code, ins->sreg1, ins->sreg2); + break; + case OP_PMINW: + s390x_pminsw (code, ins->sreg1, ins->sreg2); + break; + case OP_PMIND: + s390x_pminsd (code, ins->sreg1, ins->sreg2); + break; + + case OP_PCMPEQB: + s390x_pcmpeqb (code, ins->sreg1, ins->sreg2); + break; + case OP_PCMPEQW: + s390x_pcmpeqw (code, ins->sreg1, ins->sreg2); + break; + case OP_PCMPEQD: + s390x_pcmpeqd (code, ins->sreg1, ins->sreg2); + break; + case OP_PCMPEQQ: + s390x_pcmpeqq (code, ins->sreg1, ins->sreg2); + break; + + case OP_PCMPGTB: + s390x_pcmpgtb (code, ins->sreg1, ins->sreg2); + break; + case OP_PCMPGTW: + s390x_pcmpgtw (code, ins->sreg1, ins->sreg2); + break; + case OP_PCMPGTD: + s390x_pcmpgtd (code, 
ins->sreg1, ins->sreg2); + break; + case OP_PCMPGTQ: + s390x_pcmpgtq (code, ins->sreg1, ins->sreg2); + break; + + case OP_PSUM_ABS_DIFF: + s390x_psadbw (code, ins->sreg1, ins->sreg2); + break; + + case OP_UNPACK_LOWB: + s390x_punpcklbw (code, ins->sreg1, ins->sreg2); + break; + case OP_UNPACK_LOWW: + s390x_punpcklwd (code, ins->sreg1, ins->sreg2); + break; + case OP_UNPACK_LOWD: + s390x_punpckldq (code, ins->sreg1, ins->sreg2); + break; + case OP_UNPACK_LOWQ: + s390x_punpcklqdq (code, ins->sreg1, ins->sreg2); + break; + case OP_UNPACK_LOWPS: + s390x_unpcklps (code, ins->sreg1, ins->sreg2); + break; + case OP_UNPACK_LOWPD: + s390x_unpcklpd (code, ins->sreg1, ins->sreg2); + break; + + case OP_UNPACK_HIGHB: + s390x_punpckhbw (code, ins->sreg1, ins->sreg2); + break; + case OP_UNPACK_HIGHW: + s390x_punpckhwd (code, ins->sreg1, ins->sreg2); + break; + case OP_UNPACK_HIGHD: + s390x_punpckhdq (code, ins->sreg1, ins->sreg2); + break; + case OP_UNPACK_HIGHQ: + s390x_punpckhqdq (code, ins->sreg1, ins->sreg2); + break; + case OP_UNPACK_HIGHPS: + s390x_unpckhps (code, ins->sreg1, ins->sreg2); + break; + case OP_UNPACK_HIGHPD: + s390x_unpckhpd (code, ins->sreg1, ins->sreg2); + break; + + case OP_PACKW: + s390x_packsswb (code, ins->sreg1, ins->sreg2); + break; + case OP_PACKD: + s390x_packssdw (code, ins->sreg1, ins->sreg2); + break; + case OP_PACKW_UN: + s390x_packuswb (code, ins->sreg1, ins->sreg2); + break; + case OP_PACKD_UN: + s390x_packusdw (code, ins->sreg1, ins->sreg2); + break; + + case OP_PADDB_SAT_UN: + s390x_paddusb (code, ins->sreg1, ins->sreg2); + break; + case OP_PSUBB_SAT_UN: + s390x_psubusb (code, ins->sreg1, ins->sreg2); + break; + case OP_PADDW_SAT_UN: + s390x_paddusw (code, ins->sreg1, ins->sreg2); + break; + case OP_PSUBW_SAT_UN: + s390x_psubusw (code, ins->sreg1, ins->sreg2); + break; + + case OP_PADDB_SAT: + s390x_paddsb (code, ins->sreg1, ins->sreg2); + break; + case OP_PSUBB_SAT: + s390x_psubsb (code, ins->sreg1, ins->sreg2); + break; + case OP_PADDW_SAT: 
+ s390x_paddsw (code, ins->sreg1, ins->sreg2); + break; + case OP_PSUBW_SAT: + s390x_psubsw (code, ins->sreg1, ins->sreg2); + break; + + case OP_PMULW: + s390x_pmullw (code, ins->sreg1, ins->sreg2); + break; + case OP_PMULD: + s390x_pmulld (code, ins->sreg1, ins->sreg2); + break; + case OP_PMULQ: + s390x_pmuludq (code, ins->sreg1, ins->sreg2); + break; + case OP_PMULW_HIGH_UN: + s390x_pmulhuw (code, ins->sreg1, ins->sreg2); + break; + case OP_PMULW_HIGH: + s390x_pmulhw (code, ins->sreg1, ins->sreg2); + break; + + case OP_PSHRW: + s390x_psrlw_reg_imm (code, ins->dreg, ins->inst_imm); + break; + case OP_PSHRW_REG: + s390x_psrlw (code, ins->dreg, ins->sreg2); + break; + + case OP_PSARW: + s390x_psraw_reg_imm (code, ins->dreg, ins->inst_imm); + break; + case OP_PSARW_REG: + s390x_psraw (code, ins->dreg, ins->sreg2); + break; + + case OP_PSHLW: + s390x_psllw_reg_imm (code, ins->dreg, ins->inst_imm); + break; + case OP_PSHLW_REG: + s390x_psllw (code, ins->dreg, ins->sreg2); + break; + + case OP_PSHRD: + s390x_psrld_reg_imm (code, ins->dreg, ins->inst_imm); + break; + case OP_PSHRD_REG: + s390x_psrld (code, ins->dreg, ins->sreg2); + break; + + case OP_PSARD: + s390x_psrad_reg_imm (code, ins->dreg, ins->inst_imm); + break; + case OP_PSARD_REG: + s390x_psrad (code, ins->dreg, ins->sreg2); + break; + + case OP_PSHLD: + s390x_pslld_reg_imm (code, ins->dreg, ins->inst_imm); + break; + case OP_PSHLD_REG: + s390x_pslld (code, ins->dreg, ins->sreg2); + break; + + case OP_PSHRQ: + s390x_psrlq_reg_imm (code, ins->dreg, ins->inst_imm); + break; + case OP_PSHRQ_REG: + s390x_psrlq (code, ins->dreg, ins->sreg2); + break; + + /*TODO: This is appart of the sse spec but not added + case OP_PSARQ: + s390x_psraq_reg_imm (code, ins->dreg, ins->inst_imm); + break; + case OP_PSARQ_REG: + s390x_psraq (code, ins->dreg, ins->sreg2); + break; + */ + + case OP_PSHLQ: + s390x_psllq_reg_imm (code, ins->dreg, ins->inst_imm); + break; + case OP_PSHLQ_REG: + s390x_psllq (code, ins->dreg, ins->sreg2); + 
break; + case OP_CVTDQ2PD: + s390x_cvtdq2pd (code, ins->dreg, ins->sreg1); + break; + case OP_CVTDQ2PS: + s390x_cvtdq2ps (code, ins->dreg, ins->sreg1); + break; + case OP_CVTPD2DQ: + s390x_cvtpd2dq (code, ins->dreg, ins->sreg1); + break; + case OP_CVTPD2PS: + s390x_cvtpd2ps (code, ins->dreg, ins->sreg1); + break; + case OP_CVTPS2DQ: + s390x_cvtps2dq (code, ins->dreg, ins->sreg1); + break; + case OP_CVTPS2PD: + s390x_cvtps2pd (code, ins->dreg, ins->sreg1); + break; + case OP_CVTTPD2DQ: + s390x_cvttpd2dq (code, ins->dreg, ins->sreg1); + break; + case OP_CVTTPS2DQ: + s390x_cvttps2dq (code, ins->dreg, ins->sreg1); + break; + + case OP_ICONV_TO_X: + amd64_movd_xreg_reg_size (code, ins->dreg, ins->sreg1, 4); + break; + case OP_EXTRACT_I4: + amd64_movd_reg_xreg_size (code, ins->dreg, ins->sreg1, 4); + break; + case OP_EXTRACT_I8: + if (ins->inst_c0) { + amd64_movhlps (code, MONO_ARCH_FP_SCRATCH_REG, ins->sreg1); + amd64_movd_reg_xreg_size (code, ins->dreg, MONO_ARCH_FP_SCRATCH_REG, 8); + } else { + amd64_movd_reg_xreg_size (code, ins->dreg, ins->sreg1, 8); + } + break; + case OP_EXTRACT_I1: + case OP_EXTRACT_U1: + amd64_movd_reg_xreg_size (code, ins->dreg, ins->sreg1, 4); + if (ins->inst_c0) + amd64_shift_reg_imm (code, X86_SHR, ins->dreg, ins->inst_c0 * 8); + amd64_widen_reg (code, ins->dreg, ins->dreg, ins->opcode == OP_EXTRACT_I1, FALSE); + break; + case OP_EXTRACT_I2: + case OP_EXTRACT_U2: + /*amd64_movd_reg_xreg_size (code, ins->dreg, ins->sreg1, 4); + if (ins->inst_c0) + amd64_shift_reg_imm_size (code, X86_SHR, ins->dreg, 16, 4);*/ + s390x_pextrw_imm (code, ins->dreg, ins->sreg1, ins->inst_c0); + amd64_widen_reg_size (code, ins->dreg, ins->dreg, ins->opcode == OP_EXTRACT_I2, TRUE, 4); + break; + case OP_EXTRACT_R8: + if (ins->inst_c0) + amd64_movhlps (code, ins->dreg, ins->sreg1); + else + s390x_movsd (code, ins->dreg, ins->sreg1); + break; + case OP_INSERT_I2: + s390x_pinsrw_imm (code, ins->sreg1, ins->sreg2, ins->inst_c0); + break; + case OP_EXTRACTX_U2: + 
s390x_pextrw_imm (code, ins->dreg, ins->sreg1, ins->inst_c0); + break; + case OP_INSERTX_U1_SLOW: + /*sreg1 is the extracted ireg (scratch) + /sreg2 is the to be inserted ireg (scratch) + /dreg is the xreg to receive the value*/ + + /*clear the bits from the extracted word*/ + amd64_alu_reg_imm (code, X86_AND, ins->sreg1, ins->inst_c0 & 1 ? 0x00FF : 0xFF00); + /*shift the value to insert if needed*/ + if (ins->inst_c0 & 1) + amd64_shift_reg_imm_size (code, X86_SHL, ins->sreg2, 8, 4); + /*join them together*/ + amd64_alu (code, X86_OR, ins->sreg1, ins->sreg2); + s390x_pinsrw_imm (code, ins->dreg, ins->sreg1, ins->inst_c0 / 2); + break; + case OP_INSERTX_I4_SLOW: + s390x_pinsrw_imm (code, ins->dreg, ins->sreg2, ins->inst_c0 * 2); + amd64_shift_reg_imm (code, X86_SHR, ins->sreg2, 16); + s390x_pinsrw_imm (code, ins->dreg, ins->sreg2, ins->inst_c0 * 2 + 1); + break; + case OP_INSERTX_I8_SLOW: + amd64_movd_xreg_reg_size(code, MONO_ARCH_FP_SCRATCH_REG, ins->sreg2, 8); + if (ins->inst_c0) + amd64_movlhps (code, ins->dreg, MONO_ARCH_FP_SCRATCH_REG); + else + s390x_movsd (code, ins->dreg, MONO_ARCH_FP_SCRATCH_REG); + break; + + case OP_INSERTX_R4_SLOW: + switch (ins->inst_c0) { + case 0: + if (cfg->r4fp) + s390x_movss (code, ins->dreg, ins->sreg2); + else + s390x_cvtsd2ss (code, ins->dreg, ins->sreg2); + break; + case 1: + s390x_pshufd_imm (code, ins->dreg, ins->dreg, mono_simd_shuffle_mask(1, 0, 2, 3)); + if (cfg->r4fp) + s390x_movss (code, ins->dreg, ins->sreg2); + else + s390x_cvtsd2ss (code, ins->dreg, ins->sreg2); + s390x_pshufd_imm (code, ins->dreg, ins->dreg, mono_simd_shuffle_mask(1, 0, 2, 3)); + break; + case 2: + s390x_pshufd_imm (code, ins->dreg, ins->dreg, mono_simd_shuffle_mask(2, 1, 0, 3)); + if (cfg->r4fp) + s390x_movss (code, ins->dreg, ins->sreg2); + else + s390x_cvtsd2ss (code, ins->dreg, ins->sreg2); + s390x_pshufd_imm (code, ins->dreg, ins->dreg, mono_simd_shuffle_mask(2, 1, 0, 3)); + break; + case 3: + s390x_pshufd_imm (code, ins->dreg, ins->dreg, 
mono_simd_shuffle_mask(3, 1, 2, 0)); + if (cfg->r4fp) + s390x_movss (code, ins->dreg, ins->sreg2); + else + s390x_cvtsd2ss (code, ins->dreg, ins->sreg2); + s390x_pshufd_imm (code, ins->dreg, ins->dreg, mono_simd_shuffle_mask(3, 1, 2, 0)); + break; + } + break; + case OP_INSERTX_R8_SLOW: + if (ins->inst_c0) + amd64_movlhps (code, ins->dreg, ins->sreg2); + else + s390x_movsd (code, ins->dreg, ins->sreg2); + break; + case OP_STOREX_MEMBASE_REG: + case OP_STOREX_MEMBASE: + s390x_movups_membase_reg (code, ins->dreg, ins->inst_offset, ins->sreg1); + break; + case OP_LOADX_MEMBASE: + s390x_movups_reg_membase (code, ins->dreg, ins->sreg1, ins->inst_offset); + break; + case OP_LOADX_ALIGNED_MEMBASE: + s390x_movaps_reg_membase (code, ins->dreg, ins->sreg1, ins->inst_offset); + break; + case OP_STOREX_ALIGNED_MEMBASE_REG: + s390x_movaps_membase_reg (code, ins->dreg, ins->inst_offset, ins->sreg1); + break; + case OP_STOREX_NTA_MEMBASE_REG: + s390x_movntps_reg_membase (code, ins->dreg, ins->sreg1, ins->inst_offset); + break; + case OP_PREFETCH_MEMBASE: + s390x_prefetch_reg_membase (code, ins->backend.arg_info, ins->sreg1, ins->inst_offset); + break; + + case OP_XMOVE: + /*FIXME the peephole pass should have killed this*/ + if (ins->dreg != ins->sreg1) + s390x_movaps (code, ins->dreg, ins->sreg1); + break; + case OP_XZERO: + s390x_pxor (code, ins->dreg, ins->dreg); + break; + case OP_ICONV_TO_R4_RAW: + amd64_movd_xreg_reg_size (code, ins->dreg, ins->sreg1, 4); + break; + + case OP_FCONV_TO_R8_X: + s390x_movsd (code, ins->dreg, ins->sreg1); + break; + + case OP_XCONV_R8_TO_I4: + s390x_cvttsd2si_reg_xreg_size (code, ins->dreg, ins->sreg1, 4); + switch (ins->backend.source_opcode) { + case OP_FCONV_TO_I1: + amd64_widen_reg (code, ins->dreg, ins->dreg, TRUE, FALSE); + break; + case OP_FCONV_TO_U1: + amd64_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE); + break; + case OP_FCONV_TO_I2: + amd64_widen_reg (code, ins->dreg, ins->dreg, TRUE, TRUE); + break; + case OP_FCONV_TO_U2: + 
amd64_widen_reg (code, ins->dreg, ins->dreg, FALSE, TRUE); + break; + } + break; + + case OP_EXPAND_I2: + s390x_pinsrw_imm (code, ins->dreg, ins->sreg1, 0); + s390x_pinsrw_imm (code, ins->dreg, ins->sreg1, 1); + s390x_pshufd_imm (code, ins->dreg, ins->dreg, 0); + break; + case OP_EXPAND_I4: + amd64_movd_xreg_reg_size (code, ins->dreg, ins->sreg1, 4); + s390x_pshufd_imm (code, ins->dreg, ins->dreg, 0); + break; + case OP_EXPAND_I8: + amd64_movd_xreg_reg_size (code, ins->dreg, ins->sreg1, 8); + s390x_pshufd_imm (code, ins->dreg, ins->dreg, 0x44); + break; + case OP_EXPAND_R4: + if (cfg->r4fp) { + s390x_movsd (code, ins->dreg, ins->sreg1); + } else { + s390x_movsd (code, ins->dreg, ins->sreg1); + s390x_cvtsd2ss (code, ins->dreg, ins->dreg); + } + s390x_pshufd_imm (code, ins->dreg, ins->dreg, 0); + break; + case OP_EXPAND_R8: + s390x_movsd (code, ins->dreg, ins->sreg1); + s390x_pshufd_imm (code, ins->dreg, ins->dreg, 0x44); + break; +#endif default: g_warning ("unknown opcode %s in %s()\n", mono_inst_name (ins->opcode), __FUNCTION__); g_assert_not_reached (); @@ -6356,9 +6992,11 @@ mono_arch_get_seq_point_info (MonoDomain *domain, guint8 *code) return NULL; } +/*========================= End of Function ========================*/ + /*------------------------------------------------------------------*/ /* */ -/* Name - mono_arch_init_lmf_ext. */ +/* Name - mono_arch_init_lmf_ext. */ /* */ /* Function - */ /* */ @@ -6377,6 +7015,39 @@ mono_arch_init_lmf_ext (MonoLMFExt *ext, gpointer prev_lmf) #endif +/*------------------------------------------------------------------*/ +/* */ +/* Name - mono_arch_cpu_enumerate_simd_versions. */ +/* */ +/* Function - If this CPU supports vector operations then it */ +/* supports the equivalent of SSE1-4. 
*/ +/* */ +/*------------------------------------------------------------------*/ + +guint32 +mono_arch_cpu_enumerate_simd_versions (void) +{ + guint32 sseOpts = 0; /* stays 0 (no SIMD versions reported) when facs.vec is 0 */ + + if (facs.vec != 0) /* NOTE(review): facs.vec is presumably the vector-facility flag - confirm where facs is populated */ + sseOpts = (SIMD_VERSION_SSE1 | SIMD_VERSION_SSE2 | + SIMD_VERSION_SSE3 | SIMD_VERSION_SSSE3 | + SIMD_VERSION_SSE41 | SIMD_VERSION_SSE42 | + SIMD_VERSION_SSE4a); /* every SIMD_VERSION_* flag is reported together; there is no per-level probing */ + + return (sseOpts); +} + +/*========================= End of Function ========================*/ + +/*------------------------------------------------------------------*/ +/* */ +/* Name - mono_arch_opcode_supported. */ +/* */ +/* Function - Check if a given opcode is supported. */ +/* */ +/*------------------------------------------------------------------*/ + gboolean mono_arch_opcode_supported (int opcode) { @@ -6390,3 +7061,5 @@ mono_arch_opcode_supported (int opcode) return FALSE; } } + +/*========================= End of Function ========================*/ diff --git a/mono/mini/mini-s390x.h b/mono/mini/mini-s390x.h index 1de03e2fd2a..2ac8cc17a31 100644 --- a/mono/mini/mini-s390x.h +++ b/mono/mini/mini-s390x.h @@ -112,6 +112,14 @@ typedef struct #define MONO_ARCH_FRAME_ALIGNMENT 8 #define MONO_ARCH_CODE_ALIGNMENT 32 +/*-----------------------------------------------*/ +/* SIMD Related Definitions */ +/*-----------------------------------------------*/ + +#define MONO_MAX_XREGS 31 /* NOTE(review): s390_VR_NREG in s390x-codegen.h is 32 (VR0-VR31) - confirm 31 is intended */ +#define MONO_ARCH_CALLEE_XREGS 0x0 +#define MONO_ARCH_CALLEE_SAVED_XREGS 0x0 + /*-----------------------------------------------*/ /* Macros used to generate instructions */ /*-----------------------------------------------*/ -- 2.25.1