Add support for z13 processor
author Neale Ferguson <neale@sinenomine.net>
Thu, 17 Aug 2017 20:42:54 +0000 (16:42 -0400)
committer Alexander Köplinger <alex.koeplinger@outlook.com>
Wed, 23 Aug 2017 18:42:47 +0000 (20:42 +0200)
The z13 supports some useful instructions that can be exploited
by the JIT. Most notably, the compare-and-branch instructions can
reduce a two-instruction sequence to one. The load-and-add instruction
eliminates the more complex compare-and-swap sequence.

The z13 also supports three-operand instructions (dst, src1, src2),
which eliminate checks and loads for instructions like add and subtract.

The unaligned.cs test has been changed to cater for big-endian platforms.
For s390x only, the --trace option now creates one trace file per thread
rather than writing the traces to stdout, where incomplete trace entries
were sometimes intermixed.

mono/arch/s390x/s390x-codegen.h
mono/mini/cpu-s390x.md
mono/mini/mini-ops.h
mono/mini/mini-s390x.c
mono/mini/mini-s390x.h
mono/mini/tramp-s390x.c
mono/mini/unaligned.cs
mono/utils/mono-hwcap-s390x.c
mono/utils/mono-hwcap-vars.h

index 880eca2201cef39089aaac9d256265e4cd9a3de9..695e318267dfcdb054690188fab29f3f710eac11 100644 (file)
@@ -815,7 +815,7 @@ typedef struct {
 
 #define S390_RRE(c,opc,g1,g2)          s390_emit32(c, (opc << 16 | (g1) << 4 | g2)) 
 
-#define S390_RRF_1(c,opc,g1,g2,g3)     s390_emit32(c, (opc << 16 | (g1) << 12 | (g3) << 4 | g2))
+#define S390_RRF_1(c,opc,g1,g2,g3)     s390_emit32(c, (opc << 16 | (g3) << 12 | (g1) << 4 | g2))
 
 #define S390_RRF_2(c,opc,g1,k3,g2)     s390_emit32(c, (opc << 16 | (k3) << 12 | (g1) << 4 | g2))
 
@@ -881,9 +881,9 @@ typedef struct {
 
 #define S390_RIE_2(c,opc,g1,g2,m3,v) do                                \
 {                                                              \
-       s390_emit16(c, ((opc & 0xff00) | (g1) << 4 | g3));      \
+       s390_emit16(c, ((opc & 0xff00) | (g1) << 4 | g2));      \
        s390_emit16(c, (v));                                    \
-       s390_emit16(c, ((m2) << 12 | (opc & 0xff)));            \
+       s390_emit16(c, ((m3) << 12 | (opc & 0xff)));            \
 } while (0)
 
 #define S390_RIE_3(c,opc,g1,i,m3,d) do                         \
@@ -1132,14 +1132,14 @@ typedef struct {
 #define s390_agfi(c, r, v)             S390_RIL_1(c, 0xc28, r, v)
 #define s390_afgr(c, r1, r2)           S390_RRE(c, 0xb918, r1, r2)
 #define s390_aghi(c, r, v)             S390_RI(c, 0xa7b, r, v)
-#define s390_aghik(c, r, v)            S390_RIE_1(c, 0xecd9, r, v)
+#define s390_aghik(c, r1, r3, v)       S390_RIE_1(c, 0xecd9, r1, r3, v)
 #define s390_agr(c, r1, r2)            S390_RRE(c, 0xb908, r1, r2)
 #define s390_agrk(c, r1, r2, r3)       S390_RRF_1(c, 0xb9e8, r1, r2, r3)
 #define s390_agsi(c, r, v)             S390_SIY(c, 0xeb7a, r v)
 #define s390_ahhhr(c, r1, r2, r3)      S390_RRF_1(c, 0xb9c8, r1, r2, r3)
 #define s390_ahhlr(c, r1, r2, r3)      S390_RRF_1(c, 0xb9d8, r1, r2, r3)
 #define s390_ahi(c, r, v)              S390_RI(c, 0xa7a, r, v)
-#define s390_ahik(c, r, v)             S390_RIE_1(c, 0xecd8, r, v)
+#define s390_ahik(c, r1, r3, v)                S390_RIE_1(c, 0xecd8, r1, r3, v)
 #define s390_ahy(c, r, x, b, d)                S390_RXY(c, 0xe37a, r, b, d)
 #define s390_aih(c, r, v)              S390_RIL_1(c, 0xcc8, r, v)
 #define s390_al(c, r, x, b, d)         S390_RX(c, 0x5e, r, x, b, d)
@@ -1152,12 +1152,12 @@ typedef struct {
 #define s390_algf(c, r, x, b, d)       S390_RXY(c, 0xe31a, r, x, b, d)
 #define s390_algfi(c, r, v)            S390_RIL_1(c, 0xc2a, r, v)
 #define s390_algfr(c, r1, r2)          S390_RRE(c, 0xb91a, r1, r2)
-#define s390_alghsik(c, r, v)          S390_RIE_1(c, 0xecd8, r, v)
+#define s390_alghsik(c, r1, r3, v)     S390_RIE_1(c, 0xecdb, r1, r3, v)
 #define s390_algr(c, r1, r2)           S390_RRE(c, 0xb90a, r1, r2)
 #define s390_algsi(c, d1, b1, i2)      S390_SIY_1(c, 0xeb7e, d1, b1, i2)
 #define s390_alhhhr(c, r1, r2, r3)     S390_RRF_1(c, 0xb9ca, r1, r2, r3)
 #define s390_alhhlr(c, r1, r2, r3)     S390_RRF_1(c, 0xb9da, r1, r2, r3)
-#define s390_alhsik(c, r, v)           S390_RIE_1(c, 0xecda, r, v)
+#define s390_alhsik(c, r1, r3, v)      S390_RIE_1(c, 0xecda, r1, r3, v)
 #define s390_alr(c, r1, r2)            S390_RR(c, 0x1e, r1, r2)
 #define s390_alrk(c, r1, r2)           S390_RRF(c, 0xb9fa, r1, r2)
 #define s390_alsi(c, d1, b1, i2)       S390_SIY_1(c, 0xeb6e, d1, b1, i2)
@@ -1262,8 +1262,10 @@ typedef struct {
 #define s390_je(c, d)                  s390_brc(c, S390_CC_EQ, d)
 #define s390_jeo(c, d)                 s390_brc(c, S390_CC_ZR|S390_CC_OV, d)
 #define s390_jh(c, d)                  s390_brc(c, S390_CC_GT, d)
+#define s390_jhe(c, d)                 s390_brc(c, S390_CC_GT|S390_CC_EQ, d)
 #define s390_jho(c, d)                 s390_brc(c, S390_CC_GT|S390_CC_OV, d)
 #define s390_jl(c, d)                  s390_brc(c, S390_CC_LT, d)
+#define s390_jle(c, d)                 s390_brc(c, S390_CC_LT|S390_CC_EQ, d)
 #define s390_jlo(c, d)                 s390_brc(c, S390_CC_LT|S390_CC_OV, d)
 #define s390_jm(c, d)                  s390_brc(c, S390_CC_LT, d)
 #define s390_jnc(c, d)                 s390_brc(c, S390_CC_NC, d)
@@ -1296,6 +1298,8 @@ typedef struct {
 #define s390_l(c, r, x, b, d)          S390_RX(c, 0x58, r, x, b, d)
 #define s390_ly(c, r, x, b, d)         S390_RXY(c, 0xe358, r, x, b, d)
 #define s390_la(c, r, x, b, d)         S390_RX(c, 0x41, r, x, b, d)
+#define s390_laa(c, r1, r3, b, d)      S390_RSY_1(c, 0xebf8, r1, r3, b, d)
+#define s390_laag(c, r1, r3, b, d)     S390_RSY_1(c, 0xebe8, r1, r3, b, d)
 #define s390_lay(c, r, x, b, d)                S390_RXY(c, 0xe371, r, x, b, d)
 #define s390_lam(c, r1, r2, b, d)      S390_RS_1(c, 0x9a, r1, r2, b, d)
 #define s390_larl(c, r, o)             S390_RIL_1(c, 0xc00, r, o)
@@ -1357,6 +1361,8 @@ typedef struct {
 #define s390_lpr(c, r1, r2)            S390_RR(c, 0x10, r1, r2)
 #define s390_lr(c, r1, r2)             S390_RR(c, 0x18, r1, r2)
 #define s390_lrl(c, r1, d)             S390_RIL_1(c, 0xc4d, r1, d)
+#define s390_lt(c, r, x, b, d)         S390_RXY(c, 0xe312, r, x, b, d)
+#define s390_ltg(c, r, x, b, d)                S390_RXY(c, 0xe302, r, x, b, d)
 #define s390_ltgfr(c, r1, r2)          S390_RRE(c, 0xb912, r1, r2)
 #define s390_ltgr(c, r1, r2)           S390_RRE(c, 0xb902, r1, r2)
 #define s390_ltr(c, r1, r2)            S390_RR(c, 0x12, r1, r2)
@@ -1407,6 +1413,7 @@ typedef struct {
 #define s390_oiy(c, b, d, v)           S390_SIY(c, 0xeb56 b, d, v) 
 #define s390_og(c, r, x, b, d)         S390_RXY(c, 0xe381, r, x, b, d)
 #define s390_ogr(c, r1, r2)            S390_RRE(c, 0xb981, r1, r2)
+#define s390_ogrk(c, r1, r2, r3)       S390_RRF_1(c, 0xb9e6, r1, r2, r3)
 #define s390_or(c, r1, r2)             S390_RR(c, 0x16, r1, r2)
 #define s390_s(c, r, x, b, d)          S390_RX(c, 0x5b, r, x, b, d)
 #define s390_sdb(c, r, x, b, d)                S390_RXE(c, 0xed1b, r, x, b, d)
@@ -1415,6 +1422,7 @@ typedef struct {
 #define s390_sg(c, r, x, b, d)         S390_RXY(c, 0xe309, r, x, b, d)
 #define s390_sgf(c, r, x, b, d)                S390_RXY(c, 0xe319, r, x, b, d)
 #define s390_sgr(c, r1, r2)            S390_RRE(c, 0xb909, r1, r2)
+#define s390_sgrk(c, r1, r2, r3)       S390_RRF_1(c, 0xb9e9, r1, r2, r3)
 #define s390_sl(c, r, x, b, d)         S390_RX(c, 0x5f, r, x, b, d)
 #define s390_sla(c, r, b, d)           S390_RS_3(c, 0x8b, r, b, d) 
 #define s390_slag(c, r1, r2, b, d)     S390_RSY_1(c, 0xeb0b, r1, r2, b, d) 
@@ -1470,6 +1478,7 @@ typedef struct {
 #define s390_xilf(c, r, v)             S390_RIL_1(c, 0xc07, r, v)
 #define s390_xg(c, r, x, b, d)         S390_RXY(c, 0xe382, r, x, b, d)
 #define s390_xgr(c, r1, r2)            S390_RRE(c, 0xb982, r1, r2)
+#define s390_xgrk(c, r1, r2, r3)       S390_RRF_1(c, 0xb9e7, r1, r2, r3)
 #define s390_xr(c, r1, r2)             S390_RR(c, 0x17, r1, r2)
 #define s390_xy(c, r, x, b, d)         S390_RXY(c, 0xe357, r, x, b, d)
 #endif
index 8dabb43234f80cb4af5a22bdba870fe28aee4831..e80fcdce42a0b5e0974087d573582f5aa6795866 100644 (file)
@@ -96,6 +96,7 @@ fcall_membase: dest:g src1:b len:14 clob:c
 fcall_reg: dest:g src1:i len:10 clob:c
 fcompare: src1:f src2:f len:14
 float_add: dest:f src1:f src2:f len:6
+
 float_beq: len:10
 float_bge: len:10
 float_bge_un: len:8
@@ -106,11 +107,16 @@ float_blt: len:10
 float_blt_un: len:8
 float_bne_un: len:8
 float_bgt_un: len:8
+
 float_ceq: dest:i src1:f src2:f len:16
 float_cgt: dest:i src1:f src2:f len:16
 float_cgt_un: dest:i src1:f src2:f len:16
 float_clt: dest:i src1:f src2:f len:16
 float_clt_un: dest:i src1:f src2:f len:16
+float_cneq: dest:y src1:f src2:f len:16
+float_cge: dest:y src1:f src2:f len:16
+float_cle: dest:y src1:f src2:f len:16
+
 float_conv_to_i1: dest:i src1:f len:50
 float_conv_to_i2: dest:i src1:f len:50
 float_conv_to_i4: dest:i src1:f len:50
@@ -231,11 +237,19 @@ int_ble_un: len:8
 int_blt: len:8
 int_blt_un: len:8
 int_bne_un: len:8
+
 int_ceq: dest:i len:12
 int_cgt: dest:i len:12
 int_cgt_un: dest:i len:12
 int_clt: dest:i len:12
 int_clt_un: dest:i len:12
+
+int_cneq: dest:i len:12
+int_cge: dest:i len:12
+int_cle: dest:i len:12
+int_cge_un: dest:i len:12
+int_cle_un: dest:i len:12
+
 int_div: dest:a src1:i src2:i len:16
 int_div_imm: dest:a src1:i len:24
 int_div_un: dest:a src1:i src2:i len:16
@@ -340,11 +354,11 @@ jump_table: dest:i len:24
 
 int_conv_to_i1: dest:i src1:i len:12
 int_conv_to_i2: dest:i src1:i len:12
-int_conv_to_i4: dest:i src1:i len:2
-int_conv_to_i: dest:i src1:i len:2
+int_conv_to_i4: dest:i src1:i len:4
+int_conv_to_i: dest:i src1:i len:4
 int_conv_to_u1: dest:i src1:i len:10
 int_conv_to_u2: dest:i src1:i len:16
-int_conv_to_u4: dest:i src1:i
+int_conv_to_u4: dest:i src1:i len:4
 int_conv_to_r_un: dest:f src1:i len:37 
 
 cond_exc_ic: len:8
@@ -393,3 +407,12 @@ gc_param_slot_liveness_def: len:0
 gc_safe_point: clob:c src1:i len:32
 
 generic_class_init: src1:A len:32 clob:c
+
+s390_crj: src1:i src2:i len:24
+s390_crj_un: src1:i src2:i len:24
+s390_cgrj: src1:i src2:i len:24
+s390_cgrj_un: src1:i src2:i len:24
+s390_cij: src1:i len:24
+s390_cij_un: src1:i len:24
+s390_cgij: src1:i len:24
+s390_cgij_un: src1:i len:24
index 29d8cb09fb77e0b2d84586309f48dc85243afc26..32ad106ab571ce0b975e13a92e824ddbec7afc56 100644 (file)
@@ -1219,6 +1219,14 @@ MINI_OP(OP_S390_IADD_OVF,       "s390_int_add_ovf", IREG, IREG, IREG)
 MINI_OP(OP_S390_IADD_OVF_UN,    "s390_int_add_ovf_un", IREG, IREG, IREG)
 MINI_OP(OP_S390_ISUB_OVF,       "s390_int_sub_ovf", IREG, IREG, IREG)
 MINI_OP(OP_S390_ISUB_OVF_UN,    "s390_int_sub_ovf_un", IREG, IREG, IREG)
+MINI_OP(OP_S390_CRJ,            "s390_crj", IREG, IREG, IREG)
+MINI_OP(OP_S390_CLRJ,           "s390_crj_un", IREG, IREG, IREG)
+MINI_OP(OP_S390_CGRJ,           "s390_cgrj", LREG, LREG, IREG)
+MINI_OP(OP_S390_CLGRJ,          "s390_cgrj_un", LREG, LREG, IREG)
+MINI_OP(OP_S390_CIJ,            "s390_cij", IREG, NONE, NONE)
+MINI_OP(OP_S390_CLIJ,           "s390_cij_un", IREG, IREG, NONE)
+MINI_OP(OP_S390_CGIJ,           "s390_cgij", LREG, NONE, NONE)
+MINI_OP(OP_S390_CLGIJ,          "s390_cgij_un", LREG, NONE, NONE)
 #endif
 
 #if defined(__mips__)
index 5760bfb8672d8276721c86ee2e4a8928e6a830f4..a0dddf7aa8a8782236178e614370b8a28a4cbbd0 100644 (file)
@@ -61,6 +61,69 @@ if (ins->inst_target_bb->native_offset) {                                    \
                s390_jcl (code, cond, 0);                               \
        } while (0); 
 
+#define EMIT_COMP_AND_BRANCH(ins, cab, cmp)                                    \
+{                                                                              \
+if (ins->inst_true_bb->native_offset) {                                        \
+       int displace;                                                           \
+       displace = ((cfg->native_code +                                         \
+                   ins->inst_true_bb->native_offset) - code) / 2;              \
+       if (s390_is_imm16(displace)) {                                          \
+               s390_##cab (code, ins->sreg1, ins->sreg2,                       \
+                           ins->sreg3, displace);                              \
+       } else {                                                                \
+               s390_##cmp (code, ins->sreg1, ins->sreg2);                      \
+               displace = ((cfg->native_code +                                 \
+                           ins->inst_true_bb->native_offset) - code) / 2;      \
+               s390_jcl (code, ins->sreg3, displace);                          \
+       }                                                                       \
+} else {                                                                       \
+       s390_##cmp (code, ins->sreg1, ins->sreg2);                              \
+       mono_add_patch_info (cfg, code - cfg->native_code,                      \
+                            MONO_PATCH_INFO_BB, ins->inst_true_bb);            \
+       s390_jcl (code, ins->sreg3, 0);                                         \
+}                                                                              \
+}
+
+#define EMIT_COMP_AND_BRANCH_IMM(ins, cab, cmp, lat, logical)                  \
+{                                                                              \
+if (ins->inst_true_bb->native_offset) {                                        \
+       int displace;                                                           \
+       if ((ins->backend.data == 0) && (!logical)) {                           \
+               s390_##lat (code, ins->sreg1, ins->sreg1);                      \
+               displace = ((cfg->native_code +                                 \
+                           ins->inst_true_bb->native_offset) - code) / 2;      \
+               if (s390_is_imm16(displace)) {                                  \
+                       s390_brc (code, ins->sreg3, displace);                  \
+               } else {                                                        \
+                       s390_jcl (code, ins->sreg3, displace);                  \
+               }                                                               \
+       } else {                                                                \
+               S390_SET (code, s390_r0, ins->backend.data);                    \
+               displace = ((cfg->native_code +                                 \
+                           ins->inst_true_bb->native_offset) - code) / 2;      \
+               if (s390_is_imm16(displace)) {                                  \
+                       s390_##cab (code, ins->sreg1, s390_r0,                  \
+                                   ins->sreg3, displace);                      \
+               } else {                                                        \
+                       s390_##cmp (code, ins->sreg1, s390_r0);                 \
+                       displace = ((cfg->native_code +                         \
+                           ins->inst_true_bb->native_offset) - code) / 2;      \
+                       s390_jcl (code, ins->sreg3, displace);                  \
+               }                                                               \
+       }                                                                       \
+} else {                                                                       \
+       if ((ins->backend.data == 0) && (!logical)) {                           \
+               s390_##lat (code, ins->sreg1, ins->sreg1);                      \
+       } else {                                                                \
+               S390_SET (code, s390_r0, ins->backend.data);                    \
+               s390_##cmp (code, ins->sreg1, s390_r0);                         \
+       }                                                                       \
+       mono_add_patch_info (cfg, code - cfg->native_code,                      \
+                            MONO_PATCH_INFO_BB, ins->inst_true_bb);            \
+       s390_jcl (code, ins->sreg3, 0);                                         \
+}                                                                              \
+}
+
 #define CHECK_SRCDST_COM                                               \
        if (ins->dreg == ins->sreg2) {                                  \
                src2 = ins->sreg1;                                      \
@@ -378,6 +441,7 @@ static CallInfo * get_call_info (MonoCompile *, MonoMemPool *, MonoMethodSignatu
 static guchar * emit_float_to_int (MonoCompile *, guchar *, int, int, int, gboolean);
 static guint8 * emit_load_volatile_arguments (guint8 *, MonoCompile *);
 static __inline__ void emit_unwind_regs(MonoCompile *, guint8 *, int, int, long);
+static void compare_and_branch(MonoBasicBlock *, MonoInst *, int, gboolean);
 
 /*========================= End of Prototypes ======================*/
 
@@ -388,6 +452,8 @@ static __inline__ void emit_unwind_regs(MonoCompile *, guint8 *, int, int, long)
 int mono_exc_esp_offset = 0;
 
 __thread int indent_level = 0;
+__thread FILE *trFd = NULL;
+int curThreadNo = 0;
 
 /*
  * The code generated for sequence points reads from this location, 
@@ -646,9 +712,9 @@ indent (int diff) {
        if (diff < 0)
                indent_level += diff;
        v = indent_level;
-       printf("%p [%3d] ",(void *)pthread_self(),v);
+       fprintf (trFd, "%p [%3d] ",(void *)pthread_self(),v);
        while (v-- > 0) {
-               printf (". ");
+               fprintf (trFd, ". ");
        }
        if (diff > 0) 
                indent_level += diff;
@@ -749,9 +815,9 @@ cvtMonoType(MonoTypeEnum t)
 
 /*------------------------------------------------------------------*/
 /*                                                                  */
-/* Name                - decodeParm                                        */
+/* Name                - decodeParmString                                  */
 /*                                                                  */
-/* Function    - Decode a parameter for the trace.                 */
+/* Function    - Decode a parameter string for the trace.          */
 /*                                                                 */
 /*------------------------------------------------------------------*/
 
@@ -761,57 +827,68 @@ decodeParmString (MonoString *s)
        MonoError error;
        char *str = mono_string_to_utf8_checked(s, &error);
        if (is_ok (&error))  {
-               printf("[STRING:%p:%s], ", s, str);
+               fprintf (trFd, "[STRING:%p:%s], ", s, str);
                g_free (str);
        } else {
                mono_error_cleanup (&error);
-               printf("[STRING:%p:], ", s);
+               fprintf (trFd, "[STRING:%p:], ", s);
        }
 }
 
+/*========================= End of Function ========================*/
+
+/*------------------------------------------------------------------*/
+/*                                                                  */
+/* Name                - decodeParm                                        */
+/*                                                                  */
+/* Function    - Decode a parameter for the trace.                 */
+/*                                                                 */
+/*------------------------------------------------------------------*/
+
+
 static void 
 decodeParm(MonoType *type, void *curParm, int size)
 {
        guint32 simpleType;
 
        if (type->byref) {
-               printf("[BYREF:%p], ", *((char **) curParm));
+               fprintf (trFd, "[BYREF:%p], ", *((char **) curParm));
        } else {
                simpleType = mini_get_underlying_type(type)->type;
 enum_parmtype:
                switch (simpleType) {
                        case MONO_TYPE_I :
-                               printf ("[INTPTR:%p], ", *((int **) curParm));
+                               fprintf (trFd, "[INTPTR:%p], ", *((int **) curParm));
                                break;
                        case MONO_TYPE_U :
-                               printf ("[UINTPTR:%p], ", *((int **) curParm));
+                               fprintf (trFd, "[UINTPTR:%p], ", *((int **) curParm));
                                break;
                        case MONO_TYPE_BOOLEAN :
-                               printf ("[BOOL:%ld], ", *((gint64 *) curParm));
+                               fprintf (trFd, "[BOOL:%ld], ", *((gint64 *) curParm));
                                break;
                        case MONO_TYPE_CHAR :
-                               printf ("[CHAR:%c], ", *((int  *) curParm));
+                               fprintf (trFd, "[CHAR:%c], ", *((int  *) curParm));
                                break;
                        case MONO_TYPE_I1 :
-                               printf ("[INT1:%ld], ", *((gint64 *) curParm));
+                               fprintf (trFd, "[INT1:%ld], ", *((gint64 *) curParm));
                                break; 
                        case MONO_TYPE_I2 :
-                               printf ("[INT2:%ld], ", *((gint64 *) curParm));
+                               fprintf (trFd, "[INT2:%ld], ", *((gint64 *) curParm));
                                break; 
                        case MONO_TYPE_I4 :
-                               printf ("[INT4:%ld], ", *((gint64 *) curParm));
+                               fprintf (trFd, "[INT4:%ld], ", *((gint64 *) curParm));
                                break; 
                        case MONO_TYPE_U1 :
-                               printf ("[UINT1:%lu], ", *((guint64 *) curParm));
+                               fprintf (trFd, "[UINT1:%lu], ", *((guint64 *) curParm));
                                break; 
                        case MONO_TYPE_U2 :
-                               printf ("[UINT2:%lu], ", *((guint64 *) curParm));
+                               fprintf (trFd, "[UINT2:%lu], ", *((guint64 *) curParm));
                                break; 
                        case MONO_TYPE_U4 :
-                               printf ("[UINT4:%lu], ", *((guint64 *) curParm));
+                               fprintf (trFd, "[UINT4:%lu], ", *((guint64 *) curParm));
                                break; 
                        case MONO_TYPE_U8 :
-                               printf ("[UINT8:%lu], ", *((guint64 *) curParm));
+                               fprintf (trFd, "[UINT8:%lu], ", *((guint64 *) curParm));
                                break; 
                        case MONO_TYPE_STRING : {
                                MonoString *s = *((MonoString **) curParm);
@@ -819,7 +896,7 @@ enum_parmtype:
                                        g_assert (((MonoObject *) s)->vtable->klass == mono_defaults.string_class);
                                        decodeParmString (s);
                                } else {
-                                       printf("[STRING:null], ");
+                                       fprintf (trFd, "[STRING:null], ");
                                }
                                break;
                        }
@@ -828,43 +905,43 @@ enum_parmtype:
                                MonoObject *obj = *((MonoObject **) curParm);
                                MonoClass *klass;
                                if ((obj) && (obj->vtable)) {
-                                       printf("[CLASS/OBJ:");
+                                       fprintf (trFd, "[CLASS/OBJ:");
                                        klass = obj->vtable->klass;
-                                       printf("%p [%p] ",obj,curParm);
+                                       fprintf (trFd, "%p [%p] ",obj,curParm);
                                        if (klass == mono_defaults.string_class) {
                                                decodeParmString ((MonoString *)obj);
                                        } else if (klass == mono_defaults.int32_class) { 
-                                               printf("[INT32:%p:%d]", 
+                                               fprintf (trFd, "[INT32:%p:%d]", 
                                                        obj, *(gint32 *)((char *)obj + sizeof (MonoObject)));
                                        } else
-                                               printf("[%s.%s:%p]", 
+                                               fprintf (trFd, "[%s.%s:%p]", 
                                                       klass->name_space, klass->name, obj);
-                                       printf("], ");
+                                       fprintf (trFd, "], ");
                                } else {
-                                       printf("[OBJECT:null], ");
+                                       fprintf (trFd, "[OBJECT:null], ");
                                }
                                break;
                        }
                        case MONO_TYPE_PTR :
-                               printf("[PTR:%p], ", *((gpointer **) (curParm)));
+                               fprintf (trFd, "[PTR:%p], ", *((gpointer **) (curParm)));
                                break;
                        case MONO_TYPE_FNPTR :
-                               printf("[FNPTR:%p], ", *((gpointer **) (curParm)));
+                               fprintf (trFd, "[FNPTR:%p], ", *((gpointer **) (curParm)));
                                break;
                        case MONO_TYPE_ARRAY :
-                               printf("[ARRAY:%p], ", *((gpointer **) (curParm)));
+                               fprintf (trFd, "[ARRAY:%p], ", *((gpointer **) (curParm)));
                                break;
                        case MONO_TYPE_SZARRAY :
-                               printf("[SZARRAY:%p], ", *((gpointer **) (curParm)));
+                               fprintf (trFd, "[SZARRAY:%p], ", *((gpointer **) (curParm)));
                                break;
                        case MONO_TYPE_I8 :
-                               printf("[INT8:%ld], ", *((gint64 *) (curParm)));
+                               fprintf (trFd, "[INT8:%ld], ", *((gint64 *) (curParm)));
                                break;
                        case MONO_TYPE_R4 :
-                               printf("[FLOAT4:%g], ", *((float *) (curParm)));
+                               fprintf (trFd, "[FLOAT4:%g], ", *((float *) (curParm)));
                                break;
                        case MONO_TYPE_R8 :
-                               printf("[FLOAT8:%g], ", *((double *) (curParm)));
+                               fprintf (trFd, "[FLOAT8:%g], ", *((double *) (curParm)));
                                break;
                        case MONO_TYPE_VALUETYPE : {
                                int i;
@@ -872,7 +949,7 @@ enum_parmtype:
 
                                if (type->data.klass->enumtype) {
                                        simpleType = mono_class_enum_basetype (type->data.klass)->type;
-                                       printf("{VALUETYPE} - ");
+                                       fprintf (trFd, "{VALUETYPE} - ");
                                        goto enum_parmtype;
                                }
 
@@ -881,33 +958,33 @@ enum_parmtype:
                                if ((info->native_size == sizeof(float)) &&
                                    (info->num_fields  == 1) &&
                                    (info->fields[0].field->type->type == MONO_TYPE_R4)) {
-                                               printf("[FLOAT4:%f], ", *((float *) (curParm)));
+                                               fprintf (trFd, "[FLOAT4:%f], ", *((float *) (curParm)));
                                        break;
                                }
 
                                if ((info->native_size == sizeof(double)) &&
                                    (info->num_fields  == 1) &&
                                    (info->fields[0].field->type->type == MONO_TYPE_R8)) {
-                                       printf("[FLOAT8:%g], ", *((double *) (curParm)));
+                                       fprintf (trFd, "[FLOAT8:%g], ", *((double *) (curParm)));
                                        break;
                                }
 
-                               printf("[VALUETYPE:");
+                               fprintf (trFd, "[VALUETYPE:");
                                for (i = 0; i < size; i++)
-                                       printf("%02x,", *((guint8 *)curParm+i));
-                               printf("], ");
+                                       fprintf (trFd, "%02x,", *((guint8 *)curParm+i));
+                               fprintf (trFd, "], ");
                                break;
                        }
                        case MONO_TYPE_TYPEDBYREF: {
                                int i;
-                               printf("[TYPEDBYREF:");
+                               fprintf (trFd, "[TYPEDBYREF:");
                                for (i = 0; i < size; i++)
-                                       printf("%02x,", *((guint8 *)curParm+i));
-                               printf("]");
+                                       fprintf (trFd, "%02x,", *((guint8 *)curParm+i));
+                               fprintf (trFd, "]");
                                break;
                        }
                        default :
-                               printf("[%s], ",cvtMonoType(simpleType));
+                               fprintf (trFd, "[%s], ",cvtMonoType(simpleType));
                }
        }
 }
@@ -936,13 +1013,18 @@ enter_method (MonoMethod *method, RegParm *rParm, char *sp)
        ArgInfo *ainfo;
        void *curParm;
 
+       if (trFd == NULL) {
+               char buf[32];
+               sprintf(buf, "/tmp/mono.%d.trc.%d", getpid(), curThreadNo++);
+               trFd = fopen(buf, "w");
+       }
        fname = mono_method_full_name (method, TRUE);
        indent (1);
-       printf ("ENTER: %s ", fname);
+       fprintf (trFd, "ENTER: %s ", fname);
        g_free (fname);
 
        ip  = (*(guint64 *) (sp+S390_RET_ADDR_OFFSET));
-       printf ("ip: %p sp: %p - ", (gpointer) ip, sp); 
+       fprintf (trFd, "ip: %p sp: %p - ", (gpointer) ip, sp); 
 
        if (rParm == NULL)
                return;
@@ -952,7 +1034,7 @@ enter_method (MonoMethod *method, RegParm *rParm, char *sp)
        cinfo = get_call_info (NULL, NULL, sig);
 
        if (cinfo->struct_ret) {
-               printf ("[STRUCTRET:%p], ", (gpointer) rParm->gr[0]);
+               fprintf (trFd, "[STRUCTRET:%p], ", (gpointer) rParm->gr[0]);
                iParm = 1;
        }
 
@@ -963,28 +1045,28 @@ enter_method (MonoMethod *method, RegParm *rParm, char *sp)
                case MONO_TYPE_VALUETYPE:
                        if (obj) {
                                guint64 *value = (guint64 *) ((uintptr_t)this_arg + sizeof(MonoObject));
-                               printf("this:[value:%p:%016lx], ", this_arg, *value);
+                               fprintf (trFd, "this:[value:%p:%016lx], ", this_arg, *value);
                        } else 
-                               printf ("this:[NULL], ");
+                               fprintf (trFd, "this:[NULL], ");
                        break;
                case MONO_TYPE_STRING:
                        if (obj) {
                                if (obj->vtable) {
                                        klass = obj->vtable->klass;
                                        if (klass == mono_defaults.string_class) {
-                                               printf ("this:");
+                                               fprintf (trFd, "this:");
                                                decodeParmString((MonoString *)obj);
                                        } else {
-                                               printf ("this:%p[%s.%s], ", 
+                                               fprintf (trFd, "this:%p[%s.%s], ", 
                                                        obj, klass->name_space, klass->name);
                                        }
                                } else 
-                                       printf("vtable:[NULL], ");
+                                       fprintf (trFd, "vtable:[NULL], ");
                        } else 
-                               printf ("this:[NULL], ");
+                               fprintf (trFd, "this:[NULL], ");
                        break;
                default :
-                       printf("this[%s]: %p, ",cvtMonoType(method->klass->this_arg.type),this_arg);
+                       fprintf (trFd, "this[%s]: %p, ",cvtMonoType(method->klass->this_arg.type),this_arg);
                }
                oParm++;
        }
@@ -1030,10 +1112,10 @@ enter_method (MonoMethod *method, RegParm *rParm, char *sp)
                                break;
                                
                        default :
-                               printf("???, ");
+                               fprintf (trFd, "???, ");
                }
        }       
-       printf("\n");
+       fprintf (trFd, "\n");
        g_free(cinfo);
 }
 
@@ -1059,7 +1141,7 @@ leave_method (MonoMethod *method, ...)
 
        fname = mono_method_full_name (method, TRUE);
        indent (-1);
-       printf ("LEAVE: %s", fname);
+       fprintf (trFd, "LEAVE: %s", fname);
        g_free (fname);
 
        type = mono_method_signature (method)->ret;
@@ -1071,57 +1153,57 @@ handle_enum:
        case MONO_TYPE_BOOLEAN: {
                int val = va_arg (ap, int);
                if (val)
-                       printf ("[TRUE:%d]", val);
+                       fprintf (trFd, "[TRUE:%d]", val);
                else 
-                       printf ("[FALSE]");
+                       fprintf (trFd, "[FALSE]");
                        
                break;
        }
        case MONO_TYPE_CHAR: {
                int val = va_arg (ap, int);
-               printf ("[CHAR:%d]", val);
+               fprintf (trFd, "[CHAR:%d]", val);
                break;
        }
        case MONO_TYPE_I1: {
                int val = va_arg (ap, int);
-               printf ("[INT1:%d]", val);
+               fprintf (trFd, "[INT1:%d]", val);
                break;
        }
        case MONO_TYPE_U1: {
                int val = va_arg (ap, int);
-               printf ("[UINT1:%d]", val);
+               fprintf (trFd, "[UINT1:%d]", val);
                break;
        }
        case MONO_TYPE_I2: {
                int val = va_arg (ap, int);
-               printf ("[INT2:%d]", val);
+               fprintf (trFd, "[INT2:%d]", val);
                break;
        }
        case MONO_TYPE_U2: {
                int val = va_arg (ap, int);
-               printf ("[UINT2:%d]", val);
+               fprintf (trFd, "[UINT2:%d]", val);
                break;
        }
        case MONO_TYPE_I4: {
                int val = va_arg (ap, int);
-               printf ("[INT4:%d]", val);
+               fprintf (trFd, "[INT4:%d]", val);
                break;
        }
        case MONO_TYPE_U4: {
                int val = va_arg (ap, int);
-               printf ("[UINT4:%d]", val);
+               fprintf (trFd, "[UINT4:%d]", val);
                break;
        }
        case MONO_TYPE_I: {
                gint64 val = va_arg (ap, gint64);
-               printf ("[INT:%ld]", val);
-               printf("]");
+               fprintf (trFd, "[INT:%ld]", val);
+               fprintf (trFd, "]");
                break;
        }
        case MONO_TYPE_U: {
                gint64 val = va_arg (ap, gint64);
-               printf ("[UINT:%lu]", val);
-               printf("]");
+               fprintf (trFd, "[UINT:%lu]", val);
+               fprintf (trFd, "]");
                break;
        }
        case MONO_TYPE_STRING: {
@@ -1131,7 +1213,7 @@ handle_enum:
                        g_assert (((MonoObject *)s)->vtable->klass == mono_defaults.string_class);
                        decodeParmString (s);
                } else 
-                       printf ("[STRING:null], ");
+                       fprintf (trFd, "[STRING:null], ");
                break;
        }
        case MONO_TYPE_CLASS: 
@@ -1140,15 +1222,15 @@ handle_enum:
 
                if ((o) && (o->vtable)) {
                        if (o->vtable->klass == mono_defaults.boolean_class) {
-                               printf ("[BOOLEAN:%p:%d]", o, *((guint8 *)o + sizeof (MonoObject)));            
+                               fprintf (trFd, "[BOOLEAN:%p:%d]", o, *((guint8 *)o + sizeof (MonoObject)));             
                        } else if  (o->vtable->klass == mono_defaults.int32_class) {
-                               printf ("[INT32:%p:%d]", o, *((gint32 *)((char *)o + sizeof (MonoObject))));    
+                               fprintf (trFd, "[INT32:%p:%d]", o, *((gint32 *)((char *)o + sizeof (MonoObject))));     
                        } else if  (o->vtable->klass == mono_defaults.int64_class) {
-                               printf ("[INT64:%p:%ld]", o, *((gint64 *)((char *)o + sizeof (MonoObject))));   
+                               fprintf (trFd, "[INT64:%p:%ld]", o, *((gint64 *)((char *)o + sizeof (MonoObject))));    
                        } else
-                               printf ("[%s.%s:%p]", o->vtable->klass->name_space, o->vtable->klass->name, o);
+                               fprintf (trFd, "[%s.%s:%p]", o->vtable->klass->name_space, o->vtable->klass->name, o);
                } else
-                       printf ("[OBJECT:%p]", o);
+                       fprintf (trFd, "[OBJECT:%p]", o);
               
                break;
        }
@@ -1157,27 +1239,27 @@ handle_enum:
        case MONO_TYPE_ARRAY:
        case MONO_TYPE_SZARRAY: {
                gpointer p = va_arg (ap, gpointer);
-               printf ("[result=%p]", p);
+               fprintf (trFd, "[result=%p]", p);
                break;
        }
        case MONO_TYPE_I8: {
                gint64 l =  va_arg (ap, gint64);
-               printf ("[LONG:%ld]", l);
+               fprintf (trFd, "[LONG:%ld]", l);
                break;
        }
        case MONO_TYPE_U8: {
                guint64 l =  va_arg (ap, guint64);
-               printf ("[ULONG:%lu]", l);
+               fprintf (trFd, "[ULONG:%lu]", l);
                break;
        }
        case MONO_TYPE_R4: {
                double f = va_arg (ap, double);
-               printf ("[FLOAT4:%g]\n", f);
+               fprintf (trFd, "[FLOAT4:%g]\n", f);
                break;
        }
        case MONO_TYPE_R8: {
                double f = va_arg (ap, double);
-               printf ("[FLOAT8:%g]\n", f);
+               fprintf (trFd, "[FLOAT8:%g]\n", f);
                break;
        }
        case MONO_TYPE_VALUETYPE: {
@@ -1194,7 +1276,7 @@ handle_enum:
                            (info->num_fields  == 1) &&
                            (info->fields[0].field->type->type == MONO_TYPE_R4)) {
                                double f = va_arg (ap, double);
-                               printf("[FLOAT4:%g]\n", (double) f);
+                               fprintf (trFd, "[FLOAT4:%g]\n", (double) f);
                                break;
                        }
 
@@ -1202,7 +1284,7 @@ handle_enum:
                            (info->num_fields  == 1) &&
                            (info->fields[0].field->type->type == MONO_TYPE_R8)) {
                                double f = va_arg (ap, double);
-                               printf("[FLOAT8:%g]\n", f);
+                               fprintf (trFd, "[FLOAT8:%g]\n", f);
                                break;
                        }
 
@@ -1210,27 +1292,27 @@ handle_enum:
                        switch (size) {
                                case 1: {
                                        guint32 p = va_arg (ap, guint32);
-                                       printf ("[%02x]\n",p);
+                                       fprintf (trFd, "[%02x]\n",p);
                                        break;
                                }
                                case 2: {
                                        guint32 p = va_arg (ap, guint32);
-                                       printf ("[%04x]\n",p);
+                                       fprintf (trFd, "[%04x]\n",p);
                                        break;
                                }
                                case 4: {
                                        guint32 p = va_arg (ap, guint32);
-                                       printf ("[%08x]\n",p);
+                                       fprintf (trFd, "[%08x]\n",p);
                                        break;
                                }
                                case 8: {
                                        guint64 p = va_arg (ap, guint64);
-                                       printf ("[%016lx]\n",p);
+                                       fprintf (trFd, "[%016lx]\n",p);
                                        break;
                                }
                                default: {
                                        gpointer p = va_arg (ap, gpointer);
-                                       printf ("[VALUETYPE] %p\n",p);
+                                       fprintf (trFd, "[VALUETYPE] %p\n",p);
                                }
                        }
                }
@@ -1245,43 +1327,43 @@ handle_enum:
                case 2:
                case 4:
                case 8:
-                       printf ("[");
+                       fprintf (trFd, "[");
                        for (j = 0; p && j < size; j++)
-                               printf ("%02x,", p [j]);
-                       printf ("]\n");
+                               fprintf (trFd, "%02x,", p [j]);
+                       fprintf (trFd, "]\n");
                        break;
                default:
-                       printf ("[TYPEDBYREF]\n");
+                       fprintf (trFd, "[TYPEDBYREF]\n");
                }
        }
                break;
        case MONO_TYPE_GENERICINST: {
-               printf("[GENERICINST]\n");
+               fprintf (trFd, "[GENERICINST]\n");
        }
                break;
        case MONO_TYPE_MVAR: {
-               printf("[MVAR]\n");
+               fprintf (trFd, "[MVAR]\n");
        }
                break;
        case MONO_TYPE_CMOD_REQD: {
-               printf("[CMOD_REQD]\n");
+               fprintf (trFd, "[CMOD_REQD]\n");
        }
                break;
        case MONO_TYPE_CMOD_OPT: {
-               printf("[CMOD_OPT]\n");
+               fprintf (trFd, "[CMOD_OPT]\n");
        }
                break;
        case MONO_TYPE_INTERNAL: {
-               printf("[INTERNAL]\n");
+               fprintf (trFd, "[INTERNAL]\n");
        }
                break;
        default:
-               printf ("(unknown return type %x)", 
+               fprintf (trFd, "(unknown return type %x)", 
                        mono_method_signature (method)->ret->type);
        }
 
        ip = ((gint64) __builtin_extract_return_addr (__builtin_return_address (0)));
-       printf (" ip: %p\n", (gpointer) ip);
+       fprintf (trFd, " ip: %p\n", (gpointer) ip);
        va_end (ap);
 }
 
@@ -1455,7 +1537,9 @@ mono_arch_get_global_int_regs (MonoCompile *cfg)
        /* FIXME: s390_r12 is reserved for bkchain_reg. Only reserve it if needed */
        top = 12;
        for (i = 8; i < top; ++i) {
-               if (cfg->frame_reg != i)
+               if ((cfg->frame_reg != i) && 
+                   //!((cfg->uses_rgctx_reg) && (i == MONO_ARCH_IMT_REG)))
+                   (i != MONO_ARCH_IMT_REG))
                        regs = g_list_prepend (regs, GUINT_TO_POINTER (i));
        }
 
@@ -1989,10 +2073,7 @@ mono_arch_allocate_vars (MonoCompile *cfg)
        cfg->arch.bkchain_reg = -1;
 
        if (frame_reg != STK_BASE) 
-               cfg->used_int_regs |= (1 << frame_reg);         
-
-       if (cfg->uses_rgctx_reg)
-               cfg->used_int_regs |= (1 << MONO_ARCH_IMT_REG);
+               cfg->used_int_regs |= (1LL << frame_reg);               
 
        sig     = mono_method_signature (cfg->method);
        
@@ -2788,6 +2869,64 @@ handle_enum:
 
 /*========================= End of Function ========================*/
 
+/*------------------------------------------------------------------*/
+/*                                                                  */
+/* Name                - compare_and_branch                                */
+/*                                                                  */
+/* Function    - Fuse the compare that precedes a conditional      */
+/*               branch into one compare-and-branch opcode.        */
+/*                                                                 */
+/*------------------------------------------------------------------*/
+
+static void
+compare_and_branch(MonoBasicBlock *bb, MonoInst *ins, int cc, gboolean logical)
+{      /* bb: block handed to MONO_DELETE_INS; ins: branch to rewrite; cc: stored in sreg3; logical: picks the OP_S390_CL.. opcodes */
+       MonoInst *last; /* instruction returned by mono_inst_prev () for ins */
+
+       if (mono_hwcap_s390x_has_gie) { /* nothing is rewritten unless this hardware-capability flag is set */
+               last = mono_inst_prev (ins, FILTER_IL_SEQ_POINT); /* NOTE(review): dereferenced below without a NULL check - confirm a predecessor always exists */
+               ins->sreg1 = last->sreg1; /* NOTE(review): operands are copied before the opcode check, so they change even if no case matches */
+               ins->sreg2 = last->sreg2;
+               ins->sreg3 = cc; /* condition code supplied by the caller */
+               switch(last->opcode) { /* no default: any other predecessor leaves ins->opcode untouched */
+               case OP_ICOMPARE:
+                       if (logical)
+                               ins->opcode = OP_S390_CLRJ;
+                       else
+                               ins->opcode = OP_S390_CRJ;
+                       MONO_DELETE_INS(bb, last); /* the branch now carries the compare operands itself */
+                       break;
+               case OP_COMPARE:
+               case OP_LCOMPARE:
+                       if (logical)
+                               ins->opcode = OP_S390_CLGRJ;
+                       else
+                               ins->opcode = OP_S390_CGRJ;
+                       MONO_DELETE_INS(bb, last);
+                       break;
+               case OP_ICOMPARE_IMM:
+                       ins->backend.data = (gpointer) last->inst_imm; /* the compare's immediate travels in backend.data */
+                       if (logical)
+                               ins->opcode = OP_S390_CLIJ;
+                       else
+                               ins->opcode = OP_S390_CIJ;
+                       MONO_DELETE_INS(bb, last);
+                       break;
+               case OP_COMPARE_IMM:
+               case OP_LCOMPARE_IMM:
+                       ins->backend.data = (gpointer) last->inst_imm; /* NOTE(review): no range check on the immediate here - confirm the emitter handles values that do not fit */
+                       if (logical)
+                               ins->opcode = OP_S390_CLGIJ;
+                       else
+                               ins->opcode = OP_S390_CGIJ;
+                       MONO_DELETE_INS(bb, last);
+                       break;
+               }
+       }
+}
+
+/*========================= End of Function ========================*/
+
 /*------------------------------------------------------------------*/
 /*                                                                  */
 /* Name                - mono_arch_peephole_pass_1                         */
@@ -2800,6 +2939,55 @@ handle_enum:
 void
 mono_arch_peephole_pass_1 (MonoCompile *cfg, MonoBasicBlock *bb)
 {
+       MonoInst *ins, *n; /* ins: current instruction; n: cursor used by MONO_BB_FOR_EACH_INS_SAFE */
+
+       MONO_BB_FOR_EACH_INS_SAFE (bb, n, ins) { /* hand each conditional branch listed below to compare_and_branch () */
+               switch (ins->opcode) {
+               case OP_IBEQ:
+               case OP_LBEQ:
+                       compare_and_branch(bb, ins, S390_CC_EQ, FALSE); /* FALSE: non-logical opcode variants */
+                       break;
+               case OP_LBNE_UN:
+               case OP_IBNE_UN:
+                       compare_and_branch(bb, ins, S390_CC_NE, TRUE); /* only _UN opcodes are listed for NE; they select the logical variants */
+                       break;
+               case OP_LBLT:
+               case OP_IBLT:
+                       compare_and_branch(bb, ins, S390_CC_LT, FALSE);
+                       break;
+               case OP_LBLT_UN:
+               case OP_IBLT_UN:
+                       compare_and_branch(bb, ins, S390_CC_LT, TRUE); /* TRUE: logical opcode variants */
+                       break;
+               case OP_LBGT:
+               case OP_IBGT:
+                       compare_and_branch(bb, ins, S390_CC_GT, FALSE);
+                       break;
+               case OP_LBGT_UN:
+               case OP_IBGT_UN:
+                       compare_and_branch(bb, ins, S390_CC_GT, TRUE);
+                       break;
+               case OP_LBGE:
+               case OP_IBGE:
+                       compare_and_branch(bb, ins, S390_CC_GE, FALSE);
+                       break;
+               case OP_LBGE_UN:
+               case OP_IBGE_UN:
+                       compare_and_branch(bb, ins, S390_CC_GE, TRUE);
+                       break;
+               case OP_LBLE:
+               case OP_IBLE:
+                       compare_and_branch(bb, ins, S390_CC_LE, FALSE);
+                       break;
+               case OP_LBLE_UN:
+               case OP_IBLE_UN:
+                       compare_and_branch(bb, ins, S390_CC_LE, TRUE);
+                       break;
+
+               // No default case: the generic mono_peephole_ins (bb, ins) call
+               // is disabled (commented out) in this pass, so other opcodes are left alone.
+               }
+       }
 }
 
 /*========================= End of Function ========================*/
@@ -2816,9 +3004,20 @@ mono_arch_peephole_pass_1 (MonoCompile *cfg, MonoBasicBlock *bb)
 void
 mono_arch_peephole_pass_2 (MonoCompile *cfg, MonoBasicBlock *bb)
 {
-       MonoInst *ins, *n;
+       MonoInst *ins, *n, *last_ins = NULL; /* NOTE(review): last_ins is never assigned in the loop below, so it stays NULL */
 
        MONO_BB_FOR_EACH_INS_SAFE (bb, n, ins) {
+               switch (ins->opcode) { /* no default: every instruction still reaches mono_peephole_ins () below */
+               case OP_LOADU4_MEMBASE:
+               case OP_LOADI4_MEMBASE:
+                       if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG) && /* cannot be true while last_ins is NULL */
+                                       ins->inst_basereg == last_ins->inst_destbasereg && /* same base register ... */
+                                       ins->inst_offset == last_ins->inst_offset) { /* ... and same offset as the store */
+                               ins->opcode = (ins->opcode == OP_LOADI4_MEMBASE) ? OP_ICONV_TO_I4 : OP_ICONV_TO_U4; /* turn the load into a conversion opcode */
+                               ins->sreg1 = last_ins->sreg1; /* source becomes the register that was stored */
+                       }
+                       break;
+               }
                mono_peephole_ins (bb, ins);
        }
 }
@@ -2929,15 +3128,15 @@ emit_float_to_int (MonoCompile *cfg, guchar *code, int dreg, int sreg, int size,
 
 /*------------------------------------------------------------------*/
 /*                                                                  */
-/* Name                - gboolean_is_unsigned.                             */
+/* Name                - is_unsigned.                                      */
 /*                                                                  */
 /* Function    - Return TRUE if next opcode is checking for un-    */
 /*               signed value.                                     */
 /*                                                                 */
 /*------------------------------------------------------------------*/
 
-static 
-gboolean is_unsigned (MonoInst *next)
+static gboolean 
+is_unsigned (MonoInst *next)
 {
        if ((next) && 
                (((next->opcode >= OP_IBNE_UN) &&
@@ -2949,7 +3148,9 @@ gboolean is_unsigned (MonoInst *next)
                 ((next->opcode >= OP_COND_EXC_INE_UN) &&
                  (next->opcode <= OP_COND_EXC_ILT_UN)) ||
                 ((next->opcode == OP_CLT_UN) ||
-                 (next->opcode == OP_CGT_UN)) ||
+                 (next->opcode == OP_CGT_UN) ||
+                 (next->opcode == OP_ICGE_UN)  ||
+                 (next->opcode == OP_ICLE_UN)) ||
                 ((next->opcode == OP_ICLT_UN) ||
                  (next->opcode == OP_ICGT_UN) ||
                  (next->opcode == OP_LCLT_UN) ||
@@ -3116,6 +3317,14 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        s390_llghr (code, ins->dreg, ins->sreg1);
                }
                        break;
+               case OP_ICONV_TO_U4: {
+                       s390_llgfr (code, ins->dreg, ins->sreg1);
+               }
+                       break;
+               case OP_ICONV_TO_I4: {
+                       s390_lgfr (code, ins->dreg, ins->sreg1);
+               }
+                       break;
                case OP_COMPARE: 
                case OP_LCOMPARE: {
                        if (is_unsigned (ins->next))
@@ -3133,19 +3342,29 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        break;
                case OP_COMPARE_IMM:
                case OP_LCOMPARE_IMM: {
-                       S390_SET (code, s390_r0, ins->inst_imm);
-                       if (is_unsigned (ins->next))
-                               s390_clgr (code, ins->sreg1, s390_r0);
-                       else
-                               s390_cgr  (code, ins->sreg1, s390_r0);
+                       gboolean branchUn = is_unsigned (ins->next);
+                       if ((ins->inst_imm == 0) && (!branchUn)) {
+                               s390_ltgr (code, ins->sreg1, ins->sreg1);
+                       } else {
+                               S390_SET (code, s390_r0, ins->inst_imm);
+                               if (branchUn)
+                                       s390_clgr (code, ins->sreg1, s390_r0);
+                               else
+                                       s390_cgr  (code, ins->sreg1, s390_r0);
+                       }
                }
                        break;
                case OP_ICOMPARE_IMM: {
-                       S390_SET (code, s390_r0, ins->inst_imm);
-                       if (is_unsigned (ins->next))
-                               s390_clr  (code, ins->sreg1, s390_r0);
-                       else
-                               s390_cr   (code, ins->sreg1, s390_r0);
+                       gboolean branchUn = is_unsigned (ins->next);
+                       if ((ins->inst_imm == 0) && (!branchUn)) {
+                               s390_ltr (code, ins->sreg1, ins->sreg1);
+                       } else {
+                               S390_SET (code, s390_r0, ins->inst_imm);
+                               if (branchUn)
+                                       s390_clr  (code, ins->sreg1, s390_r0);
+                               else
+                                       s390_cr   (code, ins->sreg1, s390_r0);
+                       }
                }
                        break;
                case OP_BREAK: {
@@ -3155,13 +3374,21 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                }
                        break;
                case OP_ADDCC: {
-                       CHECK_SRCDST_COM;
-                       s390_agr  (code, ins->dreg, src2);
+                       if (mono_hwcap_s390x_has_mlt) {
+                               s390_agrk (code, ins->dreg, ins->sreg1, ins->sreg2);
+                       } else {
+                               CHECK_SRCDST_COM;
+                               s390_agr  (code, ins->dreg, src2);
+                       }
                }
                        break;
                case OP_LADD: {
-                       CHECK_SRCDST_COM;
-                       s390_agr   (code, ins->dreg, src2);
+                       if (mono_hwcap_s390x_has_mlt) {
+                               s390_agrk (code, ins->dreg, ins->sreg1, ins->sreg2);
+                       } else {
+                               CHECK_SRCDST_COM;
+                               s390_agr   (code, ins->dreg, src2);
+                       }
                }
                        break;
                case OP_ADC: {
@@ -3170,16 +3397,25 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                }
                        break;
                case OP_ADD_IMM: {
-                       if (ins->dreg != ins->sreg1) {
-                               s390_lgr  (code, ins->dreg, ins->sreg1);
-                       }
-                       if (s390_is_imm16 (ins->inst_imm)) {
-                               s390_aghi (code, ins->dreg, ins->inst_imm);
-                       } else if (s390_is_imm32 (ins->inst_imm)) {
-                               s390_agfi (code, ins->dreg, ins->inst_imm);
+                       if (mono_hwcap_s390x_has_mlt) {
+                               if (s390_is_imm16 (ins->inst_imm)) {
+                                       s390_aghik(code, ins->dreg, ins->sreg1, ins->inst_imm);
+                               } else {
+                                       S390_SET  (code, s390_r0, ins->inst_imm);
+                                       s390_agrk (code, ins->dreg, ins->sreg1, s390_r0);
+                               }
                        } else {
-                               S390_SET  (code, s390_r0, ins->inst_imm);
-                               s390_agr  (code, ins->dreg, s390_r0);
+                               if (ins->dreg != ins->sreg1) {
+                                       s390_lgr  (code, ins->dreg, ins->sreg1);
+                               }
+                               if (s390_is_imm16 (ins->inst_imm)) {
+                                       s390_aghi (code, ins->dreg, ins->inst_imm);
+                               } else if (s390_is_imm32 (ins->inst_imm)) {
+                                       s390_agfi (code, ins->dreg, ins->inst_imm);
+                               } else {
+                                       S390_SET  (code, s390_r0, ins->inst_imm);
+                                       s390_agr  (code, ins->dreg, s390_r0);
+                               }
                        }
                }
                        break;
@@ -3187,8 +3423,12 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        if (ins->dreg != ins->sreg1) {
                                s390_lgr  (code, ins->dreg, ins->sreg1);
                        }
-                       g_assert (s390_is_imm16 (ins->inst_imm));
-                       s390_aghi (code, ins->dreg, ins->inst_imm);
+                       if (s390_is_imm32 (ins->inst_imm)) {
+                               s390_agfi (code, ins->dreg, ins->inst_imm);
+                       } else {
+                               S390_SET  (code, s390_r0, ins->inst_imm);
+                               s390_agr  (code, ins->dreg, s390_r0);
+                       }
                }
                        break;
                case OP_ADC_IMM: {
@@ -3238,13 +3478,21 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                }
                        break;
                case OP_SUBCC: {
-                       CHECK_SRCDST_NCOM;
-                       s390_sgr (code, ins->dreg, src2);
+                       if (mono_hwcap_s390x_has_mlt) {
+                           s390_sgrk (code, ins->dreg, ins->sreg1, ins->sreg2);
+                       } else {
+                           CHECK_SRCDST_NCOM;
+                           s390_sgr (code, ins->dreg, src2);
+                       }
                }
                        break;
                case OP_LSUB: {
-                       CHECK_SRCDST_NCOM;
-                       s390_sgr  (code, ins->dreg, src2);
+                       if (mono_hwcap_s390x_has_mlt) {
+                           s390_sgrk (code, ins->dreg, ins->sreg1, ins->sreg2);
+                       } else {
+                           CHECK_SRCDST_NCOM;
+                           s390_sgr  (code, ins->dreg, src2);
+                       }
                }
                        break;
                case OP_SBB: {
@@ -3311,26 +3559,32 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                }
                        break;
                case OP_LAND: {
-                       if (ins->sreg1 == ins->dreg) {
-                               s390_ngr  (code, ins->dreg, ins->sreg2);
-                       } 
-                       else { 
-                               if (ins->sreg2 == ins->dreg) { 
-                                       s390_ngr (code, ins->dreg, ins->sreg1);
-                               }
-                               else { 
-                                       s390_lgr (code, ins->dreg, ins->sreg1);
-                                       s390_ngr (code, ins->dreg, ins->sreg2);
+                       if (mono_hwcap_s390x_has_mlt) {
+                               s390_ngrk (code, ins->dreg, ins->sreg1, ins->sreg2);
+                       } else {
+                               if (ins->sreg1 == ins->dreg) {
+                                       s390_ngr  (code, ins->dreg, ins->sreg2);
+                               } else { 
+                                       if (ins->sreg2 == ins->dreg) { 
+                                               s390_ngr (code, ins->dreg, ins->sreg1);
+                                       } else { 
+                                               s390_lgr (code, ins->dreg, ins->sreg1);
+                                               s390_ngr (code, ins->dreg, ins->sreg2);
+                                       }
                                }
                        }
                }
                        break;
                case OP_AND_IMM: {
-                       if (ins->dreg != ins->sreg1) {
-                               s390_lgr  (code, ins->dreg, ins->sreg1);
-                       }
                        S390_SET_MASK (code, s390_r0, ins->inst_imm);
-                       s390_ngr (code, ins->dreg, s390_r0);
+                       if (mono_hwcap_s390x_has_mlt) {
+                               s390_ngrk (code, ins->dreg, ins->sreg1, s390_r0);
+                       } else {
+                               if (ins->dreg != ins->sreg1) {
+                                       s390_lgr  (code, ins->dreg, ins->sreg1);
+                               }
+                               s390_ngr (code, ins->dreg, s390_r0);
+                       }
                }
                        break;
                case OP_LDIV: {
@@ -3371,49 +3625,63 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                }
                        break;
                case OP_LOR: {
-                       if (ins->sreg1 == ins->dreg) {
-                               s390_ogr  (code, ins->dreg, ins->sreg2);
-                       } 
-                       else { 
-                               if (ins->sreg2 == ins->dreg) { 
-                                       s390_ogr (code, ins->dreg, ins->sreg1);
-                               }
-                               else { 
-                                       s390_lgr (code, ins->dreg, ins->sreg1);
-                                       s390_ogr (code, ins->dreg, ins->sreg2);
+                       if (mono_hwcap_s390x_has_mlt) { /* three-operand OR: no register shuffling */
+                               s390_ogrk (code, ins->dreg, ins->sreg1, ins->sreg2);
+                       } else {
+                               if (ins->sreg1 == ins->dreg) { /* dreg already holds sreg1 */
+                                       s390_ogr  (code, ins->dreg, ins->sreg2);
+                               } else { 
+                                       if (ins->sreg2 == ins->dreg) { /* dreg holds sreg2: OR the other source in */
+                                               s390_ogr (code, ins->dreg, ins->sreg1);
+                                       } else { /* dreg differs from both sources: copy, then OR */
+                                               s390_lgr (code, ins->dreg, ins->sreg1);
+                                               s390_ogr (code, ins->dreg, ins->sreg2);
+                                       }
                                }
                        }
                }
                        break;
                case OP_OR_IMM: {
-                       if (ins->dreg != ins->sreg1) {
-                               s390_lgr  (code, ins->dreg, ins->sreg1);
-                       }
                        S390_SET_MASK(code, s390_r0, ins->inst_imm);
-                       s390_ogr (code, ins->dreg, s390_r0);
+                       if (mono_hwcap_s390x_has_mlt) { /* three-operand OR: dreg need not equal sreg1 */
+                               s390_ogrk (code, ins->dreg, ins->sreg1, s390_r0);
+                       } else { /* two-operand fallback: dreg must hold sreg1 first */
+                               if (ins->dreg != ins->sreg1) {
+                                       s390_lgr  (code, ins->dreg, ins->sreg1);
+                               }
+                               s390_ogr (code, ins->dreg, s390_r0);
+                       }
                }
                        break;
                case OP_LXOR: {
-                       if (ins->sreg1 == ins->dreg) {
-                               s390_xgr  (code, ins->dreg, ins->sreg2);
-                       } 
-                       else { 
-                               if (ins->sreg2 == ins->dreg) { 
-                                       s390_xgr (code, ins->dreg, ins->sreg1);
-                               }
+                       if (mono_hwcap_s390x_has_mlt) { /* three-operand XOR: no register shuffling */
+                               s390_xgrk (code, ins->dreg, ins->sreg1, ins->sreg2);
+                       } else {
+                               if (ins->sreg1 == ins->dreg) { /* dreg already holds sreg1 */
+                                       s390_xgr  (code, ins->dreg, ins->sreg2);
+                               } 
                                else { 
-                                       s390_lgr (code, ins->dreg, ins->sreg1);
-                                       s390_xgr (code, ins->dreg, ins->sreg2);
+                                       if (ins->sreg2 == ins->dreg) { /* dreg holds sreg2: XOR the other source in */
+                                               s390_xgr (code, ins->dreg, ins->sreg1);
+                                       }
+                                       else { /* dreg differs from both sources: copy, then XOR */
+                                               s390_lgr (code, ins->dreg, ins->sreg1);
+                                               s390_xgr (code, ins->dreg, ins->sreg2);
+                                       }
                                }
                        }
                }
                        break;
                case OP_XOR_IMM: {
-                       if (ins->dreg != ins->sreg1) {
-                               s390_lgr  (code, ins->dreg, ins->sreg1);
+                       S390_SET_MASK(code, s390_r0, ins->inst_imm); /* now emitted first; r0 is the second operand on both paths */
+                       if (mono_hwcap_s390x_has_mlt) { /* three-operand XOR: dreg need not equal sreg1 */
+                               s390_xgrk (code, ins->dreg, ins->sreg1, s390_r0);
+                       } else { /* two-operand fallback: dreg must hold sreg1 first */
+                               if (ins->dreg != ins->sreg1) {
+                                       s390_lgr  (code, ins->dreg, ins->sreg1);
+                               }
+                               s390_xgr (code, ins->dreg, s390_r0);
                        }
-                       S390_SET_MASK (code, s390_r0, ins->inst_imm);
-                       s390_xgr (code, ins->dreg, s390_r0);
                }
                        break;
                case OP_LSHL: {
@@ -3632,16 +3900,24 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                }
                        break;
                case OP_IAND: {
-                       CHECK_SRCDST_NCOM_I;
-                       s390_ngr (code, ins->dreg, src2);
+                       if (mono_hwcap_s390x_has_mlt) { /* three-operand AND straight from the two sources */
+                               s390_ngrk (code, ins->dreg, ins->sreg1, ins->sreg2);
+                       } else {
+                               CHECK_SRCDST_NCOM_I; /* macro defined elsewhere; presumably prepares dreg and src2 - confirm */
+                               s390_ngr (code, ins->dreg, src2);
+                       }
                }
                        break;
                case OP_IAND_IMM: {
-                       if (ins->dreg != ins->sreg1) {
-                               s390_lgfr (code, ins->dreg, ins->sreg1);
-                       }
                        S390_SET_MASK (code, s390_r0, ins->inst_imm);
-                       s390_ngr  (code, ins->dreg, s390_r0);
+                       if (mono_hwcap_s390x_has_mlt) { /* three-operand AND: dreg need not equal sreg1 */
+                               s390_ngrk (code, ins->dreg, ins->sreg1, s390_r0);
+                       } else { /* two-operand fallback: dreg must hold sreg1 first */
+                               if (ins->dreg != ins->sreg1) {
+                                       s390_lgfr (code, ins->dreg, ins->sreg1); /* lgfr here, unlike the ngrk path which reads sreg1 as-is */
+                               }
+                               s390_ngr  (code, ins->dreg, s390_r0);
+                       }
                }
                        break;
                case OP_IDIV: {
@@ -3696,29 +3972,45 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                }
                        break;
                case OP_IOR: {
-                       CHECK_SRCDST_COM_I;
-                       s390_ogr (code, ins->dreg, src2);
+                       if (mono_hwcap_s390x_has_mlt) { /* three-operand OR straight from the two sources */
+                               s390_ogrk (code, ins->dreg, ins->sreg1, ins->sreg2);
+                       } else {
+                               CHECK_SRCDST_COM_I; /* macro defined elsewhere; presumably prepares dreg and src2 - confirm */
+                               s390_ogr (code, ins->dreg, src2);
+                       }
                }
                        break;
                case OP_IOR_IMM: {
-                       if (ins->dreg != ins->sreg1) {
-                               s390_lgfr (code, ins->dreg, ins->sreg1);
-                       }
                        S390_SET_MASK (code, s390_r0, ins->inst_imm);
-                       s390_ogr  (code, ins->dreg, s390_r0);
+                       if (mono_hwcap_s390x_has_mlt) { /* three-operand OR: dreg need not equal sreg1 */
+                               s390_ogrk (code, ins->dreg, ins->sreg1, s390_r0);
+                       } else { /* two-operand fallback: dreg must hold sreg1 first */
+                               if (ins->dreg != ins->sreg1) {
+                                       s390_lgfr (code, ins->dreg, ins->sreg1); /* lgfr here, unlike the ogrk path which reads sreg1 as-is */
+                               }
+                               s390_ogr  (code, ins->dreg, s390_r0);
+                       }
                }
                        break;
                case OP_IXOR: {
-                       CHECK_SRCDST_COM_I;
-                       s390_xgr (code, ins->dreg, src2);
+                       if (mono_hwcap_s390x_has_mlt) { /* three-operand XOR straight from the two sources */
+                               s390_xgrk (code, ins->dreg, ins->sreg1, ins->sreg2);
+                       } else {
+                               CHECK_SRCDST_COM_I; /* macro defined elsewhere; presumably prepares dreg and src2 - confirm */
+                               s390_xgr (code, ins->dreg, src2);
+                       }
                }
                        break;
                case OP_IXOR_IMM: {
-                       if (ins->dreg != ins->sreg1) {
-                               s390_lgfr (code, ins->dreg, ins->sreg1);
-                       }
                        S390_SET_MASK (code, s390_r0, ins->inst_imm);
-                       s390_xgr  (code, ins->dreg, s390_r0);
+                       if (mono_hwcap_s390x_has_mlt) { /* three-operand XOR: dreg need not equal sreg1 */
+                               s390_xgrk (code, ins->dreg, ins->sreg1, s390_r0);
+                       } else { /* two-operand fallback: dreg must hold sreg1 first */
+                               if (ins->dreg != ins->sreg1) {
+                                       s390_lgfr (code, ins->dreg, ins->sreg1); /* lgfr here, unlike the xgrk path which reads sreg1 as-is */
+                               }
+                               s390_xgr  (code, ins->dreg, s390_r0);
+                       }
                }
                        break;
                case OP_ISHL: {
@@ -4246,6 +4538,36 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        s390_lghi(code, ins->dreg, 0);
                }
                        break;
+               case OP_ICNEQ: { /* no compare emitted here: the branch uses the current condition code */
+                       s390_lghi(code, ins->dreg, 1); /* start with result = 1 */
+                       s390_jne (code, 4); /* condition met: presumably jumps past the next lghi, keeping 1 */
+                       s390_lghi(code, ins->dreg, 0); /* condition not met: result = 0 */
+               }
+                       break;
+               case OP_ICGE: { /* same set-1 / branch / set-0 shape as OP_ICNEQ */
+                       s390_lghi(code, ins->dreg, 1);
+                       s390_jhe (code, 4);
+                       s390_lghi(code, ins->dreg, 0);
+               }
+                       break;
+               case OP_ICLE: { /* same shape, different branch condition */
+                       s390_lghi(code, ins->dreg, 1);
+                       s390_jle (code, 4);
+                       s390_lghi(code, ins->dreg, 0);
+               }
+                       break;
+               case OP_ICGE_UN: { /* emits exactly what OP_ICGE emits */
+                       s390_lghi(code, ins->dreg, 1);
+                       s390_jhe (code, 4); /* NOTE(review): presumably relies on an earlier unsigned compare - confirm */
+                       s390_lghi(code, ins->dreg, 0);
+               }
+                       break;
+               case OP_ICLE_UN: { /* emits exactly what OP_ICLE emits */
+                       s390_lghi(code, ins->dreg, 1);
+                       s390_jle (code, 4); /* NOTE(review): presumably relies on an earlier unsigned compare - confirm */
+                       s390_lghi(code, ins->dreg, 0);
+               }
+                       break;
                case OP_COND_EXC_EQ:
                case OP_COND_EXC_IEQ:
                        EMIT_COND_SYSTEM_EXCEPTION (S390_CC_EQ, ins->inst_p1);
@@ -4327,6 +4649,38 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        EMIT_COND_BRANCH (ins, S390_CC_LE);
                        break;
 
+               case OP_S390_CRJ: /* compare-and-branch, register operands; macro gets the fused (crj) and plain (cr) mnemonics */
+                       EMIT_COMP_AND_BRANCH(ins, crj, cr);
+                       break;
+
+               case OP_S390_CLRJ: /* as above with clrj/clr */
+                       EMIT_COMP_AND_BRANCH(ins, clrj, clr);
+                       break;
+
+               case OP_S390_CGRJ: /* as above with cgrj/cgr */
+                       EMIT_COMP_AND_BRANCH(ins, cgrj, cgr);
+                       break;
+
+               case OP_S390_CLGRJ: /* as above with clgrj/clgr */
+                       EMIT_COMP_AND_BRANCH(ins, clgrj, clgr);
+                       break;
+
+               case OP_S390_CIJ: /* NOTE(review): immediate opcode, yet register mnemonics (crj/cr) plus ltr are passed - confirm against the macro */
+                       EMIT_COMP_AND_BRANCH_IMM(ins, crj, cr, ltr, FALSE);
+                       break;
+
+               case OP_S390_CLIJ: /* last argument TRUE here and for CLGIJ; presumably selects the unsigned form - TODO confirm */
+                       EMIT_COMP_AND_BRANCH_IMM(ins, clrj, clr, ltr, TRUE);
+                       break;
+
+               case OP_S390_CGIJ: /* same pattern with cgrj/cgr and ltgr */
+                       EMIT_COMP_AND_BRANCH_IMM(ins, cgrj, cgr, ltgr, FALSE);
+                       break;
+
+               case OP_S390_CLGIJ: /* same pattern with clgrj/clgr and ltgr */
+                       EMIT_COMP_AND_BRANCH_IMM(ins, clgrj, clgr, ltgr, TRUE);
+                       break;
+
                /* floating point opcodes */
                case OP_R8CONST: {
                        if (*((double *) ins->inst_p0) == 0) {
@@ -4562,6 +4916,27 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        s390_lghi  (code, ins->dreg, 0);
                }
                        break;
+               case OP_FCNEQ: { /* unlike the integer OP_IC* cases, the compare is emitted here */
+                       s390_cdbr  (code, ins->sreg1, ins->sreg2); /* compare the two source registers */
+                       s390_lghi  (code, ins->dreg, 1); /* start with result = 1 */
+                       s390_jne   (code, 4); /* condition met: presumably jumps past the next lghi, keeping 1 */
+                       s390_lghi  (code, ins->dreg, 0); /* condition not met: result = 0 */
+               }
+                       break;
+               case OP_FCGE: { /* same compare / set-1 / branch / set-0 shape */
+                       s390_cdbr  (code, ins->sreg1, ins->sreg2);
+                       s390_lghi  (code, ins->dreg, 1);
+                       s390_jhe   (code, 4);
+                       s390_lghi  (code, ins->dreg, 0);
+               }
+                       break;
+               case OP_FCLE: { /* same shape, different branch condition */
+                       s390_cdbr  (code, ins->sreg1, ins->sreg2);
+                       s390_lghi  (code, ins->dreg, 1);
+                       s390_jle   (code, 4);
+                       s390_lghi  (code, ins->dreg, 0);
+               }
+                       break;
                case OP_FBEQ: {
                        short *o;
                        s390_jo (code, 0); CODEPTR(code, o);
@@ -4667,12 +5042,21 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                }
                        break;
                case OP_ATOMIC_ADD_I8: {
-                       s390_lgr (code, s390_r1, ins->sreg2);
-                       s390_lg  (code, s390_r0, 0, ins->inst_basereg, ins->inst_offset);
-                       s390_agr (code, s390_r1, s390_r0);
-                       s390_csg (code, s390_r0, s390_r1, ins->inst_basereg, ins->inst_offset);
-                       s390_jnz (code, -10);
-                       s390_lgr (code, ins->dreg, s390_r1);
+                       if (mono_hwcap_s390x_has_ia) {
+                               /* Fetch the old memory value into scratch r0, not dreg: */
+                               /* dreg may be the same register as sreg2, and loading   */
+                               /* into it would destroy the addend before the add.      */
+                               s390_laag (code, s390_r0, ins->sreg2, ins->inst_basereg, ins->inst_offset);
+                               s390_agr  (code, s390_r0, ins->sreg2);
+                               s390_lgr  (code, ins->dreg, s390_r0);
+                       } else {
+                               s390_lgr (code, s390_r1, ins->sreg2);
+                               s390_lg  (code, s390_r0, 0, ins->inst_basereg, ins->inst_offset);
+                               s390_agr (code, s390_r1, s390_r0);
+                               s390_csg (code, s390_r0, s390_r1, ins->inst_basereg, ins->inst_offset);
+                               s390_jnz (code, -10);
+                               s390_lgr (code, ins->dreg, s390_r1);
+                       }
                }
                        break;  
                case OP_ATOMIC_EXCHANGE_I8: {
@@ -4683,12 +5063,17 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                }
                        break;  
                case OP_ATOMIC_ADD_I4: {
-                       s390_lgfr(code, s390_r1, ins->sreg2);
-                       s390_lgf (code, s390_r0, 0, ins->inst_basereg, ins->inst_offset);
-                       s390_agr (code, s390_r1, s390_r0);
-                       s390_cs  (code, s390_r0, s390_r1, ins->inst_basereg, ins->inst_offset);
-                       s390_jnz (code, -9);
-                       s390_lgfr(code, ins->dreg, s390_r1);
+                       if (mono_hwcap_s390x_has_ia) {
+                               s390_laa (code, ins->dreg, ins->sreg2, ins->inst_basereg, ins->inst_offset);
+                               s390_ar  (code, ins->dreg, ins->sreg2);
+                       } else {
+                               s390_lgfr(code, s390_r1, ins->sreg2);
+                               s390_lgf (code, s390_r0, 0, ins->inst_basereg, ins->inst_offset);
+                               s390_agr (code, s390_r1, s390_r0);
+                               s390_cs  (code, s390_r0, s390_r1, ins->inst_basereg, ins->inst_offset);
+                               s390_jnz (code, -9);
+                               s390_lgfr(code, ins->dreg, s390_r1);
+                       }
                }
                        break;  
                case OP_ATOMIC_EXCHANGE_I4: {
@@ -4714,13 +5099,13 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        s390_mem (code);
                        break;
                case OP_GC_SAFE_POINT: {
-                       guint8 *br;
+                       short *br;
 
                        g_assert (mono_threads_is_coop_enabled ());
 
-                       s390_chi (code, ins->sreg1, 1); 
-                       s390_je  (code, 0); CODEPTR(code, br);
-                       mono_add_patch_info (cfg, code- cfg->native_code, MONO_PATCH_INFO_ABS,
+                       s390_ltg (code, s390_r0, 0, ins->sreg1, 0);     
+                       s390_jz  (code, 0); CODEPTR(code, br);
+                       mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_ABS,
                                             mono_threads_state_poll);
                        S390_CALL_TEMPLATE (code, s390_r14);
                        PTRSLOT (code, br);
@@ -5412,7 +5797,7 @@ mono_arch_patch_code (MonoCompile *cfg, MonoMethod *method, MonoDomain *domain,
                gconstpointer target = NULL;
 
                target = mono_resolve_patch_target (method, domain, code, 
-                                                                                       patch_info, run_cctors, error);
+                                                   patch_info, run_cctors, error);
                return_if_nok (error);
 
                switch (patch_info->type) {
@@ -5614,6 +5999,14 @@ mono_arch_emit_prolog (MonoCompile *cfg)
        mono_emit_unwind_op_offset (cfg, code, s390_r14, S390_RET_ADDR_OFFSET);
        mini_gc_set_slot_type_from_cfa (cfg, S390_RET_ADDR_OFFSET, SLOT_NOREF);
 
+#if 0
+if ((strcmp(method->klass->name, "Tests") == 0) && 
+    (strcmp(method->name,"test_5_jmp") == 0)) {
+printf("Found!\n"); fflush(stdout);
+s390_j (code, 0);
+}
+#endif
+
        if (cfg->arch.bkchain_reg != -1)
                s390_lgr (code, cfg->arch.bkchain_reg, STK_BASE);
 
@@ -6131,7 +6524,7 @@ mono_arch_emit_exceptions (MonoCompile *cfg)
 
 /*------------------------------------------------------------------*/
 /*                                                                  */
-/* Name                - mono_arch_finish_init                                 */
+/* Name                - mono_arch_finish_init                             */
 /*                                                                  */
 /* Function    - Setup the JIT's Thread Level Specific Data.       */
 /*                                                                 */
@@ -6389,8 +6782,6 @@ get_delegate_invoke_impl (MonoTrampInfo **info, gboolean has_target, guint32 par
                mono_arch_flush_icache (start, size);
        }
 
-       MONO_PROFILER_RAISE (jit_code_buffer, (start, code - start, MONO_PROFILER_CODE_BUFFER_DELEGATE_INVOKE, NULL));
-
        if (has_target) {
                *info = mono_tramp_info_create ("delegate_invoke_impl_has_target", start, code - start, NULL, NULL);
        } else {
@@ -6675,7 +7066,6 @@ mono_arch_build_imt_trampoline (MonoVTable *vtable, MonoDomain *domain,
        }
 
        mono_arch_flush_icache ((guint8*)start, (code - start));
-       MONO_PROFILER_RAISE (jit_code_buffer, (start, code - start, MONO_PROFILER_CODE_BUFFER_IMT_TRAMPOLINE, NULL));
 
        if (!fail_tramp) 
                mono_stats.imt_trampolines_size += (code - start);
index 01dcfd997a9bf6a9822f412625f7b16b3989aaf0..fd1c2142f3349119f0a4a6cec7b268e3ee9a9ac4 100644 (file)
@@ -64,6 +64,7 @@ typedef struct
 #define MONO_ARCH_HAVE_INVALIDATE_METHOD               1
 #define MONO_ARCH_HAVE_OP_GENERIC_CLASS_INIT           1
 #define MONO_ARCH_HAVE_SETUP_ASYNC_CALLBACK            1
+#define MONO_ARCH_HAVE_INIT_LMF_EXT                    1
 
 #define S390_STACK_ALIGNMENT            8
 #define S390_FIRST_ARG_REG             s390_r2
@@ -78,13 +79,15 @@ typedef struct
 /*------------------------------------------------------*/
 /* use s390_r2-s390_r6 as parm registers                */
 /* s390_r0, s390_r1, s390_r12, s390_r13 used internally */
-/* s390_r8..s390_r11 are used for global regalloc       */
+/* s390_r8..s390_r10 are used for global regalloc       */
+/* -- except for s390_r9 which is used as IMT pointer   */
+/* s390_r11 is sometimes used as the frame pointer      */
 /* s390_r15 is the stack pointer                        */
 /*------------------------------------------------------*/
 
-#define MONO_ARCH_CALLEE_REGS (0xfc)
+#define MONO_ARCH_CALLEE_REGS (0x00fc)
 
-#define MONO_ARCH_CALLEE_SAVED_REGS 0xff80
+#define MONO_ARCH_CALLEE_SAVED_REGS 0xfd00
 
 /*----------------------------------------*/
 /* use s390_f1/s390_f3-s390_f15 as temps  */
index 91aed0b96e22801406dbc4482762d9e10cc4d63b..fed21162dcf9853121c132ccb21f164a9dfa2752 100644 (file)
@@ -559,8 +559,7 @@ mono_arch_create_rgctx_lazy_fetch_trampoline (guint32 slot, MonoTrampInfo **info
 /*------------------------------------------------------------------*/
 
 gpointer
-mono_arch_get_static_rgctx_trampoline (gpointer arg,
-                                                                          gpointer addr)
+mono_arch_get_static_rgctx_trampoline (gpointer arg, gpointer addr)
 {
        guint8 *code, *start;
        gint32 displace;
@@ -586,3 +585,20 @@ mono_arch_get_static_rgctx_trampoline (gpointer arg,
 }      
 
 /*========================= End of Function ========================*/
+
+/*------------------------------------------------------------------*/
+/*                                                                  */
+/* Name            - mono_arch_get_enter_icall_trampoline.                 */
+/*                                                                  */
+/* Function - Unimplemented stub; asserts if it is ever called.     */
+/*                                                                  */
+/*------------------------------------------------------------------*/
+
+gpointer
+mono_arch_get_enter_icall_trampoline (MonoTrampInfo **info)
+{
+       g_assert_not_reached ();
+       return NULL;
+}
+
+/*========================= End of Function ========================*/
index 4ff899fc9dec21bd325414b3866e8a0f1ff66b0f..070a81633143aea8a914fa7ba5e21d7b4f2e8e8c 100644 (file)
@@ -70,33 +70,58 @@ class Tests {
                return 0;
        }
 
+
        public static unsafe int test_0_ldobj ()
        {
                byte *ptr = stackalloc byte [20];
                for (int i = 0; i < 20; ++i)
                        ptr [i] = (byte)i;
 
+               if (BitConverter.IsLittleEndian) {
 
-               if (Intrinsics.UnalignedLdobj<short> (ptr + 0) != 0x0100)
-                       return 1;
+                       if (Intrinsics.UnalignedLdobj<short> (ptr + 0) != 0x0100)
+                               return 1;
 
-               if (Intrinsics.UnalignedLdobj<short> (ptr + 1) != 0x0201)
-                       return 2;
+                       if (Intrinsics.UnalignedLdobj<short> (ptr + 1) != 0x0201)
+                               return 2;
 
-               if (Intrinsics.UnalignedLdobj<short> (ptr + 2) != 0x0302)
-                       return 3;
+                       if (Intrinsics.UnalignedLdobj<short> (ptr + 2) != 0x0302)
+                               return 3;
 
-               if (Intrinsics.UnalignedLdobj<int> (ptr + 1) != 0x04030201)
-                       return 4;
+                       if (Intrinsics.UnalignedLdobj<int> (ptr + 1) != 0x04030201)
+                               return 4;
 
-               if (Intrinsics.UnalignedLdobj<int> (ptr + 2) != 0x05040302)
-                       return 5;
+                       if (Intrinsics.UnalignedLdobj<int> (ptr + 2) != 0x05040302)
+                               return 5;
+
+                       if (Intrinsics.UnalignedLdobj<long> (ptr + 1) != 0x0807060504030201)
+                               return 6;
+
+                       if (Intrinsics.UnalignedLdobj<long> (ptr + 6) != 0xD0C0B0A09080706)
+                               return 7;
+               } else {
+
+                       if (Intrinsics.UnalignedLdobj<short> (ptr + 0) != 0x0001)
+                               return 1;
 
-               if (Intrinsics.UnalignedLdobj<long> (ptr + 1) != 0x0807060504030201)
-                       return 6;
+                       if (Intrinsics.UnalignedLdobj<short> (ptr + 1) != 0x0102)
+                               return 2;
 
-               if (Intrinsics.UnalignedLdobj<long> (ptr + 6) != 0xD0C0B0A09080706)
-                       return 7;
+                       if (Intrinsics.UnalignedLdobj<short> (ptr + 2) != 0x0203)
+                               return 3;
+
+                       if (Intrinsics.UnalignedLdobj<int> (ptr + 1) != 0x01020304)
+                               return 4;
+
+                       if (Intrinsics.UnalignedLdobj<int> (ptr + 2) != 0x02030405)
+                               return 5;
+
+                       if (Intrinsics.UnalignedLdobj<long> (ptr + 1) != 0x0102030405060708)
+                               return 6;
+
+                       if (Intrinsics.UnalignedLdobj<long> (ptr + 6) != 0x60708090A0B0C0D)
+                               return 7;
+               }
 
                return 0;
        }
@@ -108,26 +133,51 @@ class Tests {
                        ptr [i] = (byte)i;
 
 
-               if (Intrinsics.UnalignedLdInd2 (ptr + 0) != 0x0100)
-                       return 1;
+               if (BitConverter.IsLittleEndian) {
 
-               if (Intrinsics.UnalignedLdInd2 (ptr + 1) != 0x0201)
-                       return 2;
+                       if (Intrinsics.UnalignedLdInd2 (ptr + 0) != 0x0100)
+                               return 1;
 
-               if (Intrinsics.UnalignedLdInd2 (ptr + 2) != 0x0302)
-                       return 3;
+                       if (Intrinsics.UnalignedLdInd2 (ptr + 1) != 0x0201)
+                               return 2;
 
-               if (Intrinsics.UnalignedLdInd4 (ptr + 1) != 0x04030201)
-                       return 4;
+                       if (Intrinsics.UnalignedLdInd2 (ptr + 2) != 0x0302)
+                               return 3;
 
-               if (Intrinsics.UnalignedLdInd4 (ptr + 2) != 0x05040302)
-                       return 5;
+                       if (Intrinsics.UnalignedLdInd4 (ptr + 1) != 0x04030201)
+                               return 4;
+
+                       if (Intrinsics.UnalignedLdInd4 (ptr + 2) != 0x05040302)
+                               return 5;
 
-               if (Intrinsics.UnalignedLdInd8 (ptr + 1) != 0x0807060504030201)
-                       return 6;
+                       if (Intrinsics.UnalignedLdInd8 (ptr + 1) != 0x0807060504030201)
+                               return 6;
+
+                       if (Intrinsics.UnalignedLdInd8 (ptr + 6) != 0xD0C0B0A09080706)
+                               return 7;
+               } else {
 
-               if (Intrinsics.UnalignedLdInd8 (ptr + 6) != 0xD0C0B0A09080706)
-                       return 7;
+                       if (Intrinsics.UnalignedLdInd2 (ptr + 0) != 0x0001)
+                               return 1;
+
+                       if (Intrinsics.UnalignedLdInd2 (ptr + 1) != 0x0102)
+                               return 2;
+
+                       if (Intrinsics.UnalignedLdInd2 (ptr + 2) != 0x0203)
+                               return 3;
+
+                       if (Intrinsics.UnalignedLdInd4 (ptr + 1) != 0x01020304)
+                               return 4;
+
+                       if (Intrinsics.UnalignedLdInd4 (ptr + 2) != 0x02030405)
+                               return 5;
+
+                       if (Intrinsics.UnalignedLdInd8 (ptr + 1) != 0x0102030405060708)
+                               return 6;
+
+                       if (Intrinsics.UnalignedLdInd8 (ptr + 6) != 0x60708090A0B0C0D)
+                               return 7;
+               }
 
                return 0;
        }
@@ -185,21 +235,41 @@ class Tests {
        {
                byte *ptr = stackalloc byte [20];
 
-               Intrinsics.UnalignedStobj <short> (ptr + 0, 0x6688);
-               if (ptr [0] != 0x88 || ptr [1] != 0x66)
-                       return 1;
+               if (BitConverter.IsLittleEndian) {
+                       Intrinsics.UnalignedStobj <short> (ptr + 0, 0x6688);
+                       if (ptr [0] != 0x88 || ptr [1] != 0x66)
+                               return 1;
 
-               Intrinsics.UnalignedStobj <short> (ptr + 1, 0x6589);
-               if (ptr [1] != 0x89 || ptr [2] != 0x65)
-                       return 2;
+                       Intrinsics.UnalignedStobj <short> (ptr + 1, 0x6589);
+                       if (ptr [1] != 0x89 || ptr [2] != 0x65)
+                               return 2;
 
-               Intrinsics.UnalignedStobj <int> (ptr + 1, 0x60708090);
-               if (ptr [1] != 0x90 || ptr [2] != 0x80 || ptr [3] != 0x70 || ptr [4] != 0x60)
-                       return 3;
+                       Intrinsics.UnalignedStobj <int> (ptr + 1, 0x60708090);
+                       if (ptr [1] != 0x90 || ptr [2] != 0x80 || ptr [3] != 0x70 || ptr [4] != 0x60)
+                               return 3;
 
-               Intrinsics.UnalignedStobj <long> (ptr + 1, 0x405060708090);
-               if (ptr [1] != 0x90 || ptr [2] != 0x80 || ptr [3] != 0x70 || ptr [4] != 0x60 || ptr [5] != 0x50 || ptr [6] != 0x40)
-                       return 4;
+                       Intrinsics.UnalignedStobj <long> (ptr + 1, 0x405060708090);
+                       if (ptr [1] != 0x90 || ptr [2] != 0x80 || ptr [3] != 0x70 || 
+                           ptr [4] != 0x60 || ptr [5] != 0x50 || ptr [6] != 0x40)
+                               return 4;
+               } else {
+                       Intrinsics.UnalignedStobj <short> (ptr + 0, 0x6688);
+                       if (ptr [0] != 0x66 || ptr [1] != 0x88)
+                               return 1;
+
+                       Intrinsics.UnalignedStobj <short> (ptr + 1, 0x6589);
+                       if (ptr [1] != 0x65 || ptr [2] != 0x89)
+                               return 2;
+
+                       Intrinsics.UnalignedStobj <int> (ptr + 1, 0x60708090);
+                       if (ptr [1] != 0x60 || ptr [2] != 0x70 || ptr [3] != 0x80 || ptr [4] != 0x90)
+                               return 3;
+
+                       Intrinsics.UnalignedStobj <long> (ptr + 1, 0x2030405060708090);
+                       if (ptr [1] != 0x20 || ptr [2] != 0x30 || ptr [3] != 0x40 || 
+                           ptr [4] != 0x50 || ptr [5] != 0x60 || ptr [6] != 0x70)
+                               return 4;
+               }
 
                return 0;
        }
index 12df49b91ba84fbad94d2c64b25c9d71afdb9d3e..40318232c2134f68563a10d855b52987092c52e5 100644 (file)
@@ -133,4 +133,7 @@ mono_hwcap_arch_init (void)
 
        mono_hwcap_s390x_has_fpe = facs.fpe;
        mono_hwcap_s390x_has_vec = facs.vec;
+       mono_hwcap_s390x_has_mlt = facs.multi;
+       mono_hwcap_s390x_has_ia  = facs.ia;
+       mono_hwcap_s390x_has_gie = facs.gie;
 }
index 419066ad04dbba33ad7cda003ab81f3080337d7f..47ae051428a9050b7596f0b53c37b489a8c4b84e 100644 (file)
@@ -35,6 +35,9 @@ MONO_HWCAP_VAR(ppc_has_multiple_ls_units)
 
 MONO_HWCAP_VAR(s390x_has_fpe)
 MONO_HWCAP_VAR(s390x_has_vec)
+MONO_HWCAP_VAR(s390x_has_mlt)
+MONO_HWCAP_VAR(s390x_has_ia)
+MONO_HWCAP_VAR(s390x_has_gie)
 
 #elif defined (TARGET_SPARC) || defined (TARGET_SPARC64)