From 3cd04f97199ff38d7316587e44381638ba469565 Mon Sep 17 00:00:00 2001
From: Rodrigo Kumpera
Date: Mon, 16 Nov 2015 00:22:56 -0500
Subject: [PATCH] [jit] Add arm64 backend and gsharedvt code.
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

List of original contributors:

Zoltan Varga
Rodrigo Kumpera
Andi McClure
Vlad Brezae
Joao Matos
Alex Rønne Petersen
---
 mono/arch/arm64/arm64-codegen.h   |  850 ++++-
 mono/metadata/object-offsets.h    |    7 +
 mono/mini/Makefile.am.in          |    4 +-
 mono/mini/aot-compiler.c          |  326 +-
 mono/mini/exceptions-arm64.c      |  595 +++-
 mono/mini/mini-arm64-gsharedvt.c  |  418 +++
 mono/mini/mini-arm64-gsharedvt.h  |   84 +
 mono/mini/mini-arm64.c            | 5217 ++++++++++++++++++++++++++++-
 mono/mini/mini-arm64.h            |  268 +-
 mono/mini/tramp-arm64-gsharedvt.c |  574 ++++
 mono/mini/tramp-arm64.c           |  616 +++-
 11 files changed, 8952 insertions(+), 7 deletions(-)
 create mode 100644 mono/mini/mini-arm64-gsharedvt.c
 create mode 100644 mono/mini/mini-arm64-gsharedvt.h
 create mode 100644 mono/mini/tramp-arm64-gsharedvt.c

diff --git a/mono/arch/arm64/arm64-codegen.h b/mono/arch/arm64/arm64-codegen.h
index 259ff967407..1744235476e 100644
--- a/mono/arch/arm64/arm64-codegen.h
+++ b/mono/arch/arm64/arm64-codegen.h
@@ -1,3 +1,851 @@
-#include "../../../../mono-extensions/mono/arch/arm64/arm64-codegen.h"
+/*
+ * arm64-codegen.h: ARM64 code generation macros
+ *
+ * Author:
+ *   Zoltan Varga (vargaz@gmail.com)
+ *
+ * Copyright 2013 Xamarin, Inc (http://www.xamarin.com)
+ */
+#ifndef __ARM64_CODEGEN_H__
+#define __ARM64_CODEGEN_H__
+#include <glib.h>
+
+enum {
+	ARMREG_R0 = 0,
+	ARMREG_R1 = 1,
+	ARMREG_R2 = 2,
+	ARMREG_R3 = 3,
+	ARMREG_R4 = 4,
+	ARMREG_R5 = 5,
+	ARMREG_R6 = 6,
+	ARMREG_R7 = 7,
+	ARMREG_R8 = 8,
+	ARMREG_R9 = 9,
+	ARMREG_R10 = 10,
+	ARMREG_R11 = 11,
+	ARMREG_R12 = 12,
+	ARMREG_R13 = 13,
+	ARMREG_R14 = 14,
+	ARMREG_R15 = 15,
+	ARMREG_R16 = 16,
+	ARMREG_R17 = 17,
+	ARMREG_R18 = 18,
+	ARMREG_R19 = 19,
+	ARMREG_R20 = 20,
+	ARMREG_R21 = 21,
+	ARMREG_R22 = 22,
+	ARMREG_R23 = 23,
+	ARMREG_R24 = 24,
+	ARMREG_R25 = 25,
+	ARMREG_R26 = 26,
+	ARMREG_R27 = 27,
+	ARMREG_R28 = 28,
+	ARMREG_R29 = 29,
+	ARMREG_R30 = 30,
+	ARMREG_SP = 31,
+	ARMREG_RZR = 31,
+
+	ARMREG_IP0 = ARMREG_R16,
+	ARMREG_IP1 = ARMREG_R17,
+	ARMREG_FP = ARMREG_R29,
+	ARMREG_LR = ARMREG_R30
+};
+
+enum {
+	ARMREG_D0 = 0,
+	ARMREG_D1 = 1,
+	ARMREG_D2 = 2,
+	ARMREG_D3 = 3,
+	ARMREG_D4 = 4,
+	ARMREG_D5 = 5,
+	ARMREG_D6 = 6,
+	ARMREG_D7 = 7,
+	ARMREG_D8 = 8,
+	ARMREG_D9 = 9,
+	ARMREG_D10 = 10,
+	ARMREG_D11 = 11,
+	ARMREG_D12 = 12,
+	ARMREG_D13 = 13,
+	ARMREG_D14 = 14,
+	ARMREG_D15 = 15,
+	ARMREG_D16 = 16,
+	ARMREG_D17 = 17,
+	ARMREG_D18 = 18,
+	ARMREG_D19 = 19,
+	ARMREG_D20 = 20,
+	ARMREG_D21 = 21,
+	ARMREG_D22 = 22,
+	ARMREG_D23 = 23,
+	ARMREG_D24 = 24,
+	ARMREG_D25 = 25,
+	ARMREG_D26 = 26,
+	ARMREG_D27 = 27,
+	ARMREG_D28 = 28,
+	ARMREG_D29 = 29,
+	ARMREG_D30 = 30,
+	ARMREG_D31 = 31
+};
+
+typedef enum {
+	ARMCOND_EQ = 0x0,	/* Equal; Z = 1 */
+	ARMCOND_NE = 0x1,	/* Not equal, or unordered; Z = 0 */
+	ARMCOND_CS = 0x2,	/* Carry set; C = 1 */
+	ARMCOND_HS = ARMCOND_CS,	/* Unsigned higher or same */
+	ARMCOND_CC = 0x3,	/* Carry clear; C = 0 */
+	ARMCOND_LO = ARMCOND_CC,	/* Unsigned lower */
+	ARMCOND_MI = 0x4,	/* Negative; N = 1 */
+	ARMCOND_PL = 0x5,	/* Positive or zero; N = 0 */
+	ARMCOND_VS = 0x6,	/* Overflow; V = 1 */
+	ARMCOND_VC = 0x7,	/* No overflow; V = 0 */
+	ARMCOND_HI = 0x8,	/* Unsigned higher; C = 1 && Z = 0 */
+	ARMCOND_LS = 0x9,	/* Unsigned lower or same; C = 0 || Z = 1 */
+	ARMCOND_GE = 0xA,	/* Signed greater than or equal; N = V */
+	ARMCOND_LT = 0xB,	/* Signed less than; N != V */
+	ARMCOND_GT = 0xC,	/* Signed greater than; Z = 0 && N = V */
+	ARMCOND_LE = 0xD,	/* Signed less than or equal; Z = 1 || N != V */
+	ARMCOND_AL = 0xE,	/* Always */
+	ARMCOND_NV = 0xF,	/* Never */
+} ARMCond;
+
+typedef enum {
+	ARMSHIFT_LSL = 0x0,
+	ARMSHIFT_LSR = 0x1,
+	ARMSHIFT_ASR = 0x2
+} ARMShift;
+
+typedef enum {
+	ARMSIZE_B = 0x0,
+	ARMSIZE_H = 0x1,
+	ARMSIZE_W = 0x2,
+	ARMSIZE_X = 0x3
+} ARMSize;
+
+#define arm_emit(p, ins) do { *(guint32*)(p) = (ins); (p) += 4; } while (0)
+
+/* Overwrite bits [offset,offset+nbits] with value */
+static G_GNUC_UNUSED inline void
+arm_set_ins_bits (void *p, int offset, int nbits, guint32 value)
+{
+	*(guint32*)p = (*(guint32*)p & ~(((1 << nbits) - 1) << offset)) | (value << offset);
+}
+
+/*
+ * Naming conventions for codegen macros:
+ * - 64 bit opcodes have an 'X' suffix
+ * - 32 bit opcodes have a 'W' suffix
+ * - the order of operands is the same as in assembly
+ */
+
+/*
+ * http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ddi0487a/index.html
+ */
+
+/* Unconditional branch (register) */
+
+// 0b1101011 == 0x6b
+#define arm_format_breg(p, opc, op2, op3, op4, rn) arm_emit ((p), (0x6b << 25) | ((opc) << 21) | ((op2) << 16) | ((op3) << 10) | ((rn) << 5) | ((op4) << 0))
+
+// 0b0000 == 0x0, 0b11111 == 0x1f
+#define arm_brx(p, reg) arm_format_breg ((p), 0x0, 0x1f, 0x0, 0x0, (reg))
+
+// 0b0001 == 0x1
+#define arm_blrx(p, reg) arm_format_breg ((p), 0x1, 0x1f, 0x0, 0x0, (reg))
+
+// 0b0010 == 0x2
+#define arm_retx(p, reg) arm_format_breg ((p), 0x2, 0x1f, 0x0, 0x0, (reg))
+
+/* Unconditional branch (immediate) */
+
+static G_GNUC_UNUSED inline gboolean
+arm_is_bl_disp (void *code, void *target)
+{
+	gint64 disp = ((char*)(target) - (char*)(code)) / 4;
+
+	return (disp > -(1 << 25)) && (disp < (1 << 25));
+}
+
+static G_GNUC_UNUSED inline unsigned int
+arm_get_disp (void *p, void *target)
+{
+	unsigned int disp = ((char*)target - (char*)p) / 4;
+
+	if (target)
+		g_assert (arm_is_bl_disp (p, target));
+
+	return (disp & 0x3ffffff);
+}
+
+// 0b00101 == 0x5
+#define arm_b(p, target) arm_emit (p, (0x0 << 31) | (0x5 << 26) | ((arm_get_disp ((p), (target)) << 0)))
+
+#define arm_bl(p, target) arm_emit (p, (0x1 << 31) | (0x5 << 26) | ((arm_get_disp ((p), (target)) << 0)))
+
+/* Conditional branch */
+
+static G_GNUC_UNUSED inline gboolean
+arm_is_disp19 (void *code, void *target)
+{
+	gint64 disp = ((char*)(target) - (char*)(code)) / 4;
+
+	return (disp > -(1 << 18)) && (disp < (1 << 18));
+}
+
+static G_GNUC_UNUSED inline unsigned int
+arm_get_disp19 (void *p, void *target)
+{
+	unsigned int disp = ((char*)target - (char*)p) / 4;
+
+	if (target)
+		g_assert (arm_is_disp19 (p, target));
+
+	return (disp & 0x7ffff);
+}
+
+// 0b0101010 == 0x2a
+#define arm_format_condbr(p, o1, o0, cond, disp) arm_emit ((p), (0x2a << 25) | ((o1) << 24) | ((disp) << 5) | ((o0) << 4) | ((cond) << 0))
+#define arm_get_bcc_cond(p) ((*(guint32*)p) & 0xf)
+
+#define arm_bcc(p, cond, target) arm_format_condbr ((p), 0x0, 0x0, (cond), arm_get_disp19 ((p), (target)))
+
+// 0b011010 == 0x1a
+#define arm_format_cmpbr(p, sf, op, rt, target) arm_emit ((p), ((sf) << 31) | (0x1a << 25) | ((op) << 24) | (arm_get_disp19 ((p), (target)) << 5) | ((rt) << 0))
+
+#define arm_set_cbz_target(p, target) arm_set_ins_bits (p, 5, 19, arm_get_disp19 ((p), (target)))
+
+#define arm_cbzx(p, rt, target) arm_format_cmpbr ((p), 0x1, 0x0, (rt), (target))
+#define arm_cbzw(p, rt, target) arm_format_cmpbr ((p), 0x0, 0x0, (rt),
(target)) + +#define arm_cbnzx(p, rt, target) arm_format_cmpbr ((p), 0x1, 0x1, (rt), (target)) +#define arm_cbnzw(p, rt, target) arm_format_cmpbr ((p), 0x0, 0x1, (rt), (target)) + +static G_GNUC_UNUSED inline unsigned int +arm_get_disp15 (void *p, void *target) +{ + unsigned int disp = ((char*)target - (char*)p) / 4; + return (disp & 0x7fff); +} + +// 0b011011 == 0x1b +#define arm_format_tbimm(p, op, rt, bit, target) arm_emit ((p), ((((bit) >> 5) & 1) << 31) | (0x1b << 25) | ((op) << 24) | (((bit) & 0x1f) << 19) | (arm_get_disp15 ((p), (target)) << 5) | ((rt) << 0)) + +#define arm_tbz(p, rt, bit, target) arm_format_tbimm ((p), 0x0, (rt), (bit), (target)) +#define arm_tbnz(p, rt, bit, target) arm_format_tbimm ((p), 0x1, (rt), (bit), (target)) + +/* Memory access */ + +#define arm_is_pimm12_scaled(pimm,size) ((pimm) >= 0 && (pimm) / (size) <= 0xfff && ((pimm) % (size)) == 0) + +static G_GNUC_UNUSED unsigned int +arm_encode_pimm12 (int pimm, int size) +{ + g_assert (arm_is_pimm12_scaled (pimm, size)); + return ((unsigned int)(pimm / size)) & 0xfff; +} + +#define arm_is_strb_imm(pimm) arm_is_pimm12_scaled((pimm), 1) +#define arm_is_strh_imm(pimm) arm_is_pimm12_scaled((pimm), 2) +#define arm_is_strw_imm(pimm) arm_is_pimm12_scaled((pimm), 4) +#define arm_is_strx_imm(pimm) arm_is_pimm12_scaled((pimm), 8) + +/* Load/Store register + scaled immediate */ +/* No pre-index/post-index yet */ +#define arm_format_mem_imm(p, size, opc, rt, rn, pimm, scale) arm_emit ((p), ((size) << 30) | (0x39 << 24) | ((opc) << 22) | (arm_encode_pimm12 ((pimm), (scale)) << 10) | ((rn) << 5) | ((rt) << 0)) + +/* C5.6.83 LDR (immediate) */ +#define arm_ldrx(p, rt, rn, pimm) arm_format_mem_imm (p, ARMSIZE_X, 0x1, (rt), (rn), (pimm), 8) +#define arm_ldrw(p, rt, rn, pimm) arm_format_mem_imm (p, ARMSIZE_W, 0x1, (rt), (rn), (pimm), 4) +/* C5.6.86 LDRB (immediate) */ +#define arm_ldrb(p, rt, rn, pimm) arm_format_mem_imm (p, ARMSIZE_B, 0x1, (rt), (rn), (pimm), 1) +/* C5.6.88 LDRH (immediate) */ +#define arm_ldrh(p, rt, rn, pimm) arm_format_mem_imm (p, ARMSIZE_H, 0x1, (rt), (rn), (pimm), 2) +/* C5.6.90 LDRSB (immediate) */ +#define arm_ldrsbx(p, rt, rn, pimm) arm_format_mem_imm (p, ARMSIZE_B, 0x2, (rt), (rn), (pimm), 1) +#define arm_ldrsbw(p, rt, rn, pimm) arm_format_mem_imm (p, ARMSIZE_B, 0x3, (rt), (rn), (pimm), 1) +/* C5.6.92 LDRSH (immediate) */ +#define arm_ldrshx(p, rt, rn, pimm) arm_format_mem_imm (p, ARMSIZE_H, 0x2, (rt), (rn), (pimm), 2) +#define arm_ldrshw(p, rt, rn, pimm) arm_format_mem_imm (p, ARMSIZE_H, 0x3, (rt), (rn), (pimm), 2) +/* C5.6.94 LDRSW (immediate) */ +#define arm_ldrswx(p, rt, rn, pimm) arm_format_mem_imm (p, ARMSIZE_W, 0x2, (rt), (rn), (pimm), 4) + +/* C5.6.178 STR (immediate) */ +#define arm_strx(p, rt, rn, pimm) arm_format_mem_imm (p, ARMSIZE_X, 0x0, (rt), (rn), (pimm), 8) +#define arm_strw(p, rt, rn, pimm) arm_format_mem_imm (p, ARMSIZE_W, 0x0, (rt), (rn), (pimm), 4) +/* C5.6.182 STR (immediate) */ +#define arm_strh(p, rt, rn, pimm) arm_format_mem_imm (p, ARMSIZE_H, 0x0, (rt), (rn), (pimm), 2) +#define arm_strb(p, rt, rn, pimm) arm_format_mem_imm (p, ARMSIZE_B, 0x0, (rt), (rn), (pimm), 1) + +/* C3.3.9 Load/store register (immediate post-indexed) */ +static G_GNUC_UNUSED unsigned int +arm_encode_simm9 (int simm) +{ + g_assert (simm >= -256 && simm <= 255); + return ((unsigned int)simm) & 0x1ff; +} + +#define arm_format_mem_imm_post(p, size, V, opc, rt, rn, simm) arm_emit ((p), ((size) << 30) | (0x7 << 27) | ((V) << 26) | (0x0 << 24) | ((opc) << 22) | (arm_encode_simm9 ((simm)) << 12) | (0x1 << 10) | 
((rn) << 5) | ((rt) << 0)) + +#define arm_ldrx_post(p, rt, rn, simm) arm_format_mem_imm_post (p, ARMSIZE_X, 0x0, 0x1, (rt), (rn), (simm)) +#define arm_ldrw_post(p, rt, rn, simm) arm_format_mem_imm_post (p, ARMSIZE_W, 0x0, 0x1, (rt), (rn), (simm)) + +#define arm_strx_post(p, rt, rn, simm) arm_format_mem_imm_post (p, ARMSIZE_X, 0x0, 0x0, (rt), (rn), (simm)) +#define arm_strw_post(p, rt, rn, simm) arm_format_mem_imm_post (p, ARMSIZE_W, 0x0, 0x0, (rt), (rn), (simm)) + +/* C3.3.9 Load/store register (immediate pre-indexed) */ +#define arm_format_mem_imm_pre(p, size, V, opc, rt, rn, simm) arm_emit ((p), ((size) << 30) | (0x7 << 27) | ((V) << 26) | (0x0 << 24) | ((opc) << 22) | (arm_encode_simm9 ((simm)) << 12) | (0x3 << 10) | ((rn) << 5) | ((rt) << 0)) + +#define arm_ldrx_pre(p, rt, rn, simm) arm_format_mem_imm_pre (p, ARMSIZE_X, 0x0, 0x1, (rt), (rn), (simm)) +#define arm_ldrw_pre(p, rt, rn, simm) arm_format_mem_imm_pre (p, ARMSIZE_W, 0x0, 0x1, (rt), (rn), (simm)) + +#define arm_strx_pre(p, rt, rn, simm) arm_format_mem_imm_pre (p, ARMSIZE_X, 0x0, 0x0, (rt), (rn), (simm)) +#define arm_strw_pre(p, rt, rn, simm) arm_format_mem_imm_pre (p, ARMSIZE_W, 0x0, 0x0, (rt), (rn), (simm)) + +/* Load/Store register + register */ +/* No extend/scale yet */ +#define arm_format_mem_reg(p, size, opc, rt, rn, rm) arm_emit ((p), ((size) << 30) | (0x38 << 24) | ((opc) << 22) | (0x1 << 21) | ((rm) << 16) | (0x3 << 13) | (0 << 12) | (0x2 << 10) | ((rn) << 5) | ((rt) << 0)) + +/* C5.6.85 LDR (register) */ +#define arm_ldrx_reg(p, rt, rn, rm) arm_format_mem_reg ((p), ARMSIZE_X, 0x1, (rt), (rn), (rm)) +#define arm_ldrw_reg(p, rt, rn, rm) arm_format_mem_reg ((p), ARMSIZE_W, 0x1, (rt), (rn), (rm)) +/* C5.6.87 LDRB (register) */ +#define arm_ldrb_reg(p, rt, rn, rm) arm_format_mem_reg ((p), ARMSIZE_B, 0x1, (rt), (rn), (rm)) +/* C5.6.88 LDRH (register) */ +#define arm_ldrh_reg(p, rt, rn, rm) arm_format_mem_reg ((p), ARMSIZE_H, 0x1, (rt), (rn), (rm)) +/* C5.6.91 LDRSB (register) */ +#define arm_ldrsbx_reg(p, rt, rn, rm) arm_format_mem_reg ((p), ARMSIZE_B, 0x2, (rt), (rn), (rm)) +#define arm_ldrsbw_reg(p, rt, rn, rm) arm_format_mem_reg ((p), ARMSIZE_B, 0x3, (rt), (rn), (rm)) +/* C5.6.93 LDRSH (register) */ +#define arm_ldrshx_reg(p, rt, rn, rm) arm_format_mem_reg ((p), ARMSIZE_H, 0x2, (rt), (rn), (rm)) +#define arm_ldrshw_reg(p, rt, rn, rm) arm_format_mem_reg ((p), ARMSIZE_H, 0x3, (rt), (rn), (rm)) +/* C5.6.96 LDRSW (register) */ +#define arm_ldrswx_reg(p, rt, rn, rm) arm_format_mem_reg ((p), ARMSIZE_W, 0x2, (rt), (rn), (rm)) + +/* C5.6.179 STR (register) */ +#define arm_strx_reg(p, rt, rn, rm) arm_format_mem_reg ((p), ARMSIZE_X, 0x0, (rt), (rn), (rm)) +#define arm_strw_reg(p, rt, rn, rm) arm_format_mem_reg ((p), ARMSIZE_W, 0x0, (rt), (rn), (rm)) +/* C5.6.181 STRB (register) */ +#define arm_strb_reg(p, rt, rn, rm) arm_format_mem_reg ((p), ARMSIZE_B, 0x0, (rt), (rn), (rm)) +/* C5.6.183 STRH (register) */ +#define arm_strh_reg(p, rt, rn, rm) arm_format_mem_reg ((p), ARMSIZE_H, 0x0, (rt), (rn), (rm)) + +/* PC relative */ + +/* C5.6.84 LDR (literal) */ + +#define arm_get_ldr_lit_reg(p) (*(guint32*)(p) & 0x1f) + +#define arm_ldrx_lit(p, rt, target) arm_emit ((p), (0x01 << 30) | (0x18 << 24) | (arm_get_disp19 ((p), (target)) << 5) | ((rt) << 0)) +#define arm_ldrw_lit(p, rt, target) arm_emit ((p), (0x00 << 30) | (0x18 << 24) | (arm_get_disp19 ((p), (target)) << 5) | ((rt) << 0)) +#define arm_ldrswx_lit(p, rt, target) arm_emit ((p), (0x2 << 30) | (0x18 << 24) | (arm_get_disp19 ((p), (target)) << 5) | ((rt) << 0)) + +/* Unscaled offset 
*/
+/* FIXME: Not yet */
+
+/* Load/Store Pair */
+
+static G_GNUC_UNUSED unsigned int
+arm_encode_imm7 (int imm, int size)
+{
+	g_assert (imm / size >= -64 && imm / size <= 63 && (imm % size) == 0);
+	return ((unsigned int)(imm / size)) & 0x7f;
+}
+
+#define arm_is_imm7_scaled(imm, size) ((imm) / (size) >= -64 && (imm) / (size) <= 63 && ((imm) % (size)) == 0)
+
+#define arm_is_ldpx_imm(imm) arm_is_imm7_scaled ((imm), 8)
+
+/* C3.3.14 */
+#define arm_format_mem_p(p, size, opc, L, rt1, rt2, rn, imm) arm_emit ((p), (opc << 30) | (0x52 << 23) | ((L) << 22) | (arm_encode_imm7 (imm, size) << 15) | ((rt2) << 10) | ((rn) << 5) | ((rt1) << 0))
+
+#define arm_ldpx(p, rt1, rt2, rn, imm) arm_format_mem_p ((p), 8, 0x2, 1, (rt1), (rt2), (rn), (imm))
+#define arm_ldpw(p, rt1, rt2, rn, imm) arm_format_mem_p ((p), 4, 0x0, 1, (rt1), (rt2), (rn), (imm))
+#define arm_ldpsw(p, rt1, rt2, rn, imm) arm_format_mem_p ((p), 4, 0x1, 1, (rt1), (rt2), (rn), (imm))
+#define arm_stpx(p, rt1, rt2, rn, imm) arm_format_mem_p ((p), 8, 0x2, 0, (rt1), (rt2), (rn), (imm))
+#define arm_stpw(p, rt1, rt2, rn, imm) arm_format_mem_p ((p), 4, 0x0, 0, (rt1), (rt2), (rn), (imm))
+
+/* Load/Store Pair (Pre-indexed) */
+/* C3.3.16 */
+#define arm_format_mem_p_pre(p, size, opc, L, rt1, rt2, rn, imm) arm_emit ((p), (opc << 30) | (0x53 << 23) | ((L) << 22) | (arm_encode_imm7 (imm, size) << 15) | ((rt2) << 10) | ((rn) << 5) | ((rt1) << 0))
+
+#define arm_ldpx_pre(p, rt1, rt2, rn, imm) arm_format_mem_p_pre ((p), 8, 0x2, 1, (rt1), (rt2), (rn), (imm))
+#define arm_ldpw_pre(p, rt1, rt2, rn, imm) arm_format_mem_p_pre ((p), 4, 0x0, 1, (rt1), (rt2), (rn), (imm))
+#define arm_ldpsw_pre(p, rt1, rt2, rn, imm) arm_format_mem_p_pre ((p), 4, 0x1, 1, (rt1), (rt2), (rn), (imm))
+#define arm_stpx_pre(p, rt1, rt2, rn, imm) arm_format_mem_p_pre ((p), 8, 0x2, 0, (rt1), (rt2), (rn), (imm))
+#define arm_stpw_pre(p, rt1, rt2, rn, imm) arm_format_mem_p_pre ((p), 4, 0x0, 0, (rt1), (rt2), (rn), (imm))
+
+/* Not an official alias; pushes a pair of X registers */
+#define arm_pushpx(p, rt1, rt2) arm_stpx_pre ((p), (rt1), (rt2), ARMREG_SP, -16)
+
+/* Load/Store Pair (Post-indexed) */
+/* C3.3.15 */
+#define arm_format_mem_p_post(p, size, opc, L, rt1, rt2, rn, imm) arm_emit ((p), (opc << 30) | (0x51 << 23) | ((L) << 22) | (arm_encode_imm7 (imm, size) << 15) | ((rt2) << 10) | ((rn) << 5) | ((rt1) << 0))
+
+#define arm_ldpx_post(p, rt1, rt2, rn, imm) arm_format_mem_p_post ((p), 8, 0x2, 1, (rt1), (rt2), (rn), (imm))
+#define arm_ldpw_post(p, rt1, rt2, rn, imm) arm_format_mem_p_post ((p), 4, 0x0, 1, (rt1), (rt2), (rn), (imm))
+#define arm_ldpsw_post(p, rt1, rt2, rn, imm) arm_format_mem_p_post ((p), 4, 0x1, 1, (rt1), (rt2), (rn), (imm))
+#define arm_stpx_post(p, rt1, rt2, rn, imm) arm_format_mem_p_post ((p), 8, 0x2, 0, (rt1), (rt2), (rn), (imm))
+#define arm_stpw_post(p, rt1, rt2, rn, imm) arm_format_mem_p_post ((p), 4, 0x0, 0, (rt1), (rt2), (rn), (imm))
+
+/* Not an official alias; pops a pair of X registers */
+#define arm_poppx(p, rt1, rt2) arm_ldpx_post ((p), (rt1), (rt2), ARMREG_SP, 16)
+
+/* Load/Store Exclusive */
+#define arm_format_ldxr(p, size, rt, rn) arm_emit ((p), ((size) << 30) | (0x8 << 24) | (0x0 << 23) | (0x1 << 22) | (0x0 << 21) | (0x1f << 16) | (0x0 << 15) | (0x1f << 10) | ((rn) << 5) | ((rt) << 0))
+#define arm_format_ldxp(p, size, rt1, rt2, rn) arm_emit ((p), ((size) << 30) | (0x8 << 24) | (0x0 << 23) | (0x1 << 22) | (0x1 << 21) | (0x1f << 16) | (0x0 << 15) | ((rt2) << 10)| ((rn) << 5) | ((rt1) << 0))
+#define arm_format_stxr(p, size, rs, rt, rn) arm_emit ((p), ((size) << 30) | (0x8 << 24) | (0x0 << 23) | (0x0 << 22) |
(0x0 << 21) | ((rs) << 16) | (0x0 << 15) | (0x1f << 10) | ((rn) << 5) | ((rt) << 0)) +#define arm_format_stxp(p, size, rs, rt1, rt2, rn) arm_emit ((p), ((size) << 30) | (0x8 << 24) | (0x0 << 23) | (0x0 << 22) | (0x1 << 21) | ((rs) << 16) | (0x0 << 15) | ((rt2) << 10)| ((rn) << 5) | ((rt1) << 0)) + +#define arm_ldxrx(p, rt, rn) arm_format_ldxr ((p), ARMSIZE_X, (rt), (rn)) +#define arm_ldxrw(p, rt, rn) arm_format_ldxr ((p), ARMSIZE_W, (rt), (rn)) +#define arm_ldxrh(p, rt, rn) arm_format_ldxr ((p), ARMSIZE_H, (rt), (rn)) +#define arm_ldxrb(p, rt, rn) arm_format_ldxr ((p), ARMSIZE_B, (rt), (rn)) +#define arm_ldxpx(p, rt1, rt2, rn) arm_format_ldxp ((p), ARMSIZE_X, (rt1), (rt2), (rn)) +#define arm_ldxpw(p, rt1, rt2, rn) arm_format_ldxp ((p), ARMSIZE_W, (rt1), (rt2), (rn)) +#define arm_stxrx(p, rs, rt, rn) arm_format_stxr ((p), ARMSIZE_X, (rs), (rt), (rn)) +#define arm_stxrw(p, rs, rt, rn) arm_format_stxr ((p), ARMSIZE_W, (rs), (rt), (rn)) +#define arm_stxrh(p, rs, rt, rn) arm_format_stxr ((p), ARMSIZE_H, (rs), (rt), (rn)) +#define arm_stxrb(p, rs, rt, rn) arm_format_stxr ((p), ARMSIZE_B, (rs), (rt), (rn)) +#define arm_stxpx(p, rs, rt1, rt2, rn) arm_format_stxp ((p), ARMSIZE_X, (rs), (rt1), (rt2), (rn)) +#define arm_stxpw(p, rs, rt1, rt2, rn) arm_format_stxp ((p), ARMSIZE_W, (rs), (rt1), (rt2), (rn)) + +/* C5.6.73 LDAR: Load-Acquire Register */ + +#define arm_format_ldar(p, size, rt, rn) arm_emit ((p), ((size) << 30) | (0x8 << 24) | (0x1 << 23) | (0x1 << 22) | (0x0 << 21) | (0x1f << 16) | (0x1 << 15) | (0x1f << 10) | ((rn) << 5) | ((rt) << 0)) + +#define arm_ldarx(p, rt, rn) arm_format_ldar ((p), ARMSIZE_X, (rt), (rn)) +#define arm_ldarw(p, rt, rn) arm_format_ldar ((p), ARMSIZE_W, (rt), (rn)) +#define arm_ldarh(p, rt, rn) arm_format_ldar ((p), ARMSIZE_H, (rt), (rn)) +#define arm_ldarb(p, rt, rn) arm_format_ldar ((p), ARMSIZE_B, (rt), (rn)) + +/* C5.6.169 STLR: Store-Release Register */ + +#define arm_format_stlr(p, size, rt, rn) arm_emit ((p), ((size) << 30) | (0x8 << 24) | (0x1 << 23) | (0x0 << 22) | (0x0 << 21) | (0x1f << 16) | (0x1 << 15) | (0x1f << 10) | ((rn) << 5) | ((rt) << 0)) + +#define arm_stlrx(p, rn, rt) arm_format_stlr ((p), ARMSIZE_X, (rt), (rn)) +#define arm_stlrw(p, rn, rt) arm_format_stlr ((p), ARMSIZE_W, (rt), (rn)) +#define arm_stlrh(p, rn, rt) arm_format_stlr ((p), ARMSIZE_H, (rt), (rn)) +#define arm_stlrb(p, rn, rt) arm_format_stlr ((p), ARMSIZE_B, (rt), (rn)) + +/* C5.6.77 LDAXR */ +#define arm_format_ldaxr(p, size, rn, rt) arm_emit ((p), ((size) << 30) | (0x8 << 24) | (0x0 << 23) | (0x1 << 22) | (0x0 << 21) | (0x1f << 16) | (0x1 << 15) | (0x1f << 10) | ((rn) << 5) | ((rt) << 0)) + +#define arm_ldaxrx(p, rt, rn) arm_format_ldaxr ((p), 0x3, (rn), (rt)) +#define arm_ldaxrw(p, rt, rn) arm_format_ldaxr ((p), 0x2, (rn), (rt)) + +/* C5.6.173 STLXR */ +#define arm_format_stlxr(p, size, rs, rn, rt) arm_emit ((p), ((size) << 30) | (0x8 << 24) | (0x0 << 23) | (0x0 << 22) | (0x0 << 21) | ((rs) << 16) | (0x1 << 15) | (0x1f << 10) | ((rn) << 5) | ((rt) << 0)) + +#define arm_stlxrx(p, rs, rt, rn) arm_format_stlxr ((p), 0x3, (rs), (rn), (rt)) +#define arm_stlxrw(p, rs, rt, rn) arm_format_stlxr ((p), 0x2, (rs), (rn), (rt)) + +/* Load/Store SIMD&FP */ + +/* C6.3.285 STR (immediate, SIMD&FP) */ +#define arm_format_strfp_imm(p, size, opc, rt, rn, pimm, scale) arm_emit ((p), ((size) << 30) | (0xf << 26) | (0x1 << 24) | ((opc) << 22) | (arm_encode_pimm12 ((pimm), (scale)) << 10) | ((rn) << 5) | ((rt) << 0)) + +/* Store double */ +#define arm_strfpx(p, dt, xn, simm) arm_format_strfp_imm ((p), 
ARMSIZE_X, 0x0, (dt), (xn), (simm), 8) +/* Store single */ +#define arm_strfpw(p, st, xn, simm) arm_format_strfp_imm ((p), ARMSIZE_W, 0x0, (st), (xn), (simm), 4) + +/* C6.3.166 LDR (immediate, SIMD&FP) */ +#define arm_format_ldrfp_imm(p, size, opc, rt, rn, pimm, scale) arm_emit ((p), ((size) << 30) | (0xf << 26) | (0x1 << 24) | ((opc) << 22) | (arm_encode_pimm12 ((pimm), (scale)) << 10) | ((rn) << 5) | ((rt) << 0)) + +/* Load double */ +#define arm_ldrfpx(p, dt, xn, simm) arm_format_ldrfp_imm ((p), ARMSIZE_X, 0x1, dt, xn, simm, 8) +/* Load single */ +#define arm_ldrfpw(p, dt, xn, simm) arm_format_ldrfp_imm ((p), ARMSIZE_W, 0x1, dt, xn, simm, 4) + +/* Arithmetic (immediate) */ +static G_GNUC_UNUSED inline guint32 +arm_encode_arith_imm (int imm, guint32 *shift) +{ + // FIXME: + g_assert ((imm >= 0) && (imm < 0xfff)); + *shift = 0; + return (guint32)imm; +} + +// FIXME: +#define arm_is_arith_imm(imm) (((imm) >= 0) && ((imm) < 0xfff)) + +#define arm_format_alu_imm(p, sf, op, S, rd, rn, imm) do { \ + guint32 _imm12, _shift; \ + _imm12 = arm_encode_arith_imm ((imm), &_shift); arm_emit ((p), ((sf) << 31) | ((op) << 30) | ((S) << 29) | (0x11 << 24) | ((_shift) << 22) | ((_imm12) << 10) | ((rn) << 5) | ((rd) << 0)); \ +} while (0) + +/* rd/rn can be SP for addx/subx */ +#define arm_addx_imm(p, rd, rn, imm) arm_format_alu_imm ((p), 0x1, 0x0, 0x0, (rd), (rn), (imm)) +#define arm_addw_imm(p, rd, rn, imm) arm_format_alu_imm ((p), 0x0, 0x0, 0x0, (rd), (rn), (imm)) +#define arm_addsx_imm(p, rd, rn, imm) arm_format_alu_imm ((p), 0x1, 0x0, 0x1, (rd), (rn), (imm)) +#define arm_addsw_imm(p, rd, rn, imm) arm_format_alu_imm ((p), 0x0, 0x0, 0x1, (rd), (rn), (imm)) +#define arm_subx_imm(p, rd, rn, imm) arm_format_alu_imm ((p), 0x1, 0x1, 0x0, (rd), (rn), (imm)) +#define arm_subw_imm(p, rd, rn, imm) arm_format_alu_imm ((p), 0x0, 0x1, 0x0, (rd), (rn), (imm)) +#define arm_subsx_imm(p, rd, rn, imm) arm_format_alu_imm ((p), 0x1, 0x1, 0x1, (rd), (rn), (imm)) +#define arm_subsw_imm(p, rd, rn, imm) arm_format_alu_imm ((p), 0x0, 0x1, 0x1, (rd), (rn), (imm)) + +#define arm_cmpx_imm(p, rn, imm) arm_subsx_imm ((p), ARMREG_RZR, (rn), (imm)) +#define arm_cmpw_imm(p, rn, imm) arm_subsw_imm ((p), ARMREG_RZR, (rn), (imm)) +#define arm_cmnx_imm(p, rn, imm) arm_addsx_imm ((p), ARMREG_RZR, (rn), (imm)) +#define arm_cmnw_imm(p, rn, imm) arm_addsw_imm ((p), ARMREG_RZR, (rn), (imm)) + +/* Logical (immediate) */ + +// FIXME: imm +#if 0 +#define arm_format_and(p, sf, opc, rd, rn, imm) arm_emit ((p), ((sf) << 31) | ((opc) << 29) | (0x24 << 23) | ((0) << 22) | ((imm) << 10) | ((rn) << 5) | ((rd) << 0)) + +#define arm_andx_imm(p, rd, rn, imm) arm_format_and ((p), 0x1, 0x0, (rd), (rn), (imm)) +#define arm_andw_imm(p, rd, rn, imm) arm_format_and ((p), 0x0, 0x0, (rd), (rn), (imm)) +#define arm_andsx_imm(p, rd, rn, imm) arm_format_and ((p), 0x1, 0x3, (rd), (rn), (imm)) +#define arm_andsw_imm(p, rd, rn, imm) arm_format_and ((p), 0x0, 0x3, (rd), (rn), (imm)) +#define arm_eorx_imm(p, rd, rn, imm) arm_format_and ((p), 0x1, 0x2, (rd), (rn), (imm)) +#define arm_eorw_imm(p, rd, rn, imm) arm_format_and ((p), 0x0, 0x2, (rd), (rn), (imm)) +#define arm_orrx_imm(p, rd, rn, imm) arm_format_and ((p), 0x1, 0x1, (rd), (rn), (imm)) +#define arm_orrw_imm(p, rd, rn, imm) arm_format_and ((p), 0x0, 0x1, (rd), (rn), (imm)) + +#define arm_tstx_imm(p, rn, imm) arm_andsx_imm ((p), ARMREG_RZR, (rn), (imm)) +#define arm_tstw_imm(p, rn, imm) arm_andsw_imm ((p), ARMREG_RZR, (rn), (imm)) +#endif + +/* Move (wide immediate) */ +#define arm_format_mov(p, sf, opc, hw, rd, 
imm16) arm_emit ((p), ((sf) << 31) | ((opc) << 29) | (0x25 << 23) | ((hw) << 21) | (((guint32)(imm16) & 0xffff) << 5) | ((rd) << 0)) + +#define arm_get_movzx_rd(p) ((*(guint32*)p) & 0x1f) + +#define arm_movzx(p, rd, imm, shift) do { g_assert ((shift) % 16 == 0); arm_format_mov ((p), 0x1, 0x2, (shift) / 16, (rd), (imm)); } while (0) +#define arm_movzw(p, rd, imm, shift) do { g_assert ((shift) % 16 == 0); arm_format_mov ((p), 0x0, 0x2, (shift) / 16, (rd), (imm)); } while (0) +#define arm_movnx(p, rd, imm, shift) do { g_assert ((shift) % 16 == 0); arm_format_mov ((p), 0x1, 0x0, (shift) / 16, (rd), (imm)); } while (0) +#define arm_movnw(p, rd, imm, shift) do { g_assert ((shift) % 16 == 0); arm_format_mov ((p), 0x0, 0x0, (shift) / 16, (rd), (imm)); } while (0) +#define arm_movkx(p, rd, imm, shift) do { g_assert ((shift) % 16 == 0); arm_format_mov ((p), 0x1, 0x3, (shift) / 16, (rd), (imm)); } while (0) +#define arm_movkw(p, rd, imm, shift) do { g_assert ((shift) % 16 == 0); arm_format_mov ((p), 0x0, 0x3, (shift) / 16, (rd), (imm)); } while (0) + +/* PC-relative address calculation */ +#define arm_format_adrp(p, op, rd, target) do { guint64 imm1 = (guint64)(target); guint64 imm2 = (guint64)(p); int _imm = imm1 - imm2; arm_emit ((p), ((op) << 31) | (((_imm) & 0x3) << 29) | (0x10 << 24) | (((_imm >> 2) & 0x7ffff) << 5) | ((rd) << 0)); } while (0) + +#define arm_adrpx(p, rd, target) arm_format_adrp ((p), 0x1, (rd), (target)) +#define arm_adrx(p, rd, target) arm_format_adrp ((p), 0x0, (rd), (target)) + +/* Bitfield move */ +#define arm_format_bfm(p, sf, opc, N, immr, imms, rn, rd) arm_emit ((p), ((sf) << 31) | ((opc) << 29) | (0x26 << 23) | ((N) << 22) | ((N) << 22) | ((immr) << 16) | ((imms) << 10) | ((rn) << 5) | ((rd) << 0)) + +#define arm_bfmx(p, rd, rn, immr, imms) arm_format_bfm ((p), 0x1, 0x1, 0x1, (immr), (imms), (rn), (rd)) +#define arm_bfmw(p, rd, rn, immr, imms) arm_format_bfm ((p), 0x0, 0x1, 0x0, (immr), (imms), (rn), (rd)) +#define arm_sbfmx(p, rd, rn, immr, imms) arm_format_bfm ((p), 0x1, 0x0, 0x1, (immr), (imms), (rn), (rd)) +#define arm_sbfmw(p, rd, rn, immr, imms) arm_format_bfm ((p), 0x0, 0x0, 0x0, (immr), (imms), (rn), (rd)) +#define arm_ubfmx(p, rd, rn, immr, imms) arm_format_bfm ((p), 0x1, 0x2, 0x1, (immr), (imms), (rn), (rd)) +#define arm_ubfmw(p, rd, rn, immr, imms) arm_format_bfm ((p), 0x0, 0x2, 0x0, (immr), (imms), (rn), (rd)) + +/* Sign extend and Zero-extend */ +#define arm_sxtbx(p, rd, rn) arm_sbfmx ((p), (rd), (rn), 0, 7) +#define arm_sxtbw(p, rd, rn) arm_sbfmw ((p), (rd), (rn), 0, 7) +#define arm_sxthx(p, rd, rn) arm_sbfmx ((p), (rd), (rn), 0, 15) +#define arm_sxthw(p, rd, rn) arm_sbfmw ((p), (rd), (rn), 0, 15) +#define arm_sxtwx(p, rd, rn) arm_sbfmx ((p), (rd), (rn), 0, 31) +#define arm_uxtbx(p, rd, rn) arm_ubfmx ((p), (rd), (rn), 0, 7) +#define arm_uxtbw(p, rd, rn) arm_ubfmw ((p), (rd), (rn), 0, 7) +#define arm_uxthx(p, rd, rn) arm_ubfmx ((p), (rd), (rn), 0, 15) +#define arm_uxthw(p, rd, rn) arm_ubfmw ((p), (rd), (rn), 0, 15) + +/* Extract register */ +#define arm_format_extr(p, sf, N, rd, rn, rm, imms) arm_emit ((p), ((sf) << 31) | (0x27 << 23) | ((N) << 22) | (0x0 << 21) | ((rm) << 16) | ((imms) << 10) | ((rn) << 5) | ((rd) << 0)) +#define arm_extrx(p, rd, rn, rm, lsb) arm_format_extr ((p), 0x1, 0x1, (rd), (rn), (rm), (lsb)) +#define arm_extrw(p, rd, rn, rm, lsb) arm_format_extr ((p), 0x0, 0x0, (rd), (rn), (rm), (lsb)) + +/* Shift (immediate) */ +#define arm_asrx(p, rd, rn, shift) arm_sbfmx ((p), (rd), (rn), (shift), 63) +#define arm_asrw(p, rd, rn, shift) arm_sbfmw 
((p), (rd), (rn), (shift), 31) +#define arm_lslx(p, rd, rn, shift) arm_ubfmx ((p), (rd), (rn), 64 - ((shift) % 64), 63 - ((shift) % 64)) +#define arm_lslw(p, rd, rn, shift) arm_ubfmw ((p), (rd), (rn), 32 - ((shift) % 32), 31 - ((shift) % 32)) +#define arm_lsrx(p, rd, rn, shift) arm_ubfmx ((p), (rd), (rn), shift, 63) +#define arm_lsrw(p, rd, rn, shift) arm_ubfmw ((p), (rd), (rn), shift, 31) +#define arm_rorx(p, rd, rs, shift) arm_extrx ((p), (rd), (rs), (rs), (shift)) +#define arm_rorw(p, rd, rs, shift) arm_extrw ((p), (rd), (rs), (rs), (shift)) + +/* Arithmetic (shifted register) */ +#define arm_format_alu_shift(p, sf, op, S, rd, rn, rm, shift, imm6) arm_emit ((p), ((sf) << 31) | ((op) << 30) | ((S) << 29) | (0xb << 24) | ((shift) << 22) | (0x0 << 21) | ((rm) << 16) | ((imm6) << 10) | ((rn) << 5) | ((rd) << 0)) + +#define arm_addx_shift(p, rd, rn, rm, shift_type, amount) arm_format_alu_shift ((p), 0x1, 0x0, 0x0, (rd), (rn), (rm), (shift_type), (amount)) +#define arm_addw_shift(p, rd, rn, rm, shift_type, amount) arm_format_alu_shift ((p), 0x0, 0x0, 0x0, (rd), (rn), (rm), (shift_type), (amount)) +#define arm_addsx_shift(p, rd, rn, rm, shift_type, amount) arm_format_alu_shift ((p), 0x1, 0x0, 0x1, (rd), (rn), (rm), (shift_type), (amount)) +#define arm_addsw_shift(p, rd, rn, rm, shift_type, amount) arm_format_alu_shift ((p), 0x0, 0x0, 0x1, (rd), (rn), (rm), (shift_type), (amount)) +#define arm_subx_shift(p, rd, rn, rm, shift_type, amount) arm_format_alu_shift ((p), 0x1, 0x1, 0x0, (rd), (rn), (rm), (shift_type), (amount)) +#define arm_subw_shift(p, rd, rn, rm, shift_type, amount) arm_format_alu_shift ((p), 0x0, 0x1, 0x0, (rd), (rn), (rm), (shift_type), (amount)) +#define arm_subsx_shift(p, rd, rn, rm, shift_type, amount) arm_format_alu_shift ((p), 0x1, 0x1, 0x1, (rd), (rn), (rm), (shift_type), (amount)) +#define arm_subsw_shift(p, rd, rn, rm, shift_type, amount) arm_format_alu_shift ((p), 0x0, 0x1, 0x1, (rd), (rn), (rm), (shift_type), (amount)) +#define arm_cmnx_shift(p, rn, rm, shift_type, amount) arm_addsx_shift ((p), ARMREG_RZR, (rn), (rm), (shift_type), (amount)) +#define arm_cmnw_shift(p, rn, rm, shift_type, amount) arm_addsw_shift ((p), ARMREG_RZR, (rn), (rm), (shift_type), (amount)) +#define arm_cmpx_shift(p, rn, rm, shift_type, amount) arm_subsx_shift ((p), ARMREG_RZR, (rn), (rm), (shift_type), (amount)) +#define arm_cmpw_shift(p, rn, rm, shift_type, amount) arm_subsw_shift ((p), ARMREG_RZR, (rn), (rm), (shift_type), (amount)) +#define arm_negx_shift(p, rd, rm, shift_type, amount) arm_subx_shift ((p), (rd), ARMREG_RZR, (rm), (shift_type), (amount)) +#define arm_negw_shift(p, rd, rm, shift_type, amount) arm_subw_shift ((p), (rd), ARMREG_RZR, (rm), (shift_type), (amount)) +#define arm_negsx_shift(p, rd, rm, shift_type, amount) arm_subsx_shift ((p), (rd), ARMREG_RZR, (rm), (shift_type), (amount)) +#define arm_negsw_shift(p, rd, rm, shift_type, amount) arm_subsw_shift ((p), (rd), ARMREG_RZR, (rm), (shift_type), (amount)) + +#define arm_addx(p, rd, rn, rm) arm_addx_shift ((p), (rd), (rn), (rm), 0, 0) +#define arm_addw(p, rd, rn, rm) arm_addw_shift ((p), (rd), (rn), (rm), 0, 0) +#define arm_subx(p, rd, rn, rm) arm_subx_shift ((p), (rd), (rn), (rm), 0, 0) +#define arm_subw(p, rd, rn, rm) arm_subw_shift ((p), (rd), (rn), (rm), 0, 0) +#define arm_addsx(p, rd, rn, rm) arm_addsx_shift ((p), (rd), (rn), (rm), 0, 0) +#define arm_addsw(p, rd, rn, rm) arm_addsw_shift ((p), (rd), (rn), (rm), 0, 0) +#define arm_subsx(p, rd, rn, rm) arm_subsx_shift ((p), (rd), (rn), (rm), 0, 0) +#define arm_subsw(p, rd, 
rn, rm) arm_subsw_shift ((p), (rd), (rn), (rm), 0, 0) +#define arm_cmpx(p, rd, rn) arm_cmpx_shift ((p), (rd), (rn), 0, 0) +#define arm_cmpw(p, rd, rn) arm_cmpw_shift ((p), (rd), (rn), 0, 0) +#define arm_negx(p, rd, rn) arm_negx_shift ((p), (rd), (rn), 0, 0) +#define arm_negw(p, rd, rn) arm_negw_shift ((p), (rd), (rn), 0, 0) + +/* Arithmetic with carry */ +#define arm_format_adc(p, sf, op, S, rd, rn, rm) arm_emit ((p), ((sf) << 31) | ((op) << 30) | ((S) << 29) | (0xd0 << 21) | ((rm) << 16) | (0x0 << 10) | ((rn) << 5) | ((rd) << 0)) + +#define arm_adcx(p, rd, rn, rm) arm_format_adc ((p), 0x1, 0x0, 0x0, (rd), (rn), (rm)) +#define arm_adcw(p, rd, rn, rm) arm_format_adc ((p), 0x0, 0x0, 0x0, (rd), (rn), (rm)) +#define arm_adcsx(p, rd, rn, rm) arm_format_adc ((p), 0x1, 0x0, 0x1, (rd), (rn), (rm)) +#define arm_adcsw(p, rd, rn, rm) arm_format_adc ((p), 0x0, 0x0, 0x1, (rd), (rn), (rm)) +#define arm_sbcx(p, rd, rn, rm) arm_format_adc ((p), 0x1, 0x1, 0x0, (rd), (rn), (rm)) +#define arm_sbcw(p, rd, rn, rm) arm_format_adc ((p), 0x0, 0x1, 0x0, (rd), (rn), (rm)) +#define arm_sbcsx(p, rd, rn, rm) arm_format_adc ((p), 0x1, 0x1, 0x1, (rd), (rn), (rm)) +#define arm_sbcsw(p, rd, rn, rm) arm_format_adc ((p), 0x0, 0x1, 0x1, (rd), (rn), (rm)) +#define arm_ngcx(p, rd, rm) arm_sbcx ((p), (rd), ARMREG_RZR, (rm)) +#define arm_ngcw(p, rd, rm) arm_sbcw ((p), (rd), ARMREG_RZR, (rm)) +#define arm_ngcsx(p, rd, rm) arm_sbcsx ((p), (rd), ARMREG_RZR, (rm)) +#define arm_ngcsw(p, rd, rm) arm_sbcsw ((p), (rd), ARMREG_RZR, (rm)) + +/* Logical (shifted register) */ +#define arm_format_logical_shift(p, sf, op, N, rd, rn, rm, shift, imm6) arm_emit ((p), ((sf) << 31) | ((op) << 29) | (0xa << 24) | ((shift) << 22) | ((N) << 21) | ((rm) << 16) | ((imm6) << 10) | ((rn) << 5) | ((rd) << 0)) + +#define arm_andx_shift(p, rd, rn, rm, shift_type, amount) arm_format_logical_shift ((p), 0x1, 0x0, 0x0, (rd), (rn), (rm), (shift_type), (amount)) +#define arm_andw_shift(p, rd, rn, rm, shift_type, amount) arm_format_logical_shift ((p), 0x0, 0x0, 0x0, (rd), (rn), (rm), (shift_type), (amount)) +#define arm_andsx_shift(p, rd, rn, rm, shift_type, amount) arm_format_logical_shift ((p), 0x1, 0x3, 0x0, (rd), (rn), (rm), (shift_type), (amount)) +#define arm_andsw_shift(p, rd, rn, rm, shift_type, amount) arm_format_logical_shift ((p), 0x0, 0x3, 0x0, (rd), (rn), (rm), (shift_type), (amount)) +#define arm_bicx_shift(p, rd, rn, rm, shift_type, amount) arm_format_logical_shift ((p), 0x1, 0x0, 0x1, (rd), (rn), (rm), (shift_type), (amount)) +#define arm_bicw_shift(p, rd, rn, rm, shift_type, amount) arm_format_logical_shift ((p), 0x0, 0x0, 0x1, (rd), (rn), (rm), (shift_type), (amount)) +#define arm_bicsx_shift(p, rd, rn, rm, shift_type, amount) arm_format_logical_shift ((p), 0x1, 0x3, 0x1, (rd), (rn), (rm), (shift_type), (amount)) +#define arm_bicsw_shift(p, rd, rn, rm, shift_type, amount) arm_format_logical_shift ((p), 0x0, 0x3, 0x1, (rd), (rn), (rm), (shift_type), (amount)) +#define arm_eonx_shift(p, rd, rn, rm, shift_type, amount) arm_format_logical_shift ((p), 0x1, 0x2, 0x1, (rd), (rn), (rm), (shift_type), (amount)) +#define arm_eonw_shift(p, rd, rn, rm, shift_type, amount) arm_format_logical_shift ((p), 0x0, 0x2, 0x1, (rd), (rn), (rm), (shift_type), (amount)) +#define arm_eorx_shift(p, rd, rn, rm, shift_type, amount) arm_format_logical_shift ((p), 0x1, 0x2, 0x0, (rd), (rn), (rm), (shift_type), (amount)) +#define arm_eorw_shift(p, rd, rn, rm, shift_type, amount) arm_format_logical_shift ((p), 0x0, 0x2, 0x0, (rd), (rn), (rm), (shift_type), (amount)) +#define 
arm_orrx_shift(p, rd, rn, rm, shift_type, amount) arm_format_logical_shift ((p), 0x1, 0x1, 0x0, (rd), (rn), (rm), (shift_type), (amount))
+#define arm_orrw_shift(p, rd, rn, rm, shift_type, amount) arm_format_logical_shift ((p), 0x0, 0x1, 0x0, (rd), (rn), (rm), (shift_type), (amount))
+#define arm_ornx_shift(p, rd, rn, rm, shift_type, amount) arm_format_logical_shift ((p), 0x1, 0x1, 0x1, (rd), (rn), (rm), (shift_type), (amount))
+#define arm_ornw_shift(p, rd, rn, rm, shift_type, amount) arm_format_logical_shift ((p), 0x0, 0x1, 0x1, (rd), (rn), (rm), (shift_type), (amount))
+#define arm_mvnx_shift(p, rd, rm, shift_type, amount) arm_ornx_shift ((p), (rd), ARMREG_RZR, (rm), (shift_type), (amount))
+#define arm_mvnw_shift(p, rd, rm, shift_type, amount) arm_ornw_shift ((p), (rd), ARMREG_RZR, (rm), (shift_type), (amount))
+#define arm_tstx_shift(p, rn, rm, shift_type, amount) arm_andsx_shift ((p), ARMREG_RZR, (rn), (rm), (shift_type), (amount))
+#define arm_tstw_shift(p, rn, rm, shift_type, amount) arm_andsw_shift ((p), ARMREG_RZR, (rn), (rm), (shift_type), (amount))
+/* Aliases */
+#define arm_andx(p, rd, rn, rm) arm_andx_shift(p, rd, rn, rm, ARMSHIFT_LSL, 0)
+#define arm_andw(p, rd, rn, rm) arm_andw_shift(p, rd, rn, rm, ARMSHIFT_LSL, 0)
+#define arm_andsx(p, rd, rn, rm) arm_andsx_shift(p, rd, rn, rm, ARMSHIFT_LSL, 0)
+#define arm_andsw(p, rd, rn, rm) arm_andsw_shift(p, rd, rn, rm, ARMSHIFT_LSL, 0)
+#define arm_bicx(p, rd, rn, rm) arm_bicx_shift(p, rd, rn, rm, ARMSHIFT_LSL, 0)
+#define arm_bicw(p, rd, rn, rm) arm_bicw_shift(p, rd, rn, rm, ARMSHIFT_LSL, 0)
+#define arm_bicsx(p, rd, rn, rm) arm_bicsx_shift(p, rd, rn, rm, ARMSHIFT_LSL, 0)
+#define arm_bicsw(p, rd, rn, rm) arm_bicsw_shift(p, rd, rn, rm, ARMSHIFT_LSL, 0)
+#define arm_eonx(p, rd, rn, rm) arm_eonx_shift(p, rd, rn, rm, ARMSHIFT_LSL, 0)
+#define arm_eonw(p, rd, rn, rm) arm_eonw_shift(p, rd, rn, rm, ARMSHIFT_LSL, 0)
+#define arm_eorx(p, rd, rn, rm) arm_eorx_shift(p, rd, rn, rm, ARMSHIFT_LSL, 0)
+#define arm_eorw(p, rd, rn, rm) arm_eorw_shift(p, rd, rn, rm, ARMSHIFT_LSL, 0)
+#define arm_orrx(p, rd, rn, rm) arm_orrx_shift(p, rd, rn, rm, ARMSHIFT_LSL, 0)
+#define arm_orrw(p, rd, rn, rm) arm_orrw_shift(p, rd, rn, rm, ARMSHIFT_LSL, 0)
+#define arm_ornx(p, rd, rn, rm) arm_ornx_shift(p, rd, rn, rm, ARMSHIFT_LSL, 0)
+#define arm_ornw(p, rd, rn, rm) arm_ornw_shift(p, rd, rn, rm, ARMSHIFT_LSL, 0)
+#define arm_mvnx(p, rd, rm) arm_mvnx_shift(p, rd, rm, ARMSHIFT_LSL, 0)
+#define arm_mvnw(p, rd, rm) arm_mvnw_shift(p, rd, rm, ARMSHIFT_LSL, 0)
+#define arm_tstx(p, rn, rm) arm_tstx_shift(p, rn, rm, ARMSHIFT_LSL, 0)
+#define arm_tstw(p, rn, rm) arm_tstw_shift(p, rn, rm, ARMSHIFT_LSL, 0)
+
+/* Move (register) */
+#define arm_movx(p, rn, rm) arm_orrx_shift ((p), (rn), ARMREG_RZR, (rm), ARMSHIFT_LSL, 0)
+#define arm_movw(p, rn, rm) arm_orrw_shift ((p), (rn), ARMREG_RZR, (rm), ARMSHIFT_LSL, 0)
+
+/* Not an official alias */
+#define arm_movspx(p, rn, rm) arm_addx_imm ((p), (rn), (rm), 0)
+
+/* Shift (register) */
+#define arm_format_shift_reg(p, sf, op2, rd, rn, rm) arm_emit ((p), ((sf) << 31) | (0xd6 << 21) | ((rm) << 16) | (0x2 << 12) | ((op2) << 10) | ((rn) << 5) | ((rd) << 0))
+
+#define arm_asrvx(p, rd, rn, rm) arm_format_shift_reg ((p), 0x1, 0x2, (rd), (rn), (rm))
+#define arm_asrvw(p, rd, rn, rm) arm_format_shift_reg ((p), 0x0, 0x2, (rd), (rn), (rm))
+#define arm_lslvx(p, rd, rn, rm) arm_format_shift_reg ((p), 0x1, 0x0, (rd), (rn), (rm))
+#define arm_lslvw(p, rd, rn, rm) arm_format_shift_reg ((p), 0x0, 0x0, (rd), (rn), (rm))
+#define arm_lsrvx(p, rd,
rn, rm) arm_format_shift_reg ((p), 0x1, 0x1, (rd), (rn), (rm)) +#define arm_lsrvw(p, rd, rn, rm) arm_format_shift_reg ((p), 0x0, 0x1, (rd), (rn), (rm)) +#define arm_rorvx(p, rd, rn, rm) arm_format_shift_reg ((p), 0x1, 0x3, (rd), (rn), (rm)) +#define arm_rorvw(p, rd, rn, rm) arm_format_shift_reg ((p), 0x0, 0x3, (rd), (rn), (rm)) + +/* Multiply */ +#define arm_format_mul(p, sf, o0, rd, rn, rm, ra) arm_emit ((p), ((sf) << 31) | (0x0 << 29) | (0x1b << 24) | (0x0 << 21) | ((rm) << 16) | ((o0) << 15) | ((ra) << 10) | ((rn) << 5) | ((rd) << 0)) + +#define arm_maddx(p, rd, rn, rm, ra) arm_format_mul((p), 0x1, 0x0, (rd), (rn), (rm), (ra)) +#define arm_maddw(p, rd, rn, rm, ra) arm_format_mul((p), 0x0, 0x0, (rd), (rn), (rm), (ra)) +#define arm_msubx(p, rd, rn, rm, ra) arm_format_mul((p), 0x1, 0x1, (rd), (rn), (rm), (ra)) +#define arm_msubw(p, rd, rn, rm, ra) arm_format_mul((p), 0x0, 0x1, (rd), (rn), (rm), (ra)) +#define arm_mnegx(p, rd, rn, rm) arm_msubx ((p), (rd), (rn), (rm), ARMREG_RZR) +#define arm_mnegw(p, rd, rn, rm) arm_msubw ((p), (rd), (rn), (rm), ARMREG_RZR) +#define arm_mulx(p, rd, rn, rm) arm_maddx ((p), (rd), (rn), (rm), ARMREG_RZR) +#define arm_mulw(p, rd, rn, rm) arm_maddw ((p), (rd), (rn), (rm), ARMREG_RZR) + +/* FIXME: Missing multiple opcodes */ + +/* Division */ +#define arm_format_div(p, sf, o1, rd, rn, rm) arm_emit ((p), ((sf) << 31) | (0xd6 << 21) | ((rm) << 16) | (0x1 << 11) | ((o1) << 10) | ((rn) << 5) | ((rd) << 0)) + +#define arm_sdivx(p, rd, rn, rm) arm_format_div ((p), 0x1, 0x1, (rd), (rn), (rm)) +#define arm_sdivw(p, rd, rn, rm) arm_format_div ((p), 0x0, 0x1, (rd), (rn), (rm)) +#define arm_udivx(p, rd, rn, rm) arm_format_div ((p), 0x1, 0x0, (rd), (rn), (rm)) +#define arm_udivw(p, rd, rn, rm) arm_format_div ((p), 0x0, 0x0, (rd), (rn), (rm)) + +/* Conditional select */ +#define arm_format_csel(p, sf, op, op2, cond, rd, rn, rm) arm_emit ((p), ((sf) << 31) | ((op) << 30) | (0xd4 << 21) | ((rm) << 16) | ((cond) << 12) | ((op2) << 10) | ((rn) << 5) | ((rd) << 0)) + +#define arm_cselx(p, cond, rd, rn, rm) arm_format_csel ((p), 0x1, 0x0, 0x0, (cond), (rd), (rn), (rm)) +#define arm_cselw(p, cond, rd, rn, rm) arm_format_csel ((p), 0x0, 0x0, 0x0, (cond), (rd), (rn), (rm)) +#define arm_csincx(p, cond, rd, rn, rm) arm_format_csel ((p), 0x1, 0x0, 0x1, (cond), (rd), (rn), (rm)) +#define arm_csincw(p, cond, rd, rn, rm) arm_format_csel ((p), 0x0, 0x0, 0x1, (cond), (rd), (rn), (rm)) +#define arm_csinvx(p, cond, rd, rn, rm) arm_format_csel ((p), 0x1, 0x1, 0x0, (cond), (rd), (rn), (rm)) +#define arm_csinvw(p, cond, rd, rn, rm) arm_format_csel ((p), 0x0, 0x1, 0x0, (cond), (rd), (rn), (rm)) +#define arm_csnegx(p, cond, rd, rn, rm) arm_format_csel ((p), 0x1, 0x1, 0x1, (cond), (rd), (rn), (rm)) +#define arm_csnegw(p, cond, rd, rn, rm) arm_format_csel ((p), 0x0, 0x1, 0x1, (cond), (rd), (rn), (rm)) + +#define arm_cset(p, cond, rd) arm_csincx ((p), ((cond) ^ 0x1), (rd), ARMREG_RZR, ARMREG_RZR) + +/* C5.6.68 (HINT) */ +#define arm_hint(p, imm) arm_emit ((p), (0xd5032 << 12) | ((imm) << 5) | (0x1f << 0)) +#define arm_nop(p) arm_hint ((p), 0x0) + +/* C5.6.29 BRK */ +#define arm_brk(p, imm) arm_emit ((p), (0xd4 << 24) | (0x1 << 21) | ((imm) << 5)) + +/* C6.3.114 FMOV (General) */ +#define arm_format_fmov_gr(p, sf, type, rmode, opcode, rn, rd) arm_emit ((p), ((sf) << 31) | (0x1e << 24) | ((type) << 22) | (0x1 << 21) | ((rmode) << 19) | ((opcode) << 16) | ((rn) << 5) | ((rd) << 0)) + +/* Move gr->vfp */ +#define arm_fmov_rx_to_double(p, dd, xn) arm_format_fmov_gr ((p), 0x1, 0x1, 0x0, 0x7, (xn), (dd)) + 
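Usage note (illustrative, not part of the patch): every macro above expands to arm_emit, which writes one fixed-width 4-byte instruction through the buffer pointer and post-increments it, so a single pointer can be threaded through an entire emission sequence. A minimal sketch of a caller, assuming this header is reachable as "arm64-codegen.h" and that buf points to suitably allocated code memory; the function name is hypothetical:

	#include <glib.h>
	#include "arm64-codegen.h"

	/* Emits "add x0, x0, x1; lsl x0, x0, #1; ret", i.e. computes (a + b) * 2. */
	static gsize
	emit_add_twice (guint8 *buf)
	{
		guint8 *code = buf;

		arm_addx (code, ARMREG_R0, ARMREG_R0, ARMREG_R1);	/* x0 = x0 + x1 */
		arm_lslx (code, ARMREG_R0, ARMREG_R0, 1);		/* x0 <<= 1 (encoded as ubfm) */
		arm_retx (code, ARMREG_LR);				/* return to caller */

		return code - buf;	/* 12 bytes: each emitter advanced `code` by 4 */
	}
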
+/* Move vfp->gr */ +#define arm_fmov_double_to_rx(p, xd, dn) arm_format_fmov_gr ((p), 0x1, 0x1, 0x0, 0x6, (dn), (xd)) + +/* C6.3.113 FMOV (register) */ +#define arm_format_fmov(p, type, rn, rd) arm_emit ((p), (0x1e << 24) | ((type) << 22) | (0x1 << 21) | (0x10 << 10) | ((rn) << 5) | ((rd) << 0)) + +#define arm_fmovd(p, dd, dn) arm_format_fmov ((p), 0x1, (dn), (dd)) +#define arm_fmovs(p, dd, dn) arm_format_fmov ((p), 0x0, (dn), (dd)) + +/* C6.3.54 FCMP */ +#define arm_format_fcmp(p, type, opc, rn, rm) arm_emit ((p), (0x1e << 24) | ((type) << 22) | (0x1 << 21) | ((rm) << 16) | (0x8 << 10) | ((rn) << 5) | ((opc) << 3)) + +#define arm_fcmpd(p, dn, dm) arm_format_fcmp (p, 0x1, 0x0, (dn), (dm)) +#define arm_fcmps(p, dn, dm) arm_format_fcmp (p, 0x0, 0x0, (dn), (dm)) + +/* Float precision */ +#define arm_format_fcvt(p, type, opc, rn, rd) arm_emit ((p), (0x1e << 24) | ((type) << 22) | (0x1 << 21) | (0x1 << 17) | ((opc) << 15) | (0x10 << 10) | ((rn) << 5) | ((rd) << 0)) + +/* C6.3.57 FCVT */ +/* single->double */ +#define arm_fcvt_sd(p, dd, sn) arm_format_fcvt ((p), 0x0, 0x1, (sn), (dd)) +/* double->single */ +#define arm_fcvt_ds(p, sd, dn) arm_format_fcvt ((p), 0x1, 0x0, (dn), (sd)) + +/* Float conversion to integer conversion */ +#define arm_format_fcvtz(p, sf, type, rmode, opcode, rn, rd) arm_emit ((p), ((sf) << 31) | (0x1e << 24) | ((type) << 22) | (0x1 << 21) | ((rmode) << 19) | ((opcode) << 16) | ((rn) << 5) | ((rd) << 0)) + +/* C6.3.80 FCVTZS (scalar, integer) */ +#define arm_fcvtzs_dw(p, rd, rn) arm_format_fcvtz ((p), 0x0, 0x1, 0x3, 0x0, (rn), (rd)) +#define arm_fcvtzs_dx(p, rd, rn) arm_format_fcvtz ((p), 0x1, 0x1, 0x3, 0x0, (rn), (rd)) +#define arm_fcvtzs_sw(p, rd, rn) arm_format_fcvtz ((p), 0x0, 0x0, 0x3, 0x0, (rn), (rd)) +#define arm_fcvtzs_sx(p, rd, rn) arm_format_fcvtz ((p), 0x1, 0x0, 0x3, 0x0, (rn), (rd)) + +/* C6.3.84 FCVTZU (scalar, integer) */ +#define arm_fcvtzu_dw(p, rd, rn) arm_format_fcvtz ((p), 0x0, 0x1, 0x3, 0x1, (rn), (rd)) +#define arm_fcvtzu_dx(p, rd, rn) arm_format_fcvtz ((p), 0x1, 0x1, 0x3, 0x1, (rn), (rd)) +#define arm_fcvtzu_sw(p, rd, rn) arm_format_fcvtz ((p), 0x0, 0x0, 0x3, 0x1, (rn), (rd)) +#define arm_fcvtzu_sx(p, rd, rn) arm_format_fcvtz ((p), 0x1, 0x0, 0x3, 0x1, (rn), (rd)) + +/* C6.3.208 SCVTF (vector, integer) */ +#define arm_format_scvtf_vector(p, sz, rn, rd) arm_emit ((p), (0x1 << 30) | (0x0 << 29) | (0x1e << 24) | ((sz) << 22) | (0x10 << 17) | (0x1d << 12) | (0x2 << 10) | ((rn) << 5) | ((rd) << 0)) + +#define arm_scvtf_d(p, dd, dn) arm_format_scvtf_vector ((p), 0x1, (dn), (dd)) +#define arm_scvtf_s(p, sd, sn) arm_format_scvtf_vector ((p), 0x0, (sn), (sd)) + +/* C6.3.210 SCVTF (scalar, integer) */ +#define arm_format_scvtf_scalar(p, sf, type, rn, rd) arm_emit ((p), ((sf) << 31) | (0x1e << 24) | ((type) << 22) | (0x1 << 21) | (0x2 << 16) | (0x0 << 10) | ((rn) << 5) | ((rd) << 0)) + +#define arm_scvtf_rx_to_d(p, dd, rn) arm_format_scvtf_scalar ((p), 0x1, 0x1, rn, dd) +#define arm_scvtf_rw_to_d(p, dd, rn) arm_format_scvtf_scalar ((p), 0x0, 0x1, rn, dd) +#define arm_scvtf_rx_to_s(p, dd, rn) arm_format_scvtf_scalar ((p), 0x1, 0x0, rn, dd) +#define arm_scvtf_rw_to_s(p, dd, rn) arm_format_scvtf_scalar ((p), 0x0, 0x0, rn, dd) + +/* C6.3.306 UCVTF (vector, integer) */ +#define arm_format_ucvtf_vector(p, sz, rn, rd) arm_emit ((p), (0x1 << 30) | (0x1 << 29) | (0x1e << 24) | ((sz) << 22) | (0x10 << 17) | (0x1d << 12) | (0x2 << 10) | ((rn) << 5) | ((rd) << 0)) + +#define arm_ucvtf_d(p, dd, dn) arm_format_ucvtf_vector ((p), 0x1, (dn), (dd)) +#define arm_ucvtf_s(p, sd, sn) 
arm_format_ucvtf_vector ((p), 0x0, (sn), (sd)) + +/* C6.3.308 UCVTF (scalar, integer) */ +#define arm_format_ucvtf_scalar(p, sf, type, rn, rd) arm_emit ((p), ((sf) << 31) | (0x1e << 24) | ((type) << 22) | (0x1 << 21) | (0x3 << 16) | (0x0 << 10) | ((rn) << 5) | ((rd) << 0)) + +#define arm_ucvtf_rx_to_d(p, dd, rn) arm_format_ucvtf_scalar ((p), 0x1, 0x1, rn, dd) +#define arm_ucvtf_rw_to_d(p, dd, rn) arm_format_ucvtf_scalar ((p), 0x0, 0x1, rn, dd) + +/* C6.3.41 FADD (scalar) */ +#define arm_format_fadd_scalar(p, type, rd, rn, rm) arm_emit ((p), (0x1e << 24) | ((type) << 22) | (0x1 << 21) | ((rm) << 16) | (0x1 << 13) | (0x2 << 10) | ((rn) << 5) | ((rd) << 0)) + +#define arm_fadd_d(p, rd, rn, rm) arm_format_fadd_scalar ((p), 0x1, (rd), (rn), (rm)) +#define arm_fadd_s(p, rd, rn, rm) arm_format_fadd_scalar ((p), 0x0, (rd), (rn), (rm)) + +/* C6.3.149 FSUB (scalar) */ +#define arm_format_fsub_scalar(p, type, rd, rn, rm) arm_emit ((p), (0x1e << 24) | ((type) << 22) | (0x1 << 21) | ((rm) << 16) | (0x1 << 13) | (0x1 << 12) | (0x2 << 10) | ((rn) << 5) | ((rd) << 0)) + +#define arm_fsub_d(p, rd, rn, rm) arm_format_fsub_scalar ((p), 0x1, (rd), (rn), (rm)) +#define arm_fsub_s(p, rd, rn, rm) arm_format_fsub_scalar ((p), 0x0, (rd), (rn), (rm)) + +/* C6.3.119 FMUL (scalar) */ +#define arm_format_fmul_scalar(p, type, rd, rn, rm) arm_emit ((p), (0x1e << 24) | ((type) << 22) | (0x1 << 21) | ((rm) << 16) | (0x2 << 10) | ((rn) << 5) | ((rd) << 0)) + +#define arm_fmul_d(p, rd, rn, rm) arm_format_fmul_scalar ((p), 0x1, (rd), (rn), (rm)) +#define arm_fmul_s(p, rd, rn, rm) arm_format_fmul_scalar ((p), 0x0, (rd), (rn), (rm)) + +/* C6.3.86 FDIV (scalar) */ +#define arm_format_fdiv_scalar(p, type, rd, rn, rm) arm_emit ((p), (0x1e << 24) | ((type) << 22) | (0x1 << 21) | ((rm) << 16) | (0x1 << 12) | (0x2 << 10) | ((rn) << 5) | ((rd) << 0)) + +#define arm_fdiv_d(p, rd, rn, rm) arm_format_fdiv_scalar ((p), 0x1, (rd), (rn), (rm)) +#define arm_fdiv_s(p, rd, rn, rm) arm_format_fdiv_scalar ((p), 0x0, (rd), (rn), (rm)) + +/* C6.3.116 FMSUB */ +#define arm_format_fmsub(p, type, rd, rn, rm, ra) arm_emit ((p), (0x1f << 24) | ((type) << 22) | (0x0 << 21) | ((rm) << 16) | (0x1 << 15) | ((ra) << 10) | ((rn) << 5) | ((rd) << 0)) + +#define arm_fmsub_d(p, rd, rn, rm, ra) arm_format_fmsub ((p), 0x1, (rd), (rn), (rm), (ra)) + +/* C6.3.123 FNEG */ +#define arm_format_fneg(p, type, rd, rn) arm_emit ((p), (0x1e << 24) | ((type) << 22) | (0x1 << 21) | (0x2 << 15) | (0x10 << 10) | ((rn) << 5) | ((rd) << 0)) + +#define arm_fneg_d(p, rd, rn) arm_format_fneg ((p), 0x1, (rd), (rn)) +#define arm_fneg_s(p, rd, rn) arm_format_fneg ((p), 0x0, (rd), (rn)) + +/* C6.3.37 FABS (scalar) */ +#define arm_format_fabs(p, type, opc, rd, rn) arm_emit ((p), (0x1e << 24) | ((type) << 22) | (0x1 << 21) | ((opc) << 15) | (0x10 << 10) | ((rn) << 5) | ((rd) << 0)) + +#define arm_fabs_d(p, rd, rn) arm_format_fabs ((p), 0x1, 0x1, (rd), (rn)) + +/* C5.6.60 DMB */ +#define arm_format_dmb(p, opc, CRm) arm_emit ((p), (0x354 << 22) | (0x3 << 16) | (0x3 << 12) | ((CRm) << 8) | (0x1 << 7) | ((opc) << 5) | (0x1f << 0)) + +#define ARM_DMB_LD 0x1 +#define ARM_DMB_ST 0x2 +#define ARM_DMB_ALL 0x3 +#define ARM_DMB_SY 0xc + +#define arm_dmb(p, imm) arm_format_dmb ((p), 0x1, (imm)) + +/* C5.6.129 MRS */ + +#define ARM_MRS_REG_TPIDR_EL0 0x5e82 + +#define arm_format_mrs(p, sysreg, rt) arm_emit ((p), (0x354 << 22) | (0x1 << 21) | (0x1 << 20) | ((sysreg) << 5) | ((rt) << 0)) + +#define arm_mrs(p, rt, sysreg) arm_format_mrs ((p), (sysreg), (rt)) + +#endif /* __arm_CODEGEN_H__ */ diff --git 
a/mono/metadata/object-offsets.h b/mono/metadata/object-offsets.h index acf2023b450..36763dd311c 100644 --- a/mono/metadata/object-offsets.h +++ b/mono/metadata/object-offsets.h @@ -237,6 +237,13 @@ DECL_OFFSET(GSharedVtCallInfo, vret_slot) DECL_OFFSET(GSharedVtCallInfo, gsharedvt_in) #endif +#if defined(TARGET_ARM64) +DECL_OFFSET(GSharedVtCallInfo, stack_usage) +DECL_OFFSET(GSharedVtCallInfo, gsharedvt_in) +DECL_OFFSET(GSharedVtCallInfo, ret_marshal) +DECL_OFFSET(GSharedVtCallInfo, vret_slot) +#endif + #if defined(TARGET_AMD64) || defined(TARGET_ARM64) DECL_OFFSET(SeqPointInfo, ss_tramp_addr) #endif diff --git a/mono/mini/Makefile.am.in b/mono/mini/Makefile.am.in index f82baef4863..d2cc3cd3a5b 100755 --- a/mono/mini/Makefile.am.in +++ b/mono/mini/Makefile.am.in @@ -337,7 +337,9 @@ arm64_sources = \ mini-arm64.c \ mini-arm64.h \ exceptions-arm64.c \ - tramp-arm64.c + tramp-arm64.c \ + mini-arm64-gsharedvt.c \ + tramp-arm64-gsharedvt.c mips_sources = \ mini-mips.c \ diff --git a/mono/mini/aot-compiler.c b/mono/mini/aot-compiler.c index 3ae542f03ae..3cd38718b98 100644 --- a/mono/mini/aot-compiler.c +++ b/mono/mini/aot-compiler.c @@ -857,7 +857,331 @@ arch_init (MonoAotCompile *acfg) #ifdef TARGET_ARM64 -#include "../../../mono-extensions/mono/mini/aot-compiler-arm64.c" + +/* Load the contents of GOT_SLOT into dreg, clobbering ip0 */ +static void +arm64_emit_load_got_slot (MonoAotCompile *acfg, int dreg, int got_slot) +{ + int offset; + + g_assert (acfg->fp); + emit_unset_mode (acfg); + /* r16==ip0 */ + offset = (int)(got_slot * sizeof (gpointer)); +#ifdef TARGET_MACH + /* clang's integrated assembler */ + fprintf (acfg->fp, "adrp x16, %s@PAGE+%d\n", acfg->got_symbol, offset & 0xfffff000); + fprintf (acfg->fp, "add x16, x16, %s@PAGEOFF\n", acfg->got_symbol); + fprintf (acfg->fp, "ldr x%d, [x16, #%d]\n", dreg, offset & 0xfff); +#else + /* Linux GAS */ + fprintf (acfg->fp, "adrp x16, %s+%d\n", acfg->got_symbol, offset & 0xfffff000); + fprintf (acfg->fp, "add x16, x16, :lo12:%s\n", acfg->got_symbol); + fprintf (acfg->fp, "ldr x%d, [x16, %d]\n", dreg, offset & 0xfff); +#endif +} + +static void +arm64_emit_objc_selector_ref (MonoAotCompile *acfg, guint8 *code, int index, int *code_size) +{ + int reg; + + g_assert (acfg->fp); + emit_unset_mode (acfg); + + /* ldr rt, target */ + reg = arm_get_ldr_lit_reg (code); + + fprintf (acfg->fp, "adrp x%d, L_OBJC_SELECTOR_REFERENCES_%d@PAGE\n", reg, index); + fprintf (acfg->fp, "add x%d, x%d, L_OBJC_SELECTOR_REFERENCES_%d@PAGEOFF\n", reg, reg, index); + fprintf (acfg->fp, "ldr x%d, [x%d]\n", reg, reg); + + *code_size = 12; +} + +static void +arm64_emit_direct_call (MonoAotCompile *acfg, const char *target, gboolean external, gboolean thumb, MonoJumpInfo *ji, int *call_size) +{ + g_assert (acfg->fp); + emit_unset_mode (acfg); + if (ji && ji->relocation == MONO_R_ARM64_B) { + fprintf (acfg->fp, "b %s\n", target); + } else { + if (ji) + g_assert (ji->relocation == MONO_R_ARM64_BL); + fprintf (acfg->fp, "bl %s\n", target); + } + *call_size = 4; +} + +static void +arm64_emit_got_access (MonoAotCompile *acfg, guint8 *code, int got_slot, int *code_size) +{ + int reg; + + /* ldr rt, target */ + reg = arm_get_ldr_lit_reg (code); + arm64_emit_load_got_slot (acfg, reg, got_slot); + *code_size = 12; +} + +static void +arm64_emit_plt_entry (MonoAotCompile *acfg, const char *got_symbol, int offset, int info_offset) +{ + arm64_emit_load_got_slot (acfg, ARMREG_R16, offset / sizeof (gpointer)); + fprintf (acfg->fp, "br x16\n"); + /* Used by mono_aot_get_plt_info_offset () */ + 
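+	/* The entry thus ends with a data word holding info_offset, emitted just below
+	   via inst_directive; mono_aot_get_plt_info_offset () reads it back from there. */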
fprintf (acfg->fp, "%s %d\n", acfg->inst_directive, info_offset); +} + +static void +arm64_emit_tramp_page_common_code (MonoAotCompile *acfg, int pagesize, int arg_reg, int *size) +{ + guint8 buf [256]; + guint8 *code; + int imm; + + /* The common code */ + code = buf; + imm = pagesize; + /* The trampoline address is in IP0 */ + arm_movzx (code, ARMREG_IP1, imm & 0xffff, 0); + arm_movkx (code, ARMREG_IP1, (imm >> 16) & 0xffff, 16); + /* Compute the data slot address */ + arm_subx (code, ARMREG_IP0, ARMREG_IP0, ARMREG_IP1); + /* Trampoline argument */ + arm_ldrx (code, arg_reg, ARMREG_IP0, 0); + /* Address */ + arm_ldrx (code, ARMREG_IP0, ARMREG_IP0, 8); + arm_brx (code, ARMREG_IP0); + + /* Emit it */ + emit_code_bytes (acfg, buf, code - buf); + + *size = code - buf; +} + +static void +arm64_emit_tramp_page_specific_code (MonoAotCompile *acfg, int pagesize, int common_tramp_size, int specific_tramp_size) +{ + guint8 buf [256]; + guint8 *code; + int i, count; + + count = (pagesize - common_tramp_size) / specific_tramp_size; + for (i = 0; i < count; ++i) { + code = buf; + arm_adrx (code, ARMREG_IP0, code); + /* Branch to the generic code */ + arm_b (code, code - 4 - (i * specific_tramp_size) - common_tramp_size); + /* This has to be 2 pointers long */ + arm_nop (code); + arm_nop (code); + g_assert (code - buf == specific_tramp_size); + emit_code_bytes (acfg, buf, code - buf); + } +} + +static void +arm64_emit_specific_trampoline_pages (MonoAotCompile *acfg) +{ + guint8 buf [128]; + guint8 *code; + guint8 *labels [16]; + int common_tramp_size; + int specific_tramp_size = 2 * 8; + int imm, pagesize; + char symbol [128]; + + if (!acfg->aot_opts.use_trampolines_page) + return; + +#ifdef TARGET_MACH + /* Have to match the target pagesize */ + pagesize = 16384; +#else + pagesize = mono_pagesize (); +#endif + acfg->tramp_page_size = pagesize; + + /* The specific trampolines */ + sprintf (symbol, "%sspecific_trampolines_page", acfg->user_symbol_prefix); + emit_alignment (acfg, pagesize); + emit_global (acfg, symbol, TRUE); + emit_label (acfg, symbol); + + /* The common code */ + arm64_emit_tramp_page_common_code (acfg, pagesize, ARMREG_IP1, &common_tramp_size); + acfg->tramp_page_code_offsets [MONO_AOT_TRAMP_SPECIFIC] = common_tramp_size; + + arm64_emit_tramp_page_specific_code (acfg, pagesize, common_tramp_size, specific_tramp_size); + + /* The rgctx trampolines */ + /* These are the same as the specific trampolines, but they load the argument into MONO_ARCH_RGCTX_REG */ + sprintf (symbol, "%srgctx_trampolines_page", acfg->user_symbol_prefix); + emit_alignment (acfg, pagesize); + emit_global (acfg, symbol, TRUE); + emit_label (acfg, symbol); + + /* The common code */ + arm64_emit_tramp_page_common_code (acfg, pagesize, MONO_ARCH_RGCTX_REG, &common_tramp_size); + acfg->tramp_page_code_offsets [MONO_AOT_TRAMP_STATIC_RGCTX] = common_tramp_size; + + arm64_emit_tramp_page_specific_code (acfg, pagesize, common_tramp_size, specific_tramp_size); + + /* The gsharedvt arg trampolines */ + /* These are the same as the specific trampolines */ + sprintf (symbol, "%sgsharedvt_arg_trampolines_page", acfg->user_symbol_prefix); + emit_alignment (acfg, pagesize); + emit_global (acfg, symbol, TRUE); + emit_label (acfg, symbol); + + arm64_emit_tramp_page_common_code (acfg, pagesize, ARMREG_IP1, &common_tramp_size); + acfg->tramp_page_code_offsets [MONO_AOT_TRAMP_GSHAREDVT_ARG] = common_tramp_size; + + arm64_emit_tramp_page_specific_code (acfg, pagesize, common_tramp_size, specific_tramp_size); + + /* The IMT 
trampolines */ + sprintf (symbol, "%simt_trampolines_page", acfg->user_symbol_prefix); + emit_alignment (acfg, pagesize); + emit_global (acfg, symbol, TRUE); + emit_label (acfg, symbol); + + code = buf; + imm = pagesize; + /* The trampoline address is in IP0 */ + arm_movzx (code, ARMREG_IP1, imm & 0xffff, 0); + arm_movkx (code, ARMREG_IP1, (imm >> 16) & 0xffff, 16); + /* Compute the data slot address */ + arm_subx (code, ARMREG_IP0, ARMREG_IP0, ARMREG_IP1); + /* Trampoline argument */ + arm_ldrx (code, ARMREG_IP1, ARMREG_IP0, 0); + + /* Same as arch_emit_imt_thunk () */ + labels [0] = code; + arm_ldrx (code, ARMREG_IP0, ARMREG_IP1, 0); + arm_cmpx (code, ARMREG_IP0, MONO_ARCH_RGCTX_REG); + labels [1] = code; + arm_bcc (code, ARMCOND_EQ, 0); + + /* End-of-loop check */ + labels [2] = code; + arm_cbzx (code, ARMREG_IP0, 0); + + /* Loop footer */ + arm_addx_imm (code, ARMREG_IP1, ARMREG_IP1, 2 * 8); + arm_b (code, labels [0]); + + /* Match */ + mono_arm_patch (labels [1], code, MONO_R_ARM64_BCC); + /* Load vtable slot addr */ + arm_ldrx (code, ARMREG_IP0, ARMREG_IP1, 8); + /* Load vtable slot */ + arm_ldrx (code, ARMREG_IP0, ARMREG_IP0, 0); + arm_brx (code, ARMREG_IP0); + + /* No match */ + mono_arm_patch (labels [2], code, MONO_R_ARM64_CBZ); + /* Load fail addr */ + arm_ldrx (code, ARMREG_IP0, ARMREG_IP1, 8); + arm_brx (code, ARMREG_IP0); + + emit_code_bytes (acfg, buf, code - buf); + + common_tramp_size = code - buf; + acfg->tramp_page_code_offsets [MONO_AOT_TRAMP_IMT_THUNK] = common_tramp_size; + + arm64_emit_tramp_page_specific_code (acfg, pagesize, common_tramp_size, specific_tramp_size); +} + +static void +arm64_emit_specific_trampoline (MonoAotCompile *acfg, int offset, int *tramp_size) +{ + /* Load argument from second GOT slot */ + arm64_emit_load_got_slot (acfg, ARMREG_R17, offset + 1); + /* Load generic trampoline address from first GOT slot */ + arm64_emit_load_got_slot (acfg, ARMREG_R16, offset); + fprintf (acfg->fp, "br x16\n"); + *tramp_size = 7 * 4; +} + +static void +arm64_emit_unbox_trampoline (MonoAotCompile *acfg, MonoCompile *cfg, MonoMethod *method, const char *call_target) +{ + emit_unset_mode (acfg); + fprintf (acfg->fp, "add x0, x0, %d\n", (int)(sizeof (MonoObject))); + fprintf (acfg->fp, "b %s\n", call_target); +} + +static void +arm64_emit_static_rgctx_trampoline (MonoAotCompile *acfg, int offset, int *tramp_size) +{ + /* Similar to the specific trampolines, but use the rgctx reg instead of ip1 */ + + /* Load argument from first GOT slot */ + g_assert (MONO_ARCH_RGCTX_REG == 27); + arm64_emit_load_got_slot (acfg, ARMREG_R27, offset); + /* Load generic trampoline address from second GOT slot */ + arm64_emit_load_got_slot (acfg, ARMREG_R16, offset + 1); + fprintf (acfg->fp, "br x16\n"); + *tramp_size = 7 * 4; +} + +static void +arm64_emit_imt_thunk (MonoAotCompile *acfg, int offset, int *tramp_size) +{ + guint8 buf [128]; + guint8 *code, *labels [16]; + + /* Load parameter from GOT slot into ip1 */ + arm64_emit_load_got_slot (acfg, ARMREG_R17, offset); + + code = buf; + labels [0] = code; + arm_ldrx (code, ARMREG_IP0, ARMREG_IP1, 0); + arm_cmpx (code, ARMREG_IP0, MONO_ARCH_RGCTX_REG); + labels [1] = code; + arm_bcc (code, ARMCOND_EQ, 0); + + /* End-of-loop check */ + labels [2] = code; + arm_cbzx (code, ARMREG_IP0, 0); + + /* Loop footer */ + arm_addx_imm (code, ARMREG_IP1, ARMREG_IP1, 2 * 8); + arm_b (code, labels [0]); + + /* Match */ + mono_arm_patch (labels [1], code, MONO_R_ARM64_BCC); + /* Load vtable slot addr */ + arm_ldrx (code, ARMREG_IP0, ARMREG_IP1, 8); + 
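+	/* Each IMT table entry is a (key, target address) pair of pointers: the key is
+	   compared against MONO_ARCH_RGCTX_REG above, the second word (offset 8) holds
+	   the vtable slot address (or the fail address for the NULL-key terminator),
+	   and the loop footer advances ip1 by 2 * 8 accordingly. */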
/* Load vtable slot */ + arm_ldrx (code, ARMREG_IP0, ARMREG_IP0, 0); + arm_brx (code, ARMREG_IP0); + + /* No match */ + mono_arm_patch (labels [2], code, MONO_R_ARM64_CBZ); + /* Load fail addr */ + arm_ldrx (code, ARMREG_IP0, ARMREG_IP1, 8); + arm_brx (code, ARMREG_IP0); + + emit_code_bytes (acfg, buf, code - buf); + + *tramp_size = code - buf + (3 * 4); +} + +static void +arm64_emit_gsharedvt_arg_trampoline (MonoAotCompile *acfg, int offset, int *tramp_size) +{ + /* Similar to the specific trampolines, but the address is in the second slot */ + /* Load argument from first GOT slot */ + arm64_emit_load_got_slot (acfg, ARMREG_R17, offset); + /* Load generic trampoline address from second GOT slot */ + arm64_emit_load_got_slot (acfg, ARMREG_R16, offset + 1); + fprintf (acfg->fp, "br x16\n"); + *tramp_size = 7 * 4; +} + #endif diff --git a/mono/mini/exceptions-arm64.c b/mono/mini/exceptions-arm64.c index 333fd13b754..e7d9e0074d3 100644 --- a/mono/mini/exceptions-arm64.c +++ b/mono/mini/exceptions-arm64.c @@ -1 +1,594 @@ -#include "../../../mono-extensions/mono/mini/exceptions-arm64.c" +/* + * exceptions-arm64.c: exception support for ARM64 + * + * Copyright 2013 Xamarin Inc + * + * Based on exceptions-arm.c: + * + * Authors: + * Dietmar Maurer (dietmar@ximian.com) + * Paolo Molaro (lupus@ximian.com) + * + * (C) 2001 Ximian, Inc. + */ + +#include "mini.h" + +#include +#include + +#define ALIGN_TO(val,align) ((((guint64)val) + ((align) - 1)) & ~((align) - 1)) + +#ifndef DISABLE_JIT + +gpointer +mono_arch_get_restore_context (MonoTrampInfo **info, gboolean aot) +{ + guint8 *start, *code; + MonoJumpInfo *ji = NULL; + GSList *unwind_ops = NULL; + int i, ctx_reg, size; + + size = 256; + code = start = mono_global_codeman_reserve (size); + + arm_movx (code, ARMREG_IP0, ARMREG_R0); + ctx_reg = ARMREG_IP0; + /* Restore fregs */ + for (i = 0; i < 32; ++i) + arm_ldrfpx (code, i, ctx_reg, MONO_STRUCT_OFFSET (MonoContext, fregs) + (i * 8)); + /* Restore gregs */ + // FIXME: Restore fewer registers + // FIXME: fp should be restored later + code = mono_arm_emit_load_regarray (code, 0xffffffff & ~(1 << ctx_reg) & ~(1 << ARMREG_SP), ctx_reg, MONO_STRUCT_OFFSET (MonoContext, regs)); + /* ip0/ip1 don't need to be restored */ + /* ip1 = pc */ + arm_ldrx (code, ARMREG_IP1, ctx_reg, MONO_STRUCT_OFFSET (MonoContext, pc)); + /* ip0 = sp */ + arm_ldrx (code, ARMREG_IP0, ctx_reg, MONO_STRUCT_OFFSET (MonoContext, regs) + (ARMREG_SP * 8)); + /* Restore sp, ctx is no longer valid */ + arm_movspx (code, ARMREG_SP, ARMREG_IP0); + /* Branch to pc */ + arm_brx (code, ARMREG_IP1); + /* Not reached */ + arm_brk (code, 0); + + g_assert ((code - start) < size); + mono_arch_flush_icache (start, code - start); + mono_profiler_code_buffer_new (start, code - start, MONO_PROFILER_CODE_BUFFER_EXCEPTION_HANDLING, NULL); + + if (info) + *info = mono_tramp_info_create ("restore_context", start, code - start, ji, unwind_ops); + + return start; +} + +gpointer +mono_arch_get_call_filter (MonoTrampInfo **info, gboolean aot) +{ + guint8 *code; + guint8* start; + int size, offset, gregs_offset, fregs_offset, ctx_offset, num_fregs, frame_size; + MonoJumpInfo *ji = NULL; + GSList *unwind_ops = NULL; + + size = 512; + start = code = mono_global_codeman_reserve (size); + + /* Compute stack frame size and offsets */ + offset = 0; + /* frame block */ + offset += 2 * 8; + /* gregs */ + gregs_offset = offset; + offset += 32 * 8; + /* fregs */ + num_fregs = 8; + fregs_offset = offset; + offset += num_fregs * 8; + /* ctx */ + ctx_offset = offset; + offset += 8; +
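/* + * Sketch of the resulting filter frame (offsets from fp, low to high): + * [0] saved fp/lr pair, [gregs_offset] 32 gregs, [fregs_offset] 8 fregs, + * [ctx_offset] the saved ctx argument. + */ +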
frame_size = ALIGN_TO (offset, MONO_ARCH_FRAME_ALIGNMENT); + + /* + * We are being called from C code, ctx is in r0, the address to call is in r1. + * We need to save state, restore ctx, make the call, then restore the previous state, + * returning the value returned by the call. + */ + + /* Setup a frame */ + arm_stpx_pre (code, ARMREG_FP, ARMREG_LR, ARMREG_SP, -frame_size); + arm_movspx (code, ARMREG_FP, ARMREG_SP); + + /* Save ctx */ + arm_strx (code, ARMREG_R0, ARMREG_FP, ctx_offset); + /* Save gregs */ + code = mono_arm_emit_store_regarray (code, MONO_ARCH_CALLEE_SAVED_REGS | (1 << ARMREG_FP), ARMREG_FP, gregs_offset); + /* No need to save/restore fregs, since we don't currently use them */ + + /* Load regs from ctx */ + code = mono_arm_emit_load_regarray (code, MONO_ARCH_CALLEE_SAVED_REGS, ARMREG_R0, MONO_STRUCT_OFFSET (MonoContext, regs)); + /* Load fp */ + arm_ldrx (code, ARMREG_FP, ARMREG_R0, MONO_STRUCT_OFFSET (MonoContext, regs) + (ARMREG_FP * 8)); + + /* Make the call */ + arm_blrx (code, ARMREG_R1); + /* For filters, the result is in R0 */ + + /* Restore fp */ + arm_ldrx (code, ARMREG_FP, ARMREG_SP, gregs_offset + (ARMREG_FP * 8)); + /* Load ctx */ + arm_ldrx (code, ARMREG_IP0, ARMREG_FP, ctx_offset); + /* Save registers back to ctx */ + /* This isn't strictly necessary since we don't allocate variables used in eh clauses to registers */ + code = mono_arm_emit_store_regarray (code, MONO_ARCH_CALLEE_SAVED_REGS, ARMREG_IP0, MONO_STRUCT_OFFSET (MonoContext, regs)); + + /* Restore regs */ + code = mono_arm_emit_load_regarray (code, MONO_ARCH_CALLEE_SAVED_REGS, ARMREG_FP, gregs_offset); + /* Destroy frame */ + code = mono_arm_emit_destroy_frame (code, frame_size, (1 << ARMREG_IP0)); + arm_retx (code, ARMREG_LR); + + g_assert ((code - start) < size); + mono_arch_flush_icache (start, code - start); + mono_profiler_code_buffer_new (start, code - start, MONO_PROFILER_CODE_BUFFER_EXCEPTION_HANDLING, NULL); + + if (info) + *info = mono_tramp_info_create ("call_filter", start, code - start, ji, unwind_ops); + + return start; +} + +static gpointer +get_throw_trampoline (int size, gboolean corlib, gboolean rethrow, gboolean llvm, gboolean resume_unwind, const char *tramp_name, MonoTrampInfo **info, gboolean aot) +{ + guint8 *start, *code; + MonoJumpInfo *ji = NULL; + GSList *unwind_ops = NULL; + int i, offset, gregs_offset, fregs_offset, frame_size, num_fregs; + + code = start = mono_global_codeman_reserve (size); + + /* We are being called by JITted code, the exception object/type token is in R0 */ + + /* Compute stack frame size and offsets */ + offset = 0; + /* frame block */ + offset += 2 * 8; + /* gregs */ + gregs_offset = offset; + offset += 32 * 8; + /* fregs */ + num_fregs = 8; + fregs_offset = offset; + offset += num_fregs * 8; + frame_size = ALIGN_TO (offset, MONO_ARCH_FRAME_ALIGNMENT); + + /* Setup a frame */ + arm_stpx_pre (code, ARMREG_FP, ARMREG_LR, ARMREG_SP, -frame_size); + arm_movspx (code, ARMREG_FP, ARMREG_SP); + + /* Save gregs */ + code = mono_arm_emit_store_regarray (code, 0xffffffff, ARMREG_FP, gregs_offset); + if (corlib && !llvm) + /* The real LR is in R1 */ + arm_strx (code, ARMREG_R1, ARMREG_FP, gregs_offset + (ARMREG_LR * 8)); + /* Save fp/sp */ + arm_ldrx (code, ARMREG_IP0, ARMREG_FP, 0); + arm_strx (code, ARMREG_IP0, ARMREG_FP, gregs_offset + (ARMREG_FP * 8)); + arm_addx_imm (code, ARMREG_IP0, ARMREG_FP, frame_size); + arm_strx (code, ARMREG_IP0, ARMREG_FP, gregs_offset + (ARMREG_SP * 8)); + /* Save fregs */ + for (i = 0; i < num_fregs; ++i) + arm_strfpx
(code, ARMREG_D8 + i, ARMREG_FP, fregs_offset + (i * 8)); + + /* Call the C trampoline function */ + /* Arg1 = exception object/type token */ + arm_movx (code, ARMREG_R0, ARMREG_R0); + /* Arg2 = caller ip */ + if (corlib) { + if (llvm) + arm_ldrx (code, ARMREG_R1, ARMREG_FP, gregs_offset + (ARMREG_LR * 8)); + else + arm_movx (code, ARMREG_R1, ARMREG_R1); + } else { + arm_ldrx (code, ARMREG_R1, ARMREG_FP, 8); + } + /* Arg 3 = gregs */ + arm_addx_imm (code, ARMREG_R2, ARMREG_FP, gregs_offset); + /* Arg 4 = fregs */ + arm_addx_imm (code, ARMREG_R3, ARMREG_FP, fregs_offset); + /* Arg 5 = corlib */ + arm_movzx (code, ARMREG_R4, corlib ? 1 : 0, 0); + /* Arg 6 = rethrow */ + arm_movzx (code, ARMREG_R5, rethrow ? 1 : 0, 0); + /* Call the function */ + if (aot) { + const char *icall_name; + + if (resume_unwind) + icall_name = "mono_arm_resume_unwind"; + else + icall_name = "mono_arm_throw_exception"; + + code = mono_arm_emit_aotconst (&ji, code, start, ARMREG_LR, MONO_PATCH_INFO_JIT_ICALL_ADDR, icall_name); + } else { + gpointer icall_func; + + if (resume_unwind) + icall_func = mono_arm_resume_unwind; + else + icall_func = mono_arm_throw_exception; + + code = mono_arm_emit_imm64 (code, ARMREG_LR, (guint64)icall_func); + } + arm_blrx (code, ARMREG_LR); + /* This shouldn't return */ + arm_brk (code, 0x0); + + g_assert ((code - start) < size); + mono_arch_flush_icache (start, code - start); + mono_profiler_code_buffer_new (start, code - start, MONO_PROFILER_CODE_BUFFER_EXCEPTION_HANDLING, NULL); + + if (info) + *info = mono_tramp_info_create (tramp_name, start, code - start, ji, unwind_ops); + + return start; +} + +gpointer +mono_arch_get_throw_exception (MonoTrampInfo **info, gboolean aot) +{ + return get_throw_trampoline (256, FALSE, FALSE, FALSE, FALSE, "throw_exception", info, aot); +} + +gpointer +mono_arch_get_rethrow_exception (MonoTrampInfo **info, gboolean aot) +{ + return get_throw_trampoline (256, FALSE, TRUE, FALSE, FALSE, "rethrow_exception", info, aot); +} + +gpointer +mono_arch_get_throw_corlib_exception (MonoTrampInfo **info, gboolean aot) +{ + return get_throw_trampoline (256, TRUE, FALSE, FALSE, FALSE, "throw_corlib_exception", info, aot); +} + +GSList* +mono_arm_get_exception_trampolines (gboolean aot) +{ + MonoTrampInfo *info; + GSList *tramps = NULL; + + /* LLVM uses the normal trampolines, but with a different name */ + get_throw_trampoline (256, TRUE, FALSE, FALSE, FALSE, "llvm_throw_corlib_exception_trampoline", &info, aot); + tramps = g_slist_prepend (tramps, info); + + get_throw_trampoline (256, TRUE, FALSE, TRUE, FALSE, "llvm_throw_corlib_exception_abs_trampoline", &info, aot); + tramps = g_slist_prepend (tramps, info); + + get_throw_trampoline (256, FALSE, FALSE, FALSE, TRUE, "llvm_resume_unwind_trampoline", &info, aot); + tramps = g_slist_prepend (tramps, info); + + return tramps; +} + +#else /* DISABLE_JIT */ + +gpointer +mono_arch_get_restore_context (MonoTrampInfo **info, gboolean aot) +{ + g_assert_not_reached (); + return NULL; +} + +gpointer +mono_arch_get_call_filter (MonoTrampInfo **info, gboolean aot) +{ + g_assert_not_reached (); + return NULL; +} + +gpointer +mono_arch_get_throw_exception (MonoTrampInfo **info, gboolean aot) +{ + g_assert_not_reached (); + return NULL; +} + +gpointer +mono_arch_get_rethrow_exception (MonoTrampInfo **info, gboolean aot) +{ + g_assert_not_reached (); + return NULL; +} + +gpointer +mono_arch_get_throw_corlib_exception (MonoTrampInfo **info, gboolean aot) +{ + g_assert_not_reached (); + return NULL; +} + +GSList* 
+mono_arm_get_exception_trampolines (gboolean aot) +{ + g_assert_not_reached (); + return NULL; +} + +#endif /* !DISABLE_JIT */ + +void +mono_arch_exceptions_init (void) +{ + guint8 *tramp; + GSList *tramps, *l; + + if (mono_aot_only) { + tramp = mono_aot_get_trampoline ("llvm_throw_corlib_exception_trampoline"); + mono_register_jit_icall (tramp, "llvm_throw_corlib_exception_trampoline", NULL, TRUE); + tramp = mono_aot_get_trampoline ("llvm_throw_corlib_exception_abs_trampoline"); + mono_register_jit_icall (tramp, "llvm_throw_corlib_exception_abs_trampoline", NULL, TRUE); + tramp = mono_aot_get_trampoline ("llvm_resume_unwind_trampoline"); + mono_register_jit_icall (tramp, "llvm_resume_unwind_trampoline", NULL, TRUE); + } else { + tramps = mono_arm_get_exception_trampolines (FALSE); + for (l = tramps; l; l = l->next) { + MonoTrampInfo *info = l->data; + + mono_register_jit_icall (info->code, g_strdup (info->name), NULL, TRUE); + mono_tramp_info_register (info, NULL); + } + g_slist_free (tramps); + } +} + +/* + * mono_arm_throw_exception: + * + * This function is called by the exception trampolines. + * FP_REGS points to the 8 callee saved fp regs. + */ +void +mono_arm_throw_exception (gpointer arg, mgreg_t pc, mgreg_t *int_regs, gdouble *fp_regs, gboolean corlib, gboolean rethrow) +{ + MonoContext ctx; + MonoObject *exc = NULL; + guint32 ex_token_index, ex_token; + + if (!corlib) + exc = arg; + else { + ex_token_index = (guint64)arg; + ex_token = MONO_TOKEN_TYPE_DEF | ex_token_index; + exc = (MonoObject*)mono_exception_from_token (mono_defaults.corlib, ex_token); + } + + /* Adjust pc so it points into the call instruction */ + pc -= 4; + + /* Initialize a ctx based on the arguments */ + memset (&ctx, 0, sizeof (MonoContext)); + memcpy (&(ctx.regs [0]), int_regs, sizeof (mgreg_t) * 32); + memcpy (&(ctx.fregs [ARMREG_D8]), fp_regs, sizeof (double) * 8); + ctx.pc = pc; + + if (mono_object_isinst (exc, mono_defaults.exception_class)) { + MonoException *mono_ex = (MonoException*)exc; + if (!rethrow) + mono_ex->stack_trace = NULL; + } + + mono_handle_exception (&ctx, exc); + + mono_restore_context (&ctx); +} + +void +mono_arm_resume_unwind (gpointer arg, mgreg_t pc, mgreg_t *int_regs, gdouble *fp_regs, gboolean corlib, gboolean rethrow) +{ + MonoContext ctx; + + /* Adjust pc so it points into the call instruction */ + pc -= 4; + + /* Initialize a ctx based on the arguments */ + memset (&ctx, 0, sizeof (MonoContext)); + memcpy (&(ctx.regs [0]), int_regs, sizeof (mgreg_t) * 32); + memcpy (&(ctx.fregs [ARMREG_D8]), fp_regs, sizeof (double) * 8); + ctx.pc = pc; + + mono_resume_unwind (&ctx); +} + +/* + * mono_arch_unwind_frame: + * + * See exceptions-amd64.c for docs; + */ +gboolean +mono_arch_unwind_frame (MonoDomain *domain, MonoJitTlsData *jit_tls, + MonoJitInfo *ji, MonoContext *ctx, + MonoContext *new_ctx, MonoLMF **lmf, + mgreg_t **save_locations, + StackFrameInfo *frame) +{ + gpointer ip = MONO_CONTEXT_GET_IP (ctx); + + memset (frame, 0, sizeof (StackFrameInfo)); + frame->ji = ji; + + *new_ctx = *ctx; + + if (ji != NULL) { + mgreg_t regs [MONO_MAX_IREGS + 8 + 1]; + guint8 *cfa; + guint32 unwind_info_len; + guint8 *unwind_info; + + frame->type = FRAME_TYPE_MANAGED; + + unwind_info = mono_jinfo_get_unwind_info (ji, &unwind_info_len); + + memcpy (regs, &new_ctx->regs, sizeof (mgreg_t) * 32); + /* v8..v15 are callee saved */ + memcpy (regs + MONO_MAX_IREGS, &(new_ctx->fregs [8]), sizeof (mgreg_t) * 8); + + mono_unwind_frame (unwind_info, unwind_info_len, ji->code_start, + (guint8*)ji->code_start 
+ ji->code_size, + ip, NULL, regs, MONO_MAX_IREGS + 8, + save_locations, MONO_MAX_IREGS, &cfa); + + memcpy (&new_ctx->regs, regs, sizeof (mgreg_t) * 32); + memcpy (&(new_ctx->fregs [8]), regs + MONO_MAX_IREGS, sizeof (mgreg_t) * 8); + + new_ctx->pc = regs [ARMREG_LR]; + new_ctx->regs [ARMREG_SP] = (mgreg_t)cfa; + + if (*lmf && (*lmf)->gregs [MONO_ARCH_LMF_REG_SP] && (MONO_CONTEXT_GET_SP (ctx) >= (gpointer)(*lmf)->gregs [MONO_ARCH_LMF_REG_SP])) { + /* remove any unused lmf */ + *lmf = (gpointer)(((gsize)(*lmf)->previous_lmf) & ~3); + } + + /* we subtract 1, so that the IP points into the call instruction */ + new_ctx->pc--; + + return TRUE; + } else if (*lmf) { + if (((gsize)(*lmf)->previous_lmf) & 2) { + /* + * This LMF entry is created by the soft debug code to mark transitions to + * managed code done during invokes. + */ + MonoLMFExt *ext = (MonoLMFExt*)(*lmf); + + g_assert (ext->debugger_invoke); + + memcpy (new_ctx, &ext->ctx, sizeof (MonoContext)); + + *lmf = (gpointer)(((gsize)(*lmf)->previous_lmf) & ~3); + + frame->type = FRAME_TYPE_DEBUGGER_INVOKE; + + return TRUE; + } + + frame->type = FRAME_TYPE_MANAGED_TO_NATIVE; + + ji = mini_jit_info_table_find (domain, (gpointer)(*lmf)->pc, NULL); + if (!ji) + return FALSE; + + g_assert (MONO_ARCH_LMF_REGS == ((0x3ff << 19) | (1 << ARMREG_FP) | (1 << ARMREG_SP))); + memcpy (&new_ctx->regs [ARMREG_R19], &(*lmf)->gregs [0], sizeof (mgreg_t) * 10); + new_ctx->regs [ARMREG_FP] = (*lmf)->gregs [MONO_ARCH_LMF_REG_FP]; + new_ctx->regs [ARMREG_SP] = (*lmf)->gregs [MONO_ARCH_LMF_REG_SP]; + new_ctx->pc = (*lmf)->pc; + + /* we subtract 1, so that the IP points into the call instruction */ + new_ctx->pc--; + + *lmf = (gpointer)(((gsize)(*lmf)->previous_lmf) & ~3); + + return TRUE; + } + + return FALSE; +} + +void +mono_arch_sigctx_to_monoctx (void *sigctx, MonoContext *mctx) +{ + mono_sigctx_to_monoctx (sigctx, mctx); +} + +void +mono_arch_monoctx_to_sigctx (MonoContext *mctx, void *sigctx) +{ + mono_monoctx_to_sigctx (mctx, sigctx); +} + +/* + * handle_signal_exception: + * + * Called when resuming from a signal handler. + */ +static void +handle_signal_exception (gpointer obj) +{ + MonoJitTlsData *jit_tls = mono_native_tls_get_value (mono_jit_tls_id); + MonoContext ctx; + + memcpy (&ctx, &jit_tls->ex_ctx, sizeof (MonoContext)); + + mono_handle_exception (&ctx, obj); + + mono_restore_context (&ctx); +} + +/* + * This is the function called from the signal handler + */ +gboolean +mono_arch_handle_exception (void *ctx, gpointer obj) +{ +#if defined(MONO_CROSS_COMPILE) + g_assert_not_reached (); +#else + MonoJitTlsData *jit_tls; + void *sigctx = ctx; + + /* + * Resume into the normal stack and handle the exception there.
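* The resume works by rewriting the signal context below: r0 is set to + * the exception object, pc to handle_signal_exception () and sp is moved + * down past the red zone, so returning from the signal handler continues + * in handle_signal_exception () on the normal stack.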
+ */ + jit_tls = mono_native_tls_get_value (mono_jit_tls_id); + + /* Pass the ctx parameter in TLS */ + mono_arch_sigctx_to_monoctx (sigctx, &jit_tls->ex_ctx); + /* The others in registers */ + UCONTEXT_REG_R0 (sigctx) = (gsize)obj; + + UCONTEXT_REG_PC (sigctx) = (gsize)handle_signal_exception; + UCONTEXT_REG_SP (sigctx) = UCONTEXT_REG_SP (sigctx) - MONO_ARCH_REDZONE_SIZE; +#endif + + return TRUE; +} + +gpointer +mono_arch_ip_from_context (void *sigctx) +{ +#ifdef MONO_CROSS_COMPILE + g_assert_not_reached (); + return NULL; +#else + return (gpointer)UCONTEXT_REG_PC (sigctx); +#endif +} + +void +mono_arch_setup_async_callback (MonoContext *ctx, void (*async_cb)(void *fun), gpointer user_data) +{ + mgreg_t sp = (mgreg_t)MONO_CONTEXT_GET_SP (ctx); + + // FIXME: + g_assert (!user_data); + + /* Allocate a stack frame */ + sp -= 32; + MONO_CONTEXT_SET_SP (ctx, sp); + + mono_arch_setup_resume_sighandler_ctx (ctx, async_cb); +} + +/* + * mono_arch_setup_resume_sighandler_ctx: + * + * Setup CTX so execution continues at FUNC. + */ +void +mono_arch_setup_resume_sighandler_ctx (MonoContext *ctx, gpointer func) +{ + MONO_CONTEXT_SET_IP (ctx,func); +} diff --git a/mono/mini/mini-arm64-gsharedvt.c b/mono/mini/mini-arm64-gsharedvt.c new file mode 100644 index 00000000000..3d9664b56bf --- /dev/null +++ b/mono/mini/mini-arm64-gsharedvt.c @@ -0,0 +1,418 @@ +/* + * mini-arm64-gsharedvt.c: gsharedvt support code for arm64 + * + * Authors: + * Zoltan Varga + * + * Copyright 2013 Xamarin, Inc (http://www.xamarin.com) + * Licensed under the MIT license. See LICENSE file in the project root for full license information. + */ +#include "mini.h" +#include "mini-arm64.h" +#include "mini-arm64-gsharedvt.h" + +/* + * GSHAREDVT + */ +#ifdef MONO_ARCH_GSHARED_SUPPORTED + +#define ALIGN_TO(val,align) ((((guint64)val) + ((align) - 1)) & ~((align) - 1)) + +void +mono_arm_gsharedvt_init (void) +{ + mono_aot_register_jit_icall ("mono_arm_start_gsharedvt_call", mono_arm_start_gsharedvt_call); +} + +gboolean +mono_arch_gsharedvt_sig_supported (MonoMethodSignature *sig) +{ + /* + if (sig->ret && is_variable_size (sig->ret)) + return FALSE; + */ + return TRUE; +} + +static inline void +add_to_map (GPtrArray *map, int src, int dst) +{ + g_ptr_array_add (map, GUINT_TO_POINTER (src)); + g_ptr_array_add (map, GUINT_TO_POINTER (dst)); +} + +/* + * Slot mapping: + * 0..8 - r0..r8 + * 9..16 - d0..d7 + * 17.. 
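- stack slots + */ + +/* + * For illustration (these values follow from the mapping above, with + * NUM_GSHAREDVT_ARG_GREGS == 9 and NUM_GSHAREDVT_ARG_FREGS == 8): + * map_reg (2) -> 2 (r2), map_freg (3) -> 12 (d3), map_stack_slot (0) -> 17 + * (the first stack slot). + */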
+ +static inline int +map_reg (int reg) +{ + return reg; +} + +static inline int +map_freg (int reg) +{ + return reg + NUM_GSHAREDVT_ARG_GREGS; +} + +static inline int +map_stack_slot (int slot) +{ + return slot + NUM_GSHAREDVT_ARG_GREGS + NUM_GSHAREDVT_ARG_FREGS; +} + +static int +get_arg_slots (ArgInfo *ainfo, int **out_slots) +{ + int sreg = ainfo->reg; + int sslot = ainfo->offset / 8; + int *src = NULL; + int i, nsrc; + + switch (ainfo->storage) { + case ArgInIReg: + case ArgVtypeByRef: + nsrc = 1; + src = g_malloc (nsrc * sizeof (int)); + src [0] = map_reg (sreg); + break; + case ArgVtypeByRefOnStack: + nsrc = 1; + src = g_malloc (nsrc * sizeof (int)); + src [0] = map_stack_slot (sslot); + break; + case ArgInFReg: + case ArgInFRegR4: + nsrc = 1; + src = g_malloc (nsrc * sizeof (int)); + src [0] = map_freg (sreg); + break; + case ArgHFA: + nsrc = ainfo->nregs; + src = g_malloc (nsrc * sizeof (int)); + for (i = 0; i < ainfo->nregs; ++i) + src [i] = map_freg (sreg + i); + break; + case ArgVtypeInIRegs: + nsrc = ainfo->nregs; + src = g_malloc (nsrc * sizeof (int)); + for (i = 0; i < ainfo->nregs; ++i) + src [i] = map_reg (sreg + i); + break; + case ArgOnStack: + nsrc = 1; + src = g_malloc (nsrc * sizeof (int)); + src [0] = map_stack_slot (sslot); + break; + case ArgVtypeOnStack: + nsrc = ainfo->size / 8; + src = g_malloc (nsrc * sizeof (int)); + for (i = 0; i < nsrc; ++i) + src [i] = map_stack_slot (sslot + i); + break; + default: + NOT_IMPLEMENTED; + break; + } + + *out_slots = src; + return nsrc; +} + +/* + * mono_arch_get_gsharedvt_call_info: + * + * See mini-x86.c for documentation. + */ +gpointer +mono_arch_get_gsharedvt_call_info (gpointer addr, MonoMethodSignature *normal_sig, MonoMethodSignature *gsharedvt_sig, gboolean gsharedvt_in, gint32 vcall_offset, gboolean calli) +{ + GSharedVtCallInfo *info; + CallInfo *caller_cinfo, *callee_cinfo; + MonoMethodSignature *caller_sig, *callee_sig; + int aindex, i; + gboolean var_ret = FALSE; + CallInfo *cinfo, *gcinfo; + MonoMethodSignature *sig, *gsig; + GPtrArray *map; + + if (gsharedvt_in) { + caller_sig = normal_sig; + callee_sig = gsharedvt_sig; + caller_cinfo = mono_arch_get_call_info (NULL, caller_sig); + callee_cinfo = mono_arch_get_call_info (NULL, callee_sig); + } else { + callee_sig = normal_sig; + caller_sig = gsharedvt_sig; + callee_cinfo = mono_arch_get_call_info (NULL, callee_sig); + caller_cinfo = mono_arch_get_call_info (NULL, caller_sig); + } + + /* + * If GSHAREDVT_IN is true, this means we are transitioning from normal to gsharedvt code. The caller uses the + * normal call signature, while the callee uses the gsharedvt signature. + * If GSHAREDVT_IN is false, it's the other way around. + */ + + /* sig/cinfo describes the normal call, while gsig/gcinfo describes the gsharedvt call */ + if (gsharedvt_in) { + sig = caller_sig; + gsig = callee_sig; + cinfo = caller_cinfo; + gcinfo = callee_cinfo; + } else { + sig = callee_sig; + gsig = caller_sig; + cinfo = callee_cinfo; + gcinfo = caller_cinfo; + } + + if (gcinfo->ret.gsharedvt) { + /* + * The return type is gsharedvt + */ + var_ret = TRUE; + } + + /* + * The stack looks like this: + * + * + * We have to map the stack slots in the caller's frame to the stack slots in the callee's frame.
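* For example, a gsharedvt argument whose address arrives in r1 and which + * is passed by value in two slots to the callee is encoded below as + * src = (2 << 6) | map_reg (1) | (GSHAREDVT_ARG_BYREF_TO_BYVAL << 18).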
+ */ + map = g_ptr_array_new (); + + for (aindex = 0; aindex < cinfo->nargs; ++aindex) { + ArgInfo *ainfo = &caller_cinfo->args [aindex]; + ArgInfo *ainfo2 = &callee_cinfo->args [aindex]; + int *src = NULL, *dst = NULL; + int nsrc, ndst, nslots, src_slot, arg_marshal; + + /* + * The src descriptor looks like this: + * - 6 bits src slot + * - 12 bits number of slots + * - 4 bits marshal type (GSHAREDVT_ARG_...) + * - 4 bits size/sign descriptor (GSHAREDVT_ARG_SIZE) + * - 4 bits offset inside stack slots + */ + arg_marshal = GSHAREDVT_ARG_NONE; + + if (ainfo->gsharedvt) { + /* Pass the value whose address is received in a reg by value */ + g_assert (!ainfo2->gsharedvt); + ndst = get_arg_slots (ainfo2, &dst); + nsrc = 1; + src = g_new0 (int, 1); + if (ainfo->storage == ArgVtypeByRef) + src_slot = map_reg (ainfo->reg); + else + src_slot = map_stack_slot (ainfo->offset / 8); + g_assert (ndst < 256); + g_assert (src_slot < 64); + src [0] = (ndst << 6) | src_slot; + if (ainfo2->storage == ArgHFA && ainfo2->esize == 4) + arg_marshal = GSHAREDVT_ARG_BYREF_TO_BYVAL_HFAR4; + else if (ainfo2->storage == ArgVtypeByRef || ainfo2->storage == ArgVtypeByRefOnStack) + arg_marshal = GSHAREDVT_ARG_BYREF_TO_BYREF; + else + arg_marshal = GSHAREDVT_ARG_BYREF_TO_BYVAL; + } else { + nsrc = get_arg_slots (ainfo, &src); + } + if (ainfo2->storage == ArgVtypeByRef && ainfo2->gsharedvt) { + /* Pass the address of the first src slot in a reg */ + if (ainfo->storage != ArgVtypeByRef) { + if (ainfo->storage == ArgHFA && ainfo->esize == 4) { + arg_marshal = GSHAREDVT_ARG_BYVAL_TO_BYREF_HFAR4; + g_assert (src [0] < 64); + g_assert (nsrc < 256); + src [0] |= (nsrc << 6); + } else { + arg_marshal = GSHAREDVT_ARG_BYVAL_TO_BYREF; + } + } + ndst = 1; + dst = g_new0 (int, 1); + dst [0] = map_reg (ainfo2->reg); + } else if (ainfo2->storage == ArgVtypeByRefOnStack && ainfo2->gsharedvt) { + /* Pass the address of the first src slot in a stack slot */ + if (ainfo->storage != ArgVtypeByRef) + arg_marshal = GSHAREDVT_ARG_BYVAL_TO_BYREF; + ndst = 1; + dst = g_new0 (int, 1); + dst [0] = map_stack_slot (ainfo2->offset / 8); + } else { + ndst = get_arg_slots (ainfo2, &dst); + } + if (nsrc) + src [0] |= (arg_marshal << 18); + if (ainfo->storage == ArgOnStack && ainfo->slot_size != 8) { + GSharedVtArgSize arg_size = GSHAREDVT_ARG_SIZE_NONE; + + /* + * On IOS, stack arguments smaller than 8 bytes can + * share a stack slot. Encode this information into + * the descriptor. + */ + switch (ainfo->slot_size) { + case 1: + arg_size = ainfo->sign ? GSHAREDVT_ARG_SIZE_I1 : GSHAREDVT_ARG_SIZE_U1; + break; + case 2: + arg_size = ainfo->sign ? GSHAREDVT_ARG_SIZE_I2 : GSHAREDVT_ARG_SIZE_U2; + break; + case 4: + arg_size = ainfo->sign ? GSHAREDVT_ARG_SIZE_I4 : GSHAREDVT_ARG_SIZE_U4; + break; + default: + NOT_IMPLEMENTED; + break; + } + /* Encode the size/sign */ + src [0] |= (arg_size << 22); + /* Encode the offset inside the stack slot */ + src [0] |= ((ainfo->offset % 8) << 26); + if (ainfo2->storage == ArgOnStack) + dst [0] |= ((ainfo2->offset % 8) << 26); + } else if (ainfo2->storage == ArgOnStack && ainfo2->slot_size != 8) { + /* The caller passes in an address, need to store it into a stack slot */ + + GSharedVtArgSize arg_size = GSHAREDVT_ARG_SIZE_NONE; + switch (ainfo2->slot_size) { + case 1: + arg_size = ainfo2->sign ? GSHAREDVT_ARG_SIZE_I1 : GSHAREDVT_ARG_SIZE_U1; + break; + case 2: + arg_size = ainfo2->sign ? GSHAREDVT_ARG_SIZE_I2 : GSHAREDVT_ARG_SIZE_U2; + break; + case 4: + arg_size = ainfo2->sign ? 
GSHAREDVT_ARG_SIZE_I4 : GSHAREDVT_ARG_SIZE_U4; + break; + default: + NOT_IMPLEMENTED; + break; + } + /* Encode the size/sign */ + src [0] |= (arg_size << 22); + /* Encode the offset inside the stack slot */ + dst [0] |= ((ainfo2->offset % 8) << 26); + } + nslots = MIN (nsrc, ndst); + + for (i = 0; i < nslots; ++i) + add_to_map (map, src [i], dst [i]); + + g_free (src); + g_free (dst); + } + + if (cinfo->ret.storage == ArgVtypeByRef) { + /* Both the caller and the callee pass the vtype ret address in r8 */ + g_assert (cinfo->ret.storage == gcinfo->ret.storage); + add_to_map (map, map_reg (ARMREG_R8), map_reg (ARMREG_R8)); + } + + info = mono_domain_alloc0 (mono_domain_get (), sizeof (GSharedVtCallInfo) + (map->len * sizeof (int))); + info->addr = addr; + info->stack_usage = callee_cinfo->stack_usage; + info->ret_marshal = GSHAREDVT_RET_NONE; + info->gsharedvt_in = gsharedvt_in ? 1 : 0; + info->vret_slot = -1; + info->calli = calli; + + if (var_ret) { + g_assert (gcinfo->ret.gsharedvt); + info->vret_arg_reg = map_reg (ARMREG_R8); + } else { + info->vret_arg_reg = -1; + } + + info->vcall_offset = vcall_offset; + info->map_count = map->len / 2; + for (i = 0; i < map->len; ++i) + info->map [i] = GPOINTER_TO_UINT (g_ptr_array_index (map, i)); + g_ptr_array_free (map, TRUE); + + /* Compute return value marshalling */ + if (var_ret) { + switch (cinfo->ret.storage) { + case ArgInIReg: + if (!gsharedvt_in || sig->ret->byref) { + info->ret_marshal = GSHAREDVT_RET_I8; + } else { + switch (sig->ret->type) { + case MONO_TYPE_I1: + info->ret_marshal = GSHAREDVT_RET_I1; + break; + case MONO_TYPE_U1: + case MONO_TYPE_BOOLEAN: + info->ret_marshal = GSHAREDVT_RET_U1; + break; + case MONO_TYPE_I2: + info->ret_marshal = GSHAREDVT_RET_I2; + break; + case MONO_TYPE_U2: + case MONO_TYPE_CHAR: + info->ret_marshal = GSHAREDVT_RET_U2; + break; + case MONO_TYPE_I4: + info->ret_marshal = GSHAREDVT_RET_I4; + break; + case MONO_TYPE_U4: + info->ret_marshal = GSHAREDVT_RET_U4; + break; + default: + info->ret_marshal = GSHAREDVT_RET_I8; + break; + } + } + break; + case ArgInFReg: + info->ret_marshal = GSHAREDVT_RET_R8; + break; + case ArgInFRegR4: + info->ret_marshal = GSHAREDVT_RET_R4; + break; + case ArgVtypeInIRegs: + info->ret_marshal = GSHAREDVT_RET_IREGS_1 - 1 + cinfo->ret.nregs; + break; + case ArgHFA: + if (cinfo->ret.esize == 4) + info->ret_marshal = GSHAREDVT_RET_HFAR4_1 - 1 + cinfo->ret.nregs; + else + info->ret_marshal = GSHAREDVT_RET_HFAR8_1 - 1 + cinfo->ret.nregs; + break; + case ArgVtypeByRef: + /* No conversion needed */ + break; + default: + g_assert_not_reached (); + } + } + + if (gsharedvt_in && var_ret && cinfo->ret.storage != ArgVtypeByRef) { + /* Allocate stack space for the return value */ + info->vret_slot = map_stack_slot (info->stack_usage / sizeof (gpointer)); + info->stack_usage += mono_type_stack_size_internal (normal_sig->ret, NULL, FALSE) + sizeof (gpointer); + } + + info->stack_usage = ALIGN_TO (info->stack_usage, MONO_ARCH_FRAME_ALIGNMENT); + + return info; +} + +#else + +void +mono_arm_gsharedvt_init (void) +{ +} + +#endif /* MONO_ARCH_GSHARED_SUPPORTED */ \ No newline at end of file diff --git a/mono/mini/mini-arm64-gsharedvt.h b/mono/mini/mini-arm64-gsharedvt.h new file mode 100644 index 00000000000..b828218887d --- /dev/null +++ b/mono/mini/mini-arm64-gsharedvt.h @@ -0,0 +1,84 @@ +#ifndef __MINI_ARM64_GSHAREDVT_H__ +#define __MINI_ARM64_GSHAREDVT_H__ + +/* Argument marshallings for calls between gsharedvt and normal code */ +typedef enum { + GSHAREDVT_ARG_NONE = 0, + 
GSHAREDVT_ARG_BYVAL_TO_BYREF = 1, + GSHAREDVT_ARG_BYVAL_TO_BYREF_HFAR4 = 2, + GSHAREDVT_ARG_BYREF_TO_BYVAL = 3, + GSHAREDVT_ARG_BYREF_TO_BYVAL_HFAR4 = 4, + GSHAREDVT_ARG_BYREF_TO_BYREF = 5 +} GSharedVtArgMarshal; + +/* For arguments passed on the stack on iOS */ +typedef enum { + GSHAREDVT_ARG_SIZE_NONE = 0, + GSHAREDVT_ARG_SIZE_I1 = 1, + GSHAREDVT_ARG_SIZE_U1 = 2, + GSHAREDVT_ARG_SIZE_I2 = 3, + GSHAREDVT_ARG_SIZE_U2 = 4, + GSHAREDVT_ARG_SIZE_I4 = 5, + GSHAREDVT_ARG_SIZE_U4 = 6, +} GSharedVtArgSize; + +/* Return value marshalling for calls between gsharedvt and normal code */ +typedef enum { + GSHAREDVT_RET_NONE = 0, + GSHAREDVT_RET_I8 = 1, + GSHAREDVT_RET_I1 = 2, + GSHAREDVT_RET_U1 = 3, + GSHAREDVT_RET_I2 = 4, + GSHAREDVT_RET_U2 = 5, + GSHAREDVT_RET_I4 = 6, + GSHAREDVT_RET_U4 = 7, + GSHAREDVT_RET_R8 = 8, + GSHAREDVT_RET_R4 = 9, + GSHAREDVT_RET_IREGS_1 = 10, + GSHAREDVT_RET_IREGS_2 = 11, + GSHAREDVT_RET_IREGS_3 = 12, + GSHAREDVT_RET_IREGS_4 = 13, + GSHAREDVT_RET_IREGS_5 = 14, + GSHAREDVT_RET_IREGS_6 = 15, + GSHAREDVT_RET_IREGS_7 = 16, + GSHAREDVT_RET_IREGS_8 = 17, + GSHAREDVT_RET_HFAR8_1 = 18, + GSHAREDVT_RET_HFAR8_2 = 19, + GSHAREDVT_RET_HFAR8_3 = 20, + GSHAREDVT_RET_HFAR8_4 = 21, + GSHAREDVT_RET_HFAR4_1 = 22, + GSHAREDVT_RET_HFAR4_2 = 23, + GSHAREDVT_RET_HFAR4_3 = 24, + GSHAREDVT_RET_HFAR4_4 = 25, + GSHAREDVT_RET_NUM = 26 +} GSharedVtRetMarshal; + +typedef struct { + /* Method address to call */ + gpointer addr; + /* The trampoline reads this, so keep the size explicit */ + int ret_marshal; + /* If ret_marshal != NONE, this is the reg of the vret arg, else -1 */ + /* Equivalent of vret_arg_slot in x86 implementation. */ + int vret_arg_reg; + /* The stack slot where the return value will be stored */ + int vret_slot; + int stack_usage, map_count; + /* If not -1, then make a virtual call using this vtable offset */ + int vcall_offset; + /* If 1, make an indirect call to the address in the rgctx reg */ + int calli; + /* Whether this is an in or an out call */ + int gsharedvt_in; + /* Maps stack slots/registers in the caller to the stack slots/registers in the callee */ + int map [MONO_ZERO_LEN_ARRAY]; +} GSharedVtCallInfo; + +/* Number of argument registers (r0..r8) */ +#define NUM_GSHAREDVT_ARG_GREGS 9 +#define NUM_GSHAREDVT_ARG_FREGS 8 + +gpointer +mono_arm_start_gsharedvt_call (GSharedVtCallInfo *info, gpointer *caller, gpointer *callee, gpointer mrgctx_reg); + +#endif /* __MINI_ARM64_GSHAREDVT_H__ */ diff --git a/mono/mini/mini-arm64.c b/mono/mini/mini-arm64.c index 517f6c68a9d..434d88c3653 100644 --- a/mono/mini/mini-arm64.c +++ b/mono/mini/mini-arm64.c @@ -1 +1,5216 @@ -#include "../../../mono-extensions/mono/mini/mini-arm64.c" +/* + * mini-arm64.c: ARM64 backend for the Mono code generator + * + * Copyright 2013 Xamarin, Inc (http://www.xamarin.com) + * + * Based on mini-arm.c: + * + * Authors: + * Paolo Molaro (lupus@ximian.com) + * Dietmar Maurer (dietmar@ximian.com) + * + * (C) 2003 Ximian, Inc.
+ * Copyright 2003-2011 Novell, Inc (http://www.novell.com) + * Copyright 2011 Xamarin, Inc (http://www.xamarin.com) + */ + +#include "mini.h" +#include "cpu-arm64.h" +#include "ir-emit.h" + +#include +#include +#include +#include + +/* + * Documentation: + * + * - ARM(R) Architecture Reference Manual, ARMv8, for ARMv8-A architecture profile (DDI0487A_a_armv8_arm.pdf) + * - Procedure Call Standard for the ARM 64-bit Architecture (AArch64) (IHI0055B_aapcs64.pdf) + * - ELF for the ARM 64-bit Architecture (IHI0056B_aaelf64.pdf) + * + * Register usage: + * - ip0/ip1/lr are used as temporary registers + * - r27 is used as the rgctx/imt register + * - r28 is used to access arguments passed on the stack + * - d16/d17 are used as fp temporary registers + */ + +#define ALIGN_TO(val,align) ((((guint64)val) + ((align) - 1)) & ~((align) - 1)) + +#define FP_TEMP_REG ARMREG_D16 +#define FP_TEMP_REG2 ARMREG_D17 + +#define THUNK_SIZE (4 * 4) + +/* The single step trampoline */ +static gpointer ss_trampoline; + +/* The breakpoint trampoline */ +static gpointer bp_trampoline; + +static gboolean ios_abi; + +static __attribute__((warn_unused_result)) guint8* emit_load_regset (guint8 *code, guint64 regs, int basereg, int offset); + +const char* +mono_arch_regname (int reg) +{ + static const char * rnames[] = { + "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", + "r10", "r11", "r12", "r13", "r14", "r15", "r16", "r17", "r18", "r19", + "r20", "r21", "r22", "r23", "r24", "r25", "r26", "r27", "r28", "fp", + "lr", "sp" + }; + if (reg >= 0 && reg < 32) + return rnames [reg]; + return "unknown"; +} + +const char* +mono_arch_fregname (int reg) +{ + static const char * rnames[] = { + "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d8", "d9", + "d10", "d11", "d12", "d13", "d14", "d15", "d16", "d17", "d18", "d19", + "d20", "d21", "d22", "d23", "d24", "d25", "d26", "d27", "d28", "d29", + "d30", "d31" + }; + if (reg >= 0 && reg < 32) + return rnames [reg]; + return "unknown fp"; +} + +int +mono_arch_get_argument_info (MonoMethodSignature *csig, int param_count, MonoJitArgumentInfo *arg_info) +{ + NOT_IMPLEMENTED; + return 0; +} + +#define MAX_ARCH_DELEGATE_PARAMS 7 + +static gpointer +get_delegate_invoke_impl (gboolean has_target, int param_count, guint32 *code_size) +{ + guint8 *code, *start; + + if (has_target) { + start = code = mono_global_codeman_reserve (12); + + /* Replace the this argument with the target */ + arm_ldrx (code, ARMREG_IP0, ARMREG_R0, MONO_STRUCT_OFFSET (MonoDelegate, method_ptr)); + arm_ldrx (code, ARMREG_R0, ARMREG_R0, MONO_STRUCT_OFFSET (MonoDelegate, target)); + arm_brx (code, ARMREG_IP0); + + g_assert ((code - start) <= 12); + + mono_arch_flush_icache (start, 12); + } else { + int size, i; + + size = 8 + param_count * 4; + start = code = mono_global_codeman_reserve (size); + + arm_ldrx (code, ARMREG_IP0, ARMREG_R0, MONO_STRUCT_OFFSET (MonoDelegate, method_ptr)); + /* slide down the arguments */ + for (i = 0; i < param_count; ++i) + arm_movx (code, i, i + 1); + arm_brx (code, ARMREG_IP0); + + g_assert ((code - start) <= size); + + mono_arch_flush_icache (start, size); + } + + if (code_size) + *code_size = code - start; + + return start; +} + +/* + * mono_arch_get_delegate_invoke_impls: + * + * Return a list of MonoAotTrampInfo structures for the delegate invoke impl + * trampolines.
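* + * For reference, the no-target variant emitted by get_delegate_invoke_impl () + * above for param_count == 2 comes out as: + * ldr ip0, [x0, #offsetof (MonoDelegate, method_ptr)]; mov x0, x1; mov x1, x2; br ip0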
+ */ +GSList* +mono_arch_get_delegate_invoke_impls (void) +{ + GSList *res = NULL; + guint8 *code; + guint32 code_len; + int i; + char *tramp_name; + + code = get_delegate_invoke_impl (TRUE, 0, &code_len); + res = g_slist_prepend (res, mono_tramp_info_create ("delegate_invoke_impl_has_target", code, code_len, NULL, NULL)); + + for (i = 0; i <= MAX_ARCH_DELEGATE_PARAMS; ++i) { + code = get_delegate_invoke_impl (FALSE, i, &code_len); + tramp_name = g_strdup_printf ("delegate_invoke_impl_target_%d", i); + res = g_slist_prepend (res, mono_tramp_info_create (tramp_name, code, code_len, NULL, NULL)); + g_free (tramp_name); + } + + return res; +} + +gpointer +mono_arch_get_delegate_invoke_impl (MonoMethodSignature *sig, gboolean has_target) +{ + guint8 *code, *start; + + /* + * vtypes are returned in registers, or using the dedicated r8 register, so + * they can be supported by delegate invokes. + */ + + if (has_target) { + static guint8* cached = NULL; + + if (cached) + return cached; + + if (mono_aot_only) + start = mono_aot_get_trampoline ("delegate_invoke_impl_has_target"); + else + start = get_delegate_invoke_impl (TRUE, 0, NULL); + mono_memory_barrier (); + cached = start; + return cached; + } else { + static guint8* cache [MAX_ARCH_DELEGATE_PARAMS + 1] = {NULL}; + int i; + + if (sig->param_count > MAX_ARCH_DELEGATE_PARAMS) + return NULL; + for (i = 0; i < sig->param_count; ++i) + if (!mono_is_regsize_var (sig->params [i])) + return NULL; + + code = cache [sig->param_count]; + if (code) + return code; + + if (mono_aot_only) { + char *name = g_strdup_printf ("delegate_invoke_impl_target_%d", sig->param_count); + start = mono_aot_get_trampoline (name); + g_free (name); + } else { + start = get_delegate_invoke_impl (FALSE, sig->param_count, NULL); + } + mono_memory_barrier (); + cache [sig->param_count] = start; + return start; + } + + return NULL; +} + +gpointer +mono_arch_get_delegate_virtual_invoke_impl (MonoMethodSignature *sig, MonoMethod *method, int offset, gboolean load_imt_reg) +{ + return NULL; +} + +gpointer +mono_arch_get_this_arg_from_call (mgreg_t *regs, guint8 *code) +{ + return (gpointer)regs [ARMREG_R0]; +} + +void +mono_arch_cpu_init (void) +{ +} + +void +mono_arch_init (void) +{ + mono_aot_register_jit_icall ("mono_arm_throw_exception", mono_arm_throw_exception); + mono_aot_register_jit_icall ("mono_arm_resume_unwind", mono_arm_resume_unwind); + + if (!mono_aot_only) + bp_trampoline = mini_get_breakpoint_trampoline (); + + mono_arm_gsharedvt_init (); + +#if defined(TARGET_IOS) + ios_abi = TRUE; +#endif +} + +void +mono_arch_cleanup (void) +{ +} + +guint32 +mono_arch_cpu_optimizations (guint32 *exclude_mask) +{ + *exclude_mask = 0; + return 0; +} + +guint32 +mono_arch_cpu_enumerate_simd_versions (void) +{ + return 0; +} + +void +mono_arch_register_lowlevel_calls (void) +{ +} + +void +mono_arch_finish_init (void) +{ +} + +/* The maximum length is 2 instructions */ +static guint8* +emit_imm (guint8 *code, int dreg, int imm) +{ + // FIXME: Optimize this + if (imm < 0) { + gint64 limm = imm; + arm_movnx (code, dreg, (~limm) & 0xffff, 0); + arm_movkx (code, dreg, (limm >> 16) & 0xffff, 16); + } else { + arm_movzx (code, dreg, imm & 0xffff, 0); + if (imm >> 16) + arm_movkx (code, dreg, (imm >> 16) & 0xffff, 16); + } + + return code; +} + +/* The maximum length is 4 instructions */ +static guint8* +emit_imm64 (guint8 *code, int dreg, guint64 imm) +{ + // FIXME: Optimize this + arm_movzx (code, dreg, imm & 0xffff, 0); + if ((imm >> 16) & 0xffff) + arm_movkx (code, dreg, (imm >> 16) & 
0xffff, 16); + if ((imm >> 32) & 0xffff) + arm_movkx (code, dreg, (imm >> 32) & 0xffff, 32); + if ((imm >> 48) & 0xffff) + arm_movkx (code, dreg, (imm >> 48) & 0xffff, 48); + + return code; +} + +guint8* +mono_arm_emit_imm64 (guint8 *code, int dreg, gint64 imm) +{ + return emit_imm64 (code, dreg, imm); +} + +/* + * emit_imm64_template: + * + * Emit a patchable code sequence for constructing a 64 bit immediate. + */ +static guint8* +emit_imm64_template (guint8 *code, int dreg) +{ + arm_movzx (code, dreg, 0, 0); + arm_movkx (code, dreg, 0, 16); + arm_movkx (code, dreg, 0, 32); + arm_movkx (code, dreg, 0, 48); + + return code; +} + +static inline __attribute__((warn_unused_result)) guint8* +emit_addw_imm (guint8 *code, int dreg, int sreg, int imm) +{ + if (!arm_is_arith_imm (imm)) { + code = emit_imm (code, ARMREG_LR, imm); + arm_addw (code, dreg, sreg, ARMREG_LR); + } else { + arm_addw_imm (code, dreg, sreg, imm); + } + return code; +} + +static inline __attribute__((warn_unused_result)) guint8* +emit_addx_imm (guint8 *code, int dreg, int sreg, int imm) +{ + if (!arm_is_arith_imm (imm)) { + code = emit_imm (code, ARMREG_LR, imm); + arm_addx (code, dreg, sreg, ARMREG_LR); + } else { + arm_addx_imm (code, dreg, sreg, imm); + } + return code; +} + +static inline __attribute__((warn_unused_result)) guint8* +emit_subw_imm (guint8 *code, int dreg, int sreg, int imm) +{ + if (!arm_is_arith_imm (imm)) { + code = emit_imm (code, ARMREG_LR, imm); + arm_subw (code, dreg, sreg, ARMREG_LR); + } else { + arm_subw_imm (code, dreg, sreg, imm); + } + return code; +} + +static inline __attribute__((warn_unused_result)) guint8* +emit_subx_imm (guint8 *code, int dreg, int sreg, int imm) +{ + if (!arm_is_arith_imm (imm)) { + code = emit_imm (code, ARMREG_LR, imm); + arm_subx (code, dreg, sreg, ARMREG_LR); + } else { + arm_subx_imm (code, dreg, sreg, imm); + } + return code; +} + +/* Emit sp+=imm. Clobbers ip0/ip1 */ +static inline __attribute__((warn_unused_result)) guint8* +emit_addx_sp_imm (guint8 *code, int imm) +{ + code = emit_imm (code, ARMREG_IP0, imm); + arm_movspx (code, ARMREG_IP1, ARMREG_SP); + arm_addx (code, ARMREG_IP1, ARMREG_IP1, ARMREG_IP0); + arm_movspx (code, ARMREG_SP, ARMREG_IP1); + return code; +} +
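/* + * Illustrative example of what emit_imm64 () above produces: the constant is + * built 16 bits at a time, and all-zero halfwords other than the first are + * skipped, since movz already clears the rest of the register. E.g. for + * imm == 0x12340000abcd: movz dreg, #0xabcd; movk dreg, #0x1234, lsl #32 + */ + +/* Emit sp-=imm.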
Clobbers ip0/ip1 */ +static inline __attribute__((warn_unused_result)) guint8* +emit_subx_sp_imm (guint8 *code, int imm) +{ + code = emit_imm (code, ARMREG_IP0, imm); + arm_movspx (code, ARMREG_IP1, ARMREG_SP); + arm_subx (code, ARMREG_IP1, ARMREG_IP1, ARMREG_IP0); + arm_movspx (code, ARMREG_SP, ARMREG_IP1); + return code; +} + +static inline __attribute__((warn_unused_result)) guint8* +emit_andw_imm (guint8 *code, int dreg, int sreg, int imm) +{ + // FIXME: + code = emit_imm (code, ARMREG_LR, imm); + arm_andw (code, dreg, sreg, ARMREG_LR); + + return code; +} + +static inline __attribute__((warn_unused_result)) guint8* +emit_andx_imm (guint8 *code, int dreg, int sreg, int imm) +{ + // FIXME: + code = emit_imm (code, ARMREG_LR, imm); + arm_andx (code, dreg, sreg, ARMREG_LR); + + return code; +} + +static inline __attribute__((warn_unused_result)) guint8* +emit_orrw_imm (guint8 *code, int dreg, int sreg, int imm) +{ + // FIXME: + code = emit_imm (code, ARMREG_LR, imm); + arm_orrw (code, dreg, sreg, ARMREG_LR); + + return code; +} + +static inline __attribute__((warn_unused_result)) guint8* +emit_orrx_imm (guint8 *code, int dreg, int sreg, int imm) +{ + // FIXME: + code = emit_imm (code, ARMREG_LR, imm); + arm_orrx (code, dreg, sreg, ARMREG_LR); + + return code; +} + +static inline __attribute__((warn_unused_result)) guint8* +emit_eorw_imm (guint8 *code, int dreg, int sreg, int imm) +{ + // FIXME: + code = emit_imm (code, ARMREG_LR, imm); + arm_eorw (code, dreg, sreg, ARMREG_LR); + + return code; +} + +static inline __attribute__((warn_unused_result)) guint8* +emit_eorx_imm (guint8 *code, int dreg, int sreg, int imm) +{ + // FIXME: + code = emit_imm (code, ARMREG_LR, imm); + arm_eorx (code, dreg, sreg, ARMREG_LR); + + return code; +} + +static inline __attribute__((warn_unused_result)) guint8* +emit_cmpw_imm (guint8 *code, int sreg, int imm) +{ + if (imm == 0) { + arm_cmpw (code, sreg, ARMREG_RZR); + } else { + // FIXME: + code = emit_imm (code, ARMREG_LR, imm); + arm_cmpw (code, sreg, ARMREG_LR); + } + + return code; +} + +static inline __attribute__((warn_unused_result)) guint8* +emit_cmpx_imm (guint8 *code, int sreg, int imm) +{ + if (imm == 0) { + arm_cmpx (code, sreg, ARMREG_RZR); + } else { + // FIXME: + code = emit_imm (code, ARMREG_LR, imm); + arm_cmpx (code, sreg, ARMREG_LR); + } + + return code; +} + +static inline __attribute__((warn_unused_result)) guint8* +emit_strb (guint8 *code, int rt, int rn, int imm) +{ + if (arm_is_strb_imm (imm)) { + arm_strb (code, rt, rn, imm); + } else { + g_assert (rt != ARMREG_IP0); + g_assert (rn != ARMREG_IP0); + code = emit_imm (code, ARMREG_IP0, imm); + arm_strb_reg (code, rt, rn, ARMREG_IP0); + } + return code; +} + +static inline __attribute__((warn_unused_result)) guint8* +emit_strh (guint8 *code, int rt, int rn, int imm) +{ + if (arm_is_strh_imm (imm)) { + arm_strh (code, rt, rn, imm); + } else { + g_assert (rt != ARMREG_IP0); + g_assert (rn != ARMREG_IP0); + code = emit_imm (code, ARMREG_IP0, imm); + arm_strh_reg (code, rt, rn, ARMREG_IP0); + } + return code; +} + +static inline __attribute__((warn_unused_result)) guint8* +emit_strw (guint8 *code, int rt, int rn, int imm) +{ + if (arm_is_strw_imm (imm)) { + arm_strw (code, rt, rn, imm); + } else { + g_assert (rt != ARMREG_IP0); + g_assert (rn != ARMREG_IP0); + code = emit_imm (code, ARMREG_IP0, imm); + arm_strw_reg (code, rt, rn, ARMREG_IP0); + } + return code; +} + +static inline __attribute__((warn_unused_result)) guint8* +emit_strfpw (guint8 *code, int rt, int rn, int imm) +{ + if 
(arm_is_strw_imm (imm)) { + arm_strfpw (code, rt, rn, imm); + } else { + g_assert (rn != ARMREG_IP0); + code = emit_imm (code, ARMREG_IP0, imm); + arm_addx (code, ARMREG_IP0, rn, ARMREG_IP0); + arm_strfpw (code, rt, ARMREG_IP0, 0); + } + return code; +} + +static inline __attribute__((warn_unused_result)) guint8* +emit_strfpx (guint8 *code, int rt, int rn, int imm) +{ + if (arm_is_strx_imm (imm)) { + arm_strfpx (code, rt, rn, imm); + } else { + g_assert (rn != ARMREG_IP0); + code = emit_imm (code, ARMREG_IP0, imm); + arm_addx (code, ARMREG_IP0, rn, ARMREG_IP0); + arm_strfpx (code, rt, ARMREG_IP0, 0); + } + return code; +} + +static inline __attribute__((warn_unused_result)) guint8* +emit_strx (guint8 *code, int rt, int rn, int imm) +{ + if (arm_is_strx_imm (imm)) { + arm_strx (code, rt, rn, imm); + } else { + g_assert (rt != ARMREG_IP0); + g_assert (rn != ARMREG_IP0); + code = emit_imm (code, ARMREG_IP0, imm); + arm_strx_reg (code, rt, rn, ARMREG_IP0); + } + return code; +} + +static inline __attribute__((warn_unused_result)) guint8* +emit_ldrb (guint8 *code, int rt, int rn, int imm) +{ + if (arm_is_pimm12_scaled (imm, 1)) { + arm_ldrb (code, rt, rn, imm); + } else { + g_assert (rt != ARMREG_IP0); + g_assert (rn != ARMREG_IP0); + code = emit_imm (code, ARMREG_IP0, imm); + arm_ldrb_reg (code, rt, rn, ARMREG_IP0); + } + return code; +} + +static inline __attribute__((warn_unused_result)) guint8* +emit_ldrsbx (guint8 *code, int rt, int rn, int imm) +{ + if (arm_is_pimm12_scaled (imm, 1)) { + arm_ldrsbx (code, rt, rn, imm); + } else { + g_assert (rt != ARMREG_IP0); + g_assert (rn != ARMREG_IP0); + code = emit_imm (code, ARMREG_IP0, imm); + arm_ldrsbx_reg (code, rt, rn, ARMREG_IP0); + } + return code; +} + +static inline __attribute__((warn_unused_result)) guint8* +emit_ldrh (guint8 *code, int rt, int rn, int imm) +{ + if (arm_is_pimm12_scaled (imm, 2)) { + arm_ldrh (code, rt, rn, imm); + } else { + g_assert (rt != ARMREG_IP0); + g_assert (rn != ARMREG_IP0); + code = emit_imm (code, ARMREG_IP0, imm); + arm_ldrh_reg (code, rt, rn, ARMREG_IP0); + } + return code; +} + +static inline __attribute__((warn_unused_result)) guint8* +emit_ldrshx (guint8 *code, int rt, int rn, int imm) +{ + if (arm_is_pimm12_scaled (imm, 2)) { + arm_ldrshx (code, rt, rn, imm); + } else { + g_assert (rt != ARMREG_IP0); + g_assert (rn != ARMREG_IP0); + code = emit_imm (code, ARMREG_IP0, imm); + arm_ldrshx_reg (code, rt, rn, ARMREG_IP0); + } + return code; +} + +static inline __attribute__((warn_unused_result)) guint8* +emit_ldrswx (guint8 *code, int rt, int rn, int imm) +{ + if (arm_is_pimm12_scaled (imm, 4)) { + arm_ldrswx (code, rt, rn, imm); + } else { + g_assert (rt != ARMREG_IP0); + g_assert (rn != ARMREG_IP0); + code = emit_imm (code, ARMREG_IP0, imm); + arm_ldrswx_reg (code, rt, rn, ARMREG_IP0); + } + return code; +} + +static inline __attribute__((warn_unused_result)) guint8* +emit_ldrw (guint8 *code, int rt, int rn, int imm) +{ + if (arm_is_pimm12_scaled (imm, 4)) { + arm_ldrw (code, rt, rn, imm); + } else { + g_assert (rn != ARMREG_IP0); + code = emit_imm (code, ARMREG_IP0, imm); + arm_ldrw_reg (code, rt, rn, ARMREG_IP0); + } + return code; +} + +static inline __attribute__((warn_unused_result)) guint8* +emit_ldrx (guint8 *code, int rt, int rn, int imm) +{ + if (arm_is_pimm12_scaled (imm, 8)) { + arm_ldrx (code, rt, rn, imm); + } else { + g_assert (rn != ARMREG_IP0); + code = emit_imm (code, ARMREG_IP0, imm); + arm_ldrx_reg (code, rt, rn, ARMREG_IP0); + } + return code; +} + +static inline 
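/* Same pattern as the other emit_ldrX/emit_strX helpers here: offsets that do not fit the scaled 12-bit immediate field are materialized in ip0 first. */ +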
__attribute__((warn_unused_result)) guint8* +emit_ldrfpw (guint8 *code, int rt, int rn, int imm) +{ + if (arm_is_pimm12_scaled (imm, 4)) { + arm_ldrfpw (code, rt, rn, imm); + } else { + g_assert (rn != ARMREG_IP0); + code = emit_imm (code, ARMREG_IP0, imm); + arm_addx (code, ARMREG_IP0, rn, ARMREG_IP0); + arm_ldrfpw (code, rt, ARMREG_IP0, 0); + } + return code; +} + +static inline __attribute__((warn_unused_result)) guint8* +emit_ldrfpx (guint8 *code, int rt, int rn, int imm) +{ + if (arm_is_pimm12_scaled (imm, 8)) { + arm_ldrfpx (code, rt, rn, imm); + } else { + g_assert (rn != ARMREG_IP0); + code = emit_imm (code, ARMREG_IP0, imm); + arm_addx (code, ARMREG_IP0, rn, ARMREG_IP0); + arm_ldrfpx (code, rt, ARMREG_IP0, 0); + } + return code; +} + +guint8* +mono_arm_emit_ldrx (guint8 *code, int rt, int rn, int imm) +{ + return emit_ldrx (code, rt, rn, imm); +} + +static guint8* +emit_call (MonoCompile *cfg, guint8* code, guint32 patch_type, gconstpointer data) +{ + /* + mono_add_patch_info_rel (cfg, code - cfg->native_code, patch_type, data, MONO_R_ARM64_IMM); + code = emit_imm64_template (code, ARMREG_LR); + arm_blrx (code, ARMREG_LR); + */ + mono_add_patch_info_rel (cfg, code - cfg->native_code, patch_type, data, MONO_R_ARM64_BL); + arm_bl (code, code); + cfg->thunk_area += THUNK_SIZE; + return code; +} + +static guint8* +emit_aotconst_full (MonoCompile *cfg, MonoJumpInfo **ji, guint8 *code, guint8 *start, int dreg, guint32 patch_type, gconstpointer data) +{ + if (cfg) + mono_add_patch_info (cfg, code - cfg->native_code, patch_type, data); + else + *ji = mono_patch_info_list_prepend (*ji, code - start, patch_type, data); + /* See arch_emit_got_access () in aot-compiler.c */ + arm_ldrx_lit (code, dreg, 0); + arm_nop (code); + arm_nop (code); + return code; +} + +static guint8* +emit_aotconst (MonoCompile *cfg, guint8 *code, int dreg, guint32 patch_type, gconstpointer data) +{ + return emit_aotconst_full (cfg, NULL, code, NULL, dreg, patch_type, data); +} + +/* + * mono_arm_emit_aotconst: + * + * Emit code to load an AOT constant into DREG. Usable from trampolines. 
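* The emitted sequence (see emit_aotconst_full () above) is a pc-relative + * ldr plus two nops, which the AOT compiler later rewrites into a GOT slot + * access once the final GOT offset is known (see arch_emit_got_access () + * in aot-compiler.c).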
+ */ +guint8* +mono_arm_emit_aotconst (gpointer ji, guint8 *code, guint8 *code_start, int dreg, guint32 patch_type, gconstpointer data) +{ + return emit_aotconst_full (NULL, (MonoJumpInfo**)ji, code, code_start, dreg, patch_type, data); +} + +static guint8* +emit_tls_get (guint8 *code, int dreg, int tls_offset) +{ + arm_mrs (code, dreg, ARM_MRS_REG_TPIDR_EL0); + if (tls_offset < 256) { + arm_ldrx (code, dreg, dreg, tls_offset); + } else { + code = emit_addx_imm (code, dreg, dreg, tls_offset); + arm_ldrx (code, dreg, dreg, 0); + } + return code; +} + +static guint8* +emit_tls_get_reg (guint8 *code, int dreg, int offset_reg) +{ + g_assert (offset_reg != ARMREG_IP0); + arm_mrs (code, ARMREG_IP0, ARM_MRS_REG_TPIDR_EL0); + arm_ldrx_reg (code, dreg, ARMREG_IP0, offset_reg); + return code; +} + +static guint8* +emit_tls_set (guint8 *code, int sreg, int tls_offset) +{ + int tmpreg = ARMREG_IP0; + + g_assert (sreg != tmpreg); + arm_mrs (code, tmpreg, ARM_MRS_REG_TPIDR_EL0); + if (tls_offset < 256) { + arm_strx (code, sreg, tmpreg, tls_offset); + } else { + code = emit_addx_imm (code, tmpreg, tmpreg, tls_offset); + arm_strx (code, sreg, tmpreg, 0); + } + return code; +} + + +static guint8* +emit_tls_set_reg (guint8 *code, int sreg, int offset_reg) +{ + int tmpreg = ARMREG_IP0; + + g_assert (sreg != tmpreg); + arm_mrs (code, tmpreg, ARM_MRS_REG_TPIDR_EL0); + arm_strx_reg (code, sreg, tmpreg, offset_reg); + return code; +} + +/* + * Emits + * - mov sp, fp + * - ldp fp, lr, [sp], #stack_offset + * Clobbers TEMP_REGS. + */ +__attribute__((warn_unused_result)) guint8* +mono_arm_emit_destroy_frame (guint8 *code, int stack_offset, guint64 temp_regs) +{ + arm_movspx (code, ARMREG_SP, ARMREG_FP); + + if (arm_is_ldpx_imm (stack_offset)) { + arm_ldpx_post (code, ARMREG_FP, ARMREG_LR, ARMREG_SP, stack_offset); + } else { + arm_ldpx (code, ARMREG_FP, ARMREG_LR, ARMREG_SP, 0); + /* sp += stack_offset */ + g_assert (temp_regs & (1 << ARMREG_IP0)); + if (temp_regs & (1 << ARMREG_IP1)) { + code = emit_addx_sp_imm (code, stack_offset); + } else { + int imm = stack_offset; + + /* Can't use addx_sp_imm () since we can't clobber ip0/ip1 */ + arm_addx_imm (code, ARMREG_IP0, ARMREG_SP, 0); + while (imm > 256) { + arm_addx_imm (code, ARMREG_IP0, ARMREG_IP0, 256); + imm -= 256; + } + arm_addx_imm (code, ARMREG_SP, ARMREG_IP0, imm); + } + } + return code; +} + +#define is_call_imm(diff) ((gint)(diff) >= -33554432 && (gint)(diff) <= 33554431) + +static guint8* +emit_thunk (guint8 *code, gconstpointer target) +{ + guint8 *p = code; + + arm_ldrx_lit (code, ARMREG_IP0, code + 8); + arm_brx (code, ARMREG_IP0); + *(guint64*)code = (guint64)target; + + mono_arch_flush_icache (p, code - p); + return code; +} + +static gpointer +create_thunk (MonoCompile *cfg, MonoDomain *domain, guchar *code, const guchar *target) +{ + MonoJitInfo *ji; + MonoThunkJitInfo *info; + guint8 *thunks, *p; + int thunks_size; + guint8 *orig_target; + guint8 *target_thunk; + + if (!domain) + domain = mono_domain_get (); + + if (cfg) { + /* + * This can be called multiple times during JITting, + * save the current position in cfg->arch to avoid + * doing an O(n^2) search.
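* Each thunk is THUNK_SIZE (16) bytes, laid out by emit_thunk () as: + * ldr ip0, <literal>; br ip0; .quad target. A free entry is recognized + * by its first 32-bit word being 0, and the scan below can match an + * existing thunk by comparing the stored target word.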
+ */ + if (!cfg->arch.thunks) { + cfg->arch.thunks = cfg->thunks; + cfg->arch.thunks_size = cfg->thunk_area; + } + thunks = cfg->arch.thunks; + thunks_size = cfg->arch.thunks_size; + if (!thunks_size) { + g_print ("thunk failed %p->%p, thunk space=%d method %s", code, target, thunks_size, mono_method_full_name (cfg->method, TRUE)); + g_assert_not_reached (); + } + + g_assert (*(guint32*)thunks == 0); + emit_thunk (thunks, target); + + cfg->arch.thunks += THUNK_SIZE; + cfg->arch.thunks_size -= THUNK_SIZE; + + return thunks; + } else { + ji = mini_jit_info_table_find (domain, (char*)code, NULL); + g_assert (ji); + info = mono_jit_info_get_thunk_info (ji); + g_assert (info); + + thunks = (guint8*)ji->code_start + info->thunks_offset; + thunks_size = info->thunks_size; + + orig_target = mono_arch_get_call_target (code + 4); + + mono_domain_lock (domain); + + target_thunk = NULL; + if (orig_target >= thunks && orig_target < thunks + thunks_size) { + /* The call already points to a thunk, because of trampolines etc. */ + target_thunk = orig_target; + } else { + for (p = thunks; p < thunks + thunks_size; p += THUNK_SIZE) { + if (((guint32*)p) [0] == 0) { + /* Free entry */ + target_thunk = p; + break; + } else if (((guint64*)p) [1] == (guint64)target) { + /* Thunk already points to target */ + target_thunk = p; + break; + } + } + } + + //printf ("THUNK: %p %p %p\n", code, target, target_thunk); + + if (!target_thunk) { + mono_domain_unlock (domain); + g_print ("thunk failed %p->%p, thunk space=%d method %s", code, target, thunks_size, cfg ? mono_method_full_name (cfg->method, TRUE) : mono_method_full_name (jinfo_get_method (ji), TRUE)); + g_assert_not_reached (); + } + + emit_thunk (target_thunk, target); + + mono_domain_unlock (domain); + + return target_thunk; + } +} + +static void +arm_patch_full (MonoCompile *cfg, MonoDomain *domain, guint8 *code, guint8 *target, int relocation) +{ + switch (relocation) { + case MONO_R_ARM64_B: + arm_b (code, target); + break; + case MONO_R_ARM64_BCC: { + int cond; + + cond = arm_get_bcc_cond (code); + arm_bcc (code, cond, target); + break; + } + case MONO_R_ARM64_CBZ: + arm_set_cbz_target (code, target); + break; + case MONO_R_ARM64_IMM: { + guint64 imm = (guint64)target; + int dreg; + + /* emit_imm64_template () */ + dreg = arm_get_movzx_rd (code); + arm_movzx (code, dreg, imm & 0xffff, 0); + arm_movkx (code, dreg, (imm >> 16) & 0xffff, 16); + arm_movkx (code, dreg, (imm >> 32) & 0xffff, 32); + arm_movkx (code, dreg, (imm >> 48) & 0xffff, 48); + break; + } + case MONO_R_ARM64_BL: + if (arm_is_bl_disp (code, target)) { + arm_bl (code, target); + } else { + gpointer thunk; + + thunk = create_thunk (cfg, domain, code, target); + g_assert (arm_is_bl_disp (code, thunk)); + arm_bl (code, thunk); + } + break; + default: + g_assert_not_reached (); + } +} + +static void +arm_patch_rel (guint8 *code, guint8 *target, int relocation) +{ + arm_patch_full (NULL, NULL, code, target, relocation); +} + +void +mono_arm_patch (guint8 *code, guint8 *target, int relocation) +{ + arm_patch_rel (code, target, relocation); +} + +void +mono_arch_patch_code_new (MonoCompile *cfg, MonoDomain *domain, guint8 *code, MonoJumpInfo *ji, gpointer target) +{ + guint8 *ip; + + ip = ji->ip.i + code; + + switch (ji->type) { + case MONO_PATCH_INFO_METHOD_JUMP: + /* ji->relocation is not set by the caller */ + arm_patch_rel (ip, (guint8*)target, MONO_R_ARM64_B); + break; + default: + arm_patch_full (cfg, domain, ip, (guint8*)target, ji->relocation); + break; + } +} + +void 
+mono_arch_free_jit_tls_data (MonoJitTlsData *tls) +{ +} + +void +mono_arch_flush_register_windows (void) +{ +} + +MonoMethod* +mono_arch_find_imt_method (mgreg_t *regs, guint8 *code) +{ + return (gpointer)regs [MONO_ARCH_RGCTX_REG]; +} + +MonoVTable* +mono_arch_find_static_call_vtable (mgreg_t *regs, guint8 *code) +{ + return (gpointer)regs [MONO_ARCH_RGCTX_REG]; +} + +mgreg_t +mono_arch_context_get_int_reg (MonoContext *ctx, int reg) +{ + return ctx->regs [reg]; +} + +void +mono_arch_context_set_int_reg (MonoContext *ctx, int reg, mgreg_t val) +{ + ctx->regs [reg] = val; +} + +/* + * mono_arch_set_target: + * + * Set the target architecture the JIT backend should generate code for, in the form + * of a GNU target triplet. Only used in AOT mode. + */ +void +mono_arch_set_target (char *mtriple) +{ + if (strstr (mtriple, "darwin") || strstr (mtriple, "ios")) { + ios_abi = TRUE; + } +} + +static void +add_general (CallInfo *cinfo, ArgInfo *ainfo, int size, gboolean sign) +{ + if (cinfo->gr >= PARAM_REGS) { + ainfo->storage = ArgOnStack; + if (ios_abi) { + /* Assume size == align */ + cinfo->stack_usage = ALIGN_TO (cinfo->stack_usage, size); + ainfo->offset = cinfo->stack_usage; + ainfo->slot_size = size; + ainfo->sign = sign; + cinfo->stack_usage += size; + } else { + ainfo->offset = cinfo->stack_usage; + ainfo->slot_size = 8; + ainfo->sign = FALSE; + /* Put arguments into 8 byte aligned stack slots */ + cinfo->stack_usage += 8; + } + } else { + ainfo->storage = ArgInIReg; + ainfo->reg = cinfo->gr; + cinfo->gr ++; + } +} + +static void +add_fp (CallInfo *cinfo, ArgInfo *ainfo, gboolean single) +{ + int size = single ? 4 : 8; + + if (cinfo->fr >= FP_PARAM_REGS) { + ainfo->storage = single ? ArgOnStackR4 : ArgOnStackR8; + if (ios_abi) { + cinfo->stack_usage = ALIGN_TO (cinfo->stack_usage, size); + ainfo->offset = cinfo->stack_usage; + ainfo->slot_size = size; + cinfo->stack_usage += size; + } else { + ainfo->offset = cinfo->stack_usage; + ainfo->slot_size = 8; + /* Put arguments into 8 byte aligned stack slots */ + cinfo->stack_usage += 8; + } + } else { + if (single) + ainfo->storage = ArgInFRegR4; + else + ainfo->storage = ArgInFReg; + ainfo->reg = cinfo->fr; + cinfo->fr ++; + } +} + +static gboolean +is_hfa (MonoType *t, int *out_nfields, int *out_esize, int *field_offsets) +{ + MonoClass *klass; + gpointer iter; + MonoClassField *field; + MonoType *ftype, *prev_ftype = NULL; + int i, nfields = 0; + + klass = mono_class_from_mono_type (t); + iter = NULL; + while ((field = mono_class_get_fields (klass, &iter))) { + if (field->type->attrs & FIELD_ATTRIBUTE_STATIC) + continue; + ftype = mono_field_get_type (field); + ftype = mini_get_underlying_type (ftype); + + if (MONO_TYPE_ISSTRUCT (ftype)) { + int nested_nfields, nested_esize; + int nested_field_offsets [16]; + + if (!is_hfa (ftype, &nested_nfields, &nested_esize, nested_field_offsets)) + return FALSE; + if (nested_esize == 4) + ftype = &mono_defaults.single_class->byval_arg; + else + ftype = &mono_defaults.double_class->byval_arg; + if (prev_ftype && prev_ftype->type != ftype->type) + return FALSE; + prev_ftype = ftype; + for (i = 0; i < nested_nfields; ++i) { + if (nfields + i < 4) + field_offsets [nfields + i] = field->offset - sizeof (MonoObject) + nested_field_offsets [i]; + } + nfields += nested_nfields; + } else { + if (!(!ftype->byref && (ftype->type == MONO_TYPE_R4 || ftype->type == MONO_TYPE_R8))) + return FALSE; + if (prev_ftype && prev_ftype->type != ftype->type) + return FALSE; + prev_ftype = ftype; + if (nfields < 4) + 
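+ /*
+ * HFA rule as implemented here (AAPCS64): a struct is a homogeneous
+ * floating-point aggregate if it has 1-4 non-static fields which are all
+ * float or all double, possibly through nested structs. Illustrative
+ * examples (hypothetical types, not from this patch):
+ *
+ * typedef struct { float x, y; } Vec2; // HFA: nfields = 2, esize = 4
+ * typedef struct { Vec2 a; float z; } Vec3; // HFA: nfields = 3, esize = 4
+ * typedef struct { float x; double y; } Bad; // not an HFA: mixed esize
+ */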
field_offsets [nfields] = field->offset - sizeof (MonoObject); + nfields ++; + } + } + if (nfields == 0 || nfields > 4) + return FALSE; + *out_nfields = nfields; + *out_esize = prev_ftype->type == MONO_TYPE_R4 ? 4 : 8; + return TRUE; +} + +static void +add_valuetype (CallInfo *cinfo, ArgInfo *ainfo, MonoType *t) +{ + int i, size, align_size, nregs, nfields, esize; + int field_offsets [16]; + guint32 align; + + size = mini_type_stack_size_full (t, &align, FALSE); + align_size = ALIGN_TO (size, 8); + + nregs = size / 8; + if (is_hfa (t, &nfields, &esize, field_offsets)) { + /* + * The struct might include nested float structs aligned at 8, + * so need to keep track of the offsets of the individual fields. + */ + if (cinfo->fr + nfields <= FP_PARAM_REGS) { + ainfo->storage = ArgHFA; + ainfo->reg = cinfo->fr; + ainfo->nregs = nfields; + ainfo->size = size; + ainfo->esize = esize; + for (i = 0; i < nfields; ++i) + ainfo->foffsets [i] = field_offsets [i]; + cinfo->fr += ainfo->nregs; + } else { + ainfo->nfregs_to_skip = FP_PARAM_REGS > cinfo->fr ? FP_PARAM_REGS - cinfo->fr : 0; + cinfo->fr = FP_PARAM_REGS; + size = ALIGN_TO (size, 8); + ainfo->storage = ArgVtypeOnStack; + ainfo->offset = cinfo->stack_usage; + ainfo->size = size; + ainfo->hfa = TRUE; + ainfo->nregs = nfields; + ainfo->esize = esize; + cinfo->stack_usage += size; + } + return; + } + + if (align_size > 16) { + ainfo->storage = ArgVtypeByRef; + ainfo->size = size; + return; + } + + if (cinfo->gr + nregs > PARAM_REGS) { + size = ALIGN_TO (size, 8); + ainfo->storage = ArgVtypeOnStack; + ainfo->offset = cinfo->stack_usage; + ainfo->size = size; + cinfo->stack_usage += size; + cinfo->gr = PARAM_REGS; + } else { + ainfo->storage = ArgVtypeInIRegs; + ainfo->reg = cinfo->gr; + ainfo->nregs = nregs; + ainfo->size = size; + cinfo->gr += nregs; + } +} + +static void +add_param (CallInfo *cinfo, ArgInfo *ainfo, MonoType *t) +{ + MonoType *ptype; + + ptype = mini_get_underlying_type (t); + switch (ptype->type) { + case MONO_TYPE_I1: + add_general (cinfo, ainfo, 1, TRUE); + break; + case MONO_TYPE_BOOLEAN: + case MONO_TYPE_U1: + add_general (cinfo, ainfo, 1, FALSE); + break; + case MONO_TYPE_I2: + add_general (cinfo, ainfo, 2, TRUE); + break; + case MONO_TYPE_U2: + case MONO_TYPE_CHAR: + add_general (cinfo, ainfo, 2, FALSE); + break; + case MONO_TYPE_I4: + add_general (cinfo, ainfo, 4, TRUE); + break; + case MONO_TYPE_U4: + add_general (cinfo, ainfo, 4, FALSE); + break; + case MONO_TYPE_I: + case MONO_TYPE_U: + case MONO_TYPE_PTR: + case MONO_TYPE_FNPTR: + case MONO_TYPE_CLASS: + case MONO_TYPE_OBJECT: + case MONO_TYPE_SZARRAY: + case MONO_TYPE_ARRAY: + case MONO_TYPE_STRING: + case MONO_TYPE_U8: + case MONO_TYPE_I8: + add_general (cinfo, ainfo, 8, FALSE); + break; + case MONO_TYPE_R8: + add_fp (cinfo, ainfo, FALSE); + break; + case MONO_TYPE_R4: + add_fp (cinfo, ainfo, TRUE); + break; + case MONO_TYPE_VALUETYPE: + case MONO_TYPE_TYPEDBYREF: + add_valuetype (cinfo, ainfo, ptype); + break; + case MONO_TYPE_VOID: + ainfo->storage = ArgNone; + break; + case MONO_TYPE_GENERICINST: + if (!mono_type_generic_inst_is_valuetype (ptype)) { + add_general (cinfo, ainfo, 8, FALSE); + } else if (mini_is_gsharedvt_variable_type (ptype)) { + /* + * Treat gsharedvt arguments as large vtypes + */ + ainfo->storage = ArgVtypeByRef; + ainfo->gsharedvt = TRUE; + } else { + add_valuetype (cinfo, ainfo, ptype); + } + break; + case MONO_TYPE_VAR: + case MONO_TYPE_MVAR: + g_assert (mini_is_gsharedvt_type (ptype)); + ainfo->storage = ArgVtypeByRef; + ainfo->gsharedvt = 
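+ /*
+ * A worked example of the classification done by add_param () (an
+ * illustrative sketch; "Vec2" is a hypothetical 2-float HFA):
+ *
+ * int foo (int a, double b, Vec2 c, BigStruct d); // sizeof (BigStruct) > 16
+ *
+ * a -> ArgInIReg r0; b -> ArgInFReg d0; c -> ArgHFA in d1-d2;
+ * d -> ArgVtypeByRef, with the address itself assigned to the next free
+ * integer register (or the stack) by get_call_info () below.
+ */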
TRUE; + break; + default: + g_assert_not_reached (); + break; + } +} + +/* + * get_call_info: + * + * Obtain information about a call according to the calling convention. + */ +static CallInfo* +get_call_info (MonoMemPool *mp, MonoMethodSignature *sig) +{ + CallInfo *cinfo; + ArgInfo *ainfo; + int n, pstart, pindex; + + n = sig->hasthis + sig->param_count; + + if (mp) + cinfo = mono_mempool_alloc0 (mp, sizeof (CallInfo) + (sizeof (ArgInfo) * n)); + else + cinfo = g_malloc0 (sizeof (CallInfo) + (sizeof (ArgInfo) * n)); + + cinfo->nargs = n; + + /* Return value */ + add_param (cinfo, &cinfo->ret, sig->ret); + if (cinfo->ret.storage == ArgVtypeByRef) + cinfo->ret.reg = ARMREG_R8; + /* Reset state */ + cinfo->gr = 0; + cinfo->fr = 0; + cinfo->stack_usage = 0; + + /* Parameters */ + if (sig->hasthis) + add_general (cinfo, cinfo->args + 0, 8, FALSE); + pstart = 0; + for (pindex = pstart; pindex < sig->param_count; ++pindex) { + ainfo = cinfo->args + sig->hasthis + pindex; + + if ((sig->call_convention == MONO_CALL_VARARG) && (pindex == sig->sentinelpos)) { + /* Prevent implicit arguments and sig_cookie from + being passed in registers */ + cinfo->gr = PARAM_REGS; + cinfo->fr = FP_PARAM_REGS; + /* Emit the signature cookie just before the implicit arguments */ + add_param (cinfo, &cinfo->sig_cookie, &mono_defaults.int_class->byval_arg); + } + + add_param (cinfo, ainfo, sig->params [pindex]); + if (ainfo->storage == ArgVtypeByRef) { + /* Pass the argument address in the next register */ + if (cinfo->gr >= PARAM_REGS) { + ainfo->storage = ArgVtypeByRefOnStack; + ainfo->offset = cinfo->stack_usage; + cinfo->stack_usage += 8; + } else { + ainfo->reg = cinfo->gr; + cinfo->gr ++; + } + } + } + + /* Handle the case where there are no implicit arguments */ + if ((sig->call_convention == MONO_CALL_VARARG) && (pindex == sig->sentinelpos)) { + /* Prevent implicit arguments and sig_cookie from + being passed in registers */ + cinfo->gr = PARAM_REGS; + cinfo->fr = FP_PARAM_REGS; + /* Emit the signature cookie just before the implicit arguments */ + add_param (cinfo, &cinfo->sig_cookie, &mono_defaults.int_class->byval_arg); + } + + cinfo->stack_usage = ALIGN_TO (cinfo->stack_usage, MONO_ARCH_FRAME_ALIGNMENT); + + return cinfo; +} + +typedef struct { + MonoMethodSignature *sig; + CallInfo *cinfo; + MonoType *rtype; + MonoType **param_types; + int n_fpargs, n_fpret; +} ArchDynCallInfo; + +static gboolean +dyn_call_supported (CallInfo *cinfo, MonoMethodSignature *sig) +{ + int i; + + if (sig->hasthis + sig->param_count > PARAM_REGS + DYN_CALL_STACK_ARGS) + return FALSE; + + // FIXME: Add more cases + switch (cinfo->ret.storage) { + case ArgNone: + case ArgInIReg: + case ArgInFReg: + case ArgInFRegR4: + case ArgVtypeByRef: + break; + case ArgVtypeInIRegs: + if (cinfo->ret.nregs > 2) + return FALSE; + break; + case ArgHFA: + break; + default: + return FALSE; + } + + for (i = 0; i < cinfo->nargs; ++i) { + ArgInfo *ainfo = &cinfo->args [i]; + + switch (ainfo->storage) { + case ArgInIReg: + case ArgVtypeInIRegs: + case ArgInFReg: + case ArgInFRegR4: + case ArgHFA: + case ArgVtypeByRef: + break; + case ArgOnStack: + if (ainfo->offset >= DYN_CALL_STACK_ARGS * sizeof (mgreg_t)) + return FALSE; + break; + default: + return FALSE; + } + } + + return TRUE; +} + +MonoDynCallInfo* +mono_arch_dyn_call_prepare (MonoMethodSignature *sig) +{ + ArchDynCallInfo *info; + CallInfo *cinfo; + int i; + + cinfo = get_call_info (NULL, sig); + + if (!dyn_call_supported (cinfo, sig)) { + g_free (cinfo); + return NULL; + } + + info = g_new0 
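+ /*
+ * The dyn call code below fills in a DynCallArgs buffer. Judging from the
+ * accesses it performs, the struct is roughly (a sketch; the real
+ * definition is in mini-arm64.h):
+ *
+ * typedef struct {
+ * mgreg_t regs [PARAM_REGS + 1 + DYN_CALL_STACK_ARGS]; // r0-r8 + stack slots
+ * double fpregs [FP_PARAM_REGS];
+ * mgreg_t res, res2;
+ * guint8 *ret;
+ * guint32 n_fpargs, n_fpret;
+ * guint8 buffer [256];
+ * } DynCallArgs;
+ */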
(ArchDynCallInfo, 1); + // FIXME: Preprocess the info to speed up start_dyn_call () + info->sig = sig; + info->cinfo = cinfo; + info->rtype = mini_get_underlying_type (sig->ret); + info->param_types = g_new0 (MonoType*, sig->param_count); + for (i = 0; i < sig->param_count; ++i) + info->param_types [i] = mini_get_underlying_type (sig->params [i]); + + switch (cinfo->ret.storage) { + case ArgInFReg: + case ArgInFRegR4: + info->n_fpret = 1; + break; + case ArgHFA: + info->n_fpret = cinfo->ret.nregs; + break; + default: + break; + } + + return (MonoDynCallInfo*)info; +} + +void +mono_arch_dyn_call_free (MonoDynCallInfo *info) +{ + ArchDynCallInfo *ainfo = (ArchDynCallInfo*)info; + + g_free (ainfo->cinfo); + g_free (ainfo->param_types); + g_free (ainfo); +} + +static double +bitcast_r4_to_r8 (float f) +{ + float *p = &f; + + return *(double*)p; +} + +static float +bitcast_r8_to_r4 (double f) +{ + double *p = &f; + + return *(float*)p; +} + +void +mono_arch_start_dyn_call (MonoDynCallInfo *info, gpointer **args, guint8 *ret, guint8 *buf, int buf_len) +{ + ArchDynCallInfo *dinfo = (ArchDynCallInfo*)info; + DynCallArgs *p = (DynCallArgs*)buf; + int aindex, arg_index, greg, i, pindex; + MonoMethodSignature *sig = dinfo->sig; + CallInfo *cinfo = dinfo->cinfo; + int buffer_offset = 0; + + g_assert (buf_len >= sizeof (DynCallArgs)); + + p->res = 0; + p->ret = ret; + p->n_fpargs = dinfo->n_fpargs; + p->n_fpret = dinfo->n_fpret; + + arg_index = 0; + greg = 0; + pindex = 0; + + if (sig->hasthis) + p->regs [greg ++] = (mgreg_t)*(args [arg_index ++]); + + if (cinfo->ret.storage == ArgVtypeByRef) + p->regs [ARMREG_R8] = (mgreg_t)ret; + + for (aindex = pindex; aindex < sig->param_count; aindex++) { + MonoType *t = dinfo->param_types [aindex]; + gpointer *arg = args [arg_index ++]; + ArgInfo *ainfo = &cinfo->args [aindex + sig->hasthis]; + int slot = -1; + + if (ainfo->storage == ArgOnStack) { + slot = PARAM_REGS + 1 + (ainfo->offset / sizeof (mgreg_t)); + } else { + slot = ainfo->reg; + } + + if (t->byref) { + p->regs [slot] = (mgreg_t)*arg; + continue; + } + + if (ios_abi && ainfo->storage == ArgOnStack) { + guint8 *stack_arg = (guint8*)&(p->regs [PARAM_REGS + 1]) + ainfo->offset; + gboolean handled = TRUE; + + /* Special case arguments smaller than 1 machine word */ + switch (t->type) { + case MONO_TYPE_BOOLEAN: + case MONO_TYPE_U1: + *(guint8*)stack_arg = *(guint8*)arg; + break; + case MONO_TYPE_I1: + *(gint8*)stack_arg = *(gint8*)arg; + break; + case MONO_TYPE_U2: + case MONO_TYPE_CHAR: + *(guint16*)stack_arg = *(guint16*)arg; + break; + case MONO_TYPE_I2: + *(gint16*)stack_arg = *(gint16*)arg; + break; + case MONO_TYPE_I4: + *(gint32*)stack_arg = *(gint32*)arg; + break; + case MONO_TYPE_U4: + *(guint32*)stack_arg = *(guint32*)arg; + break; + default: + handled = FALSE; + break; + } + if (handled) + continue; + } + + switch (t->type) { + case MONO_TYPE_STRING: + case MONO_TYPE_CLASS: + case MONO_TYPE_ARRAY: + case MONO_TYPE_SZARRAY: + case MONO_TYPE_OBJECT: + case MONO_TYPE_PTR: + case MONO_TYPE_I: + case MONO_TYPE_U: + case MONO_TYPE_I8: + case MONO_TYPE_U8: + p->regs [slot] = (mgreg_t)*arg; + break; + case MONO_TYPE_BOOLEAN: + case MONO_TYPE_U1: + p->regs [slot] = *(guint8*)arg; + break; + case MONO_TYPE_I1: + p->regs [slot] = *(gint8*)arg; + break; + case MONO_TYPE_I2: + p->regs [slot] = *(gint16*)arg; + break; + case MONO_TYPE_U2: + case MONO_TYPE_CHAR: + p->regs [slot] = *(guint16*)arg; + break; + case MONO_TYPE_I4: + p->regs [slot] = *(gint32*)arg; + break; + case MONO_TYPE_U4: + p->regs [slot] = 
*(guint32*)arg;
+ break;
+ case MONO_TYPE_R4:
+ p->fpregs [ainfo->reg] = bitcast_r4_to_r8 (*(float*)arg);
+ p->n_fpargs ++;
+ break;
+ case MONO_TYPE_R8:
+ p->fpregs [ainfo->reg] = *(double*)arg;
+ p->n_fpargs ++;
+ break;
+ case MONO_TYPE_GENERICINST:
+ if (MONO_TYPE_IS_REFERENCE (t)) {
+ p->regs [slot] = (mgreg_t)*arg;
+ break;
+ } else {
+ if (t->type == MONO_TYPE_GENERICINST && mono_class_is_nullable (mono_class_from_mono_type (t))) {
+ MonoClass *klass = mono_class_from_mono_type (t);
+ guint8 *nullable_buf;
+ int size;
+
+ /*
+ * Use p->buffer as a temporary buffer since the data needs to be available after this call
+ * if the nullable param is passed by ref.
+ */
+ size = mono_class_value_size (klass, NULL);
+ nullable_buf = p->buffer + buffer_offset;
+ buffer_offset += size;
+ g_assert (buffer_offset <= 256);
+
+ /* The argument pointed to by arg is either a boxed vtype or null */
+ mono_nullable_init (nullable_buf, (MonoObject*)arg, klass);
+
+ arg = (gpointer*)nullable_buf;
+ /* Fall through */
+ } else {
+ /* Fall through */
+ }
+ }
+ case MONO_TYPE_VALUETYPE:
+ switch (ainfo->storage) {
+ case ArgVtypeInIRegs:
+ for (i = 0; i < ainfo->nregs; ++i)
+ p->regs [slot ++] = ((mgreg_t*)arg) [i];
+ break;
+ case ArgHFA:
+ if (ainfo->esize == 4) {
+ for (i = 0; i < ainfo->nregs; ++i)
+ p->fpregs [ainfo->reg + i] = bitcast_r4_to_r8 (((float*)arg) [ainfo->foffsets [i] / 4]);
+ } else {
+ for (i = 0; i < ainfo->nregs; ++i)
+ p->fpregs [ainfo->reg + i] = ((double*)arg) [ainfo->foffsets [i] / 8];
+ }
+ p->n_fpargs += ainfo->nregs;
+ break;
+ case ArgVtypeByRef:
+ p->regs [slot] = (mgreg_t)arg;
+ break;
+ default:
+ g_assert_not_reached ();
+ break;
+ }
+ break;
+ default:
+ g_assert_not_reached ();
+ }
+ }
+}
+
+void
+mono_arch_finish_dyn_call (MonoDynCallInfo *info, guint8 *buf)
+{
+ ArchDynCallInfo *ainfo = (ArchDynCallInfo*)info;
+ CallInfo *cinfo = ainfo->cinfo;
+ DynCallArgs *args = (DynCallArgs*)buf;
+ MonoType *ptype = ainfo->rtype;
+ guint8 *ret = args->ret;
+ mgreg_t res = args->res;
+ mgreg_t res2 = args->res2;
+ int i;
+
+ if (cinfo->ret.storage == ArgVtypeByRef)
+ return;
+
+ switch (ptype->type) {
+ case MONO_TYPE_VOID:
+ *(gpointer*)ret = NULL;
+ break;
+ case MONO_TYPE_STRING:
+ case MONO_TYPE_CLASS:
+ case MONO_TYPE_ARRAY:
+ case MONO_TYPE_SZARRAY:
+ case MONO_TYPE_OBJECT:
+ case MONO_TYPE_I:
+ case MONO_TYPE_U:
+ case MONO_TYPE_PTR:
+ *(gpointer*)ret = (gpointer)res;
+ break;
+ case MONO_TYPE_I1:
+ *(gint8*)ret = res;
+ break;
+ case MONO_TYPE_U1:
+ case MONO_TYPE_BOOLEAN:
+ *(guint8*)ret = res;
+ break;
+ case MONO_TYPE_I2:
+ *(gint16*)ret = res;
+ break;
+ case MONO_TYPE_U2:
+ case MONO_TYPE_CHAR:
+ *(guint16*)ret = res;
+ break;
+ case MONO_TYPE_I4:
+ *(gint32*)ret = res;
+ break;
+ case MONO_TYPE_U4:
+ *(guint32*)ret = res;
+ break;
+ case MONO_TYPE_I8:
+ case MONO_TYPE_U8:
+ *(guint64*)ret = res;
+ break;
+ case MONO_TYPE_R4:
+ *(float*)ret = bitcast_r8_to_r4 (args->fpregs [0]);
+ break;
+ case MONO_TYPE_R8:
+ *(double*)ret = args->fpregs [0];
+ break;
+ case MONO_TYPE_GENERICINST:
+ if (MONO_TYPE_IS_REFERENCE (ptype)) {
+ *(gpointer*)ret = (gpointer)res;
+ break;
+ } else {
+ /* Fall through */
+ }
+ case MONO_TYPE_VALUETYPE:
+ switch (ainfo->cinfo->ret.storage) {
+ case ArgVtypeInIRegs:
+ *(mgreg_t*)ret = res;
+ if (ainfo->cinfo->ret.nregs > 1)
+ ((mgreg_t*)ret) [1] = res2;
+ break;
+ case ArgHFA:
+ /* Use the same area for returning fp values */
+ if (cinfo->ret.esize == 4) {
+ for (i = 0; i < cinfo->ret.nregs; ++i)
+ ((float*)ret) [cinfo->ret.foffsets [i] / 4] = 
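+ /*
+ * Note on the bitcast helpers above: they type-pun through pointers (and
+ * bitcast_r4_to_r8 reads 8 bytes through a float*), which is undefined
+ * behavior in strict C. A conforming alternative on a little-endian target
+ * would be (a sketch, not the patch's code):
+ *
+ * static double bitcast_r4_to_r8 (float f) {
+ * double d = 0;
+ * memcpy (&d, &f, sizeof (float)); // float bits land in the low 32 bits
+ * return d;
+ * }
+ */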
bitcast_r8_to_r4 (args->fpregs [i]); + } else { + for (i = 0; i < cinfo->ret.nregs; ++i) + ((double*)ret) [cinfo->ret.foffsets [i] / 8] = args->fpregs [i]; + } + break; + default: + g_assert_not_reached (); + break; + } + break; + default: + g_assert_not_reached (); + } +} + +#if __APPLE__ +void sys_icache_invalidate (void *start, size_t len); +#endif + +void +mono_arch_flush_icache (guint8 *code, gint size) +{ +#ifndef MONO_CROSS_COMPILE +#if __APPLE__ + sys_icache_invalidate (code, size); +#else + __clear_cache (code, code + size); +#endif +#endif +} + +#ifndef DISABLE_JIT + +gboolean +mono_arch_opcode_needs_emulation (MonoCompile *cfg, int opcode) +{ + NOT_IMPLEMENTED; + return FALSE; +} + +GList * +mono_arch_get_allocatable_int_vars (MonoCompile *cfg) +{ + GList *vars = NULL; + int i; + + for (i = 0; i < cfg->num_varinfo; i++) { + MonoInst *ins = cfg->varinfo [i]; + MonoMethodVar *vmv = MONO_VARINFO (cfg, i); + + /* unused vars */ + if (vmv->range.first_use.abs_pos >= vmv->range.last_use.abs_pos) + continue; + + if ((ins->flags & (MONO_INST_IS_DEAD|MONO_INST_VOLATILE|MONO_INST_INDIRECT)) || + (ins->opcode != OP_LOCAL && ins->opcode != OP_ARG)) + continue; + + if (mono_is_regsize_var (ins->inst_vtype)) { + g_assert (MONO_VARINFO (cfg, i)->reg == -1); + g_assert (i == vmv->idx); + vars = g_list_prepend (vars, vmv); + } + } + + vars = mono_varlist_sort (cfg, vars, 0); + + return vars; +} + +GList * +mono_arch_get_global_int_regs (MonoCompile *cfg) +{ + GList *regs = NULL; + int i; + + /* r28 is reserved for cfg->arch.args_reg */ + /* r27 is reserved for the imt argument */ + for (i = ARMREG_R19; i <= ARMREG_R26; ++i) + regs = g_list_prepend (regs, GUINT_TO_POINTER (i)); + + return regs; +} + +guint32 +mono_arch_regalloc_cost (MonoCompile *cfg, MonoMethodVar *vmv) +{ + MonoInst *ins = cfg->varinfo [vmv->idx]; + + if (ins->opcode == OP_ARG) + return 1; + else + return 2; +} + +void +mono_arch_create_vars (MonoCompile *cfg) +{ + MonoMethodSignature *sig; + CallInfo *cinfo; + + sig = mono_method_signature (cfg->method); + if (!cfg->arch.cinfo) + cfg->arch.cinfo = get_call_info (cfg->mempool, sig); + cinfo = cfg->arch.cinfo; + + if (cinfo->ret.storage == ArgVtypeByRef) { + cfg->vret_addr = mono_compile_create_var (cfg, &mono_defaults.int_class->byval_arg, OP_LOCAL); + cfg->vret_addr->flags |= MONO_INST_VOLATILE; + } + + if (cfg->gen_sdb_seq_points) { + MonoInst *ins; + + if (cfg->compile_aot) { + ins = mono_compile_create_var (cfg, &mono_defaults.int_class->byval_arg, OP_LOCAL); + ins->flags |= MONO_INST_VOLATILE; + cfg->arch.seq_point_info_var = ins; + } + + ins = mono_compile_create_var (cfg, &mono_defaults.int_class->byval_arg, OP_LOCAL); + ins->flags |= MONO_INST_VOLATILE; + cfg->arch.ss_tramp_var = ins; + + ins = mono_compile_create_var (cfg, &mono_defaults.int_class->byval_arg, OP_LOCAL); + ins->flags |= MONO_INST_VOLATILE; + cfg->arch.bp_tramp_var = ins; + } + + if (cfg->method->save_lmf) { + cfg->create_lmf_var = TRUE; + cfg->lmf_ir = TRUE; +#ifndef TARGET_MACH + cfg->lmf_ir_mono_lmf = TRUE; +#endif + } +} + +void +mono_arch_allocate_vars (MonoCompile *cfg) +{ + MonoMethodSignature *sig; + MonoInst *ins; + CallInfo *cinfo; + ArgInfo *ainfo; + int i, offset, size, align; + guint32 locals_stack_size, locals_stack_align; + gint32 *offsets; + + /* + * Allocate arguments and locals to either register (OP_REGVAR) or to a stack slot (OP_REGOFFSET). + * Compute cfg->stack_offset and update cfg->used_int_regs. 
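+ *
+ * For example (illustrative): an argument that arrived in r0 but has to
+ * live in memory becomes OP_REGOFFSET with inst_basereg == fp and a
+ * positive inst_offset, while a value kept in a hard register stays
+ * OP_REGVAR with dreg set to that register.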
+ */
+
+ sig = mono_method_signature (cfg->method);
+
+ if (!cfg->arch.cinfo)
+ cfg->arch.cinfo = get_call_info (cfg->mempool, sig);
+ cinfo = cfg->arch.cinfo;
+
+ /*
+ * The ARM64 ABI always uses a frame pointer.
+ * The instruction set prefers positive offsets, so fp points to the bottom of the
+ * frame, and stack slots are at positive offsets.
+ * If some arguments are received on the stack, their offsets relative to fp can
+ * not be computed right now because the stack frame might grow due to spilling
+ * done by the local register allocator. To solve this, we reserve a register
+ * which points to them.
+ * The stack frame looks like this:
+ * args_reg -> <arguments passed on the stack by the caller>
+ * <locals, spill slots, saved regs>
+ * fp -> <saved fp, lr>
+ * sp ->
+ */
+ cfg->frame_reg = ARMREG_FP;
+ cfg->flags |= MONO_CFG_HAS_SPILLUP;
+ offset = 0;
+
+ /* Saved fp+lr */
+ offset += 16;
+
+ if (cinfo->stack_usage) {
+ g_assert (!(cfg->used_int_regs & (1 << ARMREG_R28)));
+ cfg->arch.args_reg = ARMREG_R28;
+ cfg->used_int_regs |= 1 << ARMREG_R28;
+ }
+
+ if (cfg->method->save_lmf) {
+ /* The LMF var is allocated normally */
+ } else {
+ /* Callee saved regs */
+ cfg->arch.saved_gregs_offset = offset;
+ for (i = 0; i < 32; ++i)
+ if ((MONO_ARCH_CALLEE_SAVED_REGS & (1 << i)) && (cfg->used_int_regs & (1 << i)))
+ offset += 8;
+ }
+
+ /* Return value */
+ switch (cinfo->ret.storage) {
+ case ArgNone:
+ break;
+ case ArgInIReg:
+ case ArgInFReg:
+ case ArgInFRegR4:
+ cfg->ret->opcode = OP_REGVAR;
+ cfg->ret->dreg = cinfo->ret.reg;
+ break;
+ case ArgVtypeInIRegs:
+ case ArgHFA:
+ /* Allocate a local to hold the result, the epilog will copy it to the correct place */
+ cfg->ret->opcode = OP_REGOFFSET;
+ cfg->ret->inst_basereg = cfg->frame_reg;
+ cfg->ret->inst_offset = offset;
+ if (cinfo->ret.storage == ArgHFA)
+ // FIXME:
+ offset += 64;
+ else
+ offset += 16;
+ break;
+ case ArgVtypeByRef:
+ /* This variable will be initialized in the prolog from R8 */
+ cfg->vret_addr->opcode = OP_REGOFFSET;
+ cfg->vret_addr->inst_basereg = cfg->frame_reg;
+ cfg->vret_addr->inst_offset = offset;
+ offset += 8;
+ if (G_UNLIKELY (cfg->verbose_level > 1)) {
+ printf ("vret_addr =");
+ mono_print_ins (cfg->vret_addr);
+ }
+ break;
+ default:
+ g_assert_not_reached ();
+ break;
+ }
+
+ /* Arguments */
+ for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
+ ainfo = cinfo->args + i;
+
+ ins = cfg->args [i];
+ if (ins->opcode == OP_REGVAR)
+ continue;
+
+ ins->opcode = OP_REGOFFSET;
+ ins->inst_basereg = cfg->frame_reg;
+
+ switch (ainfo->storage) {
+ case ArgInIReg:
+ case ArgInFReg:
+ case ArgInFRegR4:
+ // FIXME: Use nregs/size
+ /* These will be copied to the stack in the prolog */
+ ins->inst_offset = offset;
+ offset += 8;
+ break;
+ case ArgOnStack:
+ case ArgOnStackR4:
+ case ArgOnStackR8:
+ case ArgVtypeOnStack:
+ /* These are in the parent frame */
+ g_assert (cfg->arch.args_reg);
+ ins->inst_basereg = cfg->arch.args_reg;
+ ins->inst_offset = ainfo->offset;
+ break;
+ case ArgVtypeInIRegs:
+ case ArgHFA:
+ ins->opcode = OP_REGOFFSET;
+ ins->inst_basereg = cfg->frame_reg;
+ /* These arguments are saved to the stack in the prolog */
+ ins->inst_offset = offset;
+ if (cfg->verbose_level >= 2)
+ printf ("arg %d allocated to %s+0x%0x.\n", i, mono_arch_regname (ins->inst_basereg), (int)ins->inst_offset);
+ if (ainfo->storage == ArgHFA)
+ // FIXME:
+ offset += 64;
+ else
+ offset += 16;
+ break;
+ case ArgVtypeByRefOnStack: {
+ MonoInst *vtaddr;
+
+ if (ainfo->gsharedvt) {
+ ins->opcode = OP_REGOFFSET;
+ ins->inst_basereg = cfg->arch.args_reg;
+ ins->inst_offset = ainfo->offset;
+ break;
+
} + + /* The vtype address is in the parent frame */ + g_assert (cfg->arch.args_reg); + MONO_INST_NEW (cfg, vtaddr, 0); + vtaddr->opcode = OP_REGOFFSET; + vtaddr->inst_basereg = cfg->arch.args_reg; + vtaddr->inst_offset = ainfo->offset; + + /* Need an indirection */ + ins->opcode = OP_VTARG_ADDR; + ins->inst_left = vtaddr; + break; + } + case ArgVtypeByRef: { + MonoInst *vtaddr; + + if (ainfo->gsharedvt) { + ins->opcode = OP_REGOFFSET; + ins->inst_basereg = cfg->frame_reg; + ins->inst_offset = offset; + offset += 8; + break; + } + + /* The vtype address is in a register, will be copied to the stack in the prolog */ + MONO_INST_NEW (cfg, vtaddr, 0); + vtaddr->opcode = OP_REGOFFSET; + vtaddr->inst_basereg = cfg->frame_reg; + vtaddr->inst_offset = offset; + offset += 8; + + /* Need an indirection */ + ins->opcode = OP_VTARG_ADDR; + ins->inst_left = vtaddr; + break; + } + default: + g_assert_not_reached (); + break; + } + } + + /* Allocate these first so they have a small offset, OP_SEQ_POINT depends on this */ + // FIXME: Allocate these to registers + ins = cfg->arch.seq_point_info_var; + if (ins) { + size = 8; + align = 8; + offset += align - 1; + offset &= ~(align - 1); + ins->opcode = OP_REGOFFSET; + ins->inst_basereg = cfg->frame_reg; + ins->inst_offset = offset; + offset += size; + } + ins = cfg->arch.ss_tramp_var; + if (ins) { + size = 8; + align = 8; + offset += align - 1; + offset &= ~(align - 1); + ins->opcode = OP_REGOFFSET; + ins->inst_basereg = cfg->frame_reg; + ins->inst_offset = offset; + offset += size; + } + ins = cfg->arch.bp_tramp_var; + if (ins) { + size = 8; + align = 8; + offset += align - 1; + offset &= ~(align - 1); + ins->opcode = OP_REGOFFSET; + ins->inst_basereg = cfg->frame_reg; + ins->inst_offset = offset; + offset += size; + } + + /* Locals */ + offsets = mono_allocate_stack_slots (cfg, FALSE, &locals_stack_size, &locals_stack_align); + if (locals_stack_align) + offset = ALIGN_TO (offset, locals_stack_align); + + for (i = cfg->locals_start; i < cfg->num_varinfo; i++) { + if (offsets [i] != -1) { + ins = cfg->varinfo [i]; + ins->opcode = OP_REGOFFSET; + ins->inst_basereg = cfg->frame_reg; + ins->inst_offset = offset + offsets [i]; + //printf ("allocated local %d to ", i); mono_print_tree_nl (ins); + } + } + offset += locals_stack_size; + + offset = ALIGN_TO (offset, MONO_ARCH_FRAME_ALIGNMENT); + + cfg->stack_offset = offset; +} + +#ifdef ENABLE_LLVM +LLVMCallInfo* +mono_arch_get_llvm_call_info (MonoCompile *cfg, MonoMethodSignature *sig) +{ + int i, n; + CallInfo *cinfo; + ArgInfo *ainfo; + LLVMCallInfo *linfo; + + n = sig->param_count + sig->hasthis; + + cinfo = get_call_info (cfg->mempool, sig); + + linfo = mono_mempool_alloc0 (cfg->mempool, sizeof (LLVMCallInfo) + (sizeof (LLVMArgInfo) * n)); + + switch (cinfo->ret.storage) { + case ArgInIReg: + case ArgInFReg: + case ArgInFRegR4: + case ArgNone: + break; + case ArgVtypeByRef: + linfo->ret.storage = LLVMArgVtypeByRef; + break; + // + // FIXME: This doesn't work yet since the llvm backend represents these types as an i8 + // array which is returned in int regs + // + case ArgHFA: + linfo->ret.storage = LLVMArgFpStruct; + linfo->ret.nslots = cinfo->ret.nregs; + linfo->ret.esize = cinfo->ret.esize; + break; + case ArgVtypeInIRegs: + /* LLVM models this by returning an int */ + linfo->ret.storage = LLVMArgVtypeAsScalar; + linfo->ret.nslots = cinfo->ret.nregs; + linfo->ret.esize = cinfo->ret.esize; + break; + default: + g_assert_not_reached (); + break; + } + + for (i = 0; i < n; ++i) { + LLVMArgInfo *lainfo = 
&linfo->args [i]; + + ainfo = cinfo->args + i; + + lainfo->storage = LLVMArgNone; + + switch (ainfo->storage) { + case ArgInIReg: + case ArgInFReg: + case ArgInFRegR4: + case ArgOnStack: + case ArgOnStackR4: + case ArgOnStackR8: + lainfo->storage = LLVMArgNormal; + break; + case ArgVtypeByRef: + case ArgVtypeByRefOnStack: + lainfo->storage = LLVMArgVtypeByRef; + break; + case ArgHFA: { + int j; + + lainfo->storage = LLVMArgAsFpArgs; + lainfo->nslots = ainfo->nregs; + lainfo->esize = ainfo->esize; + for (j = 0; j < ainfo->nregs; ++j) + lainfo->pair_storage [j] = LLVMArgInFPReg; + break; + } + case ArgVtypeInIRegs: + lainfo->storage = LLVMArgAsIArgs; + lainfo->nslots = ainfo->nregs; + break; + case ArgVtypeOnStack: + if (ainfo->hfa) { + int j; + /* Same as above */ + lainfo->storage = LLVMArgAsFpArgs; + lainfo->nslots = ainfo->nregs; + lainfo->esize = ainfo->esize; + lainfo->ndummy_fpargs = ainfo->nfregs_to_skip; + for (j = 0; j < ainfo->nregs; ++j) + lainfo->pair_storage [j] = LLVMArgInFPReg; + } else { + lainfo->storage = LLVMArgAsIArgs; + lainfo->nslots = ainfo->size / 8; + } + break; + default: + g_assert_not_reached (); + break; + } + } + + return linfo; +} +#endif + +static void +add_outarg_reg (MonoCompile *cfg, MonoCallInst *call, ArgStorage storage, int reg, MonoInst *arg) +{ + MonoInst *ins; + + switch (storage) { + case ArgInIReg: + MONO_INST_NEW (cfg, ins, OP_MOVE); + ins->dreg = mono_alloc_ireg_copy (cfg, arg->dreg); + ins->sreg1 = arg->dreg; + MONO_ADD_INS (cfg->cbb, ins); + mono_call_inst_add_outarg_reg (cfg, call, ins->dreg, reg, FALSE); + break; + case ArgInFReg: + MONO_INST_NEW (cfg, ins, OP_FMOVE); + ins->dreg = mono_alloc_freg (cfg); + ins->sreg1 = arg->dreg; + MONO_ADD_INS (cfg->cbb, ins); + mono_call_inst_add_outarg_reg (cfg, call, ins->dreg, reg, TRUE); + break; + case ArgInFRegR4: + if (COMPILE_LLVM (cfg)) + MONO_INST_NEW (cfg, ins, OP_FMOVE); + else if (cfg->r4fp) + MONO_INST_NEW (cfg, ins, OP_RMOVE); + else + MONO_INST_NEW (cfg, ins, OP_ARM_SETFREG_R4); + ins->dreg = mono_alloc_freg (cfg); + ins->sreg1 = arg->dreg; + MONO_ADD_INS (cfg->cbb, ins); + mono_call_inst_add_outarg_reg (cfg, call, ins->dreg, reg, TRUE); + break; + default: + g_assert_not_reached (); + break; + } +} + +static void +emit_sig_cookie (MonoCompile *cfg, MonoCallInst *call, CallInfo *cinfo) +{ + MonoMethodSignature *tmp_sig; + int sig_reg; + + if (call->tail_call) + NOT_IMPLEMENTED; + + g_assert (cinfo->sig_cookie.storage == ArgOnStack); + + /* + * mono_ArgIterator_Setup assumes the signature cookie is + * passed first and all the arguments which were before it are + * passed on the stack after the signature. So compensate by + * passing a different signature. 
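+ *
+ * Example (illustrative): for a vararg call foo (int a, __arglist (double b)),
+ * sentinelpos == 1, so the tmp_sig built below describes only the trailing
+ * (double) part, and the cookie is stored at cinfo->sig_cookie.offset in the
+ * outgoing argument area.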
+ */ + tmp_sig = mono_metadata_signature_dup (call->signature); + tmp_sig->param_count -= call->signature->sentinelpos; + tmp_sig->sentinelpos = 0; + memcpy (tmp_sig->params, call->signature->params + call->signature->sentinelpos, tmp_sig->param_count * sizeof (MonoType*)); + + sig_reg = mono_alloc_ireg (cfg); + MONO_EMIT_NEW_SIGNATURECONST (cfg, sig_reg, tmp_sig); + + MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORE_MEMBASE_REG, ARMREG_SP, cinfo->sig_cookie.offset, sig_reg); +} + +void +mono_arch_emit_call (MonoCompile *cfg, MonoCallInst *call) +{ + MonoMethodSignature *sig; + MonoInst *arg, *vtarg; + CallInfo *cinfo; + ArgInfo *ainfo; + int i; + + sig = call->signature; + + cinfo = get_call_info (cfg->mempool, sig); + + switch (cinfo->ret.storage) { + case ArgVtypeInIRegs: + case ArgHFA: + /* + * The vtype is returned in registers, save the return area address in a local, and save the vtype into + * the location pointed to by it after call in emit_move_return_value (). + */ + if (!cfg->arch.vret_addr_loc) { + cfg->arch.vret_addr_loc = mono_compile_create_var (cfg, &mono_defaults.int_class->byval_arg, OP_LOCAL); + /* Prevent it from being register allocated or optimized away */ + ((MonoInst*)cfg->arch.vret_addr_loc)->flags |= MONO_INST_VOLATILE; + } + + MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, ((MonoInst*)cfg->arch.vret_addr_loc)->dreg, call->vret_var->dreg); + break; + case ArgVtypeByRef: + /* Pass the vtype return address in R8 */ + MONO_INST_NEW (cfg, vtarg, OP_MOVE); + vtarg->sreg1 = call->vret_var->dreg; + vtarg->dreg = mono_alloc_preg (cfg); + MONO_ADD_INS (cfg->cbb, vtarg); + + mono_call_inst_add_outarg_reg (cfg, call, vtarg->dreg, cinfo->ret.reg, FALSE); + break; + default: + break; + } + + for (i = 0; i < cinfo->nargs; ++i) { + ainfo = cinfo->args + i; + arg = call->args [i]; + + if ((sig->call_convention == MONO_CALL_VARARG) && (i == sig->sentinelpos)) { + /* Emit the signature cookie just before the implicit arguments */ + emit_sig_cookie (cfg, call, cinfo); + } + + switch (ainfo->storage) { + case ArgInIReg: + case ArgInFReg: + case ArgInFRegR4: + add_outarg_reg (cfg, call, ainfo->storage, ainfo->reg, arg); + break; + case ArgOnStack: + switch (ainfo->slot_size) { + case 8: + MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORE_MEMBASE_REG, ARMREG_SP, ainfo->offset, arg->dreg); + break; + case 4: + MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STOREI4_MEMBASE_REG, ARMREG_SP, ainfo->offset, arg->dreg); + break; + case 2: + MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STOREI2_MEMBASE_REG, ARMREG_SP, ainfo->offset, arg->dreg); + break; + case 1: + MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STOREI1_MEMBASE_REG, ARMREG_SP, ainfo->offset, arg->dreg); + break; + default: + g_assert_not_reached (); + break; + } + break; + case ArgOnStackR8: + MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORER8_MEMBASE_REG, ARMREG_SP, ainfo->offset, arg->dreg); + break; + case ArgOnStackR4: + MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORER4_MEMBASE_REG, ARMREG_SP, ainfo->offset, arg->dreg); + break; + case ArgVtypeInIRegs: + case ArgVtypeByRef: + case ArgVtypeByRefOnStack: + case ArgVtypeOnStack: + case ArgHFA: { + MonoInst *ins; + guint32 align; + guint32 size; + + size = mono_class_value_size (arg->klass, &align); + + MONO_INST_NEW (cfg, ins, OP_OUTARG_VT); + ins->sreg1 = arg->dreg; + ins->klass = arg->klass; + ins->backend.size = size; + ins->inst_p0 = call; + ins->inst_p1 = mono_mempool_alloc (cfg->mempool, sizeof (ArgInfo)); + memcpy (ins->inst_p1, ainfo, sizeof (ArgInfo)); + MONO_ADD_INS (cfg->cbb, ins); + break; + } + default: + g_assert_not_reached 
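+ /*
+ * Note that vtype arguments are not marshalled here directly: the
+ * OP_OUTARG_VT instruction emitted above carries a private copy of the
+ * ArgInfo in ins->inst_p1 and is expanded into the actual loads/stores by
+ * mono_arch_emit_outarg_vt () below.
+ */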
(); + break; + } + } + + /* Handle the case where there are no implicit arguments */ + if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (cinfo->nargs == sig->sentinelpos)) + emit_sig_cookie (cfg, call, cinfo); + + call->call_info = cinfo; + call->stack_usage = cinfo->stack_usage; +} + +void +mono_arch_emit_outarg_vt (MonoCompile *cfg, MonoInst *ins, MonoInst *src) +{ + MonoCallInst *call = (MonoCallInst*)ins->inst_p0; + ArgInfo *ainfo = ins->inst_p1; + MonoInst *load; + int i; + + if (ins->backend.size == 0 && !ainfo->gsharedvt) + return; + + switch (ainfo->storage) { + case ArgVtypeInIRegs: + for (i = 0; i < ainfo->nregs; ++i) { + // FIXME: Smaller sizes + MONO_INST_NEW (cfg, load, OP_LOADI8_MEMBASE); + load->dreg = mono_alloc_ireg (cfg); + load->inst_basereg = src->dreg; + load->inst_offset = i * sizeof(mgreg_t); + MONO_ADD_INS (cfg->cbb, load); + add_outarg_reg (cfg, call, ArgInIReg, ainfo->reg + i, load); + } + break; + case ArgHFA: + for (i = 0; i < ainfo->nregs; ++i) { + if (ainfo->esize == 4) + MONO_INST_NEW (cfg, load, OP_LOADR4_MEMBASE); + else + MONO_INST_NEW (cfg, load, OP_LOADR8_MEMBASE); + load->dreg = mono_alloc_freg (cfg); + load->inst_basereg = src->dreg; + load->inst_offset = ainfo->foffsets [i]; + MONO_ADD_INS (cfg->cbb, load); + add_outarg_reg (cfg, call, ainfo->esize == 4 ? ArgInFRegR4 : ArgInFReg, ainfo->reg + i, load); + } + break; + case ArgVtypeByRef: + case ArgVtypeByRefOnStack: { + MonoInst *vtaddr, *load, *arg; + + /* Pass the vtype address in a reg/on the stack */ + if (ainfo->gsharedvt) { + load = src; + } else { + /* Make a copy of the argument */ + vtaddr = mono_compile_create_var (cfg, &ins->klass->byval_arg, OP_LOCAL); + + MONO_INST_NEW (cfg, load, OP_LDADDR); + load->inst_p0 = vtaddr; + vtaddr->flags |= MONO_INST_INDIRECT; + load->type = STACK_MP; + load->klass = vtaddr->klass; + load->dreg = mono_alloc_ireg (cfg); + MONO_ADD_INS (cfg->cbb, load); + mini_emit_memcpy (cfg, load->dreg, 0, src->dreg, 0, ainfo->size, 8); + } + + if (ainfo->storage == ArgVtypeByRef) { + MONO_INST_NEW (cfg, arg, OP_MOVE); + arg->dreg = mono_alloc_preg (cfg); + arg->sreg1 = load->dreg; + MONO_ADD_INS (cfg->cbb, arg); + add_outarg_reg (cfg, call, ArgInIReg, ainfo->reg, arg); + } else { + MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORE_MEMBASE_REG, ARMREG_SP, ainfo->offset, load->dreg); + } + break; + } + case ArgVtypeOnStack: + for (i = 0; i < ainfo->size / 8; ++i) { + MONO_INST_NEW (cfg, load, OP_LOADI8_MEMBASE); + load->dreg = mono_alloc_ireg (cfg); + load->inst_basereg = src->dreg; + load->inst_offset = i * 8; + MONO_ADD_INS (cfg->cbb, load); + MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STOREI8_MEMBASE_REG, ARMREG_SP, ainfo->offset + (i * 8), load->dreg); + } + break; + default: + g_assert_not_reached (); + break; + } +} + +void +mono_arch_emit_setret (MonoCompile *cfg, MonoMethod *method, MonoInst *val) +{ + MonoMethodSignature *sig; + CallInfo *cinfo; + + sig = mono_method_signature (cfg->method); + if (!cfg->arch.cinfo) + cfg->arch.cinfo = get_call_info (cfg->mempool, sig); + cinfo = cfg->arch.cinfo; + + switch (cinfo->ret.storage) { + case ArgNone: + break; + case ArgInIReg: + MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, cfg->ret->dreg, val->dreg); + break; + case ArgInFReg: + MONO_EMIT_NEW_UNALU (cfg, OP_FMOVE, cfg->ret->dreg, val->dreg); + break; + case ArgInFRegR4: + if (COMPILE_LLVM (cfg)) + MONO_EMIT_NEW_UNALU (cfg, OP_FMOVE, cfg->ret->dreg, val->dreg); + else if (cfg->r4fp) + MONO_EMIT_NEW_UNALU (cfg, OP_RMOVE, cfg->ret->dreg, val->dreg); + else + MONO_EMIT_NEW_UNALU 
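+ /*
+ * The three-way split mirrors add_outarg_reg () above: LLVM sees a plain
+ * fmove, cfg->r4fp (floats kept in single precision vregs) uses OP_RMOVE,
+ * and otherwise OP_ARM_SETFREG_R4 presumably narrows the double precision
+ * jit value into the single precision return register.
+ */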
(cfg, OP_ARM_SETFREG_R4, cfg->ret->dreg, val->dreg); + break; + default: + g_assert_not_reached (); + break; + } +} + +gboolean +mono_arch_tail_call_supported (MonoCompile *cfg, MonoMethodSignature *caller_sig, MonoMethodSignature *callee_sig) +{ + CallInfo *c1, *c2; + gboolean res; + + if (cfg->compile_aot && !cfg->full_aot) + /* OP_TAILCALL doesn't work with AOT */ + return FALSE; + + c1 = get_call_info (NULL, caller_sig); + c2 = get_call_info (NULL, callee_sig); + res = TRUE; + // FIXME: Relax these restrictions + if (c1->stack_usage != 0) + res = FALSE; + if (c1->stack_usage != c2->stack_usage) + res = FALSE; + if ((c1->ret.storage != ArgNone && c1->ret.storage != ArgInIReg) || c1->ret.storage != c2->ret.storage) + res = FALSE; + + g_free (c1); + g_free (c2); + + return res; +} + +gboolean +mono_arch_is_inst_imm (gint64 imm) +{ + return (imm >= -((gint64)1<<31) && imm <= (((gint64)1<<31)-1)); +} + +void* +mono_arch_instrument_prolog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments) +{ + NOT_IMPLEMENTED; + return NULL; +} + +void* +mono_arch_instrument_epilog_full (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments, gboolean preserve_argument_registers) +{ + NOT_IMPLEMENTED; + return NULL; +} + +void +mono_arch_peephole_pass_1 (MonoCompile *cfg, MonoBasicBlock *bb) +{ + //NOT_IMPLEMENTED; +} + +void +mono_arch_peephole_pass_2 (MonoCompile *cfg, MonoBasicBlock *bb) +{ + //NOT_IMPLEMENTED; +} + +#define ADD_NEW_INS(cfg,dest,op) do { \ + MONO_INST_NEW ((cfg), (dest), (op)); \ + mono_bblock_insert_before_ins (bb, ins, (dest)); \ + } while (0) + +void +mono_arch_lowering_pass (MonoCompile *cfg, MonoBasicBlock *bb) +{ + MonoInst *ins, *temp, *last_ins = NULL; + + MONO_BB_FOR_EACH_INS (bb, ins) { + switch (ins->opcode) { + case OP_SBB: + case OP_ISBB: + case OP_SUBCC: + case OP_ISUBCC: + if (ins->next && (ins->next->opcode == OP_COND_EXC_C || ins->next->opcode == OP_COND_EXC_IC)) + /* ARM sets the C flag to 1 if there was _no_ overflow */ + ins->next->opcode = OP_COND_EXC_NC; + break; + case OP_IDIV_IMM: + case OP_IREM_IMM: + case OP_IDIV_UN_IMM: + case OP_IREM_UN_IMM: + case OP_LREM_IMM: + mono_decompose_op_imm (cfg, bb, ins); + break; + case OP_LOCALLOC_IMM: + if (ins->inst_imm > 32) { + ADD_NEW_INS (cfg, temp, OP_ICONST); + temp->inst_c0 = ins->inst_imm; + temp->dreg = mono_alloc_ireg (cfg); + ins->sreg1 = temp->dreg; + ins->opcode = mono_op_imm_to_op (ins->opcode); + } + break; + case OP_ICOMPARE_IMM: + if (ins->inst_imm == 0 && ins->next && ins->next->opcode == OP_IBEQ) { + ins->next->opcode = OP_ARM64_CBZW; + ins->next->sreg1 = ins->sreg1; + NULLIFY_INS (ins); + } else if (ins->inst_imm == 0 && ins->next && ins->next->opcode == OP_IBNE_UN) { + ins->next->opcode = OP_ARM64_CBNZW; + ins->next->sreg1 = ins->sreg1; + NULLIFY_INS (ins); + } + break; + case OP_LCOMPARE_IMM: + case OP_COMPARE_IMM: + if (ins->inst_imm == 0 && ins->next && ins->next->opcode == OP_LBEQ) { + ins->next->opcode = OP_ARM64_CBZX; + ins->next->sreg1 = ins->sreg1; + NULLIFY_INS (ins); + } else if (ins->inst_imm == 0 && ins->next && ins->next->opcode == OP_LBNE_UN) { + ins->next->opcode = OP_ARM64_CBNZX; + ins->next->sreg1 = ins->sreg1; + NULLIFY_INS (ins); + } + break; + case OP_FCOMPARE: { + gboolean swap = FALSE; + int reg; + + if (!ins->next) { + /* Optimized away */ + NULLIFY_INS (ins); + break; + } + + /* + * FP compares with unordered operands set the flags + * to NZCV=0011, which matches some non-unordered compares + * as well, like LE, so have to swap the operands. 
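+ *
+ * Concretely: "a < b" compiled as fcmp a, b; b.lt would be taken on NaN,
+ * since unordered sets NZCV=0011 and LT (N != V) holds with N=0, V=1.
+ * Rewriting to b.gt on the swapped compare fcmp b, a gives GT
+ * (Z == 0 && N == V), which is false for unordered, as required.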
+ */ + switch (ins->next->opcode) { + case OP_FBLT: + ins->next->opcode = OP_FBGT; + swap = TRUE; + break; + case OP_FBLE: + ins->next->opcode = OP_FBGE; + swap = TRUE; + break; + default: + break; + } + if (swap) { + reg = ins->sreg1; + ins->sreg1 = ins->sreg2; + ins->sreg2 = reg; + } + break; + } + default: + break; + } + + last_ins = ins; + } + bb->last_ins = last_ins; + bb->max_vreg = cfg->next_vreg; +} + +void +mono_arch_decompose_long_opts (MonoCompile *cfg, MonoInst *long_ins) +{ +} + +static int +opcode_to_armcond (int opcode) +{ + switch (opcode) { + case OP_IBEQ: + case OP_LBEQ: + case OP_FBEQ: + case OP_CEQ: + case OP_ICEQ: + case OP_LCEQ: + case OP_FCEQ: + case OP_RCEQ: + case OP_COND_EXC_IEQ: + case OP_COND_EXC_EQ: + return ARMCOND_EQ; + case OP_IBGE: + case OP_LBGE: + case OP_FBGE: + case OP_ICGE: + case OP_FCGE: + case OP_RCGE: + return ARMCOND_GE; + case OP_IBGT: + case OP_LBGT: + case OP_FBGT: + case OP_CGT: + case OP_ICGT: + case OP_LCGT: + case OP_FCGT: + case OP_RCGT: + case OP_COND_EXC_IGT: + case OP_COND_EXC_GT: + return ARMCOND_GT; + case OP_IBLE: + case OP_LBLE: + case OP_FBLE: + case OP_ICLE: + case OP_FCLE: + case OP_RCLE: + return ARMCOND_LE; + case OP_IBLT: + case OP_LBLT: + case OP_FBLT: + case OP_CLT: + case OP_ICLT: + case OP_LCLT: + case OP_COND_EXC_ILT: + case OP_COND_EXC_LT: + return ARMCOND_LT; + case OP_IBNE_UN: + case OP_LBNE_UN: + case OP_FBNE_UN: + case OP_ICNEQ: + case OP_FCNEQ: + case OP_RCNEQ: + case OP_COND_EXC_INE_UN: + case OP_COND_EXC_NE_UN: + return ARMCOND_NE; + case OP_IBGE_UN: + case OP_LBGE_UN: + case OP_FBGE_UN: + case OP_ICGE_UN: + case OP_COND_EXC_IGE_UN: + case OP_COND_EXC_GE_UN: + return ARMCOND_HS; + case OP_IBGT_UN: + case OP_LBGT_UN: + case OP_FBGT_UN: + case OP_CGT_UN: + case OP_ICGT_UN: + case OP_LCGT_UN: + case OP_FCGT_UN: + case OP_RCGT_UN: + case OP_COND_EXC_IGT_UN: + case OP_COND_EXC_GT_UN: + return ARMCOND_HI; + case OP_IBLE_UN: + case OP_LBLE_UN: + case OP_FBLE_UN: + case OP_ICLE_UN: + case OP_COND_EXC_ILE_UN: + case OP_COND_EXC_LE_UN: + return ARMCOND_LS; + case OP_IBLT_UN: + case OP_LBLT_UN: + case OP_FBLT_UN: + case OP_CLT_UN: + case OP_ICLT_UN: + case OP_LCLT_UN: + case OP_COND_EXC_ILT_UN: + case OP_COND_EXC_LT_UN: + return ARMCOND_LO; + /* + * FCMP sets the NZCV condition bits as follows: + * eq = 0110 + * < = 1000 + * > = 0010 + * unordered = 0011 + * ARMCOND_LT is N!=V, so it matches unordered too, so + * fclt and fclt_un need to be special cased. 
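+ *
+ * E.g. fclt uses MI: an ordered "<" sets NZCV=1000 (N=1) while unordered
+ * gives 0011 (N=0), so only a real less-than produces 1. fclt_un keeps LT
+ * (N != V), which holds for both "<" (N=1, V=0) and unordered (N=0, V=1).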
+ */ + case OP_FCLT: + case OP_RCLT: + /* N==1 */ + return ARMCOND_MI; + case OP_FCLT_UN: + case OP_RCLT_UN: + return ARMCOND_LT; + case OP_COND_EXC_C: + case OP_COND_EXC_IC: + return ARMCOND_CS; + case OP_COND_EXC_OV: + case OP_COND_EXC_IOV: + return ARMCOND_VS; + case OP_COND_EXC_NC: + case OP_COND_EXC_INC: + return ARMCOND_CC; + case OP_COND_EXC_NO: + case OP_COND_EXC_INO: + return ARMCOND_VC; + default: + printf ("%s\n", mono_inst_name (opcode)); + g_assert_not_reached (); + return -1; + } +} + +/* This clobbers LR */ +static inline __attribute__((warn_unused_result)) guint8* +emit_cond_exc (MonoCompile *cfg, guint8 *code, int opcode, const char *exc_name) +{ + int cond; + + cond = opcode_to_armcond (opcode); + /* Capture PC */ + arm_adrx (code, ARMREG_IP1, code); + mono_add_patch_info_rel (cfg, code - cfg->native_code, MONO_PATCH_INFO_EXC, exc_name, MONO_R_ARM64_BCC); + arm_bcc (code, cond, 0); + return code; +} + +static guint8* +emit_move_return_value (MonoCompile *cfg, guint8 * code, MonoInst *ins) +{ + CallInfo *cinfo; + MonoCallInst *call; + + call = (MonoCallInst*)ins; + cinfo = call->call_info; + g_assert (cinfo); + switch (cinfo->ret.storage) { + case ArgNone: + break; + case ArgInIReg: + /* LLVM compiled code might only set the bottom bits */ + if (call->signature && mini_get_underlying_type (call->signature->ret)->type == MONO_TYPE_I4) + arm_sxtwx (code, call->inst.dreg, cinfo->ret.reg); + else if (call->inst.dreg != cinfo->ret.reg) + arm_movx (code, call->inst.dreg, cinfo->ret.reg); + break; + case ArgInFReg: + if (call->inst.dreg != cinfo->ret.reg) + arm_fmovd (code, call->inst.dreg, cinfo->ret.reg); + break; + case ArgInFRegR4: + if (cfg->r4fp) + arm_fmovs (code, call->inst.dreg, cinfo->ret.reg); + else + arm_fcvt_sd (code, call->inst.dreg, cinfo->ret.reg); + break; + case ArgVtypeInIRegs: { + MonoInst *loc = cfg->arch.vret_addr_loc; + int i; + + /* Load the destination address */ + g_assert (loc && loc->opcode == OP_REGOFFSET); + code = emit_ldrx (code, ARMREG_LR, loc->inst_basereg, loc->inst_offset); + for (i = 0; i < cinfo->ret.nregs; ++i) + arm_strx (code, cinfo->ret.reg + i, ARMREG_LR, i * 8); + break; + } + case ArgHFA: { + MonoInst *loc = cfg->arch.vret_addr_loc; + int i; + + /* Load the destination address */ + g_assert (loc && loc->opcode == OP_REGOFFSET); + code = emit_ldrx (code, ARMREG_LR, loc->inst_basereg, loc->inst_offset); + for (i = 0; i < cinfo->ret.nregs; ++i) { + if (cinfo->ret.esize == 4) + arm_strfpw (code, cinfo->ret.reg + i, ARMREG_LR, cinfo->ret.foffsets [i]); + else + arm_strfpx (code, cinfo->ret.reg + i, ARMREG_LR, cinfo->ret.foffsets [i]); + } + break; + } + case ArgVtypeByRef: + break; + default: + g_assert_not_reached (); + break; + } + return code; +} + +/* + * emit_branch_island: + * + * Emit a branch island for the conditional branches from cfg->native_code + start_offset to code. 
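+ *
+ * Rationale (sketch): bcc/cbz encode a 19 bit word displacement, i.e. about
+ * +-1 MB of reach, so inside a very large basic block a conditional branch
+ * is redirected to a nearby unconditional b in the island, whose 26 bit
+ * displacement reaches +-128 MB.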
+ */
+static guint8*
+emit_branch_island (MonoCompile *cfg, guint8 *code, int start_offset)
+{
+ MonoJumpInfo *ji;
+ int offset, island_size;
+
+ /* Iterate over the patch infos added so far by this bb */
+ island_size = 0;
+ for (ji = cfg->patch_info; ji; ji = ji->next) {
+ if (ji->ip.i < start_offset)
+ /* The patch infos are in reverse order, so this means the end */
+ break;
+ if (ji->relocation == MONO_R_ARM64_BCC || ji->relocation == MONO_R_ARM64_CBZ)
+ island_size += 4;
+ }
+
+ if (island_size) {
+ offset = code - cfg->native_code;
+ if (offset > (cfg->code_size - island_size - 16)) {
+ cfg->code_size *= 2;
+ cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
+ code = cfg->native_code + offset;
+ }
+
+ /* Branch over the island */
+ arm_b (code, code + 4 + island_size);
+
+ for (ji = cfg->patch_info; ji; ji = ji->next) {
+ if (ji->ip.i < start_offset)
+ break;
+ if (ji->relocation == MONO_R_ARM64_BCC || ji->relocation == MONO_R_ARM64_CBZ) {
+ /* Rewrite the cond branch so it branches to an unconditional branch in the branch island */
+ arm_patch_rel (cfg->native_code + ji->ip.i, code, ji->relocation);
+ /* Rewrite the patch so it points to the unconditional branch */
+ ji->ip.i = code - cfg->native_code;
+ ji->relocation = MONO_R_ARM64_B;
+ arm_b (code, code);
+ }
+ }
+ }
+ return code;
+}
+
+void
+mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
+{
+ MonoInst *ins;
+ MonoCallInst *call;
+ guint offset;
+ guint8 *code = cfg->native_code + cfg->code_len;
+ int start_offset, max_len, dreg, sreg1, sreg2;
+ mgreg_t imm;
+
+ if (cfg->verbose_level > 2)
+ g_print ("Basic block %d starting at offset 0x%x\n", bb->block_num, bb->native_offset);
+
+ start_offset = code - cfg->native_code;
+
+ MONO_BB_FOR_EACH_INS (bb, ins) {
+ offset = code - cfg->native_code;
+
+ max_len = ((guint8 *)ins_get_spec (ins->opcode))[MONO_INST_LEN];
+
+ if (offset > (cfg->code_size - max_len - 16)) {
+ cfg->code_size *= 2;
+ cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
+ code = cfg->native_code + offset;
+ }
+
+ if (G_UNLIKELY (cfg->arch.cond_branch_islands && offset - start_offset > 4 * 0x1ffff)) {
+ /* Emit a branch island for large basic blocks */
+ code = emit_branch_island (cfg, code, start_offset);
+ offset = code - cfg->native_code;
+ start_offset = offset;
+ }
+
+ mono_debug_record_line_number (cfg, ins, offset);
+
+ dreg = ins->dreg;
+ sreg1 = ins->sreg1;
+ sreg2 = ins->sreg2;
+ imm = ins->inst_imm;
+
+ switch (ins->opcode) {
+ case OP_ICONST:
+ code = emit_imm (code, dreg, ins->inst_c0);
+ break;
+ case OP_I8CONST:
+ code = emit_imm64 (code, dreg, ins->inst_c0);
+ break;
+ case OP_MOVE:
+ if (dreg != sreg1)
+ arm_movx (code, dreg, sreg1);
+ break;
+ case OP_NOP:
+ case OP_RELAXED_NOP:
+ break;
+ case OP_JUMP_TABLE:
+ mono_add_patch_info_rel (cfg, offset, (MonoJumpInfoType)ins->inst_i1, ins->inst_p0, MONO_R_ARM64_IMM);
+ code = emit_imm64_template (code, dreg);
+ break;
+ case OP_BREAK:
+ /*
+ * gdb does not like encountering the hw breakpoint ins in the debugged code.
+ * So instead of emitting a trap, we emit a call to a C function and place a
+ * breakpoint there.
+ */ + code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_break"); + break; + case OP_LOCALLOC: { + guint8 *buf [16]; + + arm_addx_imm (code, ARMREG_IP0, sreg1, (MONO_ARCH_FRAME_ALIGNMENT - 1)); + // FIXME: andx_imm doesn't work yet + code = emit_imm (code, ARMREG_IP1, -MONO_ARCH_FRAME_ALIGNMENT); + arm_andx (code, ARMREG_IP0, ARMREG_IP0, ARMREG_IP1); + //arm_andx_imm (code, ARMREG_IP0, sreg1, - MONO_ARCH_FRAME_ALIGNMENT); + arm_movspx (code, ARMREG_IP1, ARMREG_SP); + arm_subx (code, ARMREG_IP1, ARMREG_IP1, ARMREG_IP0); + arm_movspx (code, ARMREG_SP, ARMREG_IP1); + + /* Init */ + /* ip1 = pointer, ip0 = end */ + arm_addx (code, ARMREG_IP0, ARMREG_IP1, ARMREG_IP0); + buf [0] = code; + arm_cmpx (code, ARMREG_IP1, ARMREG_IP0); + buf [1] = code; + arm_bcc (code, ARMCOND_EQ, 0); + arm_stpx (code, ARMREG_RZR, ARMREG_RZR, ARMREG_IP1, 0); + arm_addx_imm (code, ARMREG_IP1, ARMREG_IP1, 16); + arm_b (code, buf [0]); + arm_patch_rel (buf [1], code, MONO_R_ARM64_BCC); + + arm_movspx (code, dreg, ARMREG_SP); + if (cfg->param_area) + code = emit_subx_sp_imm (code, cfg->param_area); + break; + } + case OP_LOCALLOC_IMM: { + int imm, offset; + + imm = ALIGN_TO (ins->inst_imm, MONO_ARCH_FRAME_ALIGNMENT); + g_assert (arm_is_arith_imm (imm)); + arm_subx_imm (code, ARMREG_SP, ARMREG_SP, imm); + + /* Init */ + g_assert (MONO_ARCH_FRAME_ALIGNMENT == 16); + offset = 0; + while (offset < imm) { + arm_stpx (code, ARMREG_RZR, ARMREG_RZR, ARMREG_SP, offset); + offset += 16; + } + arm_movspx (code, dreg, ARMREG_SP); + if (cfg->param_area) + code = emit_subx_sp_imm (code, cfg->param_area); + break; + } + case OP_AOTCONST: + code = emit_aotconst (cfg, code, dreg, (MonoJumpInfoType)ins->inst_i1, ins->inst_p0); + break; + case OP_OBJC_GET_SELECTOR: + mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_OBJC_SELECTOR_REF, ins->inst_p0); + /* See arch_emit_objc_selector_ref () in aot-compiler.c */ + arm_ldrx_lit (code, ins->dreg, 0); + arm_nop (code); + arm_nop (code); + break; + case OP_SEQ_POINT: { + MonoInst *info_var = cfg->arch.seq_point_info_var; + + /* + * For AOT, we use one got slot per method, which will point to a + * SeqPointInfo structure, containing all the information required + * by the code below. 
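+ *
+ * The assumed layout (a sketch): SeqPointInfo ends with a bp_addrs array
+ * holding one gpointer per 4 byte instruction word of the method, so a
+ * native code offset maps to slot (offset / 4), i.e. the "val" computed
+ * below, and a non-zero entry is the address of the breakpoint trampoline.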
+ */
+ if (cfg->compile_aot) {
+ g_assert (info_var);
+ g_assert (info_var->opcode == OP_REGOFFSET);
+ }
+
+ if (ins->flags & MONO_INST_SINGLE_STEP_LOC) {
+ MonoInst *var = cfg->arch.ss_tramp_var;
+
+ g_assert (var);
+ g_assert (var->opcode == OP_REGOFFSET);
+ /* Load ss_tramp_var */
+ /* This is equal to &ss_trampoline */
+ arm_ldrx (code, ARMREG_IP1, var->inst_basereg, var->inst_offset);
+ /* Load the trampoline address */
+ arm_ldrx (code, ARMREG_IP1, ARMREG_IP1, 0);
+ /* Call it if it is non-null */
+ arm_cbzx (code, ARMREG_IP1, code + 8);
+ arm_blrx (code, ARMREG_IP1);
+ }
+
+ mono_add_seq_point (cfg, bb, ins, code - cfg->native_code);
+
+ if (cfg->compile_aot) {
+ guint32 offset = code - cfg->native_code;
+ guint32 val;
+
+ arm_ldrx (code, ARMREG_IP1, info_var->inst_basereg, info_var->inst_offset);
+ /* Add the offset */
+ val = ((offset / 4) * sizeof (guint8*)) + MONO_STRUCT_OFFSET (SeqPointInfo, bp_addrs);
+ /* Load the info->bp_addrs [offset], which is either 0 or the address of the bp trampoline */
+ code = emit_ldrx (code, ARMREG_IP1, ARMREG_IP1, val);
+ /* Skip the load if it is 0 */
+ arm_cbzx (code, ARMREG_IP1, code + 8);
+ /* Call the breakpoint trampoline */
+ arm_blrx (code, ARMREG_IP1);
+ } else {
+ MonoInst *var = cfg->arch.bp_tramp_var;
+
+ g_assert (var);
+ g_assert (var->opcode == OP_REGOFFSET);
+ /* Load the address of the bp trampoline into IP0 */
+ arm_ldrx (code, ARMREG_IP0, var->inst_basereg, var->inst_offset);
+ /*
+ * A placeholder for a possible breakpoint inserted by
+ * mono_arch_set_breakpoint ().
+ */
+ arm_nop (code);
+ }
+ break;
+ }
+
+ /* BRANCH */
+ case OP_BR:
+ mono_add_patch_info_rel (cfg, offset, MONO_PATCH_INFO_BB, ins->inst_target_bb, MONO_R_ARM64_B);
+ arm_b (code, code);
+ break;
+ case OP_BR_REG:
+ arm_brx (code, sreg1);
+ break;
+ case OP_IBEQ:
+ case OP_IBGE:
+ case OP_IBGT:
+ case OP_IBLE:
+ case OP_IBLT:
+ case OP_IBNE_UN:
+ case OP_IBGE_UN:
+ case OP_IBGT_UN:
+ case OP_IBLE_UN:
+ case OP_IBLT_UN:
+ case OP_LBEQ:
+ case OP_LBGE:
+ case OP_LBGT:
+ case OP_LBLE:
+ case OP_LBLT:
+ case OP_LBNE_UN:
+ case OP_LBGE_UN:
+ case OP_LBGT_UN:
+ case OP_LBLE_UN:
+ case OP_LBLT_UN:
+ case OP_FBEQ:
+ case OP_FBNE_UN:
+ case OP_FBLT:
+ case OP_FBGT:
+ case OP_FBGT_UN:
+ case OP_FBLE:
+ case OP_FBGE:
+ case OP_FBGE_UN: {
+ int cond;
+
+ mono_add_patch_info_rel (cfg, offset, MONO_PATCH_INFO_BB, ins->inst_true_bb, MONO_R_ARM64_BCC);
+ cond = opcode_to_armcond (ins->opcode);
+ arm_bcc (code, cond, 0);
+ break;
+ }
+ case OP_FBLT_UN:
+ mono_add_patch_info_rel (cfg, offset, MONO_PATCH_INFO_BB, ins->inst_true_bb, MONO_R_ARM64_BCC);
+ /* For fp compares, ARMCOND_LT is lt or unordered */
+ arm_bcc (code, ARMCOND_LT, 0);
+ break;
+ case OP_FBLE_UN:
+ mono_add_patch_info_rel (cfg, offset, MONO_PATCH_INFO_BB, ins->inst_true_bb, MONO_R_ARM64_BCC);
+ arm_bcc (code, ARMCOND_EQ, 0);
+ offset = code - cfg->native_code;
+ mono_add_patch_info_rel (cfg, offset, MONO_PATCH_INFO_BB, ins->inst_true_bb, MONO_R_ARM64_BCC);
+ /* For fp compares, ARMCOND_LT is lt or unordered */
+ arm_bcc (code, ARMCOND_LT, 0);
+ break;
+ case OP_ARM64_CBZW:
+ mono_add_patch_info_rel (cfg, offset, MONO_PATCH_INFO_BB, ins->inst_true_bb, MONO_R_ARM64_CBZ);
+ arm_cbzw (code, sreg1, 0);
+ break;
+ case OP_ARM64_CBZX:
+ mono_add_patch_info_rel (cfg, offset, MONO_PATCH_INFO_BB, ins->inst_true_bb, MONO_R_ARM64_CBZ);
+ arm_cbzx (code, sreg1, 0);
+ break;
+ case OP_ARM64_CBNZW:
+ mono_add_patch_info_rel (cfg, offset, MONO_PATCH_INFO_BB, ins->inst_true_bb, MONO_R_ARM64_CBZ);
+ arm_cbnzw (code, sreg1, 0);
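+ /*
+ * These OP_ARM64_CBZW/CBZX/CBNZW/CBNZX opcodes are produced by
+ * mono_arch_lowering_pass () above, which fuses a compare against zero
+ * with the following branch, e.g. (illustrative) "icompare_imm r1, 0;
+ * ibeq L" becomes the single instruction "cbzw r1, L".
+ */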
break; + case OP_ARM64_CBNZX: + mono_add_patch_info_rel (cfg, offset, MONO_PATCH_INFO_BB, ins->inst_true_bb, MONO_R_ARM64_CBZ); + arm_cbnzx (code, sreg1, 0); + break; + /* ALU */ + case OP_IADD: + arm_addw (code, dreg, sreg1, sreg2); + break; + case OP_LADD: + arm_addx (code, dreg, sreg1, sreg2); + break; + case OP_ISUB: + arm_subw (code, dreg, sreg1, sreg2); + break; + case OP_LSUB: + arm_subx (code, dreg, sreg1, sreg2); + break; + case OP_IAND: + arm_andw (code, dreg, sreg1, sreg2); + break; + case OP_LAND: + arm_andx (code, dreg, sreg1, sreg2); + break; + case OP_IOR: + arm_orrw (code, dreg, sreg1, sreg2); + break; + case OP_LOR: + arm_orrx (code, dreg, sreg1, sreg2); + break; + case OP_IXOR: + arm_eorw (code, dreg, sreg1, sreg2); + break; + case OP_LXOR: + arm_eorx (code, dreg, sreg1, sreg2); + break; + case OP_INEG: + arm_negw (code, dreg, sreg1); + break; + case OP_LNEG: + arm_negx (code, dreg, sreg1); + break; + case OP_INOT: + arm_mvnw (code, dreg, sreg1); + break; + case OP_LNOT: + arm_mvnx (code, dreg, sreg1); + break; + case OP_IADDCC: + arm_addsw (code, dreg, sreg1, sreg2); + break; + case OP_ADDCC: + case OP_LADDCC: + arm_addsx (code, dreg, sreg1, sreg2); + break; + case OP_ISUBCC: + arm_subsw (code, dreg, sreg1, sreg2); + break; + case OP_LSUBCC: + case OP_SUBCC: + arm_subsx (code, dreg, sreg1, sreg2); + break; + case OP_ICOMPARE: + arm_cmpw (code, sreg1, sreg2); + break; + case OP_COMPARE: + case OP_LCOMPARE: + arm_cmpx (code, sreg1, sreg2); + break; + case OP_IADD_IMM: + code = emit_addw_imm (code, dreg, sreg1, imm); + break; + case OP_LADD_IMM: + case OP_ADD_IMM: + code = emit_addx_imm (code, dreg, sreg1, imm); + break; + case OP_ISUB_IMM: + code = emit_subw_imm (code, dreg, sreg1, imm); + break; + case OP_LSUB_IMM: + code = emit_subx_imm (code, dreg, sreg1, imm); + break; + case OP_IAND_IMM: + code = emit_andw_imm (code, dreg, sreg1, imm); + break; + case OP_LAND_IMM: + case OP_AND_IMM: + code = emit_andx_imm (code, dreg, sreg1, imm); + break; + case OP_IOR_IMM: + code = emit_orrw_imm (code, dreg, sreg1, imm); + break; + case OP_LOR_IMM: + code = emit_orrx_imm (code, dreg, sreg1, imm); + break; + case OP_IXOR_IMM: + code = emit_eorw_imm (code, dreg, sreg1, imm); + break; + case OP_LXOR_IMM: + code = emit_eorx_imm (code, dreg, sreg1, imm); + break; + case OP_ICOMPARE_IMM: + code = emit_cmpw_imm (code, sreg1, imm); + break; + case OP_LCOMPARE_IMM: + case OP_COMPARE_IMM: + if (imm == 0) { + arm_cmpx (code, sreg1, ARMREG_RZR); + } else { + // FIXME: 32 vs 64 bit issues for 0xffffffff + code = emit_imm64 (code, ARMREG_LR, imm); + arm_cmpx (code, sreg1, ARMREG_LR); + } + break; + case OP_ISHL: + arm_lslvw (code, dreg, sreg1, sreg2); + break; + case OP_LSHL: + arm_lslvx (code, dreg, sreg1, sreg2); + break; + case OP_ISHR: + arm_asrvw (code, dreg, sreg1, sreg2); + break; + case OP_LSHR: + arm_asrvx (code, dreg, sreg1, sreg2); + break; + case OP_ISHR_UN: + arm_lsrvw (code, dreg, sreg1, sreg2); + break; + case OP_LSHR_UN: + arm_lsrvx (code, dreg, sreg1, sreg2); + break; + case OP_ISHL_IMM: + if (imm == 0) + arm_movx (code, dreg, sreg1); + else + arm_lslw (code, dreg, sreg1, imm); + break; + case OP_LSHL_IMM: + if (imm == 0) + arm_movx (code, dreg, sreg1); + else + arm_lslx (code, dreg, sreg1, imm); + break; + case OP_ISHR_IMM: + if (imm == 0) + arm_movx (code, dreg, sreg1); + else + arm_asrw (code, dreg, sreg1, imm); + break; + case OP_LSHR_IMM: + case OP_SHR_IMM: + if (imm == 0) + arm_movx (code, dreg, sreg1); + else + arm_asrx (code, dreg, sreg1, imm); + break; + case 
OP_ISHR_UN_IMM: + if (imm == 0) + arm_movx (code, dreg, sreg1); + else + arm_lsrw (code, dreg, sreg1, imm); + break; + case OP_SHR_UN_IMM: + case OP_LSHR_UN_IMM: + if (imm == 0) + arm_movx (code, dreg, sreg1); + else + arm_lsrx (code, dreg, sreg1, imm); + break; + + /* 64BIT ALU */ + case OP_SEXT_I4: + arm_sxtwx (code, dreg, sreg1); + break; + case OP_ZEXT_I4: + /* Clean out the upper word */ + arm_movw (code, dreg, sreg1); + break; + case OP_SHL_IMM: + arm_lslx (code, dreg, sreg1, imm); + break; + + /* MULTIPLY/DIVISION */ + case OP_IDIV: + case OP_IREM: + // FIXME: Optimize this + /* Check for zero */ + arm_cmpx_imm (code, sreg2, 0); + code = emit_cond_exc (cfg, code, OP_COND_EXC_IEQ, "DivideByZeroException"); + /* Check for INT_MIN/-1 */ + code = emit_imm (code, ARMREG_IP0, 0x80000000); + arm_cmpx (code, sreg1, ARMREG_IP0); + arm_cset (code, ARMCOND_EQ, ARMREG_IP1); + code = emit_imm (code, ARMREG_IP0, 0xffffffff); + arm_cmpx (code, sreg2, ARMREG_IP0); + arm_cset (code, ARMCOND_EQ, ARMREG_IP0); + arm_andx (code, ARMREG_IP0, ARMREG_IP0, ARMREG_IP1); + arm_cmpx_imm (code, ARMREG_IP0, 1); + code = emit_cond_exc (cfg, code, OP_COND_EXC_IEQ, "OverflowException"); + if (ins->opcode == OP_IREM) { + arm_sdivw (code, ARMREG_LR, sreg1, sreg2); + arm_msubw (code, dreg, ARMREG_LR, sreg2, sreg1); + } else { + arm_sdivw (code, dreg, sreg1, sreg2); + } + break; + case OP_IDIV_UN: + arm_cmpx_imm (code, sreg2, 0); + code = emit_cond_exc (cfg, code, OP_COND_EXC_IEQ, "DivideByZeroException"); + arm_udivw (code, dreg, sreg1, sreg2); + break; + case OP_IREM_UN: + arm_cmpx_imm (code, sreg2, 0); + code = emit_cond_exc (cfg, code, OP_COND_EXC_IEQ, "DivideByZeroException"); + arm_udivw (code, ARMREG_LR, sreg1, sreg2); + arm_msubw (code, dreg, ARMREG_LR, sreg2, sreg1); + break; + case OP_LDIV: + case OP_LREM: + // FIXME: Optimize this + /* Check for zero */ + arm_cmpx_imm (code, sreg2, 0); + code = emit_cond_exc (cfg, code, OP_COND_EXC_IEQ, "DivideByZeroException"); + /* Check for INT64_MIN/-1 */ + code = emit_imm64 (code, ARMREG_IP0, 0x8000000000000000); + arm_cmpx (code, sreg1, ARMREG_IP0); + arm_cset (code, ARMCOND_EQ, ARMREG_IP1); + code = emit_imm64 (code, ARMREG_IP0, 0xffffffffffffffff); + arm_cmpx (code, sreg2, ARMREG_IP0); + arm_cset (code, ARMCOND_EQ, ARMREG_IP0); + arm_andx (code, ARMREG_IP0, ARMREG_IP0, ARMREG_IP1); + arm_cmpx_imm (code, ARMREG_IP0, 1); + /* 64 bit uses ArithmeticException */ + code = emit_cond_exc (cfg, code, OP_COND_EXC_IEQ, "ArithmeticException"); + if (ins->opcode == OP_LREM) { + arm_sdivx (code, ARMREG_LR, sreg1, sreg2); + arm_msubx (code, dreg, ARMREG_LR, sreg2, sreg1); + } else { + arm_sdivx (code, dreg, sreg1, sreg2); + } + break; + case OP_LDIV_UN: + arm_cmpx_imm (code, sreg2, 0); + code = emit_cond_exc (cfg, code, OP_COND_EXC_IEQ, "DivideByZeroException"); + arm_udivx (code, dreg, sreg1, sreg2); + break; + case OP_LREM_UN: + arm_cmpx_imm (code, sreg2, 0); + code = emit_cond_exc (cfg, code, OP_COND_EXC_IEQ, "DivideByZeroException"); + arm_udivx (code, ARMREG_LR, sreg1, sreg2); + arm_msubx (code, dreg, ARMREG_LR, sreg2, sreg1); + break; + case OP_IMUL: + arm_mulw (code, dreg, sreg1, sreg2); + break; + case OP_LMUL: + arm_mulx (code, dreg, sreg1, sreg2); + break; + case OP_IMUL_IMM: + code = emit_imm (code, ARMREG_LR, imm); + arm_mulw (code, dreg, sreg1, ARMREG_LR); + break; + case OP_MUL_IMM: + case OP_LMUL_IMM: + code = emit_imm (code, ARMREG_LR, imm); + arm_mulx (code, dreg, sreg1, ARMREG_LR); + break; + + /* CONVERSIONS */ + case OP_ICONV_TO_I1: + case OP_LCONV_TO_I1: + 
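+ /*
+ * Editorial note (hedged): the narrowing conversions below map one to
+ * one onto AArch64 extend instructions; as plain C, a sketch:
+ *
+ *   gint64  conv_i1 (gint64 v) { return (gint8) v;   }  // sxtbx
+ *   gint64  conv_i2 (gint64 v) { return (gint16) v;  }  // sxthx
+ *   guint32 conv_u1 (gint64 v) { return (guint8) v;  }  // uxtbw
+ *   guint32 conv_u2 (gint64 v) { return (guint16) v; }  // uxthw
+ *
+ * Writing a W register implicitly zeroes the upper 32 bits, which is
+ * why the unsigned variants use the W forms.
+ */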
arm_sxtbx (code, dreg, sreg1); + break; + case OP_ICONV_TO_I2: + case OP_LCONV_TO_I2: + arm_sxthx (code, dreg, sreg1); + break; + case OP_ICONV_TO_U1: + case OP_LCONV_TO_U1: + arm_uxtbw (code, dreg, sreg1); + break; + case OP_ICONV_TO_U2: + case OP_LCONV_TO_U2: + arm_uxthw (code, dreg, sreg1); + break; + + /* CSET */ + case OP_CEQ: + case OP_ICEQ: + case OP_LCEQ: + case OP_CLT: + case OP_ICLT: + case OP_LCLT: + case OP_CGT: + case OP_ICGT: + case OP_LCGT: + case OP_CLT_UN: + case OP_ICLT_UN: + case OP_LCLT_UN: + case OP_CGT_UN: + case OP_ICGT_UN: + case OP_LCGT_UN: + case OP_ICNEQ: + case OP_ICGE: + case OP_ICLE: + case OP_ICGE_UN: + case OP_ICLE_UN: { + int cond; + + cond = opcode_to_armcond (ins->opcode); + arm_cset (code, cond, dreg); + break; + } + case OP_FCEQ: + case OP_FCLT: + case OP_FCLT_UN: + case OP_FCGT: + case OP_FCGT_UN: + case OP_FCNEQ: + case OP_FCLE: + case OP_FCGE: { + int cond; + + cond = opcode_to_armcond (ins->opcode); + arm_fcmpd (code, sreg1, sreg2); + arm_cset (code, cond, dreg); + break; + } + + /* MEMORY */ + case OP_LOADI1_MEMBASE: + code = emit_ldrsbx (code, dreg, ins->inst_basereg, ins->inst_offset); + break; + case OP_LOADU1_MEMBASE: + code = emit_ldrb (code, dreg, ins->inst_basereg, ins->inst_offset); + break; + case OP_LOADI2_MEMBASE: + code = emit_ldrshx (code, dreg, ins->inst_basereg, ins->inst_offset); + break; + case OP_LOADU2_MEMBASE: + code = emit_ldrh (code, dreg, ins->inst_basereg, ins->inst_offset); + break; + case OP_LOADI4_MEMBASE: + code = emit_ldrswx (code, dreg, ins->inst_basereg, ins->inst_offset); + break; + case OP_LOADU4_MEMBASE: + code = emit_ldrw (code, dreg, ins->inst_basereg, ins->inst_offset); + break; + case OP_LOAD_MEMBASE: + case OP_LOADI8_MEMBASE: + code = emit_ldrx (code, dreg, ins->inst_basereg, ins->inst_offset); + break; + case OP_STOREI1_MEMBASE_IMM: + case OP_STOREI2_MEMBASE_IMM: + case OP_STOREI4_MEMBASE_IMM: + case OP_STORE_MEMBASE_IMM: + case OP_STOREI8_MEMBASE_IMM: { + int immreg; + + if (imm != 0) { + code = emit_imm (code, ARMREG_LR, imm); + immreg = ARMREG_LR; + } else { + immreg = ARMREG_RZR; + } + + switch (ins->opcode) { + case OP_STOREI1_MEMBASE_IMM: + code = emit_strb (code, immreg, ins->inst_destbasereg, ins->inst_offset); + break; + case OP_STOREI2_MEMBASE_IMM: + code = emit_strh (code, immreg, ins->inst_destbasereg, ins->inst_offset); + break; + case OP_STOREI4_MEMBASE_IMM: + code = emit_strw (code, immreg, ins->inst_destbasereg, ins->inst_offset); + break; + case OP_STORE_MEMBASE_IMM: + case OP_STOREI8_MEMBASE_IMM: + code = emit_strx (code, immreg, ins->inst_destbasereg, ins->inst_offset); + break; + default: + g_assert_not_reached (); + break; + } + break; + } + case OP_STOREI1_MEMBASE_REG: + code = emit_strb (code, sreg1, ins->inst_destbasereg, ins->inst_offset); + break; + case OP_STOREI2_MEMBASE_REG: + code = emit_strh (code, sreg1, ins->inst_destbasereg, ins->inst_offset); + break; + case OP_STOREI4_MEMBASE_REG: + code = emit_strw (code, sreg1, ins->inst_destbasereg, ins->inst_offset); + break; + case OP_STORE_MEMBASE_REG: + case OP_STOREI8_MEMBASE_REG: + code = emit_strx (code, sreg1, ins->inst_destbasereg, ins->inst_offset); + break; + + case OP_TLS_GET: + code = emit_tls_get (code, dreg, ins->inst_offset); + break; + case OP_TLS_GET_REG: + code = emit_tls_get_reg (code, dreg, sreg1); + break; + case OP_TLS_SET: + code = emit_tls_set (code, sreg1, ins->inst_offset); + break; + case OP_TLS_SET_REG: + code = emit_tls_set_reg (code, sreg1, sreg2); + break; + + /* Atomic */ + case OP_MEMORY_BARRIER: + 
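+ /*
+ * Editorial note (hedged sketch): OP_MEMORY_BARRIER lowers to a dmb,
+ * and the atomic opcodes below all follow the standard AArch64
+ * load-exclusive/store-exclusive retry loop. As a C11 analogue of
+ * OP_ATOMIC_ADD_I4 (illustrative only, the JIT emits the loop inline):
+ *
+ *   #include <stdatomic.h>
+ *
+ *   static gint32
+ *   atomic_add_i4 (_Atomic gint32 *dest, gint32 add)
+ *   {
+ *       // ldaxrw/add/stlxrw loop; returns the new value, as dreg does
+ *       return atomic_fetch_add (dest, add) + add;
+ *   }
+ */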
arm_dmb (code, 0); + break; + case OP_ATOMIC_ADD_I4: { + guint8 *buf [16]; + + buf [0] = code; + arm_ldaxrw (code, ARMREG_IP0, sreg1); + arm_addx (code, ARMREG_IP0, ARMREG_IP0, sreg2); + arm_stlxrw (code, ARMREG_IP1, ARMREG_IP0, sreg1); + arm_cbnzw (code, ARMREG_IP1, buf [0]); + + arm_movx (code, dreg, ARMREG_IP0); + break; + } + case OP_ATOMIC_ADD_I8: { + guint8 *buf [16]; + + buf [0] = code; + arm_ldaxrx (code, ARMREG_IP0, sreg1); + arm_addx (code, ARMREG_IP0, ARMREG_IP0, sreg2); + arm_stlxrx (code, ARMREG_IP1, ARMREG_IP0, sreg1); + arm_cbnzx (code, ARMREG_IP1, buf [0]); + + arm_movx (code, dreg, ARMREG_IP0); + break; + } + case OP_ATOMIC_EXCHANGE_I4: { + guint8 *buf [16]; + + buf [0] = code; + arm_ldaxrw (code, ARMREG_IP0, sreg1); + arm_stlxrw (code, ARMREG_IP1, sreg2, sreg1); + arm_cbnzw (code, ARMREG_IP1, buf [0]); + + arm_movx (code, dreg, ARMREG_IP0); + break; + } + case OP_ATOMIC_EXCHANGE_I8: { + guint8 *buf [16]; + + buf [0] = code; + arm_ldaxrx (code, ARMREG_IP0, sreg1); + arm_stlxrx (code, ARMREG_IP1, sreg2, sreg1); + arm_cbnzw (code, ARMREG_IP1, buf [0]); + + arm_movx (code, dreg, ARMREG_IP0); + break; + } + case OP_ATOMIC_CAS_I4: { + guint8 *buf [16]; + + /* sreg2 is the value, sreg3 is the comparand */ + buf [0] = code; + arm_ldaxrw (code, ARMREG_IP0, sreg1); + arm_cmpw (code, ARMREG_IP0, ins->sreg3); + buf [1] = code; + arm_bcc (code, ARMCOND_NE, 0); + arm_stlxrw (code, ARMREG_IP1, sreg2, sreg1); + arm_cbnzw (code, ARMREG_IP1, buf [0]); + arm_patch_rel (buf [1], code, MONO_R_ARM64_BCC); + + arm_movx (code, dreg, ARMREG_IP0); + break; + } + case OP_ATOMIC_CAS_I8: { + guint8 *buf [16]; + + buf [0] = code; + arm_ldaxrx (code, ARMREG_IP0, sreg1); + arm_cmpx (code, ARMREG_IP0, ins->sreg3); + buf [1] = code; + arm_bcc (code, ARMCOND_NE, 0); + arm_stlxrx (code, ARMREG_IP1, sreg2, sreg1); + arm_cbnzw (code, ARMREG_IP1, buf [0]); + arm_patch_rel (buf [1], code, MONO_R_ARM64_BCC); + + arm_movx (code, dreg, ARMREG_IP0); + break; + } + case OP_ATOMIC_LOAD_I1: { + code = emit_addx_imm (code, ARMREG_LR, ins->inst_basereg, ins->inst_offset); + arm_ldarb (code, ins->dreg, ARMREG_LR); + arm_sxtbx (code, ins->dreg, ins->dreg); + break; + } + case OP_ATOMIC_LOAD_U1: { + code = emit_addx_imm (code, ARMREG_LR, ins->inst_basereg, ins->inst_offset); + arm_ldarb (code, ins->dreg, ARMREG_LR); + arm_uxtbx (code, ins->dreg, ins->dreg); + break; + } + case OP_ATOMIC_LOAD_I2: { + code = emit_addx_imm (code, ARMREG_LR, ins->inst_basereg, ins->inst_offset); + arm_ldarh (code, ins->dreg, ARMREG_LR); + arm_sxthx (code, ins->dreg, ins->dreg); + break; + } + case OP_ATOMIC_LOAD_U2: { + code = emit_addx_imm (code, ARMREG_LR, ins->inst_basereg, ins->inst_offset); + arm_ldarh (code, ins->dreg, ARMREG_LR); + arm_uxthx (code, ins->dreg, ins->dreg); + break; + } + case OP_ATOMIC_LOAD_I4: { + code = emit_addx_imm (code, ARMREG_LR, ins->inst_basereg, ins->inst_offset); + arm_ldarw (code, ins->dreg, ARMREG_LR); + arm_sxtwx (code, ins->dreg, ins->dreg); + break; + } + case OP_ATOMIC_LOAD_U4: { + code = emit_addx_imm (code, ARMREG_LR, ins->inst_basereg, ins->inst_offset); + arm_ldarw (code, ins->dreg, ARMREG_LR); + arm_movw (code, ins->dreg, ins->dreg); /* Clear upper half of the register. 
*/ + break; + } + case OP_ATOMIC_LOAD_I8: + case OP_ATOMIC_LOAD_U8: { + code = emit_addx_imm (code, ARMREG_LR, ins->inst_basereg, ins->inst_offset); + arm_ldarx (code, ins->dreg, ARMREG_LR); + break; + } + case OP_ATOMIC_LOAD_R4: { + code = emit_addx_imm (code, ARMREG_LR, ins->inst_basereg, ins->inst_offset); + if (cfg->r4fp) { + arm_ldarw (code, ARMREG_LR, ARMREG_LR); + arm_fmov_rx_to_double (code, ins->dreg, ARMREG_LR); + } else { + arm_ldarw (code, ARMREG_LR, ARMREG_LR); + arm_fmov_rx_to_double (code, FP_TEMP_REG, ARMREG_LR); + arm_fcvt_sd (code, ins->dreg, FP_TEMP_REG); + } + break; + } + case OP_ATOMIC_LOAD_R8: { + code = emit_addx_imm (code, ARMREG_LR, ins->inst_basereg, ins->inst_offset); + arm_ldarx (code, ARMREG_LR, ARMREG_LR); + arm_fmov_rx_to_double (code, ins->dreg, ARMREG_LR); + break; + } + case OP_ATOMIC_STORE_I1: + case OP_ATOMIC_STORE_U1: { + code = emit_addx_imm (code, ARMREG_LR, ins->inst_destbasereg, ins->inst_offset); + arm_stlrb (code, ARMREG_LR, ins->sreg1); + break; + } + case OP_ATOMIC_STORE_I2: + case OP_ATOMIC_STORE_U2: { + code = emit_addx_imm (code, ARMREG_LR, ins->inst_destbasereg, ins->inst_offset); + arm_stlrh (code, ARMREG_LR, ins->sreg1); + break; + } + case OP_ATOMIC_STORE_I4: + case OP_ATOMIC_STORE_U4: { + code = emit_addx_imm (code, ARMREG_LR, ins->inst_destbasereg, ins->inst_offset); + arm_stlrw (code, ARMREG_LR, ins->sreg1); + break; + } + case OP_ATOMIC_STORE_I8: + case OP_ATOMIC_STORE_U8: { + code = emit_addx_imm (code, ARMREG_LR, ins->inst_destbasereg, ins->inst_offset); + arm_stlrx (code, ARMREG_LR, ins->sreg1); + break; + } + case OP_ATOMIC_STORE_R4: { + code = emit_addx_imm (code, ARMREG_LR, ins->inst_destbasereg, ins->inst_offset); + if (cfg->r4fp) { + arm_fmov_double_to_rx (code, ARMREG_IP0, ins->sreg1); + arm_stlrw (code, ARMREG_LR, ARMREG_IP0); + } else { + arm_fcvt_ds (code, FP_TEMP_REG, ins->sreg1); + arm_fmov_double_to_rx (code, ARMREG_IP0, FP_TEMP_REG); + arm_stlrw (code, ARMREG_LR, ARMREG_IP0); + } + break; + } + case OP_ATOMIC_STORE_R8: { + code = emit_addx_imm (code, ARMREG_LR, ins->inst_destbasereg, ins->inst_offset); + arm_fmov_double_to_rx (code, ARMREG_IP0, ins->sreg1); + arm_stlrx (code, ARMREG_LR, ARMREG_IP0); + break; + } + + /* FP */ + case OP_R8CONST: { + guint64 imm = *(guint64*)ins->inst_p0; + + if (imm == 0) { + arm_fmov_rx_to_double (code, dreg, ARMREG_RZR); + } else { + code = emit_imm64 (code, ARMREG_LR, imm); + arm_fmov_rx_to_double (code, ins->dreg, ARMREG_LR); + } + break; + } + case OP_R4CONST: { + guint64 imm = *(guint32*)ins->inst_p0; + + code = emit_imm64 (code, ARMREG_LR, imm); + if (cfg->r4fp) { + arm_fmov_rx_to_double (code, dreg, ARMREG_LR); + } else { + arm_fmov_rx_to_double (code, FP_TEMP_REG, ARMREG_LR); + arm_fcvt_sd (code, dreg, FP_TEMP_REG); + } + break; + } + case OP_LOADR8_MEMBASE: + code = emit_ldrfpx (code, dreg, ins->inst_basereg, ins->inst_offset); + break; + case OP_LOADR4_MEMBASE: + if (cfg->r4fp) { + code = emit_ldrfpw (code, dreg, ins->inst_basereg, ins->inst_offset); + } else { + code = emit_ldrfpw (code, FP_TEMP_REG, ins->inst_basereg, ins->inst_offset); + arm_fcvt_sd (code, dreg, FP_TEMP_REG); + } + break; + case OP_STORER8_MEMBASE_REG: + code = emit_strfpx (code, sreg1, ins->inst_destbasereg, ins->inst_offset); + break; + case OP_STORER4_MEMBASE_REG: + if (cfg->r4fp) { + code = emit_strfpw (code, sreg1, ins->inst_destbasereg, ins->inst_offset); + } else { + arm_fcvt_ds (code, FP_TEMP_REG, sreg1); + code = emit_strfpw (code, FP_TEMP_REG, ins->inst_destbasereg, ins->inst_offset); + } + 
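+ /*
+ * Editorial note: when cfg->r4fp is unset, R4 values live in fp
+ * registers widened to double, so a float store narrows first
+ * (fcvt_ds) and a float load widens back (fcvt_sd); with r4fp set
+ * the value stays in single precision and moves directly. In C terms:
+ *
+ *   double d = ...;       // !r4fp in-register representation
+ *   float f = (float) d;  // fcvt_ds, then strfpw stores f
+ */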
break; + case OP_FMOVE: + if (dreg != sreg1) + arm_fmovd (code, dreg, sreg1); + break; + case OP_RMOVE: + if (dreg != sreg1) + arm_fmovs (code, dreg, sreg1); + break; + case OP_MOVE_F_TO_I4: + if (cfg->r4fp) { + arm_fmov_double_to_rx (code, ins->dreg, ins->sreg1); + } else { + /* Narrow in the scratch fp reg, so we don't clobber the fp reg aliased by dreg */ + arm_fcvt_ds (code, FP_TEMP_REG, ins->sreg1); + arm_fmov_double_to_rx (code, ins->dreg, FP_TEMP_REG); + } + break; + case OP_MOVE_I4_TO_F: + if (cfg->r4fp) { + arm_fmov_rx_to_double (code, ins->dreg, ins->sreg1); + } else { + arm_fmov_rx_to_double (code, ins->dreg, ins->sreg1); + arm_fcvt_sd (code, ins->dreg, ins->dreg); + } + break; + case OP_MOVE_F_TO_I8: + arm_fmov_double_to_rx (code, ins->dreg, ins->sreg1); + break; + case OP_MOVE_I8_TO_F: + arm_fmov_rx_to_double (code, ins->dreg, ins->sreg1); + break; + case OP_FCOMPARE: + arm_fcmpd (code, sreg1, sreg2); + break; + case OP_RCOMPARE: + arm_fcmps (code, sreg1, sreg2); + break; + case OP_FCONV_TO_I1: + arm_fcvtzs_dx (code, dreg, sreg1); + arm_sxtbx (code, dreg, dreg); + break; + case OP_FCONV_TO_U1: + arm_fcvtzu_dx (code, dreg, sreg1); + arm_uxtbw (code, dreg, dreg); + break; + case OP_FCONV_TO_I2: + arm_fcvtzs_dx (code, dreg, sreg1); + arm_sxthx (code, dreg, dreg); + break; + case OP_FCONV_TO_U2: + arm_fcvtzu_dx (code, dreg, sreg1); + arm_uxthw (code, dreg, dreg); + break; + case OP_FCONV_TO_I4: + arm_fcvtzs_dx (code, dreg, sreg1); + arm_sxtwx (code, dreg, dreg); + break; + case OP_FCONV_TO_U4: + arm_fcvtzu_dx (code, dreg, sreg1); + break; + case OP_FCONV_TO_I8: + arm_fcvtzs_dx (code, dreg, sreg1); + break; + case OP_FCONV_TO_U8: + arm_fcvtzu_dx (code, dreg, sreg1); + break; + case OP_FCONV_TO_R4: + if (cfg->r4fp) { + arm_fcvt_ds (code, dreg, sreg1); + } else { + arm_fcvt_ds (code, FP_TEMP_REG, sreg1); + arm_fcvt_sd (code, dreg, FP_TEMP_REG); + } + break; + case OP_ICONV_TO_R4: + if (cfg->r4fp) { + arm_scvtf_rw_to_s (code, dreg, sreg1); + } else { + arm_scvtf_rw_to_s (code, FP_TEMP_REG, sreg1); + arm_fcvt_sd (code, dreg, FP_TEMP_REG); + } + break; + case OP_LCONV_TO_R4: + if (cfg->r4fp) { + arm_scvtf_rx_to_s (code, dreg, sreg1); + } else { + arm_scvtf_rx_to_s (code, FP_TEMP_REG, sreg1); + arm_fcvt_sd (code, dreg, FP_TEMP_REG); + } + break; + case OP_ICONV_TO_R8: + arm_scvtf_rw_to_d (code, dreg, sreg1); + break; + case OP_LCONV_TO_R8: + arm_scvtf_rx_to_d (code, dreg, sreg1); + break; + case OP_ICONV_TO_R_UN: + arm_ucvtf_rw_to_d (code, dreg, sreg1); + break; + case OP_LCONV_TO_R_UN: + arm_ucvtf_rx_to_d (code, dreg, sreg1); + break; + case OP_FADD: + arm_fadd_d (code, dreg, sreg1, sreg2); + break; + case OP_FSUB: + arm_fsub_d (code, dreg, sreg1, sreg2); + break; + case OP_FMUL: + arm_fmul_d (code, dreg, sreg1, sreg2); + break; + case OP_FDIV: + arm_fdiv_d (code, dreg, sreg1, sreg2); + break; + case OP_FREM: + /* Emulated */ + g_assert_not_reached (); + break; + case OP_FNEG: + arm_fneg_d (code, dreg, sreg1); + break; + case OP_ARM_SETFREG_R4: + arm_fcvt_ds (code, dreg, sreg1); + break; + case OP_CKFINITE: + /* Check for infinity */ + code = emit_imm64 (code, ARMREG_LR, 0x7fefffffffffffffLL); + arm_fmov_rx_to_double (code, FP_TEMP_REG, ARMREG_LR); + arm_fabs_d (code, FP_TEMP_REG2, sreg1); + arm_fcmpd (code, FP_TEMP_REG2, FP_TEMP_REG); + code = emit_cond_exc (cfg, code, OP_COND_EXC_GT, "ArithmeticException"); + /* Check for nans */ + arm_fcmpd (code, FP_TEMP_REG2, FP_TEMP_REG2); + code = emit_cond_exc (cfg, code, OP_COND_EXC_OV, "ArithmeticException"); + arm_fmovd (code, dreg, sreg1); + break; + + /* R4 */ + case OP_RADD: + arm_fadd_s (code, dreg, sreg1, sreg2); + break; + 
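+ /*
+ * Editorial note on the OP_CKFINITE sequence above (hedged): the
+ * constant 0x7fefffffffffffff is the bit pattern of DBL_MAX, so the
+ * ordered compare fabs(x) > DBL_MAX only succeeds for +/-infinity,
+ * while the self-compare raises the unordered (V flag) condition for
+ * NaNs. The C equivalent is roughly:
+ *
+ *   #include <math.h>
+ *
+ *   static double
+ *   ckfinite (double x)
+ *   {
+ *       if (isinf (x) || isnan (x))
+ *           raise_arithmetic_exception ();  // hypothetical helper
+ *       return x;
+ *   }
+ */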
case OP_RSUB: + arm_fsub_s (code, dreg, sreg1, sreg2); + break; + case OP_RMUL: + arm_fmul_s (code, dreg, sreg1, sreg2); + break; + case OP_RDIV: + arm_fdiv_s (code, dreg, sreg1, sreg2); + break; + case OP_RNEG: + arm_fneg_s (code, dreg, sreg1); + break; + case OP_RCONV_TO_I1: + arm_fcvtzs_sx (code, dreg, sreg1); + arm_sxtbx (code, dreg, dreg); + break; + case OP_RCONV_TO_U1: + arm_fcvtzu_sx (code, dreg, sreg1); + arm_uxtbw (code, dreg, dreg); + break; + case OP_RCONV_TO_I2: + arm_fcvtzs_sx (code, dreg, sreg1); + arm_sxthx (code, dreg, dreg); + break; + case OP_RCONV_TO_U2: + arm_fcvtzu_sx (code, dreg, sreg1); + arm_uxthw (code, dreg, dreg); + break; + case OP_RCONV_TO_I4: + arm_fcvtzs_sx (code, dreg, sreg1); + arm_sxtwx (code, dreg, dreg); + break; + case OP_RCONV_TO_U4: + arm_fcvtzu_sx (code, dreg, sreg1); + break; + case OP_RCONV_TO_I8: + arm_fcvtzs_sx (code, dreg, sreg1); + break; + case OP_RCONV_TO_U8: + arm_fcvtzu_sx (code, dreg, sreg1); + break; + case OP_RCONV_TO_R8: + arm_fcvt_sd (code, dreg, sreg1); + break; + case OP_RCONV_TO_R4: + if (dreg != sreg1) + arm_fmovs (code, dreg, sreg1); + break; + case OP_RCEQ: + case OP_RCLT: + case OP_RCLT_UN: + case OP_RCGT: + case OP_RCGT_UN: + case OP_RCNEQ: + case OP_RCLE: + case OP_RCGE: { + int cond; + + cond = opcode_to_armcond (ins->opcode); + arm_fcmps (code, sreg1, sreg2); + arm_cset (code, cond, dreg); + break; + } + + /* CALLS */ + case OP_VOIDCALL: + case OP_CALL: + case OP_LCALL: + case OP_FCALL: + case OP_RCALL: + case OP_VCALL2: + call = (MonoCallInst*)ins; + if (ins->flags & MONO_INST_HAS_METHOD) + code = emit_call (cfg, code, MONO_PATCH_INFO_METHOD, call->method); + else + code = emit_call (cfg, code, MONO_PATCH_INFO_ABS, call->fptr); + code = emit_move_return_value (cfg, code, ins); + break; + case OP_VOIDCALL_REG: + case OP_CALL_REG: + case OP_LCALL_REG: + case OP_FCALL_REG: + case OP_RCALL_REG: + case OP_VCALL2_REG: + arm_blrx (code, sreg1); + code = emit_move_return_value (cfg, code, ins); + break; + case OP_VOIDCALL_MEMBASE: + case OP_CALL_MEMBASE: + case OP_LCALL_MEMBASE: + case OP_FCALL_MEMBASE: + case OP_RCALL_MEMBASE: + case OP_VCALL2_MEMBASE: + code = emit_ldrx (code, ARMREG_IP0, ins->inst_basereg, ins->inst_offset); + arm_blrx (code, ARMREG_IP0); + code = emit_move_return_value (cfg, code, ins); + break; + case OP_TAILCALL: { + MonoCallInst *call = (MonoCallInst*)ins; + + g_assert (!cfg->method->save_lmf); + + // FIXME: Copy stack arguments + + /* Restore registers */ + code = emit_load_regset (code, MONO_ARCH_CALLEE_SAVED_REGS & cfg->used_int_regs, ARMREG_FP, cfg->arch.saved_gregs_offset); + + /* Destroy frame */ + code = mono_arm_emit_destroy_frame (code, cfg->stack_offset, ((1 << ARMREG_IP0) | (1 << ARMREG_IP1))); + + if (cfg->compile_aot) { + /* This is not a PLT patch */ + code = emit_aotconst (cfg, code, ARMREG_IP0, MONO_PATCH_INFO_METHOD_JUMP, call->method); + arm_brx (code, ARMREG_IP0); + } else { + mono_add_patch_info_rel (cfg, code - cfg->native_code, MONO_PATCH_INFO_METHOD_JUMP, call->method, MONO_R_ARM64_B); + arm_b (code, code); + } + ins->flags |= MONO_INST_GC_CALLSITE; + ins->backend.pc_offset = code - cfg->native_code; + break; + } + case OP_ARGLIST: + g_assert (cfg->arch.cinfo); + code = emit_addx_imm (code, ARMREG_IP0, cfg->arch.args_reg, ((CallInfo*)cfg->arch.cinfo)->sig_cookie.offset); + arm_strx (code, ARMREG_IP0, sreg1, 0); + break; + case OP_DYN_CALL: { + MonoInst *var = cfg->dyn_call_var; + guint8 *labels [16]; + int i; + + /* + * sreg1 points to a DynCallArgs structure initialized by 
mono_arch_start_dyn_call (). + * sreg2 is the function to call. + */ + + g_assert (var->opcode == OP_REGOFFSET); + + arm_movx (code, ARMREG_LR, sreg1); + arm_movx (code, ARMREG_IP1, sreg2); + + /* Save args buffer */ + code = emit_strx (code, ARMREG_LR, var->inst_basereg, var->inst_offset); + + /* Set fp argument regs */ + code = emit_ldrw (code, ARMREG_R0, ARMREG_LR, MONO_STRUCT_OFFSET (DynCallArgs, n_fpargs)); + arm_cmpw (code, ARMREG_R0, ARMREG_RZR); + labels [0] = code; + arm_bcc (code, ARMCOND_EQ, 0); + for (i = 0; i < 8; ++i) + code = emit_ldrfpx (code, ARMREG_D0 + i, ARMREG_LR, MONO_STRUCT_OFFSET (DynCallArgs, fpregs) + (i * 8)); + arm_patch_rel (labels [0], code, MONO_R_ARM64_BCC); + + /* Set stack args */ + for (i = 0; i < DYN_CALL_STACK_ARGS; ++i) { + code = emit_ldrx (code, ARMREG_R0, ARMREG_LR, MONO_STRUCT_OFFSET (DynCallArgs, regs) + ((PARAM_REGS + 1 + i) * sizeof (mgreg_t))); + code = emit_strx (code, ARMREG_R0, ARMREG_SP, i * sizeof (mgreg_t)); + } + + /* Set argument registers + r8 */ + code = mono_arm_emit_load_regarray (code, 0x1ff, ARMREG_LR, 0); + + /* Make the call */ + arm_blrx (code, ARMREG_IP1); + + /* Save result */ + code = emit_ldrx (code, ARMREG_LR, var->inst_basereg, var->inst_offset); + arm_strx (code, ARMREG_R0, ARMREG_LR, MONO_STRUCT_OFFSET (DynCallArgs, res)); + arm_strx (code, ARMREG_R1, ARMREG_LR, MONO_STRUCT_OFFSET (DynCallArgs, res2)); + /* Save fp result */ + code = emit_ldrw (code, ARMREG_R0, ARMREG_LR, MONO_STRUCT_OFFSET (DynCallArgs, n_fpret)); + arm_cmpw (code, ARMREG_R0, ARMREG_RZR); + labels [1] = code; + arm_bcc (code, ARMCOND_EQ, 0); + for (i = 0; i < 8; ++i) + code = emit_strfpx (code, ARMREG_D0 + i, ARMREG_LR, MONO_STRUCT_OFFSET (DynCallArgs, fpregs) + (i * 8)); + arm_patch_rel (labels [1], code, MONO_R_ARM64_BCC); + break; + } + + case OP_GENERIC_CLASS_INIT: { + static int byte_offset = -1; + static guint8 bitmask; + guint8 *jump; + + if (byte_offset < 0) + mono_marshal_find_bitfield_offset (MonoVTable, initialized, &byte_offset, &bitmask); + + /* Load vtable->initialized */ + arm_ldrsbx (code, ARMREG_IP0, sreg1, byte_offset); + /* FIXME: No andx_imm yet */ + code = mono_arm_emit_imm64 (code, ARMREG_IP1, bitmask); + arm_andx (code, ARMREG_IP0, ARMREG_IP0, ARMREG_IP1); + jump = code; + arm_cbnzx (code, ARMREG_IP0, 0); + + /* Slowpath */ + g_assert (sreg1 == ARMREG_R0); + code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, + (gpointer)"mono_generic_class_init"); + + mono_arm_patch (jump, code, MONO_R_ARM64_CBZ); + break; + } + + case OP_CHECK_THIS: + arm_ldrx (code, ARMREG_LR, sreg1, 0); + break; + case OP_NOT_NULL: + case OP_NOT_REACHED: + case OP_DUMMY_USE: + break; + case OP_IL_SEQ_POINT: + mono_add_seq_point (cfg, bb, ins, code - cfg->native_code); + break; + + /* EH */ + case OP_COND_EXC_C: + case OP_COND_EXC_IC: + case OP_COND_EXC_OV: + case OP_COND_EXC_IOV: + case OP_COND_EXC_NC: + case OP_COND_EXC_INC: + case OP_COND_EXC_NO: + case OP_COND_EXC_INO: + case OP_COND_EXC_EQ: + case OP_COND_EXC_IEQ: + case OP_COND_EXC_NE_UN: + case OP_COND_EXC_INE_UN: + case OP_COND_EXC_ILT: + case OP_COND_EXC_LT: + case OP_COND_EXC_ILT_UN: + case OP_COND_EXC_LT_UN: + case OP_COND_EXC_IGT: + case OP_COND_EXC_GT: + case OP_COND_EXC_IGT_UN: + case OP_COND_EXC_GT_UN: + case OP_COND_EXC_IGE: + case OP_COND_EXC_GE: + case OP_COND_EXC_IGE_UN: + case OP_COND_EXC_GE_UN: + case OP_COND_EXC_ILE: + case OP_COND_EXC_LE: + case OP_COND_EXC_ILE_UN: + case OP_COND_EXC_LE_UN: + code = emit_cond_exc (cfg, code, ins->opcode, ins->inst_p1); + break; + case OP_THROW: 
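+ /*
+ * Editorial note: for both throw opcodes the exception object is moved
+ * to r0 and a JIT icall does the rest; conceptually the callee is a
+ * no-return helper along the lines of
+ *
+ *   void mono_arch_throw_exception (MonoObject *exc);  // never returns
+ *
+ * which captures the register state and enters the EH machinery.
+ */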
+ if (sreg1 != ARMREG_R0) + arm_movx (code, ARMREG_R0, sreg1); + code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, + (gpointer)"mono_arch_throw_exception"); + break; + case OP_RETHROW: + if (sreg1 != ARMREG_R0) + arm_movx (code, ARMREG_R0, sreg1); + code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, + (gpointer)"mono_arch_rethrow_exception"); + break; + case OP_CALL_HANDLER: + mono_add_patch_info_rel (cfg, offset, MONO_PATCH_INFO_BB, ins->inst_target_bb, MONO_R_ARM64_BL); + arm_bl (code, 0); + cfg->thunk_area += THUNK_SIZE; + break; + case OP_START_HANDLER: { + MonoInst *spvar = mono_find_spvar_for_region (cfg, bb->region); + + /* Save caller address */ + code = emit_strx (code, ARMREG_LR, spvar->inst_basereg, spvar->inst_offset); + + /* + * Reserve a param area, see test_0_finally_param_area (). + * This is needed because the param area is not set up when + * we are called from EH code. + */ + if (cfg->param_area) + code = emit_subx_sp_imm (code, cfg->param_area); + break; + } + case OP_ENDFINALLY: + case OP_ENDFILTER: { + MonoInst *spvar = mono_find_spvar_for_region (cfg, bb->region); + + if (cfg->param_area) + code = emit_addx_sp_imm (code, cfg->param_area); + + if (ins->opcode == OP_ENDFILTER && sreg1 != ARMREG_R0) + arm_movx (code, ARMREG_R0, sreg1); + + /* Return to either after the branch in OP_CALL_HANDLER, or to the EH code */ + code = emit_ldrx (code, ARMREG_LR, spvar->inst_basereg, spvar->inst_offset); + arm_brx (code, ARMREG_LR); + break; + } + case OP_GET_EX_OBJ: + if (ins->dreg != ARMREG_R0) + arm_movx (code, ins->dreg, ARMREG_R0); + break; + case OP_GC_SAFE_POINT: { +#if defined (USE_COOP_GC) + guint8 *buf [1]; + + arm_ldrx (code, ARMREG_IP1, ins->sreg1, 0); + /* Call it if it is non-null */ + buf [0] = code; + arm_cbzx (code, ARMREG_IP1, 0); + code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, "mono_threads_state_poll"); + mono_arm_patch (buf [0], code, MONO_R_ARM64_CBZ); +#endif + break; + } + + default: + g_warning ("unknown opcode %s in %s()\n", mono_inst_name (ins->opcode), __FUNCTION__); + g_assert_not_reached (); + } + + if ((cfg->opt & MONO_OPT_BRANCH) && ((code - cfg->native_code - offset) > max_len)) { + g_warning ("wrong maximal instruction length of instruction %s (expected %d, got %d)", + mono_inst_name (ins->opcode), max_len, code - cfg->native_code - offset); + g_assert_not_reached (); + } + } + + /* + * If the compiled code size is larger than the bcc displacement (19 bits signed), + * insert branch islands between/inside basic blocks. 
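+ * (Editorial addition, hedged: a conditional branch on arm64 encodes a
+ * 19-bit signed word displacement, i.e. roughly +/-1 MiB of code, which
+ * matches the max_offset > 0x3ffff * 4 test in mono_arch_emit_prolog ().
+ * An island is just an unconditional b, whose 26-bit displacement
+ * reaches +/-128 MiB; out-of-range conditional branches are retargeted
+ * at a nearby island that then jumps the rest of the way.)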
+ */ + if (cfg->arch.cond_branch_islands) + code = emit_branch_island (cfg, code, start_offset); + + cfg->code_len = code - cfg->native_code; +} + +static guint8* +emit_move_args (MonoCompile *cfg, guint8 *code) +{ + MonoInst *ins; + CallInfo *cinfo; + ArgInfo *ainfo; + int i, part; + + cinfo = cfg->arch.cinfo; + g_assert (cinfo); + for (i = 0; i < cinfo->nargs; ++i) { + ainfo = cinfo->args + i; + ins = cfg->args [i]; + + if (ins->opcode == OP_REGVAR) { + switch (ainfo->storage) { + case ArgInIReg: + arm_movx (code, ins->dreg, ainfo->reg); + break; + case ArgOnStack: + switch (ainfo->slot_size) { + case 1: + if (ainfo->sign) + code = emit_ldrsbx (code, ins->dreg, cfg->arch.args_reg, ainfo->offset); + else + code = emit_ldrb (code, ins->dreg, cfg->arch.args_reg, ainfo->offset); + break; + case 2: + if (ainfo->sign) + code = emit_ldrshx (code, ins->dreg, cfg->arch.args_reg, ainfo->offset); + else + code = emit_ldrh (code, ins->dreg, cfg->arch.args_reg, ainfo->offset); + break; + case 4: + if (ainfo->sign) + code = emit_ldrswx (code, ins->dreg, cfg->arch.args_reg, ainfo->offset); + else + code = emit_ldrw (code, ins->dreg, cfg->arch.args_reg, ainfo->offset); + break; + default: + code = emit_ldrx (code, ins->dreg, cfg->arch.args_reg, ainfo->offset); + break; + } + break; + default: + g_assert_not_reached (); + break; + } + } else { + if (ainfo->storage != ArgVtypeByRef && ainfo->storage != ArgVtypeByRefOnStack) + g_assert (ins->opcode == OP_REGOFFSET); + + switch (ainfo->storage) { + case ArgInIReg: + /* Stack slots for arguments have size 8 */ + code = emit_strx (code, ainfo->reg, ins->inst_basereg, ins->inst_offset); + break; + case ArgInFReg: + code = emit_strfpx (code, ainfo->reg, ins->inst_basereg, ins->inst_offset); + break; + case ArgInFRegR4: + code = emit_strfpw (code, ainfo->reg, ins->inst_basereg, ins->inst_offset); + break; + case ArgOnStack: + case ArgOnStackR4: + case ArgOnStackR8: + case ArgVtypeByRefOnStack: + case ArgVtypeOnStack: + break; + case ArgVtypeByRef: { + MonoInst *addr_arg = ins->inst_left; + + if (ainfo->gsharedvt) { + g_assert (ins->opcode == OP_GSHAREDVT_ARG_REGOFFSET); + arm_strx (code, ainfo->reg, ins->inst_basereg, ins->inst_offset); + } else { + g_assert (ins->opcode == OP_VTARG_ADDR); + g_assert (addr_arg->opcode == OP_REGOFFSET); + arm_strx (code, ainfo->reg, addr_arg->inst_basereg, addr_arg->inst_offset); + } + break; + } + case ArgVtypeInIRegs: + for (part = 0; part < ainfo->nregs; part ++) { + code = emit_strx (code, ainfo->reg + part, ins->inst_basereg, ins->inst_offset + (part * 8)); + } + break; + case ArgHFA: + for (part = 0; part < ainfo->nregs; part ++) { + if (ainfo->esize == 4) + code = emit_strfpw (code, ainfo->reg + part, ins->inst_basereg, ins->inst_offset + ainfo->foffsets [part]); + else + code = emit_strfpx (code, ainfo->reg + part, ins->inst_basereg, ins->inst_offset + ainfo->foffsets [part]); + } + break; + default: + g_assert_not_reached (); + break; + } + } + } + + return code; +} + +/* + * emit_store_regarray: + * + * Emit code to store the registers in REGS into the appropriate elements of + * the register array at BASEREG+OFFSET. 
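+ * (Editorial addition: the array is indexed by register number, i.e.
+ * register i lands at BASEREG + OFFSET + i * 8, so callers such as the
+ * LMF code can index it directly; adjacent registers are paired into a
+ * single stp. For example, regs = (1 << ARMREG_R19) | (1 << ARMREG_R20)
+ * emits one stp of x19/x20 at OFFSET + 19 * 8.)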
+ */ +static __attribute__((warn_unused_result)) guint8* +emit_store_regarray (guint8 *code, guint64 regs, int basereg, int offset) +{ + int i; + + for (i = 0; i < 32; ++i) { + if (regs & (1 << i)) { + if (i + 1 < 32 && (regs & (1 << (i + 1))) && (i + 1 != ARMREG_SP)) { + arm_stpx (code, i, i + 1, basereg, offset + (i * 8)); + i++; + } else if (i == ARMREG_SP) { + arm_movspx (code, ARMREG_IP1, ARMREG_SP); + arm_strx (code, ARMREG_IP1, basereg, offset + (i * 8)); + } else { + arm_strx (code, i, basereg, offset + (i * 8)); + } + } + } + return code; +} + +/* + * emit_load_regarray: + * + * Emit code to load the registers in REGS from the appropriate elements of + * the register array at BASEREG+OFFSET. + */ +static __attribute__((warn_unused_result)) guint8* +emit_load_regarray (guint8 *code, guint64 regs, int basereg, int offset) +{ + int i; + + for (i = 0; i < 32; ++i) { + if (regs & (1 << i)) { + if ((regs & (1 << (i + 1))) && (i + 1 != ARMREG_SP)) { + if (offset + (i * 8) < 500) + arm_ldpx (code, i, i + 1, basereg, offset + (i * 8)); + else { + code = emit_ldrx (code, i, basereg, offset + (i * 8)); + code = emit_ldrx (code, i + 1, basereg, offset + ((i + 1) * 8)); + } + i++; + } else if (i == ARMREG_SP) { + g_assert_not_reached (); + } else { + code = emit_ldrx (code, i, basereg, offset + (i * 8)); + } + } + } + return code; +} + +/* + * emit_store_regset: + * + * Emit code to store the registers in REGS into consecutive memory locations starting + * at BASEREG+OFFSET. + */ +static __attribute__((warn_unused_result)) guint8* +emit_store_regset (guint8 *code, guint64 regs, int basereg, int offset) +{ + int i, pos; + + pos = 0; + for (i = 0; i < 32; ++i) { + if (regs & (1 << i)) { + if ((regs & (1 << (i + 1))) && (i + 1 != ARMREG_SP)) { + arm_stpx (code, i, i + 1, basereg, offset + (pos * 8)); + i++; + pos++; + } else if (i == ARMREG_SP) { + arm_movspx (code, ARMREG_IP1, ARMREG_SP); + arm_strx (code, ARMREG_IP1, basereg, offset + (pos * 8)); + } else { + arm_strx (code, i, basereg, offset + (pos * 8)); + } + pos++; + } + } + return code; +} + +/* + * emit_load_regset: + * + * Emit code to load the registers in REGS from consecutive memory locations starting + * at BASEREG+OFFSET. 
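+ * (Editorial addition: unlike the regarray helpers above, the regset
+ * helpers pack registers into consecutive slots in ascending bit order,
+ * e.g. regs = (1 << ARMREG_R19) | (1 << ARMREG_R22) uses OFFSET + 0 for
+ * x19 and OFFSET + 8 for x22. SP is deliberately not loadable here.)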
+ */ +static __attribute__((warn_unused_result)) guint8* +emit_load_regset (guint8 *code, guint64 regs, int basereg, int offset) +{ + int i, pos; + + pos = 0; + for (i = 0; i < 32; ++i) { + if (regs & (1 << i)) { + if ((regs & (1 << (i + 1))) && (i + 1 != ARMREG_SP)) { + arm_ldpx (code, i, i + 1, basereg, offset + (pos * 8)); + i++; + pos++; + } else if (i == ARMREG_SP) { + g_assert_not_reached (); + } else { + arm_ldrx (code, i, basereg, offset + (pos * 8)); + } + pos++; + } + } + return code; +} + +__attribute__((warn_unused_result)) guint8* +mono_arm_emit_load_regarray (guint8 *code, guint64 regs, int basereg, int offset) +{ + return emit_load_regarray (code, regs, basereg, offset); +} + +__attribute__((warn_unused_result)) guint8* +mono_arm_emit_store_regarray (guint8 *code, guint64 regs, int basereg, int offset) +{ + return emit_store_regarray (code, regs, basereg, offset); +} + +__attribute__((warn_unused_result)) guint8* +mono_arm_emit_store_regset (guint8 *code, guint64 regs, int basereg, int offset) +{ + return emit_store_regset (code, regs, basereg, offset); +} + +/* Same as emit_store_regset, but emit unwind info too */ +/* CFA_OFFSET is the offset between the CFA and basereg */ +static __attribute__((warn_unused_result)) guint8* +emit_store_regset_cfa (MonoCompile *cfg, guint8 *code, guint64 regs, int basereg, int offset, int cfa_offset, guint64 no_cfa_regset) +{ + int i, j, pos, nregs; + guint32 cfa_regset = regs & ~no_cfa_regset; + + pos = 0; + for (i = 0; i < 32; ++i) { + nregs = 1; + if (regs & (1 << i)) { + if ((regs & (1 << (i + 1))) && (i + 1 != ARMREG_SP)) { + if (offset < 256) { + arm_stpx (code, i, i + 1, basereg, offset + (pos * 8)); + } else { + code = emit_strx (code, i, basereg, offset + (pos * 8)); + code = emit_strx (code, i + 1, basereg, offset + (pos * 8) + 8); + } + nregs = 2; + } else if (i == ARMREG_SP) { + arm_movspx (code, ARMREG_IP1, ARMREG_SP); + code = emit_strx (code, ARMREG_IP1, basereg, offset + (pos * 8)); + } else { + code = emit_strx (code, i, basereg, offset + (pos * 8)); + } + + for (j = 0; j < nregs; ++j) { + if (cfa_regset & (1 << (i + j))) + mono_emit_unwind_op_offset (cfg, code, i + j, (- cfa_offset) + offset + ((pos + j) * 8)); + } + + i += nregs - 1; + pos += nregs; + } + } + return code; +} + +/* + * emit_setup_lmf: + * + * Emit code to initialize an LMF structure at LMF_OFFSET. + * Clobbers ip0/ip1. + */ +static guint8* +emit_setup_lmf (MonoCompile *cfg, guint8 *code, gint32 lmf_offset, int cfa_offset) +{ + /* + * The LMF should contain all the state required to be able to reconstruct the machine state + * at the current point of execution. Since the LMF is only read during EH, only callee + * saved etc. registers need to be saved. + * FIXME: Save callee saved fp regs, JITted code doesn't use them, but native code does, and they + * need to be restored during EH. 
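+ * (Editorial addition, hedged: the LMF - the "last managed frame" - is
+ * threaded through a linked list so the stack walker can skip across
+ * native frames; saving pc plus the callee-saved gregs suffices
+ * precisely because caller-saved registers are dead at any point where
+ * EH can inspect the frame.)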
+ */ + + /* pc */ + arm_adrx (code, ARMREG_LR, code); + code = emit_strx (code, ARMREG_LR, ARMREG_FP, lmf_offset + MONO_STRUCT_OFFSET (MonoLMF, pc)); + /* gregs + fp + sp */ + /* Don't emit unwind info for sp/fp, they are already handled in the prolog */ + code = emit_store_regset_cfa (cfg, code, MONO_ARCH_LMF_REGS, ARMREG_FP, lmf_offset + MONO_STRUCT_OFFSET (MonoLMF, gregs), cfa_offset, (1 << ARMREG_FP) | (1 << ARMREG_SP)); + + return code; +} + +guint8 * +mono_arch_emit_prolog (MonoCompile *cfg) +{ + MonoMethod *method = cfg->method; + MonoMethodSignature *sig; + MonoBasicBlock *bb; + guint8 *code; + int cfa_offset, max_offset; + + sig = mono_method_signature (method); + cfg->code_size = 256 + sig->param_count * 64; + code = cfg->native_code = g_malloc (cfg->code_size); + + /* This can be unaligned */ + cfg->stack_offset = ALIGN_TO (cfg->stack_offset, MONO_ARCH_FRAME_ALIGNMENT); + + /* + * - Setup frame + */ + cfa_offset = 0; + mono_emit_unwind_op_def_cfa (cfg, code, ARMREG_SP, 0); + + /* Setup frame */ + if (arm_is_ldpx_imm (-cfg->stack_offset)) { + arm_stpx_pre (code, ARMREG_FP, ARMREG_LR, ARMREG_SP, -cfg->stack_offset); + } else { + /* sp -= cfg->stack_offset */ + /* This clobbers ip0/ip1 */ + code = emit_subx_sp_imm (code, cfg->stack_offset); + arm_stpx (code, ARMREG_FP, ARMREG_LR, ARMREG_SP, 0); + } + cfa_offset += cfg->stack_offset; + mono_emit_unwind_op_def_cfa_offset (cfg, code, cfa_offset); + mono_emit_unwind_op_offset (cfg, code, ARMREG_FP, (- cfa_offset) + 0); + mono_emit_unwind_op_offset (cfg, code, ARMREG_LR, (- cfa_offset) + 8); + arm_movspx (code, ARMREG_FP, ARMREG_SP); + mono_emit_unwind_op_def_cfa_reg (cfg, code, ARMREG_FP); + if (cfg->param_area) { + /* The param area is below the frame pointer */ + code = emit_subx_sp_imm (code, cfg->param_area); + } + + if (cfg->method->save_lmf) { + code = emit_setup_lmf (cfg, code, cfg->lmf_var->inst_offset, cfa_offset); + } else { + /* Save gregs */ + code = emit_store_regset_cfa (cfg, code, MONO_ARCH_CALLEE_SAVED_REGS & cfg->used_int_regs, ARMREG_FP, cfg->arch.saved_gregs_offset, cfa_offset, 0); + } + + /* Setup args reg */ + if (cfg->arch.args_reg) { + /* The register was already saved above */ + code = emit_addx_imm (code, cfg->arch.args_reg, ARMREG_FP, cfg->stack_offset); + } + + /* Save return area addr received in R8 */ + if (cfg->vret_addr) { + MonoInst *ins = cfg->vret_addr; + + g_assert (ins->opcode == OP_REGOFFSET); + code = emit_strx (code, ARMREG_R8, ins->inst_basereg, ins->inst_offset); + } + + /* Save mrgctx received in MONO_ARCH_RGCTX_REG */ + if (cfg->rgctx_var) { + MonoInst *ins = cfg->rgctx_var; + + g_assert (ins->opcode == OP_REGOFFSET); + + code = emit_strx (code, MONO_ARCH_RGCTX_REG, ins->inst_basereg, ins->inst_offset); + } + + /* + * Move arguments to their registers/stack locations. 
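+ * (Editorial addition, hedged: per the AAPCS64 calling convention the
+ * first eight integer arguments arrive in r0-r7 and fp arguments in
+ * v0-v7, with r8 carrying the return-buffer address, which is why R8
+ * is spilled just above; emit_move_args () stores each incoming
+ * argument into its frame slot or moves it to its allocated register.)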
+ */ + code = emit_move_args (cfg, code); + + /* Initialize seq_point_info_var */ + if (cfg->arch.seq_point_info_var) { + MonoInst *ins = cfg->arch.seq_point_info_var; + + /* Initialize the variable from a GOT slot */ + code = emit_aotconst (cfg, code, ARMREG_IP0, MONO_PATCH_INFO_SEQ_POINT_INFO, cfg->method); + g_assert (ins->opcode == OP_REGOFFSET); + code = emit_strx (code, ARMREG_IP0, ins->inst_basereg, ins->inst_offset); + + /* Initialize ss_tramp_var */ + ins = cfg->arch.ss_tramp_var; + g_assert (ins->opcode == OP_REGOFFSET); + + code = emit_ldrx (code, ARMREG_IP1, ARMREG_IP0, MONO_STRUCT_OFFSET (SeqPointInfo, ss_tramp_addr)); + code = emit_strx (code, ARMREG_IP1, ins->inst_basereg, ins->inst_offset); + } else { + MonoInst *ins; + + if (cfg->arch.ss_tramp_var) { + /* Initialize ss_tramp_var */ + ins = cfg->arch.ss_tramp_var; + g_assert (ins->opcode == OP_REGOFFSET); + + code = emit_imm64 (code, ARMREG_IP0, (guint64)&ss_trampoline); + code = emit_strx (code, ARMREG_IP0, ins->inst_basereg, ins->inst_offset); + } + + if (cfg->arch.bp_tramp_var) { + /* Initialize bp_tramp_var */ + ins = cfg->arch.bp_tramp_var; + g_assert (ins->opcode == OP_REGOFFSET); + + code = emit_imm64 (code, ARMREG_IP0, (guint64)bp_trampoline); + code = emit_strx (code, ARMREG_IP0, ins->inst_basereg, ins->inst_offset); + } + } + + max_offset = 0; + if (cfg->opt & MONO_OPT_BRANCH) { + for (bb = cfg->bb_entry; bb; bb = bb->next_bb) { + MonoInst *ins; + bb->max_offset = max_offset; + + MONO_BB_FOR_EACH_INS (bb, ins) { + max_offset += ((guint8 *)ins_get_spec (ins->opcode))[MONO_INST_LEN]; + } + } + } + if (max_offset > 0x3ffff * 4) + cfg->arch.cond_branch_islands = TRUE; + + return code; +} + +static guint8* +realloc_code (MonoCompile *cfg, int size) +{ + while (cfg->code_len + size > (cfg->code_size - 16)) { + cfg->code_size *= 2; + cfg->native_code = g_realloc (cfg->native_code, cfg->code_size); + cfg->stat_code_reallocs++; + } + return cfg->native_code + cfg->code_len; +} + +void +mono_arch_emit_epilog (MonoCompile *cfg) +{ + CallInfo *cinfo; + int max_epilog_size; + guint8 *code; + int i; + + max_epilog_size = 16 + 20*4; + code = realloc_code (cfg, max_epilog_size); + + if (cfg->method->save_lmf) { + code = mono_arm_emit_load_regarray (code, MONO_ARCH_CALLEE_SAVED_REGS & cfg->used_int_regs, ARMREG_FP, cfg->lmf_var->inst_offset + MONO_STRUCT_OFFSET (MonoLMF, gregs) - (MONO_ARCH_FIRST_LMF_REG * 8)); + } else { + /* Restore gregs */ + code = emit_load_regset (code, MONO_ARCH_CALLEE_SAVED_REGS & cfg->used_int_regs, ARMREG_FP, cfg->arch.saved_gregs_offset); + } + + /* Load returned vtypes into registers if needed */ + cinfo = cfg->arch.cinfo; + switch (cinfo->ret.storage) { + case ArgVtypeInIRegs: { + MonoInst *ins = cfg->ret; + + for (i = 0; i < cinfo->ret.nregs; ++i) + code = emit_ldrx (code, cinfo->ret.reg + i, ins->inst_basereg, ins->inst_offset + (i * 8)); + break; + } + case ArgHFA: { + MonoInst *ins = cfg->ret; + + for (i = 0; i < cinfo->ret.nregs; ++i) { + if (cinfo->ret.esize == 4) + code = emit_ldrfpw (code, cinfo->ret.reg + i, ins->inst_basereg, ins->inst_offset + cinfo->ret.foffsets [i]); + else + code = emit_ldrfpx (code, cinfo->ret.reg + i, ins->inst_basereg, ins->inst_offset + cinfo->ret.foffsets [i]); + } + break; + } + default: + break; + } + + /* Destroy frame */ + code = mono_arm_emit_destroy_frame (code, cfg->stack_offset, ((1 << ARMREG_IP0) | (1 << ARMREG_IP1))); + + arm_retx (code, ARMREG_LR); + + g_assert (code - (cfg->native_code + cfg->code_len) < max_epilog_size); + + cfg->code_len = code - 
cfg->native_code; +} + +void +mono_arch_emit_exceptions (MonoCompile *cfg) +{ + MonoJumpInfo *ji; + MonoClass *exc_class; + guint8 *code, *ip; + guint8* exc_throw_pos [MONO_EXC_INTRINS_NUM]; + guint8 exc_throw_found [MONO_EXC_INTRINS_NUM]; + int i, id, size = 0; + + for (i = 0; i < MONO_EXC_INTRINS_NUM; i++) { + exc_throw_pos [i] = NULL; + exc_throw_found [i] = 0; + } + + for (ji = cfg->patch_info; ji; ji = ji->next) { + if (ji->type == MONO_PATCH_INFO_EXC) { + i = mini_exception_id_by_name (ji->data.target); + if (!exc_throw_found [i]) { + size += 32; + exc_throw_found [i] = TRUE; + } + } + } + + code = realloc_code (cfg, size); + + /* Emit code to raise corlib exceptions */ + for (ji = cfg->patch_info; ji; ji = ji->next) { + if (ji->type != MONO_PATCH_INFO_EXC) + continue; + + ip = cfg->native_code + ji->ip.i; + + id = mini_exception_id_by_name (ji->data.target); + + if (exc_throw_pos [id]) { + /* ip points to the bcc () in OP_COND_EXC_... */ + arm_patch_rel (ip, exc_throw_pos [id], ji->relocation); + ji->type = MONO_PATCH_INFO_NONE; + continue; + } + + exc_throw_pos [id] = code; + arm_patch_rel (ip, code, ji->relocation); + + /* We are being branched to from the code generated by emit_cond_exc (), the pc is in ip1 */ + + /* r0 = type token */ + exc_class = mono_class_load_from_name (mono_defaults.corlib, "System", ji->data.name); + code = emit_imm (code, ARMREG_R0, exc_class->type_token - MONO_TOKEN_TYPE_DEF); + /* r1 = throw ip */ + arm_movx (code, ARMREG_R1, ARMREG_IP1); + /* Branch to the corlib exception throwing trampoline */ + ji->ip.i = code - cfg->native_code; + ji->type = MONO_PATCH_INFO_INTERNAL_METHOD; + ji->data.name = "mono_arch_throw_corlib_exception"; + ji->relocation = MONO_R_ARM64_BL; + arm_bl (code, 0); + cfg->thunk_area += THUNK_SIZE; + } + + cfg->code_len = code - cfg->native_code; + + g_assert (cfg->code_len < cfg->code_size); +} + +MonoInst* +mono_arch_emit_inst_for_method (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args) +{ + return NULL; +} + +gboolean +mono_arch_print_tree (MonoInst *tree, int arity) +{ + return FALSE; +} + +guint32 +mono_arch_get_patch_offset (guint8 *code) +{ + return 0; +} + +gpointer +mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckItem **imt_entries, int count, + gpointer fail_tramp) +{ + int i, buf_len, imt_reg; + guint8 *buf, *code; + +#if DEBUG_IMT + printf ("building IMT thunk for class %s %s entries %d code size %d code at %p end %p vtable %p\n", vtable->klass->name_space, vtable->klass->name, count, size, start, ((guint8*)start) + size, vtable); + for (i = 0; i < count; ++i) { + MonoIMTCheckItem *item = imt_entries [i]; + printf ("method %d (%p) %s vtable slot %p is_equals %d chunk size %d\n", i, item->key, item->key->name, &vtable->vtable [item->value.vtable_slot], item->is_equals, item->chunk_size); + } +#endif + + buf_len = 0; + for (i = 0; i < count; ++i) { + MonoIMTCheckItem *item = imt_entries [i]; + if (item->is_equals) { + gboolean fail_case = !item->check_target_idx && fail_tramp; + + if (item->check_target_idx || fail_case) { + if (!item->compare_done || fail_case) { + buf_len += 4 * 4 + 4; + } + buf_len += 4; + if (item->has_target_code) { + buf_len += 5 * 4; + } else { + buf_len += 6 * 4; + } + if (fail_case) { + buf_len += 5 * 4; + } + } else { + buf_len += 6 * 4; + } + } else { + buf_len += 6 * 4; + } + } + + if (fail_tramp) + buf = mono_method_alloc_generic_virtual_thunk (domain, buf_len); + else + buf = mono_domain_code_reserve (domain, buf_len); + code = 
buf; + + /* + * We are called by JITted code, which passes in the IMT argument in + * MONO_ARCH_RGCTX_REG (r27). We need to preserve all caller saved regs + * except ip0/ip1. + */ + imt_reg = MONO_ARCH_RGCTX_REG; + for (i = 0; i < count; ++i) { + MonoIMTCheckItem *item = imt_entries [i]; + + item->code_target = code; + + if (item->is_equals) { + /* + * Check the imt argument against item->key, if equals, jump to either + * item->value.target_code or to vtable [item->value.vtable_slot]. + * If fail_tramp is set, jump to it if not-equals. + */ + gboolean fail_case = !item->check_target_idx && fail_tramp; + + if (item->check_target_idx || fail_case) { + /* Compare imt_reg with item->key */ + if (!item->compare_done || fail_case) { + // FIXME: Optimize this + code = emit_imm64 (code, ARMREG_IP0, (guint64)item->key); + arm_cmpx (code, imt_reg, ARMREG_IP0); + } + item->jmp_code = code; + arm_bcc (code, ARMCOND_NE, 0); + /* Jump to target if equals */ + if (item->has_target_code) { + code = emit_imm64 (code, ARMREG_IP0, (guint64)item->value.target_code); + arm_brx (code, ARMREG_IP0); + } else { + guint64 imm = (guint64)&(vtable->vtable [item->value.vtable_slot]); + + code = emit_imm64 (code, ARMREG_IP0, imm); + arm_ldrx (code, ARMREG_IP0, ARMREG_IP0, 0); + arm_brx (code, ARMREG_IP0); + } + + if (fail_case) { + arm_patch_rel (item->jmp_code, code, MONO_R_ARM64_BCC); + item->jmp_code = NULL; + code = emit_imm64 (code, ARMREG_IP0, (guint64)fail_tramp); + arm_brx (code, ARMREG_IP0); + } + } else { + guint64 imm = (guint64)&(vtable->vtable [item->value.vtable_slot]); + + code = emit_imm64 (code, ARMREG_IP0, imm); + arm_ldrx (code, ARMREG_IP0, ARMREG_IP0, 0); + arm_brx (code, ARMREG_IP0); + } + } else { + code = emit_imm64 (code, ARMREG_IP0, (guint64)item->key); + arm_cmpx (code, imt_reg, ARMREG_IP0); + item->jmp_code = code; + arm_bcc (code, ARMCOND_HS, 0); + } + } + /* Patch the branches */ + for (i = 0; i < count; ++i) { + MonoIMTCheckItem *item = imt_entries [i]; + if (item->jmp_code && item->check_target_idx) + arm_patch_rel (item->jmp_code, imt_entries [item->check_target_idx]->code_target, MONO_R_ARM64_BCC); + } + + g_assert ((code - buf) < buf_len); + + mono_arch_flush_icache (buf, code - buf); + + return buf; +} + +GSList * +mono_arch_get_trampolines (gboolean aot) +{ + return mono_arm_get_exception_trampolines (aot); +} + +#else /* DISABLE_JIT */ + +gpointer +mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckItem **imt_entries, int count, + gpointer fail_tramp) +{ + g_assert_not_reached (); + return NULL; +} + +#endif /* !DISABLE_JIT */ + +#ifdef MONO_ARCH_SOFT_DEBUG_SUPPORTED + +void +mono_arch_set_breakpoint (MonoJitInfo *ji, guint8 *ip) +{ + guint8 *code = ip; + guint32 native_offset = ip - (guint8*)ji->code_start; + + if (ji->from_aot) { + SeqPointInfo *info = mono_arch_get_seq_point_info (mono_domain_get (), ji->code_start); + + g_assert (native_offset % 4 == 0); + g_assert (info->bp_addrs [native_offset / 4] == 0); + info->bp_addrs [native_offset / 4] = mini_get_breakpoint_trampoline (); + } else { + /* ip points to an ldrx */ + code += 4; + arm_blrx (code, ARMREG_IP0); + mono_arch_flush_icache (ip, code - ip); + } +} + +void +mono_arch_clear_breakpoint (MonoJitInfo *ji, guint8 *ip) +{ + guint8 *code = ip; + + if (ji->from_aot) { + guint32 native_offset = ip - (guint8*)ji->code_start; + SeqPointInfo *info = mono_arch_get_seq_point_info (mono_domain_get (), ji->code_start); + + g_assert (native_offset % 4 == 0); + info->bp_addrs [native_offset / 4] = NULL; + 
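+		/*
+		 * Editorial note: AOT images cannot be patched, so breakpoints
+		 * go through the bp_addrs table that every seq point loads
+		 * from; JITted code is patched in place instead:
+		 *
+		 *   set:   the nop after the seq point's ldrx becomes "blr ip0"
+		 *   clear: the instruction is turned back into a nop
+		 *
+		 * (the preceding ldrx already loaded the trampoline into ip0).
+		 */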
} else { + /* ip points to an ldrx */ + code += 4; + arm_nop (code); + mono_arch_flush_icache (ip, code - ip); + } +} + +void +mono_arch_start_single_stepping (void) +{ + ss_trampoline = mini_get_single_step_trampoline (); +} + +void +mono_arch_stop_single_stepping (void) +{ + ss_trampoline = NULL; +} + +gboolean +mono_arch_is_single_step_event (void *info, void *sigctx) +{ + /* We use soft breakpoints on arm64 */ + return FALSE; +} + +gboolean +mono_arch_is_breakpoint_event (void *info, void *sigctx) +{ + /* We use soft breakpoints on arm64 */ + return FALSE; +} + +void +mono_arch_skip_breakpoint (MonoContext *ctx, MonoJitInfo *ji) +{ + g_assert_not_reached (); +} + +void +mono_arch_skip_single_step (MonoContext *ctx) +{ + g_assert_not_reached (); +} + +gpointer +mono_arch_get_seq_point_info (MonoDomain *domain, guint8 *code) +{ + SeqPointInfo *info; + MonoJitInfo *ji; + + // FIXME: Add a free function + + mono_domain_lock (domain); + info = g_hash_table_lookup (domain_jit_info (domain)->arch_seq_points, + code); + mono_domain_unlock (domain); + + if (!info) { + ji = mono_jit_info_table_find (domain, (char*)code); + g_assert (ji); + + info = g_malloc0 (sizeof (SeqPointInfo) + (ji->code_size / 4) * sizeof(guint8*)); + + info->ss_tramp_addr = &ss_trampoline; + + mono_domain_lock (domain); + g_hash_table_insert (domain_jit_info (domain)->arch_seq_points, + code, info); + mono_domain_unlock (domain); + } + + return info; +} + +void +mono_arch_init_lmf_ext (MonoLMFExt *ext, gpointer prev_lmf) +{ + ext->lmf.previous_lmf = prev_lmf; + /* Mark that this is a MonoLMFExt */ + ext->lmf.previous_lmf = (gpointer)(((gssize)ext->lmf.previous_lmf) | 2); + ext->lmf.gregs [MONO_ARCH_LMF_REG_SP] = (gssize)ext; +} + +#endif /* MONO_ARCH_SOFT_DEBUG_SUPPORTED */ + +gboolean +mono_arch_opcode_supported (int opcode) +{ + switch (opcode) { + case OP_ATOMIC_ADD_I4: + case OP_ATOMIC_ADD_I8: + case OP_ATOMIC_EXCHANGE_I4: + case OP_ATOMIC_EXCHANGE_I8: + case OP_ATOMIC_CAS_I4: + case OP_ATOMIC_CAS_I8: + case OP_ATOMIC_LOAD_I1: + case OP_ATOMIC_LOAD_I2: + case OP_ATOMIC_LOAD_I4: + case OP_ATOMIC_LOAD_I8: + case OP_ATOMIC_LOAD_U1: + case OP_ATOMIC_LOAD_U2: + case OP_ATOMIC_LOAD_U4: + case OP_ATOMIC_LOAD_U8: + case OP_ATOMIC_LOAD_R4: + case OP_ATOMIC_LOAD_R8: + case OP_ATOMIC_STORE_I1: + case OP_ATOMIC_STORE_I2: + case OP_ATOMIC_STORE_I4: + case OP_ATOMIC_STORE_I8: + case OP_ATOMIC_STORE_U1: + case OP_ATOMIC_STORE_U2: + case OP_ATOMIC_STORE_U4: + case OP_ATOMIC_STORE_U8: + case OP_ATOMIC_STORE_R4: + case OP_ATOMIC_STORE_R8: + return TRUE; + default: + return FALSE; + } +} + +CallInfo* +mono_arch_get_call_info (MonoMemPool *mp, MonoMethodSignature *sig) +{ + return get_call_info (mp, sig); +} + diff --git a/mono/mini/mini-arm64.h b/mono/mini/mini-arm64.h index a963c75d51c..21e1f542474 100644 --- a/mono/mini/mini-arm64.h +++ b/mono/mini/mini-arm64.h @@ -1 +1,267 @@ -#include "../../../mono-extensions/mono/mini/mini-arm64.h" +/* + * mini-arm64.h + * + * Copyright 2013 Xamarin Inc + * + * Based on mini-arm.h: + * + * Copyright 2011 Xamarin Inc + */ + +#ifndef __MONO_MINI_ARM64_H__ +#define __MONO_MINI_ARM64_H__ + +#include +#include + +#define MONO_ARCH_CPU_SPEC mono_arm64_cpu_desc + +#define MONO_MAX_IREGS 32 +#define MONO_MAX_FREGS 32 + +#define MONO_CONTEXT_SET_LLVM_EXC_REG(ctx, exc) do { (ctx)->regs [0] = (gsize)exc; } while (0) + +#define MONO_INIT_CONTEXT_FROM_FUNC(ctx,func) do { \ + MONO_CONTEXT_SET_BP ((ctx), __builtin_frame_address (0)); \ + MONO_CONTEXT_SET_SP ((ctx), __builtin_frame_address (0)); \ + 
MONO_CONTEXT_SET_IP ((ctx), (func)); \ + } while (0) + +#define MONO_ARCH_INIT_TOP_LMF_ENTRY(lmf) + +/* Parameters used by the register allocator */ +/* r0..r7, r9..r14 (r15 is the imt/rgctx reg) */ +#define MONO_ARCH_CALLEE_REGS 0xfeff +/* r19..r28 */ +#define MONO_ARCH_CALLEE_SAVED_REGS (0x3ff << 19) + +/* v16/v17 is reserved for a scratch reg */ +#define MONO_ARCH_CALLEE_FREGS 0xfffc00ff +/* v8..v15 */ +#define MONO_ARCH_CALLEE_SAVED_FREGS 0xff00 + +#define MONO_ARCH_USE_FPSTACK FALSE +#define MONO_ARCH_FPSTACK_SIZE 0 + +#define MONO_ARCH_INST_SREG2_MASK(ins) (0) + +#define MONO_ARCH_INST_FIXED_REG(desc) ((desc) == 'a' ? ARMREG_R0 : -1) + +#define MONO_ARCH_INST_IS_REGPAIR(desc) (0) + +#define MONO_ARCH_INST_IS_FLOAT(desc) ((desc) == 'f') + +#define MONO_ARCH_INST_REGPAIR_REG2(desc,hreg1) (-1) + +#define MONO_ARCH_USE_FPSTACK FALSE + +#define MONO_ARCH_FRAME_ALIGNMENT 16 + +#define MONO_ARCH_CODE_ALIGNMENT 32 + +/* callee saved regs + fp + sp */ +#define MONO_ARCH_LMF_REGS ((0x3ff << 19) | (1 << ARMREG_FP) | (1 << ARMREG_SP)) +#define MONO_ARCH_NUM_LMF_REGS (10 + 2) +#define MONO_ARCH_FIRST_LMF_REG ARMREG_R19 +#define MONO_ARCH_LMF_REG_FP 10 +#define MONO_ARCH_LMF_REG_SP 11 + +struct MonoLMF { + /* + * If the second lowest bit is set to 1, then this is a MonoLMFExt structure, and + * the other fields are not valid. + */ + gpointer previous_lmf; + gpointer lmf_addr; + mgreg_t pc; + mgreg_t gregs [MONO_ARCH_NUM_LMF_REGS]; +}; + +/* Structure used by the sequence points in AOTed code */ +typedef struct { + gpointer ss_trigger_page; + gpointer bp_trigger_page; + gpointer ss_tramp_addr; + guint8* bp_addrs [MONO_ZERO_LEN_ARRAY]; +} SeqPointInfo; + +#define PARAM_REGS 8 +#define FP_PARAM_REGS 8 + +#define DYN_CALL_STACK_ARGS 6 + +typedef struct { + /* The +1 is for r8 */ + mgreg_t regs [PARAM_REGS + 1 + DYN_CALL_STACK_ARGS]; + mgreg_t res, res2; + guint8 *ret; + double fpregs [FP_PARAM_REGS]; + int n_fpargs, n_fpret; + guint8 buffer [256]; +} DynCallArgs; + +typedef struct { + gpointer cinfo; + int saved_gregs_offset; + /* Points to arguments received on the stack */ + int args_reg; + gboolean cond_branch_islands; + gpointer vret_addr_loc; + gpointer seq_point_info_var; + gpointer ss_tramp_var; + gpointer bp_tramp_var; + guint8 *thunks; + int thunks_size; +} MonoCompileArch; + +#define MONO_ARCH_EMULATE_FREM 1 +#define MONO_ARCH_NO_EMULATE_LONG_MUL_OPTS 1 +#define MONO_ARCH_EMULATE_LONG_MUL_OVF_OPTS 1 +#define MONO_ARCH_NO_EMULATE_LONG_SHIFT_OPS 1 +#define MONO_ARCH_NEED_DIV_CHECK 1 +#define MONO_ARCH_EMULATE_MUL_OVF 1 +#define MONO_ARCH_HAVE_IMT 1 +#define MONO_ARCH_HAVE_OP_TAIL_CALL 1 +#define MONO_ARCH_THIS_AS_FIRST_ARG 1 +#define MONO_ARCH_RGCTX_REG ARMREG_R15 +#define MONO_ARCH_IMT_REG MONO_ARCH_RGCTX_REG +#define MONO_ARCH_VTABLE_REG ARMREG_R0 +#define MONO_ARCH_EXC_REG ARMREG_R0 +#define MONO_ARCH_HAVE_XP_UNWIND 1 +#define MONO_ARCH_HAVE_CREATE_DELEGATE_TRAMPOLINE 1 +#define MONO_ARCH_HAVE_GENERALIZED_IMT_THUNK 1 +#define MONO_ARCH_USE_SIGACTION 1 +#define MONO_ARCH_HAVE_SIGCTX_TO_MONOCTX 1 +#define MONO_ARCH_HAVE_CONTEXT_SET_INT_REG 1 +#define MONO_ARCH_GSHARED_SUPPORTED 1 +#define MONO_ARCH_AOT_SUPPORTED 1 +#define MONO_ARCH_LLVM_SUPPORTED 1 +#define MONO_ARCH_HAVE_FULL_AOT_TRAMPOLINES 1 +#define MONO_ARCH_HAVE_EXCEPTIONS_INIT 1 +#define MONO_ARCH_HAVE_GET_TRAMPOLINES 1 +#define MONO_ARCH_DYN_CALL_SUPPORTED 1 +#define MONO_ARCH_DYN_CALL_PARAM_AREA (DYN_CALL_STACK_ARGS * 8) +#define MONO_ARCH_SOFT_DEBUG_SUPPORTED 1 +#ifndef TARGET_ANDROID +#define MONO_ARCH_GSHAREDVT_SUPPORTED 1 
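+/*
+ * Editorial note, hedged: gsharedvt is the fully-shared compilation mode
+ * for generic methods with value-type type arguments; a single compiled
+ * body serves any instantiation, with arguments marshalled through the
+ * trampolines in tramp-arm64-gsharedvt.c.
+ */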
+#endif +#define MONO_ARCH_HAVE_SETUP_RESUME_FROM_SIGNAL_HANDLER_CTX 1 +#define MONO_ARCH_HAVE_SETUP_ASYNC_CALLBACK 1 +#define MONO_ARCH_HAVE_GENERAL_RGCTX_LAZY_FETCH_TRAMPOLINE 1 +#ifndef MONO_CROSS_COMPILE +#define MONO_ARCH_ENABLE_MONO_LMF_VAR 1 +#endif +#define MONO_ARCH_HAVE_OP_GET_EX_OBJ 1 +#define MONO_ARCH_HAVE_OBJC_GET_SELECTOR 1 +#define MONO_ARCH_HAVE_SDB_TRAMPOLINES 1 +#define MONO_ARCH_HAVE_PATCH_CODE_NEW 1 +#define MONO_ARCH_HAVE_OP_GENERIC_CLASS_INIT 1 +#define MONO_ARCH_HAVE_OPCODE_NEEDS_EMULATION 1 +#define MONO_ARCH_HAVE_DECOMPOSE_LONG_OPTS 1 + +#ifdef TARGET_IOS + +#define MONO_ARCH_REDZONE_SIZE 128 + +#else + +#define MONO_ARCH_REDZONE_SIZE 0 +#if !defined(__PIC__) +#define MONO_ARCH_HAVE_TLS_GET 1 +#endif +#define MONO_ARCH_HAVE_TLS_GET_REG 1 + +#endif + +#if defined(TARGET_APPLETVOS) +#define MONO_ARCH_HAVE_UNWIND_BACKTRACE 1 +#endif + +/* Relocations */ +#define MONO_R_ARM64_B 1 +#define MONO_R_ARM64_BCC 2 +#define MONO_R_ARM64_IMM 3 +#define MONO_R_ARM64_BL 4 +#define MONO_R_ARM64_BL_SHORT 5 +#define MONO_R_ARM64_CBZ 6 + + +typedef enum { + ArgInIReg, + ArgInFReg, + ArgInFRegR4, + ArgOnStack, + ArgOnStackR8, + ArgOnStackR4, + /* + * Vtype passed in consecutive int registers. + * ainfo->reg is the first register, + * ainfo->nregs is the number of registers, + * ainfo->size is the size of the structure. + */ + ArgVtypeInIRegs, + ArgVtypeByRef, + ArgVtypeByRefOnStack, + ArgVtypeOnStack, + ArgHFA, + ArgNone +} ArgStorage; + +typedef struct { + ArgStorage storage; + int reg; + /* ArgOnStack */ + int offset; + /* ArgVtypeInIRegs/ArgHFA */ + int nregs, size; + /* ArgHFA */ + int esize; + /* ArgHFA */ + /* The offsets of the float values inside the arg */ + guint16 foffsets [4]; + /* ArgOnStack */ + int slot_size; + /* hfa */ + int nfregs_to_skip; + gboolean sign; + gboolean gsharedvt; + gboolean hfa; +} ArgInfo; + +typedef struct { + int nargs; + int gr, fr, stack_usage; + ArgInfo ret; + ArgInfo sig_cookie; + ArgInfo args [1]; +} CallInfo; + + +guint8* mono_arm_emit_imm64 (guint8 *code, int dreg, gint64 imm); + +guint8* mono_arm_emit_ldrx (guint8 *code, int rt, int rn, int imm); + +guint8* mono_arm_emit_destroy_frame (guint8 *code, int stack_offset, guint64 temp_regs); + +guint8* mono_arm_emit_store_regset (guint8 *code, guint64 regs, int basereg, int offset); + +guint8* mono_arm_emit_store_regarray (guint8 *code, guint64 regs, int basereg, int offset); + +guint8* mono_arm_emit_load_regarray (guint8 *code, guint64 regs, int basereg, int offset); + +/* MonoJumpInfo **ji */ +guint8* mono_arm_emit_aotconst (gpointer ji, guint8 *code, guint8 *code_start, int dreg, guint32 patch_type, gconstpointer data); + +void mono_arm_patch (guint8 *code, guint8 *target, int relocation); + +void mono_arm_throw_exception (gpointer arg, mgreg_t pc, mgreg_t *int_regs, gdouble *fp_regs, gboolean corlib, gboolean rethrow); + +void mono_arm_gsharedvt_init (void); + +GSList* mono_arm_get_exception_trampolines (gboolean aot); + +void mono_arm_resume_unwind (gpointer arg, mgreg_t pc, mgreg_t *int_regs, gdouble *fp_regs, gboolean corlib, gboolean rethrow); + +CallInfo* mono_arch_get_call_info (MonoMemPool *mp, MonoMethodSignature *sig); + +#endif /* __MONO_MINI_ARM64_H__ */ diff --git a/mono/mini/tramp-arm64-gsharedvt.c b/mono/mini/tramp-arm64-gsharedvt.c new file mode 100644 index 00000000000..d93b6520da3 --- /dev/null +++ b/mono/mini/tramp-arm64-gsharedvt.c @@ -0,0 +1,574 @@ +/* + * tramp-arm64-gsharedvt.c: gsharedvt support code for arm64 + * + * Authors: + * Zoltan Varga + * + * Copyright 2013 
Xamarin, Inc (http://www.xamarin.com) + * Licensed under the MIT license. See LICENSE file in the project root for full license information. + */ +#include + +#include "mini.h" +#include "mini-arm64.h" +#include "mini-arm64-gsharedvt.h" + +/* + * GSHAREDVT + */ +#ifdef MONO_ARCH_GSHARED_SUPPORTED + +/* + * mono_arch_get_gsharedvt_arg_trampoline: + * + * See tramp-x86.c for documentation. + */ +gpointer +mono_arch_get_gsharedvt_arg_trampoline (MonoDomain *domain, gpointer arg, gpointer addr) +{ + guint8 *code, *buf; + int buf_len = 40; + + /* + * Return a trampoline which calls ADDR passing in ARG. + * Pass the argument in ip1, clobbering ip0. + */ + buf = code = mono_global_codeman_reserve (buf_len); + + code = mono_arm_emit_imm64 (code, ARMREG_IP1, (guint64)arg); + code = mono_arm_emit_imm64 (code, ARMREG_IP0, (guint64)addr); + + arm_brx (code, ARMREG_IP0); + + g_assert ((code - buf) < buf_len); + mono_arch_flush_icache (buf, code - buf); + + return buf; +} + +gpointer +mono_arm_start_gsharedvt_call (GSharedVtCallInfo *info, gpointer *caller, gpointer *callee, gpointer mrgctx_reg) +{ + int i; + + /* Set vtype ret arg */ + if (info->vret_slot != -1) { + g_assert (info->vret_slot); + callee [info->vret_arg_reg] = &callee [info->vret_slot]; + } + + for (i = 0; i < info->map_count; ++i) { + int src = info->map [i * 2]; + int dst = info->map [(i * 2) + 1]; + int arg_marshal = (src >> 18) & 0xf; + int arg_size = (src >> 22) & 0xf; + + if (G_UNLIKELY (arg_size)) { + int src_offset = (src >> 26) & 0xf; + int dst_offset = (dst >> 26) & 0xf; + int src_slot, dst_slot; + guint8 *src_ptr, *dst_ptr; + + /* + * Argument passed in part of a stack slot on ios. + * src_offset/dst_offset is the offset within the stack slot. + */ + switch (arg_marshal) { + case GSHAREDVT_ARG_NONE: + src_slot = src & 0xffff; + dst_slot = dst & 0xffff; + src_ptr = (guint8*)(caller + src_slot) + src_offset; + dst_ptr = (guint8*)(callee + dst_slot) + dst_offset; + break; + case GSHAREDVT_ARG_BYREF_TO_BYVAL: + src_slot = src & 0x3f; + dst_slot = dst & 0xffff; + src_ptr = caller [src_slot]; + dst_ptr = (guint8*)(callee + dst_slot) + dst_offset; + break; + case GSHAREDVT_ARG_BYVAL_TO_BYREF_HFAR4: + case GSHAREDVT_ARG_BYREF_TO_BYVAL_HFAR4: + case GSHAREDVT_ARG_BYREF_TO_BYREF: + g_assert_not_reached (); + break; + default: + NOT_IMPLEMENTED; + break; + } + + switch (arg_size) { + case GSHAREDVT_ARG_SIZE_I1: + *(gint8*)dst_ptr = *(gint8*)src_ptr; + break; + case GSHAREDVT_ARG_SIZE_U1: + *(guint8*)dst_ptr = *(guint8*)src_ptr; + break; + case GSHAREDVT_ARG_SIZE_I2: + *(gint16*)dst_ptr = *(gint16*)src_ptr; + break; + case GSHAREDVT_ARG_SIZE_U2: + *(guint16*)dst_ptr = *(guint16*)src_ptr; + break; + case GSHAREDVT_ARG_SIZE_I4: + *(gint32*)dst_ptr = *(gint32*)src_ptr; + break; + case GSHAREDVT_ARG_SIZE_U4: + *(guint32*)dst_ptr = *(guint32*)src_ptr; + break; + default: + g_assert_not_reached (); + } + continue; + } + + switch (arg_marshal) { + case GSHAREDVT_ARG_NONE: + callee [dst] = caller [src]; + break; + case GSHAREDVT_ARG_BYVAL_TO_BYREF: + /* gsharedvt argument passed by addr in reg/stack slot */ + src = src & 0x3f; + callee [dst] = caller + src; + break; + case GSHAREDVT_ARG_BYVAL_TO_BYREF_HFAR4: { + int nslots = (src >> 6) & 0xff; + int src_slot = src & 0x3f; + int j; + float *dst_arr = (float*)(caller + src_slot); + + /* The r4 hfa is in separate slots, need to compress them together in place */ + for (j = 0; j < nslots; ++j) + dst_arr [j] = *(float*)(caller + src_slot + j); + + callee [dst] = caller + src_slot; + break; + } + case 
GSHAREDVT_ARG_BYREF_TO_BYVAL: { + int nslots = (src >> 6) & 0xff; + int src_slot = src & 0x3f; + int j; + gpointer *addr = caller [src_slot]; + + for (j = 0; j < nslots; ++j) + callee [dst + j] = addr [j]; + break; + } + case GSHAREDVT_ARG_BYREF_TO_BYVAL_HFAR4: { + int nslots = (src >> 6) & 0xff; + int src_slot = src & 0x3f; + int j; + guint32 *addr = (guint32*)(caller [src_slot]); + + /* addr points to an array of floats, need to load them to registers */ + for (j = 0; j < nslots; ++j) + callee [dst + j] = GUINT_TO_POINTER (addr [j]); + break; + } + case GSHAREDVT_ARG_BYREF_TO_BYREF: { + int src_slot = src & 0x3f; + + callee [dst] = caller [src_slot]; + break; + } + default: + g_assert_not_reached (); + break; + } + } + + if (info->vcall_offset != -1) { + MonoObject *this_obj = caller [0]; + + if (G_UNLIKELY (!this_obj)) + return NULL; + if (info->vcall_offset == MONO_GSHAREDVT_DEL_INVOKE_VT_OFFSET) + /* delegate invoke */ + return ((MonoDelegate*)this_obj)->invoke_impl; + else + return *(gpointer*)((char*)this_obj->vtable + info->vcall_offset); + } else if (info->calli) { + /* The address to call is passed in the mrgctx reg */ + return mrgctx_reg; + } else { + return info->addr; + } +} + +#ifndef DISABLE_JIT + +gpointer +mono_arch_get_gsharedvt_trampoline (MonoTrampInfo **info, gboolean aot) +{ + guint8 *code, *buf; + int buf_len, cfa_offset; + GSList *unwind_ops = NULL; + MonoJumpInfo *ji = NULL; + guint8 *br_out, *br [64], *br_ret [64], *bcc_ret [64]; + int i, n_arg_regs, n_arg_fregs, offset, arg_reg, info_offset, rgctx_arg_reg_offset; + int caller_reg_area_offset, callee_reg_area_offset, callee_stack_area_offset; + int br_ret_index, bcc_ret_index; + + buf_len = 2048; + buf = code = mono_global_codeman_reserve (buf_len); + + /* + * We are being called by a gsharedvt arg trampoline, the info argument is in IP1.
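 *
 * The map [] entries consumed by mono_arm_start_gsharedvt_call () above are
 * packed bitfields; a decoding sketch follows (the field layout is inferred
 * from the shifts and masks in that function, and the helper name is
 * illustrative only):
 */

static inline void
gsharedvt_decode_map_entry_sketch (int entry, int *marshal, int *size, int *offset)
{
	/* the low bits hold the reg/stack slot: 0xffff for the byval case,
	 * or 0x3f plus an nslots count at bit 6 for the byref cases */
	*marshal = (entry >> 18) & 0xf;	/* GSHAREDVT_ARG_* marshalling kind */
	*size = (entry >> 22) & 0xf;	/* GSHAREDVT_ARG_SIZE_*, 0 means a full slot */
	*offset = (entry >> 26) & 0xf;	/* byte offset inside a shared stack slot (ios) */
}

/*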
+ */ + arg_reg = ARMREG_IP1; + n_arg_regs = NUM_GSHAREDVT_ARG_GREGS; + n_arg_fregs = NUM_GSHAREDVT_ARG_FREGS; + + /* Compute stack frame size and offsets */ + offset = 0; + /* frame block */ + offset += 2 * 8; + /* info argument */ + info_offset = offset; + offset += 8; + /* saved rgctx */ + rgctx_arg_reg_offset = offset; + offset += 8; + /* alignment */ + offset += 8; + /* argument regs */ + caller_reg_area_offset = offset; + offset += (n_arg_regs + n_arg_fregs) * 8; + + /* We need the argument regs to be saved at the top of the frame */ + g_assert (offset % MONO_ARCH_FRAME_ALIGNMENT == 0); + + cfa_offset = offset; + + /* Setup frame */ + arm_stpx_pre (code, ARMREG_FP, ARMREG_LR, ARMREG_SP, -cfa_offset); + mono_add_unwind_op_def_cfa (unwind_ops, code, buf, ARMREG_SP, cfa_offset); + mono_add_unwind_op_offset (unwind_ops, code, buf, ARMREG_FP, -cfa_offset + 0); + mono_add_unwind_op_offset (unwind_ops, code, buf, ARMREG_LR, -cfa_offset + 8); + arm_movspx (code, ARMREG_FP, ARMREG_SP); + mono_add_unwind_op_def_cfa_reg (unwind_ops, code, buf, ARMREG_FP); + + /* Save info argument */ + arm_strx (code, arg_reg, ARMREG_FP, info_offset); + + /* Save rgctx */ + arm_strx (code, MONO_ARCH_RGCTX_REG, ARMREG_FP, rgctx_arg_reg_offset); + + /* Save argument regs below the stack arguments */ + for (i = 0; i < n_arg_regs; ++i) + arm_strx (code, i, ARMREG_SP, caller_reg_area_offset + (i * 8)); + // FIXME: Only do this if fp regs are used + for (i = 0; i < n_arg_fregs; ++i) + arm_strfpx (code, i, ARMREG_SP, caller_reg_area_offset + ((n_arg_regs + i) * 8)); + + /* Allocate callee area */ + arm_ldrw (code, ARMREG_IP0, arg_reg, MONO_STRUCT_OFFSET (GSharedVtCallInfo, stack_usage)); + arm_movspx (code, ARMREG_LR, ARMREG_SP); + arm_subx (code, ARMREG_LR, ARMREG_LR, ARMREG_IP0); + arm_movspx (code, ARMREG_SP, ARMREG_LR); + /* Allocate callee register area just below the callee area so it can be accessed from start_gsharedvt_call using negative offsets */ + /* The + 8 is for alignment */ + callee_reg_area_offset = 8; + callee_stack_area_offset = callee_reg_area_offset + (n_arg_regs * sizeof (gpointer)); + arm_subx_imm (code, ARMREG_SP, ARMREG_SP, ((n_arg_regs + n_arg_fregs) * sizeof (gpointer)) + 8); + + /* + * The stack now looks like this: + * + * <caller frame> + * <caller reg area> + * <saved rgctx, info arg, fp/lr pair> + * <- fp + * <callee stack area> + * <callee reg area> + * <- sp + */ + + /* Call start_gsharedvt_call () */ + /* arg1 == info */ + arm_ldrx (code, ARMREG_R0, ARMREG_FP, info_offset); + /* arg2 = caller stack area */ + arm_addx_imm (code, ARMREG_R1, ARMREG_FP, caller_reg_area_offset); + /* arg3 == callee stack area */ + arm_addx_imm (code, ARMREG_R2, ARMREG_SP, callee_reg_area_offset); + /* arg4 = mrgctx reg */ + arm_ldrx (code, ARMREG_R3, ARMREG_FP, rgctx_arg_reg_offset); + + if (aot) + code = mono_arm_emit_aotconst (&ji, code, buf, ARMREG_IP0, MONO_PATCH_INFO_JIT_ICALL_ADDR, "mono_arm_start_gsharedvt_call"); + else + code = mono_arm_emit_imm64 (code, ARMREG_IP0, (guint64)mono_arm_start_gsharedvt_call); + arm_blrx (code, ARMREG_IP0); + + /* Make the real method call */ + /* R0 contains the addr to call */ + arm_movx (code, ARMREG_IP1, ARMREG_R0); + /* Load rgctx */ + arm_ldrx (code, MONO_ARCH_RGCTX_REG, ARMREG_FP, rgctx_arg_reg_offset); + /* Load argument registers */ + // FIXME: + for (i = 0; i < n_arg_regs; ++i) + arm_ldrx (code, i, ARMREG_SP, callee_reg_area_offset + (i * 8)); + // FIXME: Only do this if needed + for (i = 0; i < n_arg_fregs; ++i) + arm_ldrfpx (code, i, ARMREG_SP, callee_reg_area_offset + ((n_arg_regs + i) * 8)); + /* Clear callee reg area */ + arm_addx_imm (code, ARMREG_SP, ARMREG_SP,
((n_arg_regs + n_arg_fregs) * sizeof (gpointer)) + 8); + /* Make the call */ + arm_blrx (code, ARMREG_IP1); + + br_ret_index = 0; + bcc_ret_index = 0; + + // FIXME: Use a switch + /* Branch between IN/OUT cases */ + arm_ldrx (code, ARMREG_IP1, ARMREG_FP, info_offset); + arm_ldrw (code, ARMREG_IP1, ARMREG_IP1, MONO_STRUCT_OFFSET (GSharedVtCallInfo, gsharedvt_in)); + br_out = code; + arm_cbzx (code, ARMREG_IP1, 0); + + /* IN CASE */ + + /* IP1 == return marshalling type */ + arm_ldrx (code, ARMREG_IP1, ARMREG_FP, info_offset); + arm_ldrw (code, ARMREG_IP1, ARMREG_IP1, MONO_STRUCT_OFFSET (GSharedVtCallInfo, ret_marshal)); + + /* Continue if no marshalling required */ + // FIXME: Use cmpx_imm + code = mono_arm_emit_imm64 (code, ARMREG_IP0, GSHAREDVT_RET_NONE); + arm_cmpx (code, ARMREG_IP0, ARMREG_IP1); + bcc_ret [bcc_ret_index ++] = code; + arm_bcc (code, ARMCOND_EQ, 0); + + /* Compute vret area address in LR */ + arm_ldrx (code, ARMREG_LR, ARMREG_FP, info_offset); + arm_ldrw (code, ARMREG_LR, ARMREG_LR, MONO_STRUCT_OFFSET (GSharedVtCallInfo, vret_slot)); + arm_subx_imm (code, ARMREG_LR, ARMREG_LR, n_arg_regs + n_arg_fregs); + arm_lslx (code, ARMREG_LR, ARMREG_LR, 3); + arm_movspx (code, ARMREG_IP0, ARMREG_SP); + arm_addx (code, ARMREG_LR, ARMREG_IP0, ARMREG_LR); + + /* Branch to specific marshalling code */ + for (i = GSHAREDVT_RET_NONE; i < GSHAREDVT_RET_NUM; ++i) { + code = mono_arm_emit_imm64 (code, ARMREG_IP0, i); + arm_cmpx (code, ARMREG_IP0, ARMREG_IP1); + br [i] = code; + arm_bcc (code, ARMCOND_EQ, 0); + } + + arm_brk (code, 0); + + /* + * The address of the return value area is in LR, have to load it into + * registers. + */ + for (i = GSHAREDVT_RET_NONE; i < GSHAREDVT_RET_NUM; ++i) { + mono_arm_patch (br [i], code, MONO_R_ARM64_BCC); + switch (i) { + case GSHAREDVT_RET_NONE: + break; + case GSHAREDVT_RET_I8: + arm_ldrx (code, ARMREG_R0, ARMREG_LR, 0); + break; + case GSHAREDVT_RET_I1: + arm_ldrsbx (code, ARMREG_R0, ARMREG_LR, 0); + break; + case GSHAREDVT_RET_U1: + arm_ldrb (code, ARMREG_R0, ARMREG_LR, 0); + break; + case GSHAREDVT_RET_I2: + arm_ldrshx (code, ARMREG_R0, ARMREG_LR, 0); + break; + case GSHAREDVT_RET_U2: + arm_ldrh (code, ARMREG_R0, ARMREG_LR, 0); + break; + case GSHAREDVT_RET_I4: + arm_ldrswx (code, ARMREG_R0, ARMREG_LR, 0); + break; + case GSHAREDVT_RET_U4: + arm_ldrw (code, ARMREG_R0, ARMREG_LR, 0); + break; + case GSHAREDVT_RET_R8: + arm_ldrfpx (code, ARMREG_D0, ARMREG_LR, 0); + break; + case GSHAREDVT_RET_R4: + arm_ldrfpw (code, ARMREG_D0, ARMREG_LR, 0); + break; + case GSHAREDVT_RET_IREGS_1: + case GSHAREDVT_RET_IREGS_2: + case GSHAREDVT_RET_IREGS_3: + case GSHAREDVT_RET_IREGS_4: + case GSHAREDVT_RET_IREGS_5: + case GSHAREDVT_RET_IREGS_6: + case GSHAREDVT_RET_IREGS_7: + case GSHAREDVT_RET_IREGS_8: { + int j; + + for (j = 0; j < i - GSHAREDVT_RET_IREGS_1 + 1; ++j) + arm_ldrx (code, j, ARMREG_LR, j * 8); + break; + } + case GSHAREDVT_RET_HFAR8_1: + case GSHAREDVT_RET_HFAR8_2: + case GSHAREDVT_RET_HFAR8_3: + case GSHAREDVT_RET_HFAR8_4: { + int j; + + for (j = 0; j < i - GSHAREDVT_RET_HFAR8_1 + 1; ++j) + arm_ldrfpx (code, j, ARMREG_LR, j * 8); + break; + } + case GSHAREDVT_RET_HFAR4_1: + case GSHAREDVT_RET_HFAR4_2: + case GSHAREDVT_RET_HFAR4_3: + case GSHAREDVT_RET_HFAR4_4: { + int j; + + for (j = 0; j < i - GSHAREDVT_RET_HFAR4_1 + 1; ++j) + arm_ldrfpw (code, j, ARMREG_LR, j * 4); + break; + } + default: + g_assert_not_reached (); + break; + } + br_ret [br_ret_index ++] = code; + arm_b (code, 0); + } + + /* OUT CASE */ + mono_arm_patch (br_out, code, 
MONO_R_ARM64_CBZ); + + /* Compute vret area address in LR */ + arm_ldrx (code, ARMREG_LR, ARMREG_FP, caller_reg_area_offset + (ARMREG_R8 * 8)); + + /* IP1 == return marshalling type */ + arm_ldrx (code, ARMREG_IP1, ARMREG_FP, info_offset); + arm_ldrw (code, ARMREG_IP1, ARMREG_IP1, MONO_STRUCT_OFFSET (GSharedVtCallInfo, ret_marshal)); + + /* Branch to specific marshalling code */ + for (i = GSHAREDVT_RET_NONE; i < GSHAREDVT_RET_NUM; ++i) { + code = mono_arm_emit_imm64 (code, ARMREG_IP0, i); + arm_cmpx (code, ARMREG_IP0, ARMREG_IP1); + br [i] = code; + arm_bcc (code, ARMCOND_EQ, 0); + } + + /* + * The return value is in registers, need to save to the return area passed by the caller in + * R8. + */ + for (i = GSHAREDVT_RET_NONE; i < GSHAREDVT_RET_NUM; ++i) { + mono_arm_patch (br [i], code, MONO_R_ARM64_BCC); + switch (i) { + case GSHAREDVT_RET_NONE: + break; + case GSHAREDVT_RET_I8: + arm_strx (code, ARMREG_R0, ARMREG_LR, 0); + break; + case GSHAREDVT_RET_I1: + case GSHAREDVT_RET_U1: + arm_strb (code, ARMREG_R0, ARMREG_LR, 0); + break; + case GSHAREDVT_RET_I2: + case GSHAREDVT_RET_U2: + arm_strh (code, ARMREG_R0, ARMREG_LR, 0); + break; + case GSHAREDVT_RET_I4: + case GSHAREDVT_RET_U4: + arm_strw (code, ARMREG_R0, ARMREG_LR, 0); + break; + case GSHAREDVT_RET_R8: + arm_strfpx (code, ARMREG_D0, ARMREG_LR, 0); + break; + case GSHAREDVT_RET_R4: + arm_strfpw (code, ARMREG_D0, ARMREG_LR, 0); + break; + case GSHAREDVT_RET_IREGS_1: + case GSHAREDVT_RET_IREGS_2: + case GSHAREDVT_RET_IREGS_3: + case GSHAREDVT_RET_IREGS_4: + case GSHAREDVT_RET_IREGS_5: + case GSHAREDVT_RET_IREGS_6: + case GSHAREDVT_RET_IREGS_7: + case GSHAREDVT_RET_IREGS_8: { + int j; + + for (j = 0; j < i - GSHAREDVT_RET_IREGS_1 + 1; ++j) + arm_strx (code, j, ARMREG_LR, j * 8); + break; + } + case GSHAREDVT_RET_HFAR8_1: + case GSHAREDVT_RET_HFAR8_2: + case GSHAREDVT_RET_HFAR8_3: + case GSHAREDVT_RET_HFAR8_4: { + int j; + + for (j = 0; j < i - GSHAREDVT_RET_HFAR8_1 + 1; ++j) + arm_strfpx (code, j, ARMREG_LR, j * 8); + break; + } + case GSHAREDVT_RET_HFAR4_1: + case GSHAREDVT_RET_HFAR4_2: + case GSHAREDVT_RET_HFAR4_3: + case GSHAREDVT_RET_HFAR4_4: { + int j; + + for (j = 0; j < i - GSHAREDVT_RET_HFAR4_1 + 1; ++j) + arm_strfpw (code, j, ARMREG_LR, j * 4); + break; + } + default: + arm_brk (code, i); + break; + } + br_ret [br_ret_index ++] = code; + arm_b (code, 0); + } + + arm_brk (code, 0); + + for (i = 0; i < br_ret_index; ++i) + mono_arm_patch (br_ret [i], code, MONO_R_ARM64_B); + for (i = 0; i < bcc_ret_index; ++i) + mono_arm_patch (bcc_ret [i], code, MONO_R_ARM64_BCC); + + /* Normal return */ + arm_movspx (code, ARMREG_SP, ARMREG_FP); + arm_ldpx_post (code, ARMREG_FP, ARMREG_LR, ARMREG_SP, offset); + arm_retx (code, ARMREG_LR); + + g_assert ((code - buf) < buf_len); + + if (info) + *info = mono_tramp_info_create ("gsharedvt_trampoline", buf, code - buf, ji, unwind_ops); + + mono_arch_flush_icache (buf, code - buf); + return buf; +} + +#else + +gpointer +mono_arch_get_gsharedvt_trampoline (MonoTrampInfo **info, gboolean aot) +{ + g_assert_not_reached (); + return NULL; +} + +#endif + +#else + +gpointer +mono_arch_get_gsharedvt_trampoline (MonoTrampInfo **info, gboolean aot) +{ + if (info) + *info = NULL; + return NULL; +} + +gpointer +mono_arch_get_gsharedvt_arg_trampoline (MonoDomain *domain, gpointer arg, gpointer addr) +{ + g_assert_not_reached (); + return NULL; +} + +#endif /* MONO_ARCH_GSHARED_SUPPORTED */ \ No newline at end of file diff --git a/mono/mini/tramp-arm64.c b/mono/mini/tramp-arm64.c index 16504375d84..2ea00c87fc3 
100644 --- a/mono/mini/tramp-arm64.c +++ b/mono/mini/tramp-arm64.c @@ -1 +1,615 @@ -#include "../../../mono-extensions/mono/mini/tramp-arm64.c" +/* + * tramp-arm64.c: JIT trampoline code for ARM64 + * + * Copyright 2013 Xamarin Inc + * + * Based on tramp-arm.c: + * + * Authors: + * Paolo Molaro (lupus@ximian.com) + * + * (C) 2001-2003 Ximian, Inc. + * Copyright 2003-2011 Novell Inc + * Copyright 2011 Xamarin Inc + */ + +#include "mini.h" +#include "debugger-agent.h" + +#include +#include + +#define ALIGN_TO(val,align) ((((guint64)val) + ((align) - 1)) & ~((align) - 1)) + +void +mono_arch_patch_callsite (guint8 *method_start, guint8 *code_ptr, guint8 *addr) +{ + mono_arm_patch (code_ptr - 4, addr, MONO_R_ARM64_BL); + mono_arch_flush_icache (code_ptr - 4, 4); +} + +void +mono_arch_patch_plt_entry (guint8 *code, gpointer *got, mgreg_t *regs, guint8 *addr) +{ + guint32 ins; + guint64 slot_addr; + int disp; + + /* + * Decode the address loaded by the PLT entry emitted by arch_emit_plt_entry () in + * aot-compiler.c + */ + + /* adrp */ + ins = ((guint32*)code) [0]; + g_assert (((ins >> 24) & 0x1f) == 0x10); + disp = (((ins >> 5) & 0x7ffff) << 2) | ((ins >> 29) & 0x3); + /* FIXME: disp is signed */ + g_assert ((disp >> 20) == 0); + + slot_addr = ((guint64)code + (disp << 12)) & ~0xfff; + + /* add x16, x16, :lo12:got */ + ins = ((guint32*)code) [1]; + g_assert (((ins >> 22) & 0x3) == 0); + slot_addr += (ins >> 10) & 0xfff; + + /* ldr x16, [x16, <offset>] */ + ins = ((guint32*)code) [2]; + g_assert (((ins >> 24) & 0x3f) == 0x39); + slot_addr += ((ins >> 10) & 0xfff) * 8; + + g_assert (*(guint64*)slot_addr); + *(gpointer*)slot_addr = addr; +} + +guint8* +mono_arch_get_call_target (guint8 *code) +{ + guint32 imm; + int disp; + + code -= 4; + + imm = *(guint32*)code; + /* Should be a bl */ + g_assert (((imm >> 31) & 0x1) == 0x1); + g_assert (((imm >> 26) & 0x7) == 0x5); + + disp = (imm & 0x3ffffff); + if ((disp >> 25) != 0) + /* Negative, sign extend to 32 bits */ + disp = disp | 0xfc000000; + + return code + (disp * 4); +} + +guint32 +mono_arch_get_plt_info_offset (guint8 *plt_entry, mgreg_t *regs, guint8 *code) +{ + /* The offset is stored as the 5th word of the plt entry */ + return ((guint32*)plt_entry) [4]; +} + +#ifndef DISABLE_JIT + +guchar* +mono_arch_create_generic_trampoline (MonoTrampolineType tramp_type, MonoTrampInfo **info, gboolean aot) +{ + guint8 *code, *buf, *tramp; + int i, buf_len, imm; + int frame_size, offset, gregs_offset, num_fregs, fregs_offset, arg_offset, lmf_offset; + guint64 gregs_regset; + GSList *unwind_ops = NULL; + MonoJumpInfo *ji = NULL; + char *tramp_name; + + buf_len = 768; + buf = code = mono_global_codeman_reserve (buf_len); + + /* + * We are getting called by a specific trampoline, ip1 contains the trampoline argument.
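 *
 * The LMF linking emitted below is equivalent to this C sketch (a
 * simplification which ignores MonoLMFExt frames; the helper name is
 * illustrative only):
 */

static G_GNUC_UNUSED void
lmf_push_pop_sketch (MonoLMF *lmf)
{
	MonoLMF **lmf_addr = mono_get_lmf_addr ();

	/* push: link the new LMF into the per-thread list */
	lmf->lmf_addr = lmf_addr;
	lmf->previous_lmf = *lmf_addr;
	*lmf_addr = lmf;
	/* ... the C trampoline function runs here ... */
	/* pop: unlink it again */
	*lmf_addr = lmf->previous_lmf;
}

/*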
+ */ + + /* Compute stack frame size and offsets */ + offset = 0; + /* frame block */ + offset += 2 * 8; + /* gregs */ + gregs_offset = offset; + offset += 32 * 8; + /* fregs */ + // FIXME: Save 128 bits + /* Only have to save the argument regs */ + num_fregs = 8; + fregs_offset = offset; + offset += num_fregs * 8; + /* arg */ + arg_offset = offset; + offset += 8; + /* LMF */ + lmf_offset = offset; + offset += sizeof (MonoLMF); + //offset += 22 * 8; + frame_size = ALIGN_TO (offset, MONO_ARCH_FRAME_ALIGNMENT); + + /* Setup stack frame */ + imm = frame_size; + while (imm > 256) { + arm_subx_imm (code, ARMREG_SP, ARMREG_SP, 256); + imm -= 256; + } + arm_subx_imm (code, ARMREG_SP, ARMREG_SP, imm); + arm_stpx (code, ARMREG_FP, ARMREG_LR, ARMREG_SP, 0); + arm_movspx (code, ARMREG_FP, ARMREG_SP); + + /* Save gregs */ + // FIXME: Optimize this + gregs_regset = ~((1 << ARMREG_FP) | (1 << ARMREG_SP)); + code = mono_arm_emit_store_regarray (code, gregs_regset, ARMREG_FP, gregs_offset); + /* Save fregs */ + for (i = 0; i < num_fregs; ++i) + arm_strfpx (code, i, ARMREG_FP, fregs_offset + (i * 8)); + /* Save trampoline arg */ + arm_strx (code, ARMREG_IP1, ARMREG_FP, arg_offset); + + /* Setup LMF */ + arm_addx_imm (code, ARMREG_IP0, ARMREG_FP, lmf_offset); + code = mono_arm_emit_store_regset (code, MONO_ARCH_LMF_REGS, ARMREG_IP0, MONO_STRUCT_OFFSET (MonoLMF, gregs)); + /* Save caller fp */ + arm_ldrx (code, ARMREG_IP1, ARMREG_FP, 0); + arm_strx (code, ARMREG_IP1, ARMREG_IP0, MONO_STRUCT_OFFSET (MonoLMF, gregs) + (MONO_ARCH_LMF_REG_FP * 8)); + /* Save caller sp */ + arm_movx (code, ARMREG_IP1, ARMREG_FP); + imm = frame_size; + while (imm > 256) { + arm_addx_imm (code, ARMREG_IP1, ARMREG_IP1, 256); + imm -= 256; + } + arm_addx_imm (code, ARMREG_IP1, ARMREG_IP1, imm); + arm_strx (code, ARMREG_IP1, ARMREG_IP0, MONO_STRUCT_OFFSET (MonoLMF, gregs) + (MONO_ARCH_LMF_REG_SP * 8)); + /* Save caller pc */ + if (tramp_type == MONO_TRAMPOLINE_JUMP) + arm_movx (code, ARMREG_LR, ARMREG_RZR); + else + arm_ldrx (code, ARMREG_LR, ARMREG_FP, 8); + arm_strx (code, ARMREG_LR, ARMREG_IP0, MONO_STRUCT_OFFSET (MonoLMF, pc)); + + /* Save LMF */ + /* Similar to emit_save_lmf () */ + if (aot) { + code = mono_arm_emit_aotconst (&ji, code, buf, ARMREG_IP0, MONO_PATCH_INFO_JIT_ICALL_ADDR, "mono_get_lmf_addr"); + } else { + tramp = (guint8*)mono_get_lmf_addr; + code = mono_arm_emit_imm64 (code, ARMREG_IP0, (guint64)tramp); + } + arm_blrx (code, ARMREG_IP0); + /* r0 contains the address of the tls slot holding the current lmf */ + /* ip0 = lmf */ + arm_addx_imm (code, ARMREG_IP0, ARMREG_FP, lmf_offset); + /* lmf->lmf_addr = lmf_addr */ + arm_strx (code, ARMREG_R0, ARMREG_IP0, MONO_STRUCT_OFFSET (MonoLMF, lmf_addr)); + /* lmf->previous_lmf = *lmf_addr */ + arm_ldrx (code, ARMREG_IP1, ARMREG_R0, 0); + arm_strx (code, ARMREG_IP1, ARMREG_IP0, MONO_STRUCT_OFFSET (MonoLMF, previous_lmf)); + /* *lmf_addr = lmf */ + arm_strx (code, ARMREG_IP0, ARMREG_R0, 0); + + /* Call the C trampoline function */ + /* Arg 1 = gregs */ + arm_addx_imm (code, ARMREG_R0, ARMREG_FP, gregs_offset); + /* Arg 2 = caller */ + if (tramp_type == MONO_TRAMPOLINE_JUMP) + arm_movx (code, ARMREG_R1, ARMREG_RZR); + else + arm_ldrx (code, ARMREG_R1, ARMREG_FP, gregs_offset + (ARMREG_LR * 8)); + /* Arg 3 = arg */ + if (MONO_TRAMPOLINE_TYPE_HAS_ARG (tramp_type)) + /* Passed in r0 */ + arm_ldrx (code, ARMREG_R2, ARMREG_FP, gregs_offset + (ARMREG_R0 * 8)); + else + arm_ldrx (code, ARMREG_R2, ARMREG_FP, arg_offset); + /* Arg 4 = trampoline addr */ + arm_movx (code, ARMREG_R3, 
ARMREG_RZR); + + if (aot) { + char *icall_name = g_strdup_printf ("trampoline_func_%d", tramp_type); + code = mono_arm_emit_aotconst (&ji, code, buf, ARMREG_IP0, MONO_PATCH_INFO_JIT_ICALL_ADDR, icall_name); + } else { + tramp = (guint8*)mono_get_trampoline_func (tramp_type); + code = mono_arm_emit_imm64 (code, ARMREG_IP0, (guint64)tramp); + } + arm_blrx (code, ARMREG_IP0); + + /* Restore LMF */ + /* Similar to emit_restore_lmf () */ + /* Clobbers ip0/ip1 */ + /* ip0 = lmf */ + arm_addx_imm (code, ARMREG_IP0, ARMREG_FP, lmf_offset); + /* ip1 = lmf->previous_lmf */ + arm_ldrx (code, ARMREG_IP1, ARMREG_IP0, MONO_STRUCT_OFFSET (MonoLMF, previous_lmf)); + /* ip0 = lmf->lmf_addr */ + arm_ldrx (code, ARMREG_IP0, ARMREG_IP0, MONO_STRUCT_OFFSET (MonoLMF, lmf_addr)); + /* *lmf_addr = previous_lmf */ + arm_strx (code, ARMREG_IP1, ARMREG_IP0, 0); + + /* Save the result to ip1 */ + arm_movx (code, ARMREG_IP1, ARMREG_R0); + + /* Restore gregs */ + /* Only have to load the argument regs (r0..r8) and the rgctx reg */ + code = mono_arm_emit_load_regarray (code, 0x1ff | (1 << ARMREG_LR) | (1 << MONO_ARCH_RGCTX_REG), ARMREG_FP, gregs_offset); + /* Restore fregs */ + for (i = 0; i < num_fregs; ++i) + arm_ldrfpx (code, i, ARMREG_FP, fregs_offset + (i * 8)); + + /* These trampolines return a value */ + if (tramp_type == MONO_TRAMPOLINE_RGCTX_LAZY_FETCH) + arm_movx (code, ARMREG_R0, ARMREG_IP1); + + /* Cleanup frame */ + code = mono_arm_emit_destroy_frame (code, frame_size, ((1 << ARMREG_IP0))); + + if (tramp_type == MONO_TRAMPOLINE_RGCTX_LAZY_FETCH) + arm_retx (code, ARMREG_LR); + else + arm_brx (code, ARMREG_IP1); + + g_assert ((code - buf) < buf_len); + mono_arch_flush_icache (buf, code - buf); + + if (info) { + tramp_name = mono_get_generic_trampoline_name (tramp_type); + *info = mono_tramp_info_create (tramp_name, buf, code - buf, ji, unwind_ops); + g_free (tramp_name); + } + + return buf; +} + +gpointer +mono_arch_create_specific_trampoline (gpointer arg1, MonoTrampolineType tramp_type, MonoDomain *domain, guint32 *code_len) +{ + guint8 *code, *buf, *tramp; + int buf_len = 64; + + /* + * Return a trampoline which calls generic trampoline TRAMP_TYPE passing in ARG1. + * Pass the argument in ip1, clobbering ip0. 
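 *
 * The emitted sequence is tiny; assuming mono_arm_emit_imm64 expands to a
 * movz/movk chain (an assumption, the emitter may use shorter forms), it is
 * roughly:
 *
 *   movz ip1, #(arg1 & 0xffff)
 *   movk ip1, #((arg1 >> 16) & 0xffff), lsl #16
 *   movk ip1, #((arg1 >> 32) & 0xffff), lsl #32
 *   movk ip1, #((arg1 >> 48) & 0xffff), lsl #48
 *   ; ... same four instructions for ip0 = tramp ...
 *   br ip0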
+ */ + tramp = mono_get_trampoline_code (tramp_type); + + buf = code = mono_global_codeman_reserve (buf_len); + + code = mono_arm_emit_imm64 (code, ARMREG_IP1, (guint64)arg1); + code = mono_arm_emit_imm64 (code, ARMREG_IP0, (guint64)tramp); + + arm_brx (code, ARMREG_IP0); + + g_assert ((code - buf) < buf_len); + mono_arch_flush_icache (buf, code - buf); + if (code_len) + *code_len = code - buf; + + return buf; +} + +gpointer +mono_arch_get_unbox_trampoline (MonoMethod *m, gpointer addr) +{ + guint8 *code, *start; + guint32 size = 32; + MonoDomain *domain = mono_domain_get (); + + start = code = mono_domain_code_reserve (domain, size); + code = mono_arm_emit_imm64 (code, ARMREG_IP0, (guint64)addr); + arm_addx_imm (code, ARMREG_R0, ARMREG_R0, sizeof (MonoObject)); + arm_brx (code, ARMREG_IP0); + + g_assert ((code - start) <= size); + mono_arch_flush_icache (start, code - start); + return start; +} + +gpointer +mono_arch_get_static_rgctx_trampoline (MonoMethod *m, MonoMethodRuntimeGenericContext *mrgctx, gpointer addr) +{ + guint8 *code, *start; + guint32 buf_len = 32; + MonoDomain *domain = mono_domain_get (); + + start = code = mono_domain_code_reserve (domain, buf_len); + code = mono_arm_emit_imm64 (code, MONO_ARCH_RGCTX_REG, (guint64)mrgctx); + code = mono_arm_emit_imm64 (code, ARMREG_IP0, (guint64)addr); + arm_brx (code, ARMREG_IP0); + + g_assert ((code - start) <= buf_len); + + mono_arch_flush_icache (start, code - start); + + return start; +} + +gpointer +mono_arch_create_rgctx_lazy_fetch_trampoline (guint32 slot, MonoTrampInfo **info, gboolean aot) +{ + guint8 *code, *buf; + int buf_size; + int i, depth, index, njumps; + gboolean is_mrgctx; + guint8 **rgctx_null_jumps; + MonoJumpInfo *ji = NULL; + GSList *unwind_ops = NULL; + guint8 *tramp; + guint32 code_len; + + is_mrgctx = MONO_RGCTX_SLOT_IS_MRGCTX (slot); + index = MONO_RGCTX_SLOT_INDEX (slot); + if (is_mrgctx) + index += MONO_SIZEOF_METHOD_RUNTIME_GENERIC_CONTEXT / sizeof (gpointer); + for (depth = 0; ; ++depth) { + int size = mono_class_rgctx_get_array_size (depth, is_mrgctx); + + if (index < size - 1) + break; + index -= size - 1; + } + + buf_size = 64 + 16 * depth; + code = buf = mono_global_codeman_reserve (buf_size); + + rgctx_null_jumps = g_malloc0 (sizeof (guint8*) * (depth + 2)); + njumps = 0; + + /* The vtable/mrgctx is in R0 */ + g_assert (MONO_ARCH_VTABLE_REG == ARMREG_R0); + + if (is_mrgctx) { + /* get mrgctx ptr */ + arm_movx (code, ARMREG_IP1, ARMREG_R0); + } else { + /* load rgctx ptr from vtable */ + code = mono_arm_emit_ldrx (code, ARMREG_IP1, ARMREG_R0, MONO_STRUCT_OFFSET (MonoVTable, runtime_generic_context)); + /* is the rgctx ptr null? */ + /* if yes, jump to actual trampoline */ + rgctx_null_jumps [njumps ++] = code; + arm_cbzx (code, ARMREG_IP1, 0); + } + + for (i = 0; i < depth; ++i) { + /* load ptr to next array */ + if (is_mrgctx && i == 0) { + code = mono_arm_emit_ldrx (code, ARMREG_IP1, ARMREG_IP1, MONO_SIZEOF_METHOD_RUNTIME_GENERIC_CONTEXT); + } else { + code = mono_arm_emit_ldrx (code, ARMREG_IP1, ARMREG_IP1, 0); + } + /* is the ptr null? */ + /* if yes, jump to actual trampoline */ + rgctx_null_jumps [njumps ++] = code; + arm_cbzx (code, ARMREG_IP1, 0); + } + + /* fetch slot */ + code = mono_arm_emit_ldrx (code, ARMREG_IP1, ARMREG_IP1, sizeof (gpointer) * (index + 1)); + /* is the slot null?
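 * (a cbz to the slow path follows). The fast path emitted above amounts to
 * this C sketch (slot 0 of each rgctx array links to the next one; the first
 * mrgctx level uses MONO_SIZEOF_METHOD_RUNTIME_GENERIC_CONTEXT as the link
 * offset instead, as in the loop above; the helper name is illustrative):
 */

static G_GNUC_UNUSED gpointer
rgctx_fetch_fast_path_sketch (gpointer *array, int depth, int index)
{
	int i;

	for (i = 0; i < depth; ++i) {
		array = (gpointer*)array [0];	/* follow the link to the next array */
		if (!array)
			return NULL;	/* fall back to the fetch trampoline */
	}
	/* slots are stored starting at index 1, after the link */
	return array [index + 1];
}

/*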
*/ + /* if yes, jump to actual trampoline */ + rgctx_null_jumps [njumps ++] = code; + arm_cbzx (code, ARMREG_IP1, 0); + /* otherwise return, result is in IP1 */ + arm_movx (code, ARMREG_R0, ARMREG_IP1); + arm_brx (code, ARMREG_LR); + + g_assert (njumps <= depth + 2); + for (i = 0; i < njumps; ++i) + mono_arm_patch (rgctx_null_jumps [i], code, MONO_R_ARM64_CBZ); + + g_free (rgctx_null_jumps); + + /* Slowpath */ + + /* Call mono_rgctx_lazy_fetch_trampoline (), passing in the slot as argument */ + /* The vtable/mrgctx is still in R0 */ + if (aot) { + code = mono_arm_emit_aotconst (&ji, code, buf, ARMREG_IP0, MONO_PATCH_INFO_JIT_ICALL_ADDR, g_strdup_printf ("specific_trampoline_lazy_fetch_%u", slot)); + } else { + tramp = mono_arch_create_specific_trampoline (GUINT_TO_POINTER (slot), MONO_TRAMPOLINE_RGCTX_LAZY_FETCH, mono_get_root_domain (), &code_len); + code = mono_arm_emit_imm64 (code, ARMREG_IP0, (guint64)tramp); + } + arm_brx (code, ARMREG_IP0); + + mono_arch_flush_icache (buf, code - buf); + + g_assert (code - buf <= buf_size); + + if (info) { + char *name = mono_get_rgctx_fetch_trampoline_name (slot); + *info = mono_tramp_info_create (name, buf, code - buf, ji, unwind_ops); + g_free (name); + } + + return buf; +} + +gpointer +mono_arch_create_general_rgctx_lazy_fetch_trampoline (MonoTrampInfo **info, gboolean aot) +{ + guint8 *code, *buf; + int tramp_size; + MonoJumpInfo *ji = NULL; + GSList *unwind_ops = NULL; + + g_assert (aot); + + tramp_size = 32; + + code = buf = mono_global_codeman_reserve (tramp_size); + + mono_add_unwind_op_def_cfa (unwind_ops, code, buf, ARMREG_SP, 0); + + // FIXME: Currently, we always go to the slow path. + /* Load trampoline addr */ + arm_ldrx (code, ARMREG_IP0, MONO_ARCH_RGCTX_REG, 8); + /* The vtable/mrgctx is in R0 */ + g_assert (MONO_ARCH_VTABLE_REG == ARMREG_R0); + arm_brx (code, ARMREG_IP0); + + mono_arch_flush_icache (buf, code - buf); + + g_assert (code - buf <= tramp_size); + + if (info) + *info = mono_tramp_info_create ("rgctx_fetch_trampoline_general", buf, code - buf, ji, unwind_ops); + + return buf; +} + +/* + * mono_arch_create_sdb_trampoline: + * + * Return a trampoline which captures the current context, passes it to + * debugger_agent_single_step_from_context ()/debugger_agent_breakpoint_from_context (), + * then restores the (potentially changed) context. 
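 *
 * In C terms the trampoline behaves roughly like this (the context
 * capture/restore is really the hand-written register save/load sequence
 * emitted below):
 *
 *   MonoContext ctx;
 *   // store all gregs plus the caller fp/sp/pc into ctx
 *   if (single_step)
 *       debugger_agent_single_step_from_context (&ctx);
 *   else
 *       debugger_agent_breakpoint_from_context (&ctx);
 *   // reload the (possibly modified) registers from ctx and return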
+ */ +guint8* +mono_arch_create_sdb_trampoline (gboolean single_step, MonoTrampInfo **info, gboolean aot) +{ + int tramp_size = 512; + int offset, imm, frame_size, ctx_offset; + guint64 gregs_regset; + guint8 *code, *buf; + GSList *unwind_ops = NULL; + MonoJumpInfo *ji = NULL; + + code = buf = mono_global_codeman_reserve (tramp_size); + + /* Compute stack frame size and offsets */ + offset = 0; + /* frame block */ + offset += 2 * 8; + /* MonoContext */ + ctx_offset = offset; + offset += sizeof (MonoContext); + offset = ALIGN_TO (offset, MONO_ARCH_FRAME_ALIGNMENT); + frame_size = offset; + + // FIXME: Unwind info + + /* Setup stack frame */ + imm = frame_size; + while (imm > 256) { + arm_subx_imm (code, ARMREG_SP, ARMREG_SP, 256); + imm -= 256; + } + arm_subx_imm (code, ARMREG_SP, ARMREG_SP, imm); + arm_stpx (code, ARMREG_FP, ARMREG_LR, ARMREG_SP, 0); + arm_movspx (code, ARMREG_FP, ARMREG_SP); + + /* Initialize a MonoContext structure on the stack */ + /* No need to save fregs */ + gregs_regset = ~((1 << ARMREG_FP) | (1 << ARMREG_SP)); + code = mono_arm_emit_store_regarray (code, gregs_regset, ARMREG_FP, ctx_offset + G_STRUCT_OFFSET (MonoContext, regs)); + /* Save caller fp */ + arm_ldrx (code, ARMREG_IP1, ARMREG_FP, 0); + arm_strx (code, ARMREG_IP1, ARMREG_FP, ctx_offset + G_STRUCT_OFFSET (MonoContext, regs) + (ARMREG_FP * 8)); + /* Save caller sp */ + arm_movx (code, ARMREG_IP1, ARMREG_FP); + imm = frame_size; + while (imm > 256) { + arm_addx_imm (code, ARMREG_IP1, ARMREG_IP1, 256); + imm -= 256; + } + arm_addx_imm (code, ARMREG_IP1, ARMREG_IP1, imm); + arm_strx (code, ARMREG_IP1, ARMREG_FP, ctx_offset + G_STRUCT_OFFSET (MonoContext, regs) + (ARMREG_SP * 8)); + /* Save caller ip */ + arm_ldrx (code, ARMREG_IP1, ARMREG_FP, 8); + arm_strx (code, ARMREG_IP1, ARMREG_FP, ctx_offset + G_STRUCT_OFFSET (MonoContext, pc)); + + /* Call the single step/breakpoint function in sdb */ + /* Arg1 = ctx */ + arm_addx_imm (code, ARMREG_R0, ARMREG_FP, ctx_offset); + if (aot) { + if (single_step) + code = mono_arm_emit_aotconst (&ji, code, buf, ARMREG_IP0, MONO_PATCH_INFO_JIT_ICALL_ADDR, "debugger_agent_single_step_from_context"); + else + code = mono_arm_emit_aotconst (&ji, code, buf, ARMREG_IP0, MONO_PATCH_INFO_JIT_ICALL_ADDR, "debugger_agent_breakpoint_from_context"); + } else { + gpointer addr = single_step ? debugger_agent_single_step_from_context : debugger_agent_breakpoint_from_context; + + code = mono_arm_emit_imm64 (code, ARMREG_IP0, (guint64)addr); + } + arm_blrx (code, ARMREG_IP0); + + /* Restore ctx */ + /* Save fp/pc into the frame block */ + arm_ldrx (code, ARMREG_IP0, ARMREG_FP, ctx_offset + G_STRUCT_OFFSET (MonoContext, regs) + (ARMREG_FP * 8)); + arm_strx (code, ARMREG_IP0, ARMREG_FP, 0); + arm_ldrx (code, ARMREG_IP0, ARMREG_FP, ctx_offset + G_STRUCT_OFFSET (MonoContext, pc)); + arm_strx (code, ARMREG_IP0, ARMREG_FP, 8); + gregs_regset = ~((1 << ARMREG_FP) | (1 << ARMREG_SP)); + code = mono_arm_emit_load_regarray (code, gregs_regset, ARMREG_FP, ctx_offset + G_STRUCT_OFFSET (MonoContext, regs)); + + code = mono_arm_emit_destroy_frame (code, frame_size, ((1 << ARMREG_IP0) | (1 << ARMREG_IP1))); + + arm_retx (code, ARMREG_LR); + + mono_arch_flush_icache (buf, code - buf); + g_assert (code - buf <= tramp_size); + + const char *tramp_name = single_step ?
"sdb_single_step_trampoline" : "sdb_breakpoint_trampoline"; + *info = mono_tramp_info_create (tramp_name, buf, code - buf, ji, unwind_ops); + + return buf; +} + +#else /* DISABLE_JIT */ + +guchar* +mono_arch_create_generic_trampoline (MonoTrampolineType tramp_type, MonoTrampInfo **info, gboolean aot) +{ + g_assert_not_reached (); + return NULL; +} + +gpointer +mono_arch_create_specific_trampoline (gpointer arg1, MonoTrampolineType tramp_type, MonoDomain *domain, guint32 *code_len) +{ + g_assert_not_reached (); + return NULL; +} + +gpointer +mono_arch_get_unbox_trampoline (MonoMethod *m, gpointer addr) +{ + g_assert_not_reached (); + return NULL; +} + +gpointer +mono_arch_get_static_rgctx_trampoline (MonoMethod *m, MonoMethodRuntimeGenericContext *mrgctx, gpointer addr) +{ + g_assert_not_reached (); + return NULL; +} + +gpointer +mono_arch_create_rgctx_lazy_fetch_trampoline (guint32 slot, MonoTrampInfo **info, gboolean aot) +{ + g_assert_not_reached (); + return NULL; +} + +gpointer +mono_arch_get_nullified_class_init_trampoline (MonoTrampInfo **info) +{ + g_assert_not_reached (); + return NULL; +} + +guint8* +mono_arch_create_sdb_trampoline (gboolean single_step, MonoTrampInfo **info, gboolean aot) +{ + g_assert_not_reached (); + return NULL; +} + +#endif /* !DISABLE_JIT */ -- 2.25.1