+int size_to_ia64_load_u_membase_inc (int size);
+int size_to_ia64_store_membase_inc_reg (int size);
+
%%
#
tree->dreg = state->reg1;
}
+# Atomic fetch-and-add with a constant addend: rewritten in place into the
+# IA64 fetchadd-immediate forms and re-emitted into the current bblock.
+reg: OP_ATOMIC_ADD_NEW_I4 (base, OP_ICONST),
+reg: OP_ATOMIC_ADD_NEW_I8 (base, OP_ICONST) {
+	/* Pick the 4 or 8 byte fetchadd variant matching the source opcode. */
+	tree->opcode = tree->opcode == OP_ATOMIC_ADD_NEW_I4 ? OP_IA64_FETCHADD4_IMM : OP_IA64_FETCHADD8_IMM;
+	tree->dreg = state->reg1;
+	/* The addend comes from the constant right child. */
+	tree->inst_imm = state->right->tree->inst_imm;
+	/* The memory operand comes from the base left child. */
+	tree->inst_basereg = state->left->tree->inst_basereg;
+	tree->inst_offset = state->left->tree->inst_offset;
+
+	mono_bblock_add_inst (s->cbb, tree);
+}
+
+reg: OP_ATOMIC_EXCHANGE_I4 (base, reg),
+reg: OP_ATOMIC_EXCHANGE_I8 (base, reg) {
+ tree->opcode = tree->opcode;
+ tree->dreg = state->reg1;
+ tree->sreg2 = state->right->reg1;
+ tree->inst_basereg = state->left->tree->inst_basereg;
+ tree->inst_offset = state->left->tree->inst_offset;
+
+ mono_bblock_add_inst (s->cbb, tree);
+}
+
+# Optimized memset implementation: expands the intrinsic inline into a
+# series of post-increment stores instead of calling out to memset ().
+stmt: OP_MEMSET (base) "0" {
+	int dest_reg, dest_reg2, val_reg, unit, align;
+	/* The byte count is stashed in the instruction's 'unused' field. */
+	int size = tree->unused;
+
+	dest_reg = mono_regstate_next_int (s->rs);
+
+	if (state->left->tree->inst_basereg == s->frame_reg)
+		/* Aligned by mono_allocate_stack_slots */
+		align = 8;
+	else
+		align = 4;
+
+	/* Use IA64_R0 directly when storing zeros, avoiding a constant load;
+	 * otherwise materialize the fill value into a fresh register. */
+	if (tree->inst_imm == 0)
+		val_reg = IA64_R0;
+	else {
+		val_reg = mono_regstate_next_int (s->rs);
+
+		MONO_EMIT_NEW_ICONST (s, val_reg, tree->inst_imm);
+	}
+
+	/* Materialize the destination address (base + offset). */
+	MONO_EMIT_NEW_BIALU_IMM (s, OP_ADD_IMM, dest_reg, state->left->tree->inst_basereg, state->left->tree->inst_offset);
+
+	/* FIXME: Alignment */
+	/* Store in progressively smaller units: align, align/2, ..., 1 byte. */
+	for (unit = align; unit >= 1; unit = unit >> 1) {
+		dest_reg2 = mono_regstate_next_int (s->rs);
+
+		/* Use two destination regs to increase parallelism */
+		if (size >= 2 * unit) {
+			/* Second pointer starts one unit ahead; each loop iteration
+			 * advances both via the post-increment store opcodes. */
+			MONO_EMIT_NEW_BIALU_IMM (s, OP_ADD_IMM, dest_reg2, state->left->tree->inst_basereg, state->left->tree->inst_offset + unit);
+
+			while (size >= (2 * unit)) {
+				MONO_EMIT_NEW_STORE_MEMBASE (s, size_to_ia64_store_membase_inc_reg (unit), dest_reg, 0, val_reg);
+				MONO_EMIT_NEW_STORE_MEMBASE (s, size_to_ia64_store_membase_inc_reg (unit), dest_reg2, 0, val_reg);
+				size -= 2 * unit;
+			}
+		}
+
+		/* Drain any remaining bytes at this unit size one store at a time. */
+		while (size >= unit) {
+			MONO_EMIT_NEW_STORE_MEMBASE (s, size_to_ia64_store_membase_inc_reg (unit), dest_reg, 0, val_reg);
+			size -= unit;
+		}
+	}
+
+}
+
+# Optimized memcpy implementation: expands the intrinsic inline into paired
+# post-increment load/store sequences instead of calling out to memcpy ().
+stmt: OP_MEMCPY (base, base) "0" {
+	int cur_reg, src_reg, dest_reg, unit;
+	/* The byte count is stashed in the instruction's 'unused' field. */
+	int size = tree->unused;
+	int align;
+
+	src_reg = mono_regstate_next_int (s->rs);
+	dest_reg = mono_regstate_next_int (s->rs);
+
+	/* 8-byte units are only safe when BOTH operands are frame slots. */
+	if ((state->left->tree->inst_basereg == s->frame_reg) &&
+		(state->right->tree->inst_basereg == s->frame_reg))
+		/* Aligned by mono_allocate_stack_slots */
+		align = 8;
+	else
+		align = 4;
+
+	/* Materialize destination (left child) and source (right child)
+	 * addresses as base + offset. */
+	MONO_EMIT_NEW_BIALU_IMM (s, OP_ADD_IMM, dest_reg, state->left->tree->inst_basereg, state->left->tree->inst_offset);
+	MONO_EMIT_NEW_BIALU_IMM (s, OP_ADD_IMM, src_reg, state->right->tree->inst_basereg, state->right->tree->inst_offset);
+
+	/* FIXME: Alignment */
+	/* Copy in progressively smaller units: align, align/2, ..., 1 byte.
+	 * Both pointers advance implicitly via the post-increment opcodes. */
+	for (unit = align; unit >= 1; unit = unit >> 1) {
+
+		while (size >= unit) {
+			/* Fresh temp per transfer so loads/stores can overlap. */
+			cur_reg = mono_regstate_next_int (s->rs);
+			MONO_EMIT_NEW_LOAD_MEMBASE_OP (s, size_to_ia64_load_u_membase_inc (unit), cur_reg, src_reg, 0);
+			MONO_EMIT_NEW_STORE_MEMBASE (s, size_to_ia64_store_membase_inc_reg (unit), dest_reg, 0, cur_reg);
+			size -= unit;
+		}
+	}
+}
+
%%
+
+/*
+ * size_to_ia64_load_u_membase_inc:
+ *
+ *   Map an access width in bytes (1, 2, 4 or 8) to the corresponding IA64
+ * post-increment load opcode. Widths 1/2/4 use the unsigned load forms;
+ * width 8 uses the I8 form. Any other width is a caller bug.
+ */
+int
+size_to_ia64_load_u_membase_inc (int size)
+{
+	if (size == 1)
+		return OP_IA64_LOADU1_MEMBASE_INC;
+	if (size == 2)
+		return OP_IA64_LOADU2_MEMBASE_INC;
+	if (size == 4)
+		return OP_IA64_LOADU4_MEMBASE_INC;
+	if (size == 8)
+		return OP_IA64_LOADI8_MEMBASE_INC;
+
+	g_assert_not_reached ();
+	return -1;
+}
+
+/*
+ * size_to_ia64_store_membase_inc_reg:
+ *
+ *   Map an access width in bytes (1, 2, 4 or 8) to the matching IA64
+ * post-increment store opcode. Any other width is a caller bug.
+ */
+int
+size_to_ia64_store_membase_inc_reg (int size)
+{
+	if (size == 1)
+		return OP_IA64_STOREI1_MEMBASE_INC_REG;
+	if (size == 2)
+		return OP_IA64_STOREI2_MEMBASE_INC_REG;
+	if (size == 4)
+		return OP_IA64_STOREI4_MEMBASE_INC_REG;
+	if (size == 8)
+		return OP_IA64_STOREI8_MEMBASE_INC_REG;
+
+	g_assert_not_reached ();
+	return -1;
+}