From: Elijah Taylor Date: Thu, 6 Jan 2011 00:02:57 +0000 (-0800) Subject: Merge remote branch 'upstream/master' X-Git-Url: http://wien.tomnetworks.com/gitweb/?a=commitdiff_plain;h=5b558abeeb255a3179d4ca6a85617e051c6abd38;hp=45890e8b46ac438d2b8ccc1bd0d74eea31870de1;p=mono.git Merge remote branch 'upstream/master' --- diff --git a/configure.in b/configure.in index 540f0309b69..ac9717d2a31 100644 --- a/configure.in +++ b/configure.in @@ -196,6 +196,7 @@ case "$host" in use_sigposix=yes ikvm_native=no AC_DEFINE(DISABLE_SOCKETS,1,[Disable sockets support]) + AC_DEFINE(DISABLE_ATTACH, 1, [Disable agent attach support]) ;; *-*-hpux*) host_win32=no @@ -611,6 +612,10 @@ if test "x$enable_shared" = "xno"; then with_shared_mono=no fi +case $host in +*nacl* ) with_shared_mono=yes;; +esac + if test "x$host_win32" = "xyes"; then # Boehm GC requires the runtime to be in its own dll with_static_mono=no @@ -1980,6 +1985,7 @@ dnl *** NaCl *** dnl ************** AC_ARG_ENABLE(nacl_codegen, [ --enable-nacl-codegen Enable Native Client code generation], enable_nacl_codegen=$enableval, enable_nacl_codegen=no) +AC_ARG_ENABLE(nacl_gc, [ --enable-nacl-gc Enable Native Client garbage collection], enable_nacl_gc=$enableval, enable_nacl_gc=no) AM_CONDITIONAL(NACL_CODEGEN, test x$enable_nacl_codegen != xno) if test "x$enable_nacl_codegen" = "xyes"; then @@ -1988,6 +1994,10 @@ if test "x$enable_nacl_codegen" = "xyes"; then AC_DEFINE(TARGET_NACL, 1, [...]) else MONO_NACL_ALIGN_MASK_OFF=0 + CPPFLAGS="$CPPFLAGS -D__default_codegen__" +fi +if test "x$enable_nacl_gc" = "xyes"; then + CPPFLAGS="$CPPFLAGS -finstrument-for-thread-suspension -D__native_client_gc__" fi AC_SUBST(MONO_NACL_ALIGN_MASK_OFF) @@ -2140,6 +2150,12 @@ case "$host" in sgen_supported=true ;; esac + case "$host" in + x86_64-*-nacl*) + AC_DEFINE(__mono_ilp32__, 1, [64 bit mode with 4 byte longs and pointers]) + sizeof_register=8 + ;; + esac ;; ia64-*-*) TARGET=IA64 @@ -2291,6 +2307,14 @@ if test "x$host" != "x$target"; then sizeof_register=8 target_byte_order=G_BIG_ENDIAN ;; + x86_64-*-nacl) + TARGET=AMD64 + arch_target=amd64 + AC_DEFINE(TARGET_AMD64, 1, [...]) + AC_DEFINE(MONO_CROSS_COMPILE,1,[The runtime is compiled for cross-compiling mode]) + AC_DEFINE(__mono_ilp32__, 1, [64 bit mode with 4 byte longs and pointers]) + sizeof_register=8 + ;; *) AC_MSG_WARN([Cross compiling is only supported for targets matching 'powerpc64-{ps3,xbox360}-linux-gnu']) esac @@ -2548,6 +2572,10 @@ case "x$gc" in if test x$TARGET = xSPARC -o x$TARGET = xSPARC64; then LIBGC_CPPFLAGS=`echo $LIBGC_CPPFLAGS | sed -e 's/-D_FILE_OFFSET_BITS=64//g'` fi + # Don't pass -finstrument-for-thread-suspension in, + # if these are instrumented it will be very bad news + # (infinite recursion, undefined parking behavior, etc) + LIBGC_CPPFLAGS=`echo $LIBGC_CPPFLAGS | sed -e 's/-finstrument-for-thread-suspension//g'` ac_configure_args="$ac_configure_args --disable-embed-check --with-libgc-threads=$libgc_threads $libgc_configure_args \"CPPFLAGS_FOR_LIBGC=$LIBGC_CPPFLAGS\" \"CFLAGS_FOR_LIBGC=$CFLAGS_FOR_LIBGC\"" AC_CONFIG_SUBDIRS(libgc) ;; diff --git a/libgc/configure.in b/libgc/configure.in index c7ce110c6c2..e68d7382883 100644 --- a/libgc/configure.in +++ b/libgc/configure.in @@ -94,7 +94,7 @@ case "$THREADS" in fi AC_DEFINE(THREAD_LOCAL_ALLOC) ;; - *-*-linux*) + *-*-linux* | *-*-nacl*) AC_DEFINE(GC_LINUX_THREADS) AC_DEFINE(_REENTRANT) ;; @@ -340,6 +340,9 @@ case "$host" in machdep="mach_dep.lo ia64_save_regs_in_stack.lo" target_ia64=true ;; + *-*-nacl*) + AC_DEFINE(NO_EXECUTE_PERMISSION) + ;; 
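(Illustration, not part of the patch.) The --enable-nacl-gc block above adds -finstrument-for-thread-suspension and -D__native_client_gc__ to CPPFLAGS for the runtime, while the LIBGC_CPPFLAGS sed above strips the flag again, since instrumenting the collector itself would recurse into its own stop-the-world code. A minimal sketch of what instrumented runtime code is expected to do around a blocking call, using only the hooks this patch defines in libgc/pthread_stop_world.c; the wrapper function below is hypothetical:

#include <unistd.h>

/* Hooks added by this patch in libgc/pthread_stop_world.c. */
void nacl_pre_syscall_hook (void);
void nacl_post_syscall_hook (void);

/* Hypothetical wrapper: code built with -finstrument-for-thread-suspension */
/* is expected to bracket blocking operations roughly like this, so the     */
/* collector can treat the thread as parked while it blocks.                */
static ssize_t
read_with_gc_parking (int fd, void *buf, size_t len)
{
        ssize_t r;
        nacl_pre_syscall_hook ();   /* snapshot registers/stack, mark thread parked */
        r = read (fd, buf, len);    /* the GC may stop the world while we block here */
        nacl_post_syscall_hook ();  /* suspend if a collection is pending, then unpark */
        return r;
}
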
esac if test x"$machdep" = x; then AC_MSG_RESULT($machdep) diff --git a/libgc/gc_dlopen.c b/libgc/gc_dlopen.c index 4c690edcfe4..6ca9e996fb2 100644 --- a/libgc/gc_dlopen.c +++ b/libgc/gc_dlopen.c @@ -25,7 +25,7 @@ #include "private/gc_priv.h" -# if (defined(GC_PTHREADS) && !defined(GC_DARWIN_THREADS)) \ +# if defined(DYNAMIC_LOADING) && (defined(GC_PTHREADS) && !defined(GC_DARWIN_THREADS)) \ || defined(GC_SOLARIS_THREADS) # if defined(dlopen) && !defined(GC_USE_LD_WRAP) diff --git a/libgc/include/gc_pthread_redirects.h b/libgc/include/gc_pthread_redirects.h index bedcc26ac7e..520a36bf66c 100644 --- a/libgc/include/gc_pthread_redirects.h +++ b/libgc/include/gc_pthread_redirects.h @@ -59,6 +59,11 @@ #endif int GC_pthread_join(pthread_t thread, void **retval); int GC_pthread_detach(pthread_t thread); +#if defined(__native_client__) || defined(NACL) + void GC_pthread_exit(void *status); +# undef pthread_exit +# define pthread_exit GC_pthread_exit +#endif #if defined(GC_OSF1_THREADS) \ && defined(_PTHREAD_USE_MANGLED_NAMES_) && !defined(_PTHREAD_USE_PTDNAM_) diff --git a/libgc/include/private/gc_priv.h b/libgc/include/private/gc_priv.h index 2efb4732f79..5c4c5146ba2 100644 --- a/libgc/include/private/gc_priv.h +++ b/libgc/include/private/gc_priv.h @@ -1962,6 +1962,8 @@ void GC_err_puts GC_PROTO((GC_CONST char *s)); /* SPARC/Linux doesn't properly define SIGPWR in . * It is aliased to SIGLOST in asm/signal.h, though. */ # define SIG_SUSPEND SIGLOST +# elif defined(NACL) +# define SIG_SUSPEND 0 # else /* Linuxthreads itself uses SIGUSR1 and SIGUSR2. */ # define SIG_SUSPEND SIGPWR diff --git a/libgc/include/private/gcconfig.h b/libgc/include/private/gcconfig.h index d1c46944bfe..8276079d91e 100644 --- a/libgc/include/private/gcconfig.h +++ b/libgc/include/private/gcconfig.h @@ -65,6 +65,11 @@ # endif /* Determine the machine type: */ +# if defined(__native_client__) +# define NACL +# define I386 +# define mach_type_known +# endif # if defined(__arm__) || defined(__thumb__) # define ARM32 # if !defined(LINUX) && !defined(NETBSD) && !defined(DARWIN) @@ -1086,13 +1091,19 @@ # endif # ifdef I386 -# define MACH_TYPE "I386" -# if defined(__LP64__) || defined(_WIN64) -# define CPP_WORDSZ 64 -# define ALIGNMENT 8 -# else +# if defined( NACL ) +# define MACH_TYPE "NACL" # define CPP_WORDSZ 32 # define ALIGNMENT 4 +# else +# define MACH_TYPE "I386" +# if defined(__LP64__) || defined(_WIN64) +# define CPP_WORDSZ 64 +# define ALIGNMENT 8 +# else +# define CPP_WORDSZ 32 +# define ALIGNMENT 4 +# endif /* Appears to hold for all "32 bit" compilers */ /* except Borland. The -a4 option fixes */ /* Borland. 
*/ @@ -1188,7 +1199,32 @@ # define HEAP_START DATAEND # endif /* USE_MMAP */ # endif /* DGUX */ - +# ifdef NACL +# define OS_TYPE "NACL" + extern int etext[]; +# define DATASTART ((ptr_t)((((word) (etext)) + 0xfff) & ~0xfff)) + extern int _end[]; +# define DATAEND (_end) +# ifdef STACK_GRAN +# undef STACK_GRAN +# endif /* STACK_GRAN */ +# define STACK_GRAN 0x10000 +# define HEURISTIC1 +# ifdef USE_MMAP +# undef USE_MMAP +# endif +# ifdef USE_MUNMAP +# undef USE_MUNMAP +# endif +# ifdef USE_MMAP_ANON +# undef USE_MMAP_ANON +# endif +# ifdef USE_MMAP_FIXED +# undef USE_MMAP_FIXED +# endif +# define GETPAGESIZE() 65536 +# define MAX_NACL_GC_THREADS 1024 +# endif # ifdef LINUX # ifndef __GNUC__ /* The Intel compiler doesn't like inline assembly */ @@ -2271,7 +2307,7 @@ # if defined(GC_IRIX_THREADS) && !defined(IRIX5) --> inconsistent configuration # endif -# if defined(GC_LINUX_THREADS) && !defined(LINUX) +# if defined(GC_LINUX_THREADS) && !(defined(LINUX) || defined(NACL)) --> inconsistent configuration # endif # if defined(GC_SOLARIS_THREADS) && !defined(SUNOS5) diff --git a/libgc/include/private/pthread_stop_world.h b/libgc/include/private/pthread_stop_world.h index 054c7a0eacd..bd72739f580 100644 --- a/libgc/include/private/pthread_stop_world.h +++ b/libgc/include/private/pthread_stop_world.h @@ -7,6 +7,15 @@ struct thread_stop_info { /* last successfully handled a suspend */ /* signal. */ ptr_t stack_ptr; /* Valid only when stopped. */ +#ifdef NACL +/* Grab NACL_GC_REG_STORAGE_SIZE pointers off the stack when going into */ +/* a syscall. 20 is more than we need, but it's an overestimate in case*/ +/* the instrumented function uses any callee saved registers, they may */ +/* be pushed to the stack much earlier. Also, on amd64 'push' puts 8 */ +/* bytes on the stack even though our pointers are 4 bytes. */ +#define NACL_GC_REG_STORAGE_SIZE 20 + ptr_t reg_storage[NACL_GC_REG_STORAGE_SIZE]; +#endif }; #endif diff --git a/libgc/include/private/pthread_support.h b/libgc/include/private/pthread_support.h index 852d3815bc6..dbd6dbcde89 100644 --- a/libgc/include/private/pthread_support.h +++ b/libgc/include/private/pthread_support.h @@ -92,6 +92,9 @@ typedef struct GC_Thread_Rep { # define THREAD_TABLE_SZ 128 /* Must be power of 2 */ extern volatile GC_thread GC_threads[THREAD_TABLE_SZ]; +#ifdef NACL +extern __thread GC_thread gc_thread_self; +#endif extern GC_bool GC_thr_initialized; diff --git a/libgc/os_dep.c b/libgc/os_dep.c index ee2e409ad42..ecaa27c7d82 100644 --- a/libgc/os_dep.c +++ b/libgc/os_dep.c @@ -133,7 +133,7 @@ # include #endif -#ifdef UNIX_LIKE +#if defined( UNIX_LIKE ) || defined(NACL) # include #endif @@ -618,6 +618,12 @@ void GC_enable_signals(void) /* longjmp implementations. Most systems appear not to have */ /* a signal 32. */ # define SIGSETMASK(old, new) (old) = sigsetmask(new) +# elif defined(NACL) + /* We don't use signals in NaCl. 
*/ +# define SIGSET_T int +# define SIG_DEL(set, signal) +# define SIG_FILL(set) +# define SIGSETMASK(old, new) # else /* Use POSIX/SYSV interface */ # define SIGSET_T sigset_t @@ -2067,8 +2073,21 @@ void GC_remap(ptr_t start, word bytes) int result; if (0 == start_addr) return; +#ifdef NACL + { + /* NaCl doesn't expose mprotect, but mmap should work fine */ + void * mmap_result; + mmap_result = mmap(start_addr, len, PROT_READ | PROT_WRITE | OPT_PROT_EXEC, + MAP_PRIVATE | MAP_FIXED | OPT_MAP_ANON, + zero_fd, 0/* offset */); + if (mmap_result != (void *)start_addr) ABORT("mmap as mprotect failed"); + /* Fake the return value as if mprotect succeeded. */ + result = 0; + } +#else /* NACL */ result = mprotect(start_addr, len, PROT_READ | PROT_WRITE | OPT_PROT_EXEC); +#endif /* NACL */ if (result != 0) { GC_err_printf3( "Mprotect failed at 0x%lx (length %ld) with errno %ld\n", diff --git a/libgc/pthread_stop_world.c b/libgc/pthread_stop_world.c index bf2faafe3d6..b1f98099d1e 100644 --- a/libgc/pthread_stop_world.c +++ b/libgc/pthread_stop_world.c @@ -9,6 +9,7 @@ #include #include #include +#include /* work around a dlopen issue (bug #75390), undefs to avoid warnings with redefinitions */ #undef PACKAGE_BUGREPORT @@ -22,6 +23,19 @@ #include "include/libgc-mono-debugger.h" #endif +#ifdef NACL +int nacl_park_threads_now = 0; +pthread_t nacl_thread_parker = -1; + +int nacl_thread_parked[MAX_NACL_GC_THREADS]; +int nacl_thread_used[MAX_NACL_GC_THREADS]; +int nacl_thread_parking_inited = 0; +int nacl_num_gc_threads = 0; +pthread_mutex_t nacl_thread_alloc_lock = PTHREAD_MUTEX_INITIALIZER; +__thread int nacl_thread_idx = -1; +__thread GC_thread nacl_gc_thread_self = NULL; +#endif + #if DEBUG_THREADS #ifndef NSIG @@ -36,6 +50,7 @@ # endif #endif +#ifndef NACL void GC_print_sig_mask() { sigset_t blocked; @@ -49,7 +64,7 @@ void GC_print_sig_mask() } GC_printf0("\n"); } - +#endif /* NACL */ #endif /* Remove the signals that we want to allow in thread stopping */ @@ -116,6 +131,7 @@ sem_t GC_suspend_ack_sem; static void _GC_suspend_handler(int sig) { +#ifndef NACL int dummy; pthread_t my_thread = pthread_self(); GC_thread me; @@ -185,6 +201,8 @@ static void _GC_suspend_handler(int sig) #if DEBUG_THREADS GC_printf1("Continuing 0x%lx\n", my_thread); #endif + +#endif /* NACL */ } void GC_suspend_handler(int sig) @@ -278,6 +296,10 @@ static void pthread_push_all_stacks() # else GC_push_all_stack(lo, hi); # endif +# ifdef NACL + /* Push reg_storage as roots, this will cover the reg context */ + GC_push_all_stack(p -> stop_info.reg_storage, p -> stop_info.reg_storage + NACL_GC_REG_STORAGE_SIZE); +# endif # ifdef IA64 # if DEBUG_THREADS GC_printf3("Reg stack for thread 0x%lx = [%lx,%lx)\n", @@ -337,6 +359,7 @@ int android_thread_kill(pid_t tid, int sig) /* were sent. */ int GC_suspend_all() { +#ifndef NACL int n_live_threads = 0; int i; GC_thread p; @@ -375,11 +398,15 @@ int GC_suspend_all() } } return n_live_threads; +#else /* NACL */ + return 0; +#endif } /* Caller holds allocation lock. 
*/ static void pthread_stop_world() { +#ifndef NACL int i; int n_live_threads; int code; @@ -431,8 +458,128 @@ static void pthread_stop_world() GC_printf1("World stopped from 0x%lx\n", pthread_self()); #endif GC_stopping_thread = 0; /* debugging only */ +#else /* NACL */ + GC_thread p; + int i; + + #if DEBUG_THREADS + GC_printf1("pthread_stop_world: num_threads %d\n", nacl_num_gc_threads - 1); + #endif + nacl_thread_parker = pthread_self(); + nacl_park_threads_now = 1; + + while (1) { + #define NACL_PARK_WAIT_NANOSECONDS 100000 + int num_threads_parked = 0; + struct timespec ts; + int num_used = 0; + /* Check the 'parked' flag for each thread the GC knows about */ + for (i = 0; i < MAX_NACL_GC_THREADS && num_used < nacl_num_gc_threads; i++) { + if (nacl_thread_used[i] == 1) { + num_used++; + if (nacl_thread_parked[i] == 1) { + num_threads_parked++; + } + } + } + /* -1 for the current thread */ + if (num_threads_parked >= nacl_num_gc_threads - 1) + break; + ts.tv_sec = 0; + ts.tv_nsec = NACL_PARK_WAIT_NANOSECONDS; + #if DEBUG_THREADS + GC_printf1("sleeping waiting for %d threads to park...\n", nacl_num_gc_threads - num_threads_parked - 1); + #endif + nanosleep(&ts, 0); + } + +#endif /* NACL */ } + +#ifdef NACL + +#if __x86_64__ + +#define NACL_STORE_REGS() \ + do { \ + asm("push %rbx");\ + asm("push %rbp");\ + asm("push %r12");\ + asm("push %r13");\ + asm("push %r14");\ + asm("push %r15");\ + asm("mov %%esp, %0" : "=m" (nacl_gc_thread_self->stop_info.stack_ptr));\ + memcpy(nacl_gc_thread_self->stop_info.reg_storage, nacl_gc_thread_self->stop_info.stack_ptr, NACL_GC_REG_STORAGE_SIZE * sizeof(ptr_t));\ + asm("add $48, %esp");\ + asm("add %r15, %rsp");\ + } while (0) + +#elif __i386__ + +#define NACL_STORE_REGS() \ + do { \ + asm("push %ebx");\ + asm("push %ebp");\ + asm("push %esi");\ + asm("push %edi");\ + asm("mov %%esp, %0" : "=m" (nacl_gc_thread_self->stop_info.stack_ptr));\ + memcpy(nacl_gc_thread_self->stop_info.reg_storage, nacl_gc_thread_self->stop_info.stack_ptr, NACL_GC_REG_STORAGE_SIZE * sizeof(ptr_t));\ + asm("add $16, %esp");\ + } while (0) + +#endif + +void nacl_pre_syscall_hook() +{ + int local_dummy = 0; + if (nacl_thread_idx != -1) { + NACL_STORE_REGS(); + nacl_gc_thread_self->stop_info.stack_ptr = (ptr_t)(&local_dummy); + nacl_thread_parked[nacl_thread_idx] = 1; + } +} + +void nacl_post_syscall_hook() +{ + /* Calling __nacl_suspend_thread_if_needed() right away should guarantee we don't mutate the GC set. */ + __nacl_suspend_thread_if_needed(); + if (nacl_thread_idx != -1) { + nacl_thread_parked[nacl_thread_idx] = 0; + } +} + +void __nacl_suspend_thread_if_needed() { + if (nacl_park_threads_now) { + pthread_t self = pthread_self(); + int local_dummy = 0; + /* Don't try to park the thread parker. */ + if (nacl_thread_parker == self) + return; + + /* This can happen when a thread is created */ + /* outside of the GC system (wthread mostly). */ + if (nacl_thread_idx < 0) + return; + + /* If it was already 'parked', we're returning from a syscall, */ + /* so don't bother storing registers again, the GC has a set. */ + if (!nacl_thread_parked[nacl_thread_idx]) { + NACL_STORE_REGS(); + nacl_gc_thread_self->stop_info.stack_ptr = (ptr_t)(&local_dummy); + } + nacl_thread_parked[nacl_thread_idx] = 1; + while (nacl_park_threads_now) + ; /* spin */ + nacl_thread_parked[nacl_thread_idx] = 0; + + /* Clear out the reg storage for next suspend. 
*/ + memset(nacl_gc_thread_self->stop_info.reg_storage, 0, NACL_GC_REG_STORAGE_SIZE * sizeof(ptr_t)); + } +} + +#endif /* NACL */ + /* Caller holds allocation lock. */ void GC_stop_world() { @@ -465,6 +612,7 @@ void GC_stop_world() /* the world stopped. */ static void pthread_start_world() { +#ifndef NACL pthread_t my_thread = pthread_self(); register int i; register GC_thread p; @@ -525,6 +673,12 @@ static void pthread_start_world() #if DEBUG_THREADS GC_printf0("World started\n"); #endif +#else /* NACL */ +# if DEBUG_THREADS + GC_printf0("World starting\n"); +# endif + nacl_park_threads_now = 0; +#endif /* NACL */ } void GC_start_world() @@ -538,6 +692,7 @@ void GC_start_world() } static void pthread_stop_init() { +#ifndef NACL struct sigaction act; if (sem_init(&GC_suspend_ack_sem, 0, 0) != 0) @@ -578,6 +733,7 @@ static void pthread_stop_init() { GC_printf0("Will retry suspend signal if necessary.\n"); } # endif +#endif /* NACL */ } /* We hold the allocation lock. */ diff --git a/libgc/pthread_support.c b/libgc/pthread_support.c index c307ac0eec5..3e588ace211 100644 --- a/libgc/pthread_support.c +++ b/libgc/pthread_support.c @@ -164,6 +164,9 @@ # endif # undef pthread_join # undef pthread_detach +# if defined(NACL) +# undef pthread_exit +# endif # if defined(GC_OSF1_THREADS) && defined(_PTHREAD_USE_MANGLED_NAMES_) \ && !defined(_PTHREAD_USE_PTDNAM_) /* Restore the original mangled names on Tru64 UNIX. */ @@ -676,6 +679,52 @@ void GC_mark_thread_local_free_lists(void) static struct GC_Thread_Rep first_thread; +#ifdef NACL +extern int nacl_thread_parked[MAX_NACL_GC_THREADS]; +extern int nacl_thread_used[MAX_NACL_GC_THREADS]; +extern int nacl_thread_parking_inited; +extern int nacl_num_gc_threads; +extern pthread_mutex_t nacl_thread_alloc_lock; +extern __thread int nacl_thread_idx; +extern __thread GC_thread nacl_gc_thread_self; + +void nacl_initialize_gc_thread() +{ + int i; + pthread_mutex_lock(&nacl_thread_alloc_lock); + if (!nacl_thread_parking_inited) + { + for (i = 0; i < MAX_NACL_GC_THREADS; i++) { + nacl_thread_used[i] = 0; + nacl_thread_parked[i] = 0; + } + nacl_thread_parking_inited = 1; + } + GC_ASSERT(nacl_num_gc_threads <= MAX_NACL_GC_THREADS); + for (i = 0; i < MAX_NACL_GC_THREADS; i++) { + if (nacl_thread_used[i] == 0) { + nacl_thread_used[i] = 1; + nacl_thread_idx = i; + nacl_num_gc_threads++; + break; + } + } + pthread_mutex_unlock(&nacl_thread_alloc_lock); +} + +void nacl_shutdown_gc_thread() +{ + pthread_mutex_lock(&nacl_thread_alloc_lock); + GC_ASSERT(nacl_thread_idx >= 0 && nacl_thread_idx < MAX_NACL_GC_THREADS); + GC_ASSERT(nacl_thread_used[nacl_thread_idx] != 0); + nacl_thread_used[nacl_thread_idx] = 0; + nacl_thread_idx = -1; + nacl_num_gc_threads--; + pthread_mutex_unlock(&nacl_thread_alloc_lock); +} + +#endif /* NACL */ + /* Add a thread to GC_threads. We assume it wasn't already there. */ /* Caller holds allocation lock. 
*/ GC_thread GC_new_thread(pthread_t id) @@ -698,6 +747,10 @@ GC_thread GC_new_thread(pthread_t id) #endif result -> next = GC_threads[hv]; GC_threads[hv] = result; +#ifdef NACL + nacl_gc_thread_self = result; + nacl_initialize_gc_thread(); +#endif GC_ASSERT(result -> flags == 0 && result -> thread_blocked == 0); return(result); } @@ -711,6 +764,11 @@ void GC_delete_thread(pthread_t id) register GC_thread p = GC_threads[hv]; register GC_thread prev = 0; +#ifdef NACL + nacl_shutdown_gc_thread(); + nacl_gc_thread_self = NULL; +#endif + while (!pthread_equal(p -> id, id)) { prev = p; p = p -> next; @@ -1118,6 +1176,7 @@ void GC_init_parallel() #if !defined(GC_DARWIN_THREADS) && !defined(GC_OPENBSD_THREADS) +#ifndef NACL int WRAP_FUNC(pthread_sigmask)(int how, const sigset_t *set, sigset_t *oset) { sigset_t fudged_set; @@ -1129,6 +1188,7 @@ int WRAP_FUNC(pthread_sigmask)(int how, const sigset_t *set, sigset_t *oset) } return(REAL_FUNC(pthread_sigmask)(how, set, oset)); } +#endif #endif /* !GC_DARWIN_THREADS */ /* Wrappers for functions that are likely to block for an appreciable */ @@ -1259,6 +1319,17 @@ int WRAP_FUNC(pthread_join)(pthread_t thread, void **retval) return result; } +#ifdef NACL +/* Native Client doesn't support pthread cleanup functions, */ +/* so wrap pthread_exit and manually cleanup the thread. */ +void +WRAP_FUNC(pthread_exit)(void *status) +{ + GC_thread_exit_proc(0); + REAL_FUNC(pthread_exit)(status); +} +#endif + int WRAP_FUNC(pthread_detach)(pthread_t thread) { diff --git a/mono/arch/amd64/amd64-codegen.h b/mono/arch/amd64/amd64-codegen.h index 7ca557d6d21..8684a5c8656 100644 --- a/mono/arch/amd64/amd64-codegen.h +++ b/mono/arch/amd64/amd64-codegen.h @@ -67,6 +67,32 @@ typedef enum AMD64_REX_W = 8 /* Opeartion is 64-bits instead of 32 (default) or 16 (with 0x66 prefix) */ } AMD64_REX_Bits; +#if defined(__default_codegen__) + +#define amd64_codegen_pre(inst) +#define amd64_codegen_post(inst) + +#elif defined(__native_client_codegen__) + +#define amd64_codegen_pre(inst) guint8* _codegen_start = (inst); amd64_nacl_instruction_pre(); +#define amd64_codegen_post(inst) (amd64_nacl_instruction_post(&_codegen_start, &(inst)), _codegen_start); + +/* Because of rex prefixes, etc, call sequences are not constant size. */ +/* These pre- and post-sequence hooks remedy this by aligning the call */ +/* sequence after we emit it, since we will know the exact size then. */ +#define amd64_call_sequence_pre(inst) guint8* _code_start = (inst); +#define amd64_call_sequence_post(inst) \ + (mono_nacl_align_call(&_code_start, &(inst)), _code_start); + +/* Native client can load/store using one of the following registers */ +/* as a base: rip, r15, rbp, rsp. Any other base register needs to have */ +/* its upper 32 bits cleared and reference memory using r15 as the base. */ +#define amd64_is_valid_nacl_base(reg) \ + ((reg) == AMD64_RIP || (reg) == AMD64_R15 || \ + (reg) == AMD64_RBP || (reg) == AMD64_RSP) + +#endif /*__native_client_codegen__*/ + #ifdef TARGET_WIN32 #define AMD64_ARG_REG1 AMD64_RCX #define AMD64_ARG_REG2 AMD64_RDX @@ -88,6 +114,16 @@ typedef enum #define AMD64_CALLEE_SAVED_REGS ((1< 7) ? AMD64_REX_B : 0); \ if ((_amd64_rex_bits != 0) || (((width) == 1))) *(inst)++ = AMD64_REX(_amd64_rex_bits); \ } while (0) +#elif defined(__native_client_codegen__) +#define amd64_emit_rex(inst, width, reg_modrm, reg_index, reg_rm_base_opcode) do \ + { \ + unsigned char _amd64_rex_bits = \ + (((width) > 4) ? AMD64_REX_W : 0) | \ + (((reg_modrm) > 7) ? AMD64_REX_R : 0) | \ + (((reg_index) > 7) ? 
AMD64_REX_X : 0) | \ + (((reg_rm_base_opcode) > 7) ? AMD64_REX_B : 0); \ + amd64_nacl_tag_rex((inst)); \ + if ((_amd64_rex_bits != 0) || (((width) == 1))) *(inst)++ = AMD64_REX(_amd64_rex_bits); \ + } while (0) +#endif typedef union { - gsize val; + guint64 val; unsigned char b [8]; } amd64_imm_buf; @@ -138,7 +187,7 @@ typedef union { #define x86_imm_emit64(inst,imm) \ do { \ amd64_imm_buf imb; \ - imb.val = (gsize) (imm); \ + imb.val = (guint64) (imm); \ *(inst)++ = imb.b [0]; \ *(inst)++ = imb.b [1]; \ *(inst)++ = imb.b [2]; \ @@ -158,7 +207,7 @@ typedef union { x86_membase_emit ((inst),(reg)&0x7, (basereg)&0x7, (disp)); \ } while (0) -#define amd64_alu_reg_imm_size(inst,opc,reg,imm,size) \ +#define amd64_alu_reg_imm_size_body(inst,opc,reg,imm,size) \ do { \ if (x86_is_imm8((imm))) { \ amd64_emit_rex(inst, size, 0, 0, (reg)); \ @@ -177,29 +226,67 @@ typedef union { } \ } while (0) -#define amd64_alu_reg_imm(inst,opc,reg,imm) amd64_alu_reg_imm_size((inst),(opc),(reg),(imm),8) - -#define amd64_alu_reg_reg_size(inst,opc,dreg,reg,size) \ +#define amd64_alu_reg_reg_size_body(inst,opc,dreg,reg,size) \ do { \ amd64_emit_rex(inst, size, (dreg), 0, (reg)); \ *(inst)++ = (((unsigned char)(opc)) << 3) + 3; \ x86_reg_emit ((inst), (dreg), (reg)); \ } while (0) -#define amd64_alu_reg_reg(inst,opc,dreg,reg) amd64_alu_reg_reg_size ((inst),(opc),(dreg),(reg),8) +#if defined(__default_codegen__) + +#define amd64_alu_reg_imm_size(inst,opc,reg,imm,size) \ + amd64_alu_reg_imm_size_body((inst), (opc), (reg), (imm), (size)) -#define amd64_alu_reg_membase_size(inst,opc,reg,basereg,disp,size) \ +#define amd64_alu_reg_reg_size(inst,opc,dreg,reg,size) \ + amd64_alu_reg_reg_size_body((inst), (opc), (dreg), (reg), (size)) + +#elif defined(__native_client_codegen__) +/* NaCl modules may not directly update RSP or RBP other than direct copies */ +/* between them. 
Instead the lower 4 bytes are updated and then added to R15 */ +#define amd64_is_nacl_stack_reg(reg) (((reg) == AMD64_RSP) || ((reg) == AMD64_RBP)) + +#define amd64_alu_reg_imm_size(inst,opc,reg,imm,size) \ + do{ \ + amd64_codegen_pre(inst); \ + if (amd64_is_nacl_stack_reg(reg)) { \ + if (((opc) != X86_ADD) && ((opc) != X86_SUB)) \ + g_assert_not_reached(); \ + amd64_alu_reg_imm_size_body((inst), (opc), (reg), (imm), 4); \ + /* Use LEA instead of ADD to preserve flags */ \ + amd64_lea_memindex_size((inst), (reg), (reg), 0, AMD64_R15, 0, 8); \ + } else { \ + amd64_alu_reg_imm_size_body((inst), (opc), (reg), (imm), (size)); \ + } \ + amd64_codegen_post(inst); \ + } while(0) + +#define amd64_alu_reg_reg_size(inst,opc,dreg,reg,size) \ do { \ - amd64_emit_rex ((inst),(size),(reg),0,(basereg)); \ - *(inst)++ = (((unsigned char)(opc)) << 3) + 3; \ - amd64_membase_emit (inst, reg, basereg, disp); \ -} while (0) + amd64_codegen_pre(inst); \ + if (amd64_is_nacl_stack_reg((dreg)) && ((reg) != AMD64_R15)) { \ + if (((opc) != X86_ADD && (opc) != X86_SUB)) \ + g_assert_not_reached(); \ + amd64_alu_reg_reg_size_body((inst), (opc), (dreg), (reg), 4); \ + /* Use LEA instead of ADD to preserve flags */ \ + amd64_lea_memindex_size((inst), (dreg), (dreg), 0, AMD64_R15, 0, 8); \ + } else { \ + amd64_alu_reg_reg_size_body((inst), (opc), (dreg), (reg), (size)); \ + } \ + amd64_codegen_post(inst); \ + } while (0) +#endif /*__native_client_codegen__*/ + +#define amd64_alu_reg_imm(inst,opc,reg,imm) amd64_alu_reg_imm_size((inst),(opc),(reg),(imm),8) + +#define amd64_alu_reg_reg(inst,opc,dreg,reg) amd64_alu_reg_reg_size ((inst),(opc),(dreg),(reg),8) #define amd64_mov_regp_reg(inst,regp,reg,size) \ do { \ + amd64_codegen_pre(inst); \ if ((size) == 2) \ - *(inst)++ = (unsigned char)0x66; \ + x86_prefix((inst), X86_OPERAND_PREFIX); \ amd64_emit_rex(inst, (size), (reg), 0, (regp)); \ switch ((size)) { \ case 1: *(inst)++ = (unsigned char)0x88; break; \ @@ -207,12 +294,14 @@ typedef union { default: assert (0); \ } \ x86_regp_emit ((inst), (reg), (regp)); \ + amd64_codegen_post(inst); \ } while (0) #define amd64_mov_membase_reg(inst,basereg,disp,reg,size) \ do { \ + amd64_codegen_pre(inst); \ if ((size) == 2) \ - *(inst)++ = (unsigned char)0x66; \ + x86_prefix((inst), X86_OPERAND_PREFIX); \ amd64_emit_rex(inst, (size), (reg), 0, (basereg)); \ switch ((size)) { \ case 1: *(inst)++ = (unsigned char)0x88; break; \ @@ -220,27 +309,31 @@ typedef union { default: assert (0); \ } \ x86_membase_emit ((inst), ((reg)&0x7), ((basereg)&0x7), (disp)); \ + amd64_codegen_post(inst); \ } while (0) #define amd64_mov_mem_reg(inst,mem,reg,size) \ do { \ + amd64_codegen_pre(inst); \ if ((size) == 2) \ - *(inst)++ = (unsigned char)0x66; \ + x86_prefix((inst), X86_OPERAND_PREFIX); \ amd64_emit_rex(inst, (size), (reg), 0, 0); \ switch ((size)) { \ case 1: *(inst)++ = (unsigned char)0x88; break; \ case 2: case 4: case 8: *(inst)++ = (unsigned char)0x89; break; \ default: assert (0); \ } \ - x86_address_byte ((inst), 0, (reg), 4); \ - x86_address_byte ((inst), 0, 4, 5); \ - x86_imm_emit32 ((inst), (mem)); \ + x86_address_byte ((inst), 0, (reg), 4); \ + x86_address_byte ((inst), 0, 4, 5); \ + x86_imm_emit32 ((inst), (mem)); \ + amd64_codegen_post(inst); \ } while (0) #define amd64_mov_reg_reg(inst,dreg,reg,size) \ do { \ + amd64_codegen_pre(inst); \ if ((size) == 2) \ - *(inst)++ = (unsigned char)0x66; \ + x86_prefix((inst), X86_OPERAND_PREFIX); \ amd64_emit_rex(inst, (size), (dreg), 0, (reg)); \ switch ((size)) { \ case 1: *(inst)++ = (unsigned 
char)0x8a; break; \ @@ -248,27 +341,43 @@ typedef union { default: assert (0); \ } \ x86_reg_emit ((inst), (dreg), (reg)); \ + amd64_codegen_post(inst); \ } while (0) -#define amd64_mov_reg_mem(inst,reg,mem,size) \ +#define amd64_mov_reg_mem_body(inst,reg,mem,size) \ do { \ + amd64_codegen_pre(inst); \ if ((size) == 2) \ - *(inst)++ = (unsigned char)0x66; \ + x86_prefix((inst), X86_OPERAND_PREFIX); \ amd64_emit_rex(inst, (size), (reg), 0, 0); \ switch ((size)) { \ case 1: *(inst)++ = (unsigned char)0x8a; break; \ case 2: case 4: case 8: *(inst)++ = (unsigned char)0x8b; break; \ default: assert (0); \ } \ - x86_address_byte ((inst), 0, (reg), 4); \ - x86_address_byte ((inst), 0, 4, 5); \ - x86_imm_emit32 ((inst), (mem)); \ + x86_address_byte ((inst), 0, (reg), 4); \ + x86_address_byte ((inst), 0, 4, 5); \ + x86_imm_emit32 ((inst), (mem)); \ + amd64_codegen_post(inst); \ } while (0) -#define amd64_mov_reg_membase(inst,reg,basereg,disp,size) \ +#if defined(__default_codegen__) +#define amd64_mov_reg_mem(inst,reg,mem,size) \ + do { \ + amd64_mov_reg_mem_body((inst),(reg),(mem),(size)); \ + } while (0) +#elif defined(__native_client_codegen__) +/* We have to re-base memory reads because memory isn't zero based. */ +#define amd64_mov_reg_mem(inst,reg,mem,size) \ + do { \ + amd64_mov_reg_membase((inst),(reg),AMD64_R15,(mem),(size)); \ + } while (0) +#endif /* __native_client_codegen__ */ + +#define amd64_mov_reg_membase_body(inst,reg,basereg,disp,size) \ do { \ if ((size) == 2) \ - *(inst)++ = (unsigned char)0x66; \ + x86_prefix((inst), X86_OPERAND_PREFIX); \ amd64_emit_rex(inst, (size), (reg), 0, (basereg)); \ switch ((size)) { \ case 1: *(inst)++ = (unsigned char)0x8a; break; \ @@ -278,8 +387,56 @@ typedef union { amd64_membase_emit ((inst), (reg), (basereg), (disp)); \ } while (0) +#define amd64_mov_reg_memindex_size_body(inst,reg,basereg,disp,indexreg,shift,size) \ + do { \ + amd64_emit_rex ((inst),(size),(reg),(indexreg),(basereg)); \ + x86_mov_reg_memindex((inst),((reg)&0x7),((basereg)&0x7),(disp),((indexreg)&0x7),(shift),(size) == 8 ? 
4 : (size)); \ + } while (0) + +#if defined(__default_codegen__) + +#define amd64_mov_reg_memindex_size(inst,reg,basereg,disp,indexreg,shift,size) \ + amd64_mov_reg_memindex_size_body((inst),(reg),(basereg),(disp),(indexreg),(shift),(size)) +#define amd64_mov_reg_membase(inst,reg,basereg,disp,size) \ + do { \ + amd64_mov_reg_membase_body((inst), (reg), (basereg), (disp), (size)); \ + } while (0) + +#elif defined(__native_client_codegen__) + +#define amd64_mov_reg_memindex_size(inst,reg,basereg,disp,indexreg,shift,size) \ + do { \ + amd64_codegen_pre(inst); \ + if (amd64_is_nacl_stack_reg((reg))) { \ + /* Clear upper 32 bits with mov of size 4 */ \ + amd64_mov_reg_memindex_size_body((inst), (reg), (basereg), (disp), (indexreg), (shift), 4); \ + /* Add %r15 using LEA to preserve flags */ \ + amd64_lea_memindex_size((inst), (reg), (reg), 0, AMD64_R15, 0, 8); \ + } else { \ + amd64_mov_reg_memindex_size_body((inst), (reg), (basereg), (disp), (indexreg), (shift), (size)); \ + } \ + amd64_codegen_post(inst); \ + } while(0) + +#define amd64_mov_reg_membase(inst,reg,basereg,disp,size) \ + do { \ + amd64_codegen_pre(inst); \ + if (amd64_is_nacl_stack_reg((reg))) { \ + /* Clear upper 32 bits with mov of size 4 */ \ + amd64_mov_reg_membase_body((inst), (reg), (basereg), (disp), 4); \ + /* Add %r15 */ \ + amd64_lea_memindex_size((inst), (reg), (reg), 0, AMD64_R15, 0, 8); \ + } else { \ + amd64_mov_reg_membase_body((inst), (reg), (basereg), (disp), (size)); \ + } \ + amd64_codegen_post(inst); \ + } while (0) + +#endif /*__native_client_codegen__*/ + #define amd64_movzx_reg_membase(inst,reg,basereg,disp,size) \ do { \ + amd64_codegen_pre(inst); \ amd64_emit_rex(inst, (size), (reg), 0, (basereg)); \ switch ((size)) { \ case 1: *(inst)++ = (unsigned char)0x0f; *(inst)++ = (unsigned char)0xb6; break; \ @@ -288,27 +445,34 @@ typedef union { default: assert (0); \ } \ x86_membase_emit ((inst), ((reg)&0x7), ((basereg)&0x7), (disp)); \ + amd64_codegen_post(inst); \ } while (0) #define amd64_movsxd_reg_mem(inst,reg,mem) \ do { \ - amd64_emit_rex(inst,8,(reg),0,0); \ - *(inst)++ = (unsigned char)0x63; \ - x86_mem_emit ((inst), ((reg)&0x7), (mem)); \ + amd64_codegen_pre(inst); \ + amd64_emit_rex(inst,8,(reg),0,0); \ + *(inst)++ = (unsigned char)0x63; \ + x86_mem_emit ((inst), ((reg)&0x7), (mem)); \ + amd64_codegen_post(inst); \ } while (0) #define amd64_movsxd_reg_membase(inst,reg,basereg,disp) \ do { \ - amd64_emit_rex(inst,8,(reg),0,(basereg)); \ - *(inst)++ = (unsigned char)0x63; \ - x86_membase_emit ((inst), ((reg)&0x7), ((basereg)&0x7), (disp)); \ + amd64_codegen_pre(inst); \ + amd64_emit_rex(inst,8,(reg),0,(basereg)); \ + *(inst)++ = (unsigned char)0x63; \ + x86_membase_emit ((inst), ((reg)&0x7), ((basereg)&0x7), (disp)); \ + amd64_codegen_post(inst); \ } while (0) #define amd64_movsxd_reg_reg(inst,dreg,reg) \ do { \ - amd64_emit_rex(inst,8,(dreg),0,(reg)); \ - *(inst)++ = (unsigned char)0x63; \ - x86_reg_emit ((inst), (dreg), (reg)); \ + amd64_codegen_pre(inst); \ + amd64_emit_rex(inst,8,(dreg),0,(reg)); \ + *(inst)++ = (unsigned char)0x63; \ + x86_reg_emit ((inst), (dreg), (reg)); \ + amd64_codegen_post(inst); \ } while (0) /* Pretty much the only instruction that supports a 64-bit immediate. 
Optimize for common case of @@ -316,18 +480,22 @@ typedef union { */ #define amd64_mov_reg_imm_size(inst,reg,imm,size) \ do { \ + amd64_codegen_pre(inst); \ amd64_emit_rex(inst, (size), 0, 0, (reg)); \ *(inst)++ = (unsigned char)0xb8 + ((reg) & 0x7); \ if ((size) == 8) \ - x86_imm_emit64 ((inst), (gsize)(imm)); \ + x86_imm_emit64 ((inst), (guint64)(imm)); \ else \ - x86_imm_emit32 ((inst), (int)(gsize)(imm)); \ + x86_imm_emit32 ((inst), (int)(guint64)(imm)); \ + amd64_codegen_post(inst); \ } while (0) #define amd64_mov_reg_imm(inst,reg,imm) \ do { \ - int _amd64_width_temp = ((gsize)(imm) == (gsize)(int)(gsize)(imm)); \ - amd64_mov_reg_imm_size ((inst), (reg), (imm), (_amd64_width_temp ? 4 : 8)); \ + int _amd64_width_temp = ((guint64)(imm) == (guint64)(int)(guint64)(imm)); \ + amd64_codegen_pre(inst); \ + amd64_mov_reg_imm_size ((inst), (reg), (imm), (_amd64_width_temp ? 4 : 8)); \ + amd64_codegen_post(inst); \ } while (0) #define amd64_set_reg_template(inst,reg) amd64_mov_reg_imm_size ((inst),(reg), 0, 8) @@ -336,8 +504,9 @@ typedef union { #define amd64_mov_membase_imm(inst,basereg,disp,imm,size) \ do { \ + amd64_codegen_pre(inst); \ if ((size) == 2) \ - *(inst)++ = (unsigned char)0x66; \ + x86_prefix((inst), X86_OPERAND_PREFIX); \ amd64_emit_rex(inst, (size) == 1 ? 0 : (size), 0, 0, (basereg)); \ if ((size) == 1) { \ *(inst)++ = (unsigned char)0xc6; \ @@ -352,36 +521,69 @@ typedef union { x86_membase_emit ((inst), 0, (basereg) & 0x7, (disp)); \ x86_imm_emit32 ((inst), (imm)); \ } \ + amd64_codegen_post(inst); \ } while (0) -#define amd64_lea_membase(inst,reg,basereg,disp) \ + +#define amd64_lea_membase_body(inst,reg,basereg,disp) \ do { \ amd64_emit_rex(inst, 8, (reg), 0, (basereg)); \ *(inst)++ = (unsigned char)0x8d; \ amd64_membase_emit ((inst), (reg), (basereg), (disp)); \ } while (0) +#if defined(__default_codegen__) +#define amd64_lea_membase(inst,reg,basereg,disp) \ + amd64_lea_membase_body((inst), (reg), (basereg), (disp)) +#elif defined(__native_client_codegen__) +/* NaCl modules may not write directly into RSP/RBP. Instead, use a */ +/* 32-bit LEA and add R15 to the effective address */ +#define amd64_lea_membase(inst,reg,basereg,disp) \ + do { \ + amd64_codegen_pre(inst); \ + if (amd64_is_nacl_stack_reg(reg)) { \ + /* 32-bit LEA */ \ + amd64_emit_rex((inst), 4, (reg), 0, (basereg)); \ + *(inst)++ = (unsigned char)0x8d; \ + amd64_membase_emit((inst), (reg), (basereg), (disp)); \ + /* Use a 64-bit LEA instead of an ADD to preserve flags */ \ + amd64_lea_memindex_size((inst), (reg), (reg), 0, AMD64_R15, 0, 8); \ + } else { \ + amd64_lea_membase_body((inst), (reg), (basereg), (disp)); \ + } \ + amd64_codegen_post(inst); \ + } while (0) +#endif /*__native_client_codegen__*/ + /* Instruction are implicitly 64-bits so don't generate REX for just the size. */ #define amd64_push_reg(inst,reg) \ do { \ + amd64_codegen_pre(inst); \ amd64_emit_rex(inst, 0, 0, 0, (reg)); \ *(inst)++ = (unsigned char)0x50 + ((reg) & 0x7); \ + amd64_codegen_post(inst); \ } while (0) /* Instruction is implicitly 64-bits so don't generate REX for just the size. 
*/ #define amd64_push_membase(inst,basereg,disp) \ do { \ + amd64_codegen_pre(inst); \ amd64_emit_rex(inst, 0, 0, 0, (basereg)); \ *(inst)++ = (unsigned char)0xff; \ x86_membase_emit ((inst), 6, (basereg) & 0x7, (disp)); \ + amd64_codegen_post(inst); \ } while (0) -#define amd64_pop_reg(inst,reg) \ +#define amd64_pop_reg_body(inst,reg) \ do { \ + amd64_codegen_pre(inst); \ amd64_emit_rex(inst, 0, 0, 0, (reg)); \ *(inst)++ = (unsigned char)0x58 + ((reg) & 0x7); \ + amd64_codegen_post(inst); \ } while (0) +#if defined(__default_codegen__) + #define amd64_call_reg(inst,reg) \ do { \ amd64_emit_rex(inst, 0, 0, 0, (reg)); \ @@ -389,94 +591,203 @@ typedef union { x86_reg_emit ((inst), 2, ((reg) & 0x7)); \ } while (0) + #define amd64_ret(inst) do { *(inst)++ = (unsigned char)0xc3; } while (0) #define amd64_leave(inst) do { *(inst)++ = (unsigned char)0xc9; } while (0) + +#define amd64_pop_reg(inst,reg) amd64_pop_reg_body((inst), (reg)) + +#elif defined(__native_client_codegen__) + +/* Size is ignored for Native Client jumps, we restrict jumping to 32-bits */ +#define amd64_jump_reg_size(inst,reg,size) \ + do { \ + amd64_codegen_pre((inst)); \ + amd64_alu_reg_imm_size((inst), X86_AND, (reg), (nacl_align_byte), 4); \ + amd64_alu_reg_reg_size((inst), X86_ADD, (reg), AMD64_R15, 8); \ + amd64_emit_rex ((inst),0,0,0,(reg)); \ + x86_jump_reg((inst),((reg)&0x7)); \ + amd64_codegen_post((inst)); \ + } while (0) + +/* Size is ignored for Native Client jumps, we restrict jumping to 32-bits */ +#define amd64_jump_mem_size(inst,mem,size) \ + do { \ + amd64_codegen_pre((inst)); \ + amd64_mov_reg_mem((inst), (mem), AMD64_R11, 4); \ + amd64_jump_reg_size((inst), AMD64_R11, 4); \ + amd64_codegen_post((inst)); \ + } while (0) + +#define amd64_call_reg_internal(inst,reg) \ + do { \ + amd64_codegen_pre((inst)); \ + amd64_alu_reg_imm_size((inst), X86_AND, (reg), (nacl_align_byte), 4); \ + amd64_alu_reg_reg_size((inst), X86_ADD, (reg), AMD64_R15, 8); \ + amd64_emit_rex((inst), 0, 0, 0, (reg)); \ + x86_call_reg((inst), ((reg) & 0x7)); \ + amd64_codegen_post((inst)); \ + } while (0) + +#define amd64_call_reg(inst,reg) \ + do { \ + amd64_codegen_pre((inst)); \ + amd64_call_sequence_pre(inst); \ + amd64_call_reg_internal((inst), (reg)); \ + amd64_call_sequence_post(inst); \ + amd64_codegen_post((inst)); \ + } while (0) + + +#define amd64_ret(inst) \ + do { \ + amd64_codegen_pre(inst); \ + amd64_pop_reg_body((inst), AMD64_R11); \ + amd64_jump_reg_size((inst), AMD64_R11, 8); \ + amd64_codegen_post(inst); \ + } while (0) + +#define amd64_leave(inst) \ + do { \ + amd64_codegen_pre(inst); \ + amd64_mov_reg_reg((inst), AMD64_RSP, AMD64_RBP, 8); \ + amd64_pop_reg_body((inst), AMD64_R11); \ + amd64_mov_reg_reg_size((inst), AMD64_RBP, AMD64_R11, 4); \ + amd64_alu_reg_reg_size((inst), X86_ADD, AMD64_RBP, AMD64_R15, 8); \ + amd64_codegen_post(inst); \ + } while (0) + +#define amd64_pop_reg(inst,reg) \ + do { \ + amd64_codegen_pre(inst); \ + if (amd64_is_nacl_stack_reg((reg))) { \ + amd64_pop_reg_body((inst), AMD64_R11); \ + amd64_mov_reg_reg_size((inst), (reg), AMD64_R11, 4); \ + amd64_alu_reg_reg_size((inst), X86_ADD, (reg), AMD64_R15, 8); \ + } else { \ + amd64_pop_reg_body((inst), (reg)); \ + } \ + amd64_codegen_post(inst); \ + } while (0) + +#endif /*__native_client_codegen__*/ + #define amd64_movsd_reg_regp(inst,reg,regp) \ do { \ - *(inst)++ = (unsigned char)0xf2; \ + amd64_codegen_pre(inst); \ + x86_prefix((inst), 0xf2); \ amd64_emit_rex(inst, 0, (reg), 0, (regp)); \ *(inst)++ = (unsigned char)0x0f; \ *(inst)++ = (unsigned 
char)0x10; \ x86_regp_emit ((inst), (reg) & 0x7, (regp) & 0x7); \ + amd64_codegen_post(inst); \ } while (0) #define amd64_movsd_regp_reg(inst,regp,reg) \ do { \ - *(inst)++ = (unsigned char)0xf2; \ + amd64_codegen_pre(inst); \ + x86_prefix((inst), 0xf2); \ amd64_emit_rex(inst, 0, (reg), 0, (regp)); \ *(inst)++ = (unsigned char)0x0f; \ *(inst)++ = (unsigned char)0x11; \ x86_regp_emit ((inst), (reg) & 0x7, (regp) & 0x7); \ + amd64_codegen_post(inst); \ } while (0) #define amd64_movss_reg_regp(inst,reg,regp) \ do { \ - *(inst)++ = (unsigned char)0xf3; \ + amd64_codegen_pre(inst); \ + x86_prefix((inst), 0xf3); \ amd64_emit_rex(inst, 0, (reg), 0, (regp)); \ *(inst)++ = (unsigned char)0x0f; \ *(inst)++ = (unsigned char)0x10; \ x86_regp_emit ((inst), (reg) & 0x7, (regp) & 0x7); \ + amd64_codegen_post(inst); \ } while (0) #define amd64_movss_regp_reg(inst,regp,reg) \ do { \ - *(inst)++ = (unsigned char)0xf3; \ + amd64_codegen_pre(inst); \ + x86_prefix((inst), 0xf3); \ amd64_emit_rex(inst, 0, (reg), 0, (regp)); \ *(inst)++ = (unsigned char)0x0f; \ *(inst)++ = (unsigned char)0x11; \ x86_regp_emit ((inst), (reg) & 0x7, (regp) & 0x7); \ + amd64_codegen_post(inst); \ } while (0) #define amd64_movsd_reg_membase(inst,reg,basereg,disp) \ do { \ - *(inst)++ = (unsigned char)0xf2; \ + amd64_codegen_pre(inst); \ + x86_prefix((inst), 0xf2); \ amd64_emit_rex(inst, 0, (reg), 0, (basereg)); \ *(inst)++ = (unsigned char)0x0f; \ *(inst)++ = (unsigned char)0x10; \ x86_membase_emit ((inst), (reg) & 0x7, (basereg) & 0x7, (disp)); \ + amd64_codegen_post(inst); \ } while (0) #define amd64_movss_reg_membase(inst,reg,basereg,disp) \ do { \ - *(inst)++ = (unsigned char)0xf3; \ + amd64_codegen_pre(inst); \ + x86_prefix((inst), 0xf3); \ amd64_emit_rex(inst, 0, (reg), 0, (basereg)); \ *(inst)++ = (unsigned char)0x0f; \ *(inst)++ = (unsigned char)0x10; \ x86_membase_emit ((inst), (reg) & 0x7, (basereg) & 0x7, (disp)); \ + amd64_codegen_post(inst); \ } while (0) #define amd64_movsd_membase_reg(inst,basereg,disp,reg) \ do { \ - *(inst)++ = (unsigned char)0xf2; \ + amd64_codegen_pre(inst); \ + x86_prefix((inst), 0xf2); \ amd64_emit_rex(inst, 0, (reg), 0, (basereg)); \ *(inst)++ = (unsigned char)0x0f; \ *(inst)++ = (unsigned char)0x11; \ x86_membase_emit ((inst), (reg) & 0x7, (basereg) & 0x7, (disp)); \ + amd64_codegen_post(inst); \ } while (0) #define amd64_movss_membase_reg(inst,basereg,disp,reg) \ do { \ - *(inst)++ = (unsigned char)0xf3; \ + amd64_codegen_pre(inst); \ + x86_prefix((inst), 0xf3); \ amd64_emit_rex(inst, 0, (reg), 0, (basereg)); \ *(inst)++ = (unsigned char)0x0f; \ *(inst)++ = (unsigned char)0x11; \ x86_membase_emit ((inst), (reg) & 0x7, (basereg) & 0x7, (disp)); \ + amd64_codegen_post(inst); \ } while (0) /* The original inc_reg opcode is used as the REX prefix */ #define amd64_inc_reg_size(inst,reg,size) \ - do { \ - amd64_emit_rex ((inst),(size),0,0,(reg)); \ - *(inst)++ = (unsigned char)0xff; \ - x86_reg_emit ((inst),0,(reg) & 0x7); \ - } while (0) + do { \ + amd64_codegen_pre(inst); \ + amd64_emit_rex ((inst),(size),0,0,(reg)); \ + *(inst)++ = (unsigned char)0xff; \ + x86_reg_emit ((inst),0,(reg) & 0x7); \ + amd64_codegen_post(inst); \ + } while (0) #define amd64_dec_reg_size(inst,reg,size) \ - do { \ - amd64_emit_rex ((inst),(size),0,0,(reg)); \ - *(inst)++ = (unsigned char)0xff; \ - x86_reg_emit ((inst),1,(reg) & 0x7); \ - } while (0) + do { \ + amd64_codegen_pre(inst); \ + amd64_emit_rex ((inst),(size),0,0,(reg)); \ + *(inst)++ = (unsigned char)0xff; \ + x86_reg_emit ((inst),1,(reg) & 0x7); \ + 
amd64_codegen_post(inst); \ + } while (0) + +#define amd64_fld_membase_size(inst,basereg,disp,is_double,size) do { \ + amd64_codegen_pre(inst); \ + amd64_emit_rex ((inst),0,0,0,(basereg)); \ + *(inst)++ = (is_double) ? (unsigned char)0xdd : (unsigned char)0xd9; \ + amd64_membase_emit ((inst), 0, (basereg), (disp)); \ + amd64_codegen_post(inst); \ +} while (0) + +#if defined (__default_codegen__) /* From the AMD64 Software Optimization Manual */ #define amd64_padding_size(inst,size) \ @@ -489,12 +800,6 @@ typedef union { }; \ } while (0) -#define amd64_fld_membase_size(inst,basereg,disp,is_double,size) do { \ - amd64_emit_rex ((inst),0,0,0,(basereg)); \ - *(inst)++ = (is_double) ? (unsigned char)0xdd : (unsigned char)0xd9; \ - amd64_membase_emit ((inst), 0, (basereg), (disp)); \ -} while (0) - #define amd64_call_membase_size(inst,basereg,disp,size) do { amd64_emit_rex ((inst),0,0,0,(basereg)); *(inst)++ = (unsigned char)0xff; amd64_membase_emit ((inst),2, (basereg),(disp)); } while (0) #define amd64_jump_membase_size(inst,basereg,disp,size) do { amd64_emit_rex ((inst),0,0,0,(basereg)); *(inst)++ = (unsigned char)0xff; amd64_membase_emit ((inst), 4, (basereg), (disp)); } while (0) @@ -508,6 +813,98 @@ typedef union { } \ } while (0) +#elif defined(__native_client_codegen__) + +/* The 3-7 byte NOP sequences in amd64_padding_size below are all illegal in */ +/* 64-bit Native Client because they load into rSP/rBP or use duplicate */ +/* prefixes. Instead we use the NOPs recommended in Section 3.5.1.8 of the */ +/* Intel64 and IA-32 Architectures Optimization Reference Manual and */ +/* Section 4.13 of AMD Software Optimization Guide for Family 10h Processors. */ + +#define amd64_padding_size(inst,size) \ + do { \ + unsigned char *code_start = (inst); \ + switch ((size)) { \ + /* xchg %eax,%eax, recognized by hardware as a NOP */ \ + case 1: *(inst)++ = 0x90; break; \ + /* xchg %ax,%ax */ \ + case 2: *(inst)++ = 0x66; *(inst)++ = 0x90; \ + break; \ + /* nop (%rax) */ \ + case 3: *(inst)++ = 0x0f; *(inst)++ = 0x1f; \ + *(inst)++ = 0x00; \ + break; \ + /* nop 0x0(%rax) */ \ + case 4: *(inst)++ = 0x0f; *(inst)++ = 0x1f; \ + x86_address_byte ((inst), 1, 0, AMD64_RAX); \ + x86_imm_emit8 ((inst), 0); \ + break; \ + /* nop 0x0(%rax,%rax) */ \ + case 5: *(inst)++ = 0x0f; *(inst)++ = 0x1f; \ + x86_address_byte ((inst), 1, 0, 4); \ + x86_address_byte ((inst), 0, AMD64_RAX, AMD64_RAX); \ + x86_imm_emit8 ((inst), 0); \ + break; \ + /* nopw 0x0(%rax,%rax) */ \ + case 6: *(inst)++ = 0x66; *(inst)++ = 0x0f; \ + *(inst)++ = 0x1f; \ + x86_address_byte ((inst), 1, 0, 4); \ + x86_address_byte ((inst), 0, AMD64_RAX, AMD64_RAX); \ + x86_imm_emit8 ((inst), 0); \ + break; \ + /* nop 0x0(%rax) (32-bit displacement) */ \ + case 7: *(inst)++ = 0x0f; *(inst)++ = 0x1f; \ + x86_address_byte ((inst), 2, 0, AMD64_RAX); \ + x86_imm_emit32((inst), 0); \ + break; \ + /* nop 0x0(%rax,%rax) (32-bit displacement) */ \ + case 8: *(inst)++ = 0x0f; *(inst)++ = 0x1f; \ + x86_address_byte ((inst), 2, 0, 4); \ + x86_address_byte ((inst), 0, AMD64_RAX, AMD64_RAX); \ + x86_imm_emit32 ((inst), 0); \ + break; \ + default: \ + g_assert_not_reached(); \ + } \ + g_assert(code_start + (size) == (unsigned char *)(inst)); \ + } while (0) + + +/* Size is ignored for Native Client calls, we restrict jumping to 32-bits */ +#define amd64_call_membase_size(inst,basereg,disp,size) \ + do { \ + amd64_codegen_pre((inst)); \ + amd64_call_sequence_pre(inst); \ + amd64_mov_reg_membase((inst), AMD64_R11, (basereg), (disp), 4); \ + 
amd64_call_reg_internal((inst), AMD64_R11); \ + amd64_call_sequence_post(inst); \ + amd64_codegen_post((inst)); \ + } while (0) + +/* Size is ignored for Native Client jumps, we restrict jumping to 32-bits */ +#define amd64_jump_membase_size(inst,basereg,disp,size) \ + do { \ + amd64_mov_reg_membase((inst), AMD64_R11, (basereg), (disp), 4); \ + amd64_jump_reg_size((inst), AMD64_R11, 4); \ + } while (0) + +/* On Native Client we can't jump more than INT_MAX in either direction */ +#define amd64_jump_code_size(inst,target,size) \ + do { \ + /* x86_jump_code used twice in case of */ \ + /* relocation by amd64_codegen_post */ \ + guint8* jump_start; \ + amd64_codegen_pre(inst); \ + assert(amd64_is_imm32 ((gint64)(target) - (gint64)(inst))); \ + x86_jump_code((inst),(target)); \ + inst = amd64_codegen_post(inst); \ + jump_start = (inst); \ + x86_jump_code((inst),(target)); \ + mono_amd64_patch(jump_start, (target)); \ +} while (0) + +#endif /*__native_client_codegen__*/ + /* * SSE */ @@ -517,31 +914,39 @@ typedef union { /* Two opcode SSE defines */ #define emit_sse_reg_reg_op2_size(inst,dreg,reg,op1,op2,size) do { \ + amd64_codegen_pre(inst); \ amd64_emit_rex ((inst), size, (dreg), 0, (reg)); \ *(inst)++ = (unsigned char)(op1); \ *(inst)++ = (unsigned char)(op2); \ x86_reg_emit ((inst), (dreg), (reg)); \ + amd64_codegen_post(inst); \ } while (0) #define emit_sse_reg_reg_op2(inst,dreg,reg,op1,op2) emit_sse_reg_reg_op2_size ((inst), (dreg), (reg), (op1), (op2), 0) #define emit_sse_reg_reg_op2_imm(inst,dreg,reg,op1,op2,imm) do { \ + amd64_codegen_pre(inst); \ emit_sse_reg_reg_op2 ((inst), (dreg), (reg), (op1), (op2)); \ x86_imm_emit8 ((inst), (imm)); \ + amd64_codegen_post(inst); \ } while (0) #define emit_sse_membase_reg_op2(inst,basereg,disp,reg,op1,op2) do { \ + amd64_codegen_pre(inst); \ amd64_emit_rex ((inst), 0, (reg), 0, (basereg)); \ *(inst)++ = (unsigned char)(op1); \ *(inst)++ = (unsigned char)(op2); \ amd64_membase_emit ((inst), (reg), (basereg), (disp)); \ + amd64_codegen_post(inst); \ } while (0) #define emit_sse_reg_membase_op2(inst,dreg,basereg,disp,op1,op2) do { \ + amd64_codegen_pre(inst); \ amd64_emit_rex ((inst), 0, (dreg), 0, (basereg) == AMD64_RIP ? 
0 : (basereg)); \ *(inst)++ = (unsigned char)(op1); \ *(inst)++ = (unsigned char)(op2); \ amd64_membase_emit ((inst), (dreg), (basereg), (disp)); \ + amd64_codegen_post(inst); \ } while (0) /* Three opcode SSE defines */ @@ -553,45 +958,55 @@ typedef union { } while (0) #define emit_sse_reg_reg_size(inst,dreg,reg,op1,op2,op3,size) do { \ + amd64_codegen_pre(inst); \ *(inst)++ = (unsigned char)(op1); \ amd64_emit_rex ((inst), size, (dreg), 0, (reg)); \ *(inst)++ = (unsigned char)(op2); \ *(inst)++ = (unsigned char)(op3); \ x86_reg_emit ((inst), (dreg), (reg)); \ + amd64_codegen_post(inst); \ } while (0) #define emit_sse_reg_reg(inst,dreg,reg,op1,op2,op3) emit_sse_reg_reg_size ((inst), (dreg), (reg), (op1), (op2), (op3), 0) #define emit_sse_reg_reg_imm(inst,dreg,reg,op1,op2,op3,imm) do { \ + amd64_codegen_pre(inst); \ emit_sse_reg_reg ((inst), (dreg), (reg), (op1), (op2), (op3)); \ x86_imm_emit8 ((inst), (imm)); \ + amd64_codegen_post(inst); \ } while (0) #define emit_sse_membase_reg(inst,basereg,disp,reg,op1,op2,op3) do { \ - *(inst)++ = (unsigned char)(op1); \ + amd64_codegen_pre(inst); \ + x86_prefix((inst), (unsigned char)(op1)); \ amd64_emit_rex ((inst), 0, (reg), 0, (basereg)); \ *(inst)++ = (unsigned char)(op2); \ *(inst)++ = (unsigned char)(op3); \ amd64_membase_emit ((inst), (reg), (basereg), (disp)); \ + amd64_codegen_post(inst); \ } while (0) #define emit_sse_reg_membase(inst,dreg,basereg,disp,op1,op2,op3) do { \ - *(inst)++ = (unsigned char)(op1); \ + amd64_codegen_pre(inst); \ + x86_prefix((inst), (unsigned char)(op1)); \ amd64_emit_rex ((inst), 0, (dreg), 0, (basereg) == AMD64_RIP ? 0 : (basereg)); \ *(inst)++ = (unsigned char)(op2); \ *(inst)++ = (unsigned char)(op3); \ amd64_membase_emit ((inst), (dreg), (basereg), (disp)); \ + amd64_codegen_post(inst); \ } while (0) /* Four opcode SSE defines */ #define emit_sse_reg_reg_op4_size(inst,dreg,reg,op1,op2,op3,op4,size) do { \ - *(inst)++ = (unsigned char)(op1); \ - amd64_emit_rex ((inst), size, (dreg), 0, (reg)); \ + amd64_codegen_pre(inst); \ + x86_prefix((inst), (unsigned char)(op1)); \ + amd64_emit_rex ((inst), size, (dreg), 0, (reg)); \ *(inst)++ = (unsigned char)(op2); \ *(inst)++ = (unsigned char)(op3); \ *(inst)++ = (unsigned char)(op4); \ x86_reg_emit ((inst), (dreg), (reg)); \ + amd64_codegen_post(inst); \ } while (0) #define emit_sse_reg_reg_op4(inst,dreg,reg,op1,op2,op3,op4) emit_sse_reg_reg_op4_size ((inst), (dreg), (reg), (op1), (op2), (op3), (op4), 0) @@ -954,189 +1369,244 @@ typedef union { /* Generated from x86-codegen.h */ #define amd64_breakpoint_size(inst,size) do { x86_breakpoint(inst); } while (0) -#define amd64_cld_size(inst,size) do { amd64_emit_rex ((inst),0,0,0,0); x86_cld(inst); } while (0) -#define amd64_stosb_size(inst,size) do { amd64_emit_rex ((inst),(size),0,0,0); x86_stosb(inst); } while (0) -#define amd64_stosl_size(inst,size) do { amd64_emit_rex ((inst),(size),0,0,0); x86_stosl(inst); } while (0) -#define amd64_stosd_size(inst,size) do { amd64_emit_rex ((inst),(size),0,0,0); x86_stosd(inst); } while (0) -#define amd64_movsb_size(inst,size) do { amd64_emit_rex ((inst),(size),0,0,0); x86_movsb(inst); } while (0) -#define amd64_movsl_size(inst,size) do { amd64_emit_rex ((inst),(size),0,0,0); x86_movsl(inst); } while (0) -#define amd64_movsd_size(inst,size) do { amd64_emit_rex ((inst),(size),0,0,0); x86_movsd(inst); } while (0) -#define amd64_prefix_size(inst,p,size) do { amd64_emit_rex ((inst),0,0,0,0); x86_prefix((inst), p); } while (0) -#define amd64_rdtsc_size(inst,size) do { amd64_emit_rex 
((inst),(size),0,0,0); x86_rdtsc(inst); } while (0) -#define amd64_cmpxchg_reg_reg_size(inst,dreg,reg,size) do { amd64_emit_rex ((inst),(size),(dreg),0,(reg)); x86_cmpxchg_reg_reg((inst),((dreg)&0x7),((reg)&0x7)); } while (0) -#define amd64_cmpxchg_mem_reg_size(inst,mem,reg,size) do { amd64_emit_rex ((inst),(size),0,0,(reg)); x86_cmpxchg_mem_reg((inst),(mem),((reg)&0x7)); } while (0) -#define amd64_cmpxchg_membase_reg_size(inst,basereg,disp,reg,size) do { amd64_emit_rex ((inst),(size),(reg),0,(basereg)); x86_cmpxchg_membase_reg((inst),((basereg)&0x7),(disp),((reg)&0x7)); } while (0) -#define amd64_xchg_reg_reg_size(inst,dreg,reg,size) do { amd64_emit_rex ((inst),(size),(dreg),0,(reg)); x86_xchg_reg_reg((inst),((dreg)&0x7),((reg)&0x7),(size) == 8 ? 4 : (size)); } while (0) -#define amd64_xchg_mem_reg_size(inst,mem,reg,size) do { amd64_emit_rex ((inst),(size),0,0,(reg)); x86_xchg_mem_reg((inst),(mem),((reg)&0x7),(size) == 8 ? 4 : (size)); } while (0) -#define amd64_xchg_membase_reg_size(inst,basereg,disp,reg,size) do { amd64_emit_rex ((inst),(size),(reg),0,(basereg)); x86_xchg_membase_reg((inst),((basereg)&0x7),(disp),((reg)&0x7),(size) == 8 ? 4 : (size)); } while (0) -#define amd64_inc_mem_size(inst,mem,size) do { amd64_emit_rex ((inst),(size),0,0,0); x86_inc_mem((inst),(mem)); } while (0) -#define amd64_inc_membase_size(inst,basereg,disp,size) do { amd64_emit_rex ((inst),(size),0,0,(basereg)); x86_inc_membase((inst),((basereg)&0x7),(disp)); } while (0) -//#define amd64_inc_reg_size(inst,reg,size) do { amd64_emit_rex ((inst),(size),0,0,(reg)); x86_inc_reg((inst),((reg)&0x7)); } while (0) -#define amd64_dec_mem_size(inst,mem,size) do { amd64_emit_rex ((inst),(size),0,0,0); x86_dec_mem((inst),(mem)); } while (0) -#define amd64_dec_membase_size(inst,basereg,disp,size) do { amd64_emit_rex ((inst),(size),0,0,(basereg)); x86_dec_membase((inst),((basereg)&0x7),(disp)); } while (0) -//#define amd64_dec_reg_size(inst,reg,size) do { amd64_emit_rex ((inst),(size),0,0,(reg)); x86_dec_reg((inst),((reg)&0x7)); } while (0) -#define amd64_not_mem_size(inst,mem,size) do { amd64_emit_rex ((inst),(size),0,0,0); x86_not_mem((inst),(mem)); } while (0) -#define amd64_not_membase_size(inst,basereg,disp,size) do { amd64_emit_rex ((inst),(size),0,0,(basereg)); x86_not_membase((inst),((basereg)&0x7),(disp)); } while (0) -#define amd64_not_reg_size(inst,reg,size) do { amd64_emit_rex ((inst),(size),0,0,(reg)); x86_not_reg((inst),((reg)&0x7)); } while (0) -#define amd64_neg_mem_size(inst,mem,size) do { amd64_emit_rex ((inst),(size),0,0,0); x86_neg_mem((inst),(mem)); } while (0) -#define amd64_neg_membase_size(inst,basereg,disp,size) do { amd64_emit_rex ((inst),(size),0,0,(basereg)); x86_neg_membase((inst),((basereg)&0x7),(disp)); } while (0) -#define amd64_neg_reg_size(inst,reg,size) do { amd64_emit_rex ((inst),(size),0,0,(reg)); x86_neg_reg((inst),((reg)&0x7)); } while (0) -#define amd64_nop_size(inst,size) do { x86_nop(inst); } while (0) -//#define amd64_alu_reg_imm_size(inst,opc,reg,imm,size) do { amd64_emit_rex ((inst),(size),0,0,(reg)); x86_alu_reg_imm((inst),(opc),((reg)&0x7),(imm)); } while (0) -#define amd64_alu_mem_imm_size(inst,opc,mem,imm,size) do { amd64_emit_rex ((inst),(size),0,0,0); x86_alu_mem_imm((inst),(opc),(mem),(imm)); } while (0) -#define amd64_alu_membase_imm_size(inst,opc,basereg,disp,imm,size) do { amd64_emit_rex ((inst),(size),0,0,(basereg)); x86_alu_membase_imm((inst),(opc),((basereg)&0x7),(disp),(imm)); } while (0) -#define amd64_alu_membase8_imm_size(inst,opc,basereg,disp,imm,size) do { 
amd64_emit_rex ((inst),(size),0,0,(basereg)); x86_alu_membase8_imm((inst),(opc),((basereg)&0x7),(disp),(imm)); } while (0) -#define amd64_alu_mem_reg_size(inst,opc,mem,reg,size) do { amd64_emit_rex ((inst),(size),0,0,(reg)); x86_alu_mem_reg((inst),(opc),(mem),((reg)&0x7)); } while (0) -#define amd64_alu_membase_reg_size(inst,opc,basereg,disp,reg,size) do { amd64_emit_rex ((inst),(size),(reg),0,(basereg)); x86_alu_membase_reg((inst),(opc),((basereg)&0x7),(disp),((reg)&0x7)); } while (0) -//#define amd64_alu_reg_reg_size(inst,opc,dreg,reg,size) do { amd64_emit_rex ((inst),(size),(dreg),0,(reg)); x86_alu_reg_reg((inst),(opc),((dreg)&0x7),((reg)&0x7)); } while (0) -#define amd64_alu_reg8_reg8_size(inst,opc,dreg,reg,is_dreg_h,is_reg_h,size) do { amd64_emit_rex ((inst),(size),(dreg),0,(reg)); x86_alu_reg8_reg8((inst),(opc),((dreg)&0x7),((reg)&0x7),(is_dreg_h),(is_reg_h)); } while (0) -#define amd64_alu_reg_mem_size(inst,opc,reg,mem,size) do { amd64_emit_rex ((inst),(size),0,0,(reg)); x86_alu_reg_mem((inst),(opc),((reg)&0x7),(mem)); } while (0) -//#define amd64_alu_reg_membase_size(inst,opc,reg,basereg,disp,size) do { amd64_emit_rex ((inst),(size),(reg),0,(basereg)); x86_alu_reg_membase((inst),(opc),((reg)&0x7),((basereg)&0x7),(disp)); } while (0) -#define amd64_test_reg_imm_size(inst,reg,imm,size) do { amd64_emit_rex ((inst),(size),0,0,(reg)); x86_test_reg_imm((inst),((reg)&0x7),(imm)); } while (0) -#define amd64_test_mem_imm_size(inst,mem,imm,size) do { amd64_emit_rex ((inst),(size),0,0,0); x86_test_mem_imm((inst),(mem),(imm)); } while (0) -#define amd64_test_membase_imm_size(inst,basereg,disp,imm,size) do { amd64_emit_rex ((inst),(size),0,0,(basereg)); x86_test_membase_imm((inst),((basereg)&0x7),(disp),(imm)); } while (0) -#define amd64_test_reg_reg_size(inst,dreg,reg,size) do { amd64_emit_rex ((inst),(size),(dreg),0,(reg)); x86_test_reg_reg((inst),((dreg)&0x7),((reg)&0x7)); } while (0) -#define amd64_test_mem_reg_size(inst,mem,reg,size) do { amd64_emit_rex ((inst),(size),0,0,(reg)); x86_test_mem_reg((inst),(mem),((reg)&0x7)); } while (0) -#define amd64_test_membase_reg_size(inst,basereg,disp,reg,size) do { amd64_emit_rex ((inst),(size),(reg),0,(basereg)); x86_test_membase_reg((inst),((basereg)&0x7),(disp),((reg)&0x7)); } while (0) -#define amd64_shift_reg_imm_size(inst,opc,reg,imm,size) do { amd64_emit_rex ((inst),(size),0,0,(reg)); x86_shift_reg_imm((inst),(opc),((reg)&0x7),(imm)); } while (0) -#define amd64_shift_mem_imm_size(inst,opc,mem,imm,size) do { amd64_emit_rex ((inst),(size),0,0,(reg)); x86_shift_mem_imm((inst),(opc),(mem),(imm)); } while (0) -#define amd64_shift_membase_imm_size(inst,opc,basereg,disp,imm,size) do { amd64_emit_rex ((inst),(size),(reg),0,(basereg)); x86_shift_membase_imm((inst),(opc),((basereg)&0x7),(disp),(imm)); } while (0) -#define amd64_shift_reg_size(inst,opc,reg,size) do { amd64_emit_rex ((inst),(size),0,0,(reg)); x86_shift_reg((inst),(opc),((reg)&0x7)); } while (0) -#define amd64_shift_mem_size(inst,opc,mem,size) do { amd64_emit_rex ((inst),(size),0,0,(reg)); x86_shift_mem((inst),(opc),(mem)); } while (0) -#define amd64_shift_membase_size(inst,opc,basereg,disp,size) do { amd64_emit_rex ((inst),(size),0,0,(basereg)); x86_shift_membase((inst),(opc),((basereg)&0x7),(disp)); } while (0) -#define amd64_shrd_reg_size(inst,dreg,reg,size) do { amd64_emit_rex ((inst),(size),(dreg),0,(reg)); x86_shrd_reg((inst),((dreg)&0x7),((reg)&0x7)); } while (0) -#define amd64_shrd_reg_imm_size(inst,dreg,reg,shamt,size) do { amd64_emit_rex ((inst),(size),(dreg),0,(reg)); 
x86_shrd_reg_imm((inst),((dreg)&0x7),((reg)&0x7),(shamt)); } while (0) -#define amd64_shld_reg_size(inst,dreg,reg,size) do { amd64_emit_rex ((inst),(size),(dreg),0,(reg)); x86_shld_reg((inst),((dreg)&0x7),((reg)&0x7)); } while (0) -#define amd64_shld_reg_imm_size(inst,dreg,reg,shamt,size) do { amd64_emit_rex ((inst),(size),(dreg),0,(reg)); x86_shld_reg_imm((inst),((dreg)&0x7),((reg)&0x7),(shamt)); } while (0) -#define amd64_mul_reg_size(inst,reg,is_signed,size) do { amd64_emit_rex ((inst),(size),0,0,(reg)); x86_mul_reg((inst),((reg)&0x7),(is_signed)); } while (0) -#define amd64_mul_mem_size(inst,mem,is_signed,size) do { amd64_emit_rex ((inst),(size),0,0,0); x86_mul_mem((inst),(mem),(is_signed)); } while (0) -#define amd64_mul_membase_size(inst,basereg,disp,is_signed,size) do { amd64_emit_rex ((inst),(size),0,0,(basereg)); x86_mul_membase((inst),((basereg)&0x7),(disp),(is_signed)); } while (0) -#define amd64_imul_reg_reg_size(inst,dreg,reg,size) do { amd64_emit_rex ((inst),(size),(dreg),0,(reg)); x86_imul_reg_reg((inst),((dreg)&0x7),((reg)&0x7)); } while (0) -#define amd64_imul_reg_mem_size(inst,reg,mem,size) do { amd64_emit_rex ((inst),(size),0,0,(reg)); x86_imul_reg_mem((inst),((reg)&0x7),(mem)); } while (0) -#define amd64_imul_reg_membase_size(inst,reg,basereg,disp,size) do { amd64_emit_rex ((inst),(size),(reg),0,(basereg)); x86_imul_reg_membase((inst),((reg)&0x7),((basereg)&0x7),(disp)); } while (0) -#define amd64_imul_reg_reg_imm_size(inst,dreg,reg,imm,size) do { amd64_emit_rex ((inst),(size),(dreg),0,(reg)); x86_imul_reg_reg_imm((inst),((dreg)&0x7),((reg)&0x7),(imm)); } while (0) -#define amd64_imul_reg_mem_imm_size(inst,reg,mem,imm,size) do { amd64_emit_rex ((inst),(size),0,0,(reg)); x86_imul_reg_mem_imm((inst),((reg)&0x7),(mem),(imm)); } while (0) -#define amd64_imul_reg_membase_imm_size(inst,reg,basereg,disp,imm,size) do { amd64_emit_rex ((inst),(size),(reg),0,(basereg)); x86_imul_reg_membase_imm((inst),((reg)&0x7),((basereg)&0x7),(disp),(imm)); } while (0) -#define amd64_div_reg_size(inst,reg,is_signed,size) do { amd64_emit_rex ((inst),(size),0,0,(reg)); x86_div_reg((inst),((reg)&0x7),(is_signed)); } while (0) -#define amd64_div_mem_size(inst,mem,is_signed,size) do { amd64_emit_rex ((inst),(size),0,0,0); x86_div_mem((inst),(mem),(is_signed)); } while (0) -#define amd64_div_membase_size(inst,basereg,disp,is_signed,size) do { amd64_emit_rex ((inst),(size),0,0,(basereg)); x86_div_membase((inst),((basereg)&0x7),(disp),(is_signed)); } while (0) -#define amd64_mov_mem_reg_size(inst,mem,reg,size) do { amd64_emit_rex ((inst),(size),0,0,(reg)); x86_mov_mem_reg((inst),(mem),((reg)&0x7),(size) == 8 ? 4 : (size)); } while (0) -//#define amd64_mov_regp_reg_size(inst,regp,reg,size) do { amd64_emit_rex ((inst),(size),(regp),0,(reg)); x86_mov_regp_reg((inst),(regp),((reg)&0x7),(size) == 8 ? 4 : (size)); } while (0) -//#define amd64_mov_membase_reg_size(inst,basereg,disp,reg,size) do { amd64_emit_rex ((inst),(size),(reg),0,(basereg)); x86_mov_membase_reg((inst),((basereg)&0x7),(disp),((reg)&0x7),(size) == 8 ? 4 : (size)); } while (0) -#define amd64_mov_memindex_reg_size(inst,basereg,disp,indexreg,shift,reg,size) do { amd64_emit_rex ((inst),(size),(reg),(indexreg),(basereg)); x86_mov_memindex_reg((inst),((basereg)&0x7),(disp),((indexreg)&0x7),(shift),((reg)&0x7),(size) == 8 ? 4 : (size)); } while (0) -#define amd64_mov_reg_reg_size(inst,dreg,reg,size) do { amd64_emit_rex ((inst),(size),(dreg),0,(reg)); x86_mov_reg_reg((inst),((dreg)&0x7),((reg)&0x7),(size) == 8 ? 
4 : (size)); } while (0) -//#define amd64_mov_reg_mem_size(inst,reg,mem,size) do { amd64_emit_rex ((inst),(size),0,0,(reg)); x86_mov_reg_mem((inst),((reg)&0x7),(mem),(size) == 8 ? 4 : (size)); } while (0) -//#define amd64_mov_reg_membase_size(inst,reg,basereg,disp,size) do { amd64_emit_rex ((inst),(size),(reg),0,(basereg)); x86_mov_reg_membase((inst),((reg)&0x7),((basereg)&0x7),(disp),(size) == 8 ? 4 : (size)); } while (0) -#define amd64_mov_reg_memindex_size(inst,reg,basereg,disp,indexreg,shift,size) do { amd64_emit_rex ((inst),(size),(reg),(indexreg),(basereg)); x86_mov_reg_memindex((inst),((reg)&0x7),((basereg)&0x7),(disp),((indexreg)&0x7),(shift),(size) == 8 ? 4 : (size)); } while (0) -#define amd64_clear_reg_size(inst,reg,size) do { amd64_emit_rex ((inst),(size),0,0,(reg)); x86_clear_reg((inst),((reg)&0x7)); } while (0) -//#define amd64_mov_reg_imm_size(inst,reg,imm,size) do { amd64_emit_rex ((inst),(size),0,0,(reg)); x86_mov_reg_imm((inst),((reg)&0x7),(imm)); } while (0) -#define amd64_mov_mem_imm_size(inst,mem,imm,size) do { amd64_emit_rex ((inst),(size),0,0,0); x86_mov_mem_imm((inst),(mem),(imm),(size) == 8 ? 4 : (size)); } while (0) -//#define amd64_mov_membase_imm_size(inst,basereg,disp,imm,size) do { amd64_emit_rex ((inst),(size),0,0,(basereg)); x86_mov_membase_imm((inst),((basereg)&0x7),(disp),(imm),(size) == 8 ? 4 : (size)); } while (0) -#define amd64_mov_memindex_imm_size(inst,basereg,disp,indexreg,shift,imm,size) do { amd64_emit_rex ((inst),(size),0,(indexreg),(basereg)); x86_mov_memindex_imm((inst),((basereg)&0x7),(disp),((indexreg)&0x7),(shift),(imm),(size) == 8 ? 4 : (size)); } while (0) -#define amd64_lea_mem_size(inst,reg,mem,size) do { amd64_emit_rex ((inst),(size),0,0,(reg)); x86_lea_mem((inst),((reg)&0x7),(mem)); } while (0) -//#define amd64_lea_membase_size(inst,reg,basereg,disp,size) do { amd64_emit_rex ((inst),(size),0,0,(basereg)); x86_lea_membase((inst),((reg)&0x7),((basereg)&0x7),(disp)); } while (0) -#define amd64_lea_memindex_size(inst,reg,basereg,disp,indexreg,shift,size) do { amd64_emit_rex ((inst),(size),(reg),(indexreg),(basereg)); x86_lea_memindex((inst),((reg)&0x7),((basereg)&0x7),(disp),((indexreg)&0x7),(shift)); } while (0) -#define amd64_widen_reg_size(inst,dreg,reg,is_signed,is_half,size) do { amd64_emit_rex ((inst),(size),(dreg),0,(reg)); x86_widen_reg((inst),((dreg)&0x7),((reg)&0x7),(is_signed),(is_half)); } while (0) -#define amd64_widen_mem_size(inst,dreg,mem,is_signed,is_half,size) do { amd64_emit_rex ((inst),(size),(dreg),0,0); x86_widen_mem((inst),((dreg)&0x7),(mem),(is_signed),(is_half)); } while (0) -#define amd64_widen_membase_size(inst,dreg,basereg,disp,is_signed,is_half,size) do { amd64_emit_rex ((inst),(size),(dreg),0,(basereg)); x86_widen_membase((inst),((dreg)&0x7),((basereg)&0x7),(disp),(is_signed),(is_half)); } while (0) -#define amd64_widen_memindex_size(inst,dreg,basereg,disp,indexreg,shift,is_signed,is_half,size) do { amd64_emit_rex ((inst),(size),(dreg),(indexreg),(basereg)); x86_widen_memindex((inst),((dreg)&0x7),((basereg)&0x7),(disp),((indexreg)&0x7),(shift),(is_signed),(is_half)); } while (0) -#define amd64_cdq_size(inst,size) do { amd64_emit_rex ((inst),(size),0,0,0); x86_cdq(inst); } while (0) -#define amd64_wait_size(inst,size) do { amd64_emit_rex ((inst),(size),0,0,0); x86_wait(inst); } while (0) -#define amd64_fp_op_mem_size(inst,opc,mem,is_double,size) do { amd64_emit_rex ((inst),0,0,0,0); x86_fp_op_mem((inst),(opc),(mem),(is_double)); } while (0) -#define amd64_fp_op_membase_size(inst,opc,basereg,disp,is_double,size) 
do { amd64_emit_rex ((inst),0,0,0,(basereg)); x86_fp_op_membase((inst),(opc),((basereg)&0x7),(disp),(is_double)); } while (0) -#define amd64_fp_op_size(inst,opc,index,size) do { amd64_emit_rex ((inst),0,0,0,0); x86_fp_op((inst),(opc),(index)); } while (0) -#define amd64_fp_op_reg_size(inst,opc,index,pop_stack,size) do { amd64_emit_rex ((inst),0,0,0,0); x86_fp_op_reg((inst),(opc),(index),(pop_stack)); } while (0) -#define amd64_fp_int_op_membase_size(inst,opc,basereg,disp,is_int,size) do { amd64_emit_rex ((inst),0,0,0,(basereg)); x86_fp_int_op_membase((inst),(opc),((basereg)&0x7),(disp),(is_int)); } while (0) -#define amd64_fstp_size(inst,index,size) do { amd64_emit_rex ((inst),0,0,0,0); x86_fstp((inst),(index)); } while (0) -#define amd64_fcompp_size(inst,size) do { amd64_emit_rex ((inst),0,0,0,0); x86_fcompp(inst); } while (0) -#define amd64_fucompp_size(inst,size) do { amd64_emit_rex ((inst),0,0,0,0); x86_fucompp(inst); } while (0) -#define amd64_fnstsw_size(inst,size) do { amd64_emit_rex ((inst),0,0,0,0); x86_fnstsw(inst); } while (0) -#define amd64_fnstcw_size(inst,mem,size) do { amd64_emit_rex ((inst),0,0,0,0); x86_fnstcw((inst),(mem)); } while (0) -#define amd64_fnstcw_membase_size(inst,basereg,disp,size) do { amd64_emit_rex ((inst),(size),0,0,(basereg)); x86_fnstcw_membase((inst),((basereg)&0x7),(disp)); } while (0) -#define amd64_fldcw_size(inst,mem,size) do { amd64_emit_rex ((inst),0,0,0,0); x86_fldcw((inst),(mem)); } while (0) -#define amd64_fldcw_membase_size(inst,basereg,disp,size) do { amd64_emit_rex ((inst),0,0,0,(basereg)); x86_fldcw_membase((inst),((basereg)&0x7),(disp)); } while (0) -#define amd64_fchs_size(inst,size) do { amd64_emit_rex ((inst),0,0,0,0); x86_fchs(inst); } while (0) -#define amd64_frem_size(inst,size) do { amd64_emit_rex ((inst),0,0,0,0); x86_frem(inst); } while (0) -#define amd64_fxch_size(inst,index,size) do { amd64_emit_rex ((inst),0,0,0,0); x86_fxch((inst),(index)); } while (0) -#define amd64_fcomi_size(inst,index,size) do { amd64_emit_rex ((inst),0,0,0,0); x86_fcomi((inst),(index)); } while (0) -#define amd64_fcomip_size(inst,index,size) do { amd64_emit_rex ((inst),0,0,0,0); x86_fcomip((inst),(index)); } while (0) -#define amd64_fucomi_size(inst,index,size) do { amd64_emit_rex ((inst),0,0,0,0); x86_fucomi((inst),(index)); } while (0) -#define amd64_fucomip_size(inst,index,size) do { amd64_emit_rex ((inst),0,0,0,0); x86_fucomip((inst),(index)); } while (0) -#define amd64_fld_size(inst,mem,is_double,size) do { amd64_emit_rex ((inst),0,0,0,0); x86_fld((inst),(mem),(is_double)); } while (0) -//#define amd64_fld_membase_size(inst,basereg,disp,is_double,size) do { amd64_emit_rex ((inst),0,0,0,(basereg)); x86_fld_membase((inst),((basereg)&0x7),(disp),(is_double)); } while (0) -#define amd64_fld80_mem_size(inst,mem,size) do { amd64_emit_rex ((inst),0,0,0,0); x86_fld80_mem((inst),(mem)); } while (0) -#define amd64_fld80_membase_size(inst,basereg,disp,size) do { amd64_emit_rex ((inst),(size),0,0,(basereg)); x86_fld80_membase((inst),((basereg)&0x7),(disp)); } while (0) -#define amd64_fild_size(inst,mem,is_long,size) do { amd64_emit_rex ((inst),0,0,0,0); x86_fild((inst),(mem),(is_long)); } while (0) -#define amd64_fild_membase_size(inst,basereg,disp,is_long,size) do { amd64_emit_rex ((inst),0,0,0,(basereg)); x86_fild_membase((inst),((basereg)&0x7),(disp),(is_long)); } while (0) -#define amd64_fld_reg_size(inst,index,size) do { amd64_emit_rex ((inst),0,0,0,0); x86_fld_reg((inst),(index)); } while (0) -#define amd64_fldz_size(inst,size) do { amd64_emit_rex 
((inst),0,0,0,0); x86_fldz(inst); } while (0) -#define amd64_fld1_size(inst,size) do { amd64_emit_rex ((inst),0,0,0,0); x86_fld1(inst); } while (0) -#define amd64_fldpi_size(inst,size) do { amd64_emit_rex ((inst),0,0,0,0); x86_fldpi(inst); } while (0) -#define amd64_fst_size(inst,mem,is_double,pop_stack,size) do { amd64_emit_rex ((inst),0,0,0,0); x86_fst((inst),(mem),(is_double),(pop_stack)); } while (0) -#define amd64_fst_membase_size(inst,basereg,disp,is_double,pop_stack,size) do { amd64_emit_rex ((inst),0,0,0,(basereg)); x86_fst_membase((inst),((basereg)&0x7),(disp),(is_double),(pop_stack)); } while (0) -#define amd64_fst80_mem_size(inst,mem,size) do { amd64_emit_rex ((inst),0,0,0,0); x86_fst80_mem((inst),(mem)); } while (0) -#define amd64_fst80_membase_size(inst,basereg,disp,size) do { amd64_emit_rex ((inst),0,0,0,(basereg)); x86_fst80_membase((inst),((basereg)&0x7),(disp)); } while (0) -#define amd64_fist_pop_size(inst,mem,is_long,size) do { amd64_emit_rex ((inst),0,0,0,0); x86_fist_pop((inst),(mem),(is_long)); } while (0) -#define amd64_fist_pop_membase_size(inst,basereg,disp,is_long,size) do { amd64_emit_rex ((inst),0,0,0,(basereg)); x86_fist_pop_membase((inst),((basereg)&0x7),(disp),(is_long)); } while (0) -#define amd64_fstsw_size(inst,size) do { amd64_emit_rex ((inst),(size),0,0,0); x86_fstsw(inst); } while (0) -#define amd64_fist_membase_size(inst,basereg,disp,is_int,size) do { amd64_emit_rex ((inst),0,0,0,(basereg)); x86_fist_membase((inst),((basereg)&0x7),(disp),(is_int)); } while (0) -//#define amd64_push_reg_size(inst,reg,size) do { amd64_emit_rex ((inst),(size),0,0,(reg)); x86_push_reg((inst),((reg)&0x7)); } while (0) -#define amd64_push_regp_size(inst,reg,size) do { amd64_emit_rex ((inst),(size),0,0,(reg)); x86_push_regp((inst),((reg)&0x7)); } while (0) -#define amd64_push_mem_size(inst,mem,size) do { amd64_emit_rex ((inst),(size),0,0,0); x86_push_mem((inst),(mem)); } while (0) -//#define amd64_push_membase_size(inst,basereg,disp,size) do { amd64_emit_rex ((inst),(size),0,0,(basereg)); x86_push_membase((inst),((basereg)&0x7),(disp)); } while (0) -#define amd64_push_memindex_size(inst,basereg,disp,indexreg,shift,size) do { amd64_emit_rex ((inst),(size),0,(indexreg),(basereg)); x86_push_memindex((inst),((basereg)&0x7),(disp),((indexreg)&0x7),(shift)); } while (0) -#define amd64_push_imm_size(inst,imm,size) do { amd64_emit_rex ((inst),0,0,0,0); x86_push_imm((inst),(imm)); } while (0) -//#define amd64_pop_reg_size(inst,reg,size) do { amd64_emit_rex ((inst),(size),0,0,(reg)); x86_pop_reg((inst),((reg)&0x7)); } while (0) -#define amd64_pop_mem_size(inst,mem,size) do { amd64_emit_rex ((inst),(size),0,0,0); x86_pop_mem((inst),(mem)); } while (0) -#define amd64_pop_membase_size(inst,basereg,disp,size) do { amd64_emit_rex ((inst),(size),0,0,(basereg)); x86_pop_membase((inst),((basereg)&0x7),(disp)); } while (0) -#define amd64_pushad_size(inst,size) do { amd64_emit_rex ((inst),(size),0,0,0); x86_pushad(inst); } while (0) -#define amd64_pushfd_size(inst,size) do { amd64_emit_rex ((inst),(size),0,0,0); x86_pushfd(inst); } while (0) -#define amd64_popad_size(inst,size) do { amd64_emit_rex ((inst),(size),0,0,0); x86_popad(inst); } while (0) -#define amd64_popfd_size(inst,size) do { amd64_emit_rex ((inst),(size),0,0,0); x86_popfd(inst); } while (0) -#define amd64_loop_size(inst,imm,size) do { amd64_emit_rex ((inst),(size),0,0,0); x86_loop((inst),(imm)); } while (0) -#define amd64_loope_size(inst,imm,size) do { amd64_emit_rex ((inst),(size),0,0,0); x86_loope((inst),(imm)); } while (0) 
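
The `+` macros that follow differ from the `-` macros being removed only in that every instruction emission is now bracketed by amd64_codegen_pre/amd64_codegen_post, so that under the Native Client (NaCl) code generator the emitted bytes can be kept inside one alignment bundle (and relocated if the sandboxing pass has to move them), while the default build is unaffected. The real definitions of the pre/post macros are added earlier in amd64-codegen.h and are not part of this hunk; the sketch below is only an illustration of the pattern, and the helper name amd64_nacl_instruction_post and its signature are assumptions, not code from this patch.

/* Illustrative sketch only -- not the definitions added by this patch. */
typedef unsigned char guint8;            /* normally provided by glib */

/* Assumed NaCl helper: re-bundles the bytes emitted between *start and *end
   and updates both pointers if the instruction had to be moved. */
void amd64_nacl_instruction_post (guint8 **start, guint8 **end);

#if defined(__native_client_codegen__)
/* Remember where this instruction's bytes begin... */
#define amd64_codegen_pre(inst) guint8 *_codegen_start = (inst);
/* ...then let the NaCl pass realign/relocate them; the expression yields the
   (possibly moved) start address, which is why some macros below can write
   inst = amd64_codegen_post(inst); */
#define amd64_codegen_post(inst) \
        (amd64_nacl_instruction_post (&_codegen_start, &(inst)), _codegen_start)
#else
/* Default build: the brackets compile away to nothing. */
#define amd64_codegen_pre(inst)
#define amd64_codegen_post(inst) (inst)
#endif

Under this reading, a wrapper such as amd64_inc_mem_size simply becomes
"pre; emit REX; emit the x86 opcode; post", which is exactly the shape of
each rewritten macro in the `+` lines below.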
-#define amd64_loopne_size(inst,imm,size) do { amd64_emit_rex ((inst),(size),0,0,0); x86_loopne((inst),(imm)); } while (0) -#define amd64_jump32_size(inst,imm,size) do { amd64_emit_rex ((inst),(size),0,0,0); x86_jump32((inst),(imm)); } while (0) -#define amd64_jump8_size(inst,imm,size) do { amd64_emit_rex ((inst),(size),0,0,0); x86_jump8((inst),(imm)); } while (0) +#define amd64_cld_size(inst,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),0,0,0,0); x86_cld(inst); amd64_codegen_post(inst); } while (0) +#define amd64_stosb_size(inst,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,0); x86_stosb(inst); amd64_codegen_post(inst); } while (0) +#define amd64_stosl_size(inst,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,0); x86_stosl(inst); amd64_codegen_post(inst); } while (0) +#define amd64_stosd_size(inst,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,0); x86_stosd(inst); amd64_codegen_post(inst); } while (0) +#define amd64_movsb_size(inst,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,0); x86_movsb(inst); amd64_codegen_post(inst); } while (0) +#define amd64_movsl_size(inst,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,0); x86_movsl(inst); amd64_codegen_post(inst); } while (0) +#define amd64_movsd_size(inst,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,0); x86_movsd(inst); amd64_codegen_post(inst); } while (0) +#define amd64_prefix_size(inst,p,size) do { x86_prefix((inst), p); } while (0) +#define amd64_rdtsc_size(inst,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,0); x86_rdtsc(inst); amd64_codegen_post(inst); } while (0) +#define amd64_cmpxchg_reg_reg_size(inst,dreg,reg,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),(dreg),0,(reg)); x86_cmpxchg_reg_reg((inst),((dreg)&0x7),((reg)&0x7)); amd64_codegen_post(inst); } while (0) +#define amd64_cmpxchg_mem_reg_size(inst,mem,reg,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,(reg)); x86_cmpxchg_mem_reg((inst),(mem),((reg)&0x7)); amd64_codegen_post(inst); } while (0) +#define amd64_cmpxchg_membase_reg_size(inst,basereg,disp,reg,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),(reg),0,(basereg)); x86_cmpxchg_membase_reg((inst),((basereg)&0x7),(disp),((reg)&0x7)); amd64_codegen_post(inst); } while (0) +#define amd64_xchg_reg_reg_size(inst,dreg,reg,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),(dreg),0,(reg)); x86_xchg_reg_reg((inst),((dreg)&0x7),((reg)&0x7),(size) == 8 ? 4 : (size)); amd64_codegen_post(inst); } while (0) +#define amd64_xchg_mem_reg_size(inst,mem,reg,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,(reg)); x86_xchg_mem_reg((inst),(mem),((reg)&0x7),(size) == 8 ? 4 : (size)); amd64_codegen_post(inst); } while (0) +#define amd64_xchg_membase_reg_size(inst,basereg,disp,reg,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),(reg),0,(basereg)); x86_xchg_membase_reg((inst),((basereg)&0x7),(disp),((reg)&0x7),(size) == 8 ? 
4 : (size)); amd64_codegen_post(inst); } while (0) +#define amd64_inc_mem_size(inst,mem,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,0); x86_inc_mem((inst),(mem)); amd64_codegen_post(inst); } while (0) +#define amd64_inc_membase_size(inst,basereg,disp,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,(basereg)); x86_inc_membase((inst),((basereg)&0x7),(disp)); amd64_codegen_post(inst); } while (0) +//#define amd64_inc_reg_size(inst,reg,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,(reg)); x86_inc_reg((inst),((reg)&0x7)); amd64_codegen_post(inst); } while (0) +#define amd64_dec_mem_size(inst,mem,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,0); x86_dec_mem((inst),(mem)); amd64_codegen_post(inst); } while (0) +#define amd64_dec_membase_size(inst,basereg,disp,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,(basereg)); x86_dec_membase((inst),((basereg)&0x7),(disp)); amd64_codegen_post(inst); } while (0) +//#define amd64_dec_reg_size(inst,reg,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,(reg)); x86_dec_reg((inst),((reg)&0x7)); amd64_codegen_post(inst); } while (0) +#define amd64_not_mem_size(inst,mem,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,0); x86_not_mem((inst),(mem)); amd64_codegen_post(inst); } while (0) +#define amd64_not_membase_size(inst,basereg,disp,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,(basereg)); x86_not_membase((inst),((basereg)&0x7),(disp)); amd64_codegen_post(inst); } while (0) +#define amd64_not_reg_size(inst,reg,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,(reg)); x86_not_reg((inst),((reg)&0x7)); amd64_codegen_post(inst); } while (0) +#define amd64_neg_mem_size(inst,mem,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,0); x86_neg_mem((inst),(mem)); amd64_codegen_post(inst); } while (0) +#define amd64_neg_membase_size(inst,basereg,disp,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,(basereg)); x86_neg_membase((inst),((basereg)&0x7),(disp)); amd64_codegen_post(inst); } while (0) +#define amd64_neg_reg_size(inst,reg,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,(reg)); x86_neg_reg((inst),((reg)&0x7)); amd64_codegen_post(inst); } while (0) +#define amd64_nop_size(inst,size) do { amd64_codegen_pre(inst); x86_nop(inst); amd64_codegen_post(inst); } while (0) +//#define amd64_alu_reg_imm_size(inst,opc,reg,imm,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,(reg)); x86_alu_reg_imm((inst),(opc),((reg)&0x7),(imm)); amd64_codegen_post(inst); } while (0) +#define amd64_alu_mem_imm_size(inst,opc,mem,imm,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,0); x86_alu_mem_imm((inst),(opc),(mem),(imm)); amd64_codegen_post(inst); } while (0) +#define amd64_alu_membase_imm_size(inst,opc,basereg,disp,imm,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,(basereg)); x86_alu_membase_imm((inst),(opc),((basereg)&0x7),(disp),(imm)); amd64_codegen_post(inst); } while (0) +#define amd64_alu_membase8_imm_size(inst,opc,basereg,disp,imm,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,(basereg)); x86_alu_membase8_imm((inst),(opc),((basereg)&0x7),(disp),(imm)); amd64_codegen_post(inst); } while (0) +#define amd64_alu_mem_reg_size(inst,opc,mem,reg,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,(reg)); 
x86_alu_mem_reg((inst),(opc),(mem),((reg)&0x7)); amd64_codegen_post(inst); } while (0) +#define amd64_alu_membase_reg_size(inst,opc,basereg,disp,reg,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),(reg),0,(basereg)); x86_alu_membase_reg((inst),(opc),((basereg)&0x7),(disp),((reg)&0x7)); amd64_codegen_post(inst); } while (0) +//#define amd64_alu_reg_reg_size(inst,opc,dreg,reg,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),(dreg),0,(reg)); x86_alu_reg_reg((inst),(opc),((dreg)&0x7),((reg)&0x7)); amd64_codegen_post(inst); } while (0) +#define amd64_alu_reg8_reg8_size(inst,opc,dreg,reg,is_dreg_h,is_reg_h,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),(dreg),0,(reg)); x86_alu_reg8_reg8((inst),(opc),((dreg)&0x7),((reg)&0x7),(is_dreg_h),(is_reg_h)); amd64_codegen_post(inst); } while (0) +#define amd64_alu_reg_mem_size(inst,opc,reg,mem,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,(reg)); x86_alu_reg_mem((inst),(opc),((reg)&0x7),(mem)); amd64_codegen_post(inst); } while (0) +#define amd64_alu_reg_membase_size(inst,opc,reg,basereg,disp,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),(reg),0,(basereg)); x86_alu_reg_membase((inst),(opc),((reg)&0x7),((basereg)&0x7),(disp)); amd64_codegen_post(inst); } while (0) +#define amd64_test_reg_imm_size(inst,reg,imm,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,(reg)); x86_test_reg_imm((inst),((reg)&0x7),(imm)); amd64_codegen_post(inst); } while (0) +#define amd64_test_mem_imm_size(inst,mem,imm,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,0); x86_test_mem_imm((inst),(mem),(imm)); amd64_codegen_post(inst); } while (0) +#define amd64_test_membase_imm_size(inst,basereg,disp,imm,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,(basereg)); x86_test_membase_imm((inst),((basereg)&0x7),(disp),(imm)); amd64_codegen_post(inst); } while (0) +#define amd64_test_reg_reg_size(inst,dreg,reg,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),(dreg),0,(reg)); x86_test_reg_reg((inst),((dreg)&0x7),((reg)&0x7)); amd64_codegen_post(inst); } while (0) +#define amd64_test_mem_reg_size(inst,mem,reg,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,(reg)); x86_test_mem_reg((inst),(mem),((reg)&0x7)); amd64_codegen_post(inst); } while (0) +#define amd64_test_membase_reg_size(inst,basereg,disp,reg,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),(reg),0,(basereg)); x86_test_membase_reg((inst),((basereg)&0x7),(disp),((reg)&0x7)); amd64_codegen_post(inst); } while (0) +#define amd64_shift_reg_imm_size(inst,opc,reg,imm,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,(reg)); x86_shift_reg_imm((inst),(opc),((reg)&0x7),(imm)); amd64_codegen_post(inst); } while (0) +#define amd64_shift_mem_imm_size(inst,opc,mem,imm,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,(reg)); x86_shift_mem_imm((inst),(opc),(mem),(imm)); amd64_codegen_post(inst); } while (0) +#define amd64_shift_membase_imm_size(inst,opc,basereg,disp,imm,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),(reg),0,(basereg)); x86_shift_membase_imm((inst),(opc),((basereg)&0x7),(disp),(imm)); amd64_codegen_post(inst); } while (0) +#define amd64_shift_reg_size(inst,opc,reg,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,(reg)); x86_shift_reg((inst),(opc),((reg)&0x7)); amd64_codegen_post(inst); } while (0) +#define 
amd64_shift_mem_size(inst,opc,mem,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,(reg)); x86_shift_mem((inst),(opc),(mem)); amd64_codegen_post(inst); } while (0) +#define amd64_shift_membase_size(inst,opc,basereg,disp,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,(basereg)); x86_shift_membase((inst),(opc),((basereg)&0x7),(disp)); amd64_codegen_post(inst); } while (0) +#define amd64_shrd_reg_size(inst,dreg,reg,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),(dreg),0,(reg)); x86_shrd_reg((inst),((dreg)&0x7),((reg)&0x7)); amd64_codegen_post(inst); } while (0) +#define amd64_shrd_reg_imm_size(inst,dreg,reg,shamt,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),(dreg),0,(reg)); x86_shrd_reg_imm((inst),((dreg)&0x7),((reg)&0x7),(shamt)); amd64_codegen_post(inst); } while (0) +#define amd64_shld_reg_size(inst,dreg,reg,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),(dreg),0,(reg)); x86_shld_reg((inst),((dreg)&0x7),((reg)&0x7)); amd64_codegen_post(inst); } while (0) +#define amd64_shld_reg_imm_size(inst,dreg,reg,shamt,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),(dreg),0,(reg)); x86_shld_reg_imm((inst),((dreg)&0x7),((reg)&0x7),(shamt)); amd64_codegen_post(inst); } while (0) +#define amd64_mul_reg_size(inst,reg,is_signed,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,(reg)); x86_mul_reg((inst),((reg)&0x7),(is_signed)); amd64_codegen_post(inst); } while (0) +#define amd64_mul_mem_size(inst,mem,is_signed,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,0); x86_mul_mem((inst),(mem),(is_signed)); amd64_codegen_post(inst); } while (0) +#define amd64_mul_membase_size(inst,basereg,disp,is_signed,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,(basereg)); x86_mul_membase((inst),((basereg)&0x7),(disp),(is_signed)); amd64_codegen_post(inst); } while (0) +#define amd64_imul_reg_reg_size(inst,dreg,reg,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),(dreg),0,(reg)); x86_imul_reg_reg((inst),((dreg)&0x7),((reg)&0x7)); amd64_codegen_post(inst); } while (0) +#define amd64_imul_reg_mem_size(inst,reg,mem,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,(reg)); x86_imul_reg_mem((inst),((reg)&0x7),(mem)); amd64_codegen_post(inst); } while (0) +#define amd64_imul_reg_membase_size(inst,reg,basereg,disp,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),(reg),0,(basereg)); x86_imul_reg_membase((inst),((reg)&0x7),((basereg)&0x7),(disp)); amd64_codegen_post(inst); } while (0) +#define amd64_imul_reg_reg_imm_size(inst,dreg,reg,imm,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),(dreg),0,(reg)); x86_imul_reg_reg_imm((inst),((dreg)&0x7),((reg)&0x7),(imm)); amd64_codegen_post(inst); } while (0) +#define amd64_imul_reg_mem_imm_size(inst,reg,mem,imm,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,(reg)); x86_imul_reg_mem_imm((inst),((reg)&0x7),(mem),(imm)); amd64_codegen_post(inst); } while (0) +#define amd64_imul_reg_membase_imm_size(inst,reg,basereg,disp,imm,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),(reg),0,(basereg)); x86_imul_reg_membase_imm((inst),((reg)&0x7),((basereg)&0x7),(disp),(imm)); amd64_codegen_post(inst); } while (0) +#define amd64_div_reg_size(inst,reg,is_signed,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,(reg)); x86_div_reg((inst),((reg)&0x7),(is_signed)); amd64_codegen_post(inst); } while 
(0) +#define amd64_div_mem_size(inst,mem,is_signed,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,0); x86_div_mem((inst),(mem),(is_signed)); amd64_codegen_post(inst); } while (0) +#define amd64_div_membase_size(inst,basereg,disp,is_signed,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,(basereg)); x86_div_membase((inst),((basereg)&0x7),(disp),(is_signed)); amd64_codegen_post(inst); } while (0) +#define amd64_mov_mem_reg_size(inst,mem,reg,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,(reg)); x86_mov_mem_reg((inst),(mem),((reg)&0x7),(size) == 8 ? 4 : (size)); amd64_codegen_post(inst); } while (0) +//#define amd64_mov_regp_reg_size(inst,regp,reg,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),(regp),0,(reg)); x86_mov_regp_reg((inst),(regp),((reg)&0x7),(size) == 8 ? 4 : (size)); amd64_codegen_post(inst); } while (0) +//#define amd64_mov_membase_reg_size(inst,basereg,disp,reg,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),(reg),0,(basereg)); x86_mov_membase_reg((inst),((basereg)&0x7),(disp),((reg)&0x7),(size) == 8 ? 4 : (size)); amd64_codegen_post(inst); } while (0) +#define amd64_mov_memindex_reg_size(inst,basereg,disp,indexreg,shift,reg,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),(reg),(indexreg),(basereg)); x86_mov_memindex_reg((inst),((basereg)&0x7),(disp),((indexreg)&0x7),(shift),((reg)&0x7),(size) == 8 ? 4 : (size)); amd64_codegen_post(inst); } while (0) +#define amd64_mov_reg_reg_size(inst,dreg,reg,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),(dreg),0,(reg)); x86_mov_reg_reg((inst),((dreg)&0x7),((reg)&0x7),(size) == 8 ? 4 : (size)); amd64_codegen_post(inst); } while (0) +//#define amd64_mov_reg_mem_size(inst,reg,mem,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,(reg)); x86_mov_reg_mem((inst),((reg)&0x7),(mem),(size) == 8 ? 4 : (size)); amd64_codegen_post(inst); } while (0) +//#define amd64_mov_reg_membase_size(inst,reg,basereg,disp,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),(reg),0,(basereg)); x86_mov_reg_membase((inst),((reg)&0x7),((basereg)&0x7),(disp),(size) == 8 ? 4 : (size)); amd64_codegen_post(inst); } while (0) +//#define amd64_mov_reg_memindex_size(inst,reg,basereg,disp,indexreg,shift,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),(reg),(indexreg),(basereg)); x86_mov_reg_memindex((inst),((reg)&0x7),((basereg)&0x7),(disp),((indexreg)&0x7),(shift),(size) == 8 ? 4 : (size)); amd64_codegen_post(inst); } while (0) +#define amd64_clear_reg_size(inst,reg,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,(reg)); x86_clear_reg((inst),((reg)&0x7)); amd64_codegen_post(inst); } while (0) +//#define amd64_mov_reg_imm_size(inst,reg,imm,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,(reg)); x86_mov_reg_imm((inst),((reg)&0x7),(imm)); amd64_codegen_post(inst); } while (0) +#define amd64_mov_mem_imm_size(inst,mem,imm,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,0); x86_mov_mem_imm((inst),(mem),(imm),(size) == 8 ? 4 : (size)); amd64_codegen_post(inst); } while (0) +//#define amd64_mov_membase_imm_size(inst,basereg,disp,imm,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,(basereg)); x86_mov_membase_imm((inst),((basereg)&0x7),(disp),(imm),(size) == 8 ? 
4 : (size)); amd64_codegen_post(inst); } while (0) +#define amd64_mov_memindex_imm_size(inst,basereg,disp,indexreg,shift,imm,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,(indexreg),(basereg)); x86_mov_memindex_imm((inst),((basereg)&0x7),(disp),((indexreg)&0x7),(shift),(imm),(size) == 8 ? 4 : (size)); amd64_codegen_post(inst); } while (0) +#define amd64_lea_mem_size(inst,reg,mem,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,(reg)); x86_lea_mem((inst),((reg)&0x7),(mem)); amd64_codegen_post(inst); } while (0) +//#define amd64_lea_membase_size(inst,reg,basereg,disp,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,(basereg)); x86_lea_membase((inst),((reg)&0x7),((basereg)&0x7),(disp)); amd64_codegen_post(inst); } while (0) +#define amd64_lea_memindex_size(inst,reg,basereg,disp,indexreg,shift,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),(reg),(indexreg),(basereg)); x86_lea_memindex((inst),((reg)&0x7),((basereg)&0x7),(disp),((indexreg)&0x7),(shift)); amd64_codegen_post(inst); } while (0) +#define amd64_widen_reg_size(inst,dreg,reg,is_signed,is_half,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),(dreg),0,(reg)); x86_widen_reg((inst),((dreg)&0x7),((reg)&0x7),(is_signed),(is_half)); amd64_codegen_post(inst); } while (0) +#define amd64_widen_mem_size(inst,dreg,mem,is_signed,is_half,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),(dreg),0,0); x86_widen_mem((inst),((dreg)&0x7),(mem),(is_signed),(is_half)); amd64_codegen_post(inst); } while (0) +#define amd64_widen_membase_size(inst,dreg,basereg,disp,is_signed,is_half,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),(dreg),0,(basereg)); x86_widen_membase((inst),((dreg)&0x7),((basereg)&0x7),(disp),(is_signed),(is_half)); amd64_codegen_post(inst); } while (0) +#define amd64_widen_memindex_size(inst,dreg,basereg,disp,indexreg,shift,is_signed,is_half,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),(dreg),(indexreg),(basereg)); x86_widen_memindex((inst),((dreg)&0x7),((basereg)&0x7),(disp),((indexreg)&0x7),(shift),(is_signed),(is_half)); amd64_codegen_post(inst); } while (0) +#define amd64_cdq_size(inst,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,0); x86_cdq(inst); amd64_codegen_post(inst); } while (0) +#define amd64_wait_size(inst,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,0); x86_wait(inst); amd64_codegen_post(inst); } while (0) +#define amd64_fp_op_mem_size(inst,opc,mem,is_double,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),0,0,0,0); x86_fp_op_mem((inst),(opc),(mem),(is_double)); amd64_codegen_post(inst); } while (0) +#define amd64_fp_op_membase_size(inst,opc,basereg,disp,is_double,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),0,0,0,(basereg)); x86_fp_op_membase((inst),(opc),((basereg)&0x7),(disp),(is_double)); amd64_codegen_post(inst); } while (0) +#define amd64_fp_op_size(inst,opc,index,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),0,0,0,0); x86_fp_op((inst),(opc),(index)); amd64_codegen_post(inst); } while (0) +#define amd64_fp_op_reg_size(inst,opc,index,pop_stack,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),0,0,0,0); x86_fp_op_reg((inst),(opc),(index),(pop_stack)); amd64_codegen_post(inst); } while (0) +#define amd64_fp_int_op_membase_size(inst,opc,basereg,disp,is_int,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),0,0,0,(basereg)); 
x86_fp_int_op_membase((inst),(opc),((basereg)&0x7),(disp),(is_int)); amd64_codegen_post(inst); } while (0) +#define amd64_fstp_size(inst,index,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),0,0,0,0); x86_fstp((inst),(index)); amd64_codegen_post(inst); } while (0) +#define amd64_fcompp_size(inst,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),0,0,0,0); x86_fcompp(inst); amd64_codegen_post(inst); } while (0) +#define amd64_fucompp_size(inst,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),0,0,0,0); x86_fucompp(inst); amd64_codegen_post(inst); } while (0) +#define amd64_fnstsw_size(inst,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),0,0,0,0); x86_fnstsw(inst); amd64_codegen_post(inst); } while (0) +#define amd64_fnstcw_size(inst,mem,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),0,0,0,0); x86_fnstcw((inst),(mem)); amd64_codegen_post(inst); } while (0) +#define amd64_fnstcw_membase_size(inst,basereg,disp,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,(basereg)); x86_fnstcw_membase((inst),((basereg)&0x7),(disp)); amd64_codegen_post(inst); } while (0) +#define amd64_fldcw_size(inst,mem,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),0,0,0,0); x86_fldcw((inst),(mem)); amd64_codegen_post(inst); } while (0) +#define amd64_fldcw_membase_size(inst,basereg,disp,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),0,0,0,(basereg)); x86_fldcw_membase((inst),((basereg)&0x7),(disp)); amd64_codegen_post(inst); } while (0) +#define amd64_fchs_size(inst,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),0,0,0,0); x86_fchs(inst); amd64_codegen_post(inst); } while (0) +#define amd64_frem_size(inst,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),0,0,0,0); x86_frem(inst); amd64_codegen_post(inst); } while (0) +#define amd64_fxch_size(inst,index,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),0,0,0,0); x86_fxch((inst),(index)); amd64_codegen_post(inst); } while (0) +#define amd64_fcomi_size(inst,index,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),0,0,0,0); x86_fcomi((inst),(index)); amd64_codegen_post(inst); } while (0) +#define amd64_fcomip_size(inst,index,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),0,0,0,0); x86_fcomip((inst),(index)); amd64_codegen_post(inst); } while (0) +#define amd64_fucomi_size(inst,index,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),0,0,0,0); x86_fucomi((inst),(index)); amd64_codegen_post(inst); } while (0) +#define amd64_fucomip_size(inst,index,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),0,0,0,0); x86_fucomip((inst),(index)); amd64_codegen_post(inst); } while (0) +#define amd64_fld_size(inst,mem,is_double,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),0,0,0,0); x86_fld((inst),(mem),(is_double)); amd64_codegen_post(inst); } while (0) +//#define amd64_fld_membase_size(inst,basereg,disp,is_double,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),0,0,0,(basereg)); x86_fld_membase((inst),((basereg)&0x7),(disp),(is_double)); amd64_codegen_post(inst); } while (0) +#define amd64_fld80_mem_size(inst,mem,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),0,0,0,0); x86_fld80_mem((inst),(mem)); amd64_codegen_post(inst); } while (0) +#define amd64_fld80_membase_size(inst,basereg,disp,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,(basereg)); x86_fld80_membase((inst),((basereg)&0x7),(disp)); amd64_codegen_post(inst); } while (0) +#define 
amd64_fild_size(inst,mem,is_long,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),0,0,0,0); x86_fild((inst),(mem),(is_long)); amd64_codegen_post(inst); } while (0) +#define amd64_fild_membase_size(inst,basereg,disp,is_long,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),0,0,0,(basereg)); x86_fild_membase((inst),((basereg)&0x7),(disp),(is_long)); amd64_codegen_post(inst); } while (0) +#define amd64_fld_reg_size(inst,index,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),0,0,0,0); x86_fld_reg((inst),(index)); amd64_codegen_post(inst); } while (0) +#define amd64_fldz_size(inst,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),0,0,0,0); x86_fldz(inst); amd64_codegen_post(inst); } while (0) +#define amd64_fld1_size(inst,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),0,0,0,0); x86_fld1(inst); amd64_codegen_post(inst); } while (0) +#define amd64_fldpi_size(inst,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),0,0,0,0); x86_fldpi(inst); amd64_codegen_post(inst); } while (0) +#define amd64_fst_size(inst,mem,is_double,pop_stack,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),0,0,0,0); x86_fst((inst),(mem),(is_double),(pop_stack)); amd64_codegen_post(inst); } while (0) +#define amd64_fst_membase_size(inst,basereg,disp,is_double,pop_stack,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),0,0,0,(basereg)); x86_fst_membase((inst),((basereg)&0x7),(disp),(is_double),(pop_stack)); amd64_codegen_post(inst); } while (0) +#define amd64_fst80_mem_size(inst,mem,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),0,0,0,0); x86_fst80_mem((inst),(mem)); amd64_codegen_post(inst); } while (0) +#define amd64_fst80_membase_size(inst,basereg,disp,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),0,0,0,(basereg)); x86_fst80_membase((inst),((basereg)&0x7),(disp)); amd64_codegen_post(inst); } while (0) +#define amd64_fist_pop_size(inst,mem,is_long,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),0,0,0,0); x86_fist_pop((inst),(mem),(is_long)); amd64_codegen_post(inst); } while (0) +#define amd64_fist_pop_membase_size(inst,basereg,disp,is_long,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),0,0,0,(basereg)); x86_fist_pop_membase((inst),((basereg)&0x7),(disp),(is_long)); amd64_codegen_post(inst); } while (0) +#define amd64_fstsw_size(inst,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,0); x86_fstsw(inst); amd64_codegen_post(inst); } while (0) +#define amd64_fist_membase_size(inst,basereg,disp,is_int,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),0,0,0,(basereg)); x86_fist_membase((inst),((basereg)&0x7),(disp),(is_int)); amd64_codegen_post(inst); } while (0) +//#define amd64_push_reg_size(inst,reg,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,(reg)); x86_push_reg((inst),((reg)&0x7)); amd64_codegen_post(inst); } while (0) +#define amd64_push_regp_size(inst,reg,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,(reg)); x86_push_regp((inst),((reg)&0x7)); amd64_codegen_post(inst); } while (0) +#define amd64_push_mem_size(inst,mem,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,0); x86_push_mem((inst),(mem)); amd64_codegen_post(inst); } while (0) +//#define amd64_push_membase_size(inst,basereg,disp,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,(basereg)); x86_push_membase((inst),((basereg)&0x7),(disp)); amd64_codegen_post(inst); } while (0) +#define 
amd64_push_memindex_size(inst,basereg,disp,indexreg,shift,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,(indexreg),(basereg)); x86_push_memindex((inst),((basereg)&0x7),(disp),((indexreg)&0x7),(shift)); amd64_codegen_post(inst); } while (0) +#define amd64_push_imm_size(inst,imm,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),0,0,0,0); x86_push_imm((inst),(imm)); amd64_codegen_post(inst); } while (0) +//#define amd64_pop_reg_size(inst,reg,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,(reg)); x86_pop_reg((inst),((reg)&0x7)); amd64_codegen_post(inst); } while (0) +#define amd64_pop_mem_size(inst,mem,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,0); x86_pop_mem((inst),(mem)); amd64_codegen_post(inst); } while (0) +#define amd64_pop_membase_size(inst,basereg,disp,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,(basereg)); x86_pop_membase((inst),((basereg)&0x7),(disp)); amd64_codegen_post(inst); } while (0) +#define amd64_pushad_size(inst,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,0); x86_pushad(inst); amd64_codegen_post(inst); } while (0) +#define amd64_pushfd_size(inst,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,0); x86_pushfd(inst); amd64_codegen_post(inst); } while (0) +#define amd64_popad_size(inst,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,0); x86_popad(inst); amd64_codegen_post(inst); } while (0) +#define amd64_popfd_size(inst,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,0); x86_popfd(inst); amd64_codegen_post(inst); } while (0) +#define amd64_loop_size(inst,imm,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,0); x86_loop((inst),(imm)); amd64_codegen_post(inst); } while (0) +#define amd64_loope_size(inst,imm,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,0); x86_loope((inst),(imm)); amd64_codegen_post(inst); } while (0) +#define amd64_loopne_size(inst,imm,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,0); x86_loopne((inst),(imm)); amd64_codegen_post(inst); } while (0) +#define amd64_jump32_size(inst,imm,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,0); x86_jump32((inst),(imm)); amd64_codegen_post(inst); } while (0) +#define amd64_jump8_size(inst,imm,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,0); x86_jump8((inst),(imm)); amd64_codegen_post(inst); } while (0) +#if !defined( __native_client_codegen__ ) +/* Defined above for Native Client, so they can be used in other macros */ #define amd64_jump_reg_size(inst,reg,size) do { amd64_emit_rex ((inst),0,0,0,(reg)); x86_jump_reg((inst),((reg)&0x7)); } while (0) #define amd64_jump_mem_size(inst,mem,size) do { amd64_emit_rex ((inst),(size),0,0,0); x86_jump_mem((inst),(mem)); } while (0) -#define amd64_jump_disp_size(inst,disp,size) do { amd64_emit_rex ((inst),0,0,0,0); x86_jump_disp((inst),(disp)); } while (0) +#endif +#define amd64_jump_disp_size(inst,disp,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),0,0,0,0); x86_jump_disp((inst),(disp)); amd64_codegen_post(inst); } while (0) #define amd64_branch8_size(inst,cond,imm,is_signed,size) do { x86_branch8((inst),(cond),(imm),(is_signed)); } while (0) #define amd64_branch32_size(inst,cond,imm,is_signed,size) do { x86_branch32((inst),(cond),(imm),(is_signed)); } while (0) -#define amd64_branch_size(inst,cond,target,is_signed,size) do { amd64_emit_rex ((inst),(size),0,0,0); 
x86_branch((inst),(cond),(target),(is_signed)); } while (0) -#define amd64_branch_disp_size(inst,cond,disp,is_signed,size) do { amd64_emit_rex ((inst),(size),0,0,0); x86_branch_disp((inst),(cond),(disp),(is_signed)); } while (0) -#define amd64_set_reg_size(inst,cond,reg,is_signed,size) do { amd64_emit_rex((inst),1,0,0,(reg)); x86_set_reg((inst),(cond),((reg)&0x7),(is_signed)); } while (0) -#define amd64_set_mem_size(inst,cond,mem,is_signed,size) do { x86_set_mem((inst),(cond),(mem),(is_signed)); } while (0) -#define amd64_set_membase_size(inst,cond,basereg,disp,is_signed,size) do { amd64_emit_rex ((inst),0,0,0,(basereg)); x86_set_membase((inst),(cond),((basereg)&0x7),(disp),(is_signed)); } while (0) +#define amd64_branch_size_body(inst,cond,target,is_signed,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,0); x86_branch((inst),(cond),(target),(is_signed)); amd64_codegen_post(inst); } while (0) +#if defined(__default_codegen__) +#define amd64_branch_size(inst,cond,target,is_signed,size) do { amd64_branch_size_body((inst),(cond),(target),(is_signed),(size)); } while (0) +#elif defined(__native_client_codegen__) +#define amd64_branch_size(inst,cond,target,is_signed,size) \ + do { \ + /* amd64_branch_size_body used twice in */ \ + /* case of relocation by amd64_codegen_post */ \ + guint8* branch_start; \ + amd64_codegen_pre(inst); \ + amd64_branch_size_body((inst),(cond),(target),(is_signed),(size)); \ + inst = amd64_codegen_post(inst); \ + branch_start = inst; \ + amd64_branch_size_body((inst),(cond),(target),(is_signed),(size)); \ + mono_amd64_patch(branch_start, (target)); \ + } while (0) +#endif + +#define amd64_branch_disp_size(inst,cond,disp,is_signed,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,0); x86_branch_disp((inst),(cond),(disp),(is_signed)); amd64_codegen_post(inst); } while (0) +#define amd64_set_reg_size(inst,cond,reg,is_signed,size) do { amd64_codegen_pre(inst); amd64_emit_rex((inst),1,0,0,(reg)); x86_set_reg((inst),(cond),((reg)&0x7),(is_signed)); amd64_codegen_post(inst); } while (0) +#define amd64_set_mem_size(inst,cond,mem,is_signed,size) do { amd64_codegen_pre(inst); x86_set_mem((inst),(cond),(mem),(is_signed)); amd64_codegen_post(inst); } while (0) +#define amd64_set_membase_size(inst,cond,basereg,disp,is_signed,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),0,0,0,(basereg)); x86_set_membase((inst),(cond),((basereg)&0x7),(disp),(is_signed)); amd64_codegen_post(inst); } while (0) +//#define amd64_call_reg_size(inst,reg,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,(reg)); x86_call_reg((inst),((reg)&0x7)); amd64_codegen_post(inst); } while (0) +#define amd64_call_mem_size(inst,mem,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,0); x86_call_mem((inst),(mem)); amd64_codegen_post(inst); } while (0) + +#if defined(__default_codegen__) + #define amd64_call_imm_size(inst,disp,size) do { x86_call_imm((inst),(disp)); } while (0) -//#define amd64_call_reg_size(inst,reg,size) do { amd64_emit_rex ((inst),(size),0,0,(reg)); x86_call_reg((inst),((reg)&0x7)); } while (0) -#define amd64_call_mem_size(inst,mem,size) do { amd64_emit_rex ((inst),(size),0,0,0); x86_call_mem((inst),(mem)); } while (0) #define amd64_call_code_size(inst,target,size) do { x86_call_code((inst),(target)); } while (0) -//#define amd64_ret_size(inst,size) do { amd64_emit_rex ((inst),(size),0,0,0); x86_ret(inst); } while (0) -#define amd64_ret_imm_size(inst,imm,size) do { amd64_emit_rex ((inst),(size),0,0,0); 
x86_ret_imm((inst),(imm)); } while (0) -#define amd64_cmov_reg_size(inst,cond,is_signed,dreg,reg,size) do { amd64_emit_rex ((inst),(size),(dreg),0,(reg)); x86_cmov_reg((inst),(cond),(is_signed),((dreg)&0x7),((reg)&0x7)); } while (0) -#define amd64_cmov_mem_size(inst,cond,is_signed,reg,mem,size) do { amd64_emit_rex ((inst),(size),0,0,(reg)); x86_cmov_mem((inst),(cond),(is_signed),((reg)&0x7),(mem)); } while (0) -#define amd64_cmov_membase_size(inst,cond,is_signed,reg,basereg,disp,size) do { amd64_emit_rex ((inst),(size),0,0,(basereg)); x86_cmov_membase((inst),(cond),(is_signed),((reg)&0x7),((basereg)&0x7),(disp)); } while (0) -#define amd64_enter_size(inst,framesize) do { amd64_emit_rex ((inst),(size),0,0,0); x86_enter((inst),(framesize)); } while (0) -//#define amd64_leave_size(inst,size) do { amd64_emit_rex ((inst),(size),0,0,0); x86_leave(inst); } while (0) -#define amd64_sahf_size(inst,size) do { amd64_emit_rex ((inst),(size),0,0,0); x86_sahf(inst); } while (0) -#define amd64_fsin_size(inst,size) do { x86_fsin(inst); } while (0) -#define amd64_fcos_size(inst,size) do { x86_fcos(inst); } while (0) -#define amd64_fabs_size(inst,size) do { x86_fabs(inst); } while (0) -#define amd64_ftst_size(inst,size) do { x86_ftst(inst); } while (0) -#define amd64_fxam_size(inst,size) do { x86_fxam(inst); } while (0) -#define amd64_fpatan_size(inst,size) do { x86_fpatan(inst); } while (0) -#define amd64_fprem_size(inst,size) do { x86_fprem(inst); } while (0) -#define amd64_fprem1_size(inst,size) do { x86_fprem1(inst); } while (0) -#define amd64_frndint_size(inst,size) do { x86_frndint(inst); } while (0) -#define amd64_fsqrt_size(inst,size) do { x86_fsqrt(inst); } while (0) -#define amd64_fptan_size(inst,size) do { x86_fptan(inst); } while (0) -//#define amd64_padding_size(inst,size) do { x86_padding((inst),(size)); } while (0) -#define amd64_prolog_size(inst,frame_size,reg_mask,size) do { amd64_emit_rex ((inst),(size),0,0,0); x86_prolog((inst),(frame_size),(reg_mask)); } while (0) -#define amd64_epilog_size(inst,reg_mask,size) do { amd64_emit_rex ((inst),(size),0,0,0); x86_epilog((inst),(reg_mask)); } while (0) -#define amd64_xadd_reg_reg_size(inst,dreg,reg,size) do { amd64_emit_rex ((inst),(size),(dreg),0,(reg)); x86_xadd_reg_reg ((inst), (dreg), (reg), (size)); } while (0) -#define amd64_xadd_mem_reg_size(inst,mem,reg,size) do { amd64_emit_rex ((inst),(size),0,0,(reg)); x86_xadd_mem_reg((inst),(mem),((reg)&0x7), (size)); } while (0) -#define amd64_xadd_membase_reg_size(inst,basereg,disp,reg,size) do { amd64_emit_rex ((inst),(size),(reg),0,(basereg)); x86_xadd_membase_reg((inst),((basereg)&0x7),(disp),((reg)&0x7),(size)); } while (0) + +#elif defined(__native_client_codegen__) +/* Size is ignored for Native Client calls, we restrict jumping to 32-bits */ +#define amd64_call_imm_size(inst,disp,size) \ + do { \ + amd64_codegen_pre((inst)); \ + amd64_call_sequence_pre((inst)); \ + x86_call_imm((inst),(disp)); \ + amd64_call_sequence_post((inst)); \ + amd64_codegen_post((inst)); \ + } while (0) + +/* x86_call_code is called twice below, first so we can get the size of the */ +/* call sequence, and again so the exact offset from "inst" is used, since */ +/* the sequence could have moved from amd64_call_sequence_post. 
*/ +/* Size is ignored for Native Client jumps, we restrict jumping to 32-bits */ +#define amd64_call_code_size(inst,target,size) \ + do { \ + amd64_codegen_pre((inst)); \ + guint8* adjusted_start; \ + guint8* call_start; \ + amd64_call_sequence_pre((inst)); \ + x86_call_code((inst),(target)); \ + adjusted_start = amd64_call_sequence_post((inst)); \ + call_start = adjusted_start; \ + x86_call_code(adjusted_start, (target)); \ + amd64_codegen_post((inst)); \ + mono_amd64_patch(call_start, (target)); \ + } while (0) + +#endif /*__native_client_codegen__*/ + +//#define amd64_ret_size(inst,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,0); x86_ret(inst); amd64_codegen_post(inst); } while (0) +#define amd64_ret_imm_size(inst,imm,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,0); x86_ret_imm((inst),(imm)); amd64_codegen_post(inst); } while (0) +#define amd64_cmov_reg_size(inst,cond,is_signed,dreg,reg,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),(dreg),0,(reg)); x86_cmov_reg((inst),(cond),(is_signed),((dreg)&0x7),((reg)&0x7)); amd64_codegen_post(inst); } while (0) +#define amd64_cmov_mem_size(inst,cond,is_signed,reg,mem,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,(reg)); x86_cmov_mem((inst),(cond),(is_signed),((reg)&0x7),(mem)); amd64_codegen_post(inst); } while (0) +#define amd64_cmov_membase_size(inst,cond,is_signed,reg,basereg,disp,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,(basereg)); x86_cmov_membase((inst),(cond),(is_signed),((reg)&0x7),((basereg)&0x7),(disp)); amd64_codegen_post(inst); } while (0) +#define amd64_enter_size(inst,framesize) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,0); x86_enter((inst),(framesize)); amd64_codegen_post(inst); } while (0) +//#define amd64_leave_size(inst,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,0); x86_leave(inst); amd64_codegen_post(inst); } while (0) +#define amd64_sahf_size(inst,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,0); x86_sahf(inst); amd64_codegen_post(inst); } while (0) +#define amd64_fsin_size(inst,size) do { amd64_codegen_pre(inst); x86_fsin(inst); amd64_codegen_post(inst); } while (0) +#define amd64_fcos_size(inst,size) do { amd64_codegen_pre(inst); x86_fcos(inst); amd64_codegen_post(inst); } while (0) +#define amd64_fabs_size(inst,size) do { amd64_codegen_pre(inst); x86_fabs(inst); amd64_codegen_post(inst); } while (0) +#define amd64_ftst_size(inst,size) do { amd64_codegen_pre(inst); x86_ftst(inst); amd64_codegen_post(inst); } while (0) +#define amd64_fxam_size(inst,size) do { amd64_codegen_pre(inst); x86_fxam(inst); amd64_codegen_post(inst); } while (0) +#define amd64_fpatan_size(inst,size) do { amd64_codegen_pre(inst); x86_fpatan(inst); amd64_codegen_post(inst); } while (0) +#define amd64_fprem_size(inst,size) do { amd64_codegen_pre(inst); x86_fprem(inst); amd64_codegen_post(inst); } while (0) +#define amd64_fprem1_size(inst,size) do { amd64_codegen_pre(inst); x86_fprem1(inst); amd64_codegen_post(inst); } while (0) +#define amd64_frndint_size(inst,size) do { amd64_codegen_pre(inst); x86_frndint(inst); amd64_codegen_post(inst); } while (0) +#define amd64_fsqrt_size(inst,size) do { amd64_codegen_pre(inst); x86_fsqrt(inst); amd64_codegen_post(inst); } while (0) +#define amd64_fptan_size(inst,size) do { amd64_codegen_pre(inst); x86_fptan(inst); amd64_codegen_post(inst); } while (0) +//#define amd64_padding_size(inst,size) do { amd64_codegen_pre(inst); 
x86_padding((inst),(size)); amd64_codegen_post(inst); } while (0) +#define amd64_prolog_size(inst,frame_size,reg_mask,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,0); x86_prolog((inst),(frame_size),(reg_mask)); amd64_codegen_post(inst); } while (0) +#define amd64_epilog_size(inst,reg_mask,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,0); x86_epilog((inst),(reg_mask)); amd64_codegen_post(inst); } while (0) +#define amd64_xadd_reg_reg_size(inst,dreg,reg,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),(dreg),0,(reg)); x86_xadd_reg_reg ((inst), (dreg), (reg), (size)); amd64_codegen_post(inst); } while (0) +#define amd64_xadd_mem_reg_size(inst,mem,reg,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,(reg)); x86_xadd_mem_reg((inst),(mem),((reg)&0x7), (size)); amd64_codegen_post(inst); } while (0) +#define amd64_xadd_membase_reg_size(inst,basereg,disp,reg,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),(reg),0,(basereg)); x86_xadd_membase_reg((inst),((basereg)&0x7),(disp),((reg)&0x7),(size)); amd64_codegen_post(inst); } while (0) diff --git a/mono/arch/amd64/tramp.c b/mono/arch/amd64/tramp.c index 5a4f9a9ed2b..6dbec93e859 100644 --- a/mono/arch/amd64/tramp.c +++ b/mono/arch/amd64/tramp.c @@ -543,7 +543,7 @@ enum_marshal2: amd64_call_reg (p, AMD64_R11); if (sig->ret->byref || string_ctor || !(retval_implicit || sig->ret->type == MONO_TYPE_VOID)) { - amd64_mov_reg_membase(p, AMD64_RSI, AMD64_RBP, -8, 8); + amd64_mov_reg_membase(p, AMD64_RSI, AMD64_RBP, -8, SIZEOF_VOID_P); } /* * Handle retval. @@ -883,19 +883,19 @@ enum_calc_size: * Initialize MonoInvocation fields, first the ones known now. */ amd64_alu_reg_reg (p, X86_XOR, AMD64_RAX, AMD64_RAX); - amd64_mov_membase_reg (p, AMD64_RBP, (mono_invocation_pos + G_STRUCT_OFFSET (MonoInvocation, ex)), AMD64_RAX, 8); - amd64_mov_membase_reg (p, AMD64_RBP, (mono_invocation_pos + G_STRUCT_OFFSET (MonoInvocation, ex_handler)), AMD64_RAX, 8); - amd64_mov_membase_reg (p, AMD64_RBP, (mono_invocation_pos + G_STRUCT_OFFSET (MonoInvocation, parent)), AMD64_RAX, 8); + amd64_mov_membase_reg (p, AMD64_RBP, (mono_invocation_pos + G_STRUCT_OFFSET (MonoInvocation, ex)), AMD64_RAX, SIZEOF_VOID_P); + amd64_mov_membase_reg (p, AMD64_RBP, (mono_invocation_pos + G_STRUCT_OFFSET (MonoInvocation, ex_handler)), AMD64_RAX, SIZEOF_VOID_P); + amd64_mov_membase_reg (p, AMD64_RBP, (mono_invocation_pos + G_STRUCT_OFFSET (MonoInvocation, parent)), AMD64_RAX, SIZEOF_VOID_P); /* * Set the method pointer. */ - amd64_mov_membase_imm (p, AMD64_RBP, (mono_invocation_pos + G_STRUCT_OFFSET (MonoInvocation, method)), (long)method, 8); + amd64_mov_membase_imm (p, AMD64_RBP, (mono_invocation_pos + G_STRUCT_OFFSET (MonoInvocation, method)), (long)method, SIZEOF_VOID_P); /* * Handle this. */ if (sig->hasthis) - amd64_mov_membase_reg(p, AMD64_RBP, (mono_invocation_pos + G_STRUCT_OFFSET (MonoInvocation, obj)), this_reg, 8); + amd64_mov_membase_reg(p, AMD64_RBP, (mono_invocation_pos + G_STRUCT_OFFSET (MonoInvocation, obj)), this_reg, SIZEOF_VOID_P); /* * Handle the arguments. stackval_pos is the offset from RBP of the stackval in the MonoInvocation args array . @@ -903,7 +903,7 @@ enum_calc_size: * We just call stackval_from_data to handle all the (nasty) issues.... 
*/ amd64_lea_membase (p, AMD64_RAX, AMD64_RBP, stackval_pos); - amd64_mov_membase_reg (p, AMD64_RBP, (mono_invocation_pos + G_STRUCT_OFFSET (MonoInvocation, stack_args)), AMD64_RAX, 8); + amd64_mov_membase_reg (p, AMD64_RBP, (mono_invocation_pos + G_STRUCT_OFFSET (MonoInvocation, stack_args)), AMD64_RAX, SIZEOF_VOID_P); for (i = 0; i < sig->param_count; ++i) { /* Need to call stackval_from_data (MonoType *type, stackval *result, char *data, gboolean pinvoke); */ amd64_mov_reg_imm (p, AMD64_R11, stackval_from_data); @@ -926,12 +926,12 @@ enum_calc_size: * Handle the return value storage area. */ amd64_lea_membase (p, AMD64_RAX, AMD64_RBP, stackval_pos); - amd64_mov_membase_reg (p, AMD64_RBP, (mono_invocation_pos + G_STRUCT_OFFSET (MonoInvocation, retval)), AMD64_RAX, 8); + amd64_mov_membase_reg (p, AMD64_RBP, (mono_invocation_pos + G_STRUCT_OFFSET (MonoInvocation, retval)), AMD64_RAX, SIZEOF_VOID_P); if (sig->ret->type == MONO_TYPE_VALUETYPE && !sig->ret->byref) { MonoClass *klass = sig->ret->data.klass; if (!klass->enumtype) { - amd64_mov_reg_membase (p, AMD64_RCX, AMD64_RBP, retval_ptr_rbp_offset, 8); - amd64_mov_membase_reg (p, AMD64_RBP, stackval_pos, AMD64_RCX, 8); + amd64_mov_reg_membase (p, AMD64_RCX, AMD64_RBP, retval_ptr_rbp_offset, SIZEOF_VOID_P); + amd64_mov_membase_reg (p, AMD64_RBP, stackval_pos, AMD64_RCX, SIZEOF_VOID_P); } } @@ -947,7 +947,7 @@ enum_calc_size: */ amd64_lea_membase (p, AMD64_RAX, AMD64_RBP, stackval_pos); if (sig->ret->byref) { - amd64_mov_reg_membase (p, AMD64_RAX, AMD64_RAX, 0, 8); + amd64_mov_reg_membase (p, AMD64_RAX, AMD64_RAX, 0, SIZEOF_VOID_P); } else { int simpletype = sig->ret->type; enum_retvalue: diff --git a/mono/arch/x86/x86-codegen.h b/mono/arch/x86/x86-codegen.h index af3e3c6f558..6ca3695c7e1 100644 --- a/mono/arch/x86/x86-codegen.h +++ b/mono/arch/x86/x86-codegen.h @@ -17,9 +17,7 @@ #include #ifdef __native_client_codegen__ -#define kNaClAlignment 32 -#define kNaClAlignmentMask (kNaClAlignment - 1) -extern guint8 nacl_align_byte; +extern gint8 nacl_align_byte; #endif /* __native_client_codegen__ */ @@ -28,15 +26,10 @@ extern guint8 nacl_align_byte; #define x86_call_sequence_pre(inst) guint8* _code_start = (inst); #define x86_call_sequence_post(inst) \ (mono_nacl_align_call(&_code_start, &(inst)), _code_start); -#define x86_call_sequence_pre_val(inst) guint8* _code_start = (inst); -#define x86_call_sequence_post_val(inst) \ - (mono_nacl_align_call(&_code_start, &(inst)), _code_start); #else #define x86_codegen_pre(inst_ptr_ptr, inst_len) do {} while (0) -#define x86_call_sequence_pre(inst) -#define x86_call_sequence_post(inst) -#define x86_call_sequence_pre_val(inst) guint8* _code_start = (inst); -#define x86_call_sequence_post_val(inst) _code_start +#define x86_call_sequence_pre(inst) guint8* _code_start = (inst); +#define x86_call_sequence_post(inst) _code_start #endif /* __native_client_codegen__ */ @@ -305,7 +298,7 @@ typedef union { #define kMaxMembaseEmitPadding 6 -#define x86_membase_emit(inst,r,basereg,disp) do {\ +#define x86_membase_emit_body(inst,r,basereg,disp) do {\ if ((basereg) == X86_ESP) { \ if ((disp) == 0) { \ x86_address_byte ((inst), 0, (r), X86_ESP); \ @@ -334,6 +327,18 @@ typedef union { } \ } while (0) +#if defined(__native_client_codegen__) && defined(TARGET_AMD64) +#define x86_membase_emit(inst,r,basereg,disp) \ + do { \ + amd64_nacl_membase_handler(&(inst), (basereg), (disp), (r)) ; \ + } while (0) +#else /* __default_codegen__ || 32-bit NaCl codegen */ +#define x86_membase_emit(inst,r,basereg,disp) \ + do { \ + 
x86_membase_emit_body((inst),(r),(basereg),(disp)); \ + } while (0) +#endif + #define kMaxMemindexEmitPadding 6 #define x86_memindex_emit(inst,r,basereg,disp,indexreg,shift) \ @@ -351,7 +356,7 @@ typedef union { x86_imm_emit8 ((inst), (disp)); \ } else { \ x86_address_byte ((inst), 2, (r), 4); \ - x86_address_byte ((inst), (shift), (indexreg), 5); \ + x86_address_byte ((inst), (shift), (indexreg), (basereg)); \ x86_imm_emit32 ((inst), (disp)); \ } \ } while (0) @@ -438,12 +443,23 @@ typedef union { } while ( in_nop ); \ } while (0) +#if defined(__native_client__) #define x86_patch(ins,target) \ do { \ unsigned char* inst = (ins); \ + guint8* new_target = nacl_modify_patch_target((target)); \ x86_skip_nops((inst)); \ - x86_do_patch((inst), (target)); \ + x86_do_patch((inst), new_target); \ } while (0) +#else /* __native_client__ */ +#define x86_patch(ins,target) \ + do { \ + unsigned char* inst = (ins); \ + guint8* new_target = (target); \ + x86_skip_nops((inst)); \ + x86_do_patch((inst), new_target); \ + } while (0) +#endif /* __native_client__ */ #else #define x86_patch(ins,target) do { x86_do_patch((ins), (target)); } while (0) @@ -472,6 +488,13 @@ typedef union { #define x86_movsl(inst) do { *(inst)++ =(unsigned char)0xa5; } while (0) #define x86_movsd(inst) x86_movsl((inst)) +#if defined(__default_codegen__) +#define x86_prefix(inst,p) \ + do { \ + *(inst)++ =(unsigned char) (p); \ + } while (0) +#elif defined(__native_client_codegen__) +#if defined(TARGET_X86) /* kNaClAlignment - 1 is the max value we can pass into x86_codegen_pre. */ /* This keeps us from having to call x86_codegen_pre with specific */ /* knowledge of the size of the instruction that follows it, and */ @@ -481,6 +504,18 @@ typedef union { x86_codegen_pre(&(inst), kNaClAlignment - 1); \ *(inst)++ =(unsigned char) (p); \ } while (0) +#elif defined(TARGET_AMD64) +/* We need to tag any prefixes so we can perform proper membase sandboxing */ +/* See: mini-amd64.c:amd64_nacl_membase_handler for verbose details */ +#define x86_prefix(inst,p) \ + do { \ + amd64_nacl_tag_legacy_prefix((inst)); \ + *(inst)++ =(unsigned char) (p); \ + } while (0) + +#endif /* TARGET_AMD64 */ + +#endif /* __native_client_codegen__ */ #define x86_rdtsc(inst) \ do { \ @@ -1041,7 +1076,7 @@ typedef union { x86_codegen_pre(&(inst), 7); \ switch ((size)) { \ case 1: *(inst)++ = (unsigned char)0x88; break; \ - case 2: *(inst)++ = (unsigned char)0x66; /* fall through */ \ + case 2: x86_prefix((inst), X86_OPERAND_PREFIX); /* fall through */ \ case 4: *(inst)++ = (unsigned char)0x89; break; \ default: assert (0); \ } \ @@ -1053,7 +1088,7 @@ typedef union { x86_codegen_pre(&(inst), 3); \ switch ((size)) { \ case 1: *(inst)++ = (unsigned char)0x88; break; \ - case 2: *(inst)++ = (unsigned char)0x66; /* fall through */ \ + case 2: x86_prefix((inst), X86_OPERAND_PREFIX); /* fall through */ \ case 4: *(inst)++ = (unsigned char)0x89; break; \ default: assert (0); \ } \ @@ -1065,7 +1100,7 @@ typedef union { x86_codegen_pre(&(inst), 2 + kMaxMembaseEmitPadding); \ switch ((size)) { \ case 1: *(inst)++ = (unsigned char)0x88; break; \ - case 2: *(inst)++ = (unsigned char)0x66; /* fall through */ \ + case 2: x86_prefix((inst), X86_OPERAND_PREFIX); /* fall through */ \ case 4: *(inst)++ = (unsigned char)0x89; break; \ default: assert (0); \ } \ @@ -1077,7 +1112,7 @@ typedef union { x86_codegen_pre(&(inst), 2 + kMaxMemindexEmitPadding); \ switch ((size)) { \ case 1: *(inst)++ = (unsigned char)0x88; break; \ - case 2: *(inst)++ = (unsigned char)0x66; /* fall through */ 
\ + case 2: x86_prefix((inst), X86_OPERAND_PREFIX); /* fall through */ \ case 4: *(inst)++ = (unsigned char)0x89; break; \ default: assert (0); \ } \ @@ -1089,7 +1124,7 @@ typedef union { x86_codegen_pre(&(inst), 3); \ switch ((size)) { \ case 1: *(inst)++ = (unsigned char)0x8a; break; \ - case 2: *(inst)++ = (unsigned char)0x66; /* fall through */ \ + case 2: x86_prefix((inst), X86_OPERAND_PREFIX); /* fall through */ \ case 4: *(inst)++ = (unsigned char)0x8b; break; \ default: assert (0); \ } \ @@ -1101,7 +1136,7 @@ typedef union { x86_codegen_pre(&(inst), 7); \ switch ((size)) { \ case 1: *(inst)++ = (unsigned char)0x8a; break; \ - case 2: *(inst)++ = (unsigned char)0x66; /* fall through */ \ + case 2: x86_prefix((inst), X86_OPERAND_PREFIX); /* fall through */ \ case 4: *(inst)++ = (unsigned char)0x8b; break; \ default: assert (0); \ } \ @@ -1115,7 +1150,7 @@ typedef union { x86_codegen_pre(&(inst), kMovRegMembasePadding); \ switch ((size)) { \ case 1: *(inst)++ = (unsigned char)0x8a; break; \ - case 2: *(inst)++ = (unsigned char)0x66; /* fall through */ \ + case 2: x86_prefix((inst), X86_OPERAND_PREFIX); /* fall through */ \ case 4: *(inst)++ = (unsigned char)0x8b; break; \ default: assert (0); \ } \ @@ -1127,7 +1162,7 @@ typedef union { x86_codegen_pre(&(inst), 2 + kMaxMemindexEmitPadding); \ switch ((size)) { \ case 1: *(inst)++ = (unsigned char)0x8a; break; \ - case 2: *(inst)++ = (unsigned char)0x66; /* fall through */ \ + case 2: x86_prefix((inst), X86_OPERAND_PREFIX); /* fall through */ \ case 4: *(inst)++ = (unsigned char)0x8b; break; \ default: assert (0); \ } \ @@ -1155,7 +1190,7 @@ typedef union { x86_imm_emit8 ((inst), (imm)); \ } else if ((size) == 2) { \ x86_codegen_pre(&(inst), 9); \ - *(inst)++ = (unsigned char)0x66; \ + x86_prefix((inst), X86_OPERAND_PREFIX); \ *(inst)++ = (unsigned char)0xc7; \ x86_mem_emit ((inst), 0, (mem)); \ x86_imm_emit16 ((inst), (imm)); \ @@ -1176,7 +1211,7 @@ typedef union { x86_imm_emit8 ((inst), (imm)); \ } else if ((size) == 2) { \ x86_codegen_pre(&(inst), 4 + kMaxMembaseEmitPadding); \ - *(inst)++ = (unsigned char)0x66; \ + x86_prefix((inst), X86_OPERAND_PREFIX); \ *(inst)++ = (unsigned char)0xc7; \ x86_membase_emit ((inst), 0, (basereg), (disp)); \ x86_imm_emit16 ((inst), (imm)); \ @@ -1197,7 +1232,7 @@ typedef union { x86_imm_emit8 ((inst), (imm)); \ } else if ((size) == 2) { \ x86_codegen_pre(&(inst), 4 + kMaxMemindexEmitPadding); \ - *(inst)++ = (unsigned char)0x66; \ + x86_prefix((inst), X86_OPERAND_PREFIX); \ *(inst)++ = (unsigned char)0xc7; \ x86_memindex_emit ((inst), 0, (basereg), (disp), (indexreg), (shift)); \ x86_imm_emit16 ((inst), (imm)); \ @@ -1681,6 +1716,7 @@ typedef union { x86_imm_emit8 ((inst), (imm)); \ } while (0) +#if defined(TARGET_X86) #define x86_jump32(inst,imm) \ do { \ x86_codegen_pre(&(inst), 5); \ @@ -1694,9 +1730,27 @@ typedef union { *(inst)++ = (unsigned char)0xeb; \ x86_imm_emit8 ((inst), (imm)); \ } while (0) +#elif defined(TARGET_AMD64) +/* These macros are used directly from mini-amd64.c and other */ +/* amd64 specific files, so they need to be instrumented directly. 
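+   ("instrumented" = emitted through amd64_codegen_pre/amd64_codegen_post, so
+   that under NaCl codegen the jump bytes cannot straddle a 32-byte bundle
+   boundary; with __default_codegen__ those hooks compile away to nothing.)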
*/ +#define x86_jump32(inst,imm) \ + do { \ + amd64_codegen_pre(inst); \ + *(inst)++ = (unsigned char)0xe9; \ + x86_imm_emit32 ((inst), (imm)); \ + amd64_codegen_post(inst); \ + } while (0) +#define x86_jump8(inst,imm) \ + do { \ + amd64_codegen_pre(inst); \ + *(inst)++ = (unsigned char)0xeb; \ + x86_imm_emit8 ((inst), (imm)); \ + amd64_codegen_post(inst); \ + } while (0) +#endif -#ifdef __native_client_codegen__ +#if defined( __native_client_codegen__ ) && defined( TARGET_X86 ) #define x86_jump_reg(inst,reg) do { \ x86_codegen_pre(&(inst), 5); \ *(inst)++ = (unsigned char)0x83; /* and */ \ @@ -1747,7 +1801,7 @@ typedef union { /* * target is a pointer in our buffer. */ -#define x86_jump_code(inst,target) \ +#define x86_jump_code_body(inst,target) \ do { \ int t; \ x86_codegen_pre(&(inst), 2); \ @@ -1761,6 +1815,31 @@ typedef union { } \ } while (0) +#if defined(__default_codegen__) +#define x86_jump_code(inst,target) \ + do { \ + x86_jump_code_body((inst),(target)); \ + } while (0) +#elif defined(__native_client_codegen__) && defined(TARGET_X86) +#define x86_jump_code(inst,target) \ + do { \ + guint8* jump_start = (inst); \ + x86_jump_code_body((inst),(target)); \ + x86_patch(jump_start, (target)); \ + } while (0) +#elif defined(__native_client_codegen__) && defined(TARGET_AMD64) +#define x86_jump_code(inst,target) \ + do { \ + /* jump_code_body is used twice because there are offsets */ \ + /* calculated based on the IP, which can change after the */ \ + /* call to amd64_codegen_post */ \ + amd64_codegen_pre(inst); \ + x86_jump_code_body((inst),(target)); \ + inst = amd64_codegen_post(inst); \ + x86_jump_code_body((inst),(target)); \ + } while (0) +#endif /* __native_client_codegen__ */ + #define x86_jump_disp(inst,disp) \ do { \ int t = (disp) - 2; \ @@ -1772,6 +1851,7 @@ typedef union { } \ } while (0) +#if defined(TARGET_X86) #define x86_branch8(inst,cond,imm,is_signed) \ do { \ x86_codegen_pre(&(inst), 2); \ @@ -1792,12 +1872,40 @@ typedef union { *(inst)++ = x86_cc_unsigned_map [(cond)] + 0x10; \ x86_imm_emit32 ((inst), (imm)); \ } while (0) +#elif defined(TARGET_AMD64) +/* These macros are used directly from mini-amd64.c and other */ +/* amd64 specific files, so they need to be instrumented directly. */ +#define x86_branch8(inst,cond,imm,is_signed) \ + do { \ + amd64_codegen_pre(inst); \ + if ((is_signed)) \ + *(inst)++ = x86_cc_signed_map [(cond)]; \ + else \ + *(inst)++ = x86_cc_unsigned_map [(cond)]; \ + x86_imm_emit8 ((inst), (imm)); \ + amd64_codegen_post(inst); \ + } while (0) +#define x86_branch32(inst,cond,imm,is_signed) \ + do { \ + amd64_codegen_pre(inst); \ + *(inst)++ = (unsigned char)0x0f; \ + if ((is_signed)) \ + *(inst)++ = x86_cc_signed_map [(cond)] + 0x10; \ + else \ + *(inst)++ = x86_cc_unsigned_map [(cond)] + 0x10; \ + x86_imm_emit32 ((inst), (imm)); \ + amd64_codegen_post(inst); \ + } while (0) +#endif +#if defined(TARGET_X86) #define x86_branch(inst,cond,target,is_signed) \ do { \ int offset; \ + guint8* branch_start; \ x86_codegen_pre(&(inst), 2); \ offset = (target) - (inst) - 2; \ + branch_start = (inst); \ if (x86_is_imm8 ((offset))) \ x86_branch8 ((inst), (cond), offset, (is_signed)); \ else { \ @@ -1805,7 +1913,42 @@ typedef union { offset = (target) - (inst) - 6; \ x86_branch32 ((inst), (cond), offset, (is_signed)); \ } \ + x86_patch(branch_start, (target)); \ } while (0) +#elif defined(TARGET_AMD64) +/* This macro is used directly from mini-amd64.c and other */ +/* amd64 specific files, so it needs to be instrumented directly. 
*/ + +#define x86_branch_body(inst,cond,target,is_signed) \ + do { \ + int offset = (target) - (inst) - 2; \ + if (x86_is_imm8 ((offset))) \ + x86_branch8 ((inst), (cond), offset, (is_signed)); \ + else { \ + offset = (target) - (inst) - 6; \ + x86_branch32 ((inst), (cond), offset, (is_signed)); \ + } \ + } while (0) + +#if defined(__default_codegen__) +#define x86_branch(inst,cond,target,is_signed) \ + do { \ + x86_branch_body((inst),(cond),(target),(is_signed)); \ + } while (0) +#elif defined(__native_client_codegen__) +#define x86_branch(inst,cond,target,is_signed) \ + do { \ + /* branch_body is used twice because there are offsets */ \ + /* calculated based on the IP, which can change after */ \ + /* the call to amd64_codegen_post */ \ + amd64_codegen_pre(inst); \ + x86_branch_body((inst),(cond),(target),(is_signed)); \ + inst = amd64_codegen_post(inst); \ + x86_branch_body((inst),(cond),(target),(is_signed)); \ + } while (0) +#endif /* __native_client_codegen__ */ + +#endif /* TARGET_AMD64 */ #define x86_branch_disp(inst,cond,disp,is_signed) \ do { \ @@ -1865,10 +2008,10 @@ typedef union { x86_call_sequence_post((inst)); \ } while (0) -#ifdef __native_client_codegen__ + +#if defined( __native_client_codegen__ ) && defined( TARGET_X86 ) #define x86_call_reg_internal(inst,reg) \ do { \ - x86_codegen_pre(&(inst), 5); \ *(inst)++ = (unsigned char)0x83; /* and */ \ x86_reg_emit ((inst), 4, (reg)); /* reg */ \ *(inst)++ = (unsigned char)nacl_align_byte; \ @@ -1914,20 +2057,23 @@ typedef union { #endif /* __native_client_codegen__ */ -#ifdef __native_client_codegen__ +#if defined( __native_client_codegen__ ) && defined( TARGET_X86 ) #define x86_call_code(inst,target) \ do { \ int _x86_offset; \ + guint8* call_start; \ guint8* _aligned_start; \ - x86_call_sequence_pre_val ((inst)); \ + x86_call_sequence_pre((inst)); \ _x86_offset = (unsigned char*)(target) - (inst); \ _x86_offset -= 5; \ x86_call_imm_body ((inst), _x86_offset); \ - _aligned_start = x86_call_sequence_post_val ((inst)); \ + _aligned_start = x86_call_sequence_post((inst)); \ + call_start = _aligned_start; \ _x86_offset = (unsigned char*)(target) - (_aligned_start); \ _x86_offset -= 5; \ x86_call_imm_body ((_aligned_start), _x86_offset); \ + x86_patch(call_start, (target)); \ } while (0) #define SIZE_OF_RET 6 @@ -2062,9 +2208,9 @@ typedef union { #ifdef __native_client_codegen__ -#define kNaClLengthOfCallReg 5 -#define kNaClLengthOfCallImm 5 -#define kNaClLengthOfCallMembase (kNaClLengthOfCallReg + 6) +#define kx86NaClLengthOfCallReg 5 +#define kx86NaClLengthOfCallImm 5 +#define kx86NaClLengthOfCallMembase (kx86NaClLengthOfCallReg + 6) #endif /* __native_client_codegen__ */ diff --git a/mono/io-layer/atomic.h b/mono/io-layer/atomic.h index 258aa185129..e45cfcf99b0 100644 --- a/mono/io-layer/atomic.h +++ b/mono/io-layer/atomic.h @@ -92,7 +92,7 @@ static inline gpointer InterlockedCompareExchangePointer(volatile gpointer *dest gpointer old; __asm__ __volatile__ ("lock; " -#ifdef __x86_64__ +#if defined(__x86_64__) && !defined(__native_client__) "cmpxchgq" #else "cmpxchgl" @@ -154,7 +154,7 @@ static inline gpointer InterlockedExchangePointer(volatile gpointer *val, gpointer ret; __asm__ __volatile__ ("1:; lock; " -#ifdef __x86_64__ +#if defined(__x86_64__) && !defined(__native_client__) "cmpxchgq" #else "cmpxchgl" diff --git a/mono/io-layer/posix.c b/mono/io-layer/posix.c index 732529039ce..a7781bdd7bd 100644 --- a/mono/io-layer/posix.c +++ b/mono/io-layer/posix.c @@ -60,7 +60,8 @@ gpointer _wapi_stdhandle_create (int fd, const 
gchar *name) g_message("%s: creating standard handle type %s, fd %d", __func__, name, fd); #endif - + +#if !defined(__native_client__) /* Check if fd is valid */ do { flags=fcntl(fd, F_GETFL); @@ -78,11 +79,18 @@ gpointer _wapi_stdhandle_create (int fd, const gchar *name) SetLastError (_wapi_get_win32_file_error (errno)); return(INVALID_HANDLE_VALUE); } + file_handle.fileaccess=convert_from_flags(flags); +#else + /* + * fcntl will return -1 in nacl, as there is no real file system API. + * Yet, standard streams are available. + */ + file_handle.fileaccess = (fd == STDIN_FILENO) ? GENERIC_READ : GENERIC_WRITE; +#endif file_handle.filename = g_strdup(name); /* some default security attributes might be needed */ file_handle.security_attributes=0; - file_handle.fileaccess=convert_from_flags(flags); /* Apparently input handles can't be written to. (I don't * know if output or error handles can't be read from.) diff --git a/mono/io-layer/sockets.c b/mono/io-layer/sockets.c index e9eaf19f47a..c068f06281d 100644 --- a/mono/io-layer/sockets.c +++ b/mono/io-layer/sockets.c @@ -7,9 +7,10 @@ * (C) 2002 Ximian, Inc. */ +#include + #ifndef DISABLE_SOCKETS -#include #include #include #include diff --git a/mono/metadata/assembly.c b/mono/metadata/assembly.c index d9b25997ba6..482762d0e1a 100644 --- a/mono/metadata/assembly.c +++ b/mono/metadata/assembly.c @@ -197,13 +197,23 @@ mono_public_tokens_are_equal (const unsigned char *pubt1, const unsigned char *p return memcmp (pubt1, pubt2, 16) == 0; } +/* Native Client can't get this info from an environment variable so */ +/* it's passed in to the runtime, or set manually by embedding code. */ +#ifdef __native_client__ +char* nacl_mono_path = NULL; +#endif + static void check_path_env (void) { const char *path; char **splitted, **dest; +#ifdef __native_client__ + path = nacl_mono_path; +#else path = g_getenv ("MONO_PATH"); +#endif if (!path) return; diff --git a/mono/metadata/boehm-gc.c b/mono/metadata/boehm-gc.c index fa49e6a2aa9..050cb328d60 100644 --- a/mono/metadata/boehm-gc.c +++ b/mono/metadata/boehm-gc.c @@ -105,6 +105,8 @@ mono_gc_base_init (void) GC_stackbottom = (char*)ss.ss_sp; } +#elif defined(__native_client__) + /* Do nothing, GC_stackbottom is set correctly in libgc */ #else { int dummy; diff --git a/mono/metadata/domain-internals.h b/mono/metadata/domain-internals.h index bca936805be..815f605c445 100644 --- a/mono/metadata/domain-internals.h +++ b/mono/metadata/domain-internals.h @@ -421,6 +421,12 @@ mono_domain_code_reserve_align (MonoDomain *domain, int size, int alignment) MON void mono_domain_code_commit (MonoDomain *domain, void *data, int size, int newsize) MONO_INTERNAL; +void * +nacl_domain_get_code_dest (MonoDomain *domain, void *data) MONO_INTERNAL; + +void +nacl_domain_code_validate (MonoDomain *domain, guint8 **buf_base, int buf_size, guint8 **code_end) MONO_INTERNAL; + void mono_domain_code_foreach (MonoDomain *domain, MonoCodeManagerFunc func, void *user_data) MONO_INTERNAL; diff --git a/mono/metadata/domain.c b/mono/metadata/domain.c index 07fe67e659e..9c08882435a 100644 --- a/mono/metadata/domain.c +++ b/mono/metadata/domain.c @@ -2196,6 +2196,58 @@ mono_domain_code_commit (MonoDomain *domain, void *data, int size, int newsize) mono_domain_unlock (domain); } +#if defined(__native_client_codegen__) && defined(__native_client__) +/* + * Given the temporary buffer (allocated by mono_domain_code_reserve) into which + * we are generating code, return a pointer to the destination in the dynamic + * code segment into which the code 
will be copied when mono_domain_code_commit + * is called. + * LOCKING: Acquires the domain lock. + */ +void * +nacl_domain_get_code_dest (MonoDomain *domain, void *data) +{ + void *dest; + mono_domain_lock (domain); + dest = nacl_code_manager_get_code_dest (domain->code_mp, data); + mono_domain_unlock (domain); + return dest; +} + +/* + * Convenience function which calls mono_domain_code_commit to validate and copy + * the code. The caller sets *buf_base and *buf_size to the start and size of + * the buffer (allocated by mono_domain_code_reserve), and *code_end to the byte + * after the last instruction byte. On return, *buf_base will point to the start + * of the copied in the code segment, and *code_end will point after the end of + * the copied code. + */ +void +nacl_domain_code_validate (MonoDomain *domain, guint8 **buf_base, int buf_size, guint8 **code_end) +{ + guint8 *tmp = nacl_domain_get_code_dest (domain, *buf_base); + mono_domain_code_commit (domain, *buf_base, buf_size, *code_end - *buf_base); + *code_end = tmp + (*code_end - *buf_base); + *buf_base = tmp; +} + +#else + +/* no-op versions of Native Client functions */ + +void * +nacl_domain_get_code_dest (MonoDomain *domain, void *data) +{ + return data; +} + +void +nacl_domain_code_validate (MonoDomain *domain, guint8 **buf_base, int buf_size, guint8 **code_end) +{ +} + +#endif + /* * mono_domain_code_foreach: * Iterate over the code thunks of the code manager of @domain. diff --git a/mono/metadata/object.c b/mono/metadata/object.c index f65b690c4e7..565285f5b14 100644 --- a/mono/metadata/object.c +++ b/mono/metadata/object.c @@ -1709,8 +1709,12 @@ mono_method_add_generic_virtual_invocation (MonoDomain *domain, MonoVTable *vtab g_ptr_array_free (sorted, TRUE); } +#ifndef __native_client__ + /* We don't re-use any thunks as there is a lot of overhead */ + /* to deleting and re-using code in Native Client. 
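+     (Installed NaCl code is validated and effectively immutable: reclaiming a
+      thunk would mean deleting that code region, which typically requires all
+      threads to reach a safe point, and then validating a replacement, so the
+      old thunk is simply left in place here.)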
*/ if (old_thunk != vtable_trampoline && old_thunk != imt_trampoline) invalidate_generic_virtual_thunk (domain, old_thunk); +#endif } mono_domain_unlock (domain); diff --git a/mono/mini/Makefile.am b/mono/mini/Makefile.am index c44c88397e4..a1ee64f563e 100644 --- a/mono/mini/Makefile.am +++ b/mono/mini/Makefile.am @@ -385,7 +385,6 @@ test_sources = \ basic-simd.cs regtests=basic.exe basic-float.exe basic-long.exe basic-calls.exe objects.exe arrays.exe basic-math.exe exceptions.exe iltests.exe devirtualization.exe generics.exe basic-simd.exe -fsatests=basic.exe basic-float.exe basic-long.exe basic-calls.exe objects.exe arrays.exe basic-math.exe exceptions.exe devirtualization.exe basic-simd.exe if X86 if MONO_DEBUGGER_SUPPORTED @@ -538,6 +537,9 @@ libmonoinclude_HEADERS = jit.h basic-simd.exe: basic-simd.cs $(MCS) -out:$@ $< -r:TestDriver.dll -r:Mono.Simd.dll +nacl.exe: nacl.cs + $(MCS) -out:$@ $< -r:TestDriver.dll -r:Mono.Simd.dll + generics.exe: generics.cs TestDriver.dll generics-variant-types.dll $(MCS) -out:$@ $< -r:TestDriver.dll -r:generics-variant-types.dll @@ -642,20 +644,6 @@ fullaotcheck: mono $(regtests) llvmfullaotcheck: $(MAKE) fullaotcheck LLVM=1 -fsacheck: mono $(fsatests) fsacheck.c generics.exe - rm -rf fsa-tmp - mkdir fsa-tmp - cp $(CLASS)/mscorlib.dll $(CLASS)/System.Core.dll $(CLASS)/System.dll $(CLASS)/Mono.Posix.dll $(CLASS)/System.Configuration.dll $(CLASS)/System.Security.dll $(CLASS)/System.Xml.dll $(CLASS)/Mono.Security.dll $(CLASS)/Mono.Simd.dll \ - $(fsatests) generics-variant-types.dll TestDriver.dll fsa-tmp/ - cp $(fsatests) fsa-tmp/ - MONO_PATH=fsa-tmp $(top_builddir)/runtime/mono-wrapper --aot=full,static fsa-tmp/*.dll || exit 1 - MONO_PATH=fsa-tmp $(top_builddir)/runtime/mono-wrapper --aot=full,static fsa-tmp/*.exe || exit 1 - $(CC) -o $@.out -g -static $(VPATH)/fsacheck.c fsa-tmp/*.o \ - -lmono-2.0 -lpthread -lm -ldl -lrt \ - -DTARGET_X86 -L.libs -I${prefix}/include/mono-2.0 \ - -I${prefix} -I/usr/include/glib-2.0 -I/usr/lib/glib-2.0/include - for i in $(fsatests); do echo $$i; MONO_PATH=fsa-tmp ./$@.out $$i || exit 1; done - bench: mono test.exe time env $(RUNTIME) --ncompile $(count) --compile Test:$(mtest) test.exe diff --git a/mono/mini/aot-compiler.c b/mono/mini/aot-compiler.c index 35d9c911840..6f13e974c20 100644 --- a/mono/mini/aot-compiler.c +++ b/mono/mini/aot-compiler.c @@ -484,7 +484,7 @@ encode_sleb128 (gint32 value, guint8 *buf, guint8 **endbuf) #else #define AOT_FUNC_ALIGNMENT 16 #endif -#if defined(TARGET_X86) && defined(__native_client_codegen__) +#if (defined(TARGET_X86) || defined(TARGET_AMD64)) && defined(__native_client_codegen__) #undef AOT_FUNC_ALIGNMENT #define AOT_FUNC_ALIGNMENT 32 #endif @@ -698,8 +698,14 @@ arch_emit_plt_entry (MonoAotCompile *acfg, int index) { #if defined(TARGET_X86) guint32 offset = (acfg->plt_got_offset_base + index) * sizeof (gpointer); - -#ifdef __native_client_codegen__ +#if defined(__default_codegen__) + /* jmp *(%ebx) */ + emit_byte (acfg, 0xff); + emit_byte (acfg, 0xa3); + emit_int32 (acfg, offset); + /* Used by mono_aot_get_plt_info_offset */ + emit_int32 (acfg, acfg->plt_got_info_offsets [index]); +#elif defined(__native_client_codegen__) const guint8 kSizeOfNaClJmp = 11; guint8 bytes[kSizeOfNaClJmp]; guint8 *pbytes = &bytes[0]; @@ -711,15 +717,9 @@ arch_emit_plt_entry (MonoAotCompile *acfg, int index) emit_byte (acfg, 0x68); /* hide data in a push */ emit_int32 (acfg, acfg->plt_got_info_offsets [index]); emit_alignment (acfg, AOT_FUNC_ALIGNMENT); -#else - /* jmp *(%ebx) */ - emit_byte (acfg, 0xff); - 
emit_byte (acfg, 0xa3); - emit_int32 (acfg, offset); - /* Used by mono_aot_get_plt_info_offset */ - emit_int32 (acfg, acfg->plt_got_info_offsets [index]); -#endif /* __native_client_codegen__ */ +#endif /*__native_client_codegen__*/ #elif defined(TARGET_AMD64) +#if defined(__default_codegen__) /* * We can't emit jumps because they are 32 bits only so they can't be patched. * So we make indirect calls through GOT entries which are patched by the AOT @@ -731,6 +731,27 @@ arch_emit_plt_entry (MonoAotCompile *acfg, int index) emit_symbol_diff (acfg, acfg->got_symbol, ".", ((acfg->plt_got_offset_base + index) * sizeof (gpointer)) -4); /* Used by mono_aot_get_plt_info_offset */ emit_int32 (acfg, acfg->plt_got_info_offsets [index]); +#elif defined(__native_client_codegen__) + guint8 buf [256]; + guint8 *buf_aligned = ALIGN_TO(buf, kNaClAlignment); + guint8 *code = buf_aligned; + + /* mov (%rip), %r11d */ + emit_byte (acfg, '\x45'); + emit_byte (acfg, '\x8b'); + emit_byte (acfg, '\x1d'); + emit_symbol_diff (acfg, acfg->got_symbol, ".", ((acfg->plt_got_offset_base + index) * sizeof (gpointer)) -4); + + amd64_jump_reg (code, AMD64_R11); + /* This should be constant for the plt patch */ + g_assert ((size_t)(code-buf_aligned) == 10); + emit_bytes (acfg, buf_aligned, code - buf_aligned); + + /* Hide data in a push imm32 so it passes validation */ + emit_byte (acfg, 0x68); /* push */ + emit_int32 (acfg, acfg->plt_got_info_offsets [index]); + emit_alignment (acfg, AOT_FUNC_ALIGNMENT); +#endif /*__native_client_codegen__*/ #elif defined(TARGET_ARM) guint8 buf [256]; guint8 *code; @@ -814,6 +835,7 @@ arch_emit_specific_trampoline (MonoAotCompile *acfg, int offset, int *tramp_size * - all the trampolines should be of the same length. */ #if defined(TARGET_AMD64) +#if defined(__default_codegen__) /* This should be exactly 16 bytes long */ *tramp_size = 16; /* call *(%rip) */ @@ -822,8 +844,61 @@ arch_emit_specific_trampoline (MonoAotCompile *acfg, int offset, int *tramp_size emit_byte (acfg, '\x15'); emit_symbol_diff (acfg, acfg->got_symbol, ".", (offset * sizeof (gpointer)) - 4); /* This should be relative to the start of the trampoline */ - emit_symbol_diff (acfg, acfg->got_symbol, ".", (offset * sizeof (gpointer)) - 4 + 19); + emit_symbol_diff (acfg, acfg->got_symbol, ".", ((offset+1) * sizeof (gpointer)) + 7); emit_zero_bytes (acfg, 5); +#elif defined(__native_client_codegen__) + guint8 buf [256]; + guint8 *buf_aligned = ALIGN_TO(buf, kNaClAlignment); + guint8 *code = buf_aligned; + guint8 *call_start; + size_t call_len; + int got_offset; + + /* Emit this call in 'code' so we can find out how long it is. */ + amd64_call_reg (code, AMD64_R11); + call_start = mono_arch_nacl_skip_nops (buf_aligned); + call_len = code - call_start; + + /* The tramp_size is twice the NaCl alignment because it starts with */ + /* a call which needs to be aligned to the end of the boundary. */ + *tramp_size = kNaClAlignment*2; + { + /* Emit nops to align call site below which is 7 bytes plus */ + /* the length of the call sequence emitted above. */ + /* Note: this requires the specific trampoline starts on a */ + /* kNaclAlignedment aligned address, which it does because */ + /* it's its own function that is aligned. 
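+        A worked example, using a hypothetical call_len of 3 (the real value
+        comes from the amd64_call_reg emission above): with kNaClAlignment of
+        32 the padding is 32 - 7 - 3 = 22 nop bytes, so the 7-byte rip-relative
+        mov plus the call end exactly on the bundle boundary and the return
+        address is bundle-aligned, as the NaCl validator requires for calls.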
*/ + guint8 nop_buf[256]; + guint8 *nopbuf_aligned = ALIGN_TO (nop_buf, kNaClAlignment); + guint8 *nopbuf_end = mono_arch_nacl_pad (nopbuf_aligned, kNaClAlignment - 7 - (call_len)); + emit_bytes (acfg, nopbuf_aligned, nopbuf_end - nopbuf_aligned); + } + /* The trampoline is stored at the offset'th pointer, the -4 is */ + /* present because RIP relative addressing starts at the end of */ + /* the current instruction, while the label "." is relative to */ + /* the beginning of the current asm location, which in this case */ + /* is not the mov instruction, but the offset itself, due to the */ + /* way the bytes and ints are emitted here. */ + got_offset = (offset * sizeof(gpointer)) - 4; + + /* mov (%rip), %r11d */ + emit_byte (acfg, '\x45'); + emit_byte (acfg, '\x8b'); + emit_byte (acfg, '\x1d'); + emit_symbol_diff (acfg, acfg->got_symbol, ".", got_offset); + + /* naclcall %r11 */ + emit_bytes (acfg, call_start, call_len); + + /* The arg is stored at the offset+1 pointer, relative to beginning */ + /* of trampoline: 7 for mov, plus the call length, and 1 for push. */ + got_offset = ((offset + 1) * sizeof(gpointer)) + 7 + call_len + 1; + + /* We can't emit this data directly, hide in a "push imm32" */ + emit_byte (acfg, '\x68'); /* push */ + emit_symbol_diff (acfg, acfg->got_symbol, ".", got_offset); + emit_alignment (acfg, kNaClAlignment); +#endif /*__native_client_codegen__*/ #elif defined(TARGET_ARM) guint8 buf [128]; guint8 *code; @@ -1010,6 +1085,7 @@ static void arch_emit_static_rgctx_trampoline (MonoAotCompile *acfg, int offset, int *tramp_size) { #if defined(TARGET_AMD64) +#if defined(__default_codegen__) /* This should be exactly 13 bytes long */ *tramp_size = 13; @@ -1023,6 +1099,31 @@ arch_emit_static_rgctx_trampoline (MonoAotCompile *acfg, int offset, int *tramp_ emit_byte (acfg, '\xff'); emit_byte (acfg, '\x25'); emit_symbol_diff (acfg, acfg->got_symbol, ".", ((offset + 1) * sizeof (gpointer)) - 4); +#elif defined(__native_client_codegen__) + guint8 buf [128]; + guint8 *buf_aligned = ALIGN_TO(buf, kNaClAlignment); + guint8 *code = buf_aligned; + + /* mov (%rip), %r10d */ + emit_byte (acfg, '\x45'); + emit_byte (acfg, '\x8b'); + emit_byte (acfg, '\x15'); + emit_symbol_diff (acfg, acfg->got_symbol, ".", (offset * sizeof (gpointer)) - 4); + + /* mov (%rip), %r11d */ + emit_byte (acfg, '\x45'); + emit_byte (acfg, '\x8b'); + emit_byte (acfg, '\x1d'); + emit_symbol_diff (acfg, acfg->got_symbol, ".", ((offset + 1) * sizeof (gpointer)) - 4); + + /* nacljmp *%r11 */ + amd64_jump_reg (code, AMD64_R11); + emit_bytes (acfg, buf_aligned, code - buf_aligned); + + emit_alignment (acfg, kNaClAlignment); + *tramp_size = kNaClAlignment; +#endif /*__native_client_codegen__*/ + #elif defined(TARGET_ARM) guint8 buf [128]; guint8 *code; @@ -1132,50 +1233,74 @@ arch_emit_imt_thunk (MonoAotCompile *acfg, int offset, int *tramp_size) { #if defined(TARGET_AMD64) guint8 *buf, *code; +#if defined(__native_client_codegen__) + guint8 *buf_alloc; +#endif guint8 *labels [3]; + guint8 mov_buf[3]; + guint8 *mov_buf_ptr = mov_buf; + const int kSizeOfMove = 7; +#if defined(__default_codegen__) code = buf = g_malloc (256); +#elif defined(__native_client_codegen__) + buf_alloc = g_malloc (256 + kNaClAlignment + kSizeOfMove); + buf = ((guint)buf_alloc + kNaClAlignment) & ~kNaClAlignmentMask; + /* The RIP relative move below is emitted first */ + buf += kSizeOfMove; + code = buf; +#endif /* FIXME: Optimize this, i.e. use binary search etc. 
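   (The thunk below is a linear scan: MONO_ARCH_IMT_SCRATCH_REG walks an array
    of (key, target) pairs terminated by a null key, comparing each key against
    MONO_ARCH_IMT_REG and jumping through the matching target slot.)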
*/ /* Maybe move the body into a separate function (slower, but much smaller) */ - /* R11 is a free register */ + /* MONO_ARCH_IMT_SCRATCH_REG is a free register */ labels [0] = code; - amd64_alu_membase_imm (code, X86_CMP, AMD64_R11, 0, 0); + amd64_alu_membase_imm (code, X86_CMP, MONO_ARCH_IMT_SCRATCH_REG, 0, 0); labels [1] = code; - amd64_branch8 (code, X86_CC_Z, FALSE, 0); + amd64_branch8 (code, X86_CC_Z, 0, FALSE); /* Check key */ - amd64_alu_membase_reg (code, X86_CMP, AMD64_R11, 0, MONO_ARCH_IMT_REG); + amd64_alu_membase_reg_size (code, X86_CMP, MONO_ARCH_IMT_SCRATCH_REG, 0, MONO_ARCH_IMT_REG, sizeof (gpointer)); labels [2] = code; - amd64_branch8 (code, X86_CC_Z, FALSE, 0); + amd64_branch8 (code, X86_CC_Z, 0, FALSE); /* Loop footer */ - amd64_alu_reg_imm (code, X86_ADD, AMD64_R11, 2 * sizeof (gpointer)); + amd64_alu_reg_imm (code, X86_ADD, MONO_ARCH_IMT_SCRATCH_REG, 2 * sizeof (gpointer)); amd64_jump_code (code, labels [0]); /* Match */ mono_amd64_patch (labels [2], code); - amd64_mov_reg_membase (code, AMD64_R11, AMD64_R11, sizeof (gpointer), 8); - amd64_jump_membase (code, AMD64_R11, 0); + amd64_mov_reg_membase (code, MONO_ARCH_IMT_SCRATCH_REG, MONO_ARCH_IMT_SCRATCH_REG, sizeof (gpointer), sizeof (gpointer)); + amd64_jump_membase (code, MONO_ARCH_IMT_SCRATCH_REG, 0); /* No match */ /* FIXME: */ mono_amd64_patch (labels [1], code); x86_breakpoint (code); - amd64_mov_reg_membase (code, AMD64_R11, AMD64_RIP, 12345678, 8); - - /* mov (%rip), %r11 */ - emit_byte (acfg, '\x4d'); - emit_byte (acfg, '\x8b'); - emit_byte (acfg, '\x1d'); + /* mov (%rip), MONO_ARCH_IMT_SCRATCH_REG */ + amd64_emit_rex (mov_buf_ptr, sizeof(gpointer), MONO_ARCH_IMT_SCRATCH_REG, 0, AMD64_RIP); + *(mov_buf_ptr)++ = (unsigned char)0x8b; /* mov opcode */ + x86_address_byte (mov_buf_ptr, 0, MONO_ARCH_IMT_SCRATCH_REG & 0x7, 5); + emit_bytes (acfg, mov_buf, mov_buf_ptr - mov_buf); emit_symbol_diff (acfg, acfg->got_symbol, ".", (offset * sizeof (gpointer)) - 4); emit_bytes (acfg, buf, code - buf); - *tramp_size = code - buf + 7; + *tramp_size = code - buf + kSizeOfMove; +#if defined(__native_client_codegen__) + /* The tramp will be padded to the next kNaClAlignment bundle. 
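+     (For example, a 45-byte thunk rounds up to 64 bytes when kNaClAlignment is
+      32, since ALIGN_TO (45, 32) == 64.)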
*/ + *tramp_size = ALIGN_TO ((*tramp_size), kNaClAlignment); +#endif + +#if defined(__default_codegen__) + g_free (buf); +#elif defined(__native_client_codegen__) + g_free (buf_alloc); +#endif + #elif defined(TARGET_X86) guint8 *buf, *code; #ifdef __native_client_codegen__ @@ -1183,11 +1308,11 @@ arch_emit_imt_thunk (MonoAotCompile *acfg, int offset, int *tramp_size) #endif guint8 *labels [3]; -#ifdef __native_client_codegen__ +#if defined(__default_codegen__) + code = buf = g_malloc (256); +#elif defined(__native_client_codegen__) buf_alloc = g_malloc (256 + kNaClAlignment); code = buf = ((guint)buf_alloc + kNaClAlignment) & ~kNaClAlignmentMask; -#else - code = buf = g_malloc (256); #endif /* Allocate a temporary stack slot */ @@ -1240,6 +1365,13 @@ arch_emit_imt_thunk (MonoAotCompile *acfg, int offset, int *tramp_size) emit_bytes (acfg, buf, code - buf); *tramp_size = code - buf; + +#if defined(__default_codegen__) + g_free (buf); +#elif defined(__native_client_codegen__) + g_free (buf_alloc); +#endif + #elif defined(TARGET_ARM) guint8 buf [128]; guint8 *code, *code2, *labels [16]; @@ -3916,7 +4048,7 @@ emit_plt (MonoAotCompile *acfg) sprintf (symbol, "plt"); emit_section_change (acfg, ".text", 0); - emit_alignment (acfg, 16); + emit_alignment (acfg, NACL_SIZE(16, kNaClAlignment)); emit_label (acfg, symbol); emit_label (acfg, acfg->plt_symbol); @@ -5038,7 +5170,17 @@ emit_code (MonoAotCompile *acfg) * Emit some padding so the local symbol for the first method doesn't have the * same address as 'methods'. */ +#if defined(__default_codegen__) emit_zero_bytes (acfg, 16); +#elif defined(__native_client_codegen__) + { + const int kPaddingSize = 16; + guint8 pad_buffer[kPaddingSize]; + mono_arch_nacl_pad (pad_buffer, kPaddingSize); + emit_bytes (acfg, pad_buffer, kPaddingSize); + } +#endif + for (l = acfg->method_order; l != NULL; l = l->next) { MonoCompile *cfg; @@ -6262,7 +6404,11 @@ compile_asm (MonoAotCompile *acfg) #endif #ifdef __native_client_codegen__ +#if defined(TARGET_AMD64) +#define AS_NAME "nacl64-as" +#else #define AS_NAME "nacl-as" +#endif #else #define AS_NAME "as" #endif diff --git a/mono/mini/aot-runtime.c b/mono/mini/aot-runtime.c index 9610c7859b2..115384028f0 100644 --- a/mono/mini/aot-runtime.c +++ b/mono/mini/aot-runtime.c @@ -1036,6 +1036,7 @@ load_aot_module (MonoAssembly *assembly, gpointer user_data) MonoAotFileInfo *info = NULL; int i, version; guint8 *blob; + gboolean do_load_image = TRUE; if (mono_compile_aot) return; @@ -1262,8 +1263,20 @@ load_aot_module (MonoAssembly *assembly, gpointer user_data) * non-lazily, since we can't handle out-of-date errors later. * The cached class info also depends on the exact assemblies. */ - for (i = 0; i < amodule->image_table_len; ++i) - load_image (amodule, i, FALSE); +#if defined(__native_client__) + /* TODO: Don't 'load_image' on mscorlib due to a */ + /* recursive loading problem. This should be */ + /* removed if mscorlib is loaded from disk. 
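+       (Note the inverted-looking test below: strncmp returns 0 on a match, so
+        the TRUE branch is taken for every assembly except mscorlib, and only
+        mscorlib itself skips the eager load_image loop.)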
*/ + if (strncmp(assembly->aname.name, "mscorlib", 8)) { + do_load_image = TRUE; + } else { + do_load_image = FALSE; + } +#endif + if (do_load_image) { + for (i = 0; i < amodule->image_table_len; ++i) + load_image (amodule, i, FALSE); + } if (amodule->out_of_date) { mono_trace (G_LOG_LEVEL_INFO, MONO_TRACE_AOT, "AOT Module %s is unusable because a dependency is out-of-date.\n", assembly->image->name); diff --git a/mono/mini/branch-opts.c b/mono/mini/branch-opts.c index 13a399830a5..141b90a2be2 100644 --- a/mono/mini/branch-opts.c +++ b/mono/mini/branch-opts.c @@ -815,6 +815,15 @@ replace_in_block (MonoBasicBlock *bb, MonoBasicBlock *orig, MonoBasicBlock *repl static void replace_out_block_in_code (MonoBasicBlock *bb, MonoBasicBlock *orig, MonoBasicBlock *repl) { MonoInst *ins; + +#if defined(__native_client_codegen__) + /* Need to maintain this flag for the new block because */ + /* we can't jump indirectly to a non-aligned block. */ + if (orig->flags & BB_INDIRECT_JUMP_TARGET) + { + repl->flags |= BB_INDIRECT_JUMP_TARGET; + } +#endif for (ins = bb->code; ins != NULL; ins = ins->next) { switch (ins->opcode) { diff --git a/mono/mini/cpu-amd64.md b/mono/mini/cpu-amd64.md index 31158c42df6..b095aa94280 100644 --- a/mono/mini/cpu-amd64.md +++ b/mono/mini/cpu-amd64.md @@ -53,6 +53,13 @@ # # See the code in mini-x86.c for more details on how the specifiers are used. # +# +# Native Client Note: NaCl call sequences do not really reach > 32 bytes but +# the maximum length can be high, so if we get unlucky and wind up trying to +# emit a call sequence such that we are one or two bytes too long, we need to +# pad out almost an entire 32 bytes. +# + break: len:2 jmp: len:120 tailcall: len:120 clob:c @@ -60,8 +67,8 @@ br: len:6 label: len:0 seq_point: len:25 -long_add: dest:i src1:i src2:i len:3 clob:1 -long_sub: dest:i src1:i src2:i len:3 clob:1 +long_add: dest:i src1:i src2:i len:3 clob:1 nacl:6 +long_sub: dest:i src1:i src2:i len:3 clob:1 nacl:6 long_mul: dest:i src1:i src2:i len:4 clob:1 long_div: dest:a src1:a src2:i len:16 clob:d long_div_un: dest:a src1:a src2:i len:16 clob:d @@ -96,11 +103,11 @@ long_min_un: dest:i src1:i src2:i len:16 clob:1 long_max: dest:i src1:i src2:i len:16 clob:1 long_max_un: dest:i src1:i src2:i len:16 clob:1 -throw: src1:i len:18 -rethrow: src1:i len:18 +throw: src1:i len:18 nacl:50 +rethrow: src1:i len:18 nacl:50 start_handler: len:16 -endfinally: len:9 -endfilter: src1:a len:9 +endfinally: len:9 nacl:22 +endfilter: src1:a len:9 nacl:19 ckfinite: dest:f src1:f len:43 ceq: dest:c len:8 cgt: dest:c len:8 @@ -115,11 +122,11 @@ compare_imm: src1:i len:13 icompare_imm: src1:i len:8 fcompare: src1:f src2:f clob:a len:13 oparglist: src1:b len:11 -checkthis: src1:b len:5 -call: dest:a clob:c len:32 -voidcall: clob:c len:32 -voidcall_reg: src1:i clob:c len:32 -voidcall_membase: src1:b clob:c len:32 +checkthis: src1:b len:5 nacl:8 +call: dest:a clob:c len:32 nacl:64 +voidcall: clob:c len:32 nacl:64 +voidcall_reg: src1:i clob:c len:32 nacl:64 +voidcall_membase: src1:b clob:c len:32 nacl:64 fcall: dest:f len:64 clob:c fcall_reg: dest:f src1:i len:64 clob:c fcall_membase: dest:f src1:b len:64 clob:c @@ -129,39 +136,39 @@ lcall_membase: dest:a src1:b len:64 clob:c vcall: len:64 clob:c vcall_reg: src1:i len:64 clob:c vcall_membase: src1:b len:64 clob:c -call_reg: dest:a src1:i len:32 clob:c -call_membase: dest:a src1:b len:32 clob:c +call_reg: dest:a src1:i len:32 clob:c nacl:64 +call_membase: dest:a src1:b len:32 clob:c nacl:64 iconst: dest:i len:10 i8const: dest:i len:10 r4const: 
dest:f len:14 r8const: dest:f len:9 store_membase_imm: dest:b len:15 -store_membase_reg: dest:b src1:i len:9 -storei8_membase_reg: dest:b src1:i len:9 -storei1_membase_imm: dest:b len:11 -storei1_membase_reg: dest:b src1:c len:9 -storei2_membase_imm: dest:b len:13 -storei2_membase_reg: dest:b src1:i len:9 -storei4_membase_imm: dest:b len:13 -storei4_membase_reg: dest:b src1:i len:9 +store_membase_reg: dest:b src1:i len:9 nacl:11 +storei8_membase_reg: dest:b src1:i len:9 nacl:11 +storei1_membase_imm: dest:b len:11 nacl:15 +storei1_membase_reg: dest:b src1:c len:9 nacl:11 +storei2_membase_imm: dest:b len:13 nacl:15 +storei2_membase_reg: dest:b src1:i len:9 nacl:11 +storei4_membase_imm: dest:b len:13 nacl:15 +storei4_membase_reg: dest:b src1:i len:9 nacl:11 storei8_membase_imm: dest:b len:18 storer4_membase_reg: dest:b src1:f len:15 storer8_membase_reg: dest:b src1:f len:10 -load_membase: dest:i src1:b len:8 -loadi1_membase: dest:c src1:b len:9 -loadu1_membase: dest:c src1:b len:9 -loadi2_membase: dest:i src1:b len:9 -loadu2_membase: dest:i src1:b len:9 -loadi4_membase: dest:i src1:b len:9 -loadu4_membase: dest:i src1:b len:9 -loadi8_membase: dest:i src1:b len:18 +load_membase: dest:i src1:b len:8 nacl:12 +loadi1_membase: dest:c src1:b len:9 nacl:12 +loadu1_membase: dest:c src1:b len:9 nacl:12 +loadi2_membase: dest:i src1:b len:9 nacl:12 +loadu2_membase: dest:i src1:b len:9 nacl:12 +loadi4_membase: dest:i src1:b len:9 nacl:12 +loadu4_membase: dest:i src1:b len:9 nacl:12 +loadi8_membase: dest:i src1:b len:18 nacl:14 loadr4_membase: dest:f src1:b len:16 loadr8_membase: dest:f src1:b len:16 loadu4_mem: dest:i len:10 amd64_loadi8_memindex: dest:i src1:i src2:i len:10 move: dest:i src1:i len:3 -add_imm: dest:i src1:i len:8 clob:1 -sub_imm: dest:i src1:i len:8 clob:1 +add_imm: dest:i src1:i len:8 clob:1 nacl:11 +sub_imm: dest:i src1:i len:8 clob:1 nacl:11 mul_imm: dest:i src1:i len:11 and_imm: dest:i src1:i len:8 clob:1 or_imm: dest:i src1:i len:8 clob:1 @@ -246,8 +253,9 @@ float_clt_membase: dest:i src1:f src2:b len:35 float_clt_un_membase: dest:i src1:f src2:b len:42 float_conv_to_u: dest:i src1:f len:46 fmove: dest:f src1:f len:8 -call_handler: len:14 clob:c +call_handler: len:14 clob:c nacl:52 aot_const: dest:i len:10 +nacl_gc_safe_point: clob:c x86_test_null: src1:i len:5 x86_compare_membase_reg: src1:b src2:i len:9 x86_compare_membase_imm: src1:b len:13 @@ -263,7 +271,7 @@ x86_push_imm: len:6 x86_push_membase: src1:b len:8 x86_push_obj: src1:b len:40 x86_lea: dest:i src1:i src2:i len:8 -x86_lea_membase: dest:i src1:i len:11 +x86_lea_membase: dest:i src1:i len:11 nacl:14 x86_xchg: src1:i src2:i clob:x len:2 x86_fpop: src1:f len:3 x86_seteq_membase: src1:b len:9 @@ -298,7 +306,7 @@ subcc: dest:i src1:i src2:i len:3 clob:1 adc_imm: dest:i src1:i len:8 clob:1 sbb: dest:i src1:i src2:i len:3 clob:1 sbb_imm: dest:i src1:i len:8 clob:1 -br_reg: src1:i len:3 +br_reg: src1:i len:3 nacl:8 sin: dest:f src1:f len:32 cos: dest:f src1:f len:32 abs: dest:f src1:f clob:1 len:32 @@ -310,8 +318,8 @@ sext_i2: dest:i src1:i len:4 sext_i4: dest:i src1:i len:8 # 32 bit opcodes -int_add: dest:i src1:i src2:i clob:1 len:4 -int_sub: dest:i src1:i src2:i clob:1 len:4 +int_add: dest:i src1:i src2:i clob:1 len:4 nacl:7 +int_sub: dest:i src1:i src2:i clob:1 len:4 nacl:7 int_mul: dest:i src1:i src2:i clob:1 len:4 int_mul_ovf: dest:i src1:i src2:i clob:1 len:32 int_mul_ovf_un: dest:i src1:i src2:i clob:1 len:32 @@ -331,8 +339,8 @@ int_sbb: dest:i src1:i src2:i clob:1 len:4 int_sbb_imm: dest:i src1:i clob:1 len:8 int_addcc: 
dest:i src1:i src2:i clob:1 len:16 int_subcc: dest:i src1:i src2:i clob:1 len:16 -int_add_imm: dest:i src1:i clob:1 len:8 -int_sub_imm: dest:i src1:i clob:1 len:8 +int_add_imm: dest:i src1:i clob:1 len:8 nacl:10 +int_sub_imm: dest:i src1:i clob:1 len:8 nacl:10 int_mul_imm: dest:i src1:i clob:1 len:32 int_div_imm: dest:a src1:i clob:d len:32 int_div_un_imm: dest:a src1:i clob:d len:32 @@ -438,8 +446,8 @@ cmov_lgt_un: dest:i src1:i src2:i len:16 clob:1 cmov_lle_un: dest:i src1:i src2:i len:16 clob:1 cmov_llt_un: dest:i src1:i src2:i len:16 clob:1 -long_add_imm: dest:i src1:i clob:1 len:12 -long_sub_imm: dest:i src1:i clob:1 len:12 +long_add_imm: dest:i src1:i clob:1 len:12 nacl:15 +long_sub_imm: dest:i src1:i clob:1 len:12 nacl:15 long_and_imm: dest:i src1:i clob:1 len:12 long_or_imm: dest:i src1:i clob:1 len:12 long_xor_imm: dest:i src1:i clob:1 len:12 @@ -486,7 +494,7 @@ vcall2: len:64 clob:c vcall2_reg: src1:i len:64 clob:c vcall2_membase: src1:b len:64 clob:c -dyn_call: src1:i src2:i len:64 clob:c +dyn_call: src1:i src2:i len:64 clob:c nacl:128 localloc_imm: dest:i len:84 diff --git a/mono/mini/cpu-x86.md b/mono/mini/cpu-x86.md index 7b4f876d45b..41dcbbedfad 100644 --- a/mono/mini/cpu-x86.md +++ b/mono/mini/cpu-x86.md @@ -247,6 +247,7 @@ call_handler: len:11 clob:c aot_const: dest:i len:5 load_gotaddr: dest:i len:64 got_entry: dest:i src1:b len:7 +nacl_gc_safe_point: clob:c x86_test_null: src1:i len:2 x86_compare_membase_reg: src1:b src2:i len:7 x86_compare_membase_imm: src1:b len:11 diff --git a/mono/mini/dominators.c b/mono/mini/dominators.c index 5024e066f19..ad35cdb35aa 100644 --- a/mono/mini/dominators.c +++ b/mono/mini/dominators.c @@ -384,6 +384,7 @@ mono_compute_natural_loops (MonoCompile *cfg) /* The loop body start is the first bblock in the order they will be emitted */ MonoBasicBlock *h = cfg->bblocks [i]; MonoBasicBlock *body_start = h; + MonoInst *inst; GList *l; for (l = h->loop_blocks; l; l = l->next) { @@ -394,6 +395,12 @@ mono_compute_natural_loops (MonoCompile *cfg) } } +#if defined(__native_client_codegen__) + /* Instrument the loop (GC back branch safe point) */ + MONO_INST_NEW (cfg, inst, OP_NACL_GC_SAFE_POINT); + inst->dreg = mono_alloc_dreg (cfg, STACK_I4); + mono_bblock_insert_before_ins (body_start, NULL, inst); +#endif body_start->loop_body_start = 1; } } diff --git a/mono/mini/driver.c b/mono/mini/driver.c index 1de8758c074..e8b6a23b3a6 100644 --- a/mono/mini/driver.c +++ b/mono/mini/driver.c @@ -115,7 +115,10 @@ opt_funcs [sizeof (int) * 8] = { }; #ifdef __native_client_codegen__ -extern guint8 nacl_align_byte; +extern gint8 nacl_align_byte; +#endif +#ifdef __native_client__ +extern char *nacl_mono_path; #endif #define DEFAULT_OPTIMIZATIONS ( \ @@ -1644,7 +1647,11 @@ mono_main (int argc, char* argv[]) mono_use_llvm = FALSE; #ifdef __native_client_codegen__ } else if (strcmp (argv [i], "--nacl-align-mask-off") == 0){ - nacl_align_byte = 0xff; + nacl_align_byte = -1; /* 0xff */ +#endif +#ifdef __native_client__ + } else if (strcmp (argv [i], "--nacl-mono-path") == 0){ + nacl_mono_path = g_strdup(argv[++i]); #endif } else { fprintf (stderr, "Unknown command line option: '%s'\n", argv [i]); @@ -1655,7 +1662,7 @@ mono_main (int argc, char* argv[]) #ifdef __native_client_codegen__ if (getenv ("MONO_NACL_ALIGN_MASK_OFF")) { - nacl_align_byte = 0xff; + nacl_align_byte = -1; /* 0xff */ } #endif diff --git a/mono/mini/exceptions-amd64.c b/mono/mini/exceptions-amd64.c index e683bf2a11f..cc883afbfb6 100644 --- a/mono/mini/exceptions-amd64.c +++ 
b/mono/mini/exceptions-amd64.c @@ -179,7 +179,9 @@ mono_arch_get_restore_context (MonoTrampInfo **info, gboolean aot) amd64_mov_reg_membase (code, AMD64_R12, AMD64_R11, G_STRUCT_OFFSET (MonoContext, r12), 8); amd64_mov_reg_membase (code, AMD64_R13, AMD64_R11, G_STRUCT_OFFSET (MonoContext, r13), 8); amd64_mov_reg_membase (code, AMD64_R14, AMD64_R11, G_STRUCT_OFFSET (MonoContext, r14), 8); +#if !defined(__native_client_codegen__) amd64_mov_reg_membase (code, AMD64_R15, AMD64_R11, G_STRUCT_OFFSET (MonoContext, r15), 8); +#endif if (mono_running_on_valgrind ()) { /* Prevent 'Address 0x... is just below the stack ptr.' errors */ @@ -195,6 +197,8 @@ mono_arch_get_restore_context (MonoTrampInfo **info, gboolean aot) /* jump to the saved IP */ amd64_jump_reg (code, AMD64_R11); + nacl_global_codeman_validate(&start, 256, &code); + mono_arch_flush_icache (start, code - start); if (info) @@ -219,8 +223,9 @@ mono_arch_get_call_filter (MonoTrampInfo **info, gboolean aot) guint32 pos; MonoJumpInfo *ji = NULL; GSList *unwind_ops = NULL; + const guint kMaxCodeSize = NACL_SIZE (128, 256); - start = code = mono_global_codeman_reserve (128); + start = code = mono_global_codeman_reserve (kMaxCodeSize); /* call_filter (MonoContext *ctx, unsigned long eip) */ code = start; @@ -252,7 +257,9 @@ mono_arch_get_call_filter (MonoTrampInfo **info, gboolean aot) amd64_mov_reg_membase (code, AMD64_R12, AMD64_ARG_REG1, G_STRUCT_OFFSET (MonoContext, r12), 8); amd64_mov_reg_membase (code, AMD64_R13, AMD64_ARG_REG1, G_STRUCT_OFFSET (MonoContext, r13), 8); amd64_mov_reg_membase (code, AMD64_R14, AMD64_ARG_REG1, G_STRUCT_OFFSET (MonoContext, r14), 8); +#if !defined(__native_client_codegen__) amd64_mov_reg_membase (code, AMD64_R15, AMD64_ARG_REG1, G_STRUCT_OFFSET (MonoContext, r15), 8); +#endif #ifdef TARGET_WIN32 amd64_mov_reg_membase (code, AMD64_RDI, AMD64_ARG_REG1, G_STRUCT_OFFSET (MonoContext, rdi), 8); amd64_mov_reg_membase (code, AMD64_RSI, AMD64_ARG_REG1, G_STRUCT_OFFSET (MonoContext, rsi), 8); @@ -275,7 +282,9 @@ mono_arch_get_call_filter (MonoTrampInfo **info, gboolean aot) amd64_leave (code); amd64_ret (code); - g_assert ((code - start) < 128); + g_assert ((code - start) < kMaxCodeSize); + + nacl_global_codeman_validate(&start, kMaxCodeSize, &code); mono_arch_flush_icache (start, code - start); @@ -405,10 +414,10 @@ get_throw_trampoline (MonoTrampInfo **info, gboolean rethrow, gboolean corlib, g guint8 *code; MonoJumpInfo *ji = NULL; GSList *unwind_ops = NULL; - int i, buf_size, stack_size, arg_offsets [16], regs_offset; + int i, stack_size, arg_offsets [16], regs_offset; + const guint kMaxCodeSize = NACL_SIZE (256, 512); - buf_size = 256; - start = code = mono_global_codeman_reserve (buf_size); + start = code = mono_global_codeman_reserve (kMaxCodeSize); /* The stack is unaligned on entry */ stack_size = 192 + 8; @@ -429,37 +438,37 @@ get_throw_trampoline (MonoTrampInfo **info, gboolean rethrow, gboolean corlib, g */ arg_offsets [0] = 0; - arg_offsets [1] = sizeof (gpointer); - arg_offsets [2] = sizeof (gpointer) * 2; - arg_offsets [3] = sizeof (gpointer) * 3; - regs_offset = sizeof (gpointer) * 4; + arg_offsets [1] = sizeof(mgreg_t); + arg_offsets [2] = sizeof(mgreg_t) * 2; + arg_offsets [3] = sizeof(mgreg_t) * 3; + regs_offset = sizeof(mgreg_t) * 4; /* Save registers */ for (i = 0; i < AMD64_NREG; ++i) if (i != AMD64_RSP) - amd64_mov_membase_reg (code, AMD64_RSP, regs_offset + (i * sizeof (gpointer)), i, 8); + amd64_mov_membase_reg (code, AMD64_RSP, regs_offset + (i * sizeof(mgreg_t)), i, sizeof(mgreg_t)); /* 
Save RSP */ - amd64_lea_membase (code, AMD64_RAX, AMD64_RSP, stack_size + sizeof (gpointer)); - amd64_mov_membase_reg (code, AMD64_RSP, regs_offset + (AMD64_RSP * sizeof (gpointer)), X86_EAX, 8); + amd64_lea_membase (code, AMD64_RAX, AMD64_RSP, stack_size + sizeof(mgreg_t)); + amd64_mov_membase_reg (code, AMD64_RSP, regs_offset + (AMD64_RSP * sizeof(mgreg_t)), X86_EAX, sizeof(mgreg_t)); /* Set arg1 == regs */ amd64_lea_membase (code, AMD64_RAX, AMD64_RSP, regs_offset); - amd64_mov_membase_reg (code, AMD64_RSP, arg_offsets [0], AMD64_RAX, 8); + amd64_mov_membase_reg (code, AMD64_RSP, arg_offsets [0], AMD64_RAX, sizeof(mgreg_t)); /* Set arg2 == eip */ if (llvm_abs) amd64_alu_reg_reg (code, X86_XOR, AMD64_RAX, AMD64_RAX); else - amd64_mov_reg_membase (code, AMD64_RAX, AMD64_RSP, stack_size, 8); - amd64_mov_membase_reg (code, AMD64_RSP, arg_offsets [1], AMD64_RAX, 8); + amd64_mov_reg_membase (code, AMD64_RAX, AMD64_RSP, stack_size, sizeof(mgreg_t)); + amd64_mov_membase_reg (code, AMD64_RSP, arg_offsets [1], AMD64_RAX, sizeof(mgreg_t)); /* Set arg3 == exc/ex_token_index */ if (resume_unwind) - amd64_mov_membase_imm (code, AMD64_RSP, arg_offsets [2], 0, 8); + amd64_mov_membase_imm (code, AMD64_RSP, arg_offsets [2], 0, sizeof(mgreg_t)); else - amd64_mov_membase_reg (code, AMD64_RSP, arg_offsets [2], AMD64_ARG_REG1, 8); + amd64_mov_membase_reg (code, AMD64_RSP, arg_offsets [2], AMD64_ARG_REG1, sizeof(mgreg_t)); /* Set arg4 == rethrow/pc offset */ if (resume_unwind) { - amd64_mov_membase_imm (code, AMD64_RSP, arg_offsets [3], 0, 8); + amd64_mov_membase_imm (code, AMD64_RSP, arg_offsets [3], 0, sizeof(mgreg_t)); } else if (corlib) { - amd64_mov_membase_reg (code, AMD64_RSP, arg_offsets [3], AMD64_ARG_REG2, 8); + amd64_mov_membase_reg (code, AMD64_RSP, arg_offsets [3], AMD64_ARG_REG2, sizeof(mgreg_t)); if (llvm_abs) /* * The caller is LLVM code which passes the absolute address not a pc offset, @@ -468,7 +477,7 @@ get_throw_trampoline (MonoTrampInfo **info, gboolean rethrow, gboolean corlib, g */ amd64_neg_membase (code, AMD64_RSP, arg_offsets [3]); } else { - amd64_mov_membase_imm (code, AMD64_RSP, arg_offsets [3], rethrow, 8); + amd64_mov_membase_imm (code, AMD64_RSP, arg_offsets [3], rethrow, sizeof(mgreg_t)); } if (aot) { @@ -482,7 +491,9 @@ get_throw_trampoline (MonoTrampInfo **info, gboolean rethrow, gboolean corlib, g mono_arch_flush_icache (start, code - start); - g_assert ((code - start) < buf_size); + g_assert ((code - start) < kMaxCodeSize); + + nacl_global_codeman_validate(&start, kMaxCodeSize, &code); if (info) *info = mono_tramp_info_create (g_strdup (tramp_name), start, code - start, ji, unwind_ops); @@ -550,7 +561,7 @@ mono_arch_find_jit_info (MonoDomain *domain, MonoJitTlsData *jit_tls, *new_ctx = *ctx; if (ji != NULL) { - gssize regs [MONO_MAX_IREGS + 1]; + mgreg_t regs [MONO_MAX_IREGS + 1]; guint8 *cfa; guint32 unwind_info_len; guint8 *unwind_info; @@ -602,7 +613,7 @@ mono_arch_find_jit_info (MonoDomain *domain, MonoJitTlsData *jit_tls, new_ctx->r15 = regs [AMD64_R15]; /* The CFA becomes the new SP value */ - new_ctx->rsp = (gssize)cfa; + new_ctx->rsp = (mgreg_t)cfa; /* Adjust IP */ new_ctx->rip --; @@ -655,7 +666,7 @@ mono_arch_find_jit_info (MonoDomain *domain, MonoJitTlsData *jit_tls, * The rsp field is set just before the call which transitioned to native * code. Obtain the rip from the stack. 
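 * (Since rsp was captured before the call, the call instruction pushed the
 * return address just below it, which is why it is read back from
 * rsp - sizeof(mgreg_t) below.)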
*/ - rip = *(guint64*)((*lmf)->rsp - sizeof (gpointer)); + rip = *(guint64*)((*lmf)->rsp - sizeof(mgreg_t)); } ji = mini_jit_info_table_find (domain, (gpointer)rip, NULL); @@ -776,6 +787,10 @@ mono_arch_handle_exception (void *sigctx, gpointer obj, gboolean test_only) void mono_arch_sigctx_to_monoctx (void *sigctx, MonoContext *mctx) { +#if defined(__native_client_codegen__) || defined(__native_client__) + printf("WARNING: mono_arch_sigctx_to_monoctx() called!\n"); +#endif + #if defined(MONO_ARCH_USE_SIGACTION) ucontext_t *ctx = (ucontext_t*)sigctx; @@ -814,6 +829,10 @@ mono_arch_sigctx_to_monoctx (void *sigctx, MonoContext *mctx) void mono_arch_monoctx_to_sigctx (MonoContext *mctx, void *sigctx) { +#if defined(__native_client__) || defined(__native_client_codegen__) + printf("WARNING: mono_arch_monoctx_to_sigctx() called!\n"); +#endif + #if defined(MONO_ARCH_USE_SIGACTION) ucontext_t *ctx = (ucontext_t*)sigctx; @@ -971,8 +990,9 @@ mono_arch_get_throw_pending_exception (MonoTrampInfo **info, gboolean aot) gpointer throw_trampoline; MonoJumpInfo *ji = NULL; GSList *unwind_ops = NULL; + const guint kMaxCodeSize = NACL_SIZE (128, 256); - start = code = mono_global_codeman_reserve (128); + start = code = mono_global_codeman_reserve (kMaxCodeSize); /* We are in the frame of a managed method after a call */ /* @@ -1065,7 +1085,9 @@ mono_arch_get_throw_pending_exception (MonoTrampInfo **info, gboolean aot) /* Return to original code */ amd64_jump_reg (code, AMD64_R11); - g_assert ((code - start) < 128); + g_assert ((code - start) < kMaxCodeSize); + + nacl_global_codeman_validate(&start, kMaxCodeSize, &code); if (info) *info = mono_tramp_info_create (g_strdup_printf ("throw_pending_exception"), start, code - start, ji, unwind_ops); @@ -1407,10 +1429,12 @@ mono_tasklets_arch_restore (void) static guint8* saved = NULL; guint8 *code, *start; int cont_reg = AMD64_R9; /* register usable on both call conventions */ + const guint kMaxCodeSize = NACL_SIZE (64, 128); + if (saved) return (MonoContinuationRestore)saved; - code = start = mono_global_codeman_reserve (64); + code = start = mono_global_codeman_reserve (kMaxCodeSize); /* the signature is: restore (MonoContinuation *cont, int state, MonoLMF **lmf_addr) */ /* cont is in AMD64_ARG_REG1 ($rcx or $rdi) * state is in AMD64_ARG_REG2 ($rdx or $rsi) @@ -1436,7 +1460,9 @@ mono_tasklets_arch_restore (void) amd64_mov_reg_membase (code, AMD64_R12, AMD64_RCX, G_STRUCT_OFFSET (MonoLMF, r12), 8); amd64_mov_reg_membase (code, AMD64_R13, AMD64_RCX, G_STRUCT_OFFSET (MonoLMF, r13), 8); amd64_mov_reg_membase (code, AMD64_R14, AMD64_RCX, G_STRUCT_OFFSET (MonoLMF, r14), 8); +#if !defined(__native_client_codegen__) amd64_mov_reg_membase (code, AMD64_R15, AMD64_RCX, G_STRUCT_OFFSET (MonoLMF, r15), 8); +#endif #ifdef TARGET_WIN32 amd64_mov_reg_membase (code, AMD64_RDI, AMD64_RCX, G_STRUCT_OFFSET (MonoLMF, rdi), 8); amd64_mov_reg_membase (code, AMD64_RSI, AMD64_RCX, G_STRUCT_OFFSET (MonoLMF, rsi), 8); @@ -1449,7 +1475,10 @@ mono_tasklets_arch_restore (void) /* state is already in rax */ amd64_jump_membase (code, cont_reg, G_STRUCT_OFFSET (MonoContinuation, return_ip)); - g_assert ((code - start) <= 64); + g_assert ((code - start) <= kMaxCodeSize); + + nacl_global_codeman_validate(&start, kMaxCodeSize, &code); + saved = start; return (MonoContinuationRestore)saved; } diff --git a/mono/mini/exceptions-x86.c b/mono/mini/exceptions-x86.c index e6af5ee0824..d3f09813c20 100644 --- a/mono/mini/exceptions-x86.c +++ b/mono/mini/exceptions-x86.c @@ -308,6 +308,8 @@ 
mono_arch_get_restore_context (MonoTrampInfo **info, gboolean aot) /* jump to the saved IP */ x86_ret (code); + nacl_global_codeman_validate(&start, 128, &code); + if (info) *info = mono_tramp_info_create (g_strdup_printf ("restore_context"), start, code - start, ji, unwind_ops); else { @@ -335,11 +337,7 @@ mono_arch_get_call_filter (MonoTrampInfo **info, gboolean aot) guint8 *code; MonoJumpInfo *ji = NULL; GSList *unwind_ops = NULL; -#ifdef __native_client_codegen__ - guint kMaxCodeSize = 128; -#else - guint kMaxCodeSize = 64; -#endif /* __native_client_codegen__ */ + guint kMaxCodeSize = NACL_SIZE (64, 128); /* call_filter (MonoContext *ctx, unsigned long eip) */ start = code = mono_global_codeman_reserve (kMaxCodeSize); @@ -387,6 +385,8 @@ mono_arch_get_call_filter (MonoTrampInfo **info, gboolean aot) x86_leave (code); x86_ret (code); + nacl_global_codeman_validate(&start, kMaxCodeSize, &code); + if (info) *info = mono_tramp_info_create (g_strdup_printf ("call_filter"), start, code - start, ji, unwind_ops); else { @@ -515,11 +515,8 @@ get_throw_trampoline (const char *name, gboolean rethrow, gboolean llvm, gboolea int i, stack_size, stack_offset, arg_offsets [5], regs_offset; MonoJumpInfo *ji = NULL; GSList *unwind_ops = NULL; -#ifdef __native_client_codegen__ - guint kMaxCodeSize = 256; -#else - guint kMaxCodeSize = 128; -#endif + guint kMaxCodeSize = NACL_SIZE (128, 256); + start = code = mono_global_codeman_reserve (kMaxCodeSize); stack_size = 128; @@ -629,6 +626,8 @@ get_throw_trampoline (const char *name, gboolean rethrow, gboolean llvm, gboolea } x86_breakpoint (code); + nacl_global_codeman_validate(&start, kMaxCodeSize, &code); + g_assert ((code - start) < kMaxCodeSize); if (info) diff --git a/mono/mini/fsacheck.c b/mono/mini/fsacheck.c index e1d4160ac27..6ee66bb8c58 100644 --- a/mono/mini/fsacheck.c +++ b/mono/mini/fsacheck.c @@ -1,11 +1,14 @@ #include #include +#include +#include #include #include #include #include #include +#include extern void* mono_aot_module_mscorlib_info; extern void* mono_aot_module_System_Core_info; @@ -32,72 +35,97 @@ extern void* mono_aot_module_devirtualization_info; extern void* mono_aot_module_generics_info; extern void* mono_aot_module_generics_variant_types_info; extern void* mono_aot_module_basic_simd_info; +extern void* mono_aot_module_gc_stress_info; +extern void* mono_aot_module_imt_big_iface_test_info; +extern void* mono_aot_module_make_imt_test_info; /* extern void* mono_aot_module_thread_stress_info; */ - +extern void* mono_aot_module_iltests_info; extern void mono_aot_register_module(void *aot_info); extern void mono_aot_init(void); extern void mono_jit_set_aot_only(mono_bool aot_only); extern MonoDomain * mini_init (const char *filename, const char *runtime_version); - -void try_one(char *mname) { - MonoDomain *domain; - MonoAssembly *ma; - MonoImage *mi; - MonoClass *mc; - MonoMethodDesc *mmd; - MonoMethod *mm; - MonoObject *mo; - MonoArray *arg_array; - void *args [1]; - char *cstr_arg = "20"; - - mono_jit_set_aot_only(1); - domain = mono_jit_init(mname); - printf("mono domain: %p\n", domain); - - ma = mono_domain_assembly_open(domain, mname); - if (0 == ma) { - printf("ERROR: could not open mono assembly\n"); - exit(-1); +int run_all_test_methods(MonoClass *klass) { + void * iter = NULL; + MonoMethod *mm = NULL; + int count = 0; + int passed = 0; + printf("Running test methods without reflection\n"); + while (NULL != (mm = mono_class_get_methods(klass, &iter))) { + long expected_result; + const char *name = mono_method_get_name(mm); + 
char *end = NULL; + if (strncmp(name, "test_", 5)) continue; + printf("=== Test %d, method %s\n", count, mono_method_get_name(mm)); + expected_result = strtol(name + 5, &end, 10); + if (name == end) { + printf(" warning: could not determine expected return value\n"); + expected_result = 0; + } + MonoObject *mo = mono_runtime_invoke(mm, NULL, NULL, NULL); + int *ret = mono_object_unbox(mo); + if (ret && *ret == expected_result) { + printf(" passed!\n"); + passed++; + } else { + printf(" FAILED, expected %d, returned %p, %d\n", expected_result, ret, + ret != NULL ? *ret : 0); + } + count++; } - printf("opened mono assembly: %p\n", ma); - - mi = mono_assembly_get_image(ma); - printf("mono image: %p\n", mi); - - mo = mono_string_new(domain, cstr_arg); - mc = mono_class_from_name(mono_get_corlib(), "System", "String"); - printf("string class: %p\n", mc); - arg_array = mono_array_new(domain, mc, 1); - mono_array_setref(arg_array, 0, mo); - args[0] = arg_array; + if (count > 0) { + printf("============================================\n"); + printf("Final count: %d tests, %d pass, %.2f%%\n", count, passed, + (double)passed / count * 100.0); + } else { + printf("no test methods found.\n"); + } + return count; +} - mmd = mono_method_desc_new("Tests:Main()", 1); - mm = mono_method_desc_search_in_image(mmd, mi); - if (0 == mm) { - mmd = mono_method_desc_new("Tests:Main(string[])", 1); - mm = mono_method_desc_search_in_image(mmd, mi); - if (0 == mm) { - mmd = mono_method_desc_new("SimdTests:Main(string[])", 1); - mm = mono_method_desc_search_in_image(mmd, mi); - if (0 == mm) { - printf("Couldn't find Tests:Main(), Tests:Main(string[]) or SimdTests:Main(string[])\n"); - exit(-1); +#if defined(__native_client__) +extern void* mono_aot_module_nacl_info; +extern char* nacl_mono_path; +char *load_corlib_data() { + FILE *mscorlib; + static char *corlib_data = NULL; + if (corlib_data) return corlib_data; + + mscorlib = fopen("mscorlib.dll", "r"); + if (NULL != mscorlib) { + size_t size; + struct stat st; + if (0 == stat("mscorlib.dll", &st)) { + size = st.st_size; + printf("reading mscorlib.dll, size %ld\n", size); + corlib_data = malloc(size); + if (corlib_data != NULL) { + while (fread(corlib_data, 1, size, mscorlib) != 0) ; + if (!ferror(mscorlib)) { + mono_set_corlib_data(corlib_data, size); + } else { + perror("error reading mscorlib.dll"); + free(corlib_data); + corlib_data = NULL; + } + } else { + perror("Could not allocate memory"); } + } else { + perror("stat error"); } + fclose(mscorlib); } - printf("mono desc method: %p\n", mmd); - printf("mono method: %p\n", mm); - - mo = mono_runtime_invoke(mm, NULL, args, NULL); - printf("mono object: %p\n", mo); - - mono_jit_cleanup(domain); + return corlib_data; } +#endif -int main(int argc, char *argv[]) { +/* Initialize Mono. 
Must run only once per process */ +MonoDomain *init_mono(char *mname) { + MonoDomain *domain = NULL; +#ifdef AOT_VERSION + mono_jit_set_aot_only(1); mono_aot_register_module(mono_aot_module_mscorlib_info); mono_aot_register_module(mono_aot_module_TestDriver_info); mono_aot_register_module(mono_aot_module_System_Core_info); @@ -120,163 +148,122 @@ int main(int argc, char *argv[]) { mono_aot_register_module(mono_aot_module_basic_math_info); mono_aot_register_module(mono_aot_module_exceptions_info); mono_aot_register_module(mono_aot_module_devirtualization_info); - /* mono_aot_register_module(mono_aot_module_generics_info); mono_aot_register_module(mono_aot_module_generics_variant_types_info); - */ - - /* mono_aot_register_module(mono_aot_module_thread_stress_info); */ - if (argc < 2) { - printf("no test specified; running basic.exe\n"); - printf("==========================\n"); - try_one("basic.exe"); - printf("==========================\n"); - } else { - printf("\nProgram %s %s output:\n", argv[0], argv[1]); - printf("==========================\n\n"); - try_one(argv[1]); + mono_aot_register_module(mono_aot_module_gc_stress_info); + mono_aot_register_module(mono_aot_module_imt_big_iface_test_info); + mono_aot_register_module(mono_aot_module_iltests_info); +#endif + /* mono_aot_register_module(mono_aot_module_make_imt_test_info); */ + /* mono_aot_register_module(mono_aot_module_thread_stress_info); */ +#if defined(__native_client__) +#ifdef AOT_VERSION + mono_aot_register_module(mono_aot_module_nacl_info); +#endif + + /* Test file-less shortcut for loading mscorlib metadata */ + load_corlib_data(); + nacl_mono_path = strdup("."); +#endif + /* Uncomment the following if something is going wrong */ + /* mono_trace_set_level_string("info"); */ + domain = mono_jit_init(mname); + if (NULL == domain) { + printf("ERROR: mono_jit_init failure\n"); + exit(-1); } - - return 0; + return domain; } -#include -#include - -#include -#include -#include -#include -#include - -extern void* mono_aot_module_mscorlib_info; -extern void* mono_aot_module_System_Core_info; -extern void* mono_aot_module_System_info; -extern void* mono_aot_module_Mono_Posix_info; -extern void* mono_aot_module_System_Configuration_info; -extern void* mono_aot_module_System_Security_info; -extern void* mono_aot_module_System_Xml_info; -/* extern void* mono_aot_module_System_Threading_info; */ -extern void* mono_aot_module_Mono_Security_info; -extern void* mono_aot_module_Mono_Simd_info; -extern void* mono_aot_module_TestDriver_info; -extern void* mono_aot_module_basic_info; -extern void* mono_aot_module_basic_float_info; -extern void* mono_aot_module_basic_long_info; -extern void* mono_aot_module_basic_calls_info; -extern void* mono_aot_module_basic_simd_info; -extern void* mono_aot_module_objects_info; -extern void* mono_aot_module_arrays_info; -extern void* mono_aot_module_basic_math_info; -extern void* mono_aot_module_exceptions_info; -extern void* mono_aot_module_devirtualization_info; -extern void* mono_aot_module_generics_info; -extern void* mono_aot_module_generics_variant_types_info; -extern void* mono_aot_module_basic_simd_info; -/* extern void* mono_aot_module_thread_stress_info; */ - - -extern void mono_aot_register_module(void *aot_info); -extern void mono_aot_init(void); -extern void mono_jit_set_aot_only(mono_bool aot_only); -extern MonoDomain * mini_init (const char *filename, const char *runtime_version); - - -void try_one(char *mname) { - MonoDomain *domain; +/* Run all tests from one assembly file */ +int try_one(char 
*mname, MonoDomain *domain) { MonoAssembly *ma; MonoImage *mi; MonoClass *mc; MonoMethodDesc *mmd; MonoMethod *mm; MonoObject *mo; + MonoString *monostring_arg; MonoArray *arg_array; + int *failures = NULL; + const int kUseTestDriver = 1; + int test_count = 0; void *args [1]; - char *cstr_arg = "20"; - - mono_jit_set_aot_only(1); - domain = mono_jit_init(mname); - printf("mono domain: %p\n", domain); + char *cstr_arg = "--timing"; ma = mono_domain_assembly_open(domain, mname); - if (0 == ma) { + if (NULL == ma) { printf("ERROR: could not open mono assembly\n"); exit(-1); } - printf("opened mono assembly: %p\n", ma); mi = mono_assembly_get_image(ma); - printf("mono image: %p\n", mi); + if (NULL == mi) { + printf("ERROR: could not get assembly image\n"); + exit(-1); + } - mo = mono_string_new(domain, cstr_arg); + monostring_arg = mono_string_new(domain, cstr_arg); mc = mono_class_from_name(mono_get_corlib(), "System", "String"); - printf("string class: %p\n", mc); - arg_array = mono_array_new(domain, mc, 1); - mono_array_setref(arg_array, 0, mo); + if (0 == mc) { + printf("ERROR: could not find mono string class\n"); + exit(-1); + } + + // to pass a string argument, change the 0 to a 1 and uncomment + // mono_array_setref below + arg_array = mono_array_new(domain, mc, 0); + //mono_array_setref(arg_array, 0, monostring_arg); args[0] = arg_array; - mmd = mono_method_desc_new("Tests:Main()", 1); - mm = mono_method_desc_search_in_image(mmd, mi); - if (0 == mm) { - mmd = mono_method_desc_new("Tests:Main(string[])", 1); + if (!kUseTestDriver) { + mc = mono_class_from_name(mi, "", "Tests"); + if (NULL == mc) { + printf("could not open Tests class\n"); + exit(-1); + } + test_count = run_all_test_methods(mc); + } + /* If run_all_test_methods didn't find any tests, try Main */ + if (kUseTestDriver || test_count == 0) { + mmd = mono_method_desc_new("Tests:Main()", 1); mm = mono_method_desc_search_in_image(mmd, mi); if (0 == mm) { - mmd = mono_method_desc_new("SimdTests:Main(string[])", 1); + mmd = mono_method_desc_new("Tests:Main(string[])", 1); mm = mono_method_desc_search_in_image(mmd, mi); if (0 == mm) { - printf("Couldn't find Tests:Main(), Tests:Main(string[]) or SimdTests:Main(string[])\n"); + printf("Couldn't find Tests:Main() or Tests:Main(string[])\n"); exit(-1); } } - } - printf("mono desc method: %p\n", mmd); - printf("mono method: %p\n", mm); - - mo = mono_runtime_invoke(mm, NULL, args, NULL); - printf("mono object: %p\n", mo); - mono_jit_cleanup(domain); + mo = mono_runtime_invoke(mm, NULL, args, NULL); + failures = mo != NULL ? mono_object_unbox(mo) : NULL; + if (NULL == failures || *failures != 0) { + printf("--------------------> Failed"); + } + } + return failures != NULL ? *
failures : 1; } int main(int argc, char *argv[]) { - mono_aot_register_module(mono_aot_module_mscorlib_info); - mono_aot_register_module(mono_aot_module_TestDriver_info); - mono_aot_register_module(mono_aot_module_System_Core_info); - mono_aot_register_module(mono_aot_module_System_info); - mono_aot_register_module(mono_aot_module_Mono_Posix_info); - mono_aot_register_module(mono_aot_module_System_Configuration_info); - mono_aot_register_module(mono_aot_module_System_Security_info); - mono_aot_register_module(mono_aot_module_System_Xml_info); - mono_aot_register_module(mono_aot_module_Mono_Security_info); - /* mono_aot_register_module(mono_aot_module_System_Threading_info); */ - mono_aot_register_module(mono_aot_module_Mono_Simd_info); + MonoDomain *domain; + int failures = 0; - mono_aot_register_module(mono_aot_module_basic_info); - mono_aot_register_module(mono_aot_module_basic_float_info); - mono_aot_register_module(mono_aot_module_basic_long_info); - mono_aot_register_module(mono_aot_module_basic_calls_info); - mono_aot_register_module(mono_aot_module_basic_simd_info); - mono_aot_register_module(mono_aot_module_objects_info); - mono_aot_register_module(mono_aot_module_arrays_info); - mono_aot_register_module(mono_aot_module_basic_math_info); - mono_aot_register_module(mono_aot_module_exceptions_info); - mono_aot_register_module(mono_aot_module_devirtualization_info); - /* - mono_aot_register_module(mono_aot_module_generics_info); - mono_aot_register_module(mono_aot_module_generics_variant_types_info); - */ - - /* mono_aot_register_module(mono_aot_module_thread_stress_info); */ if (argc < 2) { printf("no test specified; running basic.exe\n"); - printf("==========================\n"); - try_one("basic.exe"); - printf("==========================\n"); + printf("================================\n"); + domain = init_mono("basic.exe"); + try_one("basic.exe", domain); } else { - printf("\nProgram %s %s output:\n", argv[0], argv[1]); - printf("==========================\n\n"); - try_one(argv[1]); + domain = init_mono(argv[1]); + int i; + for (i = 1; i < argc; i++) { + printf("\nRunning tests from %s:\n", argv[i]); + printf("===============================\n\n"); + failures += try_one(argv[i], domain); + } } - - return 0; + mono_jit_cleanup(domain); + return failures; } diff --git a/mono/mini/genmdesc.c b/mono/mini/genmdesc.c index 0c942afabaf..20a333b1670 100644 --- a/mono/mini/genmdesc.c +++ b/mono/mini/genmdesc.c @@ -11,6 +11,8 @@ #include #include +void __nacl_suspend_thread_if_needed() {} + #define MINI_OP(a,b,dest,src1,src2) b, #define MINI_OP3(a,b,dest,src1,src2,src3) b, /* keep in sync with the enum in mini.h */ diff --git a/mono/mini/genmdesc.pl b/mono/mini/genmdesc.pl index 7d66e31d761..8c13a6171dd 100644 --- a/mono/mini/genmdesc.pl +++ b/mono/mini/genmdesc.pl @@ -79,7 +79,7 @@ sub load_opcodes if ($arch =~ "__i386__") { $arch_define = "TARGET_X86"; } - if ($arch =~ " __x86_64__") { + if ($arch =~ "__x86_64__") { $arch_define = "TARGET_AMD64"; } if ($arch =~ "__arm__") { diff --git a/mono/mini/jit-icalls.c b/mono/mini/jit-icalls.c index 1e4cbf540d9..e5f08461daf 100644 --- a/mono/mini/jit-icalls.c +++ b/mono/mini/jit-icalls.c @@ -926,6 +926,16 @@ mono_lconv_to_r8_un (guint64 a) } #endif +#if defined(__native_client_codegen__) || defined(__native_client__) +/* When we cross-compile to Native Client we can't directly embed calls */ +/* to the math library on the host. 
This will use the fmod on the target*/ +double +mono_fmod(double a, double b) +{ + return fmod(a, b); +} +#endif + gpointer mono_helper_compile_generic_method (MonoObject *obj, MonoMethod *method, gpointer *this_arg) { diff --git a/mono/mini/jit-icalls.h b/mono/mini/jit-icalls.h index d0c7214a5ff..16679d36ad0 100644 --- a/mono/mini/jit-icalls.h +++ b/mono/mini/jit-icalls.h @@ -85,6 +85,10 @@ double mono_conv_to_r8_un (guint32 a) MONO_INTERNAL; double mono_lconv_to_r8_un (guint64 a) MONO_INTERNAL; +#if defined(__native_client_codegen__) || defined(__native_client__) +double mono_fmod(double a, double b) MONO_INTERNAL; +#endif + gpointer mono_helper_compile_generic_method (MonoObject *obj, MonoMethod *method, gpointer *this_arg) MONO_INTERNAL; MonoString *mono_helper_ldstr (MonoImage *image, guint32 idx) MONO_INTERNAL; diff --git a/mono/mini/method-to-ir.c b/mono/mini/method-to-ir.c index e26aef5a752..a34d674696d 100644 --- a/mono/mini/method-to-ir.c +++ b/mono/mini/method-to-ir.c @@ -832,7 +832,7 @@ type_from_op (MonoInst *ins, MonoInst *src1, MonoInst *src2) { case OP_LCOMPARE: case OP_ICOMPARE: ins->type = bin_comp_table [src1->type] [src2->type] ? STACK_I4: STACK_INV; - if ((src1->type == STACK_I8) || ((SIZEOF_REGISTER == 8) && ((src1->type == STACK_PTR) || (src1->type == STACK_OBJ) || (src1->type == STACK_MP)))) + if ((src1->type == STACK_I8) || ((SIZEOF_VOID_P == 8) && ((src1->type == STACK_PTR) || (src1->type == STACK_OBJ) || (src1->type == STACK_MP)))) ins->opcode = OP_LCOMPARE; else if (src1->type == STACK_R8) ins->opcode = OP_FCOMPARE; @@ -841,7 +841,7 @@ type_from_op (MonoInst *ins, MonoInst *src1, MonoInst *src2) { break; case OP_ICOMPARE_IMM: ins->type = bin_comp_table [src1->type] [src1->type] ? STACK_I4 : STACK_INV; - if ((src1->type == STACK_I8) || ((SIZEOF_REGISTER == 8) && ((src1->type == STACK_PTR) || (src1->type == STACK_OBJ) || (src1->type == STACK_MP)))) + if ((src1->type == STACK_I8) || ((SIZEOF_VOID_P == 8) && ((src1->type == STACK_PTR) || (src1->type == STACK_OBJ) || (src1->type == STACK_MP)))) ins->opcode = OP_LCOMPARE_IMM; break; case CEE_BEQ: @@ -929,7 +929,7 @@ type_from_op (MonoInst *ins, MonoInst *src1, MonoInst *src2) { break; case STACK_PTR: case STACK_MP: -#if SIZEOF_REGISTER == 8 +#if SIZEOF_VOID_P == 8 ins->opcode = OP_LCONV_TO_U; #else ins->opcode = OP_MOVE; @@ -5730,6 +5730,11 @@ mono_method_to_ir (MonoCompile *cfg, MonoMethod *method, MonoBasicBlock *start_b cfg->bb_entry = start_bblock; start_bblock->cil_code = NULL; start_bblock->cil_length = 0; +#if defined(__native_client_codegen__) + MONO_INST_NEW (cfg, ins, OP_NACL_GC_SAFE_POINT); + ins->dreg = alloc_dreg (cfg, STACK_I4); + MONO_ADD_INS (start_bblock, ins); +#endif /* EXIT BLOCK */ NEW_BBLOCK (cfg, end_bblock); @@ -9902,7 +9907,7 @@ mono_method_to_ir (MonoCompile *cfg, MonoMethod *method, MonoBasicBlock *start_b cmp->sreg2 = sp [1]->dreg; type_from_op (cmp, sp [0], sp [1]); CHECK_TYPE (cmp); - if ((sp [0]->type == STACK_I8) || ((SIZEOF_REGISTER == 8) && ((sp [0]->type == STACK_PTR) || (sp [0]->type == STACK_OBJ) || (sp [0]->type == STACK_MP)))) + if ((sp [0]->type == STACK_I8) || ((SIZEOF_VOID_P == 8) && ((sp [0]->type == STACK_PTR) || (sp [0]->type == STACK_OBJ) || (sp [0]->type == STACK_MP)))) cmp->opcode = OP_LCOMPARE; else if (sp [0]->type == STACK_R8) cmp->opcode = OP_FCOMPARE; @@ -10835,7 +10840,11 @@ op_to_op_src1_membase (int load_opcode, int opcode) switch (opcode) { case OP_X86_PUSH: +#ifdef __mono_ilp32__ + if (load_opcode == OP_LOADI8_MEMBASE) +#else if ((load_opcode == 
OP_LOAD_MEMBASE) || (load_opcode == OP_LOADI8_MEMBASE)) +#endif return OP_X86_PUSH_MEMBASE; break; /* FIXME: This only works for 32 bit immediates @@ -10850,7 +10859,13 @@ op_to_op_src1_membase (int load_opcode, int opcode) break; case OP_COMPARE: case OP_LCOMPARE: +#ifdef __mono_ilp32__ + if (load_opcode == OP_LOAD_MEMBASE) + return OP_AMD64_ICOMPARE_MEMBASE_REG; + if (load_opcode == OP_LOADI8_MEMBASE) +#else if ((load_opcode == OP_LOAD_MEMBASE) || (load_opcode == OP_LOADI8_MEMBASE)) +#endif return OP_AMD64_COMPARE_MEMBASE_REG; break; case OP_ICOMPARE: @@ -10888,7 +10903,11 @@ op_to_op_src2_membase (int load_opcode, int opcode) #endif #ifdef TARGET_AMD64 +#ifdef __mono_ilp32__ + if ((load_opcode == OP_LOADI4_MEMBASE) || (load_opcode == OP_LOADU4_MEMBASE) || (load_opcode == OP_LOAD_MEMBASE) ) { +#else if ((load_opcode == OP_LOADI4_MEMBASE) || (load_opcode == OP_LOADU4_MEMBASE)) { +#endif switch (opcode) { case OP_ICOMPARE: return OP_AMD64_ICOMPARE_REG_MEMBASE; @@ -10903,7 +10922,11 @@ op_to_op_src2_membase (int load_opcode, int opcode) case OP_IXOR: return OP_X86_XOR_REG_MEMBASE; } +#ifdef __mono_ilp32__ + } else if (load_opcode == OP_LOADI8_MEMBASE) { +#else } else if ((load_opcode == OP_LOADI8_MEMBASE) || (load_opcode == OP_LOAD_MEMBASE)) { +#endif switch (opcode) { case OP_COMPARE: case OP_LCOMPARE: diff --git a/mono/mini/mini-amd64.c b/mono/mini/mini-amd64.c index cb1edb8cdc0..49c3a53c715 100644 --- a/mono/mini/mini-amd64.c +++ b/mono/mini/mini-amd64.c @@ -205,11 +205,278 @@ amd64_is_near_call (guint8 *code) return code [0] == 0xe8; } +#ifdef __native_client_codegen__ + +/* Keep track of instruction "depth", that is, the level of sub-instruction */ +/* for any given instruction. For instance, amd64_call_reg resolves to */ +/* amd64_call_reg_internal, which uses amd64_alu_* macros, etc. */ +/* We only want to force bundle alignment for the top level instruction, */ +/* so NaCl pseudo-instructions can be implemented with sub instructions. 
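   In rough outline, a top-level emit looks like this (a sketch of the
   intended pattern, not the literal macro expansion):
     amd64_nacl_instruction_pre ();                  depth 0 -> 1
     ... emit the instruction, possibly through sub-macros
         that also call the pre/post pair ...
     amd64_nacl_instruction_post (&start, &code);    depth back to 0
   Only the outermost post call, the one that returns the depth to zero,
   forces bundle alignment.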
*/ +static guint32 nacl_instruction_depth; + +static guint32 nacl_rex_tag; +static guint32 nacl_legacy_prefix_tag; + +void +amd64_nacl_clear_legacy_prefix_tag () +{ + TlsSetValue (nacl_legacy_prefix_tag, NULL); +} + +void +amd64_nacl_tag_legacy_prefix (guint8* code) +{ + if (TlsGetValue (nacl_legacy_prefix_tag) == NULL) + TlsSetValue (nacl_legacy_prefix_tag, code); +} + +void +amd64_nacl_tag_rex (guint8* code) +{ + TlsSetValue (nacl_rex_tag, code); +} + +guint8* +amd64_nacl_get_legacy_prefix_tag () +{ + return (guint8*)TlsGetValue (nacl_legacy_prefix_tag); +} + +guint8* +amd64_nacl_get_rex_tag () +{ + return (guint8*)TlsGetValue (nacl_rex_tag); +} + +/* Increment the instruction "depth" described above */ +void +amd64_nacl_instruction_pre () +{ + intptr_t depth = (intptr_t) TlsGetValue (nacl_instruction_depth); + depth++; + TlsSetValue (nacl_instruction_depth, (gpointer)depth); +} + +/* amd64_nacl_instruction_post: Decrement instruction "depth", force bundle */ +/* alignment if depth == 0 (top level instruction) */ +/* IN: start, end pointers to instruction beginning and end */ +/* OUT: start, end pointers to beginning and end after possible alignment */ +/* GLOBALS: nacl_instruction_depth defined above */ +void +amd64_nacl_instruction_post (guint8 **start, guint8 **end) +{ + intptr_t depth = (intptr_t) TlsGetValue(nacl_instruction_depth); + depth--; + TlsSetValue (nacl_instruction_depth, (void*)depth); + + g_assert ( depth >= 0 ); + if (depth == 0) { + uintptr_t space_in_block; + uintptr_t instlen; + guint8 *prefix = amd64_nacl_get_legacy_prefix_tag (); + /* if legacy prefix is present, and if it was emitted before */ + /* the start of the instruction sequence, adjust the start */ + if (prefix != NULL && prefix < *start) { + g_assert (*start - prefix <= 3);/* only 3 are allowed */ + *start = prefix; + } + space_in_block = kNaClAlignment - ((uintptr_t)(*start) & kNaClAlignmentMask); + instlen = (uintptr_t)(*end - *start); + /* Only check for instructions which are less than */ + /* kNaClAlignment. The only instructions that should ever */ + /* be that long are call sequences, which are already */ + /* padded out to align the return to the next bundle. */ + if (instlen > space_in_block && instlen < kNaClAlignment) { + const size_t MAX_NACL_INST_LENGTH = kNaClAlignment; + guint8 copy_of_instruction[MAX_NACL_INST_LENGTH]; + const size_t length = (size_t)((*end)-(*start)); + g_assert (length < MAX_NACL_INST_LENGTH); + + memcpy (copy_of_instruction, *start, length); + *start = mono_arch_nacl_pad (*start, space_in_block); + memcpy (*start, copy_of_instruction, length); + *end = *start + length; + } + amd64_nacl_clear_legacy_prefix_tag (); + amd64_nacl_tag_rex (NULL); + } +} + +/* amd64_nacl_membase_handler: ensure all access to memory of the form */ +/* OFFSET(%rXX) is sandboxed. For allowable base registers %rip, %rbp, */ +/* %rsp, and %r15, emit the membase as usual. 
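   (A plain 16(%rbp), for example, is emitted unchanged.)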
For all other registers, */ +/* make sure the upper 32-bits are cleared, and use that register in the */ +/* index field of a new address of this form: OFFSET(%r15,%eXX,1) */ +/* IN: code */ +/* pointer to current instruction stream (in the */ +/* middle of an instruction, after opcode is emitted) */ +/* basereg/offset/dreg */ +/* operands of normal membase address */ +/* OUT: code */ +/* pointer to the end of the membase/memindex emit */ +/* GLOBALS: nacl_rex_tag */ +/* position in instruction stream that rex prefix was emitted */ +/* nacl_legacy_prefix_tag */ +/* (possibly NULL) position in instruction of legacy x86 prefix */ +void +amd64_nacl_membase_handler (guint8** code, gint8 basereg, gint32 offset, gint8 dreg) +{ + gint8 true_basereg = basereg; + + /* Cache these values, they might change */ + /* as new instructions are emitted below. */ + guint8* rex_tag = amd64_nacl_get_rex_tag (); + guint8* legacy_prefix_tag = amd64_nacl_get_legacy_prefix_tag (); + + /* 'basereg' is given masked to 0x7 at this point, so check */ + /* the rex prefix to see if this is an extended register. */ + if ((rex_tag != NULL) && IS_REX(*rex_tag) && (*rex_tag & AMD64_REX_B)) { + true_basereg |= 0x8; + } + +#define X86_LEA_OPCODE (0x8D) + + if (!amd64_is_valid_nacl_base (true_basereg) && (*(*code-1) != X86_LEA_OPCODE)) { + guint8* old_instruction_start; + + /* This will hold the 'mov %eXX, %eXX' that clears the upper */ + /* 32-bits of the old base register (new index register) */ + guint8 buf[32]; + guint8* buf_ptr = buf; + size_t insert_len; + + g_assert (rex_tag != NULL); + + if (IS_REX(*rex_tag)) { + /* The old rex.B should be the new rex.X */ + if (*rex_tag & AMD64_REX_B) { + *rex_tag |= AMD64_REX_X; + } + /* Since our new base is %r15 set rex.B */ + *rex_tag |= AMD64_REX_B; + } else { + /* Shift the instruction by one byte */ + /* so we can insert a rex prefix */ + memmove (rex_tag + 1, rex_tag, (size_t)(*code - rex_tag)); + *code += 1; + /* New rex prefix only needs rex.B for %r15 base */ + *rex_tag = AMD64_REX(AMD64_REX_B); + } + + if (legacy_prefix_tag) { + old_instruction_start = legacy_prefix_tag; + } else { + old_instruction_start = rex_tag; + } + + /* Clears the upper 32-bits of the previous base register */ + amd64_mov_reg_reg_size (buf_ptr, true_basereg, true_basereg, 4); + insert_len = buf_ptr - buf; + + /* Move the old instruction forward to make */ + /* room for 'mov' stored in 'buf_ptr' */ + memmove (old_instruction_start + insert_len, old_instruction_start, (size_t)(*code - old_instruction_start)); + *code += insert_len; + memcpy (old_instruction_start, buf, insert_len); + + /* Sandboxed replacement for the normal membase_emit */ + x86_memindex_emit (*code, dreg, AMD64_R15, offset, basereg, 0); + + } else { + /* Normal default behavior, emit membase memory location */ + x86_membase_emit_body (*code, dreg, basereg, offset); + } +} + + +static inline unsigned char* +amd64_skip_nops (unsigned char* code) +{ + guint8 in_nop; + do { + in_nop = 0; + if ( code[0] == 0x90) { + in_nop = 1; + code += 1; + } + if ( code[0] == 0x66 && code[1] == 0x90) { + in_nop = 1; + code += 2; + } + if (code[0] == 0x0f && code[1] == 0x1f + && code[2] == 0x00) { + in_nop = 1; + code += 3; + } + if (code[0] == 0x0f && code[1] == 0x1f + && code[2] == 0x40 && code[3] == 0x00) { + in_nop = 1; + code += 4; + } + if (code[0] == 0x0f && code[1] == 0x1f + && code[2] == 0x44 && code[3] == 0x00 + && code[4] == 0x00) { + in_nop = 1; + code += 5; + } + if (code[0] == 0x66 && code[1] == 0x0f + && code[2] == 0x1f && code[3] == 
0x44 + && code[4] == 0x00 && code[5] == 0x00) { + in_nop = 1; + code += 6; + } + if (code[0] == 0x0f && code[1] == 0x1f + && code[2] == 0x80 && code[3] == 0x00 + && code[4] == 0x00 && code[5] == 0x00 + && code[6] == 0x00) { + in_nop = 1; + code += 7; + } + if (code[0] == 0x0f && code[1] == 0x1f + && code[2] == 0x84 && code[3] == 0x00 + && code[4] == 0x00 && code[5] == 0x00 + && code[6] == 0x00 && code[7] == 0x00) { + in_nop = 1; + code += 8; + } + } while ( in_nop ); + return code; +} + +guint8* +mono_arch_nacl_skip_nops (guint8* code) +{ + return amd64_skip_nops(code); +} + +#endif /*__native_client_codegen__*/ + static inline void amd64_patch (unsigned char* code, gpointer target) { guint8 rex = 0; +#ifdef __native_client_codegen__ + code = amd64_skip_nops (code); +#endif +#if defined(__native_client_codegen__) && defined(__native_client__) + if (nacl_is_code_address (code)) { + /* For tail calls, code is patched after being installed */ + /* but not through the normal "patch callsite" method. */ + unsigned char buf[kNaClAlignment]; + unsigned char *aligned_code = (uintptr_t)code & ~kNaClAlignmentMask; + int ret; + memcpy (buf, aligned_code, kNaClAlignment); + /* Patch a temp buffer of bundle size, */ + /* then install to actual location. */ + amd64_patch (buf + ((uintptr_t)code - (uintptr_t)aligned_code), target); + ret = nacl_dyncode_modify (aligned_code, buf, kNaClAlignment); + g_assert (ret == 0); + return; + } + target = nacl_modify_patch_target (target); +#endif + /* Skip REX */ if ((code [0] >= 0x40) && (code [0] <= 0x4f)) { rex = code [0]; @@ -302,7 +569,9 @@ add_general (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo) if (*gr >= PARAM_REGS) { ainfo->storage = ArgOnStack; - (*stack_size) += sizeof (gpointer); + /* Since the same stack slot size is used for all arg */ + /* types, it needs to be big enough to hold them all */ + (*stack_size) += sizeof(mgreg_t); } else { ainfo->storage = ArgInIReg; @@ -324,7 +593,9 @@ add_float (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo, gboolean is_double) if (*gr >= FLOAT_PARAM_REGS) { ainfo->storage = ArgOnStack; - (*stack_size) += sizeof (gpointer); + /* Since the same stack slot size is used for both float */ + /* types, it needs to be big enough to hold them both */ + (*stack_size) += sizeof(mgreg_t); } else { /* A double register */ @@ -419,6 +690,32 @@ merge_argument_class_from_type (MonoType *type, ArgumentClass class1) return class1; } +#ifdef __native_client_codegen__ +const guint kNaClAlignment = kNaClAlignmentAMD64; +const guint kNaClAlignmentMask = kNaClAlignmentMaskAMD64; + +/* Default alignment for Native Client is 32-byte. */ +gint8 nacl_align_byte = -32; /* signed version of 0xe0 */ + +/* mono_arch_nacl_pad: Add pad bytes of alignment instructions at code, */ +/* Check that alignment doesn't cross an alignment boundary. 
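   A typical caller computes the pad from the current code position and only
   pads when not already on a bundle boundary; roughly (mirroring the
   basic-block alignment code later in this patch):
     pad = kNaClAlignment - ((uintptr_t)code & kNaClAlignmentMask);
     if (pad != kNaClAlignment)
         code = mono_arch_nacl_pad (code, pad);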
*/ +guint8* +mono_arch_nacl_pad(guint8 *code, int pad) +{ + const int kMaxPadding = 8; /* see amd64-codegen.h:amd64_padding_size() */ + + if (pad == 0) return code; + /* assertion: alignment cannot cross a block boundary */ + g_assert (((uintptr_t)code & (~kNaClAlignmentMask)) == + (((uintptr_t)code + pad - 1) & (~kNaClAlignmentMask))); + while (pad >= kMaxPadding) { + amd64_padding (code, kMaxPadding); + pad -= kMaxPadding; + } + if (pad != 0) amd64_padding (code, pad); + return code; +} +#endif static void add_valuetype (MonoGenericSharingContext *gsctx, MonoMethodSignature *sig, ArgInfo *ainfo, MonoType *type, @@ -426,6 +723,9 @@ add_valuetype (MonoGenericSharingContext *gsctx, MonoMethodSignature *sig, ArgIn guint32 *gr, guint32 *fr, guint32 *stack_size) { guint32 size, quad, nquads, i; + /* Keep track of the size used in each quad so we can */ + /* use the right size when copying args/return vars. */ + guint32 quadsize [2] = {8, 8}; ArgumentClass args [2]; MonoMarshalType *info = NULL; MonoClass *klass; @@ -454,6 +754,24 @@ add_valuetype (MonoGenericSharingContext *gsctx, MonoMethodSignature *sig, ArgIn } #endif + /* If this struct can't be split up naturally into 8-byte */ + /* chunks (registers), pass it on the stack. */ + if (sig->pinvoke && !pass_on_stack) { + info = mono_marshal_load_type_info (klass); + g_assert(info); + guint32 align; + guint32 field_size; + for (i = 0; i < info->num_fields; ++i) { + field_size = mono_marshal_type_size (info->fields [i].field->type, + info->fields [i].mspec, + &align, TRUE, klass->unicode); + if ((info->fields [i].offset < 8) && (info->fields [i].offset + field_size) > 8) { + pass_on_stack = TRUE; + break; + } + } + } + if (pass_on_stack) { /* Allways pass in memory */ ainfo->offset = *stack_size; @@ -553,6 +871,10 @@ add_valuetype (MonoGenericSharingContext *gsctx, MonoMethodSignature *sig, ArgIn if ((quad == 1) && (info->fields [i].offset < 8)) continue; + /* How far into this quad this data extends.*/ + /* (8 is size of quad) */ + quadsize [quad] = info->fields [i].offset + size - (quad * 8); + class1 = merge_argument_class_from_type (info->fields [i].field->type, class1); } g_assert (class1 != ARG_CLASS_NO_CLASS); @@ -590,7 +912,9 @@ add_valuetype (MonoGenericSharingContext *gsctx, MonoMethodSignature *sig, ArgIn if (*fr >= FLOAT_PARAM_REGS) args [quad] = ARG_CLASS_MEMORY; else { - ainfo->pair_storage [quad] = ArgInDoubleSSEReg; + if (quadsize[quad] <= 4) + ainfo->pair_storage [quad] = ArgInFloatSSEReg; + else ainfo->pair_storage [quad] = ArgInDoubleSSEReg; ainfo->pair_regs [quad] = *fr; (*fr) ++; } @@ -611,7 +935,7 @@ add_valuetype (MonoGenericSharingContext *gsctx, MonoMethodSignature *sig, ArgIn if (sig->pinvoke) *stack_size += ALIGN_TO (info->native_size, 8); else - *stack_size += nquads * sizeof (gpointer); + *stack_size += nquads * sizeof(mgreg_t); ainfo->storage = ArgOnStack; } } @@ -910,6 +1234,9 @@ mono_amd64_tail_call_supported (MonoMethodSignature *caller_sig, MonoMethodSigna static int cpuid (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx) { +#if defined(MONO_CROSS_COMPILE) + return 0; +#else #ifndef _MSC_VER __asm__ __volatile__ ("cpuid" : "=a" (*p_eax), "=b" (*p_ebx), "=c" (*p_ecx), "=d" (*p_edx) @@ -923,6 +1250,7 @@ cpuid (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx) *p_edx = info[3]; #endif return 1; +#endif } /* @@ -956,6 +1284,12 @@ mono_arch_init (void) int flags; InitializeCriticalSection (&mini_arch_mutex); +#if defined(__native_client_codegen__) + nacl_instruction_depth = TlsAlloc (); + TlsSetValue 
(nacl_instruction_depth, (gpointer)0); + nacl_rex_tag = TlsAlloc (); + nacl_legacy_prefix_tag = TlsAlloc (); +#endif #ifdef MONO_ARCH_NOMAP32BIT flags = MONO_MMAP_READ; @@ -988,6 +1322,11 @@ void mono_arch_cleanup (void) { DeleteCriticalSection (&mini_arch_mutex); +#if defined(__native_client_codegen__) + TlsFree (nacl_instruction_depth); + TlsFree (nacl_rex_tag); + TlsFree (nacl_legacy_prefix_tag); +#endif } /* @@ -1119,6 +1458,13 @@ mono_arch_compute_omit_fp (MonoCompile *cfg) cfg->arch.omit_fp = TRUE; cfg->arch.omit_fp_computed = TRUE; +#ifdef __native_client_codegen__ + /* NaCl modules may not change the value of RBP, so it cannot be */ + /* used as a normal register, but it can be used as a frame pointer*/ + cfg->disable_omit_fp = TRUE; + cfg->arch.omit_fp = FALSE; +#endif + if (cfg->disable_omit_fp) cfg->arch.omit_fp = FALSE; @@ -1175,7 +1521,9 @@ mono_arch_get_global_int_regs (MonoCompile *cfg) regs = g_list_prepend (regs, (gpointer)AMD64_R12); regs = g_list_prepend (regs, (gpointer)AMD64_R13); regs = g_list_prepend (regs, (gpointer)AMD64_R14); +#ifndef __native_client_codegen__ regs = g_list_prepend (regs, (gpointer)AMD64_R15); +#endif regs = g_list_prepend (regs, (gpointer)AMD64_R10); regs = g_list_prepend (regs, (gpointer)AMD64_R9); @@ -1194,7 +1542,9 @@ mono_arch_get_global_int_regs (MonoCompile *cfg) regs = g_list_prepend (regs, (gpointer)AMD64_R12); regs = g_list_prepend (regs, (gpointer)AMD64_R13); regs = g_list_prepend (regs, (gpointer)AMD64_R14); +#ifndef __native_client_codegen__ regs = g_list_prepend (regs, (gpointer)AMD64_R15); +#endif #ifdef HOST_WIN32 regs = g_list_prepend (regs, (gpointer)AMD64_RDI); regs = g_list_prepend (regs, (gpointer)AMD64_RSI); @@ -1230,7 +1580,9 @@ mono_arch_get_iregs_clobbered_by_call (MonoCallInst *call) regs = g_list_prepend (regs, (gpointer)AMD64_R12); regs = g_list_prepend (regs, (gpointer)AMD64_R13); regs = g_list_prepend (regs, (gpointer)AMD64_R14); +#ifndef __native_client_codegen__ regs = g_list_prepend (regs, (gpointer)AMD64_R15); +#endif regs = g_list_prepend (regs, (gpointer)AMD64_R10); regs = g_list_prepend (regs, (gpointer)AMD64_R9); @@ -1431,7 +1783,7 @@ mono_arch_allocate_vars (MonoCompile *cfg) /* Reserve space for caller saved registers */ for (i = 0; i < AMD64_NREG; ++i) if (AMD64_IS_CALLEE_SAVED_REG (i) && (cfg->used_int_regs & (1 << i))) { - offset += sizeof (gpointer); + offset += sizeof(mgreg_t); } } @@ -1560,12 +1912,12 @@ mono_arch_allocate_vars (MonoCompile *cfg) ins->opcode = OP_REGOFFSET; ins->inst_basereg = cfg->frame_reg; /* These arguments are saved to the stack in the prolog */ - offset = ALIGN_TO (offset, sizeof (gpointer)); + offset = ALIGN_TO (offset, sizeof(mgreg_t)); if (cfg->arch.omit_fp) { ins->inst_offset = offset; - offset += (ainfo->storage == ArgValuetypeInReg) ? ainfo->nregs * sizeof (gpointer) : sizeof (gpointer); + offset += (ainfo->storage == ArgValuetypeInReg) ? ainfo->nregs * sizeof (mgreg_t) : sizeof (mgreg_t); } else { - offset += (ainfo->storage == ArgValuetypeInReg) ? ainfo->nregs * sizeof (gpointer) : sizeof (gpointer); + offset += (ainfo->storage == ArgValuetypeInReg) ? 
ainfo->nregs * sizeof (mgreg_t) : sizeof (mgreg_t); ins->inst_offset = - offset; } break; @@ -1637,14 +1989,14 @@ mono_arch_allocate_vars (MonoCompile *cfg) ins->opcode = OP_REGOFFSET; ins->inst_basereg = cfg->frame_reg; /* These arguments are saved to the stack in the prolog */ - offset = ALIGN_TO (offset, sizeof (gpointer)); + offset = ALIGN_TO (offset, sizeof(mgreg_t)); if (cfg->arch.omit_fp) { ins->inst_offset = offset; - offset += (ainfo->storage == ArgValuetypeInReg) ? ainfo->nregs * sizeof (gpointer) : sizeof (gpointer); + offset += (ainfo->storage == ArgValuetypeInReg) ? ainfo->nregs * sizeof (mgreg_t) : sizeof (mgreg_t); // Arguments are yet supported by the stack map creation code //cfg->locals_max_stack_offset = MAX (cfg->locals_max_stack_offset, offset); } else { - offset += (ainfo->storage == ArgValuetypeInReg) ? ainfo->nregs * sizeof (gpointer) : sizeof (gpointer); + offset += (ainfo->storage == ArgValuetypeInReg) ? ainfo->nregs * sizeof (mgreg_t) : sizeof (mgreg_t); ins->inst_offset = - offset; //cfg->locals_min_stack_offset = MIN (cfg->locals_min_stack_offset, offset); } @@ -1740,7 +2092,11 @@ arg_storage_to_load_membase (ArgStorage storage) { switch (storage) { case ArgInIReg: +#if defined(__mono_ilp32__) + return OP_LOADI8_MEMBASE; +#else return OP_LOAD_MEMBASE; +#endif case ArgInDoubleSSEReg: return OP_LOADR8_MEMBASE; case ArgInFloatSSEReg: @@ -2149,7 +2505,7 @@ mono_arch_emit_outarg_vt (MonoCompile *cfg, MonoInst *ins, MonoInst *src) MONO_INST_NEW (cfg, load, arg_storage_to_load_membase (ainfo->pair_storage [part])); load->inst_basereg = src->dreg; - load->inst_offset = part * sizeof (gpointer); + load->inst_offset = part * sizeof(mgreg_t); switch (ainfo->pair_storage [part]) { case ArgInIReg: @@ -2366,6 +2722,15 @@ mono_arch_dyn_call_free (MonoDynCallInfo *info) g_free (ainfo); } +#if !defined(__native_client__) +#define PTR_TO_GREG(ptr) (mgreg_t)(ptr) +#define GREG_TO_PTR(greg) (gpointer)(greg) +#else +/* Correctly handle casts to/from 32-bit pointers without compiler warnings */ +#define PTR_TO_GREG(ptr) (mgreg_t)(uintptr_t)(ptr) +#define GREG_TO_PTR(greg) (gpointer)(guint32)(greg) +#endif + /* * mono_arch_get_start_dyn_call: * @@ -2398,20 +2763,20 @@ mono_arch_start_dyn_call (MonoDynCallInfo *info, gpointer **args, guint8 *ret, g pindex = 0; if (sig->hasthis || dinfo->cinfo->vret_arg_index == 1) { - p->regs [greg ++] = (mgreg_t)*(args [arg_index ++]); + p->regs [greg ++] = PTR_TO_GREG(*(args [arg_index ++])); if (!sig->hasthis) pindex = 1; } if (dinfo->cinfo->vtype_retaddr) - p->regs [greg ++] = (mgreg_t)ret; + p->regs [greg ++] = PTR_TO_GREG(ret); for (i = pindex; i < sig->param_count; i++) { MonoType *t = mono_type_get_underlying_type (sig->params [i]); gpointer *arg = args [arg_index ++]; if (t->byref) { - p->regs [greg ++] = (mgreg_t)*(arg); + p->regs [greg ++] = PTR_TO_GREG(*(arg)); continue; } @@ -2424,11 +2789,20 @@ mono_arch_start_dyn_call (MonoDynCallInfo *info, gpointer **args, guint8 *ret, g case MONO_TYPE_PTR: case MONO_TYPE_I: case MONO_TYPE_U: +#if !defined(__mono_ilp32__) case MONO_TYPE_I8: case MONO_TYPE_U8: +#endif g_assert (dinfo->cinfo->args [i + sig->hasthis].reg == param_regs [greg]); - p->regs [greg ++] = (mgreg_t)*(arg); + p->regs [greg ++] = PTR_TO_GREG(*(arg)); break; +#if defined(__mono_ilp32__) + case MONO_TYPE_I8: + case MONO_TYPE_U8: + g_assert (dinfo->cinfo->args [i + sig->hasthis].reg == param_regs [greg]); + p->regs [greg ++] = *(guint64*)(arg); + break; +#endif case MONO_TYPE_BOOLEAN: case MONO_TYPE_U1: p->regs [greg ++] = 
*(guint8*)(arg); @@ -2451,7 +2825,7 @@ mono_arch_start_dyn_call (MonoDynCallInfo *info, gpointer **args, guint8 *ret, g break; case MONO_TYPE_GENERICINST: if (MONO_TYPE_IS_REFERENCE (t)) { - p->regs [greg ++] = (mgreg_t)*(arg); + p->regs [greg ++] = PTR_TO_GREG(*(arg)); break; } else { /* Fall through */ @@ -2507,7 +2881,7 @@ mono_arch_finish_dyn_call (MonoDynCallInfo *info, guint8 *buf) case MONO_TYPE_I: case MONO_TYPE_U: case MONO_TYPE_PTR: - *(gpointer*)ret = (gpointer)res; + *(gpointer*)ret = GREG_TO_PTR(res); break; case MONO_TYPE_I1: *(gint8*)ret = res; @@ -2537,7 +2911,7 @@ mono_arch_finish_dyn_call (MonoDynCallInfo *info, guint8 *buf) break; case MONO_TYPE_GENERICINST: if (MONO_TYPE_IS_REFERENCE (sig->ret)) { - *(gpointer*)ret = (gpointer)res; + *(gpointer*)ret = GREG_TO_PTR(res); break; } else { /* Fall through */ @@ -2690,8 +3064,10 @@ emit_call_body (MonoCompile *cfg, guint8 *code, guint32 patch_type, gconstpointe * not span cache lines. This is required for code patching to work on SMP * systems. */ - if (!no_patch && ((guint32)(code + 1 - cfg->native_code) % 4) != 0) - amd64_padding (code, 4 - ((guint32)(code + 1 - cfg->native_code) % 4)); + if (!no_patch && ((guint32)(code + 1 - cfg->native_code) % 4) != 0) { + guint32 pad_size = 4 - ((guint32)(code + 1 - cfg->native_code) % 4); + amd64_padding (code, pad_size); + } mono_add_patch_info (cfg, code - cfg->native_code, patch_type, data); amd64_call_code (code, 0); } @@ -2948,8 +3324,13 @@ mono_arch_lowering_pass (MonoCompile *cfg, MonoBasicBlock *bb) ins->sreg2 = temp->dreg; } break; +#ifndef __mono_ilp32__ case OP_LOAD_MEMBASE: +#endif case OP_LOADI8_MEMBASE: +#ifndef __native_client_codegen__ + /* Don't generate memindex opcodes (to simplify */ + /* read sandboxing) */ if (!amd64_is_imm32 (ins->inst_offset)) { NEW_INS (cfg, ins, temp, OP_I8CONST); temp->inst_c0 = ins->inst_offset; @@ -2957,8 +3338,11 @@ mono_arch_lowering_pass (MonoCompile *cfg, MonoBasicBlock *bb) ins->opcode = OP_AMD64_LOADI8_MEMINDEX; ins->inst_indexreg = temp->dreg; } +#endif break; +#ifndef __mono_ilp32__ case OP_STORE_MEMBASE_IMM: +#endif case OP_STOREI8_MEMBASE_IMM: if (!amd64_is_imm32 (ins->inst_imm)) { NEW_INS (cfg, ins, temp, OP_I8CONST); @@ -3110,8 +3494,20 @@ mono_emit_stack_alloc (MonoCompile *cfg, guchar *code, MonoInst* tree) if (cfg->param_area && cfg->arch.no_pushes) amd64_alu_reg_imm (code, X86_ADD, AMD64_RDI, cfg->param_area); amd64_cld (code); +#if defined(__default_codegen__) + amd64_prefix (code, X86_REP_PREFIX); + amd64_stosl (code); +#elif defined(__native_client_codegen__) + /* NaCl stos pseudo-instruction */ + amd64_codegen_pre(code); + /* First, clear the upper 32 bits of RDI (mov %edi, %edi) */ + amd64_mov_reg_reg (code, AMD64_RDI, AMD64_RDI, 4); + /* Add %r15 to %rdi using lea, condition flags unaffected. 
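   The net effect, assuming %r15 holds the NaCl sandbox base, is roughly
   rdi = r15 + (rdi & 0xffffffff), so the rep stos below can only write
   inside the sandboxed address space.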
*/ + amd64_lea_memindex_size (code, AMD64_RDI, AMD64_R15, 0, AMD64_RDI, 0, 8); amd64_prefix (code, X86_REP_PREFIX); amd64_stosl (code); + amd64_codegen_post(code); +#endif /* __native_client_codegen__ */ if (tree->dreg != AMD64_RDI && sreg != AMD64_RDI) amd64_pop_reg (code, AMD64_RDI); @@ -3163,12 +3559,12 @@ emit_move_return_value (MonoCompile *cfg, MonoInst *ins, guint8 *code) /* Load the destination address */ g_assert (loc->opcode == OP_REGOFFSET); - amd64_mov_reg_membase (code, AMD64_RCX, loc->inst_basereg, loc->inst_offset, 8); + amd64_mov_reg_membase (code, AMD64_RCX, loc->inst_basereg, loc->inst_offset, sizeof(gpointer)); for (quad = 0; quad < 2; quad ++) { switch (cinfo->ret.pair_storage [quad]) { case ArgInIReg: - amd64_mov_membase_reg (code, AMD64_RCX, (quad * 8), cinfo->ret.pair_regs [quad], 8); + amd64_mov_membase_reg (code, AMD64_RCX, (quad * sizeof(mgreg_t)), cinfo->ret.pair_regs [quad], sizeof(mgreg_t)); break; case ArgInFloatSSEReg: amd64_movss_membase_reg (code, AMD64_RCX, (quad * 8), cinfo->ret.pair_regs [quad]); @@ -3244,6 +3640,15 @@ amd64_pop_reg (code, AMD64_RAX); #ifndef DISABLE_JIT +#if defined(__native_client__) || defined(__native_client_codegen__) +void mono_nacl_gc() +{ +#ifdef __native_client_gc__ + __nacl_suspend_thread_if_needed(); +#endif +} +#endif + void mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) { @@ -3277,6 +3682,21 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) } } +#if defined(__native_client_codegen__) + /* For Native Client, all indirect call/jump targets must be */ + /* 32-byte aligned. Exception handler blocks are jumped to */ + /* indirectly as well. */ + gboolean bb_needs_alignment = (bb->flags & BB_INDIRECT_JUMP_TARGET) || + (bb->flags & BB_EXCEPTION_HANDLER); + + if ( bb_needs_alignment && ((cfg->code_len & kNaClAlignmentMask) != 0)) { + int pad = kNaClAlignment - (cfg->code_len & kNaClAlignmentMask); + if (pad != kNaClAlignment) code = mono_arch_nacl_pad(code, pad); + cfg->code_len += pad; + bb->native_offset = cfg->code_len; + } +#endif /*__native_client_codegen__*/ + if (cfg->verbose_level > 2) g_print ("Basic block %d starting at offset 0x%x\n", bb->block_num, bb->native_offset); @@ -3302,9 +3722,11 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) max_len = ((guint8 *)ins_get_spec (ins->opcode))[MONO_INST_LEN]; - if (G_UNLIKELY (offset > (cfg->code_size - max_len - 16))) { +#define EXTRA_CODE_SPACE (NACL_SIZE (16, 16 + kNaClAlignment)) + + if (G_UNLIKELY (offset > (cfg->code_size - max_len - EXTRA_CODE_SPACE))) { cfg->code_size *= 2; - cfg->native_code = g_realloc (cfg->native_code, cfg->code_size); + cfg->native_code = mono_realloc_native_code(cfg); code = cfg->native_code + offset; mono_jit_stats.code_reallocs++; } @@ -3337,7 +3759,11 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) case OP_STOREI2_MEMBASE_REG: amd64_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 2); break; + /* In AMD64 NaCl, pointers are 4 bytes, */ + /* so STORE_* != STOREI8_*. Likewise below. 
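   Concretely, under __mono_ilp32__ a gpointer is 4 bytes while an i8 store
   is still 8 bytes, so the two opcodes can no longer share one emit path.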
*/ case OP_STORE_MEMBASE_REG: + amd64_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, sizeof(gpointer)); + break; case OP_STOREI8_MEMBASE_REG: amd64_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 8); break; @@ -3345,15 +3771,32 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) amd64_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 4); break; case OP_STORE_MEMBASE_IMM: +#ifndef __native_client_codegen__ + /* In NaCl, this could be a PCONST type, which could */ + /* mean a pointer type was copied directly into the */ + /* lower 32-bits of inst_imm, so for InvalidPtr==-1 */ + /* the value would be 0x00000000FFFFFFFF which is */ + /* not proper for an imm32 unless you cast it. */ + g_assert (amd64_is_imm32 (ins->inst_imm)); +#endif + amd64_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, (gint32)ins->inst_imm, sizeof(gpointer)); + break; case OP_STOREI8_MEMBASE_IMM: g_assert (amd64_is_imm32 (ins->inst_imm)); amd64_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 8); break; case OP_LOAD_MEM: +#ifdef __mono_ilp32__ + /* In ILP32, pointers are 4 bytes, so separate these */ + /* cases, use literal 8 below where we really want 8 */ + amd64_mov_reg_imm (code, ins->dreg, ins->inst_imm); + amd64_mov_reg_membase (code, ins->dreg, ins->dreg, 0, sizeof(gpointer)); + break; +#endif case OP_LOADI8_MEM: // FIXME: Decompose this earlier if (amd64_is_imm32 (ins->inst_imm)) - amd64_mov_reg_mem (code, ins->dreg, ins->inst_imm, sizeof (gpointer)); + amd64_mov_reg_mem (code, ins->dreg, ins->inst_imm, 8); else { amd64_mov_reg_imm (code, ins->dreg, ins->inst_imm); amd64_mov_reg_membase (code, ins->dreg, ins->dreg, 0, 8); @@ -3377,13 +3820,20 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) amd64_widen_membase (code, ins->dreg, ins->dreg, 0, FALSE, FALSE); break; case OP_LOADU2_MEM: + /* For NaCl, pointers are 4 bytes, so separate these */ + /* cases, use literal 8 below where we really want 8 */ amd64_mov_reg_imm (code, ins->dreg, ins->inst_imm); amd64_widen_membase (code, ins->dreg, ins->dreg, 0, FALSE, TRUE); break; case OP_LOAD_MEMBASE: + g_assert (amd64_is_imm32 (ins->inst_offset)); + amd64_mov_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, sizeof(gpointer)); + break; case OP_LOADI8_MEMBASE: + /* Use literal 8 instead of sizeof pointer or */ + /* register, we really want 8 for this opcode */ g_assert (amd64_is_imm32 (ins->inst_offset)); - amd64_mov_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, sizeof (gpointer)); + amd64_mov_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, 8); break; case OP_LOADI4_MEMBASE: amd64_movsxd_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset); @@ -4071,14 +4521,14 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) break; case OP_AOTCONST: mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_i1, ins->inst_p0); - amd64_mov_reg_membase (code, ins->dreg, AMD64_RIP, 0, 8); + amd64_mov_reg_membase (code, ins->dreg, AMD64_RIP, 0, sizeof(gpointer)); break; case OP_JUMP_TABLE: mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_i1, ins->inst_p0); amd64_mov_reg_imm_size (code, ins->dreg, 0, 8); break; case OP_MOVE: - amd64_mov_reg_reg (code, ins->dreg, ins->sreg1, sizeof (gpointer)); + amd64_mov_reg_reg (code, ins->dreg, ins->sreg1, sizeof(mgreg_t)); break; case OP_AMD64_SET_XMMREG_R4: { 
amd64_sse_cvtsd2ss_reg_reg (code, ins->dreg, ins->sreg1); @@ -4116,20 +4566,20 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) else { for (i = 0; i < AMD64_NREG; ++i) if (AMD64_IS_CALLEE_SAVED_REG (i) && (cfg->used_int_regs & (1 << i))) - pos -= sizeof (gpointer); + pos -= sizeof(mgreg_t); /* Restore callee-saved registers */ for (i = AMD64_NREG - 1; i > 0; --i) { if (AMD64_IS_CALLEE_SAVED_REG (i) && (cfg->used_int_regs & (1 << i))) { - amd64_mov_reg_membase (code, i, AMD64_RBP, pos, 8); - pos += 8; + amd64_mov_reg_membase (code, i, AMD64_RBP, pos, sizeof(mgreg_t)); + pos += sizeof(mgreg_t); } } /* Copy arguments on the stack to our argument area */ - for (i = 0; i < call->stack_usage; i += 8) { - amd64_mov_reg_membase (code, AMD64_RAX, AMD64_RSP, i, 8); - amd64_mov_membase_reg (code, AMD64_RBP, 16 + i, AMD64_RAX, 8); + for (i = 0; i < call->stack_usage; i += sizeof(mgreg_t)) { + amd64_mov_reg_membase (code, AMD64_RAX, AMD64_RSP, i, sizeof(mgreg_t)); + amd64_mov_membase_reg (code, AMD64_RBP, 16 + i, AMD64_RAX, sizeof(mgreg_t)); } if (pos) @@ -4155,7 +4605,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) break; case OP_ARGLIST: { amd64_lea_membase (code, AMD64_R11, cfg->frame_reg, cfg->sig_cookie); - amd64_mov_membase_reg (code, ins->sreg1, 0, AMD64_R11, 8); + amd64_mov_membase_reg (code, ins->sreg1, 0, AMD64_R11, sizeof(gpointer)); break; } case OP_CALL: @@ -4278,7 +4728,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) /* Set argument registers */ for (i = 0; i < PARAM_REGS; ++i) - amd64_mov_reg_membase (code, param_regs [i], AMD64_R11, i * sizeof (gpointer), 8); + amd64_mov_reg_membase (code, param_regs [i], AMD64_R11, i * sizeof(mgreg_t), sizeof(mgreg_t)); /* Make the call */ amd64_call_reg (code, AMD64_R10); @@ -4403,8 +4853,11 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 8); break; case OP_START_HANDLER: { + /* Even though we're saving RSP, use sizeof */ + /* gpointer because spvar is of type IntPtr */ + /* see: mono_create_spvar_for_region */ MonoInst *spvar = mono_find_spvar_for_region (cfg, bb->region); - amd64_mov_membase_reg (code, spvar->inst_basereg, spvar->inst_offset, AMD64_RSP, 8); + amd64_mov_membase_reg (code, spvar->inst_basereg, spvar->inst_offset, AMD64_RSP, sizeof(gpointer)); if ((MONO_BBLOCK_IS_IN_REGION (bb, MONO_REGION_FINALLY) || MONO_BBLOCK_IS_IN_REGION (bb, MONO_REGION_FINALLY)) && @@ -4415,13 +4868,13 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) } case OP_ENDFINALLY: { MonoInst *spvar = mono_find_spvar_for_region (cfg, bb->region); - amd64_mov_reg_membase (code, AMD64_RSP, spvar->inst_basereg, spvar->inst_offset, 8); + amd64_mov_reg_membase (code, AMD64_RSP, spvar->inst_basereg, spvar->inst_offset, sizeof(gpointer)); amd64_ret (code); break; } case OP_ENDFILTER: { MonoInst *spvar = mono_find_spvar_for_region (cfg, bb->region); - amd64_mov_reg_membase (code, AMD64_RSP, spvar->inst_basereg, spvar->inst_offset, 8); + amd64_mov_reg_membase (code, AMD64_RSP, spvar->inst_basereg, spvar->inst_offset, sizeof(gpointer)); /* The local allocator will put the result into RAX */ amd64_ret (code); break; @@ -5677,6 +6130,12 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) MONO_VARINFO (cfg, ins->inst_c0)->live_range_end = code - cfg->native_code; break; } + case OP_NACL_GC_SAFE_POINT: { +#if defined(__native_client_codegen__) + code = emit_call (cfg, code, MONO_PATCH_INFO_ABS, 
(gpointer)mono_nacl_gc, TRUE); +#endif + break; + } case OP_GC_LIVENESS_DEF: case OP_GC_LIVENESS_USE: case OP_GC_PARAM_SLOT_LIVENESS_DEF: @@ -5692,9 +6151,11 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) } if ((code - cfg->native_code - offset) > max_len) { +#if !defined(__native_client_codegen__) g_warning ("wrong maximal instruction length of instruction %s (expected %d, got %ld)", mono_inst_name (ins->opcode), max_len, code - cfg->native_code - offset); g_assert_not_reached (); +#endif } last_ins = ins; @@ -5824,10 +6285,27 @@ mono_arch_emit_prolog (MonoCompile *cfg) gint32 lmf_offset = cfg->arch.lmf_offset; gboolean args_clobbered = FALSE; gboolean trace = FALSE; +#ifdef __native_client_codegen__ + guint alignment_check; +#endif cfg->code_size = MAX (cfg->header->code_size * 4, 10240); +#if defined(__default_codegen__) code = cfg->native_code = g_malloc (cfg->code_size); +#elif defined(__native_client_codegen__) + /* native_code_alloc is not 32-byte aligned, native_code is. */ + cfg->native_code_alloc = g_malloc (cfg->code_size + kNaClAlignment); + + /* Align native_code to next nearest kNaclAlignment byte. */ + cfg->native_code = (uintptr_t)cfg->native_code_alloc + kNaClAlignment; + cfg->native_code = (uintptr_t)cfg->native_code & ~kNaClAlignmentMask; + + code = cfg->native_code; + + alignment_check = (guint)cfg->native_code & kNaClAlignmentMask; + g_assert (alignment_check == 0); +#endif if (mono_jit_trace_calls != NULL && mono_trace_eval (method)) trace = TRUE; @@ -5873,7 +6351,7 @@ mono_arch_emit_prolog (MonoCompile *cfg) /* These are handled automatically by the stack marking code */ mini_gc_set_slot_type_from_cfa (cfg, -cfa_offset, SLOT_NOREF); - amd64_mov_reg_reg (code, AMD64_RBP, AMD64_RSP, sizeof (gpointer)); + amd64_mov_reg_reg (code, AMD64_RBP, AMD64_RSP, sizeof(mgreg_t)); mono_emit_unwind_op_def_cfa_reg (cfg, code, AMD64_RBP); async_exc_point (code); #ifdef HOST_WIN32 @@ -5888,7 +6366,7 @@ mono_arch_emit_prolog (MonoCompile *cfg) for (i = 0; i < AMD64_NREG; ++i) if (AMD64_IS_CALLEE_SAVED_REG (i) && (cfg->used_int_regs & (1 << i))) { amd64_push_reg (code, i); - pos += sizeof (gpointer); + pos += 8; /* AMD64 push inst is always 8 bytes, no way to change it */ offset += 8; mono_emit_unwind_op_offset (cfg, code, i, - offset); async_exc_point (code); @@ -5904,7 +6382,7 @@ mono_arch_emit_prolog (MonoCompile *cfg) if (cfg->arch.omit_fp) // FIXME: g_assert_not_reached (); - cfg->stack_offset += ALIGN_TO (cfg->param_area, sizeof (gpointer)); + cfg->stack_offset += ALIGN_TO (cfg->param_area, sizeof(mgreg_t)); } if (cfg->arch.omit_fp) { @@ -5942,7 +6420,7 @@ mono_arch_emit_prolog (MonoCompile *cfg) if (G_UNLIKELY (required_code_size >= (cfg->code_size - offset))) { while (required_code_size >= (cfg->code_size - offset)) cfg->code_size *= 2; - cfg->native_code = g_realloc (cfg->native_code, cfg->code_size); + cfg->native_code = mono_realloc_native_code (cfg); code = cfg->native_code + offset; mono_jit_stats.code_reallocs++; } @@ -6008,8 +6486,20 @@ mono_arch_emit_prolog (MonoCompile *cfg) amd64_mov_reg_reg (code, AMD64_RDI, AMD64_RSP, 8); amd64_cld (code); +#if defined(__default_codegen__) + amd64_prefix (code, X86_REP_PREFIX); + amd64_stosl (code); +#elif defined(__native_client_codegen__) + /* NaCl stos pseudo-instruction */ + amd64_codegen_pre (code); + /* First, clear the upper 32 bits of RDI (mov %edi, %edi) */ + amd64_mov_reg_reg (code, AMD64_RDI, AMD64_RDI, 4); + /* Add %r15 to %rdi using lea, condition flags unaffected. 
*/ + amd64_lea_memindex_size (code, AMD64_RDI, AMD64_R15, 0, AMD64_RDI, 0, 8); amd64_prefix (code, X86_REP_PREFIX); amd64_stosl (code); + amd64_codegen_post (code); +#endif /* __native_client_codegen__ */ amd64_mov_reg_membase (code, AMD64_RDI, AMD64_RSP, -8, 8); amd64_mov_reg_membase (code, AMD64_RCX, AMD64_RSP, -16, 8); @@ -6037,7 +6527,9 @@ mono_arch_emit_prolog (MonoCompile *cfg) case AMD64_R12: offset = G_STRUCT_OFFSET (MonoLMF, r12); break; case AMD64_R13: offset = G_STRUCT_OFFSET (MonoLMF, r13); break; case AMD64_R14: offset = G_STRUCT_OFFSET (MonoLMF, r14); break; +#ifndef __native_client_codegen__ case AMD64_R15: offset = G_STRUCT_OFFSET (MonoLMF, r15); break; +#endif #ifdef HOST_WIN32 case AMD64_RDI: offset = G_STRUCT_OFFSET (MonoLMF, rdi); break; case AMD64_RSI: offset = G_STRUCT_OFFSET (MonoLMF, rsi); break; @@ -6100,7 +6592,7 @@ mono_arch_emit_prolog (MonoCompile *cfg) g_assert (cfg->rgctx_var->opcode == OP_REGOFFSET && (cfg->rgctx_var->inst_basereg == AMD64_RBP || cfg->rgctx_var->inst_basereg == AMD64_RSP)); - amd64_mov_membase_reg (code, cfg->rgctx_var->inst_basereg, cfg->rgctx_var->inst_offset, MONO_ARCH_RGCTX_REG, 8); + amd64_mov_membase_reg (code, cfg->rgctx_var->inst_basereg, cfg->rgctx_var->inst_offset, MONO_ARCH_RGCTX_REG, sizeof(gpointer)); } /* compute max_length in order to use short forward jumps */ @@ -6115,8 +6607,22 @@ mono_arch_emit_prolog (MonoCompile *cfg) /* max alignment for loops */ if ((cfg->opt & MONO_OPT_LOOP) && bb_is_loop_start (bb)) max_length += LOOP_ALIGNMENT; +#ifdef __native_client_codegen__ + /* max alignment for native client */ + max_length += kNaClAlignment; +#endif MONO_BB_FOR_EACH_INS (bb, ins) { +#ifdef __native_client_codegen__ + { + int space_in_block = kNaClAlignment - + ((max_length + cfg->code_len) & kNaClAlignmentMask); + int max_len = ((guint8 *)ins_get_spec (ins->opcode))[MONO_INST_LEN]; + if (space_in_block < max_len && max_len < kNaClAlignment) { + max_length += space_in_block; + } + } +#endif /*__native_client_codegen__*/ max_length += ((guint8 *)ins_get_spec (ins->opcode))[MONO_INST_LEN]; } @@ -6168,13 +6674,13 @@ mono_arch_emit_prolog (MonoCompile *cfg) for (quad = 0; quad < 2; quad ++) { switch (ainfo->pair_storage [quad]) { case ArgInIReg: - amd64_mov_membase_reg (code, ins->inst_basereg, ins->inst_offset + (quad * sizeof (gpointer)), ainfo->pair_regs [quad], sizeof (gpointer)); + amd64_mov_membase_reg (code, ins->inst_basereg, ins->inst_offset + (quad * sizeof(mgreg_t)), ainfo->pair_regs [quad], sizeof(mgreg_t)); break; case ArgInFloatSSEReg: - amd64_movss_membase_reg (code, ins->inst_basereg, ins->inst_offset + (quad * sizeof (gpointer)), ainfo->pair_regs [quad]); + amd64_movss_membase_reg (code, ins->inst_basereg, ins->inst_offset + (quad * sizeof(mgreg_t)), ainfo->pair_regs [quad]); break; case ArgInDoubleSSEReg: - amd64_movsd_membase_reg (code, ins->inst_basereg, ins->inst_offset + (quad * sizeof (gpointer)), ainfo->pair_regs [quad]); + amd64_movsd_membase_reg (code, ins->inst_basereg, ins->inst_offset + (quad * sizeof(mgreg_t)), ainfo->pair_regs [quad]); break; case ArgNone: break; @@ -6220,13 +6726,13 @@ mono_arch_emit_prolog (MonoCompile *cfg) for (quad = 0; quad < 2; quad ++) { switch (ainfo->pair_storage [quad]) { case ArgInIReg: - amd64_mov_membase_reg (code, ins->inst_basereg, ins->inst_offset + (quad * sizeof (gpointer)), ainfo->pair_regs [quad], sizeof (gpointer)); + amd64_mov_membase_reg (code, ins->inst_basereg, ins->inst_offset + (quad * sizeof(mgreg_t)), ainfo->pair_regs [quad], sizeof(mgreg_t)); break; 
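/* A minimal standalone sketch (illustrative SKETCH_* names, not part of
 * this patch) of the bundle-alignment arithmetic the NaCl prolog above
 * uses when allocating cfg->native_code: the raw allocation is padded by
 * one 32-byte bundle (kNaClAlignment in this change) and the pointer is
 * then rounded down to a bundle boundary, so the result is always
 * 32-byte aligned and still lies inside the padded allocation. */
#include <assert.h>
#include <stdint.h>

#define SKETCH_NACL_ALIGNMENT      32u
#define SKETCH_NACL_ALIGNMENT_MASK (SKETCH_NACL_ALIGNMENT - 1)

static uint8_t *
sketch_nacl_bundle_align (void *raw_alloc /* from g_malloc (size + alignment) */)
{
	/* Advance by one bundle, then mask off the low bits. */
	uintptr_t p = (uintptr_t) raw_alloc + SKETCH_NACL_ALIGNMENT;

	p &= ~(uintptr_t) SKETCH_NACL_ALIGNMENT_MASK;
	assert ((p & SKETCH_NACL_ALIGNMENT_MASK) == 0);
	return (uint8_t *) p;
}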
case ArgInFloatSSEReg: - amd64_movss_membase_reg (code, ins->inst_basereg, ins->inst_offset + (quad * sizeof (gpointer)), ainfo->pair_regs [quad]); + amd64_movss_membase_reg (code, ins->inst_basereg, ins->inst_offset + (quad * sizeof(mgreg_t)), ainfo->pair_regs [quad]); break; case ArgInDoubleSSEReg: - amd64_movsd_membase_reg (code, ins->inst_basereg, ins->inst_offset + (quad * sizeof (gpointer)), ainfo->pair_regs [quad]); + amd64_movsd_membase_reg (code, ins->inst_basereg, ins->inst_offset + (quad * sizeof(mgreg_t)), ainfo->pair_regs [quad]); break; case ArgNone: break; @@ -6354,13 +6860,13 @@ mono_arch_emit_prolog (MonoCompile *cfg) } /* Save lmf_addr */ - amd64_mov_membase_reg (code, cfg->frame_reg, lmf_offset + G_STRUCT_OFFSET (MonoLMF, lmf_addr), AMD64_RAX, 8); + amd64_mov_membase_reg (code, cfg->frame_reg, lmf_offset + G_STRUCT_OFFSET (MonoLMF, lmf_addr), AMD64_RAX, sizeof(gpointer)); /* Save previous_lmf */ - amd64_mov_reg_membase (code, AMD64_R11, AMD64_RAX, 0, 8); - amd64_mov_membase_reg (code, cfg->frame_reg, lmf_offset + G_STRUCT_OFFSET (MonoLMF, previous_lmf), AMD64_R11, 8); + amd64_mov_reg_membase (code, AMD64_R11, AMD64_RAX, 0, sizeof(gpointer)); + amd64_mov_membase_reg (code, cfg->frame_reg, lmf_offset + G_STRUCT_OFFSET (MonoLMF, previous_lmf), AMD64_R11, sizeof(gpointer)); /* Set new lmf */ amd64_lea_membase (code, AMD64_R11, cfg->frame_reg, lmf_offset); - amd64_mov_membase_reg (code, AMD64_RAX, 0, AMD64_R11, 8); + amd64_mov_membase_reg (code, AMD64_RAX, 0, AMD64_R11, sizeof(gpointer)); } } @@ -6471,7 +6977,7 @@ mono_arch_emit_epilog (MonoCompile *cfg) while (cfg->code_len + max_epilog_size > (cfg->code_size - 16)) { cfg->code_size *= 2; - cfg->native_code = g_realloc (cfg->native_code, cfg->code_size); + cfg->native_code = mono_realloc_native_code (cfg); mono_jit_stats.code_reallocs++; } @@ -6507,14 +7013,14 @@ mono_arch_emit_epilog (MonoCompile *cfg) * through the mono_lmf_addr TLS variable. 
*/ /* reg = previous_lmf */ - amd64_mov_reg_membase (code, AMD64_R11, cfg->frame_reg, lmf_offset + G_STRUCT_OFFSET (MonoLMF, previous_lmf), 8); + amd64_mov_reg_membase (code, AMD64_R11, cfg->frame_reg, lmf_offset + G_STRUCT_OFFSET (MonoLMF, previous_lmf), sizeof(gpointer)); x86_prefix (code, X86_FS_PREFIX); amd64_mov_mem_reg (code, lmf_tls_offset, AMD64_R11, 8); } else { /* Restore previous lmf */ - amd64_mov_reg_membase (code, AMD64_RCX, cfg->frame_reg, lmf_offset + G_STRUCT_OFFSET (MonoLMF, previous_lmf), 8); - amd64_mov_reg_membase (code, AMD64_R11, cfg->frame_reg, lmf_offset + G_STRUCT_OFFSET (MonoLMF, lmf_addr), 8); - amd64_mov_membase_reg (code, AMD64_R11, 0, AMD64_RCX, 8); + amd64_mov_reg_membase (code, AMD64_RCX, cfg->frame_reg, lmf_offset + G_STRUCT_OFFSET (MonoLMF, previous_lmf), sizeof(gpointer)); + amd64_mov_reg_membase (code, AMD64_R11, cfg->frame_reg, lmf_offset + G_STRUCT_OFFSET (MonoLMF, lmf_addr), sizeof(gpointer)); + amd64_mov_membase_reg (code, AMD64_R11, 0, AMD64_RCX, sizeof(gpointer)); } /* Restore caller saved regs */ @@ -6534,7 +7040,11 @@ mono_arch_emit_epilog (MonoCompile *cfg) amd64_mov_reg_membase (code, AMD64_R14, cfg->frame_reg, lmf_offset + G_STRUCT_OFFSET (MonoLMF, r14), 8); } if (cfg->used_int_regs & (1 << AMD64_R15)) { +#if defined(__default_codegen__) amd64_mov_reg_membase (code, AMD64_R15, cfg->frame_reg, lmf_offset + G_STRUCT_OFFSET (MonoLMF, r15), 8); +#elif defined(__native_client_codegen__) + g_assert_not_reached(); +#endif } #ifdef HOST_WIN32 if (cfg->used_int_regs & (1 << AMD64_RDI)) { @@ -6558,10 +7068,10 @@ mono_arch_emit_epilog (MonoCompile *cfg) else { for (i = 0; i < AMD64_NREG; ++i) if (AMD64_IS_CALLEE_SAVED_REG (i) && (cfg->used_int_regs & (1 << i))) - pos -= sizeof (gpointer); + pos -= sizeof(mgreg_t); if (pos) { - if (pos == - sizeof (gpointer)) { + if (pos == - sizeof(mgreg_t)) { /* Only one register, so avoid lea */ for (i = AMD64_NREG - 1; i > 0; --i) if (AMD64_IS_CALLEE_SAVED_REG (i) && (cfg->used_int_regs & (1 << i))) { @@ -6590,13 +7100,13 @@ mono_arch_emit_epilog (MonoCompile *cfg) for (quad = 0; quad < 2; quad ++) { switch (ainfo->pair_storage [quad]) { case ArgInIReg: - amd64_mov_reg_membase (code, ainfo->pair_regs [quad], inst->inst_basereg, inst->inst_offset + (quad * sizeof (gpointer)), sizeof (gpointer)); + amd64_mov_reg_membase (code, ainfo->pair_regs [quad], inst->inst_basereg, inst->inst_offset + (quad * sizeof(mgreg_t)), sizeof(mgreg_t)); break; case ArgInFloatSSEReg: - amd64_movss_reg_membase (code, ainfo->pair_regs [quad], inst->inst_basereg, inst->inst_offset + (quad * sizeof (gpointer))); + amd64_movss_reg_membase (code, ainfo->pair_regs [quad], inst->inst_basereg, inst->inst_offset + (quad * sizeof(mgreg_t))); break; case ArgInDoubleSSEReg: - amd64_movsd_reg_membase (code, ainfo->pair_regs [quad], inst->inst_basereg, inst->inst_offset + (quad * sizeof (gpointer))); + amd64_movsd_reg_membase (code, ainfo->pair_regs [quad], inst->inst_basereg, inst->inst_offset + (quad * sizeof(mgreg_t))); break; case ArgNone: break; @@ -6642,9 +7152,16 @@ mono_arch_emit_exceptions (MonoCompile *cfg) code_size += 8 + 7; /*sizeof (void*) + alignment */ } +#ifdef __native_client_codegen__ + /* Give us extra room on Native Client. This could be */ + /* more carefully calculated, but bundle alignment makes */ + /* it much trickier, so *2 like other places is good. 
*/ + code_size *= 2; +#endif + while (cfg->code_len + code_size > (cfg->code_size - 16)) { cfg->code_size *= 2; - cfg->native_code = g_realloc (cfg->native_code, cfg->code_size); + cfg->native_code = mono_realloc_native_code (cfg); mono_jit_stats.code_reallocs++; } @@ -6705,6 +7222,7 @@ mono_arch_emit_exceptions (MonoCompile *cfg) /* do nothing */ break; } + g_assert(code < cfg->native_code + cfg->code_size); } /* Handle relocations with RIP relative addressing */ @@ -6715,27 +7233,69 @@ mono_arch_emit_exceptions (MonoCompile *cfg) switch (patch_info->type) { case MONO_PATCH_INFO_R8: case MONO_PATCH_INFO_R4: { - guint8 *pos; + guint8 *pos, *patch_pos, *target_pos; /* The SSE opcodes require a 16 byte alignment */ +#if defined(__default_codegen__) code = (guint8*)ALIGN_TO (code, 16); - memset (orig_code, 0, code - orig_code); +#elif defined(__native_client_codegen__) + { + /* Pad this out with HLT instructions */ + /* or we can get garbage bytes emitted */ + /* which will fail validation */ + guint8 *aligned_code; + /* extra align to make room for */ + /* mov/push below */ + int extra_align = patch_info->type == MONO_PATCH_INFO_R8 ? 2 : 1; + aligned_code = (guint8*)ALIGN_TO (code + extra_align, 16); + /* The technique of hiding data in an */ + /* instruction has a problem here: we */ + /* need the data aligned to a 16-byte */ + /* boundary but the instruction cannot */ + /* cross the bundle boundary. so only */ + /* odd multiples of 16 can be used */ + if ((intptr_t)aligned_code % kNaClAlignment == 0) { + aligned_code += 16; + } + while (code < aligned_code) { + *(code++) = 0xf4; /* hlt */ + } + } +#endif pos = cfg->native_code + patch_info->ip.i; - - if (IS_REX (pos [1])) - *(guint32*)(pos + 5) = (guint8*)code - pos - 9; - else - *(guint32*)(pos + 4) = (guint8*)code - pos - 8; + if (IS_REX (pos [1])) { + patch_pos = pos + 5; + target_pos = code - pos - 9; + } + else { + patch_pos = pos + 4; + target_pos = code - pos - 8; + } if (patch_info->type == MONO_PATCH_INFO_R8) { +#ifdef __native_client_codegen__ + /* Hide 64-bit data in a */ + /* "mov imm64, r11" instruction. */ + /* write it before the start of */ + /* the data*/ + *(code-2) = 0x49; /* prefix */ + *(code-1) = 0xbb; /* mov X, %r11 */ +#endif *(double*)code = *(double*)patch_info->data.target; code += sizeof (double); } else { +#ifdef __native_client_codegen__ + /* Hide 32-bit data in a */ + /* "push imm32" instruction. */ + *(code-1) = 0x68; /* push */ +#endif *(float*)code = *(float*)patch_info->data.target; code += sizeof (float); } + *(guint32*)(patch_pos) = target_pos; + remove = TRUE; break; } @@ -6778,6 +7338,7 @@ mono_arch_emit_exceptions (MonoCompile *cfg) tmp->next = patch_info->next; } } + g_assert (code < cfg->native_code + cfg->code_size); } cfg->code_len = code - cfg->native_code; @@ -7095,6 +7656,46 @@ mono_breakpoint_clean_code (guint8 *method_start, guint8 *code, int offset, guin return can_write; } +#if defined(__native_client_codegen__) +/* For membase calls, we want the base register. 
for Native Client, */ +/* all indirect calls have the following sequence with the given sizes: */ +/* mov %eXX,%eXX [2-3] */ +/* mov disp(%r15,%rXX,scale),%r11d [4-8] */ +/* and $0xffffffffffffffe0,%r11d [4] */ +/* add %r15,%r11 [3] */ +/* callq *%r11 [3] */ + + +/* Determine if code points to a NaCl call-through-register sequence, */ +/* (i.e., the last 3 instructions listed above) */ +int +is_nacl_call_reg_sequence(guint8* code) +{ + const char *sequence = "\x41\x83\xe3\xe0" /* and */ + "\x4d\x03\xdf" /* add */ + "\x41\xff\xd3"; /* call */ + return memcmp(code, sequence, 10) == 0; +} + +/* Determine if code points to the first opcode of the mov membase component */ +/* of an indirect call sequence (i.e. the first 2 instructions listed above) */ +/* (there could be a REX prefix before the opcode but it is ignored) */ +static int +is_nacl_indirect_call_membase_sequence(guint8* code) +{ + /* Check for mov opcode, reg-reg addressing mode (mod = 3), */ + return code[0] == 0x8b && amd64_modrm_mod(code[1]) == 3 && + /* and that src reg = dest reg */ + amd64_modrm_reg(code[1]) == amd64_modrm_rm(code[1]) && + /* Check that next inst is mov, uses SIB byte (rm = 4), */ + IS_REX(code[2]) && + code[3] == 0x8b && amd64_modrm_rm(code[4]) == 4 && + /* and has dst of r11 and base of r15 */ + (amd64_modrm_reg(code[4]) + amd64_rex_r(code[2])) == AMD64_R11 && + (amd64_sib_base(code[5]) + amd64_rex_b(code[2])) == AMD64_R15; +} +#endif /* __native_client_codegen__ */ + int mono_arch_get_this_arg_reg (guint8 *code) { @@ -7148,6 +7749,8 @@ get_delegate_invoke_impl (gboolean has_target, guint32 param_count, guint32 *cod g_assert ((code - start) < 64); } + nacl_global_codeman_validate(&start, 64, &code); + mono_debug_add_delegate_trampoline (start, code - start); if (code_len) @@ -7292,6 +7895,7 @@ mono_arch_free_jit_tls_data (MonoJitTlsData *tls) #ifdef MONO_ARCH_HAVE_IMT +#if defined(__default_codegen__) #define CMP_SIZE (6 + 1) #define CMP_REG_REG_SIZE (4 + 1) #define BR_SMALL_SIZE 2 @@ -7299,6 +7903,20 @@ mono_arch_free_jit_tls_data (MonoJitTlsData *tls) #define MOV_REG_IMM_SIZE 10 #define MOV_REG_IMM_32BIT_SIZE 6 #define JUMP_REG_SIZE (2 + 1) +#elif defined(__native_client_codegen__) +/* NaCl N-byte instructions can be padded up to N-1 bytes */ +#define CMP_SIZE ((6 + 1) * 2 - 1) +#define CMP_REG_REG_SIZE ((4 + 1) * 2 - 1) +#define BR_SMALL_SIZE (2 * 2 - 1) +#define BR_LARGE_SIZE (6 * 2 - 1) +#define MOV_REG_IMM_SIZE (10 * 2 - 1) +#define MOV_REG_IMM_32BIT_SIZE (6 * 2 - 1) +/* Jump reg for NaCl adds a mask (+4) and add (+3) */ +#define JUMP_REG_SIZE ((2 + 1 + 4 + 3) * 2 - 1) +/* Jump membase's size is large and unpredictable */ +/* in native client, just pad it out a whole bundle. 
*/ +#define JUMP_MEMBASE_SIZE (kNaClAlignment) +#endif static int imt_branch_distance (MonoIMTCheckItem **imt_entries, int start, int target) @@ -7338,6 +7956,9 @@ mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckI item->chunk_size += MOV_REG_IMM_32BIT_SIZE; else item->chunk_size += MOV_REG_IMM_SIZE; +#ifdef __native_client_codegen__ + item->chunk_size += JUMP_MEMBASE_SIZE; +#endif } item->chunk_size += BR_SMALL_SIZE + JUMP_REG_SIZE; } else { @@ -7353,6 +7974,9 @@ mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckI /* with assert below: * item->chunk_size += CMP_SIZE + BR_SMALL_SIZE + 1; */ +#ifdef __native_client_codegen__ + item->chunk_size += JUMP_MEMBASE_SIZE; +#endif } } } else { @@ -7365,10 +7989,16 @@ mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckI } size += item->chunk_size; } +#if defined(__native_client__) && defined(__native_client_codegen__) + /* In Native Client, we don't re-use thunks, allocate from the */ + /* normal code manager paths. */ + code = mono_domain_code_reserve (domain, size); +#else if (fail_tramp) code = mono_method_alloc_generic_virtual_thunk (domain, size); else code = mono_domain_code_reserve (domain, size); +#endif start = code; for (i = 0; i < count; ++i) { MonoIMTCheckItem *item = imt_entries [i]; @@ -7381,24 +8011,24 @@ mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckI if (amd64_is_imm32 (item->key)) amd64_alu_reg_imm (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)(gssize)item->key); else { - amd64_mov_reg_imm (code, AMD64_R11, item->key); - amd64_alu_reg_reg (code, X86_CMP, MONO_ARCH_IMT_REG, AMD64_R11); + amd64_mov_reg_imm (code, MONO_ARCH_IMT_SCRATCH_REG, item->key); + amd64_alu_reg_reg (code, X86_CMP, MONO_ARCH_IMT_REG, MONO_ARCH_IMT_SCRATCH_REG); } } item->jmp_code = code; amd64_branch8 (code, X86_CC_NE, 0, FALSE); if (item->has_target_code) { - amd64_mov_reg_imm (code, AMD64_R11, item->value.target_code); - amd64_jump_reg (code, AMD64_R11); + amd64_mov_reg_imm (code, MONO_ARCH_IMT_SCRATCH_REG, item->value.target_code); + amd64_jump_reg (code, MONO_ARCH_IMT_SCRATCH_REG); } else { - amd64_mov_reg_imm (code, AMD64_R11, & (vtable->vtable [item->value.vtable_slot])); - amd64_jump_membase (code, AMD64_R11, 0); + amd64_mov_reg_imm (code, MONO_ARCH_IMT_SCRATCH_REG, & (vtable->vtable [item->value.vtable_slot])); + amd64_jump_membase (code, MONO_ARCH_IMT_SCRATCH_REG, 0); } if (fail_case) { amd64_patch (item->jmp_code, code); - amd64_mov_reg_imm (code, AMD64_R11, fail_tramp); - amd64_jump_reg (code, AMD64_R11); + amd64_mov_reg_imm (code, MONO_ARCH_IMT_SCRATCH_REG, fail_tramp); + amd64_jump_reg (code, MONO_ARCH_IMT_SCRATCH_REG); item->jmp_code = NULL; } } else { @@ -7407,27 +8037,33 @@ mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckI if (amd64_is_imm32 (item->key)) amd64_alu_reg_imm (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)(gssize)item->key); else { - amd64_mov_reg_imm (code, AMD64_R11, item->key); - amd64_alu_reg_reg (code, X86_CMP, MONO_ARCH_IMT_REG, AMD64_R11); + amd64_mov_reg_imm (code, MONO_ARCH_IMT_SCRATCH_REG, item->key); + amd64_alu_reg_reg (code, X86_CMP, MONO_ARCH_IMT_REG, MONO_ARCH_IMT_SCRATCH_REG); } item->jmp_code = code; amd64_branch8 (code, X86_CC_NE, 0, FALSE); - amd64_mov_reg_imm (code, AMD64_R11, & (vtable->vtable [item->value.vtable_slot])); - amd64_jump_membase (code, AMD64_R11, 0); + /* See the comment below about R10 */ + amd64_mov_reg_imm (code, MONO_ARCH_IMT_SCRATCH_REG, & 
(vtable->vtable [item->value.vtable_slot])); + amd64_jump_membase (code, MONO_ARCH_IMT_SCRATCH_REG, 0); amd64_patch (item->jmp_code, code); amd64_breakpoint (code); item->jmp_code = NULL; #else - amd64_mov_reg_imm (code, AMD64_R11, & (vtable->vtable [item->value.vtable_slot])); - amd64_jump_membase (code, AMD64_R11, 0); + /* We're using R10 (MONO_ARCH_IMT_SCRATCH_REG) here because R11 (MONO_ARCH_IMT_REG) + needs to be preserved. R10 needs + to be preserved for calls which + require a runtime generic context, + but interface calls don't. */ + amd64_mov_reg_imm (code, MONO_ARCH_IMT_SCRATCH_REG, & (vtable->vtable [item->value.vtable_slot])); + amd64_jump_membase (code, MONO_ARCH_IMT_SCRATCH_REG, 0); #endif } } else { if (amd64_is_imm32 (item->key)) amd64_alu_reg_imm (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)(gssize)item->key); else { - amd64_mov_reg_imm (code, AMD64_R11, item->key); - amd64_alu_reg_reg (code, X86_CMP, MONO_ARCH_IMT_REG, AMD64_R11); + amd64_mov_reg_imm (code, MONO_ARCH_IMT_SCRATCH_REG, item->key); + amd64_alu_reg_reg (code, X86_CMP, MONO_ARCH_IMT_REG, MONO_ARCH_IMT_SCRATCH_REG); } item->jmp_code = code; if (x86_is_imm8 (imt_branch_distance (imt_entries, i, item->check_target_idx))) @@ -7451,6 +8087,8 @@ mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckI mono_stats.imt_thunks_size += code - start; g_assert (code - start <= size); + nacl_domain_code_validate(domain, &start, size, &code); + return start; } diff --git a/mono/mini/mini-amd64.h b/mono/mini/mini-amd64.h index d71a60a108e..effe00bcbb0 100644 --- a/mono/mini/mini-amd64.h +++ b/mono/mini/mini-amd64.h @@ -5,6 +5,18 @@ #include #include +#ifdef __native_client_codegen__ +#define kNaClAlignmentAMD64 32 +#define kNaClAlignmentMaskAMD64 (kNaClAlignmentAMD64 - 1) + +/* TODO: use kamd64NaClLengthOfCallImm */ +/* temporarily using kNaClAlignmentAMD64 so padding in */ +/* image-writer.c doesn't happen */ +#define kNaClLengthOfCallImm kNaClAlignmentAMD64 + +int is_nacl_call_reg_sequence(guint8* code); +#endif + #ifdef HOST_WIN32 #include /* use SIG* defines if possible */ @@ -146,7 +158,13 @@ struct MonoLMF { gpointer lmf_addr; /* This is only set in trampoline LMF frames */ MonoMethod *method; +#if defined(__default_codegen__) || defined(HOST_WIN32) guint64 rip; +#elif defined(__native_client_codegen__) + /* On 64-bit compilers, default alignment is 8 for this field, */ + /* this allows the structure to match for 32-bit compilers. 
*/ + guint64 rip __attribute__ ((aligned(8))); +#endif guint64 rbx; guint64 rbp; guint64 rsp; @@ -238,7 +256,7 @@ typedef struct { */ #define MONO_ARCH_VARARG_ICALLS 1 -#ifndef HOST_WIN32 +#if !defined( HOST_WIN32 ) && !defined(__native_client__) && !defined(__native_client_codegen__) #define MONO_ARCH_USE_SIGACTION 1 @@ -248,7 +266,7 @@ typedef struct { #endif -#endif /* HOST_WIN32 */ +#endif /* !HOST_WIN32 && !__native_client__ */ #if defined (__APPLE__) @@ -335,6 +353,7 @@ typedef struct { #define MONO_ARCH_HAVE_IMT 1 #define MONO_ARCH_HAVE_TLS_GET 1 #define MONO_ARCH_IMT_REG AMD64_R10 +#define MONO_ARCH_IMT_SCRATCH_REG AMD64_R11 #define MONO_ARCH_VTABLE_REG MONO_AMD64_ARG_REG1 /* * We use r10 for the imt/rgctx register rather than r11 because r11 is @@ -357,7 +376,7 @@ typedef struct { #define MONO_ARCH_HAVE_GET_TRAMPOLINES 1 #define MONO_ARCH_AOT_SUPPORTED 1 -#ifndef HOST_WIN32 +#if !defined( HOST_WIN32 ) && !defined( __native_client__ ) #define MONO_ARCH_SOFT_DEBUG_SUPPORTED 1 #else #define DISABLE_DEBUGGER_AGENT 1 diff --git a/mono/mini/mini-ops.h b/mono/mini/mini-ops.h index b79690642cb..687abadec16 100644 --- a/mono/mini/mini-ops.h +++ b/mono/mini/mini-ops.h @@ -891,6 +891,13 @@ MINI_OP(OP_GC_SPILL_SLOT_LIVENESS_DEF, "gc_spill_slot_liveness_def", NONE, NONE, MINI_OP(OP_GC_PARAM_SLOT_LIVENESS_DEF, "gc_param_slot_liveness_def", NONE, NONE, NONE) /* Arch specific opcodes */ +/* #if defined(__native_client_codegen__) || defined(__native_client__) */ +/* We have to define these in terms of the TARGET defines, not NaCl defines */ +/* because genmdesc.pl doesn't have multiple defines per platform. */ +#if defined(TARGET_AMD64) || defined(TARGET_X86) +MINI_OP(OP_NACL_GC_SAFE_POINT, "nacl_gc_safe_point", IREG, NONE, NONE) +#endif + #if defined(TARGET_X86) || defined(TARGET_AMD64) MINI_OP(OP_X86_TEST_NULL, "x86_test_null", NONE, IREG, NONE) MINI_OP(OP_X86_COMPARE_MEMBASE_REG,"x86_compare_membase_reg", NONE, IREG, IREG) diff --git a/mono/mini/mini-x86.c b/mono/mini/mini-x86.c index 26078a9dc99..66a39e08eb2 100644 --- a/mono/mini/mini-x86.c +++ b/mono/mini/mini-x86.c @@ -68,36 +68,13 @@ static CRITICAL_SECTION mini_arch_mutex; MonoBreakpointInfo mono_breakpoint_info [MONO_BREAKPOINT_ARRAY_SIZE]; -static gpointer -mono_realloc_native_code (MonoCompile *cfg) -{ -#ifdef __native_client_codegen__ - guint old_padding; - gpointer native_code; - guint alignment_check; - - /* Save the old alignment offset so we can re-align after the realloc. */ - old_padding = (guint)(cfg->native_code - cfg->native_code_alloc); - - cfg->native_code_alloc = g_realloc (cfg->native_code_alloc, - cfg->code_size + kNaClAlignment); - - /* Align native_code to next nearest kNaClAlignment byte. */ - native_code = (guint)cfg->native_code_alloc + kNaClAlignment; - native_code = (guint)native_code & ~kNaClAlignmentMask; - - /* Shift the data to be 32-byte aligned again. */ - memmove (native_code, cfg->native_code_alloc + old_padding, cfg->code_size); - - alignment_check = (guint)native_code & kNaClAlignmentMask; - g_assert (alignment_check == 0); - return native_code; -#else - return g_realloc (cfg->native_code, cfg->code_size); -#endif -} #ifdef __native_client_codegen__ +const guint kNaClAlignment = kNaClAlignmentX86; +const guint kNaClAlignmentMask = kNaClAlignmentMaskX86; + +/* Default alignment for Native Client is 32-byte. */ +gint8 nacl_align_byte = -32; /* signed version of 0xe0 */ /* mono_arch_nacl_pad: Add pad bytes of alignment instructions at code, */ /* Check that alignment doesn't cross an alignment boundary. 
*/ @@ -2333,6 +2310,16 @@ x86_pop_reg (code, X86_EAX); #ifndef DISABLE_JIT +#if defined(__native_client__) || defined(__native_client_codegen__) +void +mono_nacl_gc() +{ +#ifdef __native_client_gc__ + __nacl_suspend_thread_if_needed(); +#endif +} +#endif + void mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) { @@ -4694,6 +4681,12 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) MONO_VARINFO (cfg, ins->inst_c0)->live_range_end = code - cfg->native_code; break; } + case OP_NACL_GC_SAFE_POINT: { +#if defined(__native_client_codegen__) + code = emit_call (cfg, code, MONO_PATCH_INFO_ABS, (gpointer)mono_nacl_gc); +#endif + break; + } case OP_GC_LIVENESS_DEF: case OP_GC_LIVENESS_USE: case OP_GC_PARAM_SLOT_LIVENESS_DEF: @@ -4773,13 +4766,46 @@ mono_arch_patch_code (MonoMethod *method, MonoDomain *domain, guint8 *code, Mono case MONO_PATCH_INFO_GENERIC_CLASS_INIT: case MONO_PATCH_INFO_MONITOR_ENTER: case MONO_PATCH_INFO_MONITOR_EXIT: +#if defined(__native_client_codegen__) && defined(__native_client__) + if (nacl_is_code_address (code)) { + /* For tail calls, code is patched after being installed */ + /* but not through the normal "patch callsite" method. */ + unsigned char buf[kNaClAlignment]; + unsigned char *aligned_code = (uintptr_t)code & ~kNaClAlignmentMask; + unsigned char *_target = target; + int ret; + /* All patch targets modified in x86_patch */ + /* are IP relative. */ + _target = _target + (uintptr_t)buf - (uintptr_t)aligned_code; + memcpy (buf, aligned_code, kNaClAlignment); + /* Patch a temp buffer of bundle size, */ + /* then install to actual location. */ + x86_patch (buf + ((uintptr_t)code - (uintptr_t)aligned_code), _target); + ret = nacl_dyncode_modify (aligned_code, buf, kNaClAlignment); + g_assert (ret == 0); + } + else { + x86_patch (ip, target); + } +#else x86_patch (ip, target); +#endif break; case MONO_PATCH_INFO_NONE: break; + case MONO_PATCH_INFO_R4: + case MONO_PATCH_INFO_R8: { + guint32 offset = mono_arch_get_patch_offset (ip); + *((gconstpointer *)(ip + offset)) = target; + break; + } default: { guint32 offset = mono_arch_get_patch_offset (ip); +#if !defined(__native_client__) *((gconstpointer *)(ip + offset)) = target; +#else + *((gconstpointer *)(ip + offset)) = nacl_modify_patch_target (target); +#endif break; } } @@ -4805,7 +4831,9 @@ mono_arch_emit_prolog (MonoCompile *cfg) if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE) cfg->code_size += 512; -#ifdef __native_client_codegen__ +#if defined(__default_codegen__) + code = cfg->native_code = g_malloc (cfg->code_size); +#elif defined(__native_client_codegen__) /* native_code_alloc is not 32-byte aligned, native_code is. */ cfg->native_code_alloc = g_malloc (cfg->code_size + kNaClAlignment); @@ -4817,8 +4845,6 @@ mono_arch_emit_prolog (MonoCompile *cfg) alignment_check = (guint)cfg->native_code & kNaClAlignmentMask; g_assert(alignment_check == 0); -#else - code = cfg->native_code = g_malloc (cfg->code_size); #endif /* Offset between RSP and the CFA */ @@ -5339,11 +5365,11 @@ mono_arch_emit_exceptions (MonoCompile *cfg) guint32 size; /* Compute size of code following the push */ -#ifdef __native_client_codegen__ +#if defined(__default_codegen__) + size = 5 + 5; +#elif defined(__native_client_codegen__) code = mono_nacl_align (code); size = kNaClAlignment; -#else - size = 5 + 5; #endif /*This is aligned to 16 bytes by the callee. 
This way we save a few bytes here.*/ @@ -5459,16 +5485,15 @@ mono_arch_free_jit_tls_data (MonoJitTlsData *tls) //[1 + 5] x86_jump_mem(inst,mem) #define CMP_SIZE 6 -#ifdef __native_client_codegen__ -/* These constants should be coming from cpu-x86.md */ +#if defined(__default_codegen__) +#define BR_SMALL_SIZE 2 +#define BR_LARGE_SIZE 5 +#elif defined(__native_client_codegen__) /* I suspect the size calculation below is actually incorrect. */ -/* TODO: fix the calculation that uses these sizes. */ +/* TODO: fix the calculation that uses these sizes. */ #define BR_SMALL_SIZE 16 #define BR_LARGE_SIZE 12 -#else -#define BR_SMALL_SIZE 2 -#define BR_LARGE_SIZE 5 -#endif /* __native_client_codegen__ */ +#endif /*__native_client_codegen__*/ #define JUMP_IMM_SIZE 6 #define ENABLE_WRONG_METHOD_CHECK 0 #define DEBUG_IMT 0 @@ -5493,9 +5518,6 @@ mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckI int size = 0; guint8 *code, *start; -#ifdef __native_client_codegen__ - /* g_print("mono_arch_build_imt_thunk needs to be aligned.\n"); */ -#endif for (i = 0; i < count; ++i) { MonoIMTCheckItem *item = imt_entries [i]; if (item->is_equals) { @@ -5519,10 +5541,16 @@ mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckI } size += item->chunk_size; } +#if defined(__native_client__) && defined(__native_client_codegen__) + /* In Native Client, we don't re-use thunks, allocate from the */ + /* normal code manager paths. */ + code = mono_domain_code_reserve (domain, size); +#else if (fail_tramp) code = mono_method_alloc_generic_virtual_thunk (domain, size); else code = mono_domain_code_reserve (domain, size); +#endif start = code; for (i = 0; i < count; ++i) { MonoIMTCheckItem *item = imt_entries [i]; @@ -5607,6 +5635,8 @@ mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckI g_free (buff); } + nacl_domain_code_validate (domain, &start, size, &code); + return start; } @@ -5837,6 +5867,7 @@ static gpointer get_delegate_invoke_impl (gboolean has_target, guint32 param_count, guint32 *code_len) { guint8 *code, *start; + int code_reserve = 64; /* * The stack contains: @@ -5845,7 +5876,7 @@ get_delegate_invoke_impl (gboolean has_target, guint32 param_count, guint32 *cod */ if (has_target) { - start = code = mono_global_codeman_reserve (64); + start = code = mono_global_codeman_reserve (code_reserve); /* Replace the this argument with the target */ x86_mov_reg_membase (code, X86_EAX, X86_ESP, 4, 4); @@ -5853,15 +5884,15 @@ get_delegate_invoke_impl (gboolean has_target, guint32 param_count, guint32 *cod x86_mov_membase_reg (code, X86_ESP, 4, X86_ECX, 4); x86_jump_membase (code, X86_EAX, G_STRUCT_OFFSET (MonoDelegate, method_ptr)); - g_assert ((code - start) < 64); + g_assert ((code - start) < code_reserve); } else { int i = 0; /* 8 for mov_reg and jump, plus 8 for each parameter */ #ifdef __native_client_codegen__ /* TODO: calculate this size correctly */ - int code_reserve = 13 + (param_count * 8) + 2 * kNaClAlignment; + code_reserve = 13 + (param_count * 8) + 2 * kNaClAlignment; #else - int code_reserve = 8 + (param_count * 8); + code_reserve = 8 + (param_count * 8); #endif /* __native_client_codegen__ */ /* * The stack contains: @@ -5895,6 +5926,7 @@ get_delegate_invoke_impl (gboolean has_target, guint32 param_count, guint32 *cod g_assert ((code - start) < code_reserve); } + nacl_global_codeman_validate(&start, code_reserve, &code); mono_debug_add_delegate_trampoline (start, code - start); if (code_len) diff --git a/mono/mini/mini-x86.h 
b/mono/mini/mini-x86.h index 3063fc2ceca..3cc5b440367 100644 --- a/mono/mini/mini-x86.h +++ b/mono/mini/mini-x86.h @@ -3,6 +3,14 @@ #include #include + +#ifdef __native_client_codegen__ +#define kNaClAlignmentX86 32 +#define kNaClAlignmentMaskX86 (kNaClAlignmentX86 - 1) + +#define kNaClLengthOfCallImm kx86NaClLengthOfCallImm +#endif + #ifdef HOST_WIN32 #include /* use SIG* defines if possible */ @@ -59,12 +67,6 @@ struct sigcontext { #undef MONO_ARCH_USE_SIGACTION #endif -#if defined(__native_client_codegen__) || defined(__native_client__) -#define NACL_SIZE(a, b) (b) -#else -#define NACL_SIZE(a, b) (a) -#endif - #ifndef HOST_WIN32 #ifdef HAVE_WORKING_SIGALTSTACK diff --git a/mono/mini/mini.c b/mono/mini/mini.c index e8cade800a2..69e19cf74d1 100644 --- a/mono/mini/mini.c +++ b/mono/mini/mini.c @@ -89,10 +89,6 @@ static gpointer mono_jit_compile_method_with_opt (MonoMethod *method, guint32 opt, MonoException **ex); -#ifdef __native_client_codegen__ -/* Default alignment for Native Client is 32-byte. */ -guint8 nacl_align_byte = 0xe0; -#endif static guint32 default_opt = 0; static gboolean default_opt_set = FALSE; @@ -164,6 +160,38 @@ gboolean disable_vtypes_in_regs = FALSE; gboolean mono_dont_free_global_codeman; +gpointer +mono_realloc_native_code (MonoCompile *cfg) +{ +#if defined(__default_codegen__) + return g_realloc (cfg->native_code, cfg->code_size); +#elif defined(__native_client_codegen__) + guint old_padding; + gpointer native_code; + guint alignment_check; + + /* Save the old alignment offset so we can re-align after the realloc. */ + old_padding = (guint)(cfg->native_code - cfg->native_code_alloc); + + cfg->native_code_alloc = g_realloc ( cfg->native_code_alloc, + cfg->code_size + kNaClAlignment ); + + /* Align native_code to next nearest kNaClAlignment byte. */ + native_code = (guint)cfg->native_code_alloc + kNaClAlignment; + native_code = (guint)native_code & ~kNaClAlignmentMask; + + /* Shift the data to be 32-byte aligned again. */ + memmove (native_code, cfg->native_code_alloc + old_padding, cfg->code_size); + + alignment_check = (guint)native_code & kNaClAlignmentMask; + g_assert (alignment_check == 0); + return native_code; +#else + g_assert_not_reached (); + return cfg->native_code; +#endif +} + #ifdef __native_client_codegen__ /* Prevent instructions from straddling a 32-byte alignment boundary. */ @@ -430,6 +458,67 @@ void *mono_global_codeman_reserve (int size) } } +#if defined(__native_client_codegen__) && defined(__native_client__) +/* Given the temporary buffer (allocated by mono_global_codeman_reserve) into + * which we are generating code, return a pointer to the destination in the + * dynamic code segment into which the code will be copied when + * mono_global_codeman_commit is called. + * LOCKING: Acquires the jit lock. + */ +void* +nacl_global_codeman_get_dest (void *data) +{ + void *dest; + mono_jit_lock (); + dest = nacl_code_manager_get_code_dest (global_codeman, data); + mono_jit_unlock (); + return dest; +} + +void +mono_global_codeman_commit (void *data, int size, int newsize) +{ + mono_jit_lock (); + mono_code_manager_commit (global_codeman, data, size, newsize); + mono_jit_unlock (); +} + +/* + * Convenience function which calls mono_global_codeman_commit to validate and + * copy the code. The caller sets *buf_base and *buf_size to the start and size + * of the buffer (allocated by mono_global_codeman_reserve), and *code_end to + * the byte after the last instruction byte. 
On return, *buf_base will point to + * the start of the copied in the code segment, and *code_end will point after + * the end of the copied code. + */ +void +nacl_global_codeman_validate (guint8 **buf_base, int buf_size, guint8 **code_end) +{ + guint8 *tmp = nacl_global_codeman_get_dest (*buf_base); + mono_global_codeman_commit (*buf_base, buf_size, *code_end - *buf_base); + *code_end = tmp + (*code_end - *buf_base); + *buf_base = tmp; +} +#else +/* no-op versions of Native Client functions */ +void* +nacl_global_codeman_get_dest (void *data) +{ + return data; +} + +void +mono_global_codeman_commit (void *data, int size, int newsize) +{ +} + +void +nacl_global_codeman_validate (guint8 **buf_base, int buf_size, guint8 **code_end) +{ +} + +#endif /* __native_client__ */ + /** * mono_create_unwind_op: * @@ -1684,7 +1773,7 @@ mono_allocate_stack_slots_full2 (MonoCompile *cfg, gboolean backward, guint32 *s case MONO_TYPE_PTR: case MONO_TYPE_I: case MONO_TYPE_U: -#if SIZEOF_REGISTER == 4 +#if SIZEOF_VOID_P == 4 case MONO_TYPE_I4: #else case MONO_TYPE_I8: @@ -1918,7 +2007,7 @@ mono_allocate_stack_slots_full (MonoCompile *cfg, gboolean backward, guint32 *st vars = mono_varlist_sort (cfg, vars, 0); offset = 0; - *stack_align = sizeof (gpointer); + *stack_align = sizeof(mgreg_t); for (l = vars; l; l = l->next) { vmv = l->data; inst = cfg->varinfo [vmv->idx]; @@ -1973,7 +2062,7 @@ mono_allocate_stack_slots_full (MonoCompile *cfg, gboolean backward, guint32 *st case MONO_TYPE_PTR: case MONO_TYPE_I: case MONO_TYPE_U: -#if SIZEOF_REGISTER == 4 +#if SIZEOF_VOID_P == 4 case MONO_TYPE_I4: #else case MONO_TYPE_I8: @@ -2277,6 +2366,8 @@ mono_bblock_insert_before_ins (MonoBasicBlock *bb, MonoInst *ins, MonoInst *ins_ { if (ins == NULL) { ins = bb->code; + if (ins) + ins->prev = ins_to_insert; bb->code = ins_to_insert; ins_to_insert->next = ins; if (bb->last_ins == NULL) @@ -2859,7 +2950,13 @@ mono_resolve_patch_target (MonoMethod *method, MonoDomain *domain, guint8 *code, target = patch_info->data.inst->inst_c0 + code; break; case MONO_PATCH_INFO_IP: +#if defined(__native_client__) && defined(__native_client_codegen__) + /* Need to transform to the destination address, it's */ + /* emitted as an immediate in the code. */ + target = nacl_inverse_modify_patch_target(ip); +#else target = ip; +#endif break; case MONO_PATCH_INFO_METHOD_REL: target = code + patch_info->data.offset; @@ -2875,6 +2972,13 @@ mono_resolve_patch_target (MonoMethod *method, MonoDomain *domain, guint8 *code, } case MONO_PATCH_INFO_METHOD_JUMP: target = mono_create_jump_trampoline (domain, patch_info->data.method, FALSE); +#if defined(__native_client__) && defined(__native_client_codegen__) +#if defined(TARGET_AMD64) + /* This target is an absolute address, not relative to the */ + /* current code being emitted on AMD64. 
*/ + target = nacl_inverse_modify_patch_target(target); +#endif +#endif break; case MONO_PATCH_INFO_METHOD: if (patch_info->data.method == method) { @@ -2888,6 +2992,11 @@ mono_resolve_patch_target (MonoMethod *method, MonoDomain *domain, guint8 *code, gpointer *jump_table; int i; +#if defined(__native_client__) && defined(__native_client_codegen__) + /* This memory will leak, but we don't care if we're */ + /* not deleting JIT'd methods anyway */ + jump_table = g_malloc0 (sizeof(gpointer) * patch_info->data.table->table_size); +#else if (method && method->dynamic) { jump_table = mono_code_manager_reserve (mono_dynamic_code_hash_lookup (domain, method)->code_mp, sizeof (gpointer) * patch_info->data.table->table_size); } else { @@ -2897,10 +3006,27 @@ mono_resolve_patch_target (MonoMethod *method, MonoDomain *domain, guint8 *code, jump_table = mono_domain_code_reserve (domain, sizeof (gpointer) * patch_info->data.table->table_size); } } +#endif - for (i = 0; i < patch_info->data.table->table_size; i++) + for (i = 0; i < patch_info->data.table->table_size; i++) { +#if defined(__native_client__) && defined(__native_client_codegen__) + /* 'code' is relative to the current code blob, we */ + /* need to do this transform on it to make the */ + /* pointers in this table absolute */ + jump_table [i] = nacl_inverse_modify_patch_target (code) + GPOINTER_TO_INT (patch_info->data.table->table [i]); +#else jump_table [i] = code + GPOINTER_TO_INT (patch_info->data.table->table [i]); +#endif + } + +#if defined(__native_client__) && defined(__native_client_codegen__) + /* jump_table is in the data section, we need to transform */ + /* it here so when it gets modified in amd64_patch it will */ + /* then point back to the absolute data address */ + target = nacl_inverse_modify_patch_target (jump_table); +#else target = jump_table; +#endif break; } case MONO_PATCH_INFO_METHODCONST: @@ -3246,11 +3372,18 @@ mono_postprocess_patches (MonoCompile *cfg) } case MONO_PATCH_INFO_SWITCH: { gpointer *table; +#if defined(__native_client__) && defined(__native_client_codegen__) + /* This memory will leak. */ + /* TODO: can we free this when */ + /* making the final jump table? */ + table = g_malloc0 (sizeof(gpointer) * patch_info->data.table->table_size); +#else if (cfg->method->dynamic) { table = mono_code_manager_reserve (cfg->dynamic_info->code_mp, sizeof (gpointer) * patch_info->data.table->table_size); } else { table = mono_domain_code_reserve (cfg->domain, sizeof (gpointer) * patch_info->data.table->table_size); } +#endif for (i = 0; i < patch_info->data.table->table_size; i++) { /* Might be NULL if the switch is eliminated */ @@ -3268,6 +3401,12 @@ mono_postprocess_patches (MonoCompile *cfg) GSList *list; MonoDomain *domain = cfg->domain; unsigned char *ip = cfg->native_code + patch_info->ip.i; +#if defined(__native_client__) && defined(__native_client_codegen__) + /* When this jump target gets evaluated, the method */ + /* will be installed in the dynamic code section, */ + /* not at the location of cfg->native_code. */ + ip = nacl_inverse_modify_patch_target (cfg->native_code) + patch_info->ip.i; +#endif mono_domain_lock (domain); if (!domain_jit_info (domain)->jump_target_hash) @@ -3407,6 +3546,15 @@ mono_codegen (MonoCompile *cfg) int max_epilog_size; guint8 *code; +#if defined(__native_client_codegen__) && defined(__native_client__) + void *code_dest; + + /* This keeps patch targets from being transformed during + * ordinary method compilation, for local branches and jumps. 
+ */ + nacl_allow_target_modification (FALSE); +#endif + for (bb = cfg->bb_entry; bb; bb = bb->next_bb) { cfg->spill_count = 0; /* we reuse dfn here */ @@ -3459,6 +3607,9 @@ mono_codegen (MonoCompile *cfg) } } +#ifdef __native_client_codegen__ + mono_nacl_fix_patches (cfg->native_code, cfg->patch_info); +#endif mono_arch_emit_exceptions (cfg); max_epilog_size = 0; @@ -3489,9 +3640,14 @@ mono_codegen (MonoCompile *cfg) #endif code = mono_domain_code_reserve (cfg->domain, cfg->code_size + unwindlen); } +#if defined(__native_client_codegen__) && defined(__native_client__) + nacl_allow_target_modification (TRUE); +#endif memcpy (code, cfg->native_code, cfg->code_len); -#ifdef __native_client_codegen__ +#if defined(__default_codegen__) + g_free (cfg->native_code); +#elif defined(__native_client_codegen__) if (cfg->native_code_alloc) { g_free (cfg->native_code_alloc); cfg->native_code_alloc = 0; @@ -3499,9 +3655,7 @@ mono_codegen (MonoCompile *cfg) else if (cfg->native_code) { g_free (cfg->native_code); } -#else - g_free (cfg->native_code); -#endif +#endif /* __native_client_codegen__ */ cfg->native_code = code; code = cfg->native_code + cfg->code_len; @@ -3539,8 +3693,18 @@ if (valgrind_register){ #ifdef MONO_ARCH_HAVE_SAVE_UNWIND_INFO mono_arch_save_unwind_info (cfg); #endif - -#ifdef __native_client_codegen__ + +#if defined(__native_client_codegen__) && defined(__native_client__) + if (!cfg->compile_aot) { + if (cfg->method->dynamic) { + code_dest = nacl_code_manager_get_code_dest(cfg->dynamic_info->code_mp, cfg->native_code); + } else { + code_dest = nacl_domain_get_code_dest(cfg->domain, cfg->native_code); + } + } +#endif + +#if defined(__native_client_codegen__) mono_nacl_fix_patches (cfg->native_code, cfg->patch_info); #endif @@ -3551,6 +3715,9 @@ if (valgrind_register){ } else { mono_domain_code_commit (cfg->domain, cfg->native_code, cfg->code_size, cfg->code_len); } +#if defined(__native_client_codegen__) && defined(__native_client__) + cfg->native_code = code_dest; +#endif mono_profiler_code_buffer_new (cfg->native_code, cfg->code_len, MONO_PROFILER_CODE_BUFFER_METHOD, cfg->method); mono_arch_flush_icache (cfg->native_code, cfg->code_len); @@ -6149,6 +6316,9 @@ mini_init (const char *filename, const char *runtime_version) register_icall (mono_load_remote_field_new, "mono_load_remote_field_new", "object object ptr ptr", FALSE); register_icall (mono_store_remote_field_new, "mono_store_remote_field_new", "void object ptr ptr object", FALSE); +#if defined(__native_client__) || defined(__native_client_codegen__) + register_icall (mono_nacl_gc, "mono_nacl_gc", "void", TRUE); +#endif /* * NOTE, NOTE, NOTE, NOTE: * when adding emulation for some opcodes, remember to also add a dummy @@ -6219,7 +6389,11 @@ mini_init (const char *filename, const char *runtime_version) mono_register_opcode_emulation (OP_LCONV_TO_R_UN, "__emul_lconv_to_r8_un", "double long", mono_lconv_to_r8_un, FALSE); #endif #ifdef MONO_ARCH_EMULATE_FREM +#if defined(__default_codegen__) mono_register_opcode_emulation (OP_FREM, "__emul_frem", "double double double", fmod, FALSE); +#elif defined(__native_client_codegen__) + mono_register_opcode_emulation (OP_FREM, "__emul_frem", "double double double", mono_fmod, FALSE); +#endif #endif #ifdef MONO_ARCH_SOFT_FLOAT diff --git a/mono/mini/mini.h b/mono/mini/mini.h index 5901ab58450..33764f2bf5d 100644 --- a/mono/mini/mini.h +++ b/mono/mini/mini.h @@ -1490,7 +1490,7 @@ enum { #endif /* Opcodes to load/store regsize quantities */ -#ifdef __mono_ilp32__ +#if defined (__mono_ilp32__) 
#define OP_LOADR_MEMBASE OP_LOADI8_MEMBASE #define OP_STORER_MEMBASE_REG OP_STOREI8_MEMBASE_REG #else @@ -1776,6 +1776,7 @@ void mono_linterval_split (MonoCompile *cfg, MonoLiveInterval void mono_liveness_handle_exception_clauses (MonoCompile *cfg) MONO_INTERNAL; /* Native Client functions */ +gpointer mono_realloc_native_code(MonoCompile *cfg); #ifdef __native_client_codegen__ void mono_nacl_align_inst(guint8 **pcode, int instlen); void mono_nacl_align_call(guint8 **start, guint8 **pcode); @@ -1786,6 +1787,18 @@ void mono_nacl_fix_patches(const guint8 *code, MonoJumpInfo *ji); guint8 *mono_arch_nacl_pad(guint8 *code, int pad); guint8 *mono_arch_nacl_skip_nops(guint8 *code); +extern const guint kNaClAlignment; +extern const guint kNaClAlignmentMask; +#endif + +#if defined(__native_client__) || defined(__native_client_codegen__) +void mono_nacl_gc(); +#endif + +#if defined(__native_client_codegen__) || defined(__native_client__) +#define NACL_SIZE(a, b) (b) +#else +#define NACL_SIZE(a, b) (a) #endif /* AOT */ @@ -1894,6 +1907,9 @@ char* mono_get_rgctx_fetch_trampoline_name (int slot) MONO_INTERNAL; gboolean mono_running_on_valgrind (void) MONO_INTERNAL; void* mono_global_codeman_reserve (int size) MONO_INTERNAL; +void* nacl_global_codeman_get_dest(void *data) MONO_INTERNAL; +void mono_global_codeman_commit(void *data, int size, int newsize) MONO_INTERNAL; +void nacl_global_codeman_validate(guint8 **buf_base, int buf_size, guint8 **code_end) MONO_INTERNAL; const char *mono_regname_full (int reg, int bank) MONO_INTERNAL; gint32* mono_allocate_stack_slots_full (MonoCompile *cfg, gboolean backward, guint32 *stack_size, guint32 *stack_align) MONO_INTERNAL; gint32* mono_allocate_stack_slots (MonoCompile *cfg, guint32 *stack_size, guint32 *stack_align) MONO_INTERNAL; diff --git a/mono/mini/nacl.cs b/mono/mini/nacl.cs new file mode 100644 index 00000000000..24cd2c5cc82 --- /dev/null +++ b/mono/mini/nacl.cs @@ -0,0 +1,67 @@ +using System; +using Mono.Simd; + +class Tests { + struct myvt { + public int X; + public int Y; + } + + static int test_0_vector4i_cmp_gt () { + Vector4i a = new Vector4i (10, 5, 12, -1); + Vector4i b = new Vector4i (-1, 5, 10, 10); + + Vector4i c = a.CompareGreaterThan (b); + + if (c.X != -1) + return 1; + if (c.Y != 0) + return 2; + if (c.Z != -1) + return 3; + if (c.W != 0) + return 4; + return 0; + } + + static myvt CompareGT(myvt a, myvt b) { + myvt r; + r.X = a.X > b.X ? -1 : 0; + r.Y = a.Y > b.Y ? 
-1 : 0; + return r; + } + + static int test_0_struct2i_cmp_gt() { + myvt a; + myvt b; + a.X = 10; + a.Y = 5; + b.X = -1; + b.Y = 5; + myvt c = CompareGT(a, b); + if (c.X != -1) + return 1; + if (c.Y != 0) + return 2; + return 0; + } + + static int vararg_sum(params int[] args) { + int sum = 0; + foreach(int arg in args) { + sum += arg; + } + return sum; + } + static int test_21_vararg_test() { + int sum = 0; + sum += vararg_sum(); + sum += vararg_sum(1); + sum += vararg_sum(2, 3); + sum += vararg_sum(4, 5, 6); + return sum; + } + public static int Main(String[] args) { + return TestDriver.RunTests(typeof(Tests)); + } +} diff --git a/mono/mini/tramp-amd64.c b/mono/mini/tramp-amd64.c index cd42591d876..f576af9496c 100644 --- a/mono/mini/tramp-amd64.c +++ b/mono/mini/tramp-amd64.c @@ -25,6 +25,11 @@ #include "mini.h" #include "mini-amd64.h" +#if defined(__native_client_codegen__) && defined(__native_client__) +#include +#include +#endif + #define IS_REX(inst) (((inst) >= 0x40) && ((inst) <= 0x4f)) static guint8* nullified_class_init_trampoline; @@ -56,6 +61,8 @@ mono_arch_get_unbox_trampoline (MonoMethod *m, gpointer addr) amd64_jump_reg (code, AMD64_RAX); g_assert ((code - start) < 20); + nacl_domain_code_validate (domain, &start, 20, &code); + mono_arch_flush_icache (start, code - start); return start; @@ -90,6 +97,7 @@ mono_arch_get_static_rgctx_trampoline (MonoMethod *m, MonoMethodRuntimeGenericCo amd64_jump_code (code, addr); g_assert ((code - start) < buf_len); + nacl_domain_code_validate (domain, &start, buf_len, &code); mono_arch_flush_icache (start, code - start); return start; @@ -117,6 +125,8 @@ mono_arch_get_llvm_imt_trampoline (MonoDomain *domain, MonoMethod *m, int vt_off g_assert ((code - start) < buf_len); + nacl_domain_code_validate (domain, &start, buf_len, &code); + mono_arch_flush_icache (start, code - start); return start; @@ -131,12 +141,14 @@ mono_arch_get_llvm_imt_trampoline (MonoDomain *domain, MonoMethod *m, int vt_off void mono_arch_patch_callsite (guint8 *method_start, guint8 *orig_code, guint8 *addr) { +#if defined(__default_codegen__) guint8 *code; guint8 buf [16]; gboolean can_write = mono_breakpoint_clean_code (method_start, orig_code, 14, buf, sizeof (buf)); code = buf + 14; + /* mov 64-bit imm into r11 (followed by call reg?) or direct call*/ if (((code [-13] == 0x49) && (code [-12] == 0xbb)) || (code [-5] == 0xe8)) { if (code [-5] != 0xe8) { if (can_write) { @@ -184,6 +196,38 @@ mono_arch_patch_callsite (guint8 *method_start, guint8 *orig_code, guint8 *addr) VALGRIND_DISCARD_TRANSLATIONS (orig_code - 5, sizeof (gpointer)); } } +#elif defined(__native_client__) + /* These are essentially the same 2 cases as above, modified for NaCl*/ + + /* Target must be bundle-aligned */ + g_assert (((guint32)addr & kNaClAlignmentMask) == 0); + /* Return target must be bundle-aligned */ + g_assert (((guint32)orig_code & kNaClAlignmentMask) == 0); + + if (orig_code[-5] == 0xe8) { + /* Direct call */ + int ret; + gint32 offset = (gint32)addr - (gint32)orig_code; + guint8 buf[sizeof(gint32)]; + *((gint32*)(buf)) = offset; + ret = nacl_dyncode_modify (orig_code - sizeof(gint32), buf, sizeof(gint32)); + g_assert (ret == 0); + } + + else if (is_nacl_call_reg_sequence (orig_code - 10) && orig_code[-16] == 0x41 && orig_code[-15] == 0xbb) { + int ret; + guint8 buf[sizeof(gint32)]; + *((gint32 *)(buf)) = addr; + /* orig_code[-14] is the start of the immediate. 
*/ + ret = nacl_dyncode_modify (orig_code - 14, buf, sizeof(gint32)); + g_assert (ret == 0); + } + else { + g_assert_not_reached (); + } + + return; +#endif } void @@ -192,6 +236,7 @@ mono_arch_patch_plt_entry (guint8 *code, gpointer *got, mgreg_t *regs, guint8 *a gint32 disp; gpointer *plt_jump_table_entry; +#if defined(__default_codegen__) /* A PLT entry: jmp *(%rip) */ g_assert (code [0] == 0xff); g_assert (code [1] == 0x25); @@ -199,6 +244,24 @@ mono_arch_patch_plt_entry (guint8 *code, gpointer *got, mgreg_t *regs, guint8 *a disp = *(gint32*)(code + 2); plt_jump_table_entry = (gpointer*)(code + 6 + disp); +#elif defined(__native_client_codegen__) + /* A PLT entry: */ + /* mov (%rip), %r11d */ + /* nacljmp *%r11 */ + + /* Verify the 'mov' */ + g_assert (code [0] == 0x45); + g_assert (code [1] == 0x8b); + g_assert (code [2] == 0x1d); + + disp = *(gint32*)(code + 3); + + /* 7 = 3 (mov opcode) + 4 (disp) */ + /* This needs to resolve to the target of the RIP-relative offset */ + plt_jump_table_entry = (gpointer*)(code + 7 + disp); + +#endif /* __native_client_codegen__ */ + InterlockedExchangePointer (plt_jump_table_entry, addr); } @@ -321,22 +384,25 @@ guchar* mono_arch_create_generic_trampoline (MonoTrampolineType tramp_type, MonoTrampInfo **info, gboolean aot) { guint8 *buf, *code, *tramp, *br [2], *r11_save_code, *after_r11_save_code; - int i, lmf_offset, offset, res_offset, arg_offset, rax_offset, tramp_offset; - int buf_len, saved_regs_offset; + int i, lmf_offset, offset, res_offset, arg_offset, rax_offset, tramp_offset, saved_regs_offset; int saved_fpregs_offset, rbp_offset, framesize, orig_rsp_to_rbp_offset, cfa_offset; gboolean has_caller; GSList *unwind_ops = NULL; MonoJumpInfo *ji = NULL; + const guint kMaxCodeSize = NACL_SIZE (548, 548*2); + +#if defined(__native_client_codegen__) + const guint kNaClTrampOffset = 17; +#endif if (tramp_type == MONO_TRAMPOLINE_JUMP) has_caller = FALSE; else has_caller = TRUE; - buf_len = 548; - code = buf = mono_global_codeman_reserve (buf_len); + code = buf = mono_global_codeman_reserve (kMaxCodeSize); - framesize = 538 + sizeof (MonoLMF); + framesize = kMaxCodeSize + sizeof (MonoLMF); framesize = (framesize + (MONO_ARCH_FRAME_ALIGNMENT - 1)) & ~ (MONO_ARCH_FRAME_ALIGNMENT - 1); orig_rsp_to_rbp_offset = 0; @@ -353,72 +419,76 @@ mono_arch_create_generic_trampoline (MonoTrampolineType tramp_type, MonoTrampInf /* Pop the return address off the stack */ amd64_pop_reg (code, AMD64_R11); - orig_rsp_to_rbp_offset += 8; + orig_rsp_to_rbp_offset += sizeof(mgreg_t); - cfa_offset -= 8; + cfa_offset -= sizeof(mgreg_t); mono_add_unwind_op_def_cfa_offset (unwind_ops, code, buf, cfa_offset); /* * Allocate a new stack frame */ amd64_push_reg (code, AMD64_RBP); - cfa_offset += 8; + cfa_offset += sizeof(mgreg_t); mono_add_unwind_op_def_cfa_offset (unwind_ops, code, buf, cfa_offset); mono_add_unwind_op_offset (unwind_ops, code, buf, AMD64_RBP, - cfa_offset); - orig_rsp_to_rbp_offset -= 8; - amd64_mov_reg_reg (code, AMD64_RBP, AMD64_RSP, 8); + orig_rsp_to_rbp_offset -= sizeof(mgreg_t); + amd64_mov_reg_reg (code, AMD64_RBP, AMD64_RSP, sizeof(mgreg_t)); mono_add_unwind_op_def_cfa_reg (unwind_ops, code, buf, AMD64_RBP); amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, framesize); offset = 0; rbp_offset = - offset; - offset += 8; + offset += sizeof(mgreg_t); rax_offset = - offset; - offset += 8; + offset += sizeof(mgreg_t); tramp_offset = - offset; - offset += 8; + offset += sizeof(gpointer); arg_offset = - offset; /* Compute the trampoline address from the return address 
*/ if (aot) { +#if defined(__default_codegen__) /* 7 = length of call *(rip) */ amd64_alu_reg_imm (code, X86_SUB, AMD64_R11, 7); +#elif defined(__native_client_codegen__) + amd64_alu_reg_imm (code, X86_SUB, AMD64_R11, kNaClTrampOffset); +#endif } else { /* 5 = length of amd64_call_membase () */ amd64_alu_reg_imm (code, X86_SUB, AMD64_R11, 5); } - amd64_mov_membase_reg (code, AMD64_RBP, tramp_offset, AMD64_R11, 8); + amd64_mov_membase_reg (code, AMD64_RBP, tramp_offset, AMD64_R11, sizeof(gpointer)); - offset += 8; + offset += sizeof(mgreg_t); res_offset = - offset; /* Save all registers */ - offset += AMD64_NREG * 8; + offset += AMD64_NREG * sizeof(mgreg_t); saved_regs_offset = - offset; for (i = 0; i < AMD64_NREG; ++i) { if (i == AMD64_RBP) { /* RAX is already saved */ - amd64_mov_reg_membase (code, AMD64_RAX, AMD64_RBP, rbp_offset, 8); - amd64_mov_membase_reg (code, AMD64_RBP, saved_regs_offset + (i * 8), AMD64_RAX, 8); + amd64_mov_reg_membase (code, AMD64_RAX, AMD64_RBP, rbp_offset, sizeof(mgreg_t)); + amd64_mov_membase_reg (code, AMD64_RBP, saved_regs_offset + (i * sizeof(mgreg_t)), AMD64_RAX, sizeof(mgreg_t)); } else if (i != AMD64_R11) { - amd64_mov_membase_reg (code, AMD64_RBP, saved_regs_offset + (i * 8), i, 8); + amd64_mov_membase_reg (code, AMD64_RBP, saved_regs_offset + (i * sizeof(mgreg_t)), i, sizeof(mgreg_t)); } else { /* We have to save R11 right at the start of the trampoline code because it's used as a scratch register */ - amd64_mov_membase_reg (r11_save_code, AMD64_RSP, saved_regs_offset + orig_rsp_to_rbp_offset + (i * 8), i, 8); + amd64_mov_membase_reg (r11_save_code, AMD64_RSP, saved_regs_offset + orig_rsp_to_rbp_offset + (i * sizeof(mgreg_t)), i, sizeof(mgreg_t)); g_assert (r11_save_code == after_r11_save_code); } } - offset += 8 * 8; + offset += 8 * sizeof(mgreg_t); saved_fpregs_offset = - offset; for (i = 0; i < 8; ++i) - amd64_movsd_membase_reg (code, AMD64_RBP, saved_fpregs_offset + (i * 8), i); + amd64_movsd_membase_reg (code, AMD64_RBP, saved_fpregs_offset + (i * sizeof(mgreg_t)), i); if (tramp_type != MONO_TRAMPOLINE_GENERIC_CLASS_INIT && tramp_type != MONO_TRAMPOLINE_MONITOR_ENTER && @@ -426,14 +496,21 @@ mono_arch_create_generic_trampoline (MonoTrampolineType tramp_type, MonoTrampInf /* Obtain the trampoline argument which is encoded in the instruction stream */ if (aot) { /* Load the GOT offset */ - amd64_mov_reg_membase (code, AMD64_R11, AMD64_RBP, tramp_offset, 8); + amd64_mov_reg_membase (code, AMD64_R11, AMD64_RBP, tramp_offset, sizeof(gpointer)); +#if defined(__default_codegen__) amd64_mov_reg_membase (code, AMD64_RAX, AMD64_R11, 7, 4); +#elif defined(__native_client_codegen__) + /* The arg is hidden in a "push imm32" instruction, */ + /* add one to skip the opcode. 
*/ + amd64_mov_reg_membase (code, AMD64_RAX, AMD64_R11, kNaClTrampOffset+1, 4); +#endif /* Compute the address of the GOT slot */ - amd64_alu_reg_reg_size (code, X86_ADD, AMD64_R11, AMD64_RAX, 8); + amd64_alu_reg_reg_size (code, X86_ADD, AMD64_R11, AMD64_RAX, sizeof(gpointer)); /* Load the value */ - amd64_mov_reg_membase (code, AMD64_R11, AMD64_R11, 0, 8); + amd64_mov_reg_membase (code, AMD64_R11, AMD64_R11, 0, sizeof(gpointer)); } else { - amd64_mov_reg_membase (code, AMD64_R11, AMD64_RBP, tramp_offset, 8); + amd64_mov_reg_membase (code, AMD64_R11, AMD64_RBP, tramp_offset, sizeof(gpointer)); +#if defined(__default_codegen__) amd64_mov_reg_membase (code, AMD64_RAX, AMD64_R11, 5, 1); amd64_widen_reg (code, AMD64_RAX, AMD64_RAX, TRUE, FALSE); amd64_alu_reg_imm_size (code, X86_CMP, AMD64_RAX, 4, 1); @@ -447,11 +524,15 @@ mono_arch_create_generic_trampoline (MonoTrampolineType tramp_type, MonoTrampInf mono_amd64_patch (br [0], code); amd64_mov_reg_membase (code, AMD64_R11, AMD64_R11, 6, 8); mono_amd64_patch (br [1], code); +#elif defined(__native_client_codegen__) + /* All args are 32-bit pointers in NaCl */ + amd64_mov_reg_membase (code, AMD64_R11, AMD64_R11, 6, 4); +#endif } - amd64_mov_membase_reg (code, AMD64_RBP, arg_offset, AMD64_R11, 8); + amd64_mov_membase_reg (code, AMD64_RBP, arg_offset, AMD64_R11, sizeof(gpointer)); } else { - amd64_mov_reg_membase (code, AMD64_R11, AMD64_RBP, saved_regs_offset + (MONO_AMD64_ARG_REG1 * 8), 8); - amd64_mov_membase_reg (code, AMD64_RBP, arg_offset, AMD64_R11, 8); + amd64_mov_reg_membase (code, AMD64_R11, AMD64_RBP, saved_regs_offset + (MONO_AMD64_ARG_REG1 * sizeof(mgreg_t)), sizeof(mgreg_t)); + amd64_mov_membase_reg (code, AMD64_RBP, arg_offset, AMD64_R11, sizeof(gpointer)); } /* Save LMF begin */ @@ -461,34 +542,34 @@ mono_arch_create_generic_trampoline (MonoTrampolineType tramp_type, MonoTrampInf /* Save ip */ if (has_caller) - amd64_mov_reg_membase (code, AMD64_R11, AMD64_RBP, 8, 8); + amd64_mov_reg_membase (code, AMD64_R11, AMD64_RBP, 8, sizeof(gpointer)); else amd64_mov_reg_imm (code, AMD64_R11, 0); - amd64_mov_membase_reg (code, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, rip), AMD64_R11, 8); + amd64_mov_membase_reg (code, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, rip), AMD64_R11, sizeof(mgreg_t)); /* Save fp */ - amd64_mov_reg_membase (code, AMD64_R11, AMD64_RSP, framesize, 8); - amd64_mov_membase_reg (code, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, rbp), AMD64_R11, 8); + amd64_mov_reg_membase (code, AMD64_R11, AMD64_RSP, framesize, sizeof(mgreg_t)); + amd64_mov_membase_reg (code, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, rbp), AMD64_R11, sizeof(mgreg_t)); /* Save sp */ - amd64_mov_reg_reg (code, AMD64_R11, AMD64_RSP, 8); + amd64_mov_reg_reg (code, AMD64_R11, AMD64_RSP, sizeof(mgreg_t)); amd64_alu_reg_imm (code, X86_ADD, AMD64_R11, framesize + 16); - amd64_mov_membase_reg (code, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, rsp), AMD64_R11, 8); + amd64_mov_membase_reg (code, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, rsp), AMD64_R11, sizeof(mgreg_t)); /* Save method */ if (tramp_type == MONO_TRAMPOLINE_JIT || tramp_type == MONO_TRAMPOLINE_JUMP) { - amd64_mov_reg_membase (code, AMD64_R11, AMD64_RBP, arg_offset, 8); - amd64_mov_membase_reg (code, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, method), AMD64_R11, 8); + amd64_mov_reg_membase (code, AMD64_R11, AMD64_RBP, arg_offset, sizeof(gpointer)); + amd64_mov_membase_reg (code, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, method), AMD64_R11, 
sizeof(gpointer)); } else { - amd64_mov_membase_imm (code, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, method), 0, 8); + amd64_mov_membase_imm (code, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, method), 0, sizeof(gpointer)); } /* Save callee saved regs */ #ifdef TARGET_WIN32 - amd64_mov_membase_reg (code, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, rdi), AMD64_RDI, 8); - amd64_mov_membase_reg (code, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, rsi), AMD64_RSI, 8); + amd64_mov_membase_reg (code, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, rdi), AMD64_RDI, sizeof(mgreg_t)); + amd64_mov_membase_reg (code, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, rsi), AMD64_RSI, sizeof(mgreg_t)); #endif - amd64_mov_membase_reg (code, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, rbx), AMD64_RBX, 8); - amd64_mov_membase_reg (code, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, r12), AMD64_R12, 8); - amd64_mov_membase_reg (code, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, r13), AMD64_R13, 8); - amd64_mov_membase_reg (code, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, r14), AMD64_R14, 8); - amd64_mov_membase_reg (code, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, r15), AMD64_R15, 8); + amd64_mov_membase_reg (code, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, rbx), AMD64_RBX, sizeof(mgreg_t)); + amd64_mov_membase_reg (code, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, r12), AMD64_R12, sizeof(mgreg_t)); + amd64_mov_membase_reg (code, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, r13), AMD64_R13, sizeof(mgreg_t)); + amd64_mov_membase_reg (code, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, r14), AMD64_R14, sizeof(mgreg_t)); + amd64_mov_membase_reg (code, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, r15), AMD64_R15, sizeof(mgreg_t)); if (aot) { code = mono_arch_emit_load_aotconst (buf, code, &ji, MONO_PATCH_INFO_JIT_ICALL_ADDR, "mono_get_lmf_addr"); @@ -498,15 +579,15 @@ mono_arch_create_generic_trampoline (MonoTrampolineType tramp_type, MonoTrampInf amd64_call_reg (code, AMD64_R11); /* Save lmf_addr */ - amd64_mov_membase_reg (code, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, lmf_addr), AMD64_RAX, 8); + amd64_mov_membase_reg (code, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, lmf_addr), AMD64_RAX, sizeof(gpointer)); /* Save previous_lmf */ /* Set the lowest bit to 1 to signal that this LMF has the ip field set */ - amd64_mov_reg_membase (code, AMD64_R11, AMD64_RAX, 0, 8); - amd64_alu_reg_imm_size (code, X86_ADD, AMD64_R11, 1, 8); - amd64_mov_membase_reg (code, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, previous_lmf), AMD64_R11, 8); + amd64_mov_reg_membase (code, AMD64_R11, AMD64_RAX, 0, sizeof(gpointer)); + amd64_alu_reg_imm_size (code, X86_ADD, AMD64_R11, 1, sizeof(gpointer)); + amd64_mov_membase_reg (code, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, previous_lmf), AMD64_R11, sizeof(gpointer)); /* Set new lmf */ amd64_lea_membase (code, AMD64_R11, AMD64_RBP, lmf_offset); - amd64_mov_membase_reg (code, AMD64_RAX, 0, AMD64_R11, 8); + amd64_mov_membase_reg (code, AMD64_RAX, 0, AMD64_R11, sizeof(gpointer)); /* Save LMF end */ @@ -515,15 +596,15 @@ mono_arch_create_generic_trampoline (MonoTrampolineType tramp_type, MonoTrampInf /* Arg2 is the address of the calling code */ if (has_caller) - amd64_mov_reg_membase (code, AMD64_ARG_REG2, AMD64_RBP, 8, 8); + amd64_mov_reg_membase (code, AMD64_ARG_REG2, AMD64_RBP, 8, sizeof(gpointer)); else amd64_mov_reg_imm (code, AMD64_ARG_REG2, 0); /* Arg3 is the method/vtable ptr 
*/ - amd64_mov_reg_membase (code, AMD64_ARG_REG3, AMD64_RBP, arg_offset, 8); + amd64_mov_reg_membase (code, AMD64_ARG_REG3, AMD64_RBP, arg_offset, sizeof(gpointer)); /* Arg4 is the trampoline address */ - amd64_mov_reg_membase (code, AMD64_ARG_REG4, AMD64_RBP, tramp_offset, 8); + amd64_mov_reg_membase (code, AMD64_ARG_REG4, AMD64_RBP, tramp_offset, sizeof(gpointer)); if (aot) { char *icall_name = g_strdup_printf ("trampoline_func_%d", tramp_type); @@ -539,7 +620,7 @@ mono_arch_create_generic_trampoline (MonoTrampolineType tramp_type, MonoTrampInf /* * Have to call the _force_ variant, since there could be a protected wrapper on the top of the stack. */ - amd64_mov_membase_reg (code, AMD64_RBP, res_offset, AMD64_RAX, 8); + amd64_mov_membase_reg (code, AMD64_RBP, res_offset, AMD64_RAX, sizeof(mgreg_t)); if (aot) { code = mono_arch_emit_load_aotconst (buf, code, &ji, MONO_PATCH_INFO_JIT_ICALL_ADDR, "mono_thread_force_interruption_checkpoint"); } else { @@ -547,43 +628,45 @@ mono_arch_create_generic_trampoline (MonoTrampolineType tramp_type, MonoTrampInf } amd64_call_reg (code, AMD64_R11); - amd64_mov_reg_membase (code, AMD64_RAX, AMD64_RBP, res_offset, 8); + amd64_mov_reg_membase (code, AMD64_RAX, AMD64_RBP, res_offset, sizeof(mgreg_t)); /* Restore LMF */ - amd64_mov_reg_membase (code, AMD64_RCX, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, previous_lmf), 8); - amd64_alu_reg_imm_size (code, X86_SUB, AMD64_RCX, 1, 8); - amd64_mov_reg_membase (code, AMD64_R11, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, lmf_addr), 8); - amd64_mov_membase_reg (code, AMD64_R11, 0, AMD64_RCX, 8); + amd64_mov_reg_membase (code, AMD64_RCX, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, previous_lmf), sizeof(gpointer)); + amd64_alu_reg_imm_size (code, X86_SUB, AMD64_RCX, 1, sizeof(gpointer)); + amd64_mov_reg_membase (code, AMD64_R11, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, lmf_addr), sizeof(gpointer)); + amd64_mov_membase_reg (code, AMD64_R11, 0, AMD64_RCX, sizeof(gpointer)); /* * Save rax to the stack, after the leave instruction, this will become part of * the red zone. */ - amd64_mov_membase_reg (code, AMD64_RBP, rax_offset, AMD64_RAX, 8); + amd64_mov_membase_reg (code, AMD64_RBP, rax_offset, AMD64_RAX, sizeof(mgreg_t)); /* Restore argument registers, r10 (imt method/rgxtx) and rax (needed for direct calls to C vararg functions). 
*/ for (i = 0; i < AMD64_NREG; ++i) if (AMD64_IS_ARGUMENT_REG (i) || i == AMD64_R10 || i == AMD64_RAX) - amd64_mov_reg_membase (code, i, AMD64_RBP, saved_regs_offset + (i * 8), 8); + amd64_mov_reg_membase (code, i, AMD64_RBP, saved_regs_offset + (i * sizeof(mgreg_t)), sizeof(mgreg_t)); for (i = 0; i < 8; ++i) - amd64_movsd_reg_membase (code, i, AMD64_RBP, saved_fpregs_offset + (i * 8)); + amd64_movsd_reg_membase (code, i, AMD64_RBP, saved_fpregs_offset + (i * sizeof(mgreg_t))); /* Restore stack */ amd64_leave (code); if (MONO_TRAMPOLINE_TYPE_MUST_RETURN (tramp_type)) { /* Load result */ - amd64_mov_reg_membase (code, AMD64_RAX, AMD64_RSP, rax_offset - 0x8, 8); + amd64_mov_reg_membase (code, AMD64_RAX, AMD64_RSP, rax_offset - sizeof(mgreg_t), sizeof(mgreg_t)); amd64_ret (code); } else { /* call the compiled method using the saved rax */ - amd64_jump_membase (code, AMD64_RSP, rax_offset - 0x8); + amd64_jump_membase (code, AMD64_RSP, rax_offset - sizeof(mgreg_t)); } - g_assert ((code - buf) <= buf_len); + g_assert ((code - buf) <= kMaxCodeSize); + + nacl_global_codeman_validate (&buf, kMaxCodeSize, &code); mono_arch_flush_icache (buf, code - buf); @@ -606,6 +689,8 @@ mono_arch_get_nullified_class_init_trampoline (MonoTrampInfo **info) code = buf = mono_global_codeman_reserve (16); amd64_ret (code); + nacl_global_codeman_validate(&buf, 16, &code); + mono_arch_flush_icache (buf, code - buf); if (info) @@ -625,15 +710,25 @@ mono_arch_create_specific_trampoline (gpointer arg1, MonoTrampolineType tramp_ty tramp = mono_get_trampoline_code (tramp_type); +#if defined(__default_codegen__) if ((((guint64)arg1) >> 32) == 0) size = 5 + 1 + 4; else size = 5 + 1 + 8; code = buf = mono_domain_code_reserve_align (domain, size, 1); +#elif defined(__native_client_codegen__) + size = 5 + 1 + 4; + /* Aligning the call site below could */ + /* add up to kNaClAlignment-1 bytes */ + size += (kNaClAlignment-1); + buf = mono_domain_code_reserve_align (domain, size, kNaClAlignment); + code = buf; +#endif amd64_call_code (code, tramp); /* The trampoline code will obtain the argument from the instruction stream */ +#if defined(__default_codegen__) if ((((guint64)arg1) >> 32) == 0) { *code = 0x4; *(guint32*)(code + 1) = (gint64)arg1; @@ -643,12 +738,20 @@ mono_arch_create_specific_trampoline (gpointer arg1, MonoTrampolineType tramp_ty *(guint64*)(code + 1) = (gint64)arg1; code += 9; } +#elif defined(__native_client_codegen__) + /* For NaCl, all tramp args are 32-bit because they're pointers */ + *code = 0x68; /* push imm32 */ + *(guint32*)(code + 1) = (gint32)arg1; + code += 5; +#endif g_assert ((code - buf) <= size); if (code_len) *code_len = size; + nacl_domain_code_validate(domain, &buf, size, &code); + mono_arch_flush_icache (buf, size); return buf; @@ -679,7 +782,7 @@ mono_arch_create_rgctx_lazy_fetch_trampoline (guint32 slot, MonoTrampInfo **info index -= size - 1; } - tramp_size = 64 + 8 * depth; + tramp_size = NACL_SIZE (64 + 8 * depth, 128 + 8 * depth); code = buf = mono_global_codeman_reserve (tramp_size); @@ -692,7 +795,7 @@ mono_arch_create_rgctx_lazy_fetch_trampoline (guint32 slot, MonoTrampInfo **info amd64_mov_reg_reg (code, AMD64_RAX, AMD64_ARG_REG1, 8); } else { /* load rgctx ptr from vtable */ - amd64_mov_reg_membase (code, AMD64_RAX, AMD64_ARG_REG1, G_STRUCT_OFFSET (MonoVTable, runtime_generic_context), 8); + amd64_mov_reg_membase (code, AMD64_RAX, AMD64_ARG_REG1, G_STRUCT_OFFSET (MonoVTable, runtime_generic_context), sizeof(gpointer)); /* is the rgctx ptr null? 
*/ amd64_test_reg_reg (code, AMD64_RAX, AMD64_RAX); /* if yes, jump to actual trampoline */ @@ -703,9 +806,9 @@ mono_arch_create_rgctx_lazy_fetch_trampoline (guint32 slot, MonoTrampInfo **info for (i = 0; i < depth; ++i) { /* load ptr to next array */ if (mrgctx && i == 0) - amd64_mov_reg_membase (code, AMD64_RAX, AMD64_RAX, MONO_SIZEOF_METHOD_RUNTIME_GENERIC_CONTEXT, 8); + amd64_mov_reg_membase (code, AMD64_RAX, AMD64_RAX, MONO_SIZEOF_METHOD_RUNTIME_GENERIC_CONTEXT, sizeof(gpointer)); else - amd64_mov_reg_membase (code, AMD64_RAX, AMD64_RAX, 0, 8); + amd64_mov_reg_membase (code, AMD64_RAX, AMD64_RAX, 0, sizeof(gpointer)); /* is the ptr null? */ amd64_test_reg_reg (code, AMD64_RAX, AMD64_RAX); /* if yes, jump to actual trampoline */ @@ -714,7 +817,7 @@ mono_arch_create_rgctx_lazy_fetch_trampoline (guint32 slot, MonoTrampInfo **info } /* fetch slot */ - amd64_mov_reg_membase (code, AMD64_RAX, AMD64_RAX, sizeof (gpointer) * (index + 1), 8); + amd64_mov_reg_membase (code, AMD64_RAX, AMD64_RAX, sizeof (gpointer) * (index + 1), sizeof(gpointer)); /* is the slot null? */ amd64_test_reg_reg (code, AMD64_RAX, AMD64_RAX); /* if yes, jump to actual trampoline */ @@ -724,12 +827,12 @@ mono_arch_create_rgctx_lazy_fetch_trampoline (guint32 slot, MonoTrampInfo **info amd64_ret (code); for (i = mrgctx ? 1 : 0; i <= depth + 1; ++i) - x86_patch (rgctx_null_jumps [i], code); + mono_amd64_patch (rgctx_null_jumps [i], code); g_free (rgctx_null_jumps); /* move the rgctx pointer to the VTABLE register */ - amd64_mov_reg_reg (code, MONO_ARCH_VTABLE_REG, AMD64_ARG_REG1, 8); + amd64_mov_reg_reg (code, MONO_ARCH_VTABLE_REG, AMD64_ARG_REG1, sizeof(gpointer)); if (aot) { code = mono_arch_emit_load_aotconst (buf, code, &ji, MONO_PATCH_INFO_JIT_ICALL_ADDR, g_strdup_printf ("specific_trampoline_lazy_fetch_%u", slot)); @@ -741,6 +844,7 @@ mono_arch_create_rgctx_lazy_fetch_trampoline (guint32 slot, MonoTrampInfo **info amd64_jump_code (code, tramp); } + nacl_global_codeman_validate (&buf, tramp_size, &code); mono_arch_flush_icache (buf, code - buf); g_assert (code - buf <= tramp_size); @@ -788,6 +892,8 @@ mono_arch_create_generic_class_init_trampoline (MonoTrampInfo **info, gboolean a amd64_jump_code (code, tramp); } + nacl_global_codeman_validate (&buf, tramp_size, &code); + mono_arch_flush_icache (buf, code - buf); g_assert (code - buf <= tramp_size); @@ -911,6 +1017,8 @@ mono_arch_create_monitor_enter_trampoline (MonoTrampInfo **info, gboolean aot) amd64_jump_code (code, tramp); } + nacl_global_codeman_validate (&buf, tramp_size, &code); + mono_arch_flush_icache (code, code - buf); g_assert (code - buf <= tramp_size); @@ -1027,6 +1135,8 @@ mono_arch_create_monitor_exit_trampoline (MonoTrampInfo **info, gboolean aot) amd64_jump_code (code, tramp); } + nacl_global_codeman_validate (&buf, tramp_size, &code); + mono_arch_flush_icache (code, code - buf); g_assert (code - buf <= tramp_size); @@ -1118,5 +1228,11 @@ mono_arch_get_call_target (guint8 *code) guint32 mono_arch_get_plt_info_offset (guint8 *plt_entry, mgreg_t *regs, guint8 *code) { +#if defined(__native_client__) || defined(__native_client_codegen__) + /* 18 = 3 (mov opcode) + 4 (disp) + 10 (nacljmp) + 1 (push opcode) */ + /* See aot-compiler.c arch_emit_plt_entry for details. 
*/ + return *(guint32*)(plt_entry + 18); +#else return *(guint32*)(plt_entry + 6); +#endif } diff --git a/mono/mini/tramp-x86.c b/mono/mini/tramp-x86.c index 97ec916d6c8..90b76fdbbe9 100644 --- a/mono/mini/tramp-x86.c +++ b/mono/mini/tramp-x86.c @@ -49,6 +49,8 @@ mono_arch_get_unbox_trampoline (MonoMethod *m, gpointer addr) x86_jump_code (code, addr); g_assert ((code - start) < 16); + nacl_domain_code_validate (domain, &start, 16, &code); + return start; } @@ -68,6 +70,7 @@ mono_arch_get_static_rgctx_trampoline (MonoMethod *m, MonoMethodRuntimeGenericCo x86_jump_code (code, addr); g_assert ((code - start) <= buf_len); + nacl_domain_code_validate (domain, &start, buf_len, &code); mono_arch_flush_icache (start, code - start); return start; @@ -96,6 +99,8 @@ mono_arch_get_llvm_imt_trampoline (MonoDomain *domain, MonoMethod *m, int vt_off g_assert ((code - start) < buf_len); + nacl_domain_code_validate (domain, &start, buf_len, &code); + mono_arch_flush_icache (start, code - start); return start; @@ -104,6 +109,7 @@ mono_arch_get_llvm_imt_trampoline (MonoDomain *domain, MonoMethod *m, int vt_off void mono_arch_patch_callsite (guint8 *method_start, guint8 *orig_code, guint8 *addr) { +#if defined(__default_codegen__) guint8 *code; guint8 buf [8]; gboolean can_write = mono_breakpoint_clean_code (method_start, orig_code, 8, buf, sizeof (buf)); @@ -135,6 +141,23 @@ mono_arch_patch_callsite (guint8 *method_start, guint8 *orig_code, guint8 *addr) code [4], code [5], code [6]); g_assert_not_reached (); } +#elif defined(__native_client__) + /* Target must be bundle-aligned */ + g_assert (((guint32)addr & kNaClAlignmentMask) == 0); + + /* 0xe8 = call , 0xe9 = jump */ + if ((orig_code [-5] == 0xe8) || orig_code [-6] == 0xe9) { + int ret; + gint32 offset = (gint32)addr - (gint32)orig_code; + guint8 buf[sizeof(gint32)]; + *((gint32*)(buf)) = offset; + ret = nacl_dyncode_modify (orig_code - sizeof(gint32), buf, sizeof(gint32)); + g_assert (ret == 0); + } else { + printf ("Invalid trampoline sequence %p: %02x %02x %02x %02x %02x\n", orig_code, orig_code [-5], orig_code [-4], orig_code [-3], orig_code [-2], orig_code[-1]); + g_assert_not_reached (); + } +#endif } void @@ -154,7 +177,7 @@ mono_arch_patch_plt_entry (guint8 *code, gpointer *got, mgreg_t *regs, guint8 *a g_assert (code [1] == 0x8b); offset = *(guint32*)(code + 2); -#else +#elif defined(__default_codegen__) /* A PLT entry: jmp *(%ebx) */ g_assert (code [0] == 0xff); g_assert (code [1] == 0xa3); @@ -222,6 +245,7 @@ mono_arch_nullify_class_init_trampoline (guint8 *code, mgreg_t *regs) code -= 5; if (code [0] == 0xe8) { +#if defined(__default_codegen__) if (!mono_running_on_valgrind ()) { guint32 ops; /* @@ -248,6 +272,9 @@ mono_arch_nullify_class_init_trampoline (guint8 *code, mgreg_t *regs) /* Tell valgrind to recompile the patched code */ //VALGRIND_DISCARD_TRANSLATIONS (code, 8); } +#elif defined(__native_client_codegen__) + mono_arch_patch_callsite (code, code + 5, nullified_class_init_trampoline); +#endif } else if (code [0] == 0x90 || code [0] == 0xeb) { /* Already changed by another thread */ ; @@ -499,6 +526,7 @@ mono_arch_create_generic_trampoline (MonoTrampolineType tramp_type, MonoTrampInf x86_ret (code); + nacl_global_codeman_validate (&buf, 256, &code); g_assert ((code - buf) <= 256); if (info) @@ -516,10 +544,13 @@ gpointer mono_arch_get_nullified_class_init_trampoline (MonoTrampInfo **info) { guint8 *code, *buf; + int tramp_size = NACL_SIZE (16, kNaClAlignment); - code = buf = mono_global_codeman_reserve (16); + code = buf = 
mono_global_codeman_reserve (tramp_size); x86_ret (code); + nacl_global_codeman_validate (&buf, tramp_size, &code); + mono_arch_flush_icache (buf, code - buf); if (info) @@ -546,6 +577,8 @@ mono_arch_create_specific_trampoline (gpointer arg1, MonoTrampolineType tramp_ty x86_jump_code (buf, tramp); g_assert ((buf - code) <= TRAMPOLINE_SIZE); + nacl_domain_code_validate (domain, &code, kNaClAlignment, &buf); + mono_arch_flush_icache (code, buf - code); if (code_len) @@ -581,13 +614,12 @@ mono_arch_create_rgctx_lazy_fetch_trampoline (guint32 slot, MonoTrampInfo **info index -= size - 1; } -#ifdef __native_client_codegen__ - /* TODO: align for Native Client */ - tramp_size = (aot ? 64 : 36) + 2 * kNaClAlignment + - 6 * (depth + kNaClAlignment); -#else +#if defined(__default_codegen__) tramp_size = (aot ? 64 : 36) + 6 * depth; -#endif /* __native_client_codegen__ */ +#elif defined(__native_client_codegen__) + tramp_size = (aot ? 64 : 36) + 2 * kNaClAlignment + + 6 * (depth + kNaClAlignment); +#endif code = buf = mono_global_codeman_reserve (tramp_size); @@ -645,6 +677,7 @@ mono_arch_create_rgctx_lazy_fetch_trampoline (guint32 slot, MonoTrampInfo **info x86_jump_code (code, tramp); } + nacl_global_codeman_validate (&buf, tramp_size, &code); mono_arch_flush_icache (buf, code - buf); g_assert (code - buf <= tramp_size); @@ -703,6 +736,9 @@ mono_arch_create_generic_class_init_trampoline (MonoTrampInfo **info, gboolean a #ifdef __native_client_codegen__ g_assert (code - buf <= kNaClAlignment); #endif + + nacl_global_codeman_validate (&buf, tramp_size, &code); + if (info) *info = mono_tramp_info_create (g_strdup_printf ("generic_class_init_trampoline"), buf, code - buf, ji, unwind_ops); @@ -850,6 +886,8 @@ mono_arch_create_monitor_enter_trampoline (MonoTrampInfo **info, gboolean aot) mono_arch_flush_icache (buf, code - buf); g_assert (code - buf <= tramp_size); + nacl_global_codeman_validate (&buf, tramp_size, &code); + if (info) *info = mono_tramp_info_create (g_strdup_printf ("monitor_enter_trampoline"), buf, code - buf, ji, unwind_ops); @@ -960,6 +998,8 @@ mono_arch_create_monitor_exit_trampoline (MonoTrampInfo **info, gboolean aot) x86_jump_code (code, tramp); } + nacl_global_codeman_validate (&buf, tramp_size, &code); + mono_arch_flush_icache (buf, code - buf); g_assert (code - buf <= tramp_size); @@ -1032,6 +1072,8 @@ mono_arch_create_handler_block_trampoline (void) x86_jump_code (code, handler_block_trampoline_helper); } + nacl_global_codeman_validate (&buf, tramp_size, &code); + mono_arch_flush_icache (buf, code - buf); g_assert (code - buf <= tramp_size); diff --git a/mono/utils/mono-codeman.c b/mono/utils/mono-codeman.c index 32d7319dc14..af73766a743 100644 --- a/mono/utils/mono-codeman.c +++ b/mono/utils/mono-codeman.c @@ -19,7 +19,12 @@ #ifdef HAVE_VALGRIND_MEMCHECK_H #include #endif - + +#if defined(__native_client_codegen__) && defined(__native_client__) +#include +#include +#endif + /* * AMD64 processors maintain icache coherency only for pages which are * marked executable. Also, windows DEP requires us to obtain executable memory from @@ -82,10 +87,119 @@ struct _MonoCodeManager { int read_only; CodeChunk *current; CodeChunk *full; +#if defined(__native_client_codegen__) && defined(__native_client__) + MonoGHashTable *hash; +#endif }; #define ALIGN_INT(val,alignment) (((val) + (alignment - 1)) & ~(alignment - 1)) +#if defined(__native_client_codegen__) && defined(__native_client__) +/* End of text segment, set by linker. + * Dynamic text starts on the next allocated page. 
+ */ +extern char etext[]; +char *next_dynamic_code_addr = NULL; + +/* + * This routine gets the next available bundle aligned + * pointer in the dynamic code section. It does not check + * for the section end, this error will be caught in the + * service runtime. + */ +void* +allocate_code(intptr_t increment) +{ + char *addr; + if (increment < 0) return NULL; + increment = increment & kNaClBundleMask ? (increment & ~kNaClBundleMask) + kNaClBundleSize : increment; + addr = next_dynamic_code_addr; + next_dynamic_code_addr += increment; + return addr; +} + +int +nacl_is_code_address (void *target) +{ + return (char *)target < next_dynamic_code_addr; +} + +const int kMaxPatchDepth = 32; +__thread unsigned char **patch_source_base = NULL; +__thread unsigned char **patch_dest_base = NULL; +__thread int *patch_alloc_size = NULL; +__thread int patch_current_depth = -1; +__thread int allow_target_modification = 1; + +void +nacl_allow_target_modification (int val) +{ + allow_target_modification = val; +} + +static void +nacl_jit_check_init () +{ + if (patch_source_base == NULL) { + patch_source_base = g_malloc (kMaxPatchDepth * sizeof(unsigned char *)); + patch_dest_base = g_malloc (kMaxPatchDepth * sizeof(unsigned char *)); + patch_alloc_size = g_malloc (kMaxPatchDepth * sizeof(int)); + } +} + + +/* Given a patch target, modify the target such that patching will work when + * the code is copied to the data section. + */ +void* +nacl_modify_patch_target (unsigned char *target) +{ + /* This seems like a bit of an ugly way to do this but the advantage + * is we don't have to worry about all the conditions in + * mono_resolve_patch_target, and it can be used by all the bare uses + * of _patch. + */ + unsigned char *sb; + unsigned char *db; + + if (!allow_target_modification) return target; + + nacl_jit_check_init (); + sb = patch_source_base[patch_current_depth]; + db = patch_dest_base[patch_current_depth]; + + if (target >= sb && (target < sb + patch_alloc_size[patch_current_depth])) { + /* Do nothing. target is in the section being generated. + * no need to modify, the disp will be the same either way. + */ + } else { + int target_offset = target - db; + target = sb + target_offset; + } + return target; +} + +void* +nacl_inverse_modify_patch_target (unsigned char *target) +{ + unsigned char *sb; + unsigned char *db; + int target_offset; + + if (!allow_target_modification) return target; + + nacl_jit_check_init (); + sb = patch_source_base[patch_current_depth]; + db = patch_dest_base[patch_current_depth]; + + target_offset = target - sb; + target = db + target_offset; + return target; +} + + +#endif /* __native_client_codegen && __native_client__ */ + /** * mono_code_manager_new: * @@ -107,6 +221,24 @@ mono_code_manager_new (void) cman->full = NULL; cman->dynamic = 0; cman->read_only = 0; +#if defined(__native_client_codegen__) && defined(__native_client__) + if (next_dynamic_code_addr == NULL) { + const guint kPageMask = 0xFFFF; /* 64K pages */ + next_dynamic_code_addr = (uintptr_t)(etext + kPageMask) & ~kPageMask; + /* Workaround bug in service runtime, unable to allocate */ + /* from the first page in the dynamic code section. 
*/ + /* TODO: remove */ + next_dynamic_code_addr += (uintptr_t)0x10000; + } + cman->hash = mono_g_hash_table_new (NULL, NULL); + /* Keep the hash table from being collected */ + mono_gc_register_root (&cman->hash, sizeof (void*), NULL); + if (patch_source_base == NULL) { + patch_source_base = g_malloc (kMaxPatchDepth * sizeof(unsigned char *)); + patch_dest_base = g_malloc (kMaxPatchDepth * sizeof(unsigned char *)); + patch_alloc_size = g_malloc (kMaxPatchDepth * sizeof(int)); + } +#endif return cman; } @@ -288,7 +420,10 @@ new_codechunk (int dynamic, int size) if (!ptr) return NULL; } else { - ptr = mono_valloc (NULL, chunk_size, MONO_PROT_RWX | ARCH_MAP_FLAGS); + /* Allocate MIN_ALIGN-1 more than we need so we can still */ + /* guarantee MIN_ALIGN alignment for individual allocs */ + /* from mono_code_manager_reserve_align. */ + ptr = mono_valloc (NULL, chunk_size + MIN_ALIGN - 1, MONO_PROT_RWX | ARCH_MAP_FLAGS); if (!ptr) return NULL; } @@ -333,8 +468,10 @@ new_codechunk (int dynamic, int size) void* mono_code_manager_reserve_align (MonoCodeManager *cman, int size, int alignment) { +#if !defined(__native_client__) || !defined(__native_client_codegen__) CodeChunk *chunk, *prev; void *ptr; + guint32 align_mask = alignment - 1; g_assert (!cman->read_only); @@ -357,8 +494,10 @@ mono_code_manager_reserve_align (MonoCodeManager *cman, int size, int alignment) for (chunk = cman->current; chunk; chunk = chunk->next) { if (ALIGN_INT (chunk->pos, alignment) + size <= chunk->size) { chunk->pos = ALIGN_INT (chunk->pos, alignment); - ptr = chunk->data + chunk->pos; - chunk->pos += size; + /* Align the chunk->data we add to chunk->pos */ + /* or we can't guarantee proper alignment */ + ptr = (void*)((((uintptr_t)chunk->data + align_mask) & ~align_mask) + chunk->pos); + chunk->pos = ((char*)ptr - chunk->data) + size; return ptr; } } @@ -385,9 +524,33 @@ mono_code_manager_reserve_align (MonoCodeManager *cman, int size, int alignment) chunk->next = cman->current; cman->current = chunk; chunk->pos = ALIGN_INT (chunk->pos, alignment); - ptr = chunk->data + chunk->pos; - chunk->pos += size; + /* Align the chunk->data we add to chunk->pos */ + /* or we can't guarantee proper alignment */ + ptr = (void*)((((uintptr_t)chunk->data + align_mask) & ~align_mask) + chunk->pos); + chunk->pos = ((char*)ptr - chunk->data) + size; return ptr; +#else + unsigned char *temp_ptr, *code_ptr; + /* Round up size to next bundle */ + alignment = kNaClBundleSize; + size = (size + kNaClBundleSize) & (~kNaClBundleMask); + /* Allocate a temp buffer */ + temp_ptr = memalign (alignment, size); + g_assert (((uintptr_t)temp_ptr & kNaClBundleMask) == 0); + /* Allocate code space from the service runtime */ + code_ptr = allocate_code (size); + /* Insert pointer to code space in hash, keyed by buffer ptr */ + mono_g_hash_table_insert (cman->hash, temp_ptr, code_ptr); + + nacl_jit_check_init (); + + patch_current_depth++; + patch_source_base[patch_current_depth] = temp_ptr; + patch_dest_base[patch_current_depth] = code_ptr; + patch_alloc_size[patch_current_depth] = size; + g_assert (patch_current_depth < kMaxPatchDepth); + return temp_ptr; +#endif } /** @@ -419,13 +582,45 @@ mono_code_manager_reserve (MonoCodeManager *cman, int size) void mono_code_manager_commit (MonoCodeManager *cman, void *data, int size, int newsize) { +#if !defined(__native_client__) || !defined(__native_client_codegen__) g_assert (newsize <= size); if (cman->current && (size != newsize) && (data == cman->current->data + cman->current->pos - size)) { 
cman->current->pos -= size - newsize; } +#else + unsigned char *code; + int status; + g_assert (newsize <= size); + code = mono_g_hash_table_lookup (cman->hash, data); + g_assert (code != NULL); + /* Pad space after code with HLTs */ + /* TODO: this is x86/amd64 specific */ + while (newsize & kNaClBundleMask) { + *((char *)data + newsize) = 0xf4; + newsize++; + } + status = nacl_dyncode_create (code, data, newsize); + if (status != 0) { + g_assert_not_reached (); + } + mono_g_hash_table_remove (cman->hash, data); + g_assert (data == patch_source_base[patch_current_depth]); + g_assert (code == patch_dest_base[patch_current_depth]); + patch_current_depth--; + g_assert (patch_current_depth >= -1); + free (data); +#endif } +#if defined(__native_client_codegen__) && defined(__native_client__) +void * +nacl_code_manager_get_code_dest (MonoCodeManager *cman, void *data) +{ + return mono_g_hash_table_lookup (cman->hash, data); +} +#endif + /** * mono_code_manager_size: * @cman: a code manager diff --git a/mono/utils/mono-codeman.h b/mono/utils/mono-codeman.h index 39cc1e28034..1507348b0b5 100644 --- a/mono/utils/mono-codeman.h +++ b/mono/utils/mono-codeman.h @@ -19,5 +19,21 @@ int mono_code_manager_size (MonoCodeManager *cman, int *used_siz typedef int (*MonoCodeManagerFunc) (void *data, int csize, int size, void *user_data); void mono_code_manager_foreach (MonoCodeManager *cman, MonoCodeManagerFunc func, void *user_data); +#if defined( __native_client_codegen__ ) && defined( __native_client__ ) + +#define kNaClBundleSize 32 +#define kNaClBundleMask (kNaClBundleSize-1) + +extern __thread unsigned char **patch_source_base; +extern __thread unsigned char **patch_dest_base; +extern __thread int patch_current_depth; + +int nacl_is_code_address (void *target); +void* nacl_code_manager_get_code_dest (MonoCodeManager *cman, void *data); +void nacl_allow_target_modification (int val); +void* nacl_modify_patch_target (unsigned char *target); +void* nacl_inverse_modify_patch_target (unsigned char *target); +#endif /* __native_client__ */ + #endif /* __MONO_CODEMAN_H__ */ diff --git a/mono/utils/mono-path.c b/mono/utils/mono-path.c index 32ad8899f52..ca71d98996f 100644 --- a/mono/utils/mono-path.c +++ b/mono/utils/mono-path.c @@ -30,6 +30,9 @@ /* Resolves '..' and '.' references in a path. If the path provided is relative, * it will be relative to the current directory */ + +/* For Native Client, the above is not true. Since there is no getcwd we fill */ +/* in the file being passed in relative to '.' and don't resolve it */ gchar * mono_path_canonicalize (const char *path) { @@ -39,9 +42,14 @@ mono_path_canonicalize (const char *path) if (g_path_is_absolute (path)) { abspath = g_strdup (path); } else { +#ifdef __native_client__ + gchar *tmpdir = "."; + abspath = g_build_filename (tmpdir, path, NULL); +#else gchar *tmpdir = g_get_current_dir (); abspath = g_build_filename (tmpdir, path, NULL); g_free (tmpdir); +#endif } #ifdef HOST_WIN32 diff --git a/nacl/README b/nacl/README new file mode 100644 index 00000000000..191fec693e1 --- /dev/null +++ b/nacl/README @@ -0,0 +1,92 @@ +Quick guide +=========== + +Prerequisites (see end of file for gclient & svn paths) +-------------- +1. Naclports from SVN + - needed for nacl toolchain (nacl-gcc, etc.) + - needed for packages (zlib for nacl, etc.) +2. Native Client repo from SVN + - currently needed for sel_ldr +3. Mono with NaCl support (you have it if you're reading this file) + +4. Directory conventions used in this document
(your directories may differ...) 
+ ~/naclports Naclports repo from SVN + ~/nacl Native Client repo from SVN + ~/mono Mono for NaCl + +5. Setting your environment: + export NACL_SDK_PATH=/home/username/naclports + export NACL_PATH=/home/username/nacl +5a. Make sure you have a dbg sel_ldr available + /home/username/nacl/ + native_client/scons-out/dbg-${OS_SUBDIR}-x86-${BITSIZE}/staging +5b. If it is not available, scons build it (substitute + linux with mac or win as needed) + cd /home/username/nacl/native_client + ./scons MODE=dbg-linux,nacl [platform=x86-64] + +6. Build naclports libraries + cd /home/username/naclports/src/packages + ./nacl-install-all.sh + +7. Build NaCl Mono Runtime ('libmono.a' for NaCl, 5 minutes): + cd /home/username/mono/trunk/nacl + ./nacl-runtime-mono.sh [TARGET_BITSIZE=32/64 for cross-compiling runtime] + +8. (optional for AOT) Build NaCl Mono Compiler: ('nacl[64]-mono' AOT cross compiler for NaCl, 5 minutes): + cd /home/username/mono/trunk/nacl + ./nacl-mono.sh (32-bit cross-compiler) + ./nacl64-mono.sh (64-bit cross-compiler) + +Native Client Mono Install locations: + /home/username/mono/trunk/nacl/runtime + /home/username/mono/trunk/nacl/compiler + +Normal (not-Native-Client) Mono Install location: + /home/username/mono/trunk/nacl/normal-mono + +Simple Test (requires sel_ldr to run) + cd /home/username/mono/trunk/nacl/test + ./nacl [normal,aot,regression] (defaults to nacl,jit,simple test) + + +SVN Repos +========= + +1. Getting Naclports repo + cd ~ + mkdir naclports + cd naclports + gclient config https://naclports.googlecode.com/svn/trunk/src + gclient sync + +2. Getting Native Client repo + + cd ~ + mkdir nacl + cd nacl + vim .gclient +--------add text below------- +solutions = [ + { "name" : "native_client", + "url" : "svn://svn.chromium.org/native_client/trunk/src/native_client", + }, + { "name" : "supplement.DEPS", + "url" : "svn://svn.chromium.org/native_client/trunk/deps/supplement.DEPS", + }, +] +---------end text------------ + gclient sync + gclient runhooks --force + + +3. Getting Mono repo + + cd ~ + mkdir mono + cd mono + // see http://mono-project.com/Compiling_Mono_From_Git + + diff --git a/nacl/common.sh b/nacl/common.sh new file mode 100644 index 00000000000..bbf8b29d1c4 --- /dev/null +++ b/nacl/common.sh @@ -0,0 +1,204 @@ +# Copyright (c) 2009 The Native Client Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that be +# found in the LICENSE file. +# + +set -o nounset +set -o errexit + +# scripts that source this file must be run from within packages tree +readonly SAVE_PWD=$(pwd) + +# Pick platform directory for compiler. 
+readonly OS_NAME=$(uname -s) +if [ $OS_NAME = "Darwin" ]; then + readonly OS_SUBDIR="mac" + readonly OS_SUBDIR_SHORT="mac" +elif [ $OS_NAME = "Linux" ]; then + readonly OS_SUBDIR="linux" + readonly OS_SUBDIR_SHORT="linux" +else + readonly OS_SUBDIR="windows" + readonly OS_SUBDIR_SHORT="win" +fi + +readonly MACHINE=$(uname -m) +if [ $MACHINE = "x86_64" ]; then + readonly TARGET_BITSIZE=${TARGET_BITSIZE:-"64"} + readonly HOST_BITSIZE=${HOST_BITSIZE:-"64"} +else + # uname -m reports i686 on Linux and i386 on Mac + readonly TARGET_BITSIZE=${TARGET_BITSIZE:-"32"} + readonly HOST_BITSIZE=${HOST_BITSIZE:-"32"} +fi + +NACL_SDK_BASE=/usr/local/google/elijahtaylor/nacl_svn/native_client/tools/sdk/nacl-sdk +#NACL_SDK_BASE=/usr/local/google/elijahtaylor/nacl_svn/native_client/toolchain/linux_x86 + +if [ $TARGET_BITSIZE == "64" ]; then + readonly TARGET_BIT_PREFIX="64" +else + readonly TARGET_BIT_PREFIX="" +fi +# we might want to override the detected host platform (e.g. on OSX 10.6) +if [ $HOST_BITSIZE == "64" ]; then + readonly HOST_BIT_PREFIX="64" +else + readonly HOST_BIT_PREFIX="" +fi + +# locate default nacl_sdk toolchain +# TODO: no arm support +readonly NACL_SDK=${NACL_SDK_PATH:-/usr/local/google/elijahtaylor/naclports_svn} +readonly NACL_DEV=${NACL_PATH:-/usr/local/google/elijahtaylor/nacl_svn} +readonly NACL_NATIVE_CLIENT=${NACL_SDK}/src +readonly NACL_SDK_BASE=${NACL_SDK_BASE:-\ +${NACL_NATIVE_CLIENT}/toolchain/${OS_SUBDIR_SHORT}_x86} + +readonly NACL_BIN_PATH=${NACL_SDK_BASE}/bin +readonly NACLCC=${NACL_SDK_BASE}/bin/nacl${TARGET_BIT_PREFIX}-gcc +readonly NACLCXX=${NACL_SDK_BASE}/bin/nacl${TARGET_BIT_PREFIX}-g++ +readonly NACLAR=${NACL_SDK_BASE}/bin/nacl${TARGET_BIT_PREFIX}-ar +readonly NACLRANLIB=${NACL_SDK_BASE}/bin/nacl${TARGET_BIT_PREFIX}-ranlib +readonly NACLLD=${NACL_SDK_BASE}/bin/nacl${TARGET_BIT_PREFIX}-ld +readonly NACLAS=${NACL_SDK_BASE}/bin/nacl${TARGET_BIT_PREFIX}-as + +# NACL_SDK_GCC_SPECS_PATH is where nacl-gcc 'specs' file will be installed +readonly NACL_SDK_GCC_SPECS_PATH=${NACL_SDK_BASE}/lib/gcc/nacl64/4.4.3 + +# NACL_SDK_USR is where the headers, libraries, etc. will be installed +readonly NACL_SDK_USR=${NACL_SDK_BASE}/nacl/usr +readonly NACL_SDK_USR_INCLUDE=${NACL_SDK_USR}/include +readonly NACL_SDK_USR_LIB=${NACL_SDK_USR}/lib + + +###################################################################### +# Helper functions +###################################################################### + +Banner() { + echo "######################################################################" + echo $* + echo "######################################################################" +} + + +VerifyPath() { + # make sure path isn't all slashes (possibly from an unset variable) + local PATH=$1 + local TRIM=${PATH##/} + if [ ${#TRIM} -ne 0 ]; then + return 0 + else + return 1 + fi +} + + +ChangeDir() { + local NAME=$1 + if VerifyPath ${NAME}; then + cd ${NAME} + else + echo "ChangeDir called with bad path." + exit -1 + fi +} + + +Remove() { + local NAME=$1 + if VerifyPath ${NAME}; then + rm -rf ${NAME} + else + echo "Remove called with bad path." + exit -1 + fi +} + + +MakeDir() { + local NAME=$1 + if VerifyPath ${NAME}; then + mkdir -p ${NAME} + else + echo "MakeDir called with bad path." 
+ exit -1 + fi +} + + +PatchSpecFile() { + # fix up spaces so gcc sees entire path + local SED_SAFE_SPACES_USR_INCLUDE=${NACL_SDK_USR_INCLUDE/ /\ /} + local SED_SAFE_SPACES_USR_LIB=${NACL_SDK_USR_LIB/ /\ /} + # have nacl-gcc dump specs file & add include & lib search paths + ${NACL_SDK_BASE}/bin/nacl-gcc -dumpspecs |\ + sed "/*cpp:/{ + N + s|$| -I${SED_SAFE_SPACES_USR_INCLUDE}| + }" |\ + sed "/*link_libgcc:/{ + N + s|$| -L${SED_SAFE_SPACES_USR_LIB}| + }" >${NACL_SDK_GCC_SPECS_PATH}/specs +} + + +DefaultConfigureStep() { + Banner "Configuring ${PACKAGE_NAME}" + # export the nacl tools + export CC=${NACLCC} + export CXX=${NACLCXX} + export AR=${NACLAR} + export RANLIB=${NACLRANLIB} + export PKG_CONFIG_PATH=${NACL_SDK_USR_LIB}/pkgconfig + export PKG_CONFIG_LIBDIR=${NACL_SDK_USR_LIB} + export PATH=${NACL_BIN_PATH}:${PATH}; + ChangeDir ${NACL_PACKAGES_REPOSITORY}/${PACKAGE_NAME} + Remove ${PACKAGE_NAME}-build + MakeDir ${PACKAGE_NAME}-build + cd ${PACKAGE_NAME}-build + ../configure \ + --host=nacl \ + --disable-shared \ + --prefix=${NACL_SDK_USR} \ + --exec-prefix=${NACL_SDK_USR} \ + --libdir=${NACL_SDK_USR_LIB} \ + --oldincludedir=${NACL_SDK_USR_INCLUDE} \ + --with-http=off \ + --with-html=off \ + --with-ftp=off \ + --with-x=no +} + + +DefaultBuildStep() { + # assumes pwd has makefile + make clean + make -j4 +} + + +DefaultInstallStep() { + # assumes pwd has makefile + make install +} + + +DefaultCleanUpStep() { + PatchSpecFile + ChangeDir ${SAVE_PWD} +} + + +DefaultPackageInstall() { + DefaultPreInstallStep + DefaultDownloadStep + DefaultExtractStep + DefaultPatchStep + DefaultConfigureStep + DefaultBuildStep + DefaultInstallStep + DefaultCleanUpStep +} diff --git a/nacl/config-nacl-runtime.cache b/nacl/config-nacl-runtime.cache new file mode 100644 index 00000000000..3772ac81017 --- /dev/null +++ b/nacl/config-nacl-runtime.cache @@ -0,0 +1,18 @@ +# This file is a shell script that caches the results of configure +# tests run on this system so they can be shared between configure +# scripts and configure runs, see configure's option --config-cache. +# It is not useful on other systems. If it contains results you don't +# want to keep, you may remove or edit it. +# +# config.status only pays attention to the cache file if you give it +# the --recheck option to rerun configure. +# +# `ac_cv_env_foo' variables (set or unset) will be overridden when +# loading this file, other *unset* `ac_cv_foo' will be assigned the +# following values. + +ac_cv_func_mmap=${ac_cv_func_mmap=no} +ac_cv_var_timezone=${ac_cv_var_timezone=yes} +ac_cv_host=${ac_cv_host=i686-pc-nacl} +ac_cv_target=${ac_cv_target=i686-pc-nacl} + diff --git a/nacl/config-nacl-runtime64.cache b/nacl/config-nacl-runtime64.cache new file mode 100644 index 00000000000..ce3bc3590bc --- /dev/null +++ b/nacl/config-nacl-runtime64.cache @@ -0,0 +1,18 @@ +# This file is a shell script that caches the results of configure +# tests run on this system so they can be shared between configure +# scripts and configure runs, see configure's option --config-cache. +# It is not useful on other systems. If it contains results you don't +# want to keep, you may remove or edit it. +# +# config.status only pays attention to the cache file if you give it +# the --recheck option to rerun configure. +# +# `ac_cv_env_foo' variables (set or unset) will be overridden when +# loading this file, other *unset* `ac_cv_foo' will be assigned the +# following values. 
+ +ac_cv_func_mmap=${ac_cv_func_mmap=no} +ac_cv_var_timezone=${ac_cv_var_timezone=yes} +ac_cv_host=${ac_cv_host=x86_64-pc-nacl} +ac_cv_target=${ac_cv_target=x86_64-pc-nacl} + diff --git a/nacl/nacl-common.sh b/nacl/nacl-common.sh new file mode 100644 index 00000000000..b63b7eb8b71 --- /dev/null +++ b/nacl/nacl-common.sh @@ -0,0 +1,19 @@ + +CopyNormalMonoLibs() { + NORMAL_MSCORLIB_DLL=$MONO_TRUNK_NACL/normal-mono/lib/mono/2.0/mscorlib.dll + if [ ! -f ${NORMAL_MSCORLIB_DLL} ] + then + Banner "Normal mscorlib.dll not found, building normal mono" + cd ${MONO_TRUNK_NACL} + ./normal-mono.sh + fi + if [ ! -f ${NORMAL_MSCORLIB_DLL} ] + then + Banner "Normal mscorlib.dll not found after normal mono build, exiting..." + exit -1 + fi + Banner "Copying normal-mono libs to install dir" + mkdir -p ${INSTALL_PATH}/lib/mono + cp -R ${MONO_TRUNK_NACL}/normal-mono/lib/mono/* ${INSTALL_PATH}/lib/mono/ +} + diff --git a/nacl/nacl-mono-config-cache b/nacl/nacl-mono-config-cache new file mode 100644 index 00000000000..830854bb661 --- /dev/null +++ b/nacl/nacl-mono-config-cache @@ -0,0 +1,16 @@ +# This file is a shell script that caches the results of configure +# tests run on this system so they can be shared between configure +# scripts and configure runs, see configure's option --config-cache. +# It is not useful on other systems. If it contains results you don't +# want to keep, you may remove or edit it. +# +# config.status only pays attention to the cache file if you give it +# the --recheck option to rerun configure. +# +# `ac_cv_env_foo' variables (set or unset) will be overridden when +# loading this file, other *unset* `ac_cv_foo' will be assigned the +# following values. + +mono_cv_uscore=${mono_cv_uscore=no} +ac_cv_target=${ac_cv_target=i686-pc-nacl} + diff --git a/nacl/nacl-mono.sh b/nacl/nacl-mono.sh new file mode 100755 index 00000000000..a409db67b49 --- /dev/null +++ b/nacl/nacl-mono.sh @@ -0,0 +1,79 @@ +#!/bin/bash + +# nacl-mono.sh +# +# usage: nacl-mono.sh +# +# this script builds a compiler for 32-bit NaCl code +# (installed in ./compiler folder) +# + +readonly MONO_TRUNK_NACL=$(pwd) + +readonly PACKAGE_NAME=nacl-mono-build + +readonly INSTALL_PATH=${MONO_TRUNK_NACL}/compiler + +source common.sh +source nacl-common.sh + + +CustomConfigureStep() { + Banner "Configuring ${PACKAGE_NAME}" + set +e + cd ${PACKAGE_NAME} + make distclean + cd ${MONO_TRUNK_NACL} + set -e + Remove ${PACKAGE_NAME} + MakeDir ${PACKAGE_NAME} + cd ${PACKAGE_NAME} + cp ../nacl-mono-config-cache ../nacl-mono-config-cache.temp + if [ $HOST_BITSIZE = "64" ]; then + ../../configure \ + CC='cc -m32' CXX='g++ -m32' \ + --host=i386-pc-linux \ + --build=amd64-pc-linux \ + --target=nacl \ + --prefix=${INSTALL_PATH} \ + --with-tls=pthread \ + --enable-nacl-codegen \ + --disable-mono-debugger \ + --disable-mcs-build \ + --with-sigaltstack=no \ + --cache-file=../nacl-mono-config-cache.temp + else + ../../configure \ + --target=nacl \ + --prefix=${INSTALL_PATH} \ + --with-tls=pthread \ + --enable-nacl-codegen \ + --disable-mono-debugger \ + --disable-mcs-build \ + --with-sigaltstack=no \ + --cache-file=../nacl-mono-config-cache.temp + fi + + + rm ../nacl-mono-config-cache.temp +} + +CustomBuildStep() { + MONO_NACL_ALIGN_MASK_OFF=1 make -j4 +} + +CustomInstallStep() { + MONO_NACL_ALIGN_MASK_OFF=1 make install +} + +CustomPackageInstall() { + CustomConfigureStep + #CustomBuildStep + #CustomInstallStep + DefaultBuildStep + DefaultInstallStep +} + + +CustomPackageInstall +exit 0 diff --git a/nacl/nacl-runtime-mono.sh 
b/nacl/nacl-runtime-mono.sh new file mode 100755 index 00000000000..f00a9328465 --- /dev/null +++ b/nacl/nacl-runtime-mono.sh @@ -0,0 +1,87 @@ +#!/bin/bash +# Copyright (c) 2009 The Native Client Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that be +# found in the LICENSE file. +# + +# nacl-runtime-mono.sh +# +# usage: nacl-runtime-mono.sh +# +# this script builds mono runtime for Native Client +# + +readonly MONO_TRUNK_NACL=$(pwd) + +source common.sh +source nacl-common.sh + +readonly PACKAGE_NAME=runtime${TARGET_BIT_PREFIX}-build +readonly INSTALL_PATH=${MONO_TRUNK_NACL}/runtime${TARGET_BIT_PREFIX} + + +CustomConfigureStep() { + Banner "Configuring ${PACKAGE_NAME}" + # export the nacl tools + set +e + if [ -f ${PACKAGE_NAME}/Makefile ] + then + cd ${PACKAGE_NAME} + fi + make distclean + cd ${MONO_TRUNK_NACL} + set -e + cp config-nacl-runtime${TARGET_BIT_PREFIX}.cache config-nacl-runtime${TARGET_BIT_PREFIX}.cache.temp + Remove ${PACKAGE_NAME} + MakeDir ${PACKAGE_NAME} + cd ${PACKAGE_NAME} + # TODO: remove this once libintl.h becomes available to nacl + CC=${NACLCC} CXX=${NACLCXX} AR=${NACLAR} RANLIB=${NACLRANLIB} PKG_CONFIG_PATH=${NACL_SDK_USR_LIB}/pkgconfig \ + PKG_CONFIG_LIBDIR=${NACL_SDK_USR_LIB} PATH=${NACL_BIN_PATH}:${PATH} LIBS="-lnosys -lg" \ + CFLAGS="-g -D_POSIX_PATH_MAX=256 -DPATH_MAX=256" ../../configure \ + --host=nacl${TARGET_BIT_PREFIX} \ + --exec-prefix=${INSTALL_PATH} \ + --libdir=${INSTALL_PATH}/lib \ + --prefix=${INSTALL_PATH} \ + --oldincludedir=${MONO_TRUNK_NACL}/runtime/include \ + --disable-shared \ + --disable-mcs-build \ + --with-glib=embedded \ + --with-tls=pthread \ + --enable-threads=posix \ + --without-sigaltstack \ + --without-mmap \ + --with-gc=included \ + --enable-nacl-gc \ + --enable-nacl-codegen \ + --cache-file=../config-nacl-runtime${TARGET_BIT_PREFIX}.cache.temp + echo "// --- Native Client runtime below" >> config.h + echo "#define pthread_cleanup_push(x, y)" >> config.h + echo "#define pthread_cleanup_pop(x)" >> config.h + echo "#undef HAVE_EPOLL" >> config.h + echo "#undef HAVE_WORKING_SIGALTSTACK" >> config.h + echo "extern long int timezone;" >> config.h + echo "extern int daylight;" >> config.h + echo "#define sem_trywait(x) sem_wait(x)" >> config.h + echo "#define sem_timedwait(x,y) sem_wait(x)" >> config.h + echo "#define getdtablesize() (32768)" >> config.h + echo "// --- Native Client runtime below" >> eglib/src/eglib-config.h + echo "#undef G_BREAKPOINT" >> eglib/src/eglib-config.h + echo "#define G_BREAKPOINT() G_STMT_START { __asm__ (\"hlt\"); } G_STMT_END" >> eglib/src/eglib-config.h + rm ../config-nacl-runtime${TARGET_BIT_PREFIX}.cache.temp +} + +CustomInstallStep() { + make install + CopyNormalMonoLibs +} + +CustomPackageInstall() { + CustomConfigureStep + DefaultBuildStep + CustomInstallStep +} + + +CustomPackageInstall +exit 0 diff --git a/nacl/nacl64-mono-config-cache b/nacl/nacl64-mono-config-cache new file mode 100644 index 00000000000..6e2d0423801 --- /dev/null +++ b/nacl/nacl64-mono-config-cache @@ -0,0 +1,16 @@ +# This file is a shell script that caches the results of configure +# tests run on this system so they can be shared between configure +# scripts and configure runs, see configure's option --config-cache. +# It is not useful on other systems. If it contains results you don't +# want to keep, you may remove or edit it. +# +# config.status only pays attention to the cache file if you give it +# the --recheck option to rerun configure. 
+# +# `ac_cv_env_foo' variables (set or unset) will be overridden when +# loading this file, other *unset* `ac_cv_foo' will be assigned the +# following values. + +mono_cv_uscore=${mono_cv_uscore=no} +ac_cv_target=${ac_cv_target=x86_64-pc-nacl} + diff --git a/nacl/nacl64-mono.sh b/nacl/nacl64-mono.sh new file mode 100755 index 00000000000..964bc43f002 --- /dev/null +++ b/nacl/nacl64-mono.sh @@ -0,0 +1,79 @@ +#!/bin/bash + +# nacl64-mono.sh +# +# usage: nacl64-mono.sh +# +# this script builds a compiler for 64-bit NaCl code +# (installed in ./compiler folder) +# + +readonly MONO_TRUNK_NACL=$(pwd) + +readonly PACKAGE_NAME=nacl64-mono-build + +readonly INSTALL_PATH=${MONO_TRUNK_NACL}/compiler + +source common.sh +source nacl-common.sh + + +CustomConfigureStep() { + Banner "Configuring ${PACKAGE_NAME}" + set +e + cd ${PACKAGE_NAME} + make distclean + cd ${MONO_TRUNK_NACL} + set -e + Remove ${PACKAGE_NAME} + MakeDir ${PACKAGE_NAME} + cd ${PACKAGE_NAME} + cp ../nacl64-mono-config-cache ../nacl64-mono-config-cache.temp + if [ $HOST_BITSIZE = "64" ]; then + ../../configure \ + CFLAGS="-O0" CXXFLAGS="-O0" CC='cc -m32' CXX='g++ -m32' \ + --host=i386-pc-linux \ + --build=amd64-pc-linux \ + --target=nacl64 \ + --prefix=${INSTALL_PATH} \ + --with-tls=pthread \ + --enable-nacl-codegen \ + --disable-mono-debugger \ + --disable-mcs-build \ + --with-sigaltstack=no \ + --cache-file=../nacl64-mono-config-cache.temp + else + ../../configure \ + --target=nacl64 \ + --prefix=${INSTALL_PATH} \ + --with-tls=pthread \ + --enable-nacl-codegen \ + --disable-mono-debugger \ + --disable-mcs-build \ + --with-sigaltstack=no \ + --cache-file=../nacl64-mono-config-cache.temp + fi + + + rm ../nacl64-mono-config-cache.temp +} + +CustomBuildStep() { + MONO_NACL_ALIGN_MASK_OFF=1 make -j4 +} + +CustomInstallStep() { + MONO_NACL_ALIGN_MASK_OFF=1 make install +} + +CustomPackageInstall() { + CustomConfigureStep + #CustomBuildStep + #CustomInstallStep + DefaultBuildStep + DefaultInstallStep +} + + +CustomPackageInstall +exit 0 diff --git a/nacl/normal-mono.sh b/nacl/normal-mono.sh new file mode 100755 index 00000000000..88ebe68d613 --- /dev/null +++ b/nacl/normal-mono.sh @@ -0,0 +1,49 @@ +#!/bin/bash +# Copyright (c) 2009 The Native Client Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that be +# found in the LICENSE file. 
+# + +# normal-mono.sh +# +# usage: normal-mono.sh +# +# this script builds normal x86 mono +# (installed in ./normal folder) +# + +readonly MONO_TRUNK_NACL=$(pwd) + +readonly PACKAGE_NAME=mono-normal-build + +source common.sh + + +CustomConfigureStep() { + Banner "Configuring ${PACKAGE_NAME}" + set +e + if [ -f ${PACKAGE_NAME}/Makefile ] + then + cd ${PACKAGE_NAME} + make distclean + fi + cd ${MONO_TRUNK_NACL} + set -e + Remove ${PACKAGE_NAME} + MakeDir ${PACKAGE_NAME} + cd ${PACKAGE_NAME} + ../../configure \ + --prefix=${MONO_TRUNK_NACL}/normal-mono \ + --disable-parallel-mark \ + --with-tls=pthread +} + +CustomPackageInstall() { + CustomConfigureStep + DefaultBuildStep + DefaultInstallStep +} + + +CustomPackageInstall +exit 0 diff --git a/nacl/test/hw.cs b/nacl/test/hw.cs new file mode 100644 index 00000000000..f82c33e5470 --- /dev/null +++ b/nacl/test/hw.cs @@ -0,0 +1,60 @@ +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Collections; +using System.Text; +using System.Threading; + +namespace Test { + + public class c_code { + + [MethodImplAttribute (MethodImplOptions.InternalCall)] + public extern static void my_c_func(int x, string s, double d); + [MethodImplAttribute (MethodImplOptions.InternalCall)] + public extern static void my_c_pass(int x); + } + + public class HelloWorld + { + static public void Main () + { + } + + static public void Foobar (int x, string s) + { + // first line is a simple test + // 1. call back into c code 2. use mscorlib Math.Sqrt() + c_code.my_c_func(x, s, Math.Sqrt(3.1415 * 3.1415)); + + // second part of this test: + // attempt a try/catch, generate exception w/ throw + try { + c_code.my_c_pass(0); + // attempt an invalid cast + throw new InvalidCastException(); + c_code.my_c_pass(1); + } + catch (InvalidCastException e) { + c_code.my_c_pass(2); + } + c_code.my_c_pass(3); + + // third part of this test: + // attempt an invalid cast again, this time generating + // exception instead of using explicit throw. 
+ try { + c_code.my_c_pass(0); + StringBuilder reference1 = new StringBuilder(); + object reference2 = reference1; + // attempt invalid cast + int reference3 = (int)reference2; + c_code.my_c_pass(4); + } + catch (InvalidCastException e) { + c_code.my_c_pass(5); + } + c_code.my_c_pass(3); + } + } +} diff --git a/nacl/test/my.c b/nacl/test/my.c new file mode 100644 index 00000000000..218cec4ddf4 --- /dev/null +++ b/nacl/test/my.c @@ -0,0 +1,139 @@ +#include +#include +#include + +#include +#include +#include + +extern void* mono_aot_module_mscorlib_info; + +extern void* mono_aot_module_hw_info; + +extern void mono_set_corlib_data(void *data, size_t size); +extern void mono_aot_register_module(void *aot_info); +extern void mono_aot_init(void); +extern void mono_jit_set_aot_only(int aot_only); +extern MonoDomain * mini_init (const char *filename, const char *runtime_version); + +#if !defined(TRUE) +#define TRUE 1 +#endif +#if !defined(FALSE) +#define FALSE 0 +#endif + +void my_c_func(int arg, const char *str, double d) { + /* str from c# is immutable */ + printf("*** my_c_func(%d, '%s', %1.4f) received\n", arg, str, (float)d); +} + + +void my_c_pass(int x) { + char *msg = "undefined"; + switch(x) { + case 0: msg = "about to throw an exception..."; break; + case 1: msg = "thrown invalid cast exception was not caught!"; break; + case 2: msg = "thrown invalid cast exception was safely caught!"; break; + case 3: msg = "...leaving exeception test."; break; + case 4: msg = "generated invalid cast exception was not caught!"; break; + case 5: msg = "generated invalid cast exception was safely caught!"; break; + } + printf("*** my_c_pass(%d): %s\n", x, msg); +} + + + +void try_mono() { + MonoDomain *domain; + MonoAssembly *ma; + MonoImage *mi; + MonoClass *mc; + MonoMethodDesc *mmd; + MonoMethod *mm; + MonoObject *mo; + FILE *mscorlib; + char *corlib_data = NULL; + void *args [2]; + static int x = 123000; + args [0] = &x; + args [1] = "hello world"; + +#if defined(__native_client__) + mscorlib = fopen("mscorlib.dll", "r"); + if (NULL != mscorlib) { + size_t size; + struct stat st; + if (0 == stat("mscorlib.dll", &st)) { + size = st.st_size; + printf("reading mscorlib.dll, size %ld\n", size); + corlib_data = malloc(size); + if (corlib_data != NULL) { + while (fread(corlib_data, 1, size, mscorlib) != 0) ; + if (!ferror(mscorlib)) { + mono_set_corlib_data(corlib_data, size); + } else { + perror("error reading mscorlib.dll"); + free(corlib_data); + corlib_data = NULL; + } + } else { + perror("Could not allocate memory"); + } + } else { + perror("stat error"); + } + fclose(mscorlib); + } +#endif + +#ifdef AOT_VERSION + printf("address of mono_aot_module_mscorlib_info: %p\n", mono_aot_module_mscorlib_info); + printf("address of mono_aot_module_hw_info: %p\n", mono_aot_module_hw_info); + + // mono_jit_set_aot_only(TRUE) should be enabled now. + // if not enabled, I suspect we're still jitting... 
+ mono_jit_set_aot_only(TRUE); + + mono_aot_register_module(mono_aot_module_mscorlib_info); + mono_aot_register_module(mono_aot_module_hw_info); +#endif + + domain = mini_init("hw.exe", "v2.0.50727"); + printf("mono domain: %p\n", domain); + + ma = mono_domain_assembly_open(domain, "hw.exe"); + printf("mono assembly: %p\n", ma); + + mi = mono_assembly_get_image(ma); + printf("mono image: %p\n", mi); + + mc = mono_class_from_name(mi, "Test", "HelloWorld"); + printf("mono class: %p\n", mc); + + mmd = mono_method_desc_new("Test.HelloWorld:Foobar(int,string)", TRUE); + printf("mono desc method: %p\n", mmd); + + mm = mono_method_desc_search_in_image(mmd, mi); + printf("mono method: %p\n", mm); + + // add c functions for mono test code to invoke + mono_add_internal_call("Test.c_code::my_c_func", (void *) my_c_func); + mono_add_internal_call("Test.c_code::my_c_pass", (void *) my_c_pass); + + mo = mono_runtime_invoke(mm, NULL, args, NULL); + printf("mono object: %p\n", mo); + if (NULL != corlib_data) free(corlib_data); +} + + +int main() { + int i; + printf("address of main(): %p\n", main); + printf("address of stack : %p\n", &i); + printf("\nProgram a.out output:\n"); + printf("==========================\n"); + try_mono(); + printf("==========================\n\n"); + return 0; +} diff --git a/nacl/test/nacl b/nacl/test/nacl new file mode 100755 index 00000000000..626f3a24b62 --- /dev/null +++ b/nacl/test/nacl @@ -0,0 +1,152 @@ +#!/bin/bash + +set -o nounset +set -o errexit +set -o verbose + +source ../common.sh + +MONO_RUNTIME_BASE=../runtime${TARGET_BIT_PREFIX} +MONO_BASE=../compiler +MONO="${MONO_BASE}/bin/nacl${TARGET_BIT_PREFIX}-mono" +CC=$NACLCC +AS=$NACLAS +MODE=nacl +COMPILE_AOT=0 +AOT_OBJS= +CC_DEFINES= +MONO_SNAPSHOT=mono-normal-build +RELOCATE_RODATA="-Wl,--section-start=.rodata=0x2000000" +export NACL_ALLOW_DYNCODE_REPLACEMENT=1 + +if [ $# -gt 0 ]; then + while [ -n "$*" ] + do + if [ $1 == "normal" ]; then + MONO_RUNTIME_BASE=../normal-mono + MONO_BASE=../normal-mono + MONO=${MONO_BASE}/bin/mono + CC=gcc + AS=as + MODE=normal + elif [ $1 == "aot" ]; then + COMPILE_AOT=1 + CC_DEFINES=-DAOT_VERSION + RELOCATE_RODATA= + elif [ $1 == "regression" ]; then + MODE=regression + else + echo "Unrecognized option '$1'" + exit -1 + fi + shift + done +fi + +readonly MONO_NORMAL_BASE=../normal-mono +readonly NCVAL=ncval + +# add nacl-gcc to path (from NaCl SDK) +export PATH=${NACL_BIN_PATH}:$PATH + +# add sel_ldr to path (from NaCl dev tree) +export PATH=${NACL_DEV}/native_client/scons-out/dbg-${OS_SUBDIR}-x86-${TARGET_BITSIZE}/staging:$PATH + +# add nacl-mono to path +export PATH=../normal-mono/bin:$PATH + +# echo version of nacl-gcc +$CC --version + +echo $PATH +which sel_ldr + +# echo version of gmcs +which gmcs +../normal-mono/bin/gmcs --version + +# echo version of nacl-mono +${MONO} --version + +# add MONO_PATH so mono can crank on local directory +export MONO_PATH=$(pwd) +echo ${MONO_PATH} + +#----- +# enable the appropriate set of AOT options below. + +readonly AOT_OPTIONS=full,static,nodebug,ntrampolines=4096 +#----- + +# make a temp copy of mscorlib.dll in this directory +cp ${MONO_NORMAL_BASE}/lib/mono/2.0/mscorlib.dll . + +# compile c-sharp file with gmcs +MONO_PATH=. ../normal-mono/bin/gmcs -lib:. -warn:0 hw.cs + +# convert .exe to .exe.o assembly files +# convert mscorlib to a .dll.o file +if [ $COMPILE_AOT = "1" ]; then + MONO_PATH=. ${MONO} --aot=${AOT_OPTIONS} mscorlib.dll + MONO_PATH=. 
${MONO} --aot=${AOT_OPTIONS} hw.exe + AOT_OBJS="hw.exe.o mscorlib.dll.o" +fi + +# compile c and assembly into a.out, all statically linked +# different options for normal and nacl-mono +if [ $MODE = "normal" ]; then + $CC -g -static my.c ${CC_DEFINES} ${AOT_OBJS} -o hw-test -lmono-2.0 -lpthread -lm -ldl -lrt -I${MONO_RUNTIME_BASE}/include -I${MONO_RUNTIME_BASE}/include/mono-2.0 -L${MONO_RUNTIME_BASE}/lib +elif [ $MODE = "nacl" ]; then + $CC -static my.c ${CC_DEFINES} ${AOT_OBJS} -o hw-test.nexe -lmono-2.0 -lpthread -lm -lnosys -I${MONO_RUNTIME_BASE}/include -I${MONO_RUNTIME_BASE}/include/mono-2.0 -L${MONO_RUNTIME_BASE}/lib ${RELOCATE_RODATA} +fi + +readonly fsatests="basic.exe basic-float.exe basic-long.exe basic-calls.exe objects.exe arrays.exe basic-math.exe exceptions.exe devirtualization.exe basic-simd.exe gc-stress.exe imt_big_iface_test.exe generics.exe iltests.exe nacl.exe" +if [ $MODE = "regression" ]; then + #rm -rf fsa-tmp + mkdir -p fsa-tmp + DIR=$(pwd) + cd ../${MONO_SNAPSHOT}/mono/mini/ + make $fsatests generics-variant-types.dll TestDriver.dll + cp $fsatests generics-variant-types.dll TestDriver.dll $DIR/fsa-tmp + cd - + + CLASS=${MONO_NORMAL_BASE}/lib/mono/2.0 + cp $CLASS/System.Core.dll $CLASS/System.dll $CLASS/Mono.Posix.dll $CLASS/System.Configuration.dll $CLASS/System.Security.dll $CLASS/System.Xml.dll $CLASS/Mono.Security.dll $CLASS/Mono.Simd.dll fsa-tmp + cp mscorlib.dll fsa-tmp + + AOT_OBJS="" + if [ $COMPILE_AOT = "1" ]; then + for t in $fsatests; do + MONO_PATH=fsa-tmp ${MONO} --aot=${AOT_OPTIONS} fsa-tmp/$t + AOT_OBJS="${AOT_OBJS} fsa-tmp/$t.o" + done + for d in fsa-tmp/*.dll; do + MONO_PATH=fsa-tmp ${MONO} --aot=${AOT_OPTIONS} $d + AOT_OBJS="${AOT_OBJS} $d.o" + done + fi + + $CC -o fsa-tmp/fsacheck.nexe -g -static ../../mono/mini/fsacheck.c ${CC_DEFINES} ${AOT_OBJS} -lmono-2.0 -lpthread -lm -lnosys -L${MONO_RUNTIME_BASE}/lib -I${MONO_RUNTIME_BASE}/include/mono-2.0 -I${MONO_RUNTIME_BASE}/include ${RELOCATE_RODATA} +fi + +if [ $MODE = "regression" ]; then + cd fsa-tmp + ${NCVAL} -readwrite_sfi fsacheck.nexe 2> validator_out || echo "fsacheck.nexe invalid: continuing anyway" + nacl-objdump -d fsacheck.nexe > fsacheck.disasm + for t in $fsatests; do + sel_ldr -a -c fsacheck.nexe $t || true + done +else + export MONO_PATH=$(pwd) + # run generated test(select one or more below) + if [ $MODE = "normal" ]; then + ./hw-test + else + ${NCVAL} -readwrite_sfi hw-test.nexe 2> validator_out || echo "hw-test.nexe invalid: continuing anyway" + nacl-objdump -d hw-test.nexe > hw-test.disasm + sel_ldr -a -c hw-test.nexe + fi +fi + +exit 0 + diff --git a/runtime/mono-wrapper.in b/runtime/mono-wrapper.in index eb6cd895921..3c94c5a8df1 100644 --- a/runtime/mono-wrapper.in +++ b/runtime/mono-wrapper.in @@ -3,5 +3,6 @@ r='@mono_build_root@' MONO_CFG_DIR='@mono_cfg_dir@' PATH="$r/runtime/_tmpinst/bin:$PATH" MONO_SHARED_DIR=$r/runtime +export MONO_NACL_ALIGN_MASK_OFF=@MONO_NACL_ALIGN_MASK_OFF@ export MONO_CFG_DIR MONO_SHARED_DIR PATH exec "$r/libtool" --mode=execute "$r/@mono_runtime@" --config "@mono_cfg_dir@/mono/config" "$@"
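
For reference, the standalone sketch below (not part of the patch) illustrates two pieces of the NaCl code-manager changes above: the bundle rounding that allocate_code() applies with kNaClBundleSize/kNaClBundleMask from mono-codeman.h, and the source-buffer/code-destination base translation behind nacl_modify_patch_target() and nacl_inverse_modify_patch_target(). The buffer addresses and the helper name round_to_bundle are invented for the example; only the arithmetic mirrors the patch.

/* sketch.c - standalone illustration, not part of the mono patch */
#include <stdio.h>
#include <stdint.h>

#define kNaClBundleSize 32
#define kNaClBundleMask (kNaClBundleSize - 1)

/* Round a size up to the next bundle boundary, as allocate_code() does
 * before advancing next_dynamic_code_addr. */
static intptr_t round_to_bundle (intptr_t increment)
{
	return (increment & kNaClBundleMask)
		? (increment & ~(intptr_t)kNaClBundleMask) + kNaClBundleSize
		: increment;
}

int main (void)
{
	/* Made-up addresses: the temp buffer the JIT writes into (source base)
	 * and the dynamic-code region nacl_dyncode_create() copies it to
	 * (dest base), standing in for patch_source_base/patch_dest_base. */
	unsigned char *source_base = (unsigned char *)0x100000;
	unsigned char *dest_base   = (unsigned char *)0x200000;
	unsigned char *target_in_source, *target_in_dest;

	printf ("round_to_bundle(5)  = %ld\n", (long)round_to_bundle (5));  /* 32 */
	printf ("round_to_bundle(32) = %ld\n", (long)round_to_bundle (32)); /* 32 */
	printf ("round_to_bundle(33) = %ld\n", (long)round_to_bundle (33)); /* 64 */

	/* A target emitted into the temp buffer is rebased onto the code
	 * destination by keeping its offset, which is what
	 * nacl_inverse_modify_patch_target() does with the per-thread bases. */
	target_in_source = source_base + 0x40;
	target_in_dest = dest_base + (target_in_source - source_base);
	printf ("rebased target: %p -> %p\n",
		(void *)target_in_source, (void *)target_in_dest);
	return 0;
}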