From bd193e46f162cd7f21002d7ecbd88c2be85d5343 Mon Sep 17 00:00:00 2001 From: Matthew Flatt Date: Sat, 28 Nov 2020 14:25:47 -0700 Subject: [PATCH] bc: W^X for AArch64 Mac OS --- racket/src/bc/foreign/foreign.c | 24 ++++++++++++++------- racket/src/bc/foreign/foreign.rktc | 20 ++++++++++++------ racket/src/bc/gc2/vm_osx.c | 3 +-- racket/src/bc/include/schthread.h | 4 ++++ racket/src/bc/sconfig.h | 1 + racket/src/bc/src/jitstate.c | 4 ++++ racket/src/bc/src/salloc.c | 34 ++++++++++++++++++++++++++++-- racket/src/bc/src/schpriv.h | 7 ++++++ 8 files changed, 79 insertions(+), 18 deletions(-) diff --git a/racket/src/bc/foreign/foreign.c b/racket/src/bc/foreign/foreign.c index 9341df9a14..cd7703cb4a 100644 --- a/racket/src/bc/foreign/foreign.c +++ b/racket/src/bc/foreign/foreign.c @@ -4258,6 +4258,7 @@ static Scheme_Object *ffi_callback_or_curry(const char *who, int curry, int argc ffi_abi abi; int is_atomic; int nargs, i; + ffi_status ffi_ok; /* ffi_closure objects are problematic when used with a moving GC. The * problem is that memory that is GC-visible can move at any time. 
The * solution is to use an immobile-box, which an immobile pointer (in a simple @@ -4361,20 +4362,25 @@ static Scheme_Object *ffi_callback_or_curry(const char *who, int curry, int argc } else do_callback = ffi_do_callback; /* malloc space for everything needed, so a single free gets rid of this */ - cl_cif_args = - scheme_malloc_code(sizeof(closure_and_cif) + nargs*sizeof(ffi_cif*)); + cl_cif_args = scheme_malloc_code(sizeof(closure_and_cif) + nargs*sizeof(ffi_cif*)); + scheme_thread_code_start_write(); cl = &(cl_cif_args->closure); /* cl is the same as cl_cif_args */ cif = &(cl_cif_args->cif); atypes = (ffi_type **)(((char*)cl_cif_args) + sizeof(closure_and_cif)); for (i=0, p=itypes; iso.type = ffi_callback_tag; data->callback = (cl_cif_args); @@ -4411,9 +4417,11 @@ static Scheme_Object *ffi_callback_or_curry(const char *who, int curry, int argc callback_data = (void *)tmp; } # endif /* MZ_USE_MZRT */ + scheme_thread_code_start_write(); cl_cif_args->data = callback_data; - if (ffi_prep_closure_loc(cl, cif, do_callback, (void*)(cl_cif_args->data), cl) - != FFI_OK) + ffi_ok = ffi_prep_closure_loc(cl, cif, do_callback, (void*)(cl_cif_args->data), cl); + scheme_thread_code_end_write(); + if (ffi_ok != FFI_OK) scheme_signal_error ("internal error: ffi_prep_closure did not return FFI_OK"); # ifdef MZ_USE_MZRT @@ -5361,8 +5369,8 @@ void scheme_init_foreign(Scheme_Startup_Env *env) int scheme_is_cpointer(Scheme_Object *cp) { return (SCHEME_FALSEP(cp) - || SCHEME_CPTRP(cp) - || SCHEME_BYTE_STRINGP(cp) + || SCHEME_CPTRP(cp) + || SCHEME_BYTE_STRINGP(cp) || (SCHEME_CHAPERONE_STRUCTP(cp) && scheme_struct_type_property_ref(scheme_cpointer_property, cp))); } diff --git a/racket/src/bc/foreign/foreign.rktc b/racket/src/bc/foreign/foreign.rktc index ca2a670146..0c51f8d39e 100755 --- a/racket/src/bc/foreign/foreign.rktc +++ b/racket/src/bc/foreign/foreign.rktc @@ -3415,6 +3415,7 @@ static Scheme_Object *ffi_callback_or_curry(const char *who, int curry, int argc ffi_abi abi; int
is_atomic; int nargs, i; + ffi_status ffi_ok; /* ffi_closure objects are problematic when used with a moving GC. The * problem is that memory that is GC-visible can move at any time. The * solution is to use an immobile-box, which an immobile pointer (in a simple @@ -3518,20 +3519,25 @@ static Scheme_Object *ffi_callback_or_curry(const char *who, int curry, int argc } else do_callback = ffi_do_callback; /* malloc space for everything needed, so a single free gets rid of this */ - cl_cif_args = - scheme_malloc_code(sizeof(closure_and_cif) + nargs*sizeof(ffi_cif*)); + cl_cif_args = scheme_malloc_code(sizeof(closure_and_cif) + nargs*sizeof(ffi_cif*)); + scheme_thread_code_start_write(); cl = &(cl_cif_args->closure); /* cl is the same as cl_cif_args */ cif = &(cl_cif_args->cif); atypes = (ffi_type **)(((char*)cl_cif_args) + sizeof(closure_and_cif)); for (i=0, p=itypes; idata = callback_data; - if (ffi_prep_closure_loc(cl, cif, do_callback, (void*)(cl_cif_args->data), cl) - != FFI_OK) + ffi_ok = ffi_prep_closure_loc(cl, cif, do_callback, (void*)(cl_cif_args->data), cl); + scheme_thread_code_end_write(); + if (ffi_ok != FFI_OK) scheme_signal_error ("internal error: ffi_prep_closure did not return FFI_OK"); @@IFDEF{MZ_USE_MZRT}{ diff --git a/racket/src/bc/gc2/vm_osx.c b/racket/src/bc/gc2/vm_osx.c index 434e86ecac..38fdb38c30 100644 --- a/racket/src/bc/gc2/vm_osx.c +++ b/racket/src/bc/gc2/vm_osx.c @@ -232,8 +232,7 @@ static void os_protect_pages(void *p, size_t len, int writeable) } retval = vm_protect(task_self, (vm_address_t)p, len, FALSE, - writeable ? VM_PROT_ALL - : (VM_PROT_READ | VM_PROT_EXECUTE)); + (VM_PROT_READ | (writeable ? 
VM_PROT_WRITE : 0))); if(retval != KERN_SUCCESS) { GCPRINT(GCOUTF, "WARNING: couldn't protect %li bytes of page %p%s\n", len, p, mach_error_string(retval)); diff --git a/racket/src/bc/include/schthread.h b/racket/src/bc/include/schthread.h index 1a3009fd98..e2ad5d2913 100644 --- a/racket/src/bc/include/schthread.h +++ b/racket/src/bc/include/schthread.h @@ -159,6 +159,9 @@ typedef struct Thread_Local_Variables { void *jit_buffer_cache_; intptr_t jit_buffer_cache_size_; int jit_buffer_cache_registered_; +#ifdef MZ_USE_MAP_JIT + int jit_code_write_enabled_; +#endif int scheme_continuation_application_count_; int scheme_cont_capture_count_; int scheme_prompt_capture_count_; @@ -544,6 +547,7 @@ XFORM_GC_VARIABLE_STACK_THROUGH_THREAD_LOCAL; #define jit_buffer_cache XOA (scheme_get_thread_local_variables()->jit_buffer_cache_) #define jit_buffer_cache_size XOA (scheme_get_thread_local_variables()->jit_buffer_cache_size_) #define jit_buffer_cache_registered XOA (scheme_get_thread_local_variables()->jit_buffer_cache_registered_) +#define jit_code_write_enabled XOA (scheme_get_thread_local_variables()->jit_code_write_enabled_) #define scheme_continuation_application_count XOA (scheme_get_thread_local_variables()->scheme_continuation_application_count_) #define scheme_cont_capture_count XOA (scheme_get_thread_local_variables()->scheme_cont_capture_count_) #define scheme_prompt_capture_count XOA (scheme_get_thread_local_variables()->scheme_prompt_capture_count_) diff --git a/racket/src/bc/sconfig.h b/racket/src/bc/sconfig.h index f87c7f0bf9..5b88d82233 100644 --- a/racket/src/bc/sconfig.h +++ b/racket/src/bc/sconfig.h @@ -654,6 +654,7 @@ # define SCHEME_ARCH "arm" # elif defined(__arm64__) # define SCHEME_ARCH "aarch64" +# define MZ_USE_MAP_JIT # elif defined(__x86_64__) # define SCHEME_ARCH "x86_64" # else diff --git a/racket/src/bc/src/jitstate.c b/racket/src/bc/src/jitstate.c index 3c525a7d09..e92a755d7f 100644 --- a/racket/src/bc/src/jitstate.c +++ 
b/racket/src/bc/src/jitstate.c @@ -255,6 +255,7 @@ void *scheme_generate_one(mz_jit_state *old_jitter, } else { buffer = scheme_malloc_permanent_code(size); } + scheme_thread_code_start_write(); RECORD_CODE_SIZE(size); } else if (old_jitter) { /* this is a recursive generate, so use leftover space in @@ -299,6 +300,7 @@ void *scheme_generate_one(mz_jit_state *old_jitter, then switch over to long-jump mode. */ if (check_long_mode((uintptr_t)buffer, size)) { /* start over */ + scheme_thread_code_end_write(); scheme_performance_record_end("jit", &perf_state); return scheme_generate_one(old_jitter, generate, data, gcable, save_ptr, ndata); @@ -352,6 +354,7 @@ void *scheme_generate_one(mz_jit_state *old_jitter, if (!use_long_jumps) { if (check_long_mode((uintptr_t)buffer, size)) { /* start over */ + scheme_thread_code_end_write(); scheme_performance_record_end("jit", &perf_state); return scheme_generate_one(old_jitter, generate, data, gcable, save_ptr, ndata); @@ -417,6 +420,7 @@ void *scheme_generate_one(mz_jit_state *old_jitter, /* That was in the permanent area, so return: */ jit_flush_code(buffer, jit_get_raw_ip()); scheme_performance_record_end("jit", &perf_state); + scheme_thread_code_end_write(); return buffer; } else { /* Allocate permanent area and jit again: */ diff --git a/racket/src/bc/src/salloc.c b/racket/src/bc/src/salloc.c index 5d2d9bc06f..3da4647aea 100644 --- a/racket/src/bc/src/salloc.c +++ b/racket/src/bc/src/salloc.c @@ -947,6 +947,8 @@ THREAD_LOCAL_DECL(intptr_t scheme_code_page_total); THREAD_LOCAL_DECL(intptr_t scheme_code_total); THREAD_LOCAL_DECL(intptr_t scheme_code_count); +THREAD_LOCAL_DECL(int jit_code_write_enabled); + #if defined(MZ_CODE_ALLOC_USE_MPROTECT) && !defined(MAP_ANON) static int fd, fd_created; #endif @@ -957,6 +959,22 @@ static int fd, fd_created; # define MAYBE_PROT_EXEC PROT_EXEC #endif +#ifdef MZ_USE_MAP_JIT +# define MAYBE_MAP_JIT MAP_JIT +void scheme_thread_code_start_write(void) { + if (jit_code_write_enabled == 0) + 
pthread_jit_write_protect_np(0); + jit_code_write_enabled++; +} +void scheme_thread_code_end_write(void) { + --jit_code_write_enabled; + if (jit_code_write_enabled == 0) + pthread_jit_write_protect_np(1); +} +#else +# define MAYBE_MAP_JIT 0 +#endif + #define LOG_CODE_MALLOC(lvl, s) /* if (lvl > 1) s */ #define CODE_PAGE_OF(p) ((void *)(((uintptr_t)p) & ~(page_size - 1))) @@ -1031,13 +1049,13 @@ static void *malloc_page(intptr_t size) } #else # ifdef MAP_ANON - r = mmap(NULL, size, PROT_READ | PROT_WRITE | MAYBE_PROT_EXEC, MAP_PRIVATE | MAP_ANON, -1, 0); + r = mmap(NULL, size, PROT_READ | PROT_WRITE | MAYBE_PROT_EXEC, MAP_PRIVATE | MAYBE_MAP_JIT | MAP_ANON, -1, 0); # else if (!fd_created) { fd_created = 1; fd = open("/dev/zero", O_RDWR); } - r = mmap(NULL, size, PROT_READ | PROT_WRITE | MAYBE_PROT_EXEC, MAP_PRIVATE, fd, 0); + r = mmap(NULL, size, PROT_READ | PROT_WRITE | MAYBE_PROT_EXEC, MAP_PRIVATE | MAYBE_MAP_JIT, fd, 0); # endif if (r == (void *)-1) r = NULL; @@ -1144,6 +1162,8 @@ void *scheme_malloc_code(intptr_t size) intptr_t size2, bucket, sz, page_size; void *p, *pg, *prev; + scheme_thread_code_start_write(); + if (size < CODE_HEADER_SIZE) { /* ensure CODE_HEADER_SIZE alignment and room for free-list pointers */ @@ -1213,6 +1233,8 @@ void *scheme_malloc_code(intptr_t size) LOG_CODE_MALLOC(0, printf("allocated %ld (->%ld / %ld)\n", size, size2, bucket)); } + scheme_thread_code_end_write(); + return p; #else return malloc(size); /* good luck! 
*/ @@ -1267,6 +1289,8 @@ void scheme_free_code(void *p) int per_page, n; void *prev; + scheme_thread_code_start_write(); + page_size = get_page_size(); size = *(intptr_t *)CODE_PAGE_OF(p); @@ -1345,6 +1369,8 @@ void scheme_free_code(void *p) } } + scheme_thread_code_end_write(); + #else free(p); #endif @@ -1356,6 +1382,8 @@ void scheme_free_all_code(void) void *p, *next; intptr_t page_size; + scheme_thread_code_start_write(); + page_size = get_page_size(); for (p = code_allocation_page_list; p; p = next) { @@ -1368,6 +1396,8 @@ void scheme_free_all_code(void) code_allocation_page_list = NULL; free_page(free_list, page_size); + + scheme_thread_code_end_write(); #endif } diff --git a/racket/src/bc/src/schpriv.h b/racket/src/bc/src/schpriv.h index 4d42e61be2..beab24317f 100644 --- a/racket/src/bc/src/schpriv.h +++ b/racket/src/bc/src/schpriv.h @@ -285,6 +285,13 @@ void scheme_clear_ephemerons(void); #define SCHEME_PAIR_COPY_FLAGS(dest, src) (SCHEME_PAIR_FLAGS((dest)) |= (SCHEME_PAIR_FLAGS((src)) & PAIR_FLAG_MASK)) +#ifdef MZ_USE_MAP_JIT +XFORM_NONGCING void scheme_thread_code_start_write(void); +XFORM_NONGCING void scheme_thread_code_end_write(void); +#else +# define scheme_thread_code_start_write() do { } while (0) +# define scheme_thread_code_end_write() do { } while (0) +#endif /*========================================================================*/ /* initialization */