diff --git a/racket/src/ChezScheme/c/externs.h b/racket/src/ChezScheme/c/externs.h index ba35a6c398..a6b83236d0 100644 --- a/racket/src/ChezScheme/c/externs.h +++ b/racket/src/ChezScheme/c/externs.h @@ -383,7 +383,7 @@ extern INT matherr PROTO((struct exception *x)); /* segment.c */ extern void S_segment_init PROTO((void)); -extern void *S_getmem PROTO((iptr bytes, IBOOL zerofill)); +extern void *S_getmem PROTO((iptr bytes, IBOOL zerofill, IBOOL for_code)); extern void S_freemem PROTO((void *addr, iptr bytes)); extern iptr S_find_segments PROTO((thread_gc *creator, ISPC s, IGEN g, iptr n)); extern void S_free_chunk PROTO((chunkinfo *chunk)); @@ -393,6 +393,8 @@ extern uptr S_maxmembytes PROTO((void)); extern void S_resetmaxmembytes PROTO((void)); extern void S_adjustmembytes PROTO((iptr amt)); extern void S_move_to_chunk_list PROTO((chunkinfo *chunk, chunkinfo **pchunk_list)); +extern void S_thread_start_code_write(void); +extern void S_thread_end_code_write(void); /* stats.c */ extern void S_stats_init PROTO((void)); diff --git a/racket/src/ChezScheme/c/fasl.c b/racket/src/ChezScheme/c/fasl.c index c8a924cecb..fce37634fc 100644 --- a/racket/src/ChezScheme/c/fasl.c +++ b/racket/src/ChezScheme/c/fasl.c @@ -500,6 +500,8 @@ static ptr fasl_entry(ptr tc, IFASLCODE situation, unbufFaslFile uf, ptr externa Scompact_heap(); } + S_thread_start_code_write(); + switch (ty) { case fasl_type_gzip: case fasl_type_lz4: { @@ -547,6 +549,7 @@ static ptr fasl_entry(ptr tc, IFASLCODE situation, unbufFaslFile uf, ptr externa return (ptr)0; } S_flush_instruction_cache(tc); + S_thread_end_code_write(); return x; } else { uf_skipbytes(uf, size); diff --git a/racket/src/ChezScheme/c/gc.c b/racket/src/ChezScheme/c/gc.c index d42f56e795..acb92d0030 100644 --- a/racket/src/ChezScheme/c/gc.c +++ b/racket/src/ChezScheme/c/gc.c @@ -870,6 +870,8 @@ ptr GCENTRY(ptr tc, ptr count_roots_ls) { GET_REAL_TIME(astart); + S_thread_start_code_write(); + /* flush instruction cache: effectively 
clear_code_mod but safer */ for (ls = S_threads; ls != Snil; ls = Scdr(ls)) { ptr t_tc = (ptr)THREADTC(Scar(ls)); @@ -1677,6 +1679,7 @@ ptr GCENTRY(ptr tc, ptr count_roots_ls) { if (MAX_CG >= S_G.min_free_gen) S_free_chunks(); S_flush_instruction_cache(tc); + S_thread_end_code_write(); #ifndef NO_DIRTY_NEWSPACE_POINTERS /* mark dirty those newspace cards to which we've added wrong-way pointers */ @@ -2947,6 +2950,8 @@ static void setup_sweepers(thread_gc *tgc) { static s_thread_rv_t start_sweeper(void *_sweeper) { gc_sweeper *sweeper = _sweeper; + S_thread_start_code_write(); /* never ended */ + (void)s_thread_mutex_lock(&sweep_mutex); while (1) { while (sweeper->status != SWEEPER_SWEEPING) { diff --git a/racket/src/ChezScheme/c/globals.h b/racket/src/ChezScheme/c/globals.h index a2cd9b4ccf..b3a837d00d 100644 --- a/racket/src/ChezScheme/c/globals.h +++ b/racket/src/ChezScheme/c/globals.h @@ -63,6 +63,8 @@ EXTERN seginfo *S_segment_info[1<= 0; i -= 1) S_chunks[i] = NULL; + S_code_chunks_full = NULL; + for (i = PARTIAL_CHUNK_POOLS; i >= 0; i -= 1) { + S_chunks[i] = NULL; + S_code_chunks[i] = NULL; + } for (g = 0; g <= static_generation; g++) { for (s = 0; s <= max_real_space; s++) { S_G.occupied_segments[g][s] = NULL; @@ -79,7 +83,7 @@ static void out_of_memory(void) { } #if defined(USE_MALLOC) -void *S_getmem(iptr bytes, IBOOL zerofill) { +void *S_getmem(iptr bytes, IBOOL zerofill, UNUSED IBOOL for_code) { void *addr; if ((addr = malloc(bytes)) == (void *)0) out_of_memory(); @@ -99,7 +103,7 @@ void S_freemem(void *addr, iptr bytes) { #if defined(USE_VIRTUAL_ALLOC) #include -void *S_getmem(iptr bytes, IBOOL zerofill) { +void *S_getmem(iptr bytes, IBOOL zerofill, IBOOL for_code) { void *addr; if ((uptr)bytes < S_pagesize) { @@ -109,7 +113,8 @@ void *S_getmem(iptr bytes, IBOOL zerofill) { if (zerofill) memset(addr, 0, bytes); } else { uptr n = S_pagesize - 1; iptr p_bytes = (iptr)(((uptr)bytes + n) & ~n); - if ((addr = VirtualAlloc((void *)0, (SIZE_T)p_bytes, 
MEM_COMMIT, PAGE_EXECUTE_READWRITE)) == (void *)0) out_of_memory(); + int perm = (for_code ? PAGE_EXECUTE_READWRITE : PAGE_READWRITE); + if ((addr = VirtualAlloc((void *)0, (SIZE_T)p_bytes, MEM_COMMIT, perm)) == (void *)0) out_of_memory(); if ((membytes += p_bytes) > maxmembytes) maxmembytes = membytes; debug(printf("getmem VirtualAlloc(%p => %p) -> %p\n", bytes, p_bytes, addr)) } @@ -136,7 +141,7 @@ void S_freemem(void *addr, iptr bytes) { #ifndef MAP_ANONYMOUS #define MAP_ANONYMOUS MAP_ANON #endif -void *S_getmem(iptr bytes, IBOOL zerofill) { +void *S_getmem(iptr bytes, IBOOL zerofill, IBOOL for_code) { void *addr; if ((uptr)bytes < S_pagesize) { @@ -146,12 +151,14 @@ void *S_getmem(iptr bytes, IBOOL zerofill) { if (zerofill) memset(addr, 0, bytes); } else { uptr n = S_pagesize - 1; iptr p_bytes = (iptr)(((uptr)bytes + n) & ~n); + int perm = (for_code ? S_PROT_CODE : (PROT_WRITE | PROT_READ)); + int flags = (MAP_PRIVATE | MAP_ANONYMOUS) | (for_code ? S_MAP_CODE : 0); #ifdef MAP_32BIT /* try for first 2GB of the memory space first of x86_64 so that we have a better chance of having short jump instructions */ - if ((addr = mmap(NULL, p_bytes, PROT_EXEC|PROT_WRITE|PROT_READ, MAP_PRIVATE|MAP_ANONYMOUS|MAP_32BIT, -1, 0)) == (void *)-1) { + if ((addr = mmap(NULL, p_bytes, perm, flags|MAP_32BIT, -1, 0)) == (void *)-1) { #endif - if ((addr = mmap(NULL, p_bytes, PROT_EXEC|PROT_WRITE|PROT_READ, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0)) == (void *)-1) { + if ((addr = mmap(NULL, p_bytes, perm, flags, -1, 0)) == (void *)-1) { out_of_memory(); debug(printf("getmem mmap(%p) -> %p\n", bytes, addr)) } @@ -258,26 +265,29 @@ static void initialize_seginfo(seginfo *si, NO_THREADS_UNUSED thread_gc *creator /* allocation mutex must be held */ iptr S_find_segments(creator, s, g, n) thread_gc *creator; ISPC s; IGEN g; iptr n; { - chunkinfo *chunk, *nextchunk; + chunkinfo *chunk, *nextchunk, **chunks; seginfo *si, *nextsi, **prevsi; iptr nunused_segs, j; INT i, loser_index; + IBOOL for_code = 
((s == space_code)); if (g != static_generation) S_G.number_of_nonstatic_segments += n; debug(printf("attempting to find %d segments for space %d, generation %d\n", n, s, g)) + chunks = (for_code ? S_code_chunks : S_chunks); + if (n == 1) { for (i = 0; i <= PARTIAL_CHUNK_POOLS; i++) { - chunk = S_chunks[i]; + chunk = chunks[i]; if (chunk != NULL) { si = chunk->unused_segs; chunk->unused_segs = si->next; if (chunk->unused_segs == NULL) { - S_move_to_chunk_list(chunk, &S_chunks_full); + S_move_to_chunk_list(chunk, (for_code ? &S_code_chunks_full : &S_chunks_full)); } else if (i == PARTIAL_CHUNK_POOLS) { - S_move_to_chunk_list(chunk, &S_chunks[PARTIAL_CHUNK_POOLS-1]); + S_move_to_chunk_list(chunk, &chunks[PARTIAL_CHUNK_POOLS-1]); } chunk->nused_segs += 1; @@ -291,7 +301,7 @@ iptr S_find_segments(creator, s, g, n) thread_gc *creator; ISPC s; IGEN g; iptr } else { loser_index = (n == 2) ? 0 : find_index(n-1); for (i = find_index(n); i <= PARTIAL_CHUNK_POOLS; i += 1) { - chunk = S_chunks[i]; + chunk = chunks[i]; while (chunk != NULL) { if (n < (nunused_segs = (chunk->segs - chunk->nused_segs))) { sort_chunk_unused_segments(chunk); @@ -311,9 +321,9 @@ iptr S_find_segments(creator, s, g, n) thread_gc *creator; ISPC s; IGEN g; iptr if (--j == 0) { *prevsi = nextsi->next; if (chunk->unused_segs == NULL) { - S_move_to_chunk_list(chunk, &S_chunks_full); + S_move_to_chunk_list(chunk, (for_code ? 
&S_code_chunks_full : &S_chunks_full)); } else if (i == PARTIAL_CHUNK_POOLS) { - S_move_to_chunk_list(chunk, &S_chunks[PARTIAL_CHUNK_POOLS-1]); + S_move_to_chunk_list(chunk, &chunks[PARTIAL_CHUNK_POOLS-1]); } chunk->nused_segs += n; nextsi->next = S_G.occupied_segments[g][s]; @@ -329,7 +339,7 @@ iptr S_find_segments(creator, s, g, n) thread_gc *creator; ISPC s; IGEN g; iptr } nextchunk = chunk->next; if (i != loser_index && i != PARTIAL_CHUNK_POOLS) { - S_move_to_chunk_list(chunk, &S_chunks[loser_index]); + S_move_to_chunk_list(chunk, &chunks[loser_index]); } chunk = nextchunk; } @@ -337,7 +347,7 @@ iptr S_find_segments(creator, s, g, n) thread_gc *creator; ISPC s; IGEN g; iptr } /* we couldn't find space, so ask for more */ - si = allocate_segments(n); + si = allocate_segments(n, for_code); for (nextsi = si, i = 0; i < n; i += 1, nextsi += 1) { initialize_seginfo(nextsi, creator, s, g); /* add segment to appropriate list of occupied segments */ @@ -357,7 +367,7 @@ iptr S_find_segments(creator, s, g, n) thread_gc *creator; ISPC s; IGEN g; iptr * allocates a group of n contiguous fresh segments, returning the * segment number of the first segment of the group. */ -static seginfo *allocate_segments(nreq) uptr nreq; { +static seginfo *allocate_segments(uptr nreq, UNUSED IBOOL for_code) { uptr nact, bytes, base; void *addr; iptr i; chunkinfo *chunk; seginfo *si; @@ -365,7 +375,7 @@ static seginfo *allocate_segments(nreq) uptr nreq; { nact = nreq < minimum_segment_request ? 
minimum_segment_request : nreq; bytes = (nact + 1) * bytes_per_segment; - addr = S_getmem(bytes, 0); + addr = S_getmem(bytes, 0, for_code); debug(printf("allocate_segments addr = %p\n", addr)) base = addr_get_segment((uptr)TO_PTR(addr) + bytes_per_segment - 1); @@ -375,7 +385,7 @@ static seginfo *allocate_segments(nreq) uptr nreq; { if (build_ptr(base, 0) == TO_PTR(addr) && base + nact != ((uptr)1 << (ptr_bits - segment_offset_bits)) - 1) nact += 1; - chunk = S_getmem(sizeof(chunkinfo) + sizeof(seginfo) * nact, 0); + chunk = S_getmem(sizeof(chunkinfo) + sizeof(seginfo) * nact, 0, 0); debug(printf("allocate_segments chunk = %p\n", chunk)) chunk->addr = addr; chunk->base = base; @@ -406,9 +416,9 @@ static seginfo *allocate_segments(nreq) uptr nreq; { /* account for trailing empty segments */ if (nact > nreq) { S_G.number_of_empty_segments += nact - nreq; - add_to_chunk_list(chunk, &S_chunks[PARTIAL_CHUNK_POOLS-1]); + add_to_chunk_list(chunk, &((for_code ? S_code_chunks : S_chunks)[PARTIAL_CHUNK_POOLS-1])); } else { - add_to_chunk_list(chunk, &S_chunks_full); + add_to_chunk_list(chunk, (for_code ? &S_code_chunks_full : &S_chunks_full)); } return &chunk->sis[0]; @@ -428,15 +438,24 @@ void S_free_chunk(chunkinfo *chunk) { * nonempty nonstatic segment. 
*/ void S_free_chunks(void) { iptr ntofree; - chunkinfo *chunk, *nextchunk; + chunkinfo *chunk, *code_chunk, *nextchunk= NULL, *code_nextchunk = NULL; ntofree = S_G.number_of_empty_segments - (iptr)(Sflonum_value(SYMVAL(S_G.heap_reserve_ratio_id)) * S_G.number_of_nonstatic_segments); - for (chunk = S_chunks[PARTIAL_CHUNK_POOLS]; ntofree > 0 && chunk != NULL; chunk = nextchunk) { - nextchunk = chunk->next; - ntofree -= chunk->segs; - S_free_chunk(chunk); + for (chunk = S_chunks[PARTIAL_CHUNK_POOLS], code_chunk = S_code_chunks[PARTIAL_CHUNK_POOLS]; + ntofree > 0 && ((chunk != NULL) || (code_chunk != NULL)); + chunk = nextchunk, code_chunk = code_nextchunk) { + if (chunk) { + nextchunk = chunk->next; + ntofree -= chunk->segs; + S_free_chunk(chunk); + } + if (code_chunk) { + code_nextchunk = code_chunk->next; + ntofree -= code_chunk->segs; + S_free_chunk(code_chunk); + } } } @@ -469,14 +488,14 @@ static void expand_segment_table(uptr base, uptr end, seginfo *si) { while (base != end) { #ifdef segment_t3_bits if ((t2i = S_segment_info[SEGMENT_T3_IDX(base)]) == NULL) { - S_segment_info[SEGMENT_T3_IDX(base)] = t2i = (t2table *)S_getmem(sizeof(t2table), 1); + S_segment_info[SEGMENT_T3_IDX(base)] = t2i = (t2table *)S_getmem(sizeof(t2table), 1, 0); } t2 = t2i->t2; #else t2 = S_segment_info; #endif if ((t1i = t2[SEGMENT_T2_IDX(base)]) == NULL) { - t2[SEGMENT_T2_IDX(base)] = t1i = (t1table *)S_getmem(sizeof(t1table), 1); + t2[SEGMENT_T2_IDX(base)] = t1i = (t1table *)S_getmem(sizeof(t1table), 1, 0); #ifdef segment_t3_bits t2i->refcount += 1; #endif @@ -540,3 +559,27 @@ static void contract_segment_table(uptr base, uptr end) { while (t1 < t1end) *t1++ = NULL; #endif } + +/* Bracket all writes to `space_code` memory with calls to + `S_thread_start_code_write` and `S_thread_end_code_write'. + + On a platform where a page cannot be both writable and executable + at the same time (a.k.a. 
W^X), AND assuming that the disposition is + thread-specific, the bracketing functions disable execution of the + code's memory while enabling writing. + + Note that these functions will not work for a W^X implementation + where each page's disposition is process-wide. Indeed, a + process-wide W^X disposition seems incompatible with the Chez + Scheme rule that a foreign thread is allowed to invoke a callback + (as long as the callback is immobile/locked) at any time --- even, + say, while Scheme is collecting garbage and needs to write to + executable pages. */ + +void S_thread_start_code_write(void) { + S_ENABLE_CODE_WRITE(1); +} + +void S_thread_end_code_write(void) { + S_ENABLE_CODE_WRITE(0); +} diff --git a/racket/src/ChezScheme/c/version.h b/racket/src/ChezScheme/c/version.h index e8cd18f54e..9285967e8d 100644 --- a/racket/src/ChezScheme/c/version.h +++ b/racket/src/ChezScheme/c/version.h @@ -83,6 +83,8 @@ # if (machine_type == machine_type_tarm64osx) # define PTHREADS # endif +# define S_MAP_CODE MAP_JIT +# define S_ENABLE_CODE_WRITE(on) pthread_jit_write_protect_np(!(on)) #endif #if (machine_type == machine_type_pb) @@ -476,6 +478,16 @@ typedef char tputsputcchar; # define WRITE write #endif +#ifndef S_PROT_CODE +# define S_PROT_CODE (PROT_READ | PROT_WRITE | PROT_EXEC) +#endif +#ifndef S_MAP_CODE +# define S_MAP_CODE 0 +#endif +#ifndef S_ENABLE_CODE_WRITE +# define S_ENABLE_CODE_WRITE(on) do { } while (0) +#endif + #ifdef PTHREADS # define NO_THREADS_UNUSED /* empty */ #else diff --git a/racket/src/ChezScheme/c/vfasl.c b/racket/src/ChezScheme/c/vfasl.c index 4b0003cd39..84ce71fa28 100644 --- a/racket/src/ChezScheme/c/vfasl.c +++ b/racket/src/ChezScheme/c/vfasl.c @@ -213,11 +213,12 @@ ptr S_vfasl(ptr bv, void *stream, iptr offset, iptr input_len) # define VSPACE_END(s) ptr_add(vspaces[(s)], VSPACE_LENGTH(s)) ptr tc = get_thread_context(); vfasl_header header; - ptr data, table; + ptr table; vfoff *symrefs, *rtdrefs, *singletonrefs; octet *bm, *bm_end; iptr 
used_len; int s; + void *bv_addr; IBOOL to_static = 0; used_len = sizeof(header); @@ -241,49 +242,41 @@ ptr S_vfasl(ptr bv, void *stream, iptr offset, iptr input_len) } vspace_offsets[vspaces_count] = header.data_size; - if (bv) { - void *base_addr = &BVIT(bv, sizeof(vfasl_header) + offset); - newspace_find_room(tc, typemod, header.data_size, data); - memcpy(TO_VOIDP(data), base_addr, header.data_size); - table = ptr_add(TO_PTR(base_addr), header.data_size); - } else { - if (S_vfasl_boot_mode > 0) { - for (s = 0; s < vspaces_count; s++) { - uptr sz = vspace_offsets[s+1] - vspace_offsets[s]; - if (sz > 0) { - if ((s == vspace_reloc) && !S_G.retain_static_relocation) { - newspace_find_room(tc, typemod, sz, vspaces[s]); - } else { - find_room(tc, vspace_spaces[s], static_generation, typemod, sz, vspaces[s]); - } - if (S_fasl_stream_read(stream, TO_VOIDP(vspaces[s]), sz) < 0) - S_error("fasl-read", "input truncated"); - } else - vspaces[s] = (ptr)0; - } - for (s = vspaces_count - 1; s--; ) { - if (!vspaces[s]) - vspaces[s] = vspaces[s+1]; - } - data = (ptr)0; /* => initialize below */ - to_static = 1; - } else { - newspace_find_room(tc, typemod, header.data_size, data); - if (S_fasl_stream_read(stream, TO_VOIDP(data), header.data_size) < 0) - S_error("fasl-read", "input truncated"); - } + bv_addr = (bv ? &BVIT(bv, sizeof(vfasl_header) + offset) : NULL); + to_static = (S_vfasl_boot_mode > 0); + + for (s = 0; s < vspaces_count; s++) { + uptr sz = vspace_offsets[s+1] - vspace_offsets[s]; + if (sz > 0) { + if ((s == vspace_reloc) && to_static && !S_G.retain_static_relocation) { + newspace_find_room(tc, typemod, sz, vspaces[s]); + } else { + find_room(tc, vspace_spaces[s], (to_static ? 
static_generation : 0), typemod, sz, vspaces[s]); + } + if (bv) { + memcpy(TO_VOIDP(vspaces[s]), bv_addr, sz); + bv_addr = TO_VOIDP(ptr_add(TO_PTR(bv_addr), sz)); + } else { + if (S_fasl_stream_read(stream, TO_VOIDP(vspaces[s]), sz) < 0) + S_error("fasl-read", "input truncated"); + } + } else + vspaces[s] = (ptr)0; + } + for (s = vspaces_count - 1; s--; ) { + if (!vspaces[s]) + vspaces[s] = vspaces[s+1]; + } + + if (bv) + table = TO_PTR(bv_addr); + else { newspace_find_room(tc, typemod, ptr_align(header.table_size), table); if (S_fasl_stream_read(stream, TO_VOIDP(table), header.table_size) < 0) S_error("fasl-read", "input truncated"); } - if (data) { - for (s = 0; s < vspaces_count; s++) - vspaces[s] = ptr_add(data, vspace_offsets[s]); - } else - data = vspaces[0]; - symrefs = TO_VOIDP(table); rtdrefs = TO_VOIDP(ptr_add(TO_PTR(symrefs), header.symref_count * sizeof(vfoff))); singletonrefs = TO_VOIDP(ptr_add(TO_PTR(rtdrefs), header.rtdref_count * sizeof(vfoff))); diff --git a/racket/src/bc/collects-path.rkt b/racket/src/start/collects-path.rkt similarity index 100% rename from racket/src/bc/collects-path.rkt rename to racket/src/start/collects-path.rkt