Chez Scheme: distinguish code and non-code allocation segments

Code segments are executable and, on platforms that enforce W^X, non-writable
in a thread --- except when writing is specifically enabled, at which
point code segments are made writable but non-executable for that thread.
Non-code segments are always non-executable.
Matthew Flatt 2020-11-28 13:39:56 -07:00
parent 1a03fe42f0
commit 64378caa1d
10 changed files with 134 additions and 70 deletions
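
The commit targets the per-thread W^X scheme that Apple exposes on AArch64 macOS: memory mapped with MAP_JIT can be flipped between writable and executable per thread via pthread_jit_write_protect_np, which is what the new S_ENABLE_CODE_WRITE macro maps onto for tarm64osx below. A minimal standalone sketch of that mechanism (macOS/AArch64 only; the hand-encoded instructions and 4K buffer are illustrative, not taken from this commit):

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/mman.h>
#include <libkern/OSCacheControl.h>

int main(void) {
  /* MAP_JIT memory supports the per-thread writable/executable toggle */
  uint32_t *code = mmap(NULL, 4096, PROT_READ | PROT_WRITE | PROT_EXEC,
                        MAP_PRIVATE | MAP_ANONYMOUS | MAP_JIT, -1, 0);
  if (code == MAP_FAILED) return 1;

  pthread_jit_write_protect_np(0);  /* this thread: writable, not executable */
  code[0] = 0xd2800540;             /* mov x0, #42 */
  code[1] = 0xd65f03c0;             /* ret */
  pthread_jit_write_protect_np(1);  /* this thread: executable, not writable */
  sys_icache_invalidate(code, 2 * sizeof(uint32_t));

  printf("%d\n", ((int (*)(void))code)());  /* prints 42 */
  return 0;
}

Note that pthread_jit_write_protect_np(1) turns write protection on (pages executable), which is why S_ENABLE_CODE_WRITE(on) negates its argument in the system.h change below.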

View File

@@ -383,7 +383,7 @@ extern INT matherr PROTO((struct exception *x));
/* segment.c */
extern void S_segment_init PROTO((void));
-extern void *S_getmem PROTO((iptr bytes, IBOOL zerofill));
+extern void *S_getmem PROTO((iptr bytes, IBOOL zerofill, IBOOL for_code));
extern void S_freemem PROTO((void *addr, iptr bytes));
extern iptr S_find_segments PROTO((thread_gc *creator, ISPC s, IGEN g, iptr n));
extern void S_free_chunk PROTO((chunkinfo *chunk));
@@ -393,6 +393,8 @@ extern uptr S_maxmembytes PROTO((void));
extern void S_resetmaxmembytes PROTO((void));
extern void S_adjustmembytes PROTO((iptr amt));
extern void S_move_to_chunk_list PROTO((chunkinfo *chunk, chunkinfo **pchunk_list));
+extern void S_thread_start_code_write(void);
+extern void S_thread_end_code_write(void);
/* stats.c */
extern void S_stats_init PROTO((void));

View File

@@ -500,6 +500,8 @@ static ptr fasl_entry(ptr tc, IFASLCODE situation, unbufFaslFile uf, ptr externa
Scompact_heap();
}
+S_thread_start_code_write();
switch (ty) {
case fasl_type_gzip:
case fasl_type_lz4: {
@@ -547,6 +549,7 @@ static ptr fasl_entry(ptr tc, IFASLCODE situation, unbufFaslFile uf, ptr externa
return (ptr)0;
}
S_flush_instruction_cache(tc);
+S_thread_end_code_write();
return x;
} else {
uf_skipbytes(uf, size);

View File

@@ -870,6 +870,8 @@ ptr GCENTRY(ptr tc, ptr count_roots_ls) {
GET_REAL_TIME(astart);
+S_thread_start_code_write();
/* flush instruction cache: effectively clear_code_mod but safer */
for (ls = S_threads; ls != Snil; ls = Scdr(ls)) {
ptr t_tc = (ptr)THREADTC(Scar(ls));
@@ -1677,6 +1679,7 @@ ptr GCENTRY(ptr tc, ptr count_roots_ls) {
if (MAX_CG >= S_G.min_free_gen) S_free_chunks();
S_flush_instruction_cache(tc);
+S_thread_end_code_write();
#ifndef NO_DIRTY_NEWSPACE_POINTERS
/* mark dirty those newspace cards to which we've added wrong-way pointers */
@@ -2947,6 +2950,8 @@ static void setup_sweepers(thread_gc *tgc) {
static s_thread_rv_t start_sweeper(void *_sweeper) {
gc_sweeper *sweeper = _sweeper;
+S_thread_start_code_write(); /* never ended */
(void)s_thread_mutex_lock(&sweep_mutex);
while (1) {
while (sweeper->status != SWEEPER_SWEEPING) {

View File

@@ -63,6 +63,8 @@ EXTERN seginfo *S_segment_info[1<<segment_t1_bits];
EXTERN chunkinfo *S_chunks_full;
EXTERN chunkinfo *S_chunks[PARTIAL_CHUNK_POOLS+1];
+EXTERN chunkinfo *S_code_chunks_full;
+EXTERN chunkinfo *S_code_chunks[PARTIAL_CHUNK_POOLS+1];
/* schsig.c */

View File

@@ -33,7 +33,7 @@ void S_intern_init() {
S_G.oblist_length = MIN_OBLIST_LENGTH;
S_G.oblist_count = 0;
-S_G.oblist = S_getmem(S_G.oblist_length * sizeof(bucket *), 1);
+S_G.oblist = S_getmem(S_G.oblist_length * sizeof(bucket *), 1, 0);
for (g = 0; g < static_generation; g += 1) S_G.buckets_of_generation[g] = NULL;
}
@@ -76,7 +76,7 @@ void S_resize_oblist(void) {
if (new_oblist_length == S_G.oblist_length)
return;
-new_oblist = S_getmem(new_oblist_length * sizeof(bucket *), 1);
+new_oblist = S_getmem(new_oblist_length * sizeof(bucket *), 1, 0);
for (i = 0; i < S_G.oblist_length; i += 1) {
for (b = S_G.oblist[i]; b != NULL; b = bnext) {

View File

@@ -228,6 +228,8 @@ static void s_instantiate_code_object() {
cookie = S_get_scheme_arg(tc, 2);
proc = S_get_scheme_arg(tc, 3);
+S_thread_start_code_write();
new = S_code(tc, CODETYPE(old), CODELEN(old));
S_immobilize_object(new);
@@ -278,6 +280,8 @@ static void s_instantiate_code_object() {
}
S_flush_instruction_cache(tc);
+S_thread_end_code_write();
AC0(tc) = new;
}

View File

@@ -38,7 +38,7 @@ Low-level Memory management strategy:
static void out_of_memory PROTO((void));
static void initialize_seginfo PROTO((seginfo *si, thread_gc *creator, ISPC s, IGEN g));
-static seginfo *allocate_segments PROTO((uptr nreq));
+static seginfo *allocate_segments PROTO((uptr nreq, IBOOL for_code));
static void expand_segment_table PROTO((uptr base, uptr end, seginfo *si));
static void contract_segment_table PROTO((uptr base, uptr end));
static void add_to_chunk_list PROTO((chunkinfo *chunk, chunkinfo **pchunk_list));
@@ -51,7 +51,11 @@ void S_segment_init() {
if (!S_boot_time) return;
S_chunks_full = NULL;
-for (i = PARTIAL_CHUNK_POOLS; i >= 0; i -= 1) S_chunks[i] = NULL;
+S_code_chunks_full = NULL;
+for (i = PARTIAL_CHUNK_POOLS; i >= 0; i -= 1) {
+S_chunks[i] = NULL;
+S_code_chunks[i] = NULL;
+}
for (g = 0; g <= static_generation; g++) {
for (s = 0; s <= max_real_space; s++) {
S_G.occupied_segments[g][s] = NULL;
@@ -79,7 +83,7 @@ static void out_of_memory(void) {
}
#if defined(USE_MALLOC)
-void *S_getmem(iptr bytes, IBOOL zerofill) {
+void *S_getmem(iptr bytes, IBOOL zerofill, UNUSED IBOOL for_code) {
void *addr;
if ((addr = malloc(bytes)) == (void *)0) out_of_memory();
@@ -99,7 +103,7 @@ void S_freemem(void *addr, iptr bytes) {
#if defined(USE_VIRTUAL_ALLOC)
#include <WinBase.h>
-void *S_getmem(iptr bytes, IBOOL zerofill) {
+void *S_getmem(iptr bytes, IBOOL zerofill, IBOOL for_code) {
void *addr;
if ((uptr)bytes < S_pagesize) {
@@ -109,7 +113,8 @@ void *S_getmem(iptr bytes, IBOOL zerofill) {
if (zerofill) memset(addr, 0, bytes);
} else {
uptr n = S_pagesize - 1; iptr p_bytes = (iptr)(((uptr)bytes + n) & ~n);
-if ((addr = VirtualAlloc((void *)0, (SIZE_T)p_bytes, MEM_COMMIT, PAGE_EXECUTE_READWRITE)) == (void *)0) out_of_memory();
+int perm = (for_code ? PAGE_EXECUTE_READWRITE : PAGE_READWRITE);
+if ((addr = VirtualAlloc((void *)0, (SIZE_T)p_bytes, MEM_COMMIT, perm)) == (void *)0) out_of_memory();
if ((membytes += p_bytes) > maxmembytes) maxmembytes = membytes;
debug(printf("getmem VirtualAlloc(%p => %p) -> %p\n", bytes, p_bytes, addr))
}
@@ -136,7 +141,7 @@ void S_freemem(void *addr, iptr bytes) {
#ifndef MAP_ANONYMOUS
#define MAP_ANONYMOUS MAP_ANON
#endif
-void *S_getmem(iptr bytes, IBOOL zerofill) {
+void *S_getmem(iptr bytes, IBOOL zerofill, IBOOL for_code) {
void *addr;
if ((uptr)bytes < S_pagesize) {
@@ -146,12 +151,14 @@ void *S_getmem(iptr bytes, IBOOL zerofill) {
if (zerofill) memset(addr, 0, bytes);
} else {
uptr n = S_pagesize - 1; iptr p_bytes = (iptr)(((uptr)bytes + n) & ~n);
+int perm = (for_code ? S_PROT_CODE : (PROT_WRITE | PROT_READ));
+int flags = (MAP_PRIVATE | MAP_ANONYMOUS) | (for_code ? S_MAP_CODE : 0);
#ifdef MAP_32BIT
/* try for the first 2GB of the memory space on x86_64 so that we have a
better chance of having short jump instructions */
-if ((addr = mmap(NULL, p_bytes, PROT_EXEC|PROT_WRITE|PROT_READ, MAP_PRIVATE|MAP_ANONYMOUS|MAP_32BIT, -1, 0)) == (void *)-1) {
+if ((addr = mmap(NULL, p_bytes, perm, flags|MAP_32BIT, -1, 0)) == (void *)-1) {
#endif
-if ((addr = mmap(NULL, p_bytes, PROT_EXEC|PROT_WRITE|PROT_READ, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0)) == (void *)-1) {
+if ((addr = mmap(NULL, p_bytes, perm, flags, -1, 0)) == (void *)-1) {
out_of_memory();
debug(printf("getmem mmap(%p) -> %p\n", bytes, addr))
}
@@ -258,26 +265,29 @@ static void initialize_seginfo(seginfo *si, NO_THREADS_UNUSED thread_gc *creator
/* allocation mutex must be held */
iptr S_find_segments(creator, s, g, n) thread_gc *creator; ISPC s; IGEN g; iptr n; {
-chunkinfo *chunk, *nextchunk;
+chunkinfo *chunk, *nextchunk, **chunks;
seginfo *si, *nextsi, **prevsi;
iptr nunused_segs, j;
INT i, loser_index;
+IBOOL for_code = ((s == space_code));
if (g != static_generation) S_G.number_of_nonstatic_segments += n;
debug(printf("attempting to find %d segments for space %d, generation %d\n", n, s, g))
+chunks = (for_code ? S_code_chunks : S_chunks);
if (n == 1) {
for (i = 0; i <= PARTIAL_CHUNK_POOLS; i++) {
-chunk = S_chunks[i];
+chunk = chunks[i];
if (chunk != NULL) {
si = chunk->unused_segs;
chunk->unused_segs = si->next;
if (chunk->unused_segs == NULL) {
-S_move_to_chunk_list(chunk, &S_chunks_full);
+S_move_to_chunk_list(chunk, (for_code ? &S_code_chunks_full : &S_chunks_full));
} else if (i == PARTIAL_CHUNK_POOLS) {
-S_move_to_chunk_list(chunk, &S_chunks[PARTIAL_CHUNK_POOLS-1]);
+S_move_to_chunk_list(chunk, &chunks[PARTIAL_CHUNK_POOLS-1]);
}
chunk->nused_segs += 1;
@@ -291,7 +301,7 @@ iptr S_find_segments(creator, s, g, n) thread_gc *creator; ISPC s; IGEN g; iptr
} else {
loser_index = (n == 2) ? 0 : find_index(n-1);
for (i = find_index(n); i <= PARTIAL_CHUNK_POOLS; i += 1) {
-chunk = S_chunks[i];
+chunk = chunks[i];
while (chunk != NULL) {
if (n < (nunused_segs = (chunk->segs - chunk->nused_segs))) {
sort_chunk_unused_segments(chunk);
@@ -311,9 +321,9 @@ iptr S_find_segments(creator, s, g, n) thread_gc *creator; ISPC s; IGEN g; iptr
if (--j == 0) {
*prevsi = nextsi->next;
if (chunk->unused_segs == NULL) {
-S_move_to_chunk_list(chunk, &S_chunks_full);
+S_move_to_chunk_list(chunk, (for_code ? &S_code_chunks_full : &S_chunks_full));
} else if (i == PARTIAL_CHUNK_POOLS) {
-S_move_to_chunk_list(chunk, &S_chunks[PARTIAL_CHUNK_POOLS-1]);
+S_move_to_chunk_list(chunk, &chunks[PARTIAL_CHUNK_POOLS-1]);
}
chunk->nused_segs += n;
nextsi->next = S_G.occupied_segments[g][s];
@@ -329,7 +339,7 @@ iptr S_find_segments(creator, s, g, n) thread_gc *creator; ISPC s; IGEN g; iptr
}
nextchunk = chunk->next;
if (i != loser_index && i != PARTIAL_CHUNK_POOLS) {
-S_move_to_chunk_list(chunk, &S_chunks[loser_index]);
+S_move_to_chunk_list(chunk, &chunks[loser_index]);
}
chunk = nextchunk;
}
@@ -337,7 +347,7 @@ iptr S_find_segments(creator, s, g, n) thread_gc *creator; ISPC s; IGEN g; iptr
}
/* we couldn't find space, so ask for more */
-si = allocate_segments(n);
+si = allocate_segments(n, for_code);
for (nextsi = si, i = 0; i < n; i += 1, nextsi += 1) {
initialize_seginfo(nextsi, creator, s, g);
/* add segment to appropriate list of occupied segments */
@@ -357,7 +367,7 @@ iptr S_find_segments(creator, s, g, n) thread_gc *creator; ISPC s; IGEN g; iptr
* allocates a group of n contiguous fresh segments, returning the
* segment number of the first segment of the group.
*/
-static seginfo *allocate_segments(nreq) uptr nreq; {
+static seginfo *allocate_segments(uptr nreq, UNUSED IBOOL for_code) {
uptr nact, bytes, base; void *addr;
iptr i;
chunkinfo *chunk; seginfo *si;
@@ -365,7 +375,7 @@ static seginfo *allocate_segments(nreq) uptr nreq; {
nact = nreq < minimum_segment_request ? minimum_segment_request : nreq;
bytes = (nact + 1) * bytes_per_segment;
-addr = S_getmem(bytes, 0);
+addr = S_getmem(bytes, 0, for_code);
debug(printf("allocate_segments addr = %p\n", addr))
base = addr_get_segment((uptr)TO_PTR(addr) + bytes_per_segment - 1);
@@ -375,7 +385,7 @@ static seginfo *allocate_segments(nreq) uptr nreq; {
if (build_ptr(base, 0) == TO_PTR(addr) && base + nact != ((uptr)1 << (ptr_bits - segment_offset_bits)) - 1)
nact += 1;
-chunk = S_getmem(sizeof(chunkinfo) + sizeof(seginfo) * nact, 0);
+chunk = S_getmem(sizeof(chunkinfo) + sizeof(seginfo) * nact, 0, 0);
debug(printf("allocate_segments chunk = %p\n", chunk))
chunk->addr = addr;
chunk->base = base;
@@ -406,9 +416,9 @@ static seginfo *allocate_segments(nreq) uptr nreq; {
/* account for trailing empty segments */
if (nact > nreq) {
S_G.number_of_empty_segments += nact - nreq;
-add_to_chunk_list(chunk, &S_chunks[PARTIAL_CHUNK_POOLS-1]);
+add_to_chunk_list(chunk, &((for_code ? S_code_chunks : S_chunks)[PARTIAL_CHUNK_POOLS-1]));
} else {
-add_to_chunk_list(chunk, &S_chunks_full);
+add_to_chunk_list(chunk, (for_code ? &S_code_chunks_full : &S_chunks_full));
}
return &chunk->sis[0];
@@ -428,15 +438,24 @@ void S_free_chunk(chunkinfo *chunk) {
* nonempty nonstatic segment. */
void S_free_chunks(void) {
iptr ntofree;
-chunkinfo *chunk, *nextchunk;
+chunkinfo *chunk, *code_chunk, *nextchunk = NULL, *code_nextchunk = NULL;
ntofree = S_G.number_of_empty_segments -
(iptr)(Sflonum_value(SYMVAL(S_G.heap_reserve_ratio_id)) * S_G.number_of_nonstatic_segments);
-for (chunk = S_chunks[PARTIAL_CHUNK_POOLS]; ntofree > 0 && chunk != NULL; chunk = nextchunk) {
-nextchunk = chunk->next;
-ntofree -= chunk->segs;
-S_free_chunk(chunk);
+for (chunk = S_chunks[PARTIAL_CHUNK_POOLS], code_chunk = S_code_chunks[PARTIAL_CHUNK_POOLS];
+ntofree > 0 && ((chunk != NULL) || (code_chunk != NULL));
+chunk = nextchunk, code_chunk = code_nextchunk) {
+if (chunk) {
+nextchunk = chunk->next;
+ntofree -= chunk->segs;
+S_free_chunk(chunk);
+}
+if (code_chunk) {
+code_nextchunk = code_chunk->next;
+ntofree -= code_chunk->segs;
+S_free_chunk(code_chunk);
+}
}
}
@@ -469,14 +488,14 @@ static void expand_segment_table(uptr base, uptr end, seginfo *si) {
while (base != end) {
#ifdef segment_t3_bits
if ((t2i = S_segment_info[SEGMENT_T3_IDX(base)]) == NULL) {
-S_segment_info[SEGMENT_T3_IDX(base)] = t2i = (t2table *)S_getmem(sizeof(t2table), 1);
+S_segment_info[SEGMENT_T3_IDX(base)] = t2i = (t2table *)S_getmem(sizeof(t2table), 1, 0);
}
t2 = t2i->t2;
#else
t2 = S_segment_info;
#endif
if ((t1i = t2[SEGMENT_T2_IDX(base)]) == NULL) {
-t2[SEGMENT_T2_IDX(base)] = t1i = (t1table *)S_getmem(sizeof(t1table), 1);
+t2[SEGMENT_T2_IDX(base)] = t1i = (t1table *)S_getmem(sizeof(t1table), 1, 0);
#ifdef segment_t3_bits
t2i->refcount += 1;
#endif
@@ -540,3 +559,27 @@ static void contract_segment_table(uptr base, uptr end) {
while (t1 < t1end) *t1++ = NULL;
#endif
}
+/* Bracket all writes to `space_code` memory with calls to
+`S_thread_start_code_write` and `S_thread_end_code_write`.
+On a platform where a page cannot be both writable and executable
+at the same time (a.k.a. W^X), AND assuming that the disposition is
+thread-specific, the bracketing functions disable execution of the
+code's memory while enabling writing.
+Note that these functions will not work for a W^X implementation
+where each page's disposition is process-wide. Indeed, a
+process-wide W^X disposition seems incompatible with the Chez
+Scheme rule that a foreign thread is allowed to invoke a callback
+(as long as the callback is immobile/locked) at any time --- even,
+say, while Scheme is collecting garbage and needs to write to
+executable pages. */
+void S_thread_start_code_write(void) {
+S_ENABLE_CODE_WRITE(1);
+}
+void S_thread_end_code_write(void) {
+S_ENABLE_CODE_WRITE(0);
+}
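
Every call site in this commit follows the same discipline: open a write phase, write or patch machine code, flush the instruction cache, and only then re-enable execution. Schematically (mirroring the fasl.c, gc.c, and prim5.c changes above):

S_thread_start_code_write();    /* code pages: writable, not executable */
/* ... allocate or patch code objects ... */
S_flush_instruction_cache(tc);  /* publish the new instructions */
S_thread_end_code_write();      /* code pages: executable, not writable */

The parallel-GC sweeper threads are the one exception: start_sweeper enables writing once and, as its comment notes, never ends it, since a sweeper thread only writes to code objects.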

View File

@@ -83,6 +83,8 @@
# if (machine_type == machine_type_tarm64osx)
# define PTHREADS
# endif
+# define S_MAP_CODE MAP_JIT
+# define S_ENABLE_CODE_WRITE(on) pthread_jit_write_protect_np(!(on))
#endif
#if (machine_type == machine_type_pb)
@@ -476,6 +478,16 @@ typedef char tputsputcchar;
# define WRITE write
#endif
+#ifndef S_PROT_CODE
+# define S_PROT_CODE (PROT_READ | PROT_WRITE | PROT_EXEC)
+#endif
+#ifndef S_MAP_CODE
+# define S_MAP_CODE 0
+#endif
+#ifndef S_ENABLE_CODE_WRITE
+# define S_ENABLE_CODE_WRITE(on) do { } while (0)
+#endif
#ifdef PTHREADS
# define NO_THREADS_UNUSED /* empty */
#else
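
Taken together, the defaults mean the new allocation path degrades gracefully on platforms without per-thread W^X support: code segments are simply mapped read/write/execute, and the bracketing calls compile away. What the mmap-based S_getmem above ends up requesting, summarized as a paraphrase of the diff rather than code from it:

/* generic POSIX target (defaults above):
     perm  = for_code ? PROT_READ|PROT_WRITE|PROT_EXEC : PROT_WRITE|PROT_READ
     flags = MAP_PRIVATE|MAP_ANONYMOUS          (S_MAP_CODE is 0)
     S_ENABLE_CODE_WRITE(on) is a no-op
   tarm64osx:
     flags |= MAP_JIT for code segments         (S_MAP_CODE is MAP_JIT)
     S_ENABLE_CODE_WRITE(on) is pthread_jit_write_protect_np(!(on)) */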

View File

@@ -213,11 +213,12 @@ ptr S_vfasl(ptr bv, void *stream, iptr offset, iptr input_len)
# define VSPACE_END(s) ptr_add(vspaces[(s)], VSPACE_LENGTH(s))
ptr tc = get_thread_context();
vfasl_header header;
-ptr data, table;
+ptr table;
vfoff *symrefs, *rtdrefs, *singletonrefs;
octet *bm, *bm_end;
iptr used_len;
int s;
+void *bv_addr;
IBOOL to_static = 0;
used_len = sizeof(header);
@@ -241,49 +242,41 @@ ptr S_vfasl(ptr bv, void *stream, iptr offset, iptr input_len)
}
vspace_offsets[vspaces_count] = header.data_size;
-if (bv) {
-void *base_addr = &BVIT(bv, sizeof(vfasl_header) + offset);
-newspace_find_room(tc, typemod, header.data_size, data);
-memcpy(TO_VOIDP(data), base_addr, header.data_size);
-table = ptr_add(TO_PTR(base_addr), header.data_size);
-} else {
-if (S_vfasl_boot_mode > 0) {
-for (s = 0; s < vspaces_count; s++) {
-uptr sz = vspace_offsets[s+1] - vspace_offsets[s];
-if (sz > 0) {
-if ((s == vspace_reloc) && !S_G.retain_static_relocation) {
-newspace_find_room(tc, typemod, sz, vspaces[s]);
-} else {
-find_room(tc, vspace_spaces[s], static_generation, typemod, sz, vspaces[s]);
-}
-if (S_fasl_stream_read(stream, TO_VOIDP(vspaces[s]), sz) < 0)
-S_error("fasl-read", "input truncated");
-} else
-vspaces[s] = (ptr)0;
-}
-for (s = vspaces_count - 1; s--; ) {
-if (!vspaces[s])
-vspaces[s] = vspaces[s+1];
-}
-data = (ptr)0; /* => initialize below */
-to_static = 1;
-} else {
-newspace_find_room(tc, typemod, header.data_size, data);
-if (S_fasl_stream_read(stream, TO_VOIDP(data), header.data_size) < 0)
-S_error("fasl-read", "input truncated");
-}
+bv_addr = (bv ? &BVIT(bv, sizeof(vfasl_header) + offset) : NULL);
+to_static = (S_vfasl_boot_mode > 0);
+for (s = 0; s < vspaces_count; s++) {
+uptr sz = vspace_offsets[s+1] - vspace_offsets[s];
+if (sz > 0) {
+if ((s == vspace_reloc) && to_static && !S_G.retain_static_relocation) {
+newspace_find_room(tc, typemod, sz, vspaces[s]);
+} else {
+find_room(tc, vspace_spaces[s], (to_static ? static_generation : 0), typemod, sz, vspaces[s]);
+}
+if (bv) {
+memcpy(TO_VOIDP(vspaces[s]), bv_addr, sz);
+bv_addr = TO_VOIDP(ptr_add(TO_PTR(bv_addr), sz));
+} else {
+if (S_fasl_stream_read(stream, TO_VOIDP(vspaces[s]), sz) < 0)
+S_error("fasl-read", "input truncated");
+}
+} else
+vspaces[s] = (ptr)0;
+}
+for (s = vspaces_count - 1; s--; ) {
+if (!vspaces[s])
+vspaces[s] = vspaces[s+1];
+}
+if (bv)
+table = TO_PTR(bv_addr);
+else {
newspace_find_room(tc, typemod, ptr_align(header.table_size), table);
if (S_fasl_stream_read(stream, TO_VOIDP(table), header.table_size) < 0)
S_error("fasl-read", "input truncated");
}
-if (data) {
-for (s = 0; s < vspaces_count; s++)
-vspaces[s] = ptr_add(data, vspace_offsets[s]);
-} else
-data = vspaces[0];
symrefs = TO_VOIDP(table);
rtdrefs = TO_VOIDP(ptr_add(TO_PTR(symrefs), header.symref_count * sizeof(vfoff)));
singletonrefs = TO_VOIDP(ptr_add(TO_PTR(rtdrefs), header.rtdref_count * sizeof(vfoff)));