add PLT_EAGER_JIT to force JIT on linklet instantiation

Forcing JIT code generation through an environment variable is useful
to get a sense of how much machine code is generated for a program.
Setting `PLT_LINKLET_TIMES` causes the overall memory used by
JIT-generated code (including administrative overhead) to be printed on
exit.
This commit is contained in:
Matthew Flatt 2018-08-08 13:02:12 -06:00
parent 3e9196ab5c
commit ac601a095b
9 changed files with 102 additions and 17 deletions

View File

@ -311,6 +311,7 @@ typedef struct Thread_Local_Variables {
volatile short delayed_break_ready_;
struct Scheme_Thread *main_break_target_thread_;
intptr_t scheme_code_page_total_;
intptr_t scheme_code_total_;
intptr_t max_gc_pre_used_bytes_;
int num_major_garbage_collections_;
int num_minor_garbage_collections_;
@ -358,6 +359,7 @@ typedef struct Thread_Local_Variables {
struct Scheme_Object *is_syntax_proc_;
struct Scheme_Object *expander_syntax_to_datum_proc_;
struct Scheme_Hash_Table *local_primitive_tables_;
struct Scheme_Object *current_linklet_native_lambdas_;
} Thread_Local_Variables;
#if defined(IMPLEMENT_THREAD_LOCAL_VIA_PTHREADS)
@ -690,6 +692,7 @@ XFORM_GC_VARIABLE_STACK_THROUGH_THREAD_LOCAL;
#define delayed_break_ready XOA (scheme_get_thread_local_variables()->delayed_break_ready_)
#define main_break_target_thread XOA (scheme_get_thread_local_variables()->main_break_target_thread_)
#define scheme_code_page_total XOA (scheme_get_thread_local_variables()->scheme_code_page_total_)
#define scheme_code_total XOA (scheme_get_thread_local_variables()->scheme_code_total_)
#define max_gc_pre_used_bytes XOA (scheme_get_thread_local_variables()->max_gc_pre_used_bytes_)
#define num_major_garbage_collections XOA (scheme_get_thread_local_variables()->num_major_garbage_collections_)
#define num_minor_garbage_collections XOA (scheme_get_thread_local_variables()->num_minor_garbage_collections_)
@ -737,6 +740,7 @@ XFORM_GC_VARIABLE_STACK_THROUGH_THREAD_LOCAL;
#define is_syntax_proc XOA (scheme_get_thread_local_variables()->is_syntax_proc_)
#define expander_syntax_to_datum_proc XOA (scheme_get_thread_local_variables()->expander_syntax_to_datum_proc_)
#define local_primitive_tables XOA (scheme_get_thread_local_variables()->local_primitive_tables_)
#define current_linklet_native_lambdas XOA (scheme_get_thread_local_variables()->current_linklet_native_lambdas_)
/* **************************************** */

View File

@ -190,6 +190,7 @@ Scheme_Env *scheme_basic_env()
scheme_init_validate();
#ifdef MZ_USE_JIT
scheme_init_jit();
scheme_init_jitprep();
#endif
scheme_init_process_globals();

View File

@ -456,6 +456,8 @@ static Scheme_Object *extract_closure_local(int pos, mz_jit_state *jitter, int g
{
if (PAST_LIMIT()) return NULL;
if (!jitter->nc) return NULL;
if (pos >= jitter->self_pos - jitter->self_to_closure_delta) {
pos -= (jitter->self_pos - jitter->self_to_closure_delta);
if (pos < jitter->nc->code->u2.orig_code->closure_size) {
@ -493,6 +495,8 @@ Scheme_Object *scheme_specialize_to_constant(Scheme_Object *obj, mz_jit_state *j
if (PAST_LIMIT()) return obj;
if (!jitter->nc) return obj;
if (SCHEME_NATIVE_LAMBDA_FLAGS(jitter->nc->code) & NATIVE_SPECIALIZED) {
if (SAME_TYPE(SCHEME_TYPE(obj), scheme_local_type)) {
c = scheme_extract_closure_local(obj, jitter, extra_push, 1);
@ -1361,7 +1365,7 @@ static int generate_closure_fill(Scheme_Lambda *lam,
for (j = 0; j < size; j++) {
CHECK_LIMIT();
if (SCHEME_NATIVE_LAMBDA_FLAGS(jitter->nc->code) & NATIVE_SPECIALIZED)
if (jitter->nc && (SCHEME_NATIVE_LAMBDA_FLAGS(jitter->nc->code) & NATIVE_SPECIALIZED))
v = extract_closure_local(map[j], jitter, 1);
else
v = NULL;
@ -2116,7 +2120,7 @@ int scheme_generate(Scheme_Object *obj, mz_jit_state *jitter, int is_tail, int w
START_JIT_DATA();
LOG_IT(("top-level\n"));
mz_rs_sync_fail_branch();
if (SCHEME_NATIVE_LAMBDA_FLAGS(jitter->nc->code) & NATIVE_SPECIALIZED) {
if (jitter->nc && (SCHEME_NATIVE_LAMBDA_FLAGS(jitter->nc->code) & NATIVE_SPECIALIZED)) {
/* Must be a top-level that is not yet defined. */
Scheme_Object *b;
mz_rs_sync_fail_branch();
@ -2271,7 +2275,7 @@ int scheme_generate(Scheme_Object *obj, mz_jit_state *jitter, int is_tail, int w
START_JIT_DATA();
LOG_IT(("unbox local\n"));
if (SCHEME_NATIVE_LAMBDA_FLAGS(jitter->nc->code) & NATIVE_SPECIALIZED)
if (jitter->nc && (SCHEME_NATIVE_LAMBDA_FLAGS(jitter->nc->code) & NATIVE_SPECIALIZED))
specialized = scheme_extract_closure_local(obj, jitter, 0, 1);
pos = mz_remap(SCHEME_LOCAL_POS(obj));
@ -2996,7 +3000,7 @@ int scheme_generate(Scheme_Object *obj, mz_jit_state *jitter, int is_tail, int w
if (lv->count == 1) {
/* Expect one result: */
Scheme_Object *specialized = NULL;
if (SCHEME_NATIVE_LAMBDA_FLAGS(jitter->nc->code) & NATIVE_SPECIALIZED)
if (jitter->nc && (SCHEME_NATIVE_LAMBDA_FLAGS(jitter->nc->code) & NATIVE_SPECIALIZED))
specialized = extract_closure_local(lv->position, jitter, 1);
scheme_generate_non_tail(lv->value, jitter, 0, 1, 0); /* no sync */
CHECK_LIMIT();
@ -3801,7 +3805,10 @@ static int do_generate_closure(mz_jit_state *jitter, void *_data)
to_args = 0;
#endif
specialized = SCHEME_NATIVE_LAMBDA_FLAGS(jitter->nc->code) & NATIVE_SPECIALIZED;
if (jitter->nc)
specialized = SCHEME_NATIVE_LAMBDA_FLAGS(jitter->nc->code) & NATIVE_SPECIALIZED;
else
specialized = 0;
/* Extract closure to runstack: */
cnt = lam->closure_size;
@ -3970,9 +3977,9 @@ static int do_generate_closure(mz_jit_state *jitter, void *_data)
return 1;
}
static void on_demand_generate_lambda(Scheme_Native_Closure *nc, int argc, Scheme_Object **argv, int argv_delta)
static void on_demand_generate_lambda(Scheme_Native_Closure *nc, Scheme_Native_Lambda *nlam,
int argc, Scheme_Object **argv, int argv_delta)
{
Scheme_Native_Lambda *nlam = nc->code;
Scheme_Lambda *lam;
Generate_Lambda gdata;
void *start_code, *tail_code, *arity_code;
@ -4065,7 +4072,7 @@ static void on_demand_generate_lambda(Scheme_Native_Closure *nc, int argc, Schem
void scheme_on_demand_generate_lambda(Scheme_Native_Closure *nc, int argc, Scheme_Object **argv, int argv_delta)
{
on_demand_generate_lambda(nc, argc, argv, argv_delta);
on_demand_generate_lambda(nc, nc->code, argc, argv, argv_delta);
}
Scheme_Object **scheme_on_demand_with_args(Scheme_Object **in_argv, Scheme_Object **argv, int argv_delta)
@ -4087,6 +4094,12 @@ Scheme_Object **scheme_on_demand(Scheme_Object **rs)
return scheme_on_demand_with_args(MZ_RUNSTACK, rs, 0);
}
/* Generates machine code for `nlam` right away instead of waiting for
   its first call. Does nothing unless `nlam->start_code` is still
   `scheme_on_demand_jit_code` (presumably the stub installed for
   not-yet-compiled lambdas -- confirm where `start_code` is set).
   There is no closure in this situation, so the generator is invoked
   with a NULL closure, zero arguments, and no argument array. */
void scheme_force_jit_generate(Scheme_Native_Lambda *nlam)
{
if (nlam->start_code == scheme_on_demand_jit_code)
on_demand_generate_lambda(NULL, nlam, 0, NULL, 0);
}
static Scheme_Native_Lambda *create_native_lambda(Scheme_Lambda *lam, int clear_code_after_jit,
Scheme_Native_Lambda *case_lam)
{
@ -4121,11 +4134,6 @@ static Scheme_Native_Lambda *create_native_lambda(Scheme_Lambda *lam, int clear_
nlam->max_let_depth = (JIT_RUNSTACK_RESERVE * sizeof(void*)) | (case_lam ? 0x2 : 0) | (clear_code_after_jit ? 0x1 : 0);
nlam->tl_map = lam->tl_map;
#if 0
/* Compile immediately: */
on_demand_generate_lambda(nlam);
#endif
return nlam;
}

View File

@ -33,8 +33,19 @@
#include "schpriv.h"
#include "schrunst.h"
THREAD_LOCAL_DECL(static Scheme_Object *current_linklet_native_lambdas);
static int force_jit;
#ifdef MZ_USE_JIT
/* One-time setup for the JIT-preparation pass: registers
   `current_linklet_native_lambdas` via REGISTER_SO (presumably as a
   GC-visible root -- confirm against the macro definition) and turns
   on `force_jit` when the `PLT_EAGER_JIT` environment variable is set.
   Only the presence of the variable is checked; its value is ignored. */
void scheme_init_jitprep()
{
REGISTER_SO(current_linklet_native_lambdas);
if (getenv("PLT_EAGER_JIT"))
force_jit = 1;
}
static Scheme_Object *jit_expr(Scheme_Object *expr);
static Scheme_Object *jit_application(Scheme_Object *o)
@ -434,6 +445,8 @@ Scheme_Object *scheme_case_lambda_jit(Scheme_Object *expr)
((Scheme_Lambda *)val)->name = name;
if (((Scheme_Lambda *)val)->closure_size)
all_closed = 0;
if (current_linklet_native_lambdas)
current_linklet_native_lambdas = scheme_make_pair(val, current_linklet_native_lambdas);
}
/* Generating the code may cause empty closures to be formed: */
@ -561,6 +574,10 @@ Scheme_Object *scheme_jit_closure(Scheme_Object *code, Scheme_Object *context)
ndata = scheme_generate_lambda(data2, 1, NULL);
data2->u.native_code = ndata;
if (current_linklet_native_lambdas)
current_linklet_native_lambdas = scheme_make_pair((Scheme_Object *)ndata,
current_linklet_native_lambdas);
if (!context)
data->u.jit_clone = data2;
}
@ -664,6 +681,9 @@ Scheme_Linklet *scheme_jit_linklet(Scheme_Linklet *linklet, int step)
return new_linklet;
}
if (force_jit)
current_linklet_native_lambdas = scheme_null;
i = SCHEME_VEC_SIZE(linklet->bodies);
bodies = scheme_make_vector(i, NULL);
while (i--) {
@ -675,6 +695,9 @@ Scheme_Linklet *scheme_jit_linklet(Scheme_Linklet *linklet, int step)
new_linklet->jit_ready = 2;
new_linklet->native_lambdas = current_linklet_native_lambdas;
current_linklet_native_lambdas = NULL;
return new_linklet;
}

View File

@ -1411,6 +1411,36 @@ static void *instantiate_linklet_k(void)
if (!multi)
v = scheme_check_one_value(v);
if (linklet->native_lambdas) {
int mc;
Scheme_Object **mv, *l;
if (SAME_OBJ(v, SCHEME_MULTIPLE_VALUES)) {
p = scheme_current_thread;
mv = p->ku.multiple.array;
mc = p->ku.multiple.count;
if (SAME_OBJ(mv, p->values_buffer))
p->values_buffer = NULL;
} else {
mv = NULL;
mc = 0;
}
l = linklet->native_lambdas;
linklet->native_lambdas = NULL;
while (SCHEME_PAIRP(l)) {
scheme_force_jit_generate((Scheme_Native_Lambda *)SCHEME_CAR(l));
l = SCHEME_CDR(l);
}
if (mv) {
p = scheme_current_thread;
p->ku.multiple.array = mv;
p->ku.multiple.count = mc;
}
}
scheme_performance_record_end("instantiate", &perf_state);
return (void *)v;
@ -1912,7 +1942,7 @@ static void show_perf(Performance_Entry *perf_entries, int perf_count,
}
}
if (!depth)
if (!depth) {
fprintf(stderr, ";; %stotal%s %s%"PRIdPTR " [%s%"PRIdPTR"] ms\n",
tab_number(total, tab, len),
tab_string("total", name_tab, name_len),
@ -1920,6 +1950,12 @@ static void show_perf(Performance_Entry *perf_entries, int perf_count,
total,
tab_number(gc_total, gc_tab, gc_len),
gc_total);
#ifdef MZ_PRECISE_GC
fprintf(stderr, ";; [JIT code: %"PRIdPTR" bytes JIT code+admin: %"PRIdPTR" bytes]\n",
scheme_code_total,
scheme_code_page_total);
#endif
}
}
static void show_all_perf()

View File

@ -160,7 +160,7 @@ static int toplevel_obj_FIXUP(void *p, struct NewGC *gc) {
static int static_toplevel_obj_SIZE(void *p, struct NewGC *gc) {
#ifndef GC_NO_SIZE_NEEDED_FROM_PROCS
gcBYTES_TO_WORDS(sizeof(Scheme_Static_Toplevel));
gcBYTES_TO_WORDS(sizeof(Scheme_Toplevel));
#else
return 0;
#endif
@ -173,7 +173,7 @@ static int static_toplevel_obj_MARK(void *p, struct NewGC *gc) {
return 0;
# else
return
gcBYTES_TO_WORDS(sizeof(Scheme_Static_Toplevel));
gcBYTES_TO_WORDS(sizeof(Scheme_Toplevel));
# endif
#endif
}
@ -185,7 +185,7 @@ static int static_toplevel_obj_FIXUP(void *p, struct NewGC *gc) {
return 0;
# else
return
gcBYTES_TO_WORDS(sizeof(Scheme_Static_Toplevel));
gcBYTES_TO_WORDS(sizeof(Scheme_Toplevel));
# endif
#endif
}
@ -3507,6 +3507,7 @@ static int linklet_val_MARK(void *p, struct NewGC *gc) {
gcMARK2(l->bodies, gc);
gcMARK2(l->constants, gc);
gcMARK2(l->static_prefix, gc);
gcMARK2(l->native_lambdas, gc);
# ifdef GC_NO_SIZE_NEEDED_FROM_PROCS
return 0;
# else
@ -3528,6 +3529,7 @@ static int linklet_val_FIXUP(void *p, struct NewGC *gc) {
gcFIXUP2(l->bodies, gc);
gcFIXUP2(l->constants, gc);
gcFIXUP2(l->static_prefix, gc);
gcFIXUP2(l->native_lambdas, gc);
# ifdef GC_NO_SIZE_NEEDED_FROM_PROCS
return 0;
# else

View File

@ -992,6 +992,7 @@ linklet_val {
gcMARK2(l->bodies, gc);
gcMARK2(l->constants, gc);
gcMARK2(l->static_prefix, gc);
gcMARK2(l->native_lambdas, gc);
size:
gcBYTES_TO_WORDS(sizeof(Scheme_Linklet));
}

View File

@ -943,6 +943,7 @@ START_XFORM_SKIP;
THREAD_LOCAL_DECL(static void *code_allocation_page_list);
THREAD_LOCAL_DECL(intptr_t scheme_code_page_total);
THREAD_LOCAL_DECL(intptr_t scheme_code_total);
#if defined(MZ_CODE_ALLOC_USE_MPROTECT) && !defined(MAP_ANON)
static int fd, fd_created;
@ -1161,6 +1162,7 @@ void *scheme_malloc_code(intptr_t size)
sz = (sz + page_size - 1) & ~(page_size - 1);
pg = malloc_page(sz);
scheme_code_page_total += sz;
scheme_code_total += sz;
*(intptr_t *)pg = sz;
chain_page(pg);
LOG_CODE_MALLOC(1, printf("allocated large %p (%ld) [now %ld]\n",
@ -1170,6 +1172,8 @@ void *scheme_malloc_code(intptr_t size)
bucket = free_list_find_bucket(size);
size2 = free_list[bucket].size;
scheme_code_total += size2;
if (!free_list[bucket].elems) {
/* add a new page's worth of items to the free list */
int i, count = 0;
@ -1266,6 +1270,7 @@ void scheme_free_code(void *p)
if (size >= page_size) {
/* it was a large object on its own page(s) */
scheme_code_page_total -= size;
scheme_code_total -= size;
LOG_CODE_MALLOC(1, printf("freeing large %p (%ld) [%ld left]\n",
p, size, scheme_code_page_total));
unchain_page((char *)p - CODE_HEADER_SIZE);
@ -1279,6 +1284,7 @@ void scheme_free_code(void *p)
}
size2 = free_list[bucket].size;
scheme_code_total -= size2;
LOG_CODE_MALLOC(0, printf("freeing %ld / %ld\n", size2, bucket));

View File

@ -313,6 +313,7 @@ void scheme_init_stack_check(void);
void scheme_init_overflow(void);
#ifdef MZ_USE_JIT
void scheme_init_jit(void);
void scheme_init_jitprep(void);
#endif
#ifdef MZ_PRECISE_GC
void scheme_register_traversers(void);
@ -2995,6 +2996,7 @@ void scheme_jit_fill_threadlocal_table();
#ifdef MZ_USE_JIT
void scheme_on_demand_generate_lambda(Scheme_Native_Closure *nc, int argc, Scheme_Object **argv, int delta);
void scheme_force_jit_generate(Scheme_Native_Lambda *nlam);
#endif
struct Start_Module_Args;
@ -3292,6 +3294,8 @@ struct Scheme_Linklet
Scheme_Hash_Table *constants; /* holds info about the linklet's body for inlining */
Scheme_Prefix *static_prefix; /* non-NULL for a linklet compiled in static mode */
Scheme_Object *native_lambdas; /* non-NULL => native lambdas to force-JIT on instantiation */
};
#define SCHEME_DEFN_VAR_COUNT(d) (SCHEME_VEC_SIZE(d)-1)