allocation/GC tweaks, such as faster path for 3m cons

svn: r3871
This commit is contained in:
Matthew Flatt 2006-07-28 16:48:24 +00:00
parent 4d090051cc
commit 99737131f2
9 changed files with 261 additions and 67 deletions

View File

@ -281,7 +281,7 @@ int main(int argc, char *argv[])
#if defined(MZ_PRECISE_GC)
# ifndef wx_msw
stack_start = (void *)&__gc_var_stack__;
GC_init_type_tags(_scheme_last_type_, scheme_weak_box_type, scheme_ephemeron_type, scheme_rt_weak_array);
GC_init_type_tags(_scheme_last_type_, scheme_pair_type, scheme_weak_box_type, scheme_ephemeron_type, scheme_rt_weak_array);
# endif
/* For Windows, WinMain inits the type tags. */
#endif

View File

@ -89,8 +89,8 @@ typedef short Type_Tag;
/* Debugging and performance tools: */
#define TIME 0
#define SEARCH 0
#define CHECKS 1
#define CHECK_STACK_PTRS 1
#define CHECKS 0
#define CHECK_STACK_PTRS 0
#define NOISY 0
#define MARK_STATS 0
#define ALLOC_GC_PHASE 0
@ -159,6 +159,7 @@ void **GC_get_variable_stack() { return GC_variable_stack; }
void GC_set_variable_stack(void **p) { GC_variable_stack = p; }
/********************* Type tags *********************/
Type_Tag pair_tag = 42; /* set by client */
Type_Tag weak_box_tag = 42; /* set by client */
Type_Tag ephemeron_tag = 42; /* set by client */
Type_Tag weak_array_tag = 42; /* set by client */
@ -466,8 +467,9 @@ void GC_set_stack_base(void *base)
stack_base = (unsigned long)base;
}
void GC_init_type_tags(int count, int weakbox, int ephemeron, int weakarray)
void GC_init_type_tags(int count, int pair, int weakbox, int ephemeron, int weakarray)
{
pair_tag = pair;
weak_box_tag = weakbox;
ephemeron_tag = ephemeron;
weak_array_tag = weakarray;
@ -3845,6 +3847,23 @@ void *GC_malloc_one_small_tagged(size_t size_in_bytes)
return GC_malloc_one_tagged(size_in_bytes);
}
void *GC_malloc_pair(void *a, void *b)
{
void *p;
park[0] = a;
park[1] = b;
p = GC_malloc_one_tagged(3 << LOG_WORD_SIZE);
a = park[0];
b = park[1];
((Type_Tag *)p)[0] = pair_tag;
((void **)p)[1] = a;
((void **)p)[2] = b;
return p;
}
#ifndef gcINLINE
# define gcINLINE inline
#endif

View File

@ -86,6 +86,7 @@ void GC_set_finalizer(void *p, int tagged, int level, void (*f)(void *p, void *d
park[1] = data;
fnl = (Fnl *)GC_malloc_atomic(sizeof(Fnl));
memset(fnl, 0, sizeof(Fnl));
p = park[0];
park[0] = NULL;

View File

@ -64,7 +64,7 @@ GC2_EXTERN void GC_add_roots(void *start, void *end);
Called by MzScheme to install roots. The memory between
`start' (inclusive) and `end' (exclusive) contains pointers. */
GC2_EXTERN void GC_init_type_tags(int count, int weakbox, int ephemeron, int weakarray);
GC2_EXTERN void GC_init_type_tags(int count, int pair, int weakbox, int ephemeron, int weakarray);
/*
Called by MzScheme to indicate the number of different type tags it
uses, starting from 0. `count' is always less than 256. The weakbox
@ -131,6 +131,10 @@ GC2_EXTERN void *GC_malloc_one_small_tagged(size_t);
Like GC_malloc_one_tagged, but the size must be less than 1kb,
it must not be zero, and it must be a multiple of the word size. */
GC2_EXTERN void *GC_malloc_pair(void *car, void *cdr);
/*
Like GC_malloc_one_tagged, but even more streamline. */
GC2_EXTERN void *GC_malloc_one_xtagged(size_t);
/*
Alloc an item, initially zeroed. Rather than having a specific tag,

View File

@ -118,6 +118,8 @@ void (*GC_fixup_xtagged)(void *obj);
#include "my_qsort.c"
static void *park[2];
/*****************************************************************************/
/* OS-Level Memory Management Routines */
/*****************************************************************************/
@ -460,6 +462,11 @@ inline static void *allocate(size_t sizeb, int type)
} else {
void *retval = PTR(NUM(gen0_alloc_page) + gen0_alloc_page->size);
if (type == PAGE_ATOMIC)
*((void **)retval) = NULL; /* init objhead */
else
bzero(retval, sizeb);
info = (struct objhead *)retval;
info->type = type;
info->size = sizew;
@ -486,8 +493,6 @@ void *GC_malloc_one_small_tagged(size_t sizeb)
{
unsigned long newsize;
return GC_malloc_one_tagged(sizeb);
sizeb += WORD_SIZE;
sizeb = ALIGN_BYTES_SIZE(sizeb);
newsize = gen0_alloc_page->size + sizeb;
@ -498,6 +503,8 @@ void *GC_malloc_one_small_tagged(size_t sizeb)
void *retval = PTR(NUM(gen0_alloc_page) + gen0_alloc_page->size);
struct objhead *info = (struct objhead *)retval;
bzero(retval, sizeb);
/* info->type = type; */ /* We know that the type field is already 0 */
info->size = (sizeb >> gcLOG_WORD_SIZE);
gen0_alloc_page->size = newsize;
@ -507,6 +514,48 @@ void *GC_malloc_one_small_tagged(size_t sizeb)
}
}
void *GC_malloc_pair(void *car, void *cdr)
{
size_t sizeb;
unsigned long newsize;
void *retval;
sizeb = ALIGN_BYTES_SIZE(gcWORDS_TO_BYTES(gcBYTES_TO_WORDS(sizeof(Scheme_Simple_Object))) + WORD_SIZE);
newsize = gen0_alloc_page->size + sizeb;
if(newsize > GEN0_PAGE_SIZE) {
park[0] = car;
park[1] = cdr;
retval = GC_malloc_one_tagged(sizeb - WORD_SIZE);
car = park[0];
cdr = park[1];
park[0] = NULL;
park[1] = NULL;
} else {
struct objhead *info;
retval = PTR(NUM(gen0_alloc_page) + gen0_alloc_page->size);
info = (struct objhead *)retval;
((void **)retval)[0] = 0;
((void **)retval)[1] = 0;
bzero(retval, sizeb); /* <-- */
/* info->type = type; */ /* We know that the type field is already 0 */
info->size = (sizeb >> gcLOG_WORD_SIZE);
gen0_alloc_page->size = newsize;
gen0_current_size += sizeb;
retval = PTR(NUM(retval) + WORD_SIZE);
}
((short *)retval)[0] = scheme_pair_type;
((void **)retval)[1] = car;
((void **)retval)[2] = cdr;
return retval;
}
long GC_malloc_stays_put_threshold() { return gcWORDS_TO_BYTES(MAX_OBJECT_SIZEW); }
/* this function resizes generation 0 to the closest it can get (erring high)
@ -541,11 +590,11 @@ inline static void resize_gen0(unsigned long new_size)
break;
} else {
void **start = PPTR(work) + HEADER_SIZEW;
void **end = PPTR(NUM(work) + work->size);
/* We used to zero out the memory here, but its
better to zero out on allocation, instead:
better locality, and we don't have to zero
for atomic allocations. */
alloced_size += GEN0_PAGE_SIZE;
while(start < end) *start++ = NULL;
work->size = HEADER_SIZEB;
prev = work;
work = work->next;
@ -785,7 +834,6 @@ static void *get_backtrace(struct mpage *page, void *ptr)
/*****************************************************************************/
void **GC_variable_stack;
static unsigned long stack_base;
static void *park[2];
void **GC_get_variable_stack()
{
@ -1026,7 +1074,7 @@ void GC_finalization_weak_ptr(void **p, int offset)
park[0] = p; wfnl = GC_malloc_atomic(sizeof(struct weak_finalizer));
p = park[0]; park[0] = NULL;
wfnl->p = p; wfnl->offset = offset * sizeof(void*);
wfnl->p = p; wfnl->offset = offset * sizeof(void*); wfnl->saved = NULL;
wfnl->next = weak_finalizers; weak_finalizers = wfnl;
}
@ -1724,7 +1772,7 @@ void designate_modified(void *p)
#include "sighand.c"
void GC_init_type_tags(int count, int weakbox, int ephemeron, int weakarray)
void GC_init_type_tags(int count, int pair, int weakbox, int ephemeron, int weakarray)
{
static int initialized = 0;
@ -2593,6 +2641,20 @@ static void gc_overmem_abort()
GCERR((GCOUTF, "ERROR: out of memory during collection!\n"));
}
#if 0
extern double scheme_get_inexact_milliseconds(void);
# define TIME_DECLS() double start, task_start
# define TIME_INIT() start = task_start = scheme_get_inexact_milliseconds(); fprintf(stderr, "GC (%d):\n", gc_full)
# define TIME_STEP(task) fprintf(stderr, " %s: %lf\n", task, scheme_get_inexact_milliseconds() - task_start); \
task_start = scheme_get_inexact_milliseconds()
# define TIME_DONE() fprintf(stderr, " Total: %lf\n", scheme_get_inexact_milliseconds() - start)
#else
# define TIME_DECLS() /**/
# define TIME_INIT() /**/
# define TIME_STEP(task) /**/
# define TIME_DONE() /**/
#endif
static void garbage_collect(int force_full)
{
static unsigned long number = 0;
@ -2600,6 +2662,7 @@ static void garbage_collect(int force_full)
static unsigned int running_finalizers = 0;
static unsigned long last_full_mem_use = (20 * 1024 * 1024);
unsigned long old_mem_use = memory_in_use;
TIME_DECLS();
/* determine if this should be a full collection or not */
gc_full = force_full || !generations_available
@ -2616,22 +2679,33 @@ static void garbage_collect(int force_full)
in_unsafe_allocation_mode = 1;
unsafe_allocation_abort = gc_overmem_abort;
TIME_INIT();
/* inform the system (if it wants us to) that we're starting collection */
if(GC_collect_start_callback)
GC_collect_start_callback();
TIME_STEP("started");
prepare_pages_for_collection();
/* at this point, the page map should only include pages that contain
collectable objects */
TIME_STEP("prepared");
/* mark and repair the roots for collection */
mark_backpointers();
TIME_STEP("backpointered");
mark_finalizer_structs();
mark_weak_finalizer_structs();
TIME_STEP("pre-rooted");
mark_roots();
mark_immobiles();
TIME_STEP("rooted");
GC_mark_variable_stack(GC_variable_stack, 0, gc_stack_base);
TIME_STEP("stacked");
/* now propagate/repair the marks we got from these roots, and do the
finalizer passes */
propagate_marks(); mark_ready_ephemerons(); propagate_marks();
@ -2652,34 +2726,48 @@ static void garbage_collect(int force_full)
removed */
clear_stack_pages();
#endif
TIME_STEP("marked");
zero_weak_boxes();
zero_weak_arrays();
zero_remaining_ephemerons();
TIME_STEP("zeroed");
if(gc_full) do_heap_compact();
TIME_STEP("compacted");
/* do some cleanup structures that either change state based on the
heap state after collection or that become useless based on changes
in state after collection */
clean_up_thread_list();
clean_up_owner_table();
clean_up_account_hooks();
TIME_STEP("cleaned");
repair_finalizer_structs();
repair_weak_finalizer_structs();
repair_roots();
repair_immobiles();
GC_fixup_variable_stack(GC_variable_stack, 0, gc_stack_base);
TIME_STEP("reparied roots");
repair_heap();
TIME_STEP("repaired");
clean_up_heap();
TIME_STEP("cleaned heap");
reset_nursery();
TIME_STEP("reset nursurey");
do_btc_accounting();
TIME_STEP("accounted");
if (generations_available)
protect_old_pages();
if (gc_full)
flush_freed_pages();
reset_finalizer_tree();
TIME_STEP("reset");
/* new we do want the allocator freaking if we go over half */
in_unsafe_allocation_mode = 0;
@ -2701,6 +2789,10 @@ static void garbage_collect(int force_full)
if(GC_collect_start_callback)
GC_collect_end_callback();
TIME_STEP("ended");
TIME_DONE();
/* run any queued finalizers, EXCEPT in the case where this collection was
triggered by the execution of a finalizer. The outside world needs this
invariant in some corner case I don't have a reference for. In any case,

View File

@ -267,7 +267,7 @@ static int main_after_dlls(int argc, MAIN_char **MAIN_argv)
#if defined(MZ_PRECISE_GC)
stack_start = (void *)&__gc_var_stack__;
GC_init_type_tags(_scheme_last_type_, scheme_weak_box_type, scheme_ephemeron_type, scheme_rt_weak_array);
GC_init_type_tags(_scheme_last_type_, scheme_pair_type, scheme_weak_box_type, scheme_ephemeron_type, scheme_rt_weak_array);
#endif
scheme_set_stack_base(stack_start, 1);

View File

@ -1129,6 +1129,21 @@ static int is_simple(Scheme_Object *obj, int depth, int just_markless, mz_jit_st
return (type > _scheme_values_types_);
}
static int is_constant_and_avoids_r1(Scheme_Object *obj)
{
Scheme_Type t = SCHEME_TYPE(obj);
if (SAME_TYPE(t, scheme_toplevel_type)) {
return ((SCHEME_TOPLEVEL_FLAGS(obj) & SCHEME_TOPLEVEL_CONST)
? 1
: 0);
} else if (SAME_TYPE(t, scheme_local_type)) {
return 1;
} else
return (t >= _scheme_compiled_values_types_);
}
/*========================================================================*/
/* application codegen */
/*========================================================================*/
@ -2541,6 +2556,71 @@ static int generate_inlined_unary(mz_jit_state *jitter, Scheme_App2_Rec *app, in
return 0;
}
static int generate_two_args(Scheme_Object *rand1, Scheme_Object *rand2, mz_jit_state *jitter, int order_matters)
{
int simple1, simple2;
simple1 = is_constant_and_avoids_r1(rand1);
simple2 = is_constant_and_avoids_r1(rand2);
if (!simple1) {
if (simple2) {
mz_runstack_skipped(jitter, 2);
generate_non_tail(rand1, jitter, 0, 1);
CHECK_LIMIT();
jit_movr_p(JIT_R1, JIT_R0);
generate(rand2, jitter, 0, 0);
CHECK_LIMIT();
if (order_matters) {
/* Swap arguments: */
jit_movr_p(JIT_R2, JIT_R0);
jit_movr_p(JIT_R0, JIT_R1);
jit_movr_p(JIT_R1, JIT_R2);
}
mz_runstack_unskipped(jitter, 2);
} else {
jit_subi_p(JIT_RUNSTACK, JIT_RUNSTACK, WORDS_TO_BYTES(1));
mz_runstack_pushed(jitter, 1);
mz_runstack_skipped(jitter, 1);
generate_non_tail(rand1, jitter, 0, 1);
CHECK_LIMIT();
jit_str_p(JIT_RUNSTACK, JIT_R0);
generate_non_tail(rand2, jitter, 0, 1);
CHECK_LIMIT();
jit_movr_p(JIT_R1, JIT_R0);
jit_ldr_p(JIT_R0, JIT_RUNSTACK);
mz_runstack_unskipped(jitter, 1);
jit_addi_p(JIT_RUNSTACK, JIT_RUNSTACK, WORDS_TO_BYTES(1));
mz_runstack_popped(jitter, 1);
}
} else {
mz_runstack_skipped(jitter, 2);
if (simple2) {
generate(rand2, jitter, 0, 0);
} else {
generate_non_tail(rand2, jitter, 0, 1);
}
CHECK_LIMIT();
jit_movr_p(JIT_R1, JIT_R0);
generate(rand1, jitter, 0, 0);
CHECK_LIMIT();
mz_runstack_unskipped(jitter, 2);
}
return 1;
}
static int generate_inlined_binary(mz_jit_state *jitter, Scheme_App3_Rec *app, int is_tail, int multi_ok,
jit_insn **for_branch, int branch_short)
{
@ -2608,22 +2688,7 @@ static int generate_inlined_binary(mz_jit_state *jitter, Scheme_App3_Rec *app, i
__END_SHORT_JUMPS__(branch_short);
} else {
/* Two complex expressions: */
mz_runstack_skipped(jitter, 1);
jit_subi_p(JIT_RUNSTACK, JIT_RUNSTACK, WORDS_TO_BYTES(1));
mz_runstack_pushed(jitter, 1);
generate_non_tail(a2, jitter, 0, 1);
CHECK_LIMIT();
jit_str_p(JIT_RUNSTACK, JIT_R0);
generate_non_tail(a1, jitter, 0, 1);
CHECK_LIMIT();
jit_ldr_p(JIT_R1, JIT_RUNSTACK);
jit_addi_p(JIT_RUNSTACK, JIT_RUNSTACK, WORDS_TO_BYTES(1));
mz_runstack_popped(jitter, 1);
mz_runstack_unskipped(jitter, 1);
generate_two_args(a1, a2, jitter, 0);
__START_SHORT_JUMPS__(branch_short);
@ -2689,7 +2754,7 @@ static int generate_inlined_binary(mz_jit_state *jitter, Scheme_App3_Rec *app, i
else
which = 2;
LOG_IT(("inlined vector-ref\n"));
LOG_IT(("inlined vector-/string-/bytes-ref\n"));
simple = (SCHEME_INTP(app->rand2)
&& (SCHEME_INT_VAL(app->rand2) >= 0));
@ -2749,6 +2814,24 @@ static int generate_inlined_binary(mz_jit_state *jitter, Scheme_App3_Rec *app, i
else
mz_runstack_unskipped(jitter, 1);
return 1;
} else if (IS_NAMED_PRIM(rator, "cons")) {
LOG_IT(("inlined cons\n"));
generate_two_args(app->rand1, app->rand2, jitter, 1);
CHECK_LIMIT();
JIT_UPDATE_THREAD_RSPTR_IF_NEEDED();
mz_prepare(2);
jit_pusharg_p(JIT_R1);
jit_pusharg_p(JIT_R0);
#ifdef MZ_PRECISE_GC
(void)mz_finish(GC_malloc_pair);
#else
(void)mz_finish(scheme_make_pair);
#endif
jit_retval(JIT_R0);
return 1;
}
}
@ -3186,6 +3269,7 @@ static int generate(Scheme_Object *obj, mz_jit_state *jitter, int is_tail, int m
case scheme_toplevel_type:
{
int pos;
/* Other parts of the JIT rely on this code not modifying R1 */
START_JIT_DATA();
LOG_IT(("top-level\n"));
/* Load global array: */
@ -3207,7 +3291,7 @@ static int generate(Scheme_Object *obj, mz_jit_state *jitter, int is_tail, int m
}
case scheme_local_type:
{
/* Other parts of thie JIT rely on this code modifying R0, only */
/* Other parts of the JIT rely on this code modifying R0, only */
int pos;
START_JIT_DATA();
LOG_IT(("local\n"));
@ -3899,7 +3983,7 @@ static int generate(Scheme_Object *obj, mz_jit_state *jitter, int is_tail, int m
Scheme_Type type = SCHEME_TYPE(obj);
START_JIT_DATA();
/* Other parts of thie JIT rely on this code modifying R0, only */
/* Other parts of the JIT rely on this code modifying R0, only */
LOG_IT(("const\n"));

View File

@ -130,11 +130,9 @@ scheme_init_list (Scheme_Env *env)
SCHEME_PRIM_PROC_FLAGS(p) |= SCHEME_PRIM_IS_UNARY_INLINED;
scheme_add_global_constant ("pair?", p, env);
scheme_add_global_constant ("cons",
scheme_make_prim_w_arity(cons_prim,
"cons",
2, 2),
env);
p = scheme_make_prim_w_arity(cons_prim, "cons", 2, 2);
SCHEME_PRIM_PROC_FLAGS(p) |= SCHEME_PRIM_IS_BINARY_INLINED;
scheme_add_global_constant ("cons", p, env);
p = scheme_make_noncm_prim(scheme_checked_car, "car", 1, 1);
SCHEME_PRIM_PROC_FLAGS(p) |= SCHEME_PRIM_IS_UNARY_INLINED;
@ -530,7 +528,9 @@ scheme_init_list (Scheme_Env *env)
Scheme_Object *scheme_make_pair(Scheme_Object *car, Scheme_Object *cdr)
{
#ifndef MZ_PRECISE_GC
Scheme_Object *cons;
#endif
#if 0
if (!car || !cdr
@ -541,11 +541,15 @@ Scheme_Object *scheme_make_pair(Scheme_Object *car, Scheme_Object *cdr)
*(long *)0x0 = 1;
#endif
#ifdef MZ_PRECISE_GC
return GC_malloc_pair(car, cdr);
#else
cons = scheme_alloc_object();
cons->type = scheme_pair_type;
SCHEME_CAR(cons) = car;
SCHEME_CDR(cons) = cdr;
return cons;
#endif
}
Scheme_Object *scheme_make_raw_pair(Scheme_Object *car, Scheme_Object *cdr)
@ -823,53 +827,43 @@ list_p_prim (int argc, Scheme_Object *argv[])
return scheme_false;
}
static Scheme_Object *
list_exec (int argc, Scheme_Object *argv[], int star, int immut)
{
int i;
Scheme_Object *l;
#define NORMAL_LIST_INIT() l = scheme_null
#define STAR_LIST_INIT() --argc; l = argv[argc]
#ifndef MZ_PRECISE_GC
# define GC_malloc_pair scheme_make_pair
#endif
if (star) {
--argc;
l = argv[argc];
} else
l = scheme_null;
if (immut) {
for (i = argc ; i--; ) {
l = scheme_make_immutable_pair(argv[i], l);
}
} else {
for (i = argc ; i--; ) {
l = scheme_make_pair(argv[i], l);
}
}
return l;
}
#define LIST_BODY(INIT, scheme_make_pair) \
int i; \
Scheme_Object *l; \
INIT; \
for (i = argc ; i--; ) { \
l = scheme_make_pair(argv[i], l); \
} \
return l
static Scheme_Object *
list_prim (int argc, Scheme_Object *argv[])
{
return list_exec(argc, argv, 0, 0);
LIST_BODY(NORMAL_LIST_INIT(), GC_malloc_pair);
}
static Scheme_Object *
list_immutable_prim (int argc, Scheme_Object *argv[])
{
return list_exec(argc, argv, 0, 1);
LIST_BODY(NORMAL_LIST_INIT(), scheme_make_immutable_pair);
}
static Scheme_Object *
list_star_prim (int argc, Scheme_Object *argv[])
{
return list_exec(argc, argv, 1, 0);
LIST_BODY(STAR_LIST_INIT(), GC_malloc_pair);
}
static Scheme_Object *
list_star_immutable_prim (int argc, Scheme_Object *argv[])
{
return list_exec(argc, argv, 1, 1);
LIST_BODY(STAR_LIST_INIT(), scheme_make_immutable_pair);
}
static Scheme_Object *

View File

@ -196,7 +196,7 @@ int wxWinMain(int wm_is_mred,
#if defined(MZ_PRECISE_GC)
mzscheme_stack_start = (void *)&__gc_var_stack__;
GC_init_type_tags(_scheme_last_type_, scheme_weak_box_type, scheme_ephemeron_type, scheme_rt_weak_array);
GC_init_type_tags(_scheme_last_type_, scheme_pair_type, scheme_weak_box_type, scheme_ephemeron_type, scheme_rt_weak_array);
#endif
scheme_set_stack_base(mzscheme_stack_start, 1);