From 93fdbdc79c6e8d5991da997355d9c0a1e5597766 Mon Sep 17 00:00:00 2001 From: Matthew Flatt Date: Fri, 20 Jun 2014 14:55:02 +0100 Subject: [PATCH] optimizer: refine virtual clock, more precise shift-fuel tracking Allow an effect-free `if` to not increment the effect-tracking virtual clock (but increment the clock during branches, to avoid moving computation into a branch). Spend empty-`let`-elimination fuel more precisely, so that more empty `let`s can be removed while still avoiding quadratic compile times. --- .../racket-test/tests/racket/optimize.rktl | 23 +++ racket/src/racket/src/optimize.c | 154 ++++++++++++++++-- 2 files changed, 167 insertions(+), 10 deletions(-) diff --git a/pkgs/racket-pkgs/racket-test/tests/racket/optimize.rktl b/pkgs/racket-pkgs/racket-test/tests/racket/optimize.rktl index e3a41fcfac..3e0ee17acc 100644 --- a/pkgs/racket-pkgs/racket-test/tests/racket/optimize.rktl +++ b/pkgs/racket-pkgs/racket-test/tests/racket/optimize.rktl @@ -1552,6 +1552,29 @@ (let ([r (if z 1 (+ z z))]) (list (if z 1 4) r)))) +(test-comp '(lambda (a b c f) + (let ((d (if a + a + b))) + (let ((e + (if b + c + (if (= f 90000) + #f + (add1 c))))) + (values d e)))) + '(lambda (a b c f) + (values (if a a b) + (if b c (if (= f 90000) + #f + (add1 c)))))) + +(test-comp '(lambda (x y) + (let ([z (+ x y)]) + (list (if x x y) z))) + '(lambda (x y) + (list (if x x y) (+ x y)))) + (test-comp '(let-values ([(x y) (values 1 2)]) (+ x y)) 3) diff --git a/racket/src/racket/src/optimize.c b/racket/src/racket/src/optimize.c index 37b0dc8a2f..9d1efbde33 100644 --- a/racket/src/racket/src/optimize.c +++ b/racket/src/racket/src/optimize.c @@ -57,7 +57,11 @@ struct Optimize_Info /* Propagated up and down the chain: */ int size; - int vclock; /* virtual clock that ticks for a side effect */ + int vclock; /* virtual clock that ticks for a side effect or branch; + the clock is only compared between binding sites and + uses, so we can rewind the clock at a join after an + increment that 
models a branch (if the branch is not + taken or doesn't increment the clock) */ int kclock; /* virtual clock that ticks for a potential continuation capture */ int psize; short inline_fuel, shift_fuel; @@ -81,6 +85,10 @@ struct Optimize_Info Scheme_Hash_Tree *types; /* maps position (from this frame) to predicate */ }; +typedef struct Optimize_Info_Sequence { + int init_shift_fuel, min_shift_fuel; +} Optimize_Info_Sequence; + #define OPT_IS_MUTATED 0x1 #define OPT_ESCAPES_AFTER_K_TICK 0x2 #define OPT_LOCAL_TYPE_ARG_SHIFT 2 @@ -123,6 +131,10 @@ static Optimize_Info *optimize_info_add_frame(Optimize_Info *info, int orig, int static int optimize_info_get_shift(Optimize_Info *info, int pos); static void optimize_info_done(Optimize_Info *info, Optimize_Info *parent); +static void optimize_info_seq_init(Optimize_Info *info, Optimize_Info_Sequence *info_seq); +static void optimize_info_seq_step(Optimize_Info *info, Optimize_Info_Sequence *info_seq); +static void optimize_info_seq_done(Optimize_Info *info, Optimize_Info_Sequence *info_seq); + static Scheme_Object *estimate_closure_size(Scheme_Object *e); static Scheme_Object *no_potential_size(Scheme_Object *value); @@ -1106,12 +1118,14 @@ Scheme_Object *scheme_make_struct_proc_shape(intptr_t k) } static int single_valued_noncm_expression(Scheme_Object *expr, int fuel) -/* Non-omittable/non-copyable but single-valued expresions that are not sensitive +/* Not necessarily omittable or copyable, but single-valued expressions that are not sensitive to being in tail position. 
*/ { Scheme_Object *rator = NULL; switch (SCHEME_TYPE(expr)) { + case scheme_local_type: + return 1; case scheme_compiled_toplevel_type: return 1; case scheme_application_type: @@ -1769,6 +1783,7 @@ Scheme_Object *optimize_for_inline(Optimize_Info *info, Scheme_Object *le, int a if ((sz >= 0) && (single_use || (sz <= threshold))) { Optimize_Info *sub_info; if (nested_count) { + /* Pessimistically assume that we moved inside past an effect */ sub_info = optimize_info_add_frame(info, nested_count, nested_count, 0); sub_info->vclock++; sub_info->kclock++; @@ -2494,6 +2509,7 @@ static Scheme_Object *optimize_application(Scheme_Object *o, Optimize_Info *info Scheme_Object *le; Scheme_App_Rec *app; int i, n, rator_flags = 0, sub_context = 0; + Optimize_Info_Sequence info_seq; app = (Scheme_App_Rec *)o; @@ -2506,6 +2522,8 @@ static Scheme_Object *optimize_application(Scheme_Object *o, Optimize_Info *info n = app->num_args + 1; + optimize_info_seq_init(info, &info_seq); + for (i = 0; i < n; i++) { if (!i) { le = optimize_for_inline(info, app->args[i], n - 1, app, NULL, NULL, &rator_flags, context, 0); @@ -2521,6 +2539,7 @@ static Scheme_Object *optimize_application(Scheme_Object *o, Optimize_Info *info sub_context = (ty << OPT_CONTEXT_TYPE_SHIFT); } + optimize_info_seq_step(info, &info_seq); le = scheme_optimize_expr(app->args[i], info, sub_context); app->args[i] = le; @@ -2532,6 +2551,8 @@ static Scheme_Object *optimize_application(Scheme_Object *o, Optimize_Info *info } } + optimize_info_seq_done(info, &info_seq); + /* Check for (apply ... 
(list ...)) after some optimizations: */ le = direct_apply((Scheme_Object *)app, app->args[0], app->args[app->num_args], info); if (le) return finish_optimize_app(le, info, context, rator_flags); @@ -2784,6 +2805,7 @@ static Scheme_Object *optimize_application2(Scheme_Object *o, Optimize_Info *inf Scheme_App2_Rec *app; Scheme_Object *le; int rator_flags = 0, sub_context = 0, ty; + Optimize_Info_Sequence info_seq; app = (Scheme_App2_Rec *)o; @@ -2794,6 +2816,8 @@ static Scheme_Object *optimize_application2(Scheme_Object *o, Optimize_Info *inf if (le) return le; + optimize_info_seq_init(info, &info_seq); + le = scheme_optimize_expr(app->rator, info, 0); app->rator = le; @@ -2812,9 +2836,13 @@ static Scheme_Object *optimize_application2(Scheme_Object *o, Optimize_Info *inf sub_context |= (ty << OPT_CONTEXT_TYPE_SHIFT); } + optimize_info_seq_step(info, &info_seq); + le = scheme_optimize_expr(app->rand, info, sub_context); app->rand = le; + optimize_info_seq_done(info, &info_seq); + return finish_optimize_application2(app, info, context, rator_flags); } @@ -3037,6 +3065,7 @@ static Scheme_Object *optimize_application3(Scheme_Object *o, Optimize_Info *inf Scheme_App3_Rec *app; Scheme_Object *le; int rator_flags = 0, sub_context = 0, ty, flags; + Optimize_Info_Sequence info_seq; app = (Scheme_App3_Rec *)o; @@ -3061,6 +3090,8 @@ static Scheme_Object *optimize_application3(Scheme_Object *o, Optimize_Info *inf if (le) return le; + optimize_info_seq_init(info, &info_seq); + le = scheme_optimize_expr(app->rator, info, sub_context); app->rator = le; @@ -3077,6 +3108,8 @@ static Scheme_Object *optimize_application3(Scheme_Object *o, Optimize_Info *inf if (ty) sub_context |= (ty << OPT_CONTEXT_TYPE_SHIFT); + optimize_info_seq_step(info, &info_seq); + le = scheme_optimize_expr(app->rand1, info, sub_context); app->rand1 = le; @@ -3088,9 +3121,13 @@ static Scheme_Object *optimize_application3(Scheme_Object *o, Optimize_Info *inf else sub_context &= ~OPT_CONTEXT_TYPE_MASK; + 
optimize_info_seq_step(info, &info_seq); + le = scheme_optimize_expr(app->rand2, info, sub_context); app->rand2 = le; + optimize_info_seq_done(info, &info_seq); + /* Check for (apply ... (list ...)) after some optimizations: */ le = direct_apply((Scheme_Object *)app, app->rator, app->rand2, info); if (le) return finish_optimize_app(le, info, context, rator_flags); @@ -3405,11 +3442,15 @@ static Scheme_Object *optimize_sequence(Scheme_Object *o, Optimize_Info *info, i Scheme_Object *le; int i, count, prev_size; int drop = 0, preserves_marks = 0, single_result = 0; + Optimize_Info_Sequence info_seq; + + optimize_info_seq_init(info, &info_seq); count = s->count; for (i = 0; i < count; i++) { prev_size = info->size; + optimize_info_seq_step(info, &info_seq); le = scheme_optimize_expr(s->array[i], info, ((i + 1 == count) ? scheme_optimize_tail_context(context) @@ -3432,6 +3473,8 @@ static Scheme_Object *optimize_sequence(Scheme_Object *o, Optimize_Info *info, i } } + optimize_info_seq_done(info, &info_seq); + info->preserves_marks = preserves_marks; info->single_result = single_result; @@ -3568,7 +3611,8 @@ static Scheme_Object *optimize_branch(Scheme_Object *o, Optimize_Info *info, int Scheme_Branch_Rec *b; Scheme_Object *t, *tb, *fb; Scheme_Hash_Tree *old_types; - int preserves_marks = 1, single_result = 1, init_kclock, then_kclock; + int preserves_marks = 1, single_result = 1, same_then_vclock, init_vclock, init_kclock, then_kclock; + Optimize_Info_Sequence info_seq; b = (Scheme_Branch_Rec *)o; @@ -3589,6 +3633,8 @@ static Scheme_Object *optimize_branch(Scheme_Object *o, Optimize_Info *info, int } } + optimize_info_seq_init(info, &info_seq); + t = scheme_optimize_expr(t, info, OPT_CONTEXT_BOOLEAN); /* Try optimize: (if (not x) y z) => (if x z y) */ @@ -3651,15 +3697,14 @@ static Scheme_Object *optimize_branch(Scheme_Object *o, Optimize_Info *info, int else ((Scheme_Compiled_Let_Value *)inside)->body = scheme_void; return make_sequence_2(t, - 
scheme_optimize_expr((Scheme_Object *)b, info, context)); + scheme_optimize_expr((Scheme_Object *)b, info, + scheme_optimize_tail_context(context))); } } - - info->vclock += 1; /* model branch as clock increment */ - init_kclock = info->kclock; if (SCHEME_TYPE(t) > _scheme_compiled_values_types_) { /* Branch is statically known */ + optimize_info_seq_done(info, &info_seq); info->size -= 1; if (SCHEME_FALSEP(t)) return scheme_optimize_expr(fb, info, scheme_optimize_tail_context(context)); @@ -3667,6 +3712,12 @@ static Scheme_Object *optimize_branch(Scheme_Object *o, Optimize_Info *info, int return scheme_optimize_expr(tb, info, scheme_optimize_tail_context(context)); } + optimize_info_seq_step(info, &info_seq); + + info->vclock += 1; /* model branch as clock increment */ + init_vclock = info->vclock; + init_kclock = info->kclock; + old_types = info->types; add_types(t, info, 5); @@ -3681,10 +3732,15 @@ static Scheme_Object *optimize_branch(Scheme_Object *o, Optimize_Info *info, int else if (info->single_result < 0) single_result = -1; + same_then_vclock = (init_vclock == info->vclock); + info->types = old_types; then_kclock = info->kclock; + info->vclock = init_vclock; info->kclock = init_kclock; + optimize_info_seq_step(info, &info_seq); + fb = scheme_optimize_expr(fb, info, scheme_optimize_tail_context(context)); if (!info->preserves_marks) @@ -3701,10 +3757,17 @@ static Scheme_Object *optimize_branch(Scheme_Object *o, Optimize_Info *info, int info->types = old_types; /* could try to take an intersection here ... 
*/ - info->vclock += 1; /* model join as clock increment */ + if (same_then_vclock && (init_vclock == info->vclock)) { + /* we can rewind the vclock to just after the test, because the + `if` as a whole has no effect */ + info->vclock--; + } + info->preserves_marks = preserves_marks; info->single_result = single_result; + optimize_info_seq_done(info, &info_seq); + /* Try optimize: (if x x #f) => x */ if (SAME_TYPE(SCHEME_TYPE(t), scheme_local_type) && SAME_TYPE(SCHEME_TYPE(tb), scheme_local_type) @@ -3763,18 +3826,34 @@ static Scheme_Object *optimize_wcm(Scheme_Object *o, Optimize_Info *info, int co { Scheme_With_Continuation_Mark *wcm = (Scheme_With_Continuation_Mark *)o; Scheme_Object *k, *v, *b; + int init_vclock; + Optimize_Info_Sequence info_seq; + + optimize_info_seq_init(info, &info_seq); k = scheme_optimize_expr(wcm->key, info, 0); + optimize_info_seq_step(info, &info_seq); + v = scheme_optimize_expr(wcm->val, info, 0); /* The presence of a key can be detected by other expressions, to increment vclock to prevent expressions incorrectly moving under the mark: */ info->vclock++; + init_vclock = info->vclock; + + optimize_info_seq_step(info, &info_seq); b = scheme_optimize_expr(wcm->body, info, scheme_optimize_tail_context(context)); + if (init_vclock == info->vclock) { + /* body has no effect itself, so we can rewind the clock */ + info->vclock--; + } + + optimize_info_seq_done(info, &info_seq); + if (omittable_key(k, info) && scheme_omittable_expr(b, -1, 20, 0, info, info, 0, 0, ID_OMIT)) return make_discarding_first_sequence(v, b, info); @@ -3948,13 +4027,21 @@ static Scheme_Object * apply_values_optimize(Scheme_Object *data, Optimize_Info *info, int context) { Scheme_Object *f, *e; + Optimize_Info_Sequence info_seq; f = SCHEME_PTR1_VAL(data); e = SCHEME_PTR2_VAL(data); + optimize_info_seq_init(info, &info_seq); + f = scheme_optimize_expr(f, info, 0); + + optimize_info_seq_step(info, &info_seq); + e = scheme_optimize_expr(e, info, 0); + 
optimize_info_seq_done(info, &info_seq); + info->size += 1; info->vclock += 1; info->kclock += 1; @@ -4097,12 +4184,16 @@ begin0_optimize(Scheme_Object *obj, Optimize_Info *info, int context) int i, count, drop = 0, prev_size, single_result = 0; Scheme_Sequence *s = (Scheme_Sequence *)obj; Scheme_Object *le; + Optimize_Info_Sequence info_seq; count = s->count; + optimize_info_seq_init(info, &info_seq); for (i = 0; i < count; i++) { prev_size = info->size; + optimize_info_seq_step(info, &info_seq); + le = scheme_optimize_expr(s->array[i], info, (!i @@ -4148,6 +4239,8 @@ begin0_optimize(Scheme_Object *obj, Optimize_Info *info, int context) obj = (Scheme_Object *)s2; } + optimize_info_seq_done(info, &info_seq); + info->preserves_marks = 1; info->single_result = single_result; @@ -4708,6 +4801,7 @@ Scheme_Object * scheme_optimize_lets(Scheme_Object *form, Optimize_Info *info, int for_inline, int context) { Optimize_Info *sub_info, *body_info, *rhs_info; + Optimize_Info_Sequence info_seq; Scheme_Let_Header *head = (Scheme_Let_Header *)form; Scheme_Compiled_Let_Value *clv, *pre_body, *retry_start, *prev_body; Scheme_Object *body, *value, *ready_pairs = NULL, *rp_last = NULL, *ready_pairs_start; @@ -4960,6 +5054,8 @@ scheme_optimize_lets(Scheme_Object *form, Optimize_Info *info, int for_inline, i } } + optimize_info_seq_init(rhs_info, &info_seq); + prev_body = NULL; body = head->body; pre_body = NULL; @@ -4998,6 +5094,7 @@ scheme_optimize_lets(Scheme_Object *form, Optimize_Info *info, int for_inline, i } if (!skip_opts) { + optimize_info_seq_step(rhs_info, &info_seq); value = scheme_optimize_expr(pre_body->value, rhs_info, 0); pre_body->value = value; } else { @@ -5305,6 +5402,7 @@ scheme_optimize_lets(Scheme_Object *form, Optimize_Info *info, int for_inline, i use_psize = rhs_info->use_psize; rhs_info->use_psize = info->use_psize; + optimize_info_seq_step(rhs_info, &info_seq); value = scheme_optimize_expr(self_value, rhs_info, 0); if (!OPT_DISCOURAGE_EARLY_INLINE) @@ 
-5401,6 +5499,8 @@ scheme_optimize_lets(Scheme_Object *form, Optimize_Info *info, int for_inline, i body = pre_body->body; } + optimize_info_seq_done(rhs_info, &info_seq); + if (post_bind) optimize_info_done(rhs_info, body_info); else if (split_shift) @@ -5579,7 +5679,7 @@ optimize_closure_compilation(Scheme_Object *_data, Optimize_Info *info, int cont Scheme_Object *code, *ctx; Closure_Info *cl; mzshort dcs, *dcm; - int i, cnt; + int i, cnt, init_vclock, init_kclock; Scheme_Once_Used *first_once_used = NULL, *last_once_used = NULL; data = (Scheme_Closure_Data *)_data; @@ -5590,6 +5690,9 @@ optimize_closure_compilation(Scheme_Object *_data, Optimize_Info *info, int cont info = optimize_info_add_frame(info, data->num_params, data->num_params, SCHEME_LAMBDA_FRAME); + init_vclock = info->vclock; + init_kclock = info->kclock; + info->vclock += 1; /* model delayed evaluation as vclock increment */ info->kclock += 1; @@ -5661,6 +5764,10 @@ optimize_closure_compilation(Scheme_Object *_data, Optimize_Info *info, int cont cl->body_psize = info->psize; cl->has_nonleaf = info->has_nonleaf; + /* closure itself is not an effect */ + info->vclock = init_vclock; + info->kclock = init_kclock; + info->size++; data->closure_size = (cl->base_closure_size @@ -6051,6 +6158,7 @@ module_optimize(Scheme_Object *data, Optimize_Info *info, int context) Scheme_Hash_Table *originals = NULL; int cont, next_pos_ready = -1, inline_fuel, is_proc_def; Comp_Prefix *prev_cp; + Optimize_Info_Sequence info_seq; if (!m->comp_prefix) { /* already resolved */ @@ -6068,6 +6176,8 @@ module_optimize(Scheme_Object *data, Optimize_Info *info, int context) old_context = info->context; info->context = (Scheme_Object *)m; + optimize_info_seq_init(info, &info_seq); + prev_cp = info->cp; info->cp = m->comp_prefix; @@ -6176,6 +6286,7 @@ module_optimize(Scheme_Object *data, Optimize_Info *info, int context) info->inline_fuel = 2; } else inline_fuel = 0; + optimize_info_seq_step(info, &info_seq); e = 
scheme_optimize_expr(e, info, 0); if (is_proc_def && OPT_DISCOURAGE_EARLY_INLINE) { info->use_psize = 0; @@ -6352,6 +6463,7 @@ module_optimize(Scheme_Object *data, Optimize_Info *info, int context) } else old_sz = 0; + optimize_info_seq_step(info, &info_seq); e = scheme_optimize_expr(e, info, 0); SCHEME_VEC_ELS(m->bodies[0])[start_simltaneous] = e; @@ -6508,6 +6620,7 @@ module_optimize(Scheme_Object *data, Optimize_Info *info, int context) p = (k ? m->post_submodules : m->pre_submodules); if (p) { while (!SCHEME_NULLP(p)) { + optimize_info_seq_step(info, &info_seq); scheme_optimize_expr(SCHEME_CAR(p), info, 0); p = SCHEME_CDR(p); } @@ -6515,6 +6628,8 @@ module_optimize(Scheme_Object *data, Optimize_Info *info, int context) } } + optimize_info_seq_done(info, &info_seq); + return data; } @@ -7133,7 +7248,7 @@ Optimize_Info *scheme_optimize_info_create(Comp_Prefix *cp, int get_logger) info->type = scheme_rt_optimize_info; #endif info->inline_fuel = 32; - info->shift_fuel = 8; + info->shift_fuel = 16; info->cp = cp; if (get_logger) { @@ -7146,6 +7261,25 @@ Optimize_Info *scheme_optimize_info_create(Comp_Prefix *cp, int get_logger) return info; } +static void optimize_info_seq_init(Optimize_Info *info, Optimize_Info_Sequence *info_seq) +{ + info_seq->init_shift_fuel = info->shift_fuel; + info_seq->min_shift_fuel = info->shift_fuel; +} + +static void optimize_info_seq_step(Optimize_Info *info, Optimize_Info_Sequence *info_seq) +{ + if (info->shift_fuel < info_seq->min_shift_fuel) + info_seq->min_shift_fuel = info->shift_fuel; + info->shift_fuel = info_seq->init_shift_fuel; +} + +static void optimize_info_seq_done(Optimize_Info *info, Optimize_Info_Sequence *info_seq) +{ + if (info->shift_fuel > info_seq->min_shift_fuel) + info->shift_fuel = info_seq->min_shift_fuel; +} + void scheme_optimize_info_enforce_const(Optimize_Info *oi, int enforce_const) { oi->enforce_const = enforce_const;