optimizer: refine virtual clock, more precise shift-fuel tracking

Allow an effect-free `if` to not increment the effect-tracking
virtual clock (but increment the clock during branches, to avoid
moving computation into a branch).

Spend empty-`let`-elimination fuel more precisely, so that more
empty `let`s can be removed while still avoiding quadratic
compile times.
This commit is contained in:
Matthew Flatt 2014-06-20 14:55:02 +01:00
parent 25cf0ea610
commit 93fdbdc79c
2 changed files with 167 additions and 10 deletions

View File

@ -1552,6 +1552,29 @@
(let ([r (if z 1 (+ z z))])
(list (if z 1 4) r))))
;; Two nested `let`s whose right-hand sides are `if` expressions, each
;; variable used once in the body, are expected to match the form where
;; both `if`s appear directly as arguments to `values`.
;; (presumably `test-comp` compares the optimized/compiled results of its
;; two arguments -- confirm against its definition elsewhere in this file)
(test-comp '(lambda (a b c f)
(let ((d (if a
a
b)))
(let ((e
(if b
c
(if (= f 90000)
#f
(add1 c)))))
(values d e))))
'(lambda (a b c f)
(values (if a a b)
(if b c (if (= f 90000)
#f
(add1 c))))))
;; A `let`-bound `(+ x y)` that is used after an `(if x x y)` argument is
;; expected to match the form where `(+ x y)` is written in place of `z`,
;; i.e. the binding is moved past the `if`.
(test-comp '(lambda (x y)
(let ([z (+ x y)])
(list (if x x y) z)))
'(lambda (x y)
(list (if x x y) (+ x y))))
;; A `let-values` binding of two constants via `values`, followed by their
;; sum, is expected to match the constant 3.
(test-comp '(let-values ([(x y) (values 1 2)])
(+ x y))
3)

View File

@ -57,7 +57,11 @@ struct Optimize_Info
/* Propagated up and down the chain: */
int size;
int vclock; /* virtual clock that ticks for a side effect */
int vclock; /* virtual clock that ticks for a side effect or branch;
the clock is only compared between binding sites and
uses, so we can rewind the clock at a join after an
increment that models a branch (if the branch is not
taken or doesn't increment the clock) */
int kclock; /* virtual clock that ticks for a potential continuation capture */
int psize;
short inline_fuel, shift_fuel;
@ -81,6 +85,10 @@ struct Optimize_Info
Scheme_Hash_Tree *types; /* maps position (from this frame) to predicate */
};
/* Bookkeeping for shift fuel across a series of sub-expressions that
   are optimized one after another with the same Optimize_Info:
     init_shift_fuel - the Optimize_Info's `shift_fuel` as captured by
                       optimize_info_seq_init(); optimize_info_seq_step()
                       resets `shift_fuel` back to this value
     min_shift_fuel  - the smallest `shift_fuel` seen so far at a step;
                       optimize_info_seq_done() lowers `shift_fuel` to
                       this value when it is currently larger */
typedef struct Optimize_Info_Sequence {
int init_shift_fuel, min_shift_fuel;
} Optimize_Info_Sequence;
#define OPT_IS_MUTATED 0x1
#define OPT_ESCAPES_AFTER_K_TICK 0x2
#define OPT_LOCAL_TYPE_ARG_SHIFT 2
@ -123,6 +131,10 @@ static Optimize_Info *optimize_info_add_frame(Optimize_Info *info, int orig, int
static int optimize_info_get_shift(Optimize_Info *info, int pos);
static void optimize_info_done(Optimize_Info *info, Optimize_Info *parent);
static void optimize_info_seq_init(Optimize_Info *info, Optimize_Info_Sequence *info_seq);
static void optimize_info_seq_step(Optimize_Info *info, Optimize_Info_Sequence *info_seq);
static void optimize_info_seq_done(Optimize_Info *info, Optimize_Info_Sequence *info_seq);
static Scheme_Object *estimate_closure_size(Scheme_Object *e);
static Scheme_Object *no_potential_size(Scheme_Object *value);
@ -1106,12 +1118,14 @@ Scheme_Object *scheme_make_struct_proc_shape(intptr_t k)
}
static int single_valued_noncm_expression(Scheme_Object *expr, int fuel)
/* Non-omittable/non-copyable but single-valued expressions that are not sensitive
/* Not necessarily omittable or copyable, but single-valued expressions that are not sensitive
to being in tail position. */
{
Scheme_Object *rator = NULL;
switch (SCHEME_TYPE(expr)) {
case scheme_local_type:
return 1;
case scheme_compiled_toplevel_type:
return 1;
case scheme_application_type:
@ -1769,6 +1783,7 @@ Scheme_Object *optimize_for_inline(Optimize_Info *info, Scheme_Object *le, int a
if ((sz >= 0) && (single_use || (sz <= threshold))) {
Optimize_Info *sub_info;
if (nested_count) {
/* Pessimistically assume that we moved inside past an effect */
sub_info = optimize_info_add_frame(info, nested_count, nested_count, 0);
sub_info->vclock++;
sub_info->kclock++;
@ -2494,6 +2509,7 @@ static Scheme_Object *optimize_application(Scheme_Object *o, Optimize_Info *info
Scheme_Object *le;
Scheme_App_Rec *app;
int i, n, rator_flags = 0, sub_context = 0;
Optimize_Info_Sequence info_seq;
app = (Scheme_App_Rec *)o;
@ -2506,6 +2522,8 @@ static Scheme_Object *optimize_application(Scheme_Object *o, Optimize_Info *info
n = app->num_args + 1;
optimize_info_seq_init(info, &info_seq);
for (i = 0; i < n; i++) {
if (!i) {
le = optimize_for_inline(info, app->args[i], n - 1, app, NULL, NULL, &rator_flags, context, 0);
@ -2521,6 +2539,7 @@ static Scheme_Object *optimize_application(Scheme_Object *o, Optimize_Info *info
sub_context = (ty << OPT_CONTEXT_TYPE_SHIFT);
}
optimize_info_seq_step(info, &info_seq);
le = scheme_optimize_expr(app->args[i], info, sub_context);
app->args[i] = le;
@ -2532,6 +2551,8 @@ static Scheme_Object *optimize_application(Scheme_Object *o, Optimize_Info *info
}
}
optimize_info_seq_done(info, &info_seq);
/* Check for (apply ... (list ...)) after some optimizations: */
le = direct_apply((Scheme_Object *)app, app->args[0], app->args[app->num_args], info);
if (le) return finish_optimize_app(le, info, context, rator_flags);
@ -2784,6 +2805,7 @@ static Scheme_Object *optimize_application2(Scheme_Object *o, Optimize_Info *inf
Scheme_App2_Rec *app;
Scheme_Object *le;
int rator_flags = 0, sub_context = 0, ty;
Optimize_Info_Sequence info_seq;
app = (Scheme_App2_Rec *)o;
@ -2794,6 +2816,8 @@ static Scheme_Object *optimize_application2(Scheme_Object *o, Optimize_Info *inf
if (le)
return le;
optimize_info_seq_init(info, &info_seq);
le = scheme_optimize_expr(app->rator, info, 0);
app->rator = le;
@ -2812,9 +2836,13 @@ static Scheme_Object *optimize_application2(Scheme_Object *o, Optimize_Info *inf
sub_context |= (ty << OPT_CONTEXT_TYPE_SHIFT);
}
optimize_info_seq_step(info, &info_seq);
le = scheme_optimize_expr(app->rand, info, sub_context);
app->rand = le;
optimize_info_seq_done(info, &info_seq);
return finish_optimize_application2(app, info, context, rator_flags);
}
@ -3037,6 +3065,7 @@ static Scheme_Object *optimize_application3(Scheme_Object *o, Optimize_Info *inf
Scheme_App3_Rec *app;
Scheme_Object *le;
int rator_flags = 0, sub_context = 0, ty, flags;
Optimize_Info_Sequence info_seq;
app = (Scheme_App3_Rec *)o;
@ -3061,6 +3090,8 @@ static Scheme_Object *optimize_application3(Scheme_Object *o, Optimize_Info *inf
if (le)
return le;
optimize_info_seq_init(info, &info_seq);
le = scheme_optimize_expr(app->rator, info, sub_context);
app->rator = le;
@ -3077,6 +3108,8 @@ static Scheme_Object *optimize_application3(Scheme_Object *o, Optimize_Info *inf
if (ty)
sub_context |= (ty << OPT_CONTEXT_TYPE_SHIFT);
optimize_info_seq_step(info, &info_seq);
le = scheme_optimize_expr(app->rand1, info, sub_context);
app->rand1 = le;
@ -3088,9 +3121,13 @@ static Scheme_Object *optimize_application3(Scheme_Object *o, Optimize_Info *inf
else
sub_context &= ~OPT_CONTEXT_TYPE_MASK;
optimize_info_seq_step(info, &info_seq);
le = scheme_optimize_expr(app->rand2, info, sub_context);
app->rand2 = le;
optimize_info_seq_done(info, &info_seq);
/* Check for (apply ... (list ...)) after some optimizations: */
le = direct_apply((Scheme_Object *)app, app->rator, app->rand2, info);
if (le) return finish_optimize_app(le, info, context, rator_flags);
@ -3405,11 +3442,15 @@ static Scheme_Object *optimize_sequence(Scheme_Object *o, Optimize_Info *info, i
Scheme_Object *le;
int i, count, prev_size;
int drop = 0, preserves_marks = 0, single_result = 0;
Optimize_Info_Sequence info_seq;
optimize_info_seq_init(info, &info_seq);
count = s->count;
for (i = 0; i < count; i++) {
prev_size = info->size;
optimize_info_seq_step(info, &info_seq);
le = scheme_optimize_expr(s->array[i], info,
((i + 1 == count)
? scheme_optimize_tail_context(context)
@ -3432,6 +3473,8 @@ static Scheme_Object *optimize_sequence(Scheme_Object *o, Optimize_Info *info, i
}
}
optimize_info_seq_done(info, &info_seq);
info->preserves_marks = preserves_marks;
info->single_result = single_result;
@ -3568,7 +3611,8 @@ static Scheme_Object *optimize_branch(Scheme_Object *o, Optimize_Info *info, int
Scheme_Branch_Rec *b;
Scheme_Object *t, *tb, *fb;
Scheme_Hash_Tree *old_types;
int preserves_marks = 1, single_result = 1, init_kclock, then_kclock;
int preserves_marks = 1, single_result = 1, same_then_vclock, init_vclock, init_kclock, then_kclock;
Optimize_Info_Sequence info_seq;
b = (Scheme_Branch_Rec *)o;
@ -3589,6 +3633,8 @@ static Scheme_Object *optimize_branch(Scheme_Object *o, Optimize_Info *info, int
}
}
optimize_info_seq_init(info, &info_seq);
t = scheme_optimize_expr(t, info, OPT_CONTEXT_BOOLEAN);
/* Try optimize: (if (not x) y z) => (if x z y) */
@ -3651,15 +3697,14 @@ static Scheme_Object *optimize_branch(Scheme_Object *o, Optimize_Info *info, int
else
((Scheme_Compiled_Let_Value *)inside)->body = scheme_void;
return make_sequence_2(t,
scheme_optimize_expr((Scheme_Object *)b, info, context));
scheme_optimize_expr((Scheme_Object *)b, info,
scheme_optimize_tail_context(context)));
}
}
info->vclock += 1; /* model branch as clock increment */
init_kclock = info->kclock;
if (SCHEME_TYPE(t) > _scheme_compiled_values_types_) {
/* Branch is statically known */
optimize_info_seq_done(info, &info_seq);
info->size -= 1;
if (SCHEME_FALSEP(t))
return scheme_optimize_expr(fb, info, scheme_optimize_tail_context(context));
@ -3667,6 +3712,12 @@ static Scheme_Object *optimize_branch(Scheme_Object *o, Optimize_Info *info, int
return scheme_optimize_expr(tb, info, scheme_optimize_tail_context(context));
}
optimize_info_seq_step(info, &info_seq);
info->vclock += 1; /* model branch as clock increment */
init_vclock = info->vclock;
init_kclock = info->kclock;
old_types = info->types;
add_types(t, info, 5);
@ -3681,10 +3732,15 @@ static Scheme_Object *optimize_branch(Scheme_Object *o, Optimize_Info *info, int
else if (info->single_result < 0)
single_result = -1;
same_then_vclock = (init_vclock == info->vclock);
info->types = old_types;
then_kclock = info->kclock;
info->vclock = init_vclock;
info->kclock = init_kclock;
optimize_info_seq_step(info, &info_seq);
fb = scheme_optimize_expr(fb, info, scheme_optimize_tail_context(context));
if (!info->preserves_marks)
@ -3701,10 +3757,17 @@ static Scheme_Object *optimize_branch(Scheme_Object *o, Optimize_Info *info, int
info->types = old_types; /* could try to take an intersection here ... */
info->vclock += 1; /* model join as clock increment */
if (same_then_vclock && (init_vclock == info->vclock)) {
/* we can rewind the vclock to just after the test, because the
`if` as a whole has no effect */
info->vclock--;
}
info->preserves_marks = preserves_marks;
info->single_result = single_result;
optimize_info_seq_done(info, &info_seq);
/* Try optimize: (if x x #f) => x */
if (SAME_TYPE(SCHEME_TYPE(t), scheme_local_type)
&& SAME_TYPE(SCHEME_TYPE(tb), scheme_local_type)
@ -3763,18 +3826,34 @@ static Scheme_Object *optimize_wcm(Scheme_Object *o, Optimize_Info *info, int co
{
Scheme_With_Continuation_Mark *wcm = (Scheme_With_Continuation_Mark *)o;
Scheme_Object *k, *v, *b;
int init_vclock;
Optimize_Info_Sequence info_seq;
optimize_info_seq_init(info, &info_seq);
k = scheme_optimize_expr(wcm->key, info, 0);
optimize_info_seq_step(info, &info_seq);
v = scheme_optimize_expr(wcm->val, info, 0);
/* The presence of a key can be detected by other expressions,
so increment vclock to prevent expressions from incorrectly
moving under the mark: */
info->vclock++;
init_vclock = info->vclock;
optimize_info_seq_step(info, &info_seq);
b = scheme_optimize_expr(wcm->body, info, scheme_optimize_tail_context(context));
if (init_vclock == info->vclock) {
/* body has no effect itself, so we can rewind the clock */
info->vclock--;
}
optimize_info_seq_done(info, &info_seq);
if (omittable_key(k, info)
&& scheme_omittable_expr(b, -1, 20, 0, info, info, 0, 0, ID_OMIT))
return make_discarding_first_sequence(v, b, info);
@ -3948,13 +4027,21 @@ static Scheme_Object *
apply_values_optimize(Scheme_Object *data, Optimize_Info *info, int context)
{
Scheme_Object *f, *e;
Optimize_Info_Sequence info_seq;
f = SCHEME_PTR1_VAL(data);
e = SCHEME_PTR2_VAL(data);
optimize_info_seq_init(info, &info_seq);
f = scheme_optimize_expr(f, info, 0);
optimize_info_seq_step(info, &info_seq);
e = scheme_optimize_expr(e, info, 0);
optimize_info_seq_done(info, &info_seq);
info->size += 1;
info->vclock += 1;
info->kclock += 1;
@ -4097,12 +4184,16 @@ begin0_optimize(Scheme_Object *obj, Optimize_Info *info, int context)
int i, count, drop = 0, prev_size, single_result = 0;
Scheme_Sequence *s = (Scheme_Sequence *)obj;
Scheme_Object *le;
Optimize_Info_Sequence info_seq;
count = s->count;
optimize_info_seq_init(info, &info_seq);
for (i = 0; i < count; i++) {
prev_size = info->size;
optimize_info_seq_step(info, &info_seq);
le = scheme_optimize_expr(s->array[i],
info,
(!i
@ -4148,6 +4239,8 @@ begin0_optimize(Scheme_Object *obj, Optimize_Info *info, int context)
obj = (Scheme_Object *)s2;
}
optimize_info_seq_done(info, &info_seq);
info->preserves_marks = 1;
info->single_result = single_result;
@ -4708,6 +4801,7 @@ Scheme_Object *
scheme_optimize_lets(Scheme_Object *form, Optimize_Info *info, int for_inline, int context)
{
Optimize_Info *sub_info, *body_info, *rhs_info;
Optimize_Info_Sequence info_seq;
Scheme_Let_Header *head = (Scheme_Let_Header *)form;
Scheme_Compiled_Let_Value *clv, *pre_body, *retry_start, *prev_body;
Scheme_Object *body, *value, *ready_pairs = NULL, *rp_last = NULL, *ready_pairs_start;
@ -4960,6 +5054,8 @@ scheme_optimize_lets(Scheme_Object *form, Optimize_Info *info, int for_inline, i
}
}
optimize_info_seq_init(rhs_info, &info_seq);
prev_body = NULL;
body = head->body;
pre_body = NULL;
@ -4998,6 +5094,7 @@ scheme_optimize_lets(Scheme_Object *form, Optimize_Info *info, int for_inline, i
}
if (!skip_opts) {
optimize_info_seq_step(rhs_info, &info_seq);
value = scheme_optimize_expr(pre_body->value, rhs_info, 0);
pre_body->value = value;
} else {
@ -5305,6 +5402,7 @@ scheme_optimize_lets(Scheme_Object *form, Optimize_Info *info, int for_inline, i
use_psize = rhs_info->use_psize;
rhs_info->use_psize = info->use_psize;
optimize_info_seq_step(rhs_info, &info_seq);
value = scheme_optimize_expr(self_value, rhs_info, 0);
if (!OPT_DISCOURAGE_EARLY_INLINE)
@ -5401,6 +5499,8 @@ scheme_optimize_lets(Scheme_Object *form, Optimize_Info *info, int for_inline, i
body = pre_body->body;
}
optimize_info_seq_done(rhs_info, &info_seq);
if (post_bind)
optimize_info_done(rhs_info, body_info);
else if (split_shift)
@ -5579,7 +5679,7 @@ optimize_closure_compilation(Scheme_Object *_data, Optimize_Info *info, int cont
Scheme_Object *code, *ctx;
Closure_Info *cl;
mzshort dcs, *dcm;
int i, cnt;
int i, cnt, init_vclock, init_kclock;
Scheme_Once_Used *first_once_used = NULL, *last_once_used = NULL;
data = (Scheme_Closure_Data *)_data;
@ -5590,6 +5690,9 @@ optimize_closure_compilation(Scheme_Object *_data, Optimize_Info *info, int cont
info = optimize_info_add_frame(info, data->num_params, data->num_params,
SCHEME_LAMBDA_FRAME);
init_vclock = info->vclock;
init_kclock = info->kclock;
info->vclock += 1; /* model delayed evaluation as vclock increment */
info->kclock += 1;
@ -5661,6 +5764,10 @@ optimize_closure_compilation(Scheme_Object *_data, Optimize_Info *info, int cont
cl->body_psize = info->psize;
cl->has_nonleaf = info->has_nonleaf;
/* closure itself is not an effect */
info->vclock = init_vclock;
info->kclock = init_kclock;
info->size++;
data->closure_size = (cl->base_closure_size
@ -6051,6 +6158,7 @@ module_optimize(Scheme_Object *data, Optimize_Info *info, int context)
Scheme_Hash_Table *originals = NULL;
int cont, next_pos_ready = -1, inline_fuel, is_proc_def;
Comp_Prefix *prev_cp;
Optimize_Info_Sequence info_seq;
if (!m->comp_prefix) {
/* already resolved */
@ -6068,6 +6176,8 @@ module_optimize(Scheme_Object *data, Optimize_Info *info, int context)
old_context = info->context;
info->context = (Scheme_Object *)m;
optimize_info_seq_init(info, &info_seq);
prev_cp = info->cp;
info->cp = m->comp_prefix;
@ -6176,6 +6286,7 @@ module_optimize(Scheme_Object *data, Optimize_Info *info, int context)
info->inline_fuel = 2;
} else
inline_fuel = 0;
optimize_info_seq_step(info, &info_seq);
e = scheme_optimize_expr(e, info, 0);
if (is_proc_def && OPT_DISCOURAGE_EARLY_INLINE) {
info->use_psize = 0;
@ -6352,6 +6463,7 @@ module_optimize(Scheme_Object *data, Optimize_Info *info, int context)
} else
old_sz = 0;
optimize_info_seq_step(info, &info_seq);
e = scheme_optimize_expr(e, info, 0);
SCHEME_VEC_ELS(m->bodies[0])[start_simltaneous] = e;
@ -6508,6 +6620,7 @@ module_optimize(Scheme_Object *data, Optimize_Info *info, int context)
p = (k ? m->post_submodules : m->pre_submodules);
if (p) {
while (!SCHEME_NULLP(p)) {
optimize_info_seq_step(info, &info_seq);
scheme_optimize_expr(SCHEME_CAR(p), info, 0);
p = SCHEME_CDR(p);
}
@ -6515,6 +6628,8 @@ module_optimize(Scheme_Object *data, Optimize_Info *info, int context)
}
}
optimize_info_seq_done(info, &info_seq);
return data;
}
@ -7133,7 +7248,7 @@ Optimize_Info *scheme_optimize_info_create(Comp_Prefix *cp, int get_logger)
info->type = scheme_rt_optimize_info;
#endif
info->inline_fuel = 32;
info->shift_fuel = 8;
info->shift_fuel = 16;
info->cp = cp;
if (get_logger) {
@ -7146,6 +7261,25 @@ Optimize_Info *scheme_optimize_info_create(Comp_Prefix *cp, int get_logger)
return info;
}
static void optimize_info_seq_init(Optimize_Info *info, Optimize_Info_Sequence *info_seq)
/* Begins tracking a sequence of sub-expressions: the current shift fuel
   becomes both the amount restored at each step and the initial
   running minimum. */
{
  int start_fuel;

  start_fuel = info->shift_fuel;

  info_seq->min_shift_fuel = start_fuel;
  info_seq->init_shift_fuel = start_fuel;
}
static void optimize_info_seq_step(Optimize_Info *info, Optimize_Info_Sequence *info_seq)
/* Called before each sub-expression of a sequence: folds the fuel that
   is currently left into the running minimum, then resets the
   available fuel to the amount recorded at the start of the sequence. */
{
  int remaining;

  remaining = info->shift_fuel;

  if (info_seq->min_shift_fuel > remaining) {
    /* less fuel is left than at any earlier step */
    info_seq->min_shift_fuel = remaining;
  }

  info->shift_fuel = info_seq->init_shift_fuel;
}
static void optimize_info_seq_done(Optimize_Info *info, Optimize_Info_Sequence *info_seq)
/* Ends a sequence: the shift fuel left in `info` is capped at the
   smallest amount recorded by any step, and is never raised. */
{
  int lowest;

  lowest = info_seq->min_shift_fuel;

  if (lowest < info->shift_fuel) {
    info->shift_fuel = lowest;
  }
}
void scheme_optimize_info_enforce_const(Optimize_Info *oi, int enforce_const)
{
oi->enforce_const = enforce_const;