From 2bfb851ccc49b3de72e5c5d60b8d337d890a068e Mon Sep 17 00:00:00 2001 From: Matthew Flatt Date: Fri, 4 Mar 2016 16:05:10 -0700 Subject: [PATCH] optimizer: generalize intraprocedural type tracking Enable the optimizer to figure to figure out that a loop argument is always a real number, for example, in much the same way that it can detect fixnums and flonums for unboxing. Unboxing information was only needed at the resolve level, but `real?` information is useful only to the optimizer, so the generalization enables the optimizer to reach approximations of type information earlier (e.g., among a subset of a function's arguments). --- .../tests/racket/optimize.rktl | 14 + racket/src/racket/src/compile.c | 28 +- racket/src/racket/src/mzmark_compile.inc | 6 +- racket/src/racket/src/mzmarksrc.c | 3 +- racket/src/racket/src/optimize.c | 415 ++++++++++++------ racket/src/racket/src/resolve.c | 25 +- racket/src/racket/src/schpriv.h | 29 +- racket/src/racket/src/stypes.h | 145 +++--- racket/src/racket/src/type.c | 4 +- 9 files changed, 434 insertions(+), 235 deletions(-) diff --git a/pkgs/racket-test-core/tests/racket/optimize.rktl b/pkgs/racket-test-core/tests/racket/optimize.rktl index 2dedca9cd6..5faa402581 100644 --- a/pkgs/racket-test-core/tests/racket/optimize.rktl +++ b/pkgs/racket-test-core/tests/racket/optimize.rktl @@ -2288,6 +2288,20 @@ (+ (- y x) (+ x y)) (void)))) +(parameterize ([compile-context-preservation-enabled + ;; Avoid different amounts of unrolling + #t]) + ;; Inferece of loop variable as number should allow + ;; additions to be reordered: + (test-comp '(lambda () + (let loop ([n 0] [m 9]) + (let ([a (+ n 9)] + [b (+ m 10)]) + (loop b a)))) + '(lambda () + (let loop ([n 0] [m 9]) + (loop (+ m 10) (+ n 9)))))) + (test-comp '(lambda (z) (let-values ([(x y) (if z diff --git a/racket/src/racket/src/compile.c b/racket/src/racket/src/compile.c index 5c655f8240..a85d500513 100644 --- a/racket/src/racket/src/compile.c +++ b/racket/src/racket/src/compile.c @@ -4205,7 +4205,7 @@ inner_compile_list(Scheme_Object *form, Scheme_Comp_Env *env, static Scheme_Object *compile_application(Scheme_Object *form, Scheme_Comp_Env *env, Scheme_Compile_Info *rec, int drec) { - Scheme_Object *result; + Scheme_Object *result, *rator; int len; form = scheme_stx_taint_disarm(form, NULL); @@ -4221,7 +4221,31 @@ static Scheme_Object *compile_application(Scheme_Object *form, Scheme_Comp_Env * form = inner_compile_list(form, scheme_no_defines(env), rec, drec, 1); result = scheme_make_application(form, NULL); - + + /* Record which application this is for a variable that is used only in + application positions. */ + if (SAME_TYPE(SCHEME_TYPE(result), scheme_application_type)) + rator = ((Scheme_App_Rec *)result)->args[0]; + else if (SAME_TYPE(SCHEME_TYPE(result), scheme_application2_type)) + rator = ((Scheme_App2_Rec *)result)->rator; + else if (SAME_TYPE(SCHEME_TYPE(result), scheme_application3_type)) + rator = ((Scheme_App3_Rec *)result)->rator; + else + rator = NULL; + if (rator) { + rator = scheme_optimize_extract_tail_inside(rator); + if (SAME_TYPE(SCHEME_TYPE(rator), scheme_ir_local_type)) { + if (SCHEME_VAR(rator)->use_count < SCHEME_USE_COUNT_INF) { + if (SAME_TYPE(SCHEME_TYPE(result), scheme_application_type)) + SCHEME_APPN_FLAGS((Scheme_App_Rec *)result) |= SCHEME_VAR(rator)->use_count; + else if (SAME_TYPE(SCHEME_TYPE(result), scheme_application2_type)) + SCHEME_APPN_FLAGS((Scheme_App2_Rec *)result) |= SCHEME_VAR(rator)->use_count; + else if (SAME_TYPE(SCHEME_TYPE(result), scheme_application3_type)) + SCHEME_APPN_FLAGS((Scheme_App3_Rec *)result) |= SCHEME_VAR(rator)->use_count; + } + } + } + return result; } diff --git a/racket/src/racket/src/mzmark_compile.inc b/racket/src/racket/src/mzmark_compile.inc index ede28a1894..b783c06915 100644 --- a/racket/src/racket/src/mzmark_compile.inc +++ b/racket/src/racket/src/mzmark_compile.inc @@ -14,7 +14,8 @@ static int mark_ir_lambda_info_MARK(void *p, struct NewGC *gc) { gcMARK2(i->base_closure, gc); gcMARK2(i->vars, gc); - gcMARK2(i->local_type_map, gc); + gcMARK2(i->arg_types, gc); + gcMARK2(i->arg_type_contributors, gc); # ifdef GC_NO_SIZE_NEEDED_FROM_PROCS return 0; @@ -31,7 +32,8 @@ static int mark_ir_lambda_info_FIXUP(void *p, struct NewGC *gc) { gcFIXUP2(i->base_closure, gc); gcFIXUP2(i->vars, gc); - gcFIXUP2(i->local_type_map, gc); + gcFIXUP2(i->arg_types, gc); + gcFIXUP2(i->arg_type_contributors, gc); # ifdef GC_NO_SIZE_NEEDED_FROM_PROCS return 0; diff --git a/racket/src/racket/src/mzmarksrc.c b/racket/src/racket/src/mzmarksrc.c index bca3f2b20b..e8903b7f6a 100644 --- a/racket/src/racket/src/mzmarksrc.c +++ b/racket/src/racket/src/mzmarksrc.c @@ -2260,7 +2260,8 @@ mark_ir_lambda_info { gcMARK2(i->base_closure, gc); gcMARK2(i->vars, gc); - gcMARK2(i->local_type_map, gc); + gcMARK2(i->arg_types, gc); + gcMARK2(i->arg_type_contributors, gc); size: gcBYTES_TO_WORDS(sizeof(Scheme_IR_Lambda_Info)); diff --git a/racket/src/racket/src/optimize.c b/racket/src/racket/src/optimize.c index 13f52e6079..509fffcaa9 100644 --- a/racket/src/racket/src/optimize.c +++ b/racket/src/racket/src/optimize.c @@ -103,12 +103,11 @@ typedef struct Optimize_Info_Sequence { int init_flatten_fuel, min_flatten_fuel; } Optimize_Info_Sequence; -static char *get_closure_local_type_map(Scheme_Lambda *lam, int arg_n, int *ok); -static void set_closure_local_type_map(Scheme_Lambda *lam, char *local_type_map); -static void merge_closure_local_type_map(Scheme_Lambda *lam1, Scheme_Lambda *lam2); -static int closure_body_size(Scheme_Lambda *lam, int check_assign, - Optimize_Info *info, int *is_leaf); -static int closure_has_top_level(Scheme_Lambda *lam); +static void merge_lambda_arg_types(Scheme_Lambda *lam1, Scheme_Lambda *lam2); +static void check_lambda_arg_types_registered(Scheme_Lambda *lam, int app_count); +static int lambda_body_size_plus_info(Scheme_Lambda *lam, int check_assign, + Optimize_Info *info, int *is_leaf); +static int lambda_has_top_level(Scheme_Lambda *lam); static int wants_local_type_arguments(Scheme_Object *rator, int argpos); @@ -123,7 +122,6 @@ static void add_type(Optimize_Info *info, Scheme_Object *var, Scheme_Object *pre static void merge_types(Optimize_Info *src_info, Optimize_Info *info, Scheme_Hash_Tree *skip_vars); static Scheme_Object *lookup_constant_proc(Optimize_Info *info, Scheme_Object *rand); -static int predicate_to_local_type(Scheme_Object *pred); static Scheme_Object *expr_implies_predicate(Scheme_Object *expr, Optimize_Info *info, int *_involves_k_cross, int fuel); static int produces_local_type(Scheme_Object *rator, int argc); @@ -165,6 +163,10 @@ static int movable_expression(Scheme_Object *expr, Optimize_Info *info, #define SCHEME_LAMBDAP(vals_expr) (SAME_TYPE(SCHEME_TYPE(vals_expr), scheme_ir_lambda_type) \ || SAME_TYPE(SCHEME_TYPE(vals_expr), scheme_case_lambda_sequence_type)) +#define SCHEME_WILL_BE_LAMBDAP(v) SAME_TYPE(SCHEME_TYPE(v), scheme_will_be_lambda_type) +#define SCHEME_WILL_BE_LAMBDA_SIZE(v) SCHEME_PINT_VAL(v) +#define SCHEME_WILL_BE_LAMBDA(v) SCHEME_IPTR_VAL(v) + static int lambda_body_size(Scheme_Object *o, int less_args); typedef struct Scheme_Once_Used { @@ -897,6 +899,13 @@ static void extract_tail_inside(Scheme_Object **_t2, Scheme_Object **_inside) } } +Scheme_Object *scheme_optimize_extract_tail_inside(Scheme_Object *t2) +{ + Scheme_Object *inside; + extract_tail_inside(&t2, &inside); + return t2; +} + /*========================================================================*/ /* detecting `make-struct-type` calls and struct shapes */ /*========================================================================*/ @@ -1593,7 +1602,7 @@ int scheme_is_ir_lambda(Scheme_Object *o, int can_be_closed, int can_be_liftable if (!can_be_closed && !lam->closure_size) return 0; /* Because procs that reference only globals are lifted: */ - if (!can_be_liftable && (lam->closure_size == 1) && closure_has_top_level(lam)) + if (!can_be_liftable && (lam->closure_size == 1) && lambda_has_top_level(lam)) return 0; } return 1; @@ -1815,14 +1824,21 @@ static int estimate_expr_size(Scheme_Object *expr, int sz, int fuel) static Scheme_Object *estimate_closure_size(Scheme_Object *e) { + Scheme_Object *wbl; int sz; sz = estimate_expr_size(e, 0, 32); - return scheme_box(scheme_make_integer(sz)); + + wbl = scheme_alloc_object(); + wbl->type = scheme_will_be_lambda_type; + SCHEME_WILL_BE_LAMBDA_SIZE(wbl) = sz; + SCHEME_WILL_BE_LAMBDA(wbl) = e; + + return wbl; } static Scheme_Object *no_potential_size(Scheme_Object *v) { - if (v && SCHEME_BOXP(v)) + if (v && SCHEME_WILL_BE_LAMBDAP(v)) return NULL; else return v; @@ -1961,8 +1977,8 @@ int check_potential_size(Scheme_Object *var) Scheme_Object* n; n = SCHEME_VAR(var)->optimize.known_val; - if (n && SCHEME_BOXP(n)) { - return (int)SCHEME_INT_VAL(SCHEME_BOX_VAL(n)); + if (n && SCHEME_WILL_BE_LAMBDAP(n)) { + return SCHEME_PINT_VAL(n); } return 0; @@ -2050,8 +2066,8 @@ Scheme_Object *optimize_for_inline(Optimize_Info *info, Scheme_Object *le, int a } if (SAME_TYPE(SCHEME_TYPE(le), scheme_ir_toplevel_type) && info->top_level_consts) { le = scheme_hash_get(info->top_level_consts, scheme_make_integer(pos)); - if (le && SCHEME_BOXP(le)) { - psize = SCHEME_INT_VAL(SCHEME_BOX_VAL(le)); + if (le && SCHEME_WILL_BE_LAMBDAP(le)) { + psize = SCHEME_WILL_BE_LAMBDA_SIZE(le); le = NULL; } if (!le) @@ -2111,7 +2127,7 @@ Scheme_Object *optimize_for_inline(Optimize_Info *info, Scheme_Object *le, int a return NULL; } - sz = closure_body_size(lam, 1, info, &is_leaf); + sz = lambda_body_size_plus_info(lam, 1, info, &is_leaf); if (is_leaf) { /* encourage inlining of leaves: */ sz >>= 2; @@ -2220,7 +2236,7 @@ Scheme_Object *optimize_for_inline(Optimize_Info *info, Scheme_Object *le, int a static int is_local_type_expression(Scheme_Object *expr, Optimize_Info *info) /* Get an unboxing type (e.g., flonum) for `expr` */ { - return predicate_to_local_type(expr_implies_predicate(expr, info, NULL, 5)); + return scheme_predicate_to_local_type(expr_implies_predicate(expr, info, NULL, 5)); } static void register_local_argument_types(Scheme_App_Rec *app, Scheme_App2_Rec *app2, Scheme_App3_Rec *app3, @@ -2230,57 +2246,96 @@ static void register_local_argument_types(Scheme_App_Rec *app, Scheme_App2_Rec * procedure will accept unboxed arguments at run time. */ { Scheme_Object *rator, *rand, *le; - int n, i; + int n, i, nth_app; if (app) { rator = app->args[0]; n = app->num_args; + nth_app = SCHEME_APPN_FLAGS(app) & APPN_POSITION_MASK; } else if (app2) { rator = app2->rator; n = 1; + nth_app = SCHEME_APPN_FLAGS(app2) & APPN_POSITION_MASK; } else { rator = app3->rator; n = 2; + nth_app = SCHEME_APPN_FLAGS(app3) & APPN_POSITION_MASK; } if (SAME_TYPE(SCHEME_TYPE(rator), scheme_ir_local_type)) { - { - le = optimize_info_lookup_lambda(rator); - if (le && SAME_TYPE(SCHEME_TYPE(le), scheme_ir_lambda_type)) { - Scheme_Lambda *lam = (Scheme_Lambda *)le; - char *map; - int ok; + le = optimize_info_lookup_lambda(rator); + if (SCHEME_VAR(rator)->optimize.known_val + && SCHEME_WILL_BE_LAMBDAP(SCHEME_VAR(rator)->optimize.known_val)) + le = SCHEME_WILL_BE_LAMBDA(SCHEME_VAR(rator)->optimize.known_val); - map = get_closure_local_type_map(lam, n, &ok); + if (le && SAME_TYPE(SCHEME_TYPE(le), scheme_ir_lambda_type)) { + Scheme_Lambda *lam = (Scheme_Lambda *)le; + if ((lam->num_params == n) + && !(SCHEME_LAMBDA_FLAGS(lam) & LAMBDA_HAS_REST)) { + Scheme_Object *pred; - if (ok) { - for (i = 0; i < n; i++) { - int ct; - - if (app) - rand = app->args[i+1]; - else if (app2) - rand = app2->rand; - else { - if (!i) - rand = app3->rand1; - else - rand = app3->rand2; - } - - ct = is_local_type_expression(rand, info); - if (ct) { - if (!map) { - map = MALLOC_N_ATOMIC(char, n); - memset(map, ct, n); - memset(map, 0, i); - } - } - if (map) - map[i] = ct; + if (!lam->ir_info->arg_types) { + Scheme_Object **arg_types; + short *contributors; + arg_types = MALLOC_N(Scheme_Object*, n); + lam->ir_info->arg_types = arg_types; + contributors = MALLOC_N_ATOMIC(short, n); + memset(contributors, 0, sizeof(short) * n); + lam->ir_info->arg_type_contributors = contributors; + } + + for (i = 0; i < n; i++) { + if (app) + rand = app->args[i+1]; + else if (app2) + rand = app2->rand; + else { + if (!i) + rand = app3->rand1; + else + rand = app3->rand2; } - set_closure_local_type_map(lam, map); + if (lam->ir_info->arg_types[i] + || !lam->ir_info->arg_type_contributors[i]) { + int widen_to_top = 0; + + pred = expr_implies_predicate(rand, info, NULL, 5); + + if (pred) { + if (!lam->ir_info->arg_type_contributors[i]) { + lam->ir_info->arg_types[i] = pred; + if (nth_app) + lam->ir_info->arg_type_contributors[i] |= (1 << (nth_app-1)); + } else if (predicate_implies(pred, lam->ir_info->arg_types[i])) { + /* ok */ + if (nth_app) + lam->ir_info->arg_type_contributors[i] |= (1 << (nth_app-1)); + } else if (predicate_implies(lam->ir_info->arg_types[i], pred)) { + /* widen */ + lam->ir_info->arg_types[i] = pred; + lam->ir_info->arg_type_contributors[i] |= (1 << (nth_app-1)); + } else + widen_to_top = 1; + } else + widen_to_top = 1; + + if (widen_to_top) { + if (nth_app) { + /* Since we cant provide a nice type right now, just + don't check in, in case a future iteration provides + better information. If we never check in with a type, + it will count as widening in the end. */ + } else { + /* since we don't have an identity, the lambda won't + be able to tell whether all apps have checked in, + so we have to registers a "top" as an anonymous + contributor. */ + lam->ir_info->arg_type_contributors[i] |= (1 << (SCHEME_USE_COUNT_INF-1)); + lam->ir_info->arg_types[i] = NULL; + } + } + } } } } @@ -2463,7 +2518,7 @@ static Scheme_Object *local_type_to_predicate(int t) return NULL; } -static int predicate_to_local_type(Scheme_Object *pred) +int scheme_predicate_to_local_type(Scheme_Object *pred) { if (!pred) return 0; @@ -2479,7 +2534,7 @@ static int predicate_to_local_type(Scheme_Object *pred) int scheme_expr_produces_local_type(Scheme_Object *expr, int *_involves_k_cross) { if (_involves_k_cross) *_involves_k_cross = 0; - return predicate_to_local_type(expr_implies_predicate(expr, NULL, _involves_k_cross, 10)); + return scheme_predicate_to_local_type(expr_implies_predicate(expr, NULL, _involves_k_cross, 10)); } static Scheme_Object *rator_implies_predicate(Scheme_Object *rator, int argc) @@ -2556,6 +2611,10 @@ static Scheme_Object *expr_implies_predicate(Scheme_Object *expr, Optimize_Info *_involves_k_cross = 1; return p; } + + if ((SCHEME_VAR(expr)->mode == SCHEME_VAR_MODE_OPTIMIZE) + && SCHEME_VAR(expr)->optimize.known_val) + return expr_implies_predicate(SCHEME_VAR(expr)->optimize.known_val, info, _involves_k_cross, fuel-1); } } break; @@ -5573,7 +5632,7 @@ int scheme_ir_propagate_ok(Scheme_Object *value, Optimize_Info *info) if (SAME_TYPE(SCHEME_TYPE(value), scheme_ir_lambda_type)) { int sz; - sz = closure_body_size((Scheme_Lambda *)value, 1, info, NULL); + sz = lambda_body_size_plus_info((Scheme_Lambda *)value, 1, info, NULL); if ((sz >= 0) && (sz <= MAX_PROC_INLINE_SIZE)) return 1; else { @@ -5842,9 +5901,9 @@ static int set_one_code_flags(Scheme_Object *value, int flags, } if (merge_local_typed) { - merge_closure_local_type_map(lam, lam2); - merge_closure_local_type_map(lam, lam3); - merge_closure_local_type_map(lam, lam2); + merge_lambda_arg_types(lam, lam2); + merge_lambda_arg_types(lam, lam3); + merge_lambda_arg_types(lam, lam2); } if (!just_tentative || (SCHEME_LAMBDA_FLAGS(lam) & LAMBDA_RESULT_TENTATIVE)) { @@ -5901,14 +5960,14 @@ static int lambda_body_size(Scheme_Object *o, int less_args) int bsz; if (SAME_TYPE(SCHEME_TYPE(o), scheme_ir_lambda_type)) { - bsz = closure_body_size((Scheme_Lambda *)o, 0, NULL, NULL); + bsz = lambda_body_size_plus_info((Scheme_Lambda *)o, 0, NULL, NULL); if (less_args) bsz -= ((Scheme_Lambda *)o)->num_params; return bsz; } else if (SAME_TYPE(SCHEME_TYPE(o), scheme_case_lambda_sequence_type)) { Scheme_Case_Lambda *cl = (Scheme_Case_Lambda *)o; int i, sz = 0; for (i = cl->count; i--; ) { - bsz = closure_body_size((Scheme_Lambda *)cl->array[i], 0, NULL, NULL); + bsz = lambda_body_size_plus_info((Scheme_Lambda *)cl->array[i], 0, NULL, NULL); if (less_args) { bsz -= ((Scheme_Lambda *)cl->array[i])->num_params; if (bsz > sz) sz = bsz; @@ -5998,6 +6057,65 @@ void advance_clocks_for_optimized(Scheme_Object *o, scheme_signal_error("internal error: optimizer clock tracking has gone wrong"); } +static void set_application_types(Scheme_Object *o, Optimize_Info *info, int fuel) +/* Peek ahead in an expression to set readily apparent type information + for function calls. This information is useful for type-invariant loop + arguments, for example. */ +{ + if (!fuel) return; + + switch (SCHEME_TYPE(o)) { + case scheme_application_type: + { + Scheme_App_Rec *app = (Scheme_App_Rec *)o; + int i; + register_local_argument_types(app, NULL, NULL, info); + for (i = 0; i < app->num_args+1; i++) { + set_application_types(app->args[i], info, fuel - 1); + } + } + break; + case scheme_application2_type: + { + Scheme_App2_Rec *app = (Scheme_App2_Rec *)o; + register_local_argument_types(NULL, app, NULL, info); + set_application_types(app->rator, info, fuel - 1); + set_application_types(app->rand, info, fuel - 1); + break; + } + case scheme_application3_type: + { + Scheme_App3_Rec *app = (Scheme_App3_Rec *)o; + register_local_argument_types(NULL, NULL, app, info); + set_application_types(app->rator, info, fuel - 1); + set_application_types(app->rand1, info, fuel - 1); + set_application_types(app->rand2, info, fuel - 1); + } + break; + case scheme_sequence_type: + case scheme_begin0_sequence_type: + { + Scheme_Sequence *seq = (Scheme_Sequence *)o; + int i; + + for (i = 0; i < seq->count; i++) { + set_application_types(seq->array[i], info, fuel - 1); + } + } + break; + case scheme_branch_type: + { + Scheme_Branch_Rec *b = (Scheme_Branch_Rec *)o; + set_application_types(b->test, info, fuel - 1); + set_application_types(b->tbranch, info, fuel - 1); + set_application_types(b->fbranch, info, fuel - 1); + } + break; + default: + break; + } +} + static int can_unwrap(Scheme_Object *v) /* Can `v` be unwrapped from `(let ([x v]) v)`? */ { @@ -6272,9 +6390,13 @@ static Scheme_Object *optimize_lets(Scheme_Object *form, Optimize_Info *info, in if (!found_escapes) { optimize_info_seq_step(rhs_info, &info_seq); value = scheme_optimize_expr(pre_body->value, rhs_info, - ((pre_body->count == 1) - ? OPT_CONTEXT_SINGLED - : 0)); + (((pre_body->count == 1) + ? OPT_CONTEXT_SINGLED + : 0) + | (((pre_body->count == 1) + && !pre_body->vars[0]->non_app_count) + ? (pre_body->vars[0]->use_count << OPT_CONTEXT_APP_COUNT_SHIFT) + : 0))); pre_body->value = value; if (rhs_info->escapes) found_escapes = 1; @@ -6498,6 +6620,7 @@ static Scheme_Object *optimize_lets(Scheme_Object *form, Optimize_Info *info, in pred = NULL; } else pred = expr_implies_predicate(value, rhs_info, NULL, 5); + if (pred) add_type(body_info, (Scheme_Object *)pre_body->vars[0], pred); @@ -6535,6 +6658,12 @@ static Scheme_Object *optimize_lets(Scheme_Object *form, Optimize_Info *info, in but then assume not for all if any turn out not (i.e., approximate fix point). */ int flags; Scheme_Object *clones, *cl, *cl_first; + + /* If this is the last binding, peek ahead in the body to + check for easy type info in function calls */ + if (!i) + set_application_types(pre_body->body, body_info, 5); + /* Reset "unready" flags: */ for (rp_last = ready_pairs_start; !SAME_OBJ(rp_last, ready_pairs); rp_last = SCHEME_CDR(rp_last)) { SCHEME_VAR(SCHEME_CAR(rp_last))->optimize_unready = 1; @@ -6585,10 +6714,14 @@ static Scheme_Object *optimize_lets(Scheme_Object *form, Optimize_Info *info, in optimize_info_seq_step(rhs_info, &info_seq); value = scheme_optimize_expr(self_value, rhs_info, - ((irlv->count == 1) - ? OPT_CONTEXT_SINGLED - : 0)); - + (((irlv->count == 1) + ? OPT_CONTEXT_SINGLED + : 0) + | (((irlv->count == 1) + && !irlv->vars[0]->non_app_count) + ? (irlv->vars[0]->use_count << OPT_CONTEXT_APP_COUNT_SHIFT) + : 0))); + if (!OPT_DISCOURAGE_EARLY_INLINE) --rhs_info->letrec_not_twice; rhs_info->use_psize = use_psize; @@ -6744,7 +6877,7 @@ static Scheme_Object *optimize_lets(Scheme_Object *form, Optimize_Info *info, in int used = 0, j; pre_body = (Scheme_IR_Let_Value *)body; - + for (j = pre_body->count; j--; ) { if (pre_body->vars[j]->optimize_used) { used = 1; @@ -6907,6 +7040,7 @@ optimize_lambda(Scheme_Object *_lam, Optimize_Info *info, int context) Scheme_IR_Lambda_Info *cl; int i, init_vclock, init_aclock, init_kclock, init_sclock; Scheme_Hash_Table *ht; + int app_count = OPT_CONTEXT_APP_COUNT(context); lam = (Scheme_Lambda *)_lam; @@ -6944,6 +7078,14 @@ optimize_lambda(Scheme_Object *_lam, Optimize_Info *info, int context) cl->vars[i]->optimize.lambda_depth = info->lambda_depth; cl->vars[i]->optimize_used = 0; cl->vars[i]->optimize.init_kclock = info->kclock; + if (app_count + && (app_count < SCHEME_USE_COUNT_INF) + && cl->arg_types + && cl->arg_types[i] + && (cl->arg_type_contributors[i] == ((1 << app_count) - 1))) { + /* All uses accounted for, so we can rely on type info */ + add_type(info, (Scheme_Object *)cl->vars[i], cl->arg_types[i]); + } } code = scheme_optimize_expr(lam->body, info, 0); @@ -6993,79 +7135,56 @@ optimize_lambda(Scheme_Object *_lam, Optimize_Info *info, int context) return (Scheme_Object *)lam; } -static char *get_closure_local_type_map(Scheme_Lambda *lam, int arg_n, int *ok) -{ - Scheme_IR_Lambda_Info *cl = lam->ir_info; - - if ((SCHEME_LAMBDA_FLAGS(lam) & LAMBDA_HAS_REST) - || (arg_n != lam->num_params)) { - *ok = 0; - return NULL; - } - - if (cl->has_tymap && !cl->local_type_map) { - *ok = 0; - return NULL; - } - - *ok = 1; - return cl->local_type_map; -} - -static void set_closure_local_type_map(Scheme_Lambda *lam, char *local_type_map) -{ - Scheme_IR_Lambda_Info *cl = lam->ir_info; - int i; - - if (!cl->local_type_map) { - cl->has_tymap = 1; - cl->local_type_map = local_type_map; - } - - if (local_type_map) { - for (i = lam->num_params; i--; ) { - if (local_type_map[i]) break; - } - - if (i < 0) { - cl->local_type_map = NULL; - } - } -} - -static void merge_closure_local_type_map(Scheme_Lambda *lam1, Scheme_Lambda *lam2) +static void merge_lambda_arg_types(Scheme_Lambda *lam1, Scheme_Lambda *lam2) { Scheme_IR_Lambda_Info *cl1 = lam1->ir_info; Scheme_IR_Lambda_Info *cl2 = lam2->ir_info; + int i; - if (cl1->has_tymap) { - if (!cl1->local_type_map || !cl2->has_tymap) { - cl2->has_tymap = 1; - cl2->local_type_map = cl1->local_type_map; - } else if (cl2->local_type_map) { - int i, recheck = 0; - for (i = lam1->num_params; i--; ) { - if (cl1->local_type_map[i] != cl2->local_type_map[i]) { - cl1->local_type_map[i] = 0; - cl2->local_type_map[i] = 0; - recheck = 1; - } - } - if (recheck) { - for (i = lam1->num_params; i--; ) { - if (cl1->local_type_map[i]) break; - } - if (i < 0) { - cl1->local_type_map = NULL; - cl2->local_type_map = NULL; - } - } - } else { - cl1->local_type_map = NULL; + if (!cl1->arg_types) { + if (cl2->arg_types) { + cl1->arg_types = cl2->arg_types; + cl1->arg_type_contributors = cl2->arg_type_contributors; + } + } else { + if (cl2->arg_types) { + for (i = lam1->num_params; i--; ) { + if (!cl1->arg_type_contributors[i]) { + cl1->arg_types[i] = cl2->arg_types[i]; + cl1->arg_type_contributors[i] = cl2->arg_type_contributors[i]; + } else if (cl2->arg_type_contributors[i]) { + if (!cl2->arg_types[i]) + cl1->arg_types[i] = NULL; + else if (predicate_implies(cl1->arg_types[i], cl2->arg_types[i])) + cl1->arg_types[i] = cl2->arg_types[i]; + else if (!predicate_implies(cl2->arg_types[i], cl1->arg_types[i])) { + cl1->arg_types[i] = NULL; + cl1->arg_type_contributors[i] |= (1 << (SCHEME_USE_COUNT_INF-1)); + } + cl1->arg_type_contributors[i] |= cl2->arg_type_contributors[i]; + } + } + } + + cl2->arg_types = cl1->arg_types; + cl2->arg_type_contributors = cl1->arg_type_contributors; + } +} + +static void check_lambda_arg_types_registered(Scheme_Lambda *lam, int app_count) +{ + if (lam->ir_info->arg_types) { + int i; + for (i = lam->num_params; i--; ) { + if (lam->ir_info->arg_types[i]) { + if ((lam->ir_info->arg_type_contributors[i] & (1 << (SCHEME_USE_COUNT_INF-1))) + || (lam->ir_info->arg_type_contributors[i] < ((1 << app_count) - 1))) { + /* someone caller didn't weigh in with a type, + of an anonymous caller had no type to record */ + lam->ir_info->arg_types[i] = NULL; + } + } } - } else if (cl2->has_tymap) { - cl1->has_tymap = 1; - cl1->local_type_map = cl2->local_type_map; } } @@ -7106,7 +7225,8 @@ static Scheme_Object *clone_lambda(int single_use, Scheme_Object *_lam, Optimize Scheme_IR_Lambda_Info *cl; Scheme_IR_Local **vars; int sz; - char *local_type_map; + Scheme_Object **arg_types; + short *arg_type_contributors; lam = (Scheme_Lambda *)_lam; @@ -7127,11 +7247,14 @@ static Scheme_Object *clone_lambda(int single_use, Scheme_Object *_lam, Optimize lam2->body = body; - if (cl->local_type_map) { + if (cl->arg_types) { sz = lam2->num_params; - local_type_map = (char *)scheme_malloc_atomic(sz); - memcpy(local_type_map, cl->local_type_map, sz); - cl->local_type_map = local_type_map; + arg_types = MALLOC_N(Scheme_Object*, sz); + arg_type_contributors = MALLOC_N_ATOMIC(short, sz); + memcpy(arg_types, cl->arg_types, sz * sizeof(Scheme_Object*)); + memcpy(arg_type_contributors, cl->arg_type_contributors, sz * sizeof(short)); + cl->arg_types = arg_types; + cl->arg_type_contributors = arg_type_contributors; } if (cl->base_closure && var_map->count) { @@ -7153,8 +7276,8 @@ static Scheme_Object *clone_lambda(int single_use, Scheme_Object *_lam, Optimize return (Scheme_Object *)lam2; } -static int closure_body_size(Scheme_Lambda *lam, int check_assign, - Optimize_Info *info, int *is_leaf) +static int lambda_body_size_plus_info(Scheme_Lambda *lam, int check_assign, + Optimize_Info *info, int *is_leaf) { int i; Scheme_IR_Lambda_Info *cl; @@ -7175,7 +7298,7 @@ static int closure_body_size(Scheme_Lambda *lam, int check_assign, return cl->body_size + ((info && info->use_psize) ? cl->body_psize : 0); } -static int closure_has_top_level(Scheme_Lambda *lam) +static int lambda_has_top_level(Scheme_Lambda *lam) { return lam->ir_info->has_tl; } @@ -8137,6 +8260,8 @@ Scheme_Object *optimize_clone(int single_use, Scheme_Object *expr, Optimize_Info app2->rand = expr; SCHEME_APPN_FLAGS(app2) |= (SCHEME_APPN_FLAGS(app) & APPN_FLAG_MASK); + if (single_use) + SCHEME_APPN_FLAGS(app2) |= (SCHEME_APPN_FLAGS(app) & APPN_POSITION_MASK); return (Scheme_Object *)app2; } @@ -8154,6 +8279,8 @@ Scheme_Object *optimize_clone(int single_use, Scheme_Object *expr, Optimize_Info } SCHEME_APPN_FLAGS(app2) |= (SCHEME_APPN_FLAGS(app) & APPN_FLAG_MASK); + if (single_use) + SCHEME_APPN_FLAGS(app2) |= (SCHEME_APPN_FLAGS(app) & APPN_POSITION_MASK); return (Scheme_Object *)app2; } @@ -8177,6 +8304,8 @@ Scheme_Object *optimize_clone(int single_use, Scheme_Object *expr, Optimize_Info app2->rand2 = expr; SCHEME_APPN_FLAGS(app2) |= (SCHEME_APPN_FLAGS(app) & APPN_FLAG_MASK); + if (single_use) + SCHEME_APPN_FLAGS(app2) |= (SCHEME_APPN_FLAGS(app) & APPN_POSITION_MASK); return (Scheme_Object *)app2; } @@ -8561,7 +8690,7 @@ static Scheme_Object *optimize_info_propagate_local(Scheme_Object *var) } if (!val - || SCHEME_BOXP(val) /* A potential-size record */ + || SCHEME_WILL_BE_LAMBDAP(val) || SCHEME_LAMBDAP(val) || SAME_TYPE(SCHEME_TYPE(val), scheme_once_used_type)) { if (SAME_OBJ(last, var)) diff --git a/racket/src/racket/src/resolve.c b/racket/src/racket/src/resolve.c index 577324bddb..3cdb6e607d 100644 --- a/racket/src/racket/src/resolve.c +++ b/racket/src/racket/src/resolve.c @@ -1706,22 +1706,25 @@ resolve_lambda(Scheme_Object *_lam, Resolve_Info *info, closure. */ closure_size = lam->closure_size; - if (cl->local_type_map) { + if (cl->arg_types) { int at_least_one = 0; for (i = lam->num_params; i--; ) { - if (cl->local_type_map[i]) { - if ((cl->vars[i]->arg_type == cl->local_type_map[i]) + if (cl->arg_types[i]) { + int ct; + ct = scheme_predicate_to_local_type(cl->arg_types[i]); + if (ct + && (cl->vars[i]->arg_type == ct) && (!cl->vars[i]->escapes_after_k_tick || ALWAYS_PREFER_UNBOX_TYPE(cl->vars[i]->arg_type))) at_least_one = 1; else - cl->local_type_map[i] = 0; + cl->arg_types[i] = NULL; } } if (at_least_one) need_type_map = 1; else - cl->local_type_map = NULL; + cl->arg_types = NULL; } has_tl = cl->has_tl; @@ -1884,12 +1887,16 @@ resolve_lambda(Scheme_Object *_lam, Resolve_Info *info, /* If we're lifting this function, then arguments can have unboxing types, because the valdiator will be able to check all the calls: */ - if (cl->local_type_map) - cl->vars[i]->val_type = cl->local_type_map[i]; + int lt; + if (cl->arg_types) { + lt = scheme_predicate_to_local_type(cl->arg_types[i]); + cl->vars[i]->val_type = lt; + } else + lt = 0; if (need_type_map) { - if (cl->local_type_map && cl->local_type_map[i]) + if (lt) scheme_boxmap_set(closure_map, i + new_params, - cl->local_type_map[i] + LAMBDA_TYPE_TYPE_OFFSET, + lt + LAMBDA_TYPE_TYPE_OFFSET, closure_size); } } diff --git a/racket/src/racket/src/schpriv.h b/racket/src/racket/src/schpriv.h index 1754777297..73e7e0a305 100644 --- a/racket/src/racket/src/schpriv.h +++ b/racket/src/racket/src/schpriv.h @@ -1592,6 +1592,17 @@ typedef struct { /* After array of f & args, array of chars for eval type */ } Scheme_App_Rec; +#define SCHEME_APPN_FLAGS(app) MZ_OPT_HASH_KEY(&(app)->iso) +/* For all application types, throgh optimization, the low bits of the flags + are used to hold an index for an application indicate that it's the Nth + application of an identifier, which is useful to type inference. + The same bits are used after resolve for app2 and app3 to indicate + lookahead types (as below) */ + +/* A value of N means that the application is the (N-1)th + application of a variable, where 0 means "unknown". */ +#define APPN_POSITION_MASK SCHEME_USE_COUNT_INF + /* Lookahead types for evaluating application arguments. */ /* 4 cases + else => magic number for some compilers doing a switch? */ enum { @@ -1620,8 +1631,6 @@ typedef struct { Scheme_Object *rand; } Scheme_App2_Rec; -#define SCHEME_APPN_FLAGS(app) MZ_OPT_HASH_KEY(&(app)->iso) - typedef struct { Scheme_Inclhash_Object iso; /* keyex used for flags */ Scheme_Object *rator; @@ -2875,8 +2884,14 @@ typedef struct { MZTAG_IF_REQUIRED Scheme_Hash_Table *base_closure; Scheme_IR_Local **vars; - char *local_type_map; /* determined by callers; NULL when has_tymap set => no local types */ - char has_tl, has_tymap, has_nonleaf, is_dup; + Scheme_Object **arg_types; /* predicates for the arguments, as determined by callers */ + short *arg_type_contributors; /* bitmap of applications that have provided type info; + when the number of calls is know, this information + can reveal when all callers have checked in; the + contributor SCHEME_USE_COUNT_INF is an anonymous + contributor; if a contributor set is non-empty; + then NULL for a type mean "top" */ + char has_tl, has_nonleaf, is_dup; int body_size, body_psize; } Scheme_IR_Lambda_Info; @@ -3239,9 +3254,11 @@ Scheme_Object *scheme_optimize_expr(Scheme_Object *, Optimize_Info *, int contex #define OPT_CONTEXT_TYPE_SHIFT 4 #define OPT_CONTEXT_TYPE_MASK (SCHEME_MAX_LOCAL_TYPE_MASK << OPT_CONTEXT_TYPE_SHIFT) #define OPT_CONTEXT_TYPE(oc) ((oc & OPT_CONTEXT_TYPE_MASK) >> OPT_CONTEXT_TYPE_SHIFT) +#define OPT_CONTEXT_APP_COUNT_SHIFT (OPT_CONTEXT_TYPE_SHIFT + SCHEME_MAX_LOCAL_TYPE_BITS) +#define OPT_CONTEXT_APP_COUNT(oc) ((oc >> OPT_CONTEXT_APP_COUNT_SHIFT) & SCHEME_USE_COUNT_INF) #define scheme_optimize_result_context(c) (c & (~(OPT_CONTEXT_TYPE_MASK | OPT_CONTEXT_NO_SINGLE | OPT_CONTEXT_SINGLED))) -#define scheme_optimize_tail_context(c) scheme_optimize_result_context(c) +#define scheme_optimize_tail_context(c) scheme_optimize_result_context(c) Scheme_Object *scheme_optimize_apply_values(Scheme_Object *f, Scheme_Object *e, Optimize_Info *info, @@ -3251,7 +3268,9 @@ Scheme_Object *scheme_optimize_apply_values(Scheme_Object *f, Scheme_Object *e, int scheme_ir_duplicate_ok(Scheme_Object *o, int cross_mod); int scheme_ir_propagate_ok(Scheme_Object *o, Optimize_Info *info); int scheme_is_statically_proc(Scheme_Object *value, Optimize_Info *info); +XFORM_NONGCING int scheme_predicate_to_local_type(Scheme_Object *pred); Scheme_Object *scheme_make_noninline_proc(Scheme_Object *e); +Scheme_Object *scheme_optimize_extract_tail_inside(Scheme_Object *t2); Scheme_Object *scheme_resolve_expr(Scheme_Object *, Resolve_Info *); Scheme_Object *scheme_resolve_list(Scheme_Object *, Resolve_Info *); diff --git a/racket/src/racket/src/stypes.h b/racket/src/racket/src/stypes.h index 241f0b6109..9a3d269404 100644 --- a/racket/src/racket/src/stypes.h +++ b/racket/src/racket/src/stypes.h @@ -231,85 +231,86 @@ enum { scheme_plumber_type, /* 200 */ scheme_plumber_handle_type, /* 201 */ scheme_deferred_expr_type, /* 202 */ + scheme_will_be_lambda_type, /* 203 */ #ifdef MZTAG_REQUIRED - _scheme_last_normal_type_, /* 203 */ + _scheme_last_normal_type_, /* 204 */ /* The remaining tags exist for GC tracing (in non-conservative mode), but they are not needed for run-time tag tests */ - scheme_rt_weak_array, /* 204 */ + scheme_rt_weak_array, /* 205 */ - scheme_rt_comp_env, /* 205 */ - scheme_rt_constant_binding, /* 206 */ - scheme_rt_resolve_info, /* 207 */ - scheme_rt_unresolve_info, /* 208 */ - scheme_rt_optimize_info, /* 209 */ - scheme_rt_cont_mark, /* 210 */ - scheme_rt_saved_stack, /* 211 */ - scheme_rt_reply_item, /* 212 */ - scheme_rt_ir_lambda_info, /* 213 */ - scheme_rt_overflow, /* 214 */ - scheme_rt_overflow_jmp, /* 215 */ - scheme_rt_meta_cont, /* 216 */ - scheme_rt_dyn_wind_cell, /* 217 */ - scheme_rt_dyn_wind_info, /* 218 */ - scheme_rt_dyn_wind, /* 219 */ - scheme_rt_dup_check, /* 220 */ - scheme_rt_thread_memory, /* 221 */ - scheme_rt_input_file, /* 222 */ - scheme_rt_input_fd, /* 223 */ - scheme_rt_oskit_console_input, /* 224 */ - scheme_rt_tested_input_file, /* 225 */ - scheme_rt_tested_output_file, /* 226 */ - scheme_rt_indexed_string, /* 227 */ - scheme_rt_output_file, /* 228 */ - scheme_rt_load_handler_data, /* 229 */ - scheme_rt_pipe, /* 230 */ - scheme_rt_beos_process, /* 231 */ - scheme_rt_system_child, /* 232 */ - scheme_rt_tcp, /* 233 */ - scheme_rt_write_data, /* 234 */ - scheme_rt_tcp_select_info, /* 235 */ - scheme_rt_param_data, /* 236 */ - scheme_rt_will, /* 237 */ - scheme_rt_linker_name, /* 238 */ - scheme_rt_param_map, /* 239 */ - scheme_rt_finalization, /* 240 */ - scheme_rt_finalizations, /* 241 */ - scheme_rt_cpp_object, /* 242 */ - scheme_rt_cpp_array_object, /* 243 */ - scheme_rt_stack_object, /* 244 */ - scheme_rt_preallocated_object, /* 245 */ - scheme_thread_hop_type, /* 246 */ - scheme_rt_srcloc, /* 247 */ - scheme_rt_evt, /* 248 */ - scheme_rt_syncing, /* 249 */ - scheme_rt_comp_prefix, /* 250 */ - scheme_rt_user_input, /* 251 */ - scheme_rt_user_output, /* 252 */ - scheme_rt_compact_port, /* 253 */ - scheme_rt_read_special_dw, /* 254 */ - scheme_rt_regwork, /* 255 */ - scheme_rt_rx_lazy_string, /* 256 */ - scheme_rt_buf_holder, /* 257 */ - scheme_rt_parameterization, /* 258 */ - scheme_rt_print_params, /* 259 */ - scheme_rt_read_params, /* 260 */ - scheme_rt_native_code, /* 261 */ - scheme_rt_native_code_plus_case, /* 262 */ - scheme_rt_jitter_data, /* 263 */ - scheme_rt_module_exports, /* 264 */ - scheme_rt_delay_load_info, /* 265 */ - scheme_rt_marshal_info, /* 266 */ - scheme_rt_unmarshal_info, /* 267 */ - scheme_rt_runstack, /* 268 */ - scheme_rt_sfs_info, /* 269 */ - scheme_rt_validate_clearing, /* 270 */ - scheme_rt_lightweight_cont, /* 271 */ - scheme_rt_export_info, /* 272 */ - scheme_rt_cont_jmp, /* 273 */ - scheme_rt_letrec_check_frame, /* 274 */ + scheme_rt_comp_env, /* 206 */ + scheme_rt_constant_binding, /* 207 */ + scheme_rt_resolve_info, /* 208 */ + scheme_rt_unresolve_info, /* 209 */ + scheme_rt_optimize_info, /* 210 */ + scheme_rt_cont_mark, /* 211 */ + scheme_rt_saved_stack, /* 212 */ + scheme_rt_reply_item, /* 213 */ + scheme_rt_ir_lambda_info, /* 214 */ + scheme_rt_overflow, /* 215 */ + scheme_rt_overflow_jmp, /* 216 */ + scheme_rt_meta_cont, /* 217 */ + scheme_rt_dyn_wind_cell, /* 218 */ + scheme_rt_dyn_wind_info, /* 219 */ + scheme_rt_dyn_wind, /* 220 */ + scheme_rt_dup_check, /* 221 */ + scheme_rt_thread_memory, /* 222 */ + scheme_rt_input_file, /* 223 */ + scheme_rt_input_fd, /* 224 */ + scheme_rt_oskit_console_input, /* 225 */ + scheme_rt_tested_input_file, /* 226 */ + scheme_rt_tested_output_file, /* 227 */ + scheme_rt_indexed_string, /* 228 */ + scheme_rt_output_file, /* 229 */ + scheme_rt_load_handler_data, /* 230 */ + scheme_rt_pipe, /* 231 */ + scheme_rt_beos_process, /* 232 */ + scheme_rt_system_child, /* 233 */ + scheme_rt_tcp, /* 234 */ + scheme_rt_write_data, /* 235 */ + scheme_rt_tcp_select_info, /* 236 */ + scheme_rt_param_data, /* 237 */ + scheme_rt_will, /* 238 */ + scheme_rt_linker_name, /* 239 */ + scheme_rt_param_map, /* 240 */ + scheme_rt_finalization, /* 241 */ + scheme_rt_finalizations, /* 242 */ + scheme_rt_cpp_object, /* 243 */ + scheme_rt_cpp_array_object, /* 244 */ + scheme_rt_stack_object, /* 245 */ + scheme_rt_preallocated_object, /* 246 */ + scheme_thread_hop_type, /* 247 */ + scheme_rt_srcloc, /* 248 */ + scheme_rt_evt, /* 249 */ + scheme_rt_syncing, /* 250 */ + scheme_rt_comp_prefix, /* 251 */ + scheme_rt_user_input, /* 252 */ + scheme_rt_user_output, /* 253 */ + scheme_rt_compact_port, /* 254 */ + scheme_rt_read_special_dw, /* 255 */ + scheme_rt_regwork, /* 256 */ + scheme_rt_rx_lazy_string, /* 257 */ + scheme_rt_buf_holder, /* 258 */ + scheme_rt_parameterization, /* 259 */ + scheme_rt_print_params, /* 260 */ + scheme_rt_read_params, /* 261 */ + scheme_rt_native_code, /* 262 */ + scheme_rt_native_code_plus_case, /* 263 */ + scheme_rt_jitter_data, /* 264 */ + scheme_rt_module_exports, /* 265 */ + scheme_rt_delay_load_info, /* 266 */ + scheme_rt_marshal_info, /* 267 */ + scheme_rt_unmarshal_info, /* 268 */ + scheme_rt_runstack, /* 269 */ + scheme_rt_sfs_info, /* 270 */ + scheme_rt_validate_clearing, /* 271 */ + scheme_rt_lightweight_cont, /* 272 */ + scheme_rt_export_info, /* 273 */ + scheme_rt_cont_jmp, /* 274 */ + scheme_rt_letrec_check_frame, /* 275 */ #endif _scheme_last_type_ diff --git a/racket/src/racket/src/type.c b/racket/src/racket/src/type.c index f0a2e1434d..77d2596cd3 100644 --- a/racket/src/racket/src/type.c +++ b/racket/src/racket/src/type.c @@ -717,7 +717,9 @@ void scheme_register_traversers(void) GC_REG_TRAV(scheme_progress_evt_type, twoptr_obj); GC_REG_TRAV(scheme_already_comp_type, iptr_obj); - + + GC_REG_TRAV(scheme_will_be_lambda_type, iptr_obj); + GC_REG_TRAV(scheme_thread_cell_values_type, small_object); GC_REG_TRAV(scheme_global_ref_type, twoptr_obj);