diff --git a/pkgs/racket-pkgs/racket-test/tests/racket/optimize.rktl b/pkgs/racket-pkgs/racket-test/tests/racket/optimize.rktl index 245eb2cb51..a46dce5847 100644 --- a/pkgs/racket-pkgs/racket-test/tests/racket/optimize.rktl +++ b/pkgs/racket-pkgs/racket-test/tests/racket/optimize.rktl @@ -3277,6 +3277,23 @@ (check pred t1 e1))))) ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Check unboxing with mutual recusion: + +(let () + ;; Literal lists thwart inlining: + (define (odd n acc) + (if (fl= n 0.0) + '(nope) + (even (fl- n 1.0) + (fl+ acc 0.5)))) + (define (even n acc) + (if (fl= n 0.0) + (cons acc '(yep)) + (odd (fl- n 1.0) + (fl+ acc 0.5)))) + (test '(0.5e7 yep) (lambda () (even 1e7 0.0)))) + +;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (report-errs) diff --git a/racket/src/racket/src/jit.c b/racket/src/racket/src/jit.c index fc81b73d1a..ea1cb9e0b1 100644 --- a/racket/src/racket/src/jit.c +++ b/racket/src/racket/src/jit.c @@ -1099,6 +1099,7 @@ int scheme_generate_flonum_local_boxing(mz_jit_state *jitter, int pos, int offse __END_TINY_JUMPS__(1); CHECK_LIMIT(); jit_movi_l(JIT_R0, offset); + LOG_IT((" {box flonum}\n")); MZ_FPUSEL_STMT(extfl, (void)jit_calli(sjc.box_extflonum_from_stack_code), (void)jit_calli(sjc.box_flonum_from_stack_code)); @@ -3570,12 +3571,18 @@ static int do_generate_closure(mz_jit_state *jitter, void *_data) } } - LOG_IT(("PROC: %s, %d args, flags: %x\n", - (data->name ? scheme_format_utf8("~s", 2, 1, &data->name, NULL) : "???"), - data->num_params, - SCHEME_CLOSURE_DATA_FLAGS(data))); - FOR_LOG(jitter->log_depth++); - + FOR_LOG({ + char *log_name; + Scheme_Object *a[1]; + a[0] = data->name; + log_name = (data->name ? scheme_format_utf8("~s", 2, 1, a, NULL) : "???"); + LOG_IT(("PROC: %s, %d args, flags: %x\n", + log_name, + data->num_params, + SCHEME_CLOSURE_DATA_FLAGS(data))); + jitter->log_depth++; + }); + jitter->self_data = data; jitter->self_restart_code = jit_get_ip(); diff --git a/racket/src/racket/src/jit.h b/racket/src/racket/src/jit.h index 4183ceedf6..3206fadbd6 100644 --- a/racket/src/racket/src/jit.h +++ b/racket/src/racket/src/jit.h @@ -1463,7 +1463,8 @@ int scheme_generate_app(Scheme_App_Rec *app, Scheme_Object **alt_rands, int num_ mz_jit_state *jitter, int is_tail, int multi_ok, int ignored_result, int no_call); int scheme_generate_tail_call(mz_jit_state *jitter, int num_rands, int direct_native, int need_set_rs, - int is_inline, Scheme_Native_Closure *direct_to_code, jit_direct_arg *direct_arg); + int is_inline, Scheme_Native_Closure *direct_to_code, jit_direct_arg *direct_arg, + Scheme_Closure_Data *direct_data); int scheme_generate_non_tail_call(mz_jit_state *jitter, int num_rands, int direct_native, int need_set_rs, int multi_ok, int result_ignored, int nontail_self, int pop_and_jump, int is_inlined, int unboxed_args); diff --git a/racket/src/racket/src/jitcall.c b/racket/src/racket/src/jitcall.c index 411c294bec..5539570e95 100644 --- a/racket/src/racket/src/jitcall.c +++ b/racket/src/racket/src/jitcall.c @@ -27,6 +27,15 @@ #include "jit.h" +#ifdef USE_FLONUM_UNBOXING +static int generate_argument_boxing(mz_jit_state *jitter, Scheme_Closure_Data *data, + int num_rands, int args_already_in_place, + int offset, int direct_flostack_offset, + int save_reg, + /* used only to skip unneeded checks: */ + Scheme_App_Rec *app, Scheme_Object **alt_rands); +#endif + int scheme_direct_call_count, scheme_indirect_call_count; struct jit_direct_arg { @@ -332,12 +341,15 @@ static int generate_direct_prim_tail_call(mz_jit_state *jitter, int num_rands) static const int direct_arg_regs[] = { JIT_V1, JIT_R1, JIT_R0 }; int scheme_generate_tail_call(mz_jit_state *jitter, int num_rands, int direct_native, int need_set_rs, - int is_inline, Scheme_Native_Closure *direct_to_code, jit_direct_arg *direct_args) + int is_inline, Scheme_Native_Closure *direct_to_code, jit_direct_arg *direct_args, + Scheme_Closure_Data *direct_data) /* Proc is in V1 unless direct_to_code, args are at RUNSTACK. If num_rands < 0, then argc is in LOCAL2 and arguments are already below RUNSTACK_BASE. If direct_native == 2, then some arguments are already in place (shallower in the runstack than the arguments to move). - If direct_args, then R0, R1, V1 hold arguments. */ + If direct_args, then R0, R1, V1 hold arguments. + If direct data in unboxing mode, slow path needs to box flonum arguments; num_rands + must be >= 0 */ { int i, r2_has_runstack = 0; GC_CAN_IGNORE jit_insn *refagain, *ref, *ref2, *ref4, *ref5; @@ -487,6 +499,23 @@ int scheme_generate_tail_call(mz_jit_state *jitter, int num_rands, int direct_na if (ref4) mz_patch_branch(ref4); CHECK_LIMIT(); +#ifdef USE_FLONUM_UNBOXING + if (direct_data) { + if (SCHEME_CLOSURE_DATA_FLAGS(direct_data) & CLOS_HAS_TYPED_ARGS) { + /* Need to box flonum arguments. Flonums are currently in the place where + the target function expects them unpacked from arguments. We need to save + JIT_V1. */ + generate_argument_boxing(jitter, direct_data, + num_rands, 0, + 0, 0, + JIT_V1, + NULL, NULL); + CHECK_LIMIT(); + mz_rs_sync(); + scheme_mz_flostack_restore(jitter, 0, 0, 1, 0); + } + } +#endif if (need_set_rs) { JIT_UPDATE_THREAD_RSPTR(); } @@ -1074,6 +1103,82 @@ int scheme_generate_non_tail_call(mz_jit_state *jitter, int num_rands, int direc return 1; } +#ifdef USE_FLONUM_UNBOXING +static int generate_argument_boxing(mz_jit_state *jitter, Scheme_Closure_Data *data, + int num_rands, int args_already_in_place, + int offset, int direct_flostack_offset, + int save_reg, + /* used only to skip unneeded checks: */ + Scheme_App_Rec *app, Scheme_Object **alt_rands) +{ + int i, arg_tmp_offset; + Scheme_Object *rand; + + arg_tmp_offset = offset - direct_flostack_offset; + for (i = num_rands; i--; ) { + int extfl; + extfl = CLOSURE_ARGUMENT_IS_EXTFLONUM(data, i + args_already_in_place); + if (extfl || CLOSURE_ARGUMENT_IS_FLONUM(data, i + args_already_in_place)) { + rand = (alt_rands + ? alt_rands[i+1+args_already_in_place] + : (app + ? app->args[i+1+args_already_in_place] + : NULL)); + arg_tmp_offset += MZ_FPUSEL(extfl, 2*sizeof(double), sizeof(double)); + /* Boxing definitely isn't needed if the value was from a local that doesn't hold + an unboxed value, otherwise we generate code to check dynamically. */ + if (!rand + || !SAME_TYPE(SCHEME_TYPE(rand), scheme_local_type) + || (!extfl && (SCHEME_GET_LOCAL_TYPE(rand) == SCHEME_LOCAL_TYPE_FLONUM)) + || (extfl && (SCHEME_GET_LOCAL_TYPE(rand) == SCHEME_LOCAL_TYPE_EXTFLONUM))) { + GC_CAN_IGNORE jit_insn *iref; + int aoffset; + aoffset = JIT_FRAME_FLOSTACK_OFFSET - arg_tmp_offset; + if (save_reg == JIT_R0) { + if (i != num_rands - 1) + mz_pushr_p(JIT_R0); + } else { + mz_pushr_p(JIT_V1); + } + if (!rand || SAME_TYPE(SCHEME_TYPE(rand), scheme_local_type)) { + /* assert: !rand or SCHEME_GET_LOCAL_TYPE(rand) == SCHEME_LOCAL_TYPE_FLONUM + or SCHEME_GET_LOCAL_TYPE(rand) == SCHEME_LOCAL_TYPE_EXTFLONUM */ + /* So, we have to check for an existing box */ + if ((save_reg != JIT_R0) || (i != num_rands - 1)) + mz_rs_ldxi(JIT_R0, i+1); + mz_rs_sync(); + __START_TINY_JUMPS__(1); + iref = jit_bnei_p(jit_forward(), JIT_R0, NULL); + __END_TINY_JUMPS__(1); + } else + iref = NULL; + jit_movi_l(JIT_R0, aoffset); + mz_rs_sync(); + MZ_FPUSEL_STMT(extfl, + (void)jit_calli(sjc.box_extflonum_from_stack_code), + (void)jit_calli(sjc.box_flonum_from_stack_code)); + if ((save_reg != JIT_R0) || (i != num_rands - 1)) + mz_rs_stxi(i+1, JIT_R0); + if (iref) { + __START_TINY_JUMPS__(1); + mz_patch_branch(iref); + __END_TINY_JUMPS__(1); + } + CHECK_LIMIT(); + if (save_reg == JIT_R0) { + if (i != num_rands - 1) + mz_popr_p(JIT_R0); + } else { + mz_popr_p(JIT_V1); + } + } + } + } + + return 1; +} +#endif + static int generate_self_tail_call(Scheme_Object *rator, mz_jit_state *jitter, int num_rands, GC_CAN_IGNORE jit_insn *slow_code, int args_already_in_place, int direct_flostack_offset, Scheme_App_Rec *app, Scheme_Object **alt_rands) @@ -1183,55 +1288,12 @@ static int generate_self_tail_call(Scheme_Object *rator, mz_jit_state *jitter, i #ifdef USE_FLONUM_UNBOXING /* Need to box any arguments that we have only in flonum form */ if (SCHEME_CLOSURE_DATA_FLAGS(jitter->self_data) & CLOS_HAS_TYPED_ARGS) { - arg_tmp_offset = offset - direct_flostack_offset; - for (i = num_rands; i--; ) { - int extfl; - extfl = CLOSURE_ARGUMENT_IS_EXTFLONUM(jitter->self_data, i + args_already_in_place); - if (extfl || CLOSURE_ARGUMENT_IS_FLONUM(jitter->self_data, i + args_already_in_place)) { - rand = (alt_rands - ? alt_rands[i+1+args_already_in_place] - : app->args[i+1+args_already_in_place]); - arg_tmp_offset += MZ_FPUSEL(extfl, 2*sizeof(double), sizeof(double)); - /* Boxing definitely isn't needed if the value was from a local that doesn't hold - an unboxed value, otherwise we generate code to check dynamically. */ - if (!SAME_TYPE(SCHEME_TYPE(rand), scheme_local_type) - || (!extfl && (SCHEME_GET_LOCAL_TYPE(rand) == SCHEME_LOCAL_TYPE_FLONUM)) - || (extfl && (SCHEME_GET_LOCAL_TYPE(rand) == SCHEME_LOCAL_TYPE_EXTFLONUM))) { - GC_CAN_IGNORE jit_insn *iref; - int aoffset; - aoffset = JIT_FRAME_FLOSTACK_OFFSET - arg_tmp_offset; - if (i != num_rands - 1) - mz_pushr_p(JIT_R0); - if (SAME_TYPE(SCHEME_TYPE(rand), scheme_local_type)) { - /* assert: SCHEME_GET_LOCAL_TYPE(rand) == SCHEME_LOCAL_TYPE_FLONUM - or SCHEME_GET_LOCAL_TYPE(rand) == SCHEME_LOCAL_TYPE_EXTFLONUM */ - /* So, we have to check for an existing box */ - if (i != num_rands - 1) - mz_rs_ldxi(JIT_R0, i+1); - mz_rs_sync(); - __START_TINY_JUMPS__(1); - iref = jit_bnei_p(jit_forward(), JIT_R0, NULL); - __END_TINY_JUMPS__(1); - } else - iref = NULL; - jit_movi_l(JIT_R0, aoffset); - mz_rs_sync(); - MZ_FPUSEL_STMT(extfl, - (void)jit_calli(sjc.box_extflonum_from_stack_code), - (void)jit_calli(sjc.box_flonum_from_stack_code)); - if (i != num_rands - 1) - mz_rs_stxi(i+1, JIT_R0); - if (iref) { - __START_TINY_JUMPS__(1); - mz_patch_branch(iref); - __END_TINY_JUMPS__(1); - } - CHECK_LIMIT(); - if (i != num_rands - 1) - mz_popr_p(JIT_R0); - } - } - } + generate_argument_boxing(jitter, jitter->self_data, + num_rands, args_already_in_place, + offset, direct_flostack_offset, + JIT_R0, + app, alt_rands); + CHECK_LIMIT(); /* Arguments already in place may also need to be boxed. */ arg_tmp_offset = jitter->self_restart_offset; @@ -1368,7 +1430,7 @@ static int do_generate_shared_call(mz_jit_state *jitter, void *_data) ok = generate_direct_prim_tail_call(jitter, data->num_rands); else ok = scheme_generate_tail_call(jitter, data->num_rands, data->direct_native, 1, 0, - NULL, NULL); + NULL, NULL, NULL); scheme_jit_register_helper_func(jitter, code, 0); @@ -1653,6 +1715,7 @@ int scheme_generate_app(Scheme_App_Rec *app, Scheme_Object **alt_rands, int num_ int i, offset, need_safety = 0, apply_to_list = 0; int direct_prim = 0, need_non_tail = 0, direct_native = 0, direct_self = 0, nontail_self = 0; Scheme_Native_Closure *inline_direct_native = NULL; + int almost_inline_direct_native = 0; #ifdef USE_FLONUM_UNBOXING Scheme_Closure_Data *direct_data = NULL; #endif @@ -1751,7 +1814,8 @@ int scheme_generate_app(Scheme_App_Rec *app, Scheme_Object **alt_rands, int num_ if (ZERO_SIZED_CLOSUREP(c)) { /* If we're calling a constant function in tail position, then there's a good chance that this function is a wrapper to - get to a loop. Inline the jump to the potential loop, + get to a loop, a nested loop, or a mutually recursive call. + Inline the jump to the potential loop, absorbing the runstack and C stack checks into the check for this function --- only works if we can JIT the target of the call. */ @@ -1778,6 +1842,16 @@ int scheme_generate_app(Scheme_App_Rec *app, Scheme_Object **alt_rands, int num_ unboxed_non_tail_args = 1; } } + } else { + if (is_tail) { + /* To tie a mutally recursive loop, we've leave an indirection + and a runstack-space check, but we can still handle unboxed + arguments. */ +#ifdef USE_FLONUM_UNBOXING + direct_data = data; +#endif + almost_inline_direct_native = 1; + } } } } @@ -1800,8 +1874,10 @@ int scheme_generate_app(Scheme_App_Rec *app, Scheme_Object **alt_rands, int num_ reorder_ok = 0; /* superceded by direct_self */ } + FOR_LOG(if (direct_native) { LOG_IT((" [direct]\n")); } ) + /* Direct native tail with same number of args as just received? */ - if (direct_native && is_tail && num_rands + if (direct_native && is_tail && num_rands && !almost_inline_direct_native && (num_rands == jitter->self_data->num_params) && !(SCHEME_CLOSURE_DATA_FLAGS(jitter->self_data) & CLOS_HAS_REST)) { /* Check whether the actual arguments refer to Scheme-stack @@ -1828,6 +1904,7 @@ int scheme_generate_app(Scheme_App_Rec *app, Scheme_Object **alt_rands, int num_ mz_runstack_skipped(jitter, args_already_in_place); num_rands -= args_already_in_place; } + LOG_IT((" [args in place: %d]\n", args_already_in_place)); } if (inline_direct_native) { @@ -1913,6 +1990,10 @@ int scheme_generate_app(Scheme_App_Rec *app, Scheme_Object **alt_rands, int num_ direct_data = jitter->self_data; #endif +#ifdef JIT_PRECISE_GC + FOR_LOG(if (direct_data) { LOG_IT((" [typed]\n")); } ) +#endif + #ifdef USE_FLONUM_UNBOXING /* we want to push flonums into local storage in reverse order of evaluation, so make a pass to create space: */ @@ -2046,6 +2127,7 @@ int scheme_generate_app(Scheme_App_Rec *app, Scheme_Object **alt_rands, int num_ /* leave actual call to inlining code */ } else if (!(direct_self && is_tail) && !inline_direct_native + && !almost_inline_direct_native && (num_rands >= MAX_SHARED_CALL_RANDS)) { LOG_IT(("<-many args\n")); if (is_tail) { @@ -2058,7 +2140,7 @@ int scheme_generate_app(Scheme_App_Rec *app, Scheme_Object **alt_rands, int num_ mz_set_local_p(JIT_R2, JIT_LOCAL2); } scheme_generate_tail_call(jitter, num_rands, direct_native, jitter->need_set_rs, 1, - NULL, NULL); + NULL, NULL, NULL); } } else { if (direct_prim) @@ -2089,7 +2171,7 @@ int scheme_generate_app(Scheme_App_Rec *app, Scheme_Object **alt_rands, int num_ generate_self_tail_call(rator, jitter, num_rands, code, args_already_in_place, direct_flostack_offset, app, alt_rands); CHECK_LIMIT(); - } else if (inline_direct_native) { + } else if (inline_direct_native || almost_inline_direct_native) { LOG_IT(("<-native-tail\n")); #ifdef USE_FLONUM_UNBOXING /* Copy unboxed flonums into place where the target code expects them: */ @@ -2107,7 +2189,13 @@ int scheme_generate_app(Scheme_App_Rec *app, Scheme_Object **alt_rands, int num_ mz_set_local_p(JIT_R2, JIT_LOCAL2); } scheme_generate_tail_call(jitter, num_rands, direct_native, jitter->need_set_rs, 1, - inline_direct_native, inline_direct_args); + inline_direct_native, inline_direct_args, +#ifdef USE_FLONUM_UNBOXING + almost_inline_direct_native ? direct_data : NULL +#else + NULL +#endif + ); CHECK_LIMIT(); } else { scheme_mz_flostack_restore(jitter, 0, 0, 1, 1); diff --git a/racket/src/racket/src/jitcommon.c b/racket/src/racket/src/jitcommon.c index fb6c4b6074..0c1212ab04 100644 --- a/racket/src/racket/src/jitcommon.c +++ b/racket/src/racket/src/jitcommon.c @@ -3613,7 +3613,7 @@ static int more_common1(mz_jit_state *jitter, void *_data) __END_SHORT_JUMPS__(1); - scheme_generate_tail_call(jitter, -1, 0, 1, 0, NULL, NULL); + scheme_generate_tail_call(jitter, -1, 0, 1, 0, NULL, NULL, NULL); CHECK_LIMIT(); } diff --git a/racket/src/racket/src/jitinline.c b/racket/src/racket/src/jitinline.c index edbb75e8ea..58bc9f30a1 100644 --- a/racket/src/racket/src/jitinline.c +++ b/racket/src/racket/src/jitinline.c @@ -1677,7 +1677,7 @@ int scheme_generate_inlined_unary(mz_jit_state *jitter, Scheme_App2_Rec *app, in } else if (IS_NAMED_PRIM(rator, "unsafe-flrandom")) { mz_jit_unbox_state ubs; - LOG_IT(("inlined %s\n", name)); + LOG_IT(("inlined unsafe-flrandom\n")); mz_runstack_skipped(jitter, 1);