improve JIT support for unboxed flonums in a tail-call case

Applies to tail calls to immediate functions, which can be
generated by a `for' or `let loop' pattern in a tail
position.
This commit is contained in:
Matthew Flatt 2012-07-03 17:43:45 -06:00
parent c428d4ed4c
commit b08c202a12
3 changed files with 101 additions and 9 deletions

View File

@ -1092,8 +1092,8 @@ int scheme_generate_flonum_local_unboxing(mz_jit_state *jitter, int push)
int offset;
if (jitter->flostack_offset == jitter->flostack_space) {
int space = 4 * sizeof(double);
jitter->flostack_space += 4;
int space = FLOSTACK_SPACE_CHUNK * sizeof(double);
jitter->flostack_space += FLOSTACK_SPACE_CHUNK;
jit_subi_l(JIT_SP, JIT_SP, space);
}
@ -3292,11 +3292,21 @@ static int do_generate_closure(mz_jit_state *jitter, void *_data)
#ifdef USE_FLONUM_UNBOXING
/* Unpack flonum arguments */
if (SCHEME_CLOSURE_DATA_FLAGS(data) & CLOS_HAS_TYPED_ARGS) {
GC_CAN_IGNORE jit_insn *zref;
int f_offset;
/* In the case of an inline_direct_native call, the flonums are
already unpacked and JIT_SP is set up. Check whether JIT_SP
is already different than the 0 flonums. */
f_offset = JIT_FRAME_FLONUM_OFFSET - (jitter->flostack_offset * sizeof(double));
jit_subr_p(JIT_R1, JIT_SP, JIT_FP);
zref = jit_bnei_l(jit_forward(), JIT_R1, f_offset);
for (i = data->num_params; i--; ) {
if (CLOSURE_ARGUMENT_IS_FLONUM(data, i)) {
mz_rs_ldxi(JIT_R1, i);
jit_ldxi_d_fppush(JIT_FPR0, JIT_R1, &((Scheme_Double *)0x0)->double_val);
scheme_generate_flonum_local_unboxing(jitter, 1);
scheme_generate_flonum_local_unboxing(jitter, 1);
CHECK_LIMIT();
} else {
mz_runstack_pushed(jitter, 1);
@ -3304,6 +3314,8 @@ static int do_generate_closure(mz_jit_state *jitter, void *_data)
}
jitter->self_pos = 0;
jitter->depth = 0;
mz_patch_branch(zref);
}
#endif

View File

@ -826,6 +826,8 @@ void scheme_jit_prolog_again(mz_jit_state *jitter, int n, int ret_addr_reg)
# define mz_repush_threadlocal() /* empty */
#endif
#define FLOSTACK_SPACE_CHUNK 4
#define mz_patch_branch(a) mz_patch_branch_at(a, (_jit.x.pc))
#define mz_patch_ucbranch(a) mz_patch_ucbranch_at(a, (_jit.x.pc))

View File

@ -1410,6 +1410,68 @@ static jit_direct_arg *check_special_direct_args(Scheme_App_Rec *app, Scheme_Obj
return inline_direct_args;
}
int generate_fp_argument_shuffle(int direct_flostack_offset, mz_jit_state *jitter)
{
int i, j;
/* Copy unboxed flonums into place where the target code expects them,
which is shifted and reverse of the order that we pushed. */
# define mz_ld_fppush(r, i) jit_ldxi_d_fppush(r, JIT_FP, (JIT_FRAME_FLONUM_OFFSET - ((i) * sizeof(double))))
# define mz_st_fppop(i, r) (void)jit_stxi_d_fppop((JIT_FRAME_FLONUM_OFFSET - ((i) * sizeof(double))), JIT_FP, r)
if (direct_flostack_offset
&& ((direct_flostack_offset > 1)
|| (direct_flostack_offset != jitter->flostack_offset))) {
/* If the source and target areas don't overlap (or if they
overlap only by one item), we can do it in one step, otherwise
reverse then shift. */
if (jitter->flostack_offset >= ((2 * direct_flostack_offset) - 1)) {
/* one step: */
if (direct_flostack_offset != jitter->flostack_offset) {
/* shift: */
for (i = 0; i < direct_flostack_offset; i++) {
int i_pos, a_pos;
i_pos = jitter->flostack_offset - direct_flostack_offset + i + 1;
a_pos = direct_flostack_offset - i;
if (i_pos != a_pos) {
mz_ld_fppush(JIT_FPR0, i_pos);
mz_st_fppop(a_pos, JIT_FPR0);
CHECK_LIMIT();
}
}
}
} else {
/* reverse: */
for (i = 0, j = direct_flostack_offset-1; i < j; i++, j--) {
int i_pos, j_pos;
i_pos = jitter->flostack_offset - direct_flostack_offset + i + 1;
j_pos = jitter->flostack_offset - direct_flostack_offset + j + 1;
mz_ld_fppush(JIT_FPR1, i_pos);
mz_ld_fppush(JIT_FPR0, j_pos);
mz_st_fppop(i_pos, JIT_FPR0);
mz_st_fppop(j_pos, JIT_FPR1);
CHECK_LIMIT();
}
if (direct_flostack_offset != jitter->flostack_offset) {
/* shift: */
for (i = 0; i < direct_flostack_offset; i++) {
int i_pos, a_pos;
i_pos = jitter->flostack_offset - direct_flostack_offset + i + 1;
mz_ld_fppush(JIT_FPR0, i_pos);
a_pos = i + 1;
mz_st_fppop(a_pos, JIT_FPR0);
CHECK_LIMIT();
}
}
}
}
return 1;
}
int scheme_generate_app(Scheme_App_Rec *app, Scheme_Object **alt_rands, int num_rands,
mz_jit_state *jitter, int is_tail, int multi_ok, int no_call)
/* de-sync'd ok
@ -1419,6 +1481,8 @@ int scheme_generate_app(Scheme_App_Rec *app, Scheme_Object **alt_rands, int num_
int i, offset, need_safety = 0, apply_to_list = 0;
int direct_prim = 0, need_non_tail = 0, direct_native = 0, direct_self = 0, nontail_self = 0;
Scheme_Native_Closure *inline_direct_native = NULL;
Scheme_Closure_Data *direct_data = NULL;
int direct_flostack_offset = 0;
jit_direct_arg *inline_direct_args = NULL;
int proc_already_in_place = 0;
Scheme_Object *rator, *v, *arg;
@ -1526,6 +1590,8 @@ int scheme_generate_app(Scheme_App_Rec *app, Scheme_Object **alt_rands, int num_
if (nc->code->max_let_depth > jitter->max_tail_depth)
jitter->max_tail_depth = nc->code->max_let_depth;
direct_data = data; /* for flonum handling */
inline_direct_native = nc;
}
}
@ -1646,6 +1712,9 @@ int scheme_generate_app(Scheme_App_Rec *app, Scheme_Object **alt_rands, int num_
}
}
/* not sync'd...*/
if (direct_self && is_tail)
direct_data = jitter->self_data;
for (i = 0; i < num_rands; i++) {
PAUSE_JIT_DATA();
@ -1658,11 +1727,9 @@ int scheme_generate_app(Scheme_App_Rec *app, Scheme_Object **alt_rands, int num_
need_safety = 0;
}
#ifdef USE_FLONUM_UNBOXING
if (direct_self
&& is_tail
&& (SCHEME_CLOSURE_DATA_FLAGS(jitter->self_data) & CLOS_HAS_TYPED_ARGS)
&& (CLOSURE_ARGUMENT_IS_FLONUM(jitter->self_data, i+args_already_in_place))) {
if (direct_data
&& (SCHEME_CLOSURE_DATA_FLAGS(direct_data) & CLOS_HAS_TYPED_ARGS)
&& (CLOSURE_ARGUMENT_IS_FLONUM(direct_data, i+args_already_in_place))) {
int directly;
jitter->unbox++;
if (scheme_can_unbox_inline(arg, 5, JIT_FPR_NUM-1, 0))
@ -1685,6 +1752,7 @@ int scheme_generate_app(Scheme_App_Rec *app, Scheme_Object **alt_rands, int num_
} else {
(void)jit_movi_p(JIT_R0, NULL);
}
direct_flostack_offset++;
} else
#endif
if (inline_direct_args) {
@ -1795,7 +1863,17 @@ int scheme_generate_app(Scheme_App_Rec *app, Scheme_Object **alt_rands, int num_
CHECK_LIMIT();
} else if (inline_direct_native) {
LOG_IT(("<-native-tail\n"));
scheme_mz_flostack_restore(jitter, 0, 0, 1, 1);
#ifdef USE_FLONUM_UNBOXING
/* Copy unboxed flonums into place where the target code expects them: */
generate_fp_argument_shuffle(direct_flostack_offset, jitter);
CHECK_LIMIT();
#endif
scheme_mz_flostack_restore(jitter,
FLOSTACK_SPACE_CHUNK * ((direct_flostack_offset + (FLOSTACK_SPACE_CHUNK - 1))
/ FLOSTACK_SPACE_CHUNK),
direct_flostack_offset,
1, 1);
/* move args and call function: */
if (args_already_in_place) {
jit_movi_l(JIT_R2, args_already_in_place);
mz_set_local_p(JIT_R2, JIT_LOCAL2);