also improve JIT support for unboxed flonums in non-tail-call

Applies to non-tail calls to immediate functions, which can be
generated by a `for' or `let loop' pattern in a non-tail
position.
This commit is contained in:
Matthew Flatt 2012-07-05 10:16:43 -06:00
parent 8e6a6738bb
commit f71037c775
7 changed files with 273 additions and 79 deletions

View File

@ -1979,6 +1979,53 @@
(for-each values numlist)
(+ n1 n2)))))
;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Check JIT handling of unboxed arguments in loops,
;; including loops that start in tail and in non-tail positions.
(let ()
;; Iteration count; after N steps, x = -1.0 - N and y = 1.0 + N.
(define N 100000)
;; Flonum loop started in a NON-TAIL position (under `define-values').
(define (non-tail)
(define-values (a b)
(let loop ([n N] [x -1.0] [y 1.0])
(cond
[(zero? n) (values x y)]
[else (loop (sub1 n)
(fl+ x -1.0)
(fl+ y 1.0))])))
(values a b))
;; Same non-tail loop pattern, nested inside a `for/fold' iteration;
;; the inner loop runs 10 steps, so a = -11.0 and b = 11.0 each time.
(define (non-tail2)
(for/fold ([v 0.0]) ([i (in-range N)])
(define-values (a b)
(let loop ([n 10] [x -1.0] [y 1.0])
(cond
[(zero? n) (values x y)]
[else (loop (sub1 n)
(fl+ x -1.0)
(fl+ y 1.0))])))
(fl+ v (fl- a b))))
;; Same loop, but started in TAIL position.
(define (tail)
(let loop ([n N] [x -1.0] [y 1.0])
(cond
[(zero? n) (values x y)]
[else (loop (sub1 n)
(fl+ x -1.0)
(fl+ y 1.0))])))
;; NOTE(review): the `set!'s below presumably keep the compiler from
;; treating the functions as known/inlinable constants -- confirm.
(define x-tail #f)
(define x-non-tail #f)
(define x-non-tail2 #f)
(set! x-tail tail)
(set! x-non-tail non-tail)
(set! x-non-tail2 non-tail2)
;; N steps of (fl+ x -1.0) / (fl+ y 1.0) from -1.0 / 1.0:
(test-values '(-100001.0 100001.0) non-tail)
;; N accumulations of (fl- -11.0 11.0) = N * -22.0:
(test -2200000.0 non-tail2)
(test-values '(-100001.0 100001.0) tail))
;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(report-errs)

View File

@ -1089,8 +1089,6 @@ static int generate_flonum_local_boxing(mz_jit_state *jitter, int pos, int local
int scheme_generate_flonum_local_unboxing(mz_jit_state *jitter, int push)
/* Move FPR0 onto C stack */
{
int offset;
if (jitter->flostack_offset == jitter->flostack_space) {
int space = FLOSTACK_SPACE_CHUNK * sizeof(double);
jitter->flostack_space += FLOSTACK_SPACE_CHUNK;
@ -1102,8 +1100,7 @@ int scheme_generate_flonum_local_unboxing(mz_jit_state *jitter, int push)
mz_runstack_flonum_pushed(jitter, jitter->flostack_offset);
CHECK_LIMIT();
offset = JIT_FRAME_FLONUM_OFFSET - (jitter->flostack_offset * sizeof(double));
(void)jit_stxi_d_fppop(offset, JIT_FP, JIT_FPR0);
mz_st_fppop(jitter->flostack_offset, JIT_FPR0);
return 1;
}
@ -2278,7 +2275,7 @@ int scheme_generate(Scheme_Object *obj, mz_jit_state *jitter, int is_tail, int w
if (is_tail) {
if (!sjc.shared_tail_argc_code) {
sjc.shared_tail_argc_code = scheme_generate_shared_call(-1, jitter, 1, 1, 0, 0, 0);
sjc.shared_tail_argc_code = scheme_generate_shared_call(-1, jitter, 1, 1, 0, 0, 0, 0);
}
mz_set_local_p(JIT_R0, JIT_LOCAL2);
(void)jit_jmpi(sjc.shared_tail_argc_code);
@ -2287,7 +2284,7 @@ int scheme_generate(Scheme_Object *obj, mz_jit_state *jitter, int is_tail, int w
void *code;
if (!sjc.shared_non_tail_argc_code[mo]) {
scheme_ensure_retry_available(jitter, multi_ok);
code = scheme_generate_shared_call(-2, jitter, multi_ok, 0, 0, 0, 0);
code = scheme_generate_shared_call(-2, jitter, multi_ok, 0, 0, 0, 0, 0);
sjc.shared_non_tail_argc_code[mo] = code;
}
code = sjc.shared_non_tail_argc_code[mo];
@ -3295,10 +3292,10 @@ static int do_generate_closure(mz_jit_state *jitter, void *_data)
GC_CAN_IGNORE jit_insn *zref;
int f_offset;
/* In the case of an inline_direct_native call, the flonums are
already unpacked and JIT_SP is set up. Check whether JIT_SP
is already different than the 0 flonums. */
f_offset = JIT_FRAME_FLONUM_OFFSET - (jitter->flostack_offset * sizeof(double));
/* In the case of a direct native call, the flonums can be
already unpacked, in which case JIT_SP is set up. Check whether
JIT_SP already differs from the 0-flonums case. */
f_offset = JIT_FRAME_FLONUM_OFFSET - (jitter->flostack_space * sizeof(double));
jit_subr_p(JIT_R1, JIT_SP, JIT_FP);
zref = jit_bnei_l(jit_forward(), JIT_R1, f_offset);

View File

@ -210,7 +210,7 @@ struct scheme_jit_common_record {
#define MAX_SHARED_CALL_RANDS 25
void *shared_tail_code[4][MAX_SHARED_CALL_RANDS];
void *shared_non_tail_code[4][MAX_SHARED_CALL_RANDS][2];
void *shared_non_tail_code[5][MAX_SHARED_CALL_RANDS][2];
void *shared_non_tail_retry_code[2];
void *shared_non_tail_argc_code[2];
void *shared_tail_argc_code;
@ -683,13 +683,16 @@ int check_location;
# define NEED_LOCAL4
#endif
#define mz_set_local_p(x, l) mz_set_local_p_x(x, l, JIT_FP)
#define mz_get_local_p(x, l) mz_get_local_p_x(x, l, JIT_FP)
#ifdef MZ_USE_JIT_PPC
/* JIT_LOCAL1, JIT_LOCAL2, and JIT_LOCAL3 are offsets in the stack frame. */
# define JIT_LOCAL1 56
# define JIT_LOCAL2 60
# define JIT_LOCAL3 64
# define mz_set_local_p(x, l) jit_stxi_p(l, JIT_FP, x)
# define mz_get_local_p(x, l) jit_ldxi_p(x, JIT_FP, l)
# define mz_set_local_p_x(x, l, FP) jit_stxi_p(l, FP, x)
# define mz_get_local_p_x(x, l, FP) jit_ldxi_p(x, FP, l)
# define mz_patch_branch_at(a, v) (_jitl.long_jumps ? (void)jit_patch_movei(a-4, a-3, v) : (void)jit_patch_branch(a-1, v))
# define mz_patch_ucbranch_at(a, v) (_jitl.long_jumps ? (void)jit_patch_movei(a-4, a-3, v) : (void)jit_patch_ucbranch(a-1, v))
# define mz_prolog(x) (MFLRr(x), mz_set_local_p(x, JIT_LOCAL2))
@ -741,8 +744,8 @@ void scheme_jit_prolog_again(mz_jit_state *jitter, int n, int ret_addr_reg)
1 word (for the return address) below alignment. */
# define JIT_LOCAL1 -(JIT_WORD_SIZE * 4)
# define JIT_LOCAL2 -(JIT_WORD_SIZE * 5)
# define mz_set_local_p(x, l) jit_stxi_p((l), JIT_FP, (x))
# define mz_get_local_p(x, l) jit_ldxi_p((x), JIT_FP, (l))
# define mz_set_local_p_x(x, l, FP) jit_stxi_p((l), FP, (x))
# define mz_get_local_p_x(x, l, FP) jit_ldxi_p((x), FP, (l))
# define mz_patch_branch_at(a, v) jit_patch_branch_at(a, v)
# define mz_patch_ucbranch_at(a, v) jit_patch_ucbranch_at(a, v)
/* The ABI for _CALL_DARWIN or JIT_X86_64 requires alignment. Even
@ -826,7 +829,23 @@ void scheme_jit_prolog_again(mz_jit_state *jitter, int n, int ret_addr_reg)
# define mz_repush_threadlocal() /* empty */
#endif
#if 0
static jit_insn *fp_tmpr;
# define check_fp_depth(i, FP) \
(jit_addi_l(FP, FP, (JIT_FRAME_FLONUM_OFFSET - ((i) * sizeof(double)))), \
fp_tmpr = jit_bger_l(0, FP, JIT_SP), \
jit_ldi_p(FP, 0), \
mz_patch_branch(fp_tmpr), \
jit_subi_l(FP, FP, (JIT_FRAME_FLONUM_OFFSET - ((i) * sizeof(double)))))
#else
# define check_fp_depth(i, FP) (void)0
#endif
#define FLOSTACK_SPACE_CHUNK 4
# define mz_ld_fppush_x(r, i, FP) (check_fp_depth(i, FP), jit_ldxi_d_fppush(r, FP, (JIT_FRAME_FLONUM_OFFSET - ((i) * sizeof(double)))))
# define mz_ld_fppush(r, i) mz_ld_fppush_x(r, i, JIT_FP)
# define mz_st_fppop_x(i, r, FP) (check_fp_depth(i, FP), (void)jit_stxi_d_fppop((JIT_FRAME_FLONUM_OFFSET - ((i) * sizeof(double))), FP, r))
# define mz_st_fppop(i, r) mz_st_fppop_x(i, r, JIT_FP)
#define mz_patch_branch(a) mz_patch_branch_at(a, (_jit.x.pc))
#define mz_patch_ucbranch(a) mz_patch_ucbranch_at(a, (_jit.x.pc))
@ -1198,14 +1217,14 @@ int scheme_generate_arith(mz_jit_state *jitter, Scheme_Object *rator, Scheme_Obj
typedef struct jit_direct_arg jit_direct_arg;
void *scheme_generate_shared_call(int num_rands, mz_jit_state *old_jitter, int multi_ok, int is_tail,
int direct_prim, int direct_native, int nontail_self);
int direct_prim, int direct_native, int nontail_self, int unboxed_args);
void scheme_ensure_retry_available(mz_jit_state *jitter, int multi_ok);
int scheme_generate_app(Scheme_App_Rec *app, Scheme_Object **alt_rands, int num_rands,
mz_jit_state *jitter, int is_tail, int multi_ok, int no_call);
int scheme_generate_tail_call(mz_jit_state *jitter, int num_rands, int direct_native, int need_set_rs,
int is_inline, Scheme_Native_Closure *direct_to_code, jit_direct_arg *direct_arg);
int scheme_generate_non_tail_call(mz_jit_state *jitter, int num_rands, int direct_native, int need_set_rs,
int multi_ok, int nontail_self, int pop_and_jump, int is_inlined);
int multi_ok, int nontail_self, int pop_and_jump, int is_inlined, int unboxed_args);
int scheme_generate_finish_tail_call(mz_jit_state *jitter, int direct_native);
void scheme_jit_register_sub_func(mz_jit_state *jitter, void *code, Scheme_Object *protocol);
void scheme_jit_register_helper_func(mz_jit_state *jitter, void *code);
@ -1242,8 +1261,6 @@ int scheme_generate_non_tail_with_branch(Scheme_Object *obj, mz_jit_state *jitte
int scheme_generate(Scheme_Object *obj, mz_jit_state *jitter, int tail_ok, int wcm_may_replace, int multi_ok, int target,
Branch_Info *for_branch);
int scheme_generate_unboxed(Scheme_Object *obj, mz_jit_state *jitter, int inlined_ok, int unbox_anyway);
void *scheme_generate_shared_call(int num_rands, mz_jit_state *old_jitter, int multi_ok, int is_tail,
int direct_prim, int direct_native, int nontail_self);
#ifdef USE_FLONUM_UNBOXING
int scheme_generate_flonum_local_unboxing(mz_jit_state *jitter, int push);

View File

@ -143,7 +143,8 @@ int scheme_can_unbox_inline(Scheme_Object *obj, int fuel, int regs, int unsafely
just unbox it without using more than `regs' registers? There
cannot be any errors or function calls, unless we've specifically
instrumented them to save/pop floating-point values before
jumping. */
jumping. If the result is true, then arguments must be evaluated in
order. */
{
Scheme_Type t;
@ -932,11 +933,9 @@ int scheme_generate_arith(mz_jit_state *jitter, Scheme_Object *rator, Scheme_Obj
if (!(inlined_flonum1 && inlined_flonum2)) {
if ((can_direct1 || (unsafe_fl > 0)) && !inlined_flonum2) {
#ifdef USE_FLONUM_UNBOXING
int aoffset;
int fpr0;
fpr0 = JIT_FPR_0(jitter->unbox_depth);
aoffset = JIT_FRAME_FLONUM_OFFSET - (jitter->flostack_offset * sizeof(double));
jit_ldxi_d_fppush(fpr0, JIT_FP, aoffset);
mz_ld_fppush(fpr0, jitter->flostack_offset);
scheme_mz_flostack_restore(jitter, flostack, flopos, 1, 1);
CHECK_LIMIT();
jitter->unbox_depth++;

View File

@ -667,11 +667,16 @@ static int generate_clear_slow_previous_args(mz_jit_state *jitter)
}
int scheme_generate_non_tail_call(mz_jit_state *jitter, int num_rands, int direct_native, int need_set_rs,
int multi_ok, int nontail_self, int pop_and_jump, int is_inlined)
int multi_ok, int nontail_self, int pop_and_jump, int is_inlined,
int unboxed_args)
{
/* Non-tail call.
Proc is in V1, args are at RUNSTACK.
If nontail_self, then R0 has proc pointer, and R2 has max_let_depth.
If unboxed_args, LOCAL3 holds address with argument-copying code,
where R2 is set before jumping to the old FP, and R1 holds
return address back here, and V1 and R0 must be preserved;
num_rands >= 0 in this case, and the "slow path" returns NULL.
If num_rands < 0, then argc is in R0, and need to pop runstack before returning.
If num_rands == -1, skip prolog. */
GC_CAN_IGNORE jit_insn *ref, *ref2, *ref4, *ref5, *ref6, *ref7, *ref8, *ref9;
@ -738,6 +743,9 @@ int scheme_generate_non_tail_call(mz_jit_state *jitter, int num_rands, int direc
jit_movr_p(JIT_R2, JIT_V1);
jit_movr_p(JIT_V1, JIT_R0);
}
if (unboxed_args) {
jit_movr_p(JIT_R2, JIT_FP); /* save old FP */
}
jit_shuffle_saved_regs(); /* maybe copies V registers to be restored */
#ifdef MZ_USE_JIT_I386
/* keep call & ret paired by jumping to where we really
@ -752,7 +760,7 @@ int scheme_generate_non_tail_call(mz_jit_state *jitter, int num_rands, int direc
if (num_rands >= 0) {
if (nontail_self) { jit_movr_p(JIT_R1, JIT_R0); }
jit_movr_p(JIT_R0, JIT_V1); /* closure */
if (!nontail_self) {
if (!nontail_self && !unboxed_args) {
/* nontail_self is only enabled when there are no rest args: */
jit_movi_i(JIT_R1, num_rands); /* argc */
jit_movr_p(JIT_R2, JIT_RUNSTACK); /* argv */
@ -769,8 +777,20 @@ int scheme_generate_non_tail_call(mz_jit_state *jitter, int num_rands, int direc
jit_movr_p(JIT_R2, JIT_RUNSTACK); /* argv */
}
CHECK_LIMIT();
if (unboxed_args) {
/* old FP is still in R2 */
mz_get_local_p_x(JIT_V1, JIT_LOCAL3, JIT_R2);
}
mz_push_locals();
mz_repush_threadlocal();
if (unboxed_args) {
GC_CAN_IGNORE jit_insn *refrr;
refrr = jit_patchable_movi_p(JIT_R1, jit_forward());
jit_jmpr(JIT_V1);
jit_patch_movi(refrr, _jit.x.pc);
jit_movi_i(JIT_R1, num_rands); /* argc */
jit_movr_p(JIT_R2, JIT_RUNSTACK); /* argv */
}
if (!nontail_self) {
jit_ldxi_p(JIT_V1, JIT_R0, &((Scheme_Native_Closure *)0x0)->code);
if (direct_native) {
@ -925,10 +945,13 @@ int scheme_generate_non_tail_call(mz_jit_state *jitter, int num_rands, int direc
/* The slow way: */
mz_patch_branch(ref9);
generate_pause_for_gc_and_retry(jitter,
1, /* in short jumps */
JIT_V1, /* expose V1 to GC */
refagain); /* retry code pointer */
if (!unboxed_args) {
generate_pause_for_gc_and_retry(jitter,
1, /* in short jumps */
JIT_V1, /* expose V1 to GC */
refagain); /* retry code pointer */
}
CHECK_LIMIT();
if (!direct_native) {
mz_patch_branch(ref);
@ -939,24 +962,34 @@ int scheme_generate_non_tail_call(mz_jit_state *jitter, int num_rands, int direc
#ifndef FUEL_AUTODECEREMENTS
mz_patch_branch(ref11);
#endif
if (need_set_rs) {
JIT_UPDATE_THREAD_RSPTR();
}
if (num_rands >= 0) {
jit_movi_i(JIT_R0, num_rands);
}
mz_prepare(3);
CHECK_LIMIT();
jit_pusharg_p(JIT_RUNSTACK);
jit_pusharg_i(JIT_R0);
jit_pusharg_p(JIT_V1);
if (num_rands < 0) { jit_movr_p(JIT_V1, JIT_R0); } /* save argc to manually pop runstack */
if (multi_ok) {
(void)mz_finish_lwe(x_ts__scheme_apply_multi_from_native, refrts);
if (unboxed_args) {
/* no slow path here; return NULL so the caller boxes the arguments and falls back to the generic path */
jit_movi_p(JIT_R0, NULL);
if (pop_and_jump) {
mz_epilog(JIT_V1);
}
} else {
(void)mz_finish_lwe(x_ts__scheme_apply_from_native, refrts);
/* normal slow path: */
if (need_set_rs) {
JIT_UPDATE_THREAD_RSPTR();
}
if (num_rands >= 0) {
jit_movi_i(JIT_R0, num_rands);
}
mz_prepare(3);
CHECK_LIMIT();
jit_pusharg_p(JIT_RUNSTACK);
jit_pusharg_i(JIT_R0);
jit_pusharg_p(JIT_V1);
if (num_rands < 0) { jit_movr_p(JIT_V1, JIT_R0); } /* save argc to manually pop runstack */
if (multi_ok) {
(void)mz_finish_lwe(x_ts__scheme_apply_multi_from_native, refrts);
} else {
(void)mz_finish_lwe(x_ts__scheme_apply_from_native, refrts);
}
CHECK_LIMIT();
}
CHECK_LIMIT();
mz_patch_ucbranch(ref5);
if (!direct_native) {
mz_patch_ucbranch(ref8);
@ -1032,13 +1065,11 @@ static int generate_self_tail_call(Scheme_Object *rator, mz_jit_state *jitter, i
int is_flonum, already_unboxed = 0;
if ((SCHEME_CLOSURE_DATA_FLAGS(jitter->self_data) & CLOS_HAS_TYPED_ARGS)
&& CLOSURE_ARGUMENT_IS_FLONUM(jitter->self_data, i + args_already_in_place)) {
int aoffset;
is_flonum = 1;
rand = (alt_rands
? alt_rands[i+1+args_already_in_place]
: app->args[i+1+args_already_in_place]);
aoffset = JIT_FRAME_FLONUM_OFFSET - (arg_tmp_offset * sizeof(double));
jit_ldxi_d_fppush(JIT_FPR0, JIT_FP, aoffset);
mz_ld_fppush(JIT_FPR0, arg_tmp_offset);
--arg_tmp_offset;
already_unboxed = 1;
if (!already_loaded && !SAME_TYPE(SCHEME_TYPE(rand), scheme_local_type)) {
@ -1053,11 +1084,9 @@ static int generate_self_tail_call(Scheme_Object *rator, mz_jit_state *jitter, i
jit_stxi_p(WORDS_TO_BYTES(i + closure_size + args_already_in_place), JIT_R2, JIT_R0);
#ifdef USE_FLONUM_UNBOXING
if (is_flonum) {
int aoffset;
if (!already_unboxed)
jit_ldxi_d_fppush(JIT_FPR0, JIT_R0, &((Scheme_Double *)0x0)->double_val);
aoffset = JIT_FRAME_FLONUM_OFFSET - (arg_offset * sizeof(double));
(void)jit_stxi_d_fppop(aoffset, JIT_FP, JIT_FPR0);
mz_st_fppop(arg_offset, JIT_FPR0);
arg_offset++;
}
#endif
@ -1143,9 +1172,7 @@ static int generate_self_tail_call(Scheme_Object *rator, mz_jit_state *jitter, i
iref = jit_bnei_p(jit_forward(), JIT_R0, NULL);
__END_TINY_JUMPS__(1);
{
int aoffset;
aoffset = JIT_FRAME_FLONUM_OFFSET - (arg_tmp_offset * sizeof(double));
jit_ldxi_d_fppush(JIT_FPR0, JIT_FP, aoffset);
mz_ld_fppush(JIT_FPR0, arg_tmp_offset);
(void)jit_calli(sjc.box_flonum_from_stack_code);
mz_ld_runstack_base_alt(JIT_R2);
jit_subi_p(JIT_R2, JIT_RUNSTACK_BASE_OR_ALT(JIT_R2), WORDS_TO_BYTES(num_rands + closure_size + args_already_in_place));
@ -1186,7 +1213,7 @@ typedef struct {
mz_jit_state *old_jitter;
int multi_ok;
int is_tail;
int direct_prim, direct_native, nontail_self;
int direct_prim, direct_native, nontail_self, unboxed_args;
} Generate_Call_Data;
void scheme_jit_register_sub_func(mz_jit_state *jitter, void *code, Scheme_Object *protocol)
@ -1243,7 +1270,7 @@ static int do_generate_shared_call(mz_jit_state *jitter, void *_data)
ok = generate_direct_prim_non_tail_call(jitter, data->num_rands, data->multi_ok, 1);
else
ok = scheme_generate_non_tail_call(jitter, data->num_rands, data->direct_native, 1,
data->multi_ok, data->nontail_self, 1, 0);
data->multi_ok, data->nontail_self, 1, 0, data->unboxed_args);
scheme_jit_register_sub_func(jitter, code, scheme_false);
@ -1252,7 +1279,7 @@ static int do_generate_shared_call(mz_jit_state *jitter, void *_data)
}
void *scheme_generate_shared_call(int num_rands, mz_jit_state *old_jitter, int multi_ok, int is_tail,
int direct_prim, int direct_native, int nontail_self)
int direct_prim, int direct_native, int nontail_self, int unboxed_args)
{
Generate_Call_Data data;
@ -1263,6 +1290,7 @@ void *scheme_generate_shared_call(int num_rands, mz_jit_state *old_jitter, int m
data.direct_prim = direct_prim;
data.direct_native = direct_native;
data.nontail_self = nontail_self;
data.unboxed_args = unboxed_args;
return scheme_generate_one(old_jitter, do_generate_shared_call, &data, 0, NULL, NULL);
}
@ -1272,7 +1300,7 @@ void scheme_ensure_retry_available(mz_jit_state *jitter, int multi_ok)
int mo = multi_ok ? 1 : 0;
if (!sjc.shared_non_tail_retry_code[mo]) {
void *code;
code = scheme_generate_shared_call(-1, jitter, multi_ok, 0, 0, 0, 0);
code = scheme_generate_shared_call(-1, jitter, multi_ok, 0, 0, 0, 0, 0);
sjc.shared_non_tail_retry_code[mo] = code;
}
}
@ -1410,17 +1438,14 @@ static jit_direct_arg *check_special_direct_args(Scheme_App_Rec *app, Scheme_Obj
return inline_direct_args;
}
#ifdef USE_FLONUM_UNBOXING
int generate_fp_argument_shuffle(int direct_flostack_offset, mz_jit_state *jitter)
static int generate_fp_argument_shuffle(int direct_flostack_offset, mz_jit_state *jitter)
{
int i, j;
/* Copy unboxed flonums into place where the target code expects them,
which is shifted and reverse of the order that we pushed. */
# define mz_ld_fppush(r, i) jit_ldxi_d_fppush(r, JIT_FP, (JIT_FRAME_FLONUM_OFFSET - ((i) * sizeof(double))))
# define mz_st_fppop(i, r) (void)jit_stxi_d_fppop((JIT_FRAME_FLONUM_OFFSET - ((i) * sizeof(double))), JIT_FP, r)
if (direct_flostack_offset
&& ((direct_flostack_offset > 1)
|| (direct_flostack_offset != jitter->flostack_offset))) {
@ -1472,6 +1497,78 @@ int generate_fp_argument_shuffle(int direct_flostack_offset, mz_jit_state *jitte
return 1;
}
static int generate_call_path_with_unboxes(mz_jit_state *jitter, int direct_flostack_offset, void *unboxed_code,
GC_CAN_IGNORE jit_insn **_refdone,
int num_rands, Scheme_Closure_Data *direct_data, Scheme_Object *rator)
{
GC_CAN_IGNORE jit_insn *refdone, *refgo, *refcopy, *ref;
int i, k, offset;
refgo = jit_jmpi(jit_forward());
refcopy = _jit.x.pc;
/* Callback code to copy unboxed arguments.
R1 has the return address, R2 holds the old FP */
offset = FLOSTACK_SPACE_CHUNK * ((direct_flostack_offset + (FLOSTACK_SPACE_CHUNK - 1))
/ FLOSTACK_SPACE_CHUNK);
jit_subi_l(JIT_SP, JIT_SP, offset * sizeof(double));
for (i = 0; i < direct_flostack_offset; i++) {
int i_pos, a_pos;
i_pos = jitter->flostack_offset - direct_flostack_offset + i + 1;
a_pos = direct_flostack_offset - i;
mz_ld_fppush_x(JIT_FPR0, i_pos, JIT_R2);
mz_st_fppop(a_pos, JIT_FPR0);
CHECK_LIMIT();
}
jit_jmpr(JIT_R1);
mz_patch_ucbranch(refgo);
/* install callback pointer and jump to shared code: */
(void)jit_patchable_movi_p(JIT_R1, refcopy);
mz_set_local_p(JIT_R1, JIT_LOCAL3);
(void)jit_calli(unboxed_code);
refdone = jit_bnei_p(jit_forward(), JIT_R0, NULL);
*_refdone = refdone;
CHECK_LIMIT();
/* box arguments for slow path */
for (i = 0, k = 0; i < num_rands; i++) {
if ((SCHEME_CLOSURE_DATA_FLAGS(direct_data) & CLOS_HAS_TYPED_ARGS)
&& (CLOSURE_ARGUMENT_IS_FLONUM(direct_data, i))) {
k++;
offset = jitter->flostack_offset - direct_flostack_offset + k;
offset = JIT_FRAME_FLONUM_OFFSET - (offset * sizeof(double));
jit_ldxi_p(JIT_R0, JIT_RUNSTACK, WORDS_TO_BYTES(i));
__START_TINY_JUMPS__(1);
ref = jit_bnei_p(jit_forward(), JIT_R0, NULL);
__END_TINY_JUMPS__(1);
CHECK_LIMIT();
jit_movi_l(JIT_R0, offset);
(void)jit_calli(sjc.box_flonum_from_stack_code);
jit_stxi_p(WORDS_TO_BYTES(i), JIT_RUNSTACK, JIT_R0);
__START_TINY_JUMPS__(1);
mz_patch_branch(ref);
__END_TINY_JUMPS__(1);
}
}
/* Reset V1 to rator for slow path: */
scheme_generate(rator, jitter, 0, 0, 0, JIT_V1, NULL);
mz_rs_sync();
return 1;
}
#endif
int scheme_generate_app(Scheme_App_Rec *app, Scheme_Object **alt_rands, int num_rands,
mz_jit_state *jitter, int is_tail, int multi_ok, int no_call)
/* de-sync'd ok
@ -1482,7 +1579,7 @@ int scheme_generate_app(Scheme_App_Rec *app, Scheme_Object **alt_rands, int num_
int direct_prim = 0, need_non_tail = 0, direct_native = 0, direct_self = 0, nontail_self = 0;
Scheme_Native_Closure *inline_direct_native = NULL;
Scheme_Closure_Data *direct_data = NULL;
int direct_flostack_offset = 0;
int direct_flostack_offset = 0, unboxed_non_tail_args = 0;
jit_direct_arg *inline_direct_args = NULL;
int proc_already_in_place = 0;
Scheme_Object *rator, *v, *arg;
@ -1570,7 +1667,7 @@ int scheme_generate_app(Scheme_App_Rec *app, Scheme_Object **alt_rands, int num_
direct_self = 1;
else if (jitter->self_nontail_code)
nontail_self = 1;
} else if (is_tail) {
} else {
Scheme_Closure *c = (Scheme_Closure *)rator;
if (ZERO_SIZED_CLOSUREP(c)) {
/* If we're calling a constant function in tail position, then
@ -1587,12 +1684,19 @@ int scheme_generate_app(Scheme_App_Rec *app, Scheme_Object **alt_rands, int num_
}
}
if (nc->code->start_code != scheme_on_demand_jit_code) {
if (nc->code->max_let_depth > jitter->max_tail_depth)
jitter->max_tail_depth = nc->code->max_let_depth;
direct_data = data; /* for flonum handling */
inline_direct_native = nc;
if (is_tail) {
if (nc->code->max_let_depth > jitter->max_tail_depth)
jitter->max_tail_depth = nc->code->max_let_depth;
direct_data = data; /* for flonum handling */
inline_direct_native = nc;
} else {
if (num_rands < MAX_SHARED_CALL_RANDS) {
direct_data = data;
unboxed_non_tail_args = 1;
}
}
}
}
}
@ -1844,16 +1948,18 @@ int scheme_generate_app(Scheme_App_Rec *app, Scheme_Object **alt_rands, int num_
generate_nontail_self_setup(jitter);
}
scheme_generate_non_tail_call(jitter, num_rands, direct_native, jitter->need_set_rs,
multi_ok, nontail_self, 0, 1);
multi_ok, nontail_self, 0, 1, 0);
}
}
} else {
/* Jump to code to implement a tail call for num_rands arguments */
/* Jump to code to implement a [tail-]call for `num_rands' arguments */
void *code;
int dp = (direct_prim ? 1 : (direct_native ? (1 + direct_native + (nontail_self ? 1 : 0)) : 0));
/* if unboxed_non_tail_args, then we'll also use index 4 in place of dp */
if (is_tail) {
if (!sjc.shared_tail_code[dp][num_rands]) {
code = scheme_generate_shared_call(num_rands, jitter, multi_ok, is_tail, direct_prim, direct_native, 0);
code = scheme_generate_shared_call(num_rands, jitter, multi_ok, is_tail, direct_prim, direct_native, 0, 0);
sjc.shared_tail_code[dp][num_rands] = code;
}
code = sjc.shared_tail_code[dp][num_rands];
@ -1897,10 +2003,24 @@ int scheme_generate_app(Scheme_App_Rec *app, Scheme_Object **alt_rands, int num_
}
} else {
int mo = (multi_ok ? 1 : 0);
void *unboxed_code;
if (unboxed_non_tail_args && !direct_flostack_offset)
unboxed_non_tail_args = 0;
if (unboxed_non_tail_args) {
if (!sjc.shared_non_tail_code[4][num_rands][mo]) {
scheme_ensure_retry_available(jitter, multi_ok);
code = scheme_generate_shared_call(num_rands, jitter, multi_ok, is_tail, direct_prim, direct_native, nontail_self, 1);
sjc.shared_non_tail_code[4][num_rands][mo] = code;
}
unboxed_code = sjc.shared_non_tail_code[4][num_rands][mo];
} else
unboxed_code = NULL;
if (!sjc.shared_non_tail_code[dp][num_rands][mo]) {
scheme_ensure_retry_available(jitter, multi_ok);
code = scheme_generate_shared_call(num_rands, jitter, multi_ok, is_tail, direct_prim, direct_native, nontail_self);
code = scheme_generate_shared_call(num_rands, jitter, multi_ok, is_tail, direct_prim, direct_native, nontail_self, 0);
sjc.shared_non_tail_code[dp][num_rands][mo] = code;
}
LOG_IT(("<-non-tail %d %d %d\n", dp, num_rands, mo));
@ -1917,7 +2037,21 @@ int scheme_generate_app(Scheme_App_Rec *app, Scheme_Object **alt_rands, int num_
else
(void)jit_calli(sjc.apply_to_list_code);
} else {
GC_CAN_IGNORE jit_insn *refdone;
#ifdef USE_FLONUM_UNBOXING
if (unboxed_code) {
generate_call_path_with_unboxes(jitter, direct_flostack_offset, unboxed_code, &refdone,
num_rands, direct_data, rator);
CHECK_LIMIT();
} else
#endif
refdone = NULL;
(void)jit_calli(code);
if (refdone)
mz_patch_branch(refdone);
}
if (direct_prim) {

View File

@ -971,7 +971,7 @@ static int generate_apply_proxy(mz_jit_state *jitter, int setter)
CHECK_LIMIT();
JIT_UPDATE_THREAD_RSPTR();
__END_SHORT_JUMPS__(1);
scheme_generate_non_tail_call(jitter, 3, 0, 0, 0, 0, 0, 1);
scheme_generate_non_tail_call(jitter, 3, 0, 0, 0, 0, 0, 1, 0);
__START_SHORT_JUMPS__(1);
CHECK_LIMIT();
if (setter) {
@ -2844,7 +2844,7 @@ static int more_common0(mz_jit_state *jitter, void *_data)
mz_rs_sync();
__END_SHORT_JUMPS__(1);
scheme_generate_non_tail_call(jitter, 2, 0, 1, 0, 0, 0, 0);
scheme_generate_non_tail_call(jitter, 2, 0, 1, 0, 0, 0, 0, 0);
CHECK_LIMIT();
__START_SHORT_JUMPS__(1);
@ -3279,7 +3279,7 @@ static int more_common1(mz_jit_state *jitter, void *_data)
__END_SHORT_JUMPS__(1);
scheme_generate_non_tail_call(jitter, -1, 0, 1, multi_ok, 0, 1, 0);
scheme_generate_non_tail_call(jitter, -1, 0, 1, multi_ok, 0, 1, 0, 0);
scheme_jit_register_sub_func(jitter, code, scheme_false);
}

View File

@ -98,7 +98,7 @@
#define jit_ldxr_d(f0, r0, r1) MOVSDmr(0, r0, r1, _SCL1, f0)
#define jit_ldxi_d(f0, r0, i0) MOVSDmr(i0, r0, _NOREG, _SCL1, f0);
#define jit_ldxi_d(f0, r0, i0) MOVSDmr(i0, r0, _NOREG, _SCL1, f0)
#define jit_str_d(r0, f0) MOVSDrm(f0, 0, r0, _NOREG, _SCL1)