JIT: refactor unboxing flostack to better support other datatypes

Instead of keeping offsets in terms of `double`s, keep them in
terms of bytes. This change is a step toward putting other kinds
of values on the flostack, such as extended-precision floats.
This commit is contained in:
Matthew Flatt 2012-11-17 21:43:01 -07:00
parent 4e6f8af667
commit 81fc033f93
4 changed files with 41 additions and 41 deletions

View File

@ -1087,8 +1087,8 @@ int scheme_generate_flonum_local_boxing(mz_jit_state *jitter, int pos, int offse
static int generate_flonum_local_boxing(mz_jit_state *jitter, int pos, int local_pos, int target) static int generate_flonum_local_boxing(mz_jit_state *jitter, int pos, int local_pos, int target)
{ {
int offset; int offset;
offset = scheme_mz_flonum_pos(jitter, local_pos); offset = scheme_mz_flostack_pos(jitter, local_pos);
offset = JIT_FRAME_FLONUM_OFFSET - (offset * sizeof(double)); offset = JIT_FRAME_FLOSTACK_OFFSET - offset;
if (jitter->unbox) { if (jitter->unbox) {
int fpr0; int fpr0;
fpr0 = JIT_FPR_0(jitter->unbox_depth); fpr0 = JIT_FPR_0(jitter->unbox_depth);
@ -1105,13 +1105,13 @@ static int generate_flonum_local_boxing(mz_jit_state *jitter, int pos, int local
int scheme_generate_flonum_local_unboxing(mz_jit_state *jitter, int push) int scheme_generate_flonum_local_unboxing(mz_jit_state *jitter, int push)
/* Move FPR0 onto C stack */ /* Move FPR0 onto C stack */
{ {
if (jitter->flostack_offset == jitter->flostack_space) { if ((jitter->flostack_offset + sizeof(double)) > jitter->flostack_space) {
int space = FLOSTACK_SPACE_CHUNK * sizeof(double); int space = FLOSTACK_SPACE_CHUNK;
jitter->flostack_space += FLOSTACK_SPACE_CHUNK; jitter->flostack_space += space;
jit_subi_l(JIT_SP, JIT_SP, space); jit_subi_l(JIT_SP, JIT_SP, space);
} }
jitter->flostack_offset += 1; jitter->flostack_offset += sizeof(double);
if (push) if (push)
mz_runstack_flonum_pushed(jitter, jitter->flostack_offset); mz_runstack_flonum_pushed(jitter, jitter->flostack_offset);
CHECK_LIMIT(); CHECK_LIMIT();
@ -3339,7 +3339,7 @@ static int do_generate_closure(mz_jit_state *jitter, void *_data)
/* In the case of a direct native call, the flonums can be /* In the case of a direct native call, the flonums can be
already unpacked, in which case JIT_SP is set up. Check whether already unpacked, in which case JIT_SP is set up. Check whether
JIT_SP is already different than the 0-flonums case. */ JIT_SP is already different than the 0-flonums case. */
f_offset = JIT_FRAME_FLONUM_OFFSET - (jitter->flostack_space * sizeof(double)); f_offset = JIT_FRAME_FLOSTACK_OFFSET - jitter->flostack_space;
jit_subr_p(JIT_R1, JIT_SP, JIT_FP); jit_subr_p(JIT_R1, JIT_SP, JIT_FP);
zref = jit_bnei_l(jit_forward(), JIT_R1, f_offset); zref = jit_bnei_l(jit_forward(), JIT_R1, f_offset);

View File

@ -329,7 +329,7 @@ typedef struct mz_jit_state {
. shift >>2 to get flags . shift >>2 to get flags
. 1 -> case 0x2 bit: . 1 -> case 0x2 bit:
. 0 -> shift >>2 to get new (native) pushed . 0 -> shift >>2 to get new (native) pushed
. 1 -> shift >>2 to get flonum stack pos */ . 1 -> shift >>2 to get flostack offset */
int num_mappings, mappings_size; int num_mappings, mappings_size;
int retained, retained_double; int retained, retained_double;
int need_set_rs; int need_set_rs;
@ -706,7 +706,7 @@ int check_location;
LOCAL4 (x86_64: = saved R14 otherwise when THREAD_LOCAL LOCAL4 (x86_64: = saved R14 otherwise when THREAD_LOCAL
x86: = RUNSTACK_BASE or THREAD_LOCAL) x86: = RUNSTACK_BASE or THREAD_LOCAL)
[some empty slots, maybe, depending on alignment] [some empty slots, maybe, depending on alignment]
[space for local, unboxed flonums] [space for "flostack" --- local unboxed values, such as flonums]
Registers: JIT_V1 = RUNSTACK, JIT_V2 = x86_64: RUNSTACK_BASE Registers: JIT_V1 = RUNSTACK, JIT_V2 = x86_64: RUNSTACK_BASE
x86: RUNSTACK_BASE or THREAD_LOCAL x86: RUNSTACK_BASE or THREAD_LOCAL
x86_64: JIT_R14 = THREAD_LOCAL x86_64: JIT_R14 = THREAD_LOCAL
@ -823,7 +823,7 @@ void scheme_jit_prolog_again(mz_jit_state *jitter, int n, int ret_addr_reg)
# endif # endif
# define mz_push_locals() SUBQir((LOCAL_FRAME_SIZE << JIT_LOG_WORD_SIZE), JIT_SP) # define mz_push_locals() SUBQir((LOCAL_FRAME_SIZE << JIT_LOG_WORD_SIZE), JIT_SP)
# define mz_pop_locals() ADDQir((LOCAL_FRAME_SIZE << JIT_LOG_WORD_SIZE), JIT_SP) # define mz_pop_locals() ADDQir((LOCAL_FRAME_SIZE << JIT_LOG_WORD_SIZE), JIT_SP)
# define JIT_FRAME_FLONUM_OFFSET (-(JIT_WORD_SIZE * (LOCAL_FRAME_SIZE + 3))) # define JIT_FRAME_FLOSTACK_OFFSET (-(JIT_WORD_SIZE * (LOCAL_FRAME_SIZE + 3)))
# define _jit_prolog_again(jitter, n, ret_addr_reg) (PUSHQr(ret_addr_reg), jit_base_prolog()) # define _jit_prolog_again(jitter, n, ret_addr_reg) (PUSHQr(ret_addr_reg), jit_base_prolog())
# if defined(MZ_USE_JIT_X86_64) && !defined(_WIN64) # if defined(MZ_USE_JIT_X86_64) && !defined(_WIN64)
# define jit_shuffle_saved_regs() (MOVQrr(_ESI, _R12), MOVQrr(_EDI, _R13)) # define jit_shuffle_saved_regs() (MOVQrr(_ESI, _R12), MOVQrr(_EDI, _R13))
@ -865,19 +865,19 @@ void scheme_jit_prolog_again(mz_jit_state *jitter, int n, int ret_addr_reg)
#if 0 #if 0
static jit_insn *fp_tmpr; static jit_insn *fp_tmpr;
# define check_fp_depth(i, FP) \ # define check_fp_depth(i, FP) \
(jit_addi_l(FP, FP, (JIT_FRAME_FLONUM_OFFSET - ((i) * sizeof(double)))), \ (jit_addi_l(FP, FP, (JIT_FRAME_FLOSTACK_OFFSET - (i))), \
fp_tmpr = jit_bger_l(0, FP, JIT_SP), \ fp_tmpr = jit_bger_l(0, FP, JIT_SP), \
jit_ldi_p(FP, 0), \ jit_ldi_p(FP, 0), \
mz_patch_branch(fp_tmpr), \ mz_patch_branch(fp_tmpr), \
jit_subi_l(FP, FP, (JIT_FRAME_FLONUM_OFFSET - ((i) * sizeof(double))))) jit_subi_l(FP, FP, (JIT_FRAME_FLOSTACK_OFFSET - (i))))
#else #else
# define check_fp_depth(i, FP) (void)0 # define check_fp_depth(i, FP) (void)0
#endif #endif
#define FLOSTACK_SPACE_CHUNK 4 #define FLOSTACK_SPACE_CHUNK 16
# define mz_ld_fppush_x(r, i, FP) (check_fp_depth(i, FP), jit_ldxi_d_fppush(r, FP, (JIT_FRAME_FLONUM_OFFSET - ((i) * sizeof(double))))) # define mz_ld_fppush_x(r, i, FP) (check_fp_depth(i, FP), jit_ldxi_d_fppush(r, FP, (JIT_FRAME_FLOSTACK_OFFSET - (i))))
# define mz_ld_fppush(r, i) mz_ld_fppush_x(r, i, JIT_FP) # define mz_ld_fppush(r, i) mz_ld_fppush_x(r, i, JIT_FP)
# define mz_st_fppop_x(i, r, FP) (check_fp_depth(i, FP), (void)jit_stxi_d_fppop((JIT_FRAME_FLONUM_OFFSET - ((i) * sizeof(double))), FP, r)) # define mz_st_fppop_x(i, r, FP) (check_fp_depth(i, FP), (void)jit_stxi_d_fppop((JIT_FRAME_FLOSTACK_OFFSET - (i)), FP, r))
# define mz_st_fppop(i, r) mz_st_fppop_x(i, r, JIT_FP) # define mz_st_fppop(i, r) mz_st_fppop_x(i, r, JIT_FP)
#define mz_patch_branch(a) mz_patch_branch_at(a, (_jit.x.pc)) #define mz_patch_branch(a) mz_patch_branch_at(a, (_jit.x.pc))
@ -1195,7 +1195,7 @@ void scheme_mz_popr_p_it(mz_jit_state *jitter, int reg, int discard);
void scheme_mz_need_space(mz_jit_state *jitter, int need_extra); void scheme_mz_need_space(mz_jit_state *jitter, int need_extra);
int scheme_stack_safety(mz_jit_state *jitter, int cnt, int offset); int scheme_stack_safety(mz_jit_state *jitter, int cnt, int offset);
#ifdef USE_FLONUM_UNBOXING #ifdef USE_FLONUM_UNBOXING
int scheme_mz_flonum_pos(mz_jit_state *jitter, int i); int scheme_mz_flostack_pos(mz_jit_state *jitter, int i);
#endif #endif
void scheme_mz_load_retained(mz_jit_state *jitter, int rs, void *o); void scheme_mz_load_retained(mz_jit_state *jitter, int rs, void *o);

View File

@ -1079,7 +1079,7 @@ static int generate_self_tail_call(Scheme_Object *rator, mz_jit_state *jitter, i
int closure_size = jitter->self_closure_size; int closure_size = jitter->self_closure_size;
int space, offset; int space, offset;
#ifdef USE_FLONUM_UNBOXING #ifdef USE_FLONUM_UNBOXING
int arg_offset = 1, arg_tmp_offset; int arg_offset = 0, arg_tmp_offset;
Scheme_Object *rand; Scheme_Object *rand;
#endif #endif
@ -1124,7 +1124,7 @@ static int generate_self_tail_call(Scheme_Object *rator, mz_jit_state *jitter, i
? alt_rands[i+1+args_already_in_place] ? alt_rands[i+1+args_already_in_place]
: app->args[i+1+args_already_in_place]); : app->args[i+1+args_already_in_place]);
mz_ld_fppush(JIT_FPR0, arg_tmp_offset); mz_ld_fppush(JIT_FPR0, arg_tmp_offset);
--arg_tmp_offset; arg_tmp_offset -= sizeof(double);
already_unboxed = 1; already_unboxed = 1;
if (!already_loaded && !SAME_TYPE(SCHEME_TYPE(rand), scheme_local_type)) { if (!already_loaded && !SAME_TYPE(SCHEME_TYPE(rand), scheme_local_type)) {
already_loaded = 1; already_loaded = 1;
@ -1140,8 +1140,8 @@ static int generate_self_tail_call(Scheme_Object *rator, mz_jit_state *jitter, i
if (is_flonum) { if (is_flonum) {
if (!already_unboxed) if (!already_unboxed)
jit_ldxi_d_fppush(JIT_FPR0, JIT_R0, &((Scheme_Double *)0x0)->double_val); jit_ldxi_d_fppush(JIT_FPR0, JIT_R0, &((Scheme_Double *)0x0)->double_val);
arg_offset += sizeof(double);
mz_st_fppop(arg_offset, JIT_FPR0); mz_st_fppop(arg_offset, JIT_FPR0);
arg_offset++;
} }
#endif #endif
CHECK_LIMIT(); CHECK_LIMIT();
@ -1179,7 +1179,7 @@ static int generate_self_tail_call(Scheme_Object *rator, mz_jit_state *jitter, i
: app->args[i+1+args_already_in_place]); : app->args[i+1+args_already_in_place]);
if (!SAME_TYPE(SCHEME_TYPE(rand), scheme_local_type) if (!SAME_TYPE(SCHEME_TYPE(rand), scheme_local_type)
|| (SCHEME_GET_LOCAL_TYPE(rand) == SCHEME_LOCAL_TYPE_FLONUM)) { || (SCHEME_GET_LOCAL_TYPE(rand) == SCHEME_LOCAL_TYPE_FLONUM)) {
int aoffset = JIT_FRAME_FLONUM_OFFSET - (arg_tmp_offset * sizeof(double)); int aoffset = JIT_FRAME_FLOSTACK_OFFSET - arg_tmp_offset;
GC_CAN_IGNORE jit_insn *iref; GC_CAN_IGNORE jit_insn *iref;
if (i != num_rands - 1) if (i != num_rands - 1)
mz_pushr_p(JIT_R0); mz_pushr_p(JIT_R0);
@ -1207,7 +1207,7 @@ static int generate_self_tail_call(Scheme_Object *rator, mz_jit_state *jitter, i
CHECK_LIMIT(); CHECK_LIMIT();
if (i != num_rands - 1) if (i != num_rands - 1)
mz_popr_p(JIT_R0); mz_popr_p(JIT_R0);
--arg_tmp_offset; arg_tmp_offset -= sizeof(double);
} }
} }
} }
@ -1226,7 +1226,7 @@ static int generate_self_tail_call(Scheme_Object *rator, mz_jit_state *jitter, i
iref = jit_bnei_p(jit_forward(), JIT_R0, NULL); iref = jit_bnei_p(jit_forward(), JIT_R0, NULL);
__END_TINY_JUMPS__(1); __END_TINY_JUMPS__(1);
{ {
int aoffset = JIT_FRAME_FLONUM_OFFSET - (arg_tmp_offset * sizeof(double)); int aoffset = JIT_FRAME_FLOSTACK_OFFSET - arg_tmp_offset;
jit_movi_l(JIT_R0, aoffset); jit_movi_l(JIT_R0, aoffset);
(void)jit_calli(sjc.box_flonum_from_stack_code); (void)jit_calli(sjc.box_flonum_from_stack_code);
mz_ld_runstack_base_alt(JIT_R2); mz_ld_runstack_base_alt(JIT_R2);
@ -1238,7 +1238,7 @@ static int generate_self_tail_call(Scheme_Object *rator, mz_jit_state *jitter, i
__END_TINY_JUMPS__(1); __END_TINY_JUMPS__(1);
mz_popr_p(JIT_R0); mz_popr_p(JIT_R0);
CHECK_LIMIT(); CHECK_LIMIT();
--arg_tmp_offset; arg_tmp_offset -= sizeof(double);
} }
} }
} }
@ -1521,18 +1521,18 @@ static int generate_fp_argument_shuffle(int direct_flostack_offset, mz_jit_state
/* Copy unboxed flonums into place where the target code expects them, /* Copy unboxed flonums into place where the target code expects them,
which is shifted and reverse of the order that we pushed. */ which is shifted and reverse of the order that we pushed. */
if (direct_flostack_offset if (direct_flostack_offset
&& ((direct_flostack_offset > 1) && ((direct_flostack_offset > sizeof(double))
|| (direct_flostack_offset != jitter->flostack_offset))) { || (direct_flostack_offset != jitter->flostack_offset))) {
/* If the source and target areas don't overlap (or if they /* If the source and target areas don't overlap (or if they
overlap only by one item), we can do it in one step, otherwise overlap only by one item), we can do it in one step, otherwise
reverse then shift. */ reverse then shift. */
if (jitter->flostack_offset >= ((2 * direct_flostack_offset) - 1)) { if (jitter->flostack_offset >= ((2 * direct_flostack_offset) - sizeof(double))) {
/* one step: */ /* one step: */
if (direct_flostack_offset != jitter->flostack_offset) { if (direct_flostack_offset != jitter->flostack_offset) {
/* shift: */ /* shift: */
for (i = 0; i < direct_flostack_offset; i++) { for (i = 0; i < direct_flostack_offset; i += sizeof(double)) {
int i_pos, a_pos; int i_pos, a_pos;
i_pos = jitter->flostack_offset - direct_flostack_offset + i + 1; i_pos = jitter->flostack_offset - direct_flostack_offset + i + sizeof(double);
a_pos = direct_flostack_offset - i; a_pos = direct_flostack_offset - i;
if (i_pos != a_pos) { if (i_pos != a_pos) {
mz_ld_fppush(JIT_FPR0, i_pos); mz_ld_fppush(JIT_FPR0, i_pos);
@ -1543,10 +1543,10 @@ static int generate_fp_argument_shuffle(int direct_flostack_offset, mz_jit_state
} }
} else { } else {
/* reverse: */ /* reverse: */
for (i = 0, j = direct_flostack_offset-1; i < j; i++, j--) { for (i = 0, j = direct_flostack_offset-sizeof(double); i < j; i += sizeof(double), j -= sizeof(double)) {
int i_pos, j_pos; int i_pos, j_pos;
i_pos = jitter->flostack_offset - direct_flostack_offset + i + 1; i_pos = jitter->flostack_offset - direct_flostack_offset + i + sizeof(double);
j_pos = jitter->flostack_offset - direct_flostack_offset + j + 1; j_pos = jitter->flostack_offset - direct_flostack_offset + j + sizeof(double);
mz_ld_fppush(JIT_FPR1, i_pos); mz_ld_fppush(JIT_FPR1, i_pos);
mz_ld_fppush(JIT_FPR0, j_pos); mz_ld_fppush(JIT_FPR0, j_pos);
mz_st_fppop(i_pos, JIT_FPR0); mz_st_fppop(i_pos, JIT_FPR0);
@ -1556,11 +1556,11 @@ static int generate_fp_argument_shuffle(int direct_flostack_offset, mz_jit_state
if (direct_flostack_offset != jitter->flostack_offset) { if (direct_flostack_offset != jitter->flostack_offset) {
/* shift: */ /* shift: */
for (i = 0; i < direct_flostack_offset; i++) { for (i = 0; i < direct_flostack_offset; i += sizeof(double)) {
int i_pos, a_pos; int i_pos, a_pos;
i_pos = jitter->flostack_offset - direct_flostack_offset + i + 1; i_pos = jitter->flostack_offset - direct_flostack_offset + i + sizeof(double);
mz_ld_fppush(JIT_FPR0, i_pos); mz_ld_fppush(JIT_FPR0, i_pos);
a_pos = i + 1; a_pos = i + sizeof(double);
mz_st_fppop(a_pos, JIT_FPR0); mz_st_fppop(a_pos, JIT_FPR0);
CHECK_LIMIT(); CHECK_LIMIT();
} }
@ -1586,11 +1586,11 @@ static int generate_call_path_with_unboxes(mz_jit_state *jitter, int direct_flos
offset = FLOSTACK_SPACE_CHUNK * ((direct_flostack_offset + (FLOSTACK_SPACE_CHUNK - 1)) offset = FLOSTACK_SPACE_CHUNK * ((direct_flostack_offset + (FLOSTACK_SPACE_CHUNK - 1))
/ FLOSTACK_SPACE_CHUNK); / FLOSTACK_SPACE_CHUNK);
jit_subi_l(JIT_SP, JIT_SP, offset * sizeof(double)); jit_subi_l(JIT_SP, JIT_SP, offset);
for (i = 0; i < direct_flostack_offset; i++) { for (i = 0; i < direct_flostack_offset; i += sizeof(double)) {
int i_pos, a_pos; int i_pos, a_pos;
i_pos = jitter->flostack_offset - direct_flostack_offset + i + 1; i_pos = jitter->flostack_offset - direct_flostack_offset + i + sizeof(double);
a_pos = direct_flostack_offset - i; a_pos = direct_flostack_offset - i;
mz_ld_fppush_x(JIT_FPR0, i_pos, JIT_R2); mz_ld_fppush_x(JIT_FPR0, i_pos, JIT_R2);
mz_st_fppop(a_pos, JIT_FPR0); mz_st_fppop(a_pos, JIT_FPR0);
@ -1617,9 +1617,9 @@ static int generate_call_path_with_unboxes(mz_jit_state *jitter, int direct_flos
for (i = 0, k = 0; i < num_rands; i++) { for (i = 0, k = 0; i < num_rands; i++) {
if ((SCHEME_CLOSURE_DATA_FLAGS(direct_data) & CLOS_HAS_TYPED_ARGS) if ((SCHEME_CLOSURE_DATA_FLAGS(direct_data) & CLOS_HAS_TYPED_ARGS)
&& (CLOSURE_ARGUMENT_IS_FLONUM(direct_data, i))) { && (CLOSURE_ARGUMENT_IS_FLONUM(direct_data, i))) {
k++; k += sizeof(double);
offset = jitter->flostack_offset - direct_flostack_offset + k; offset = jitter->flostack_offset - direct_flostack_offset + k;
offset = JIT_FRAME_FLONUM_OFFSET - (offset * sizeof(double)); offset = JIT_FRAME_FLOSTACK_OFFSET - offset;
jit_ldxi_p(JIT_R0, JIT_RUNSTACK, WORDS_TO_BYTES(i)); jit_ldxi_p(JIT_R0, JIT_RUNSTACK, WORDS_TO_BYTES(i));
scheme_generate_flonum_local_boxing(jitter, i, offset, JIT_R0); scheme_generate_flonum_local_boxing(jitter, i, offset, JIT_R0);
} }
@ -1939,7 +1939,7 @@ int scheme_generate_app(Scheme_App_Rec *app, Scheme_Object **alt_rands, int num_
} else { } else {
(void)jit_movi_p(JIT_R0, NULL); (void)jit_movi_p(JIT_R0, NULL);
} }
direct_flostack_offset++; direct_flostack_offset += sizeof(double);
} else } else
#endif #endif
if (inline_direct_args) { if (inline_direct_args) {

View File

@ -652,7 +652,7 @@ void scheme_mz_flostack_restore(mz_jit_state *jitter, int space, int pos, int ge
if (space != jitter->flostack_space) { if (space != jitter->flostack_space) {
if (gen) { if (gen) {
int delta = jitter->flostack_space - space; int delta = jitter->flostack_space - space;
jit_addi_p(JIT_SP, JIT_SP, delta * sizeof(double)); jit_addi_p(JIT_SP, JIT_SP, delta);
} }
if (adj) jitter->flostack_space = space; if (adj) jitter->flostack_space = space;
} }
@ -722,7 +722,7 @@ int scheme_mz_is_closure(mz_jit_state *jitter, int i, int arity, int *_flags)
} }
#ifdef USE_FLONUM_UNBOXING #ifdef USE_FLONUM_UNBOXING
int scheme_mz_flonum_pos(mz_jit_state *jitter, int i) int scheme_mz_flostack_pos(mz_jit_state *jitter, int i)
{ {
int j = i, p = jitter->num_mappings, c; int j = i, p = jitter->num_mappings, c;
while (p && (j >= 0)) { while (p && (j >= 0)) {