From 87a23c75a3a1d444e591daec7947cc074709605a Mon Sep 17 00:00:00 2001 From: Matthew Flatt Date: Sat, 11 Feb 2006 13:46:49 +0000 Subject: [PATCH] inline bitwise ops, fix PPC ldxi JIT for large offsets svn: r2196 --- src/mzscheme/src/jit.c | 209 ++++++++++++++++++++++---- src/mzscheme/src/lightning/README | 6 + src/mzscheme/src/lightning/ppc/core.h | 8 +- src/mzscheme/src/number.c | 46 +++--- src/mzscheme/src/portfun.c | 12 +- 5 files changed, 217 insertions(+), 64 deletions(-) diff --git a/src/mzscheme/src/jit.c b/src/mzscheme/src/jit.c index cf1cad5c0b..19f36c9adf 100644 --- a/src/mzscheme/src/jit.c +++ b/src/mzscheme/src/jit.c @@ -31,6 +31,7 @@ #define JIT_LOG_WORD_SIZE 2 #define WORDS_TO_BYTES(x) ((x) << JIT_LOG_WORD_SIZE) #define JIT_WORD_SIZE (1 << JIT_LOG_WORD_SIZE) +#define MAX_TRY_SHIFT 30 #define JIT_NOT_RET JIT_R1 #if JIT_NOT_RET == JIT_RET @@ -1369,12 +1370,14 @@ static int generate_app(Scheme_App_Rec *app, Scheme_Object **alt_rands, int num_ return is_tail ? 2 : 1; } -static void generate_arith_slow_path(mz_jit_state *jitter, Scheme_Object *rator, - jit_insn **_ref, jit_insn **_ref4, - jit_insn **for_branch, - int orig_args, int reversed, int use_v, int v) +static jit_insn *generate_arith_slow_path(mz_jit_state *jitter, Scheme_Object *rator, + jit_insn **_ref, jit_insn **_ref4, + jit_insn **for_branch, + int orig_args, int reversed, int arith, int use_v, int v) { - jit_insn *ref, *ref4; + jit_insn *ref, *ref4, *refslow; + + refslow = _jit.x.pc; (void)jit_movi_p(JIT_R2, ((Scheme_Primitive_Proc *)rator)->prim_val); if (for_branch) { @@ -1413,6 +1416,25 @@ static void generate_arith_slow_path(mz_jit_state *jitter, Scheme_Object *rator, *_ref = ref; *_ref4 = ref4; + + if (arith == 6) { + /* Add tag back to first arg, just in case. See arithmetic-shift branch to refslow. */ + ref = _jit.x.pc; + + if (reversed || use_v) { + jit_ori_l(JIT_R0, JIT_R0, 0x1); + } else { + jit_ori_l(JIT_R1, JIT_R1, 0x1); + } + + __START_SHORT_JUMPS__(1); + (void)jit_jmpi(refslow); + __END_SHORT_JUMPS__(1); + + return ref; + } else { + return refslow; + } } static int generate_arith(mz_jit_state *jitter, Scheme_Object *rator, Scheme_Object *rand, Scheme_Object *rand2, @@ -1424,18 +1446,30 @@ static int generate_arith(mz_jit_state *jitter, Scheme_Object *rator, Scheme_Obj LOG_IT(("inlined %s\n", ((Scheme_Primitive_Proc *)rator)->name)); if (rand2) { - if (SCHEME_INTP(rand2)) { + if (SCHEME_INTP(rand2) + && ((arith != 6) + || ((SCHEME_INT_VAL(rand2) <= MAX_TRY_SHIFT) + && (SCHEME_INT_VAL(rand2) >= -MAX_TRY_SHIFT)))) { + /* Second is constant, so use constant mode. + For arithmetic shift, only do this if the constant + is in range. */ v = SCHEME_INT_VAL(rand2); rand2 = NULL; - } else if (SCHEME_INTP(rand) && (arith != -1)) { + } else if (SCHEME_INTP(rand) + && (arith != 6)) { + /* First is constant; swap argument order and use constant mode. */ v = SCHEME_INT_VAL(rand); cmp = -cmp; rand = rand2; rand2 = NULL; reversed = 1; - } else if ((arith != -1) - && SAME_TYPE(SCHEME_TYPE(rand2), scheme_local_type) - && !SAME_TYPE(SCHEME_TYPE(rand), scheme_local_type)) { + } else if ((SAME_TYPE(SCHEME_TYPE(rand2), scheme_local_type) + || SCHEME_INTP(rand2)) + && !(SAME_TYPE(SCHEME_TYPE(rand), scheme_local_type) + || SCHEME_INTP(rand))) { + /* Second expression is side-effect-free, unlike the first; + swap order and use the fast path for when the first arg is + side-effect free. 
*/ Scheme_Object *t = rand2; rand2 = rand; rand = t; @@ -1445,7 +1479,8 @@ static int generate_arith(mz_jit_state *jitter, Scheme_Object *rator, Scheme_Obj } if (rand2) { - simple_rand = SAME_TYPE(SCHEME_TYPE(rand), scheme_local_type); + simple_rand = (SAME_TYPE(SCHEME_TYPE(rand), scheme_local_type) + || SCHEME_INTP(rand)); } else simple_rand = 0; @@ -1468,18 +1503,24 @@ static int generate_arith(mz_jit_state *jitter, Scheme_Object *rator, Scheme_Obj CHECK_LIMIT(); if (simple_rand) { - int pos; - pos = mz_remap(SCHEME_LOCAL_POS(rand)); - jit_ldxi_p(JIT_R1, JIT_RUNSTACK, WORDS_TO_BYTES(pos)); + int pos, va; + + if (SCHEME_INTP(rand)) { + jit_movi_i(JIT_R1, rand); + va = JIT_R0; + } else { + pos = mz_remap(SCHEME_LOCAL_POS(rand)); + jit_ldxi_p(JIT_R1, JIT_RUNSTACK, WORDS_TO_BYTES(pos)); + jit_andr_ul(JIT_R2, JIT_R0, JIT_R1); + va = JIT_R2; + } - jit_andr_ul(JIT_R2, JIT_R0, JIT_R1); __START_SHORT_JUMPS__(1); - ref2 = jit_bmsi_ul(jit_forward(), JIT_R2, 0x1); + ref2 = jit_bmsi_ul(jit_forward(), va, 0x1); __END_SHORT_JUMPS__(1); /* Slow path */ - refslow = _jit.x.pc; - generate_arith_slow_path(jitter, rator, &ref, &ref4, for_branch, orig_args, reversed, 0, 0); + refslow = generate_arith_slow_path(jitter, rator, &ref, &ref4, for_branch, orig_args, reversed, arith, 0, 0); __START_SHORT_JUMPS__(1); mz_patch_branch(ref2); @@ -1496,8 +1537,7 @@ static int generate_arith(mz_jit_state *jitter, Scheme_Object *rator, Scheme_Obj __END_SHORT_JUMPS__(1); /* Slow path */ - refslow = _jit.x.pc; - generate_arith_slow_path(jitter, rator, &ref, &ref4, for_branch, orig_args, reversed, 0, 0); + refslow = generate_arith_slow_path(jitter, rator, &ref, &ref4, for_branch, orig_args, reversed, arith, 0, 0); __START_SHORT_JUMPS__(1); mz_patch_branch(ref2); @@ -1509,40 +1549,131 @@ static int generate_arith(mz_jit_state *jitter, Scheme_Object *rator, Scheme_Obj __END_SHORT_JUMPS__(1); /* Slow path */ - refslow = _jit.x.pc; - generate_arith_slow_path(jitter, rator, &ref, &ref4, for_branch, orig_args, reversed, 1, v); + refslow = generate_arith_slow_path(jitter, rator, &ref, &ref4, for_branch, orig_args, reversed, arith, 1, v); __START_SHORT_JUMPS__(1); mz_patch_branch(ref2); __END_SHORT_JUMPS__(1); } + CHECK_LIMIT(); + mz_runstack_unskipped(jitter, skipped); __START_SHORT_JUMPS__(branch_short); if (arith) { if (rand2) { - if (arith > 0) { + /* First arg is in JIT_R1, second is in JIT_R0 */ + if (arith == 1) { jit_andi_ul(JIT_R2, JIT_R1, (~0x1)); (void)jit_boaddr_i(refslow, JIT_R2, JIT_R0); jit_movr_p(JIT_R0, JIT_R2); - } else { - jit_movr_p(JIT_R2, JIT_R1); - (void)jit_bosubr_i(refslow, JIT_R2, JIT_R0); + } else if (arith == -1) { + if (reversed) { + jit_movr_p(JIT_R2, JIT_R0); + (void)jit_bosubr_i(refslow, JIT_R2, JIT_R1); + } else { + jit_movr_p(JIT_R2, JIT_R1); + (void)jit_bosubr_i(refslow, JIT_R2, JIT_R0); + } jit_ori_ul(JIT_R0, JIT_R2, 0x1); + } else if (arith == 3) { + /* and */ + jit_andr_ul(JIT_R0, JIT_R1, JIT_R0); + } else if (arith == 4) { + /* ior */ + jit_orr_ul(JIT_R0, JIT_R1, JIT_R0); + } else if (arith == 5) { + /* xor */ + jit_andi_ul(JIT_R0, JIT_R0, (~0x1)); + jit_xorr_ul(JIT_R0, JIT_R1, JIT_R0); + } else if (arith == 6) { + /* arithmetic-shift + This is a lot of code, but if you're using + arihtmetic-shift, then you probably want it. */ + int v1 = (reversed ? JIT_R0 : JIT_R1); + int v2 = (reversed ? 
JIT_R1 : JIT_R0); + jit_insn *refi, *refc; + + refi = jit_bgei_l(refslow, v2, scheme_make_integer(0)); + + /* Right shift (always works for a small enough shift) */ + (void)jit_blti_l(refslow, v2, scheme_make_integer(-MAX_TRY_SHIFT)); + jit_notr_l(JIT_V1, v2); + jit_rshi_l(JIT_V1, JIT_V1, 0x1); + jit_addi_l(JIT_V1, JIT_V1, 0x1); + jit_rshr_l(JIT_R2, v1, JIT_V1); + jit_ori_l(JIT_R0, JIT_R2, 0x1); + refc = jit_jmpi(jit_forward()); + + /* Left shift */ + mz_patch_branch(refi); + (void)jit_bgti_l(refslow, v2, scheme_make_integer(MAX_TRY_SHIFT)); + jit_rshi_l(JIT_V1, v2, 0x1); + jit_andi_l(v1, v1, (~0x1)); + jit_lshr_l(JIT_R2, v1, JIT_V1); + /* If shifting back right produces a different result, that's overflow... */ + jit_rshr_l(JIT_V1, JIT_R2, JIT_V1); + /* !! In case we go refslow, it nseed to add back tag to v1 !! */ + (void)jit_bner_p(refslow, JIT_V1, v1); + /* No overflow. */ + jit_ori_l(JIT_R0, JIT_R2, 0x1); + + mz_patch_ucbranch(refc); } } else { - jit_movr_p(JIT_R2, JIT_R0); - if (arith > 0) { + /* Non-constant arg is in JIT_R0 */ + if (arith == 1) { + jit_movr_p(JIT_R2, JIT_R0); (void)jit_boaddi_i(refslow, JIT_R2, v << 1); + jit_movr_p(JIT_R0, JIT_R2); + } else if (arith == -1) { + if (reversed) { + (void)jit_movi_p(JIT_R2, scheme_make_integer(v)); + (void)jit_bosubr_i(refslow, JIT_R2, JIT_R0); + jit_addi_ul(JIT_R0, JIT_R2, 0x1); + } else { + jit_movr_p(JIT_R2, JIT_R0); + (void)jit_bosubi_i(refslow, JIT_R2, v << 1); + jit_movr_p(JIT_R0, JIT_R2); + } } else { - (void)jit_bosubi_i(refslow, JIT_R2, v << 1); + if (arith == 3) { + /* and */ + jit_andi_ul(JIT_R0, JIT_R0, scheme_make_integer(v)); + } else if (arith == 4) { + /* ior */ + jit_ori_ul(JIT_R0, JIT_R0, scheme_make_integer(v)); + } else if (arith == 5) { + /* xor */ + jit_xori_ul(JIT_R0, JIT_R0, v << 1); + } else if (arith == 6) { + /* arithmetic-shift */ + /* We only get here when v is between -MAX_TRY_SHIFT and MAX_TRY_SHIFT, inclusive */ + if (v <= 0) { + jit_rshi_l(JIT_R0, JIT_R0, -v); + jit_ori_l(JIT_R0, JIT_R0, 0x1); + } else { + jit_andi_l(JIT_R0, JIT_R0, (~0x1)); + jit_lshi_l(JIT_R2, JIT_R0, v); + /* If shifting back right produces a different result, that's overflow... */ + jit_rshi_l(JIT_V1, JIT_R2, v); + /* !! In case we go refslow, it nseed to add back tag to JIT_R0 !! */ + (void)jit_bner_p(refslow, JIT_V1, JIT_R0); + /* No overflow. */ + jit_ori_l(JIT_R0, JIT_R2, 0x1); + } + } else if (arith == 7) { + jit_notr_ul(JIT_R0, JIT_R0); + jit_ori_ul(JIT_R0, JIT_R0, 0x1); + } } - jit_movr_p(JIT_R0, JIT_R2); } jit_patch_movi(ref, (_jit.x.pc)); } else { + /* If second is constant, first arg is in JIT_R0. 
*/ + /* Otherwise, first arg is in JIT_R1, second is in JIT_R0 */ switch (cmp) { case -2: if (rand2) { @@ -1739,6 +1870,9 @@ static int generate_inlined_unary(mz_jit_state *jitter, Scheme_App2_Rec *app, in } else if (IS_NAMED_PRIM(rator, "real?")) { generate_inlined_type_test(jitter, app, scheme_integer_type, scheme_complex_izi_type, for_branch, branch_short); return 1; + } else if (IS_NAMED_PRIM(rator, "eof-object?")) { + generate_inlined_constant_test(jitter, app, scheme_eof, NULL, for_branch, branch_short); + return 1; } else if (IS_NAMED_PRIM(rator, "zero?")) { generate_arith(jitter, rator, app->rand, NULL, 1, 0, 0, 0, for_branch, branch_short); return 1; @@ -1810,6 +1944,9 @@ static int generate_inlined_unary(mz_jit_state *jitter, Scheme_App2_Rec *app, in } else if (IS_NAMED_PRIM(rator, "sub1")) { generate_arith(jitter, rator, app->rand, NULL, 1, -1, 0, 1, NULL, 1); return 1; + } else if (IS_NAMED_PRIM(rator, "bitwise-not")) { + generate_arith(jitter, rator, app->rand, NULL, 1, 7, 0, 9, NULL, 1); + return 1; } } @@ -1930,6 +2067,18 @@ static int generate_inlined_binary(mz_jit_state *jitter, Scheme_App3_Rec *app, i } else if (IS_NAMED_PRIM(rator, "-")) { generate_arith(jitter, rator, app->rand1, app->rand2, 2, -1, 0, 0, NULL, 1); return 1; + } else if (IS_NAMED_PRIM(rator, "bitwise-and")) { + generate_arith(jitter, rator, app->rand1, app->rand2, 2, 3, 0, 0, NULL, 1); + return 1; + } else if (IS_NAMED_PRIM(rator, "bitwise-ior")) { + generate_arith(jitter, rator, app->rand1, app->rand2, 2, 4, 0, 0, NULL, 1); + return 1; + } else if (IS_NAMED_PRIM(rator, "bitwise-xor")) { + generate_arith(jitter, rator, app->rand1, app->rand2, 2, 5, 0, 0, NULL, 1); + return 1; + } else if (IS_NAMED_PRIM(rator, "arithmetic-shift")) { + generate_arith(jitter, rator, app->rand1, app->rand2, 2, 6, 0, 0, NULL, 1); + return 1; } else if (IS_NAMED_PRIM(rator, "vector-ref")) { int simple; diff --git a/src/mzscheme/src/lightning/README b/src/mzscheme/src/lightning/README index efc6e5c9ac..f44478c98b 100644 --- a/src/mzscheme/src/lightning/README +++ b/src/mzscheme/src/lightning/README @@ -17,3 +17,9 @@ GNU Lightning authors: i386 and PPC assemblers by Ian Piumarta Major PPC contributions by Laurent Michel +---------------------------------------- + +Bugs fixed since 1.2: + + * PPC jit_ldxi_* checked register instead of immediate for whether it + fits in 16 bits diff --git a/src/mzscheme/src/lightning/ppc/core.h b/src/mzscheme/src/lightning/ppc/core.h index b46651ec0e..0bef1fc55b 100644 --- a/src/mzscheme/src/lightning/ppc/core.h +++ b/src/mzscheme/src/lightning/ppc/core.h @@ -203,10 +203,10 @@ struct jit_local_state { #define jit_jmpr(reg) (MTLRr(reg), BLR()) #define jit_ldxi_c(d, rs, is) (jit_ldxi_uc((d), (rs), (is)), jit_extr_c_i((d), (d))) #define jit_ldxr_c(d, s1, s2) (jit_ldxr_uc((d), (s1), (s2)), jit_extr_c_i((d), (d))) -#define jit_ldxi_i(d, rs, is) jit_chk_ims((d), LWZrm((d), (is), (rs)), LWZrx((d), JIT_AUX, (rs))) -#define jit_ldxi_s(d, rs, is) jit_chk_ims((d), LHArm((d), (is), (rs)), LHArx((d), JIT_AUX, (rs))) -#define jit_ldxi_uc(d, rs, is) jit_chk_ims((d), LBZrm((d), (is), (rs)), LBZrx((d), JIT_AUX, (rs))) -#define jit_ldxi_us(d, rs, is) jit_chk_ims((d), LHZrm((d), (is), (rs)), LHZrx((d), JIT_AUX, (rs))) +#define jit_ldxi_i(d, rs, is) jit_chk_ims((is), LWZrm((d), (is), (rs)), LWZrx((d), JIT_AUX, (rs))) +#define jit_ldxi_s(d, rs, is) jit_chk_ims((is), LHArm((d), (is), (rs)), LHArx((d), JIT_AUX, (rs))) +#define jit_ldxi_uc(d, rs, is) jit_chk_ims((is), LBZrm((d), (is), (rs)), LBZrx((d), JIT_AUX, (rs))) 
+#define jit_ldxi_us(d, rs, is) jit_chk_ims((is), LHZrm((d), (is), (rs)), LHZrx((d), JIT_AUX, (rs))) #define jit_ldxr_i(d, s1, s2) LWZrx((d), (s1), (s2)) #define jit_ldxr_s(d, s1, s2) LHArx((d), (s1), (s2)) #define jit_ldxr_uc(d, s1, s2) LBZrx((d), (s1), (s2)) diff --git a/src/mzscheme/src/number.c b/src/mzscheme/src/number.c index 24a9638d67..d91d19fa14 100644 --- a/src/mzscheme/src/number.c +++ b/src/mzscheme/src/number.c @@ -263,31 +263,27 @@ scheme_init_number (Scheme_Env *env) "even?", 1, 1, 1), env); - scheme_add_global_constant("bitwise-and", - scheme_make_folding_prim(scheme_bitwise_and, - "bitwise-and", - 1, -1, 1), - env); - scheme_add_global_constant("bitwise-ior", - scheme_make_folding_prim(bitwise_or, - "bitwise-ior", - 1, -1, 1), - env); - scheme_add_global_constant("bitwise-xor", - scheme_make_folding_prim(bitwise_xor, - "bitwise-xor", - 1, -1, 1), - env); - scheme_add_global_constant("bitwise-not", - scheme_make_folding_prim(bitwise_not, - "bitwise-not", - 1, 1, 1), - env); - scheme_add_global_constant("arithmetic-shift", - scheme_make_folding_prim(scheme_bitwise_shift, - "arithmetic-shift", - 2, 2, 1), - env); + + p = scheme_make_folding_prim(scheme_bitwise_and, "bitwise-and", 1, -1, 1); + SCHEME_PRIM_PROC_FLAGS(p) |= SCHEME_PRIM_IS_BINARY_INLINED; + scheme_add_global_constant("bitwise-and", p, env); + + p = scheme_make_folding_prim(bitwise_or, "bitwise-ior", 1, -1, 1); + SCHEME_PRIM_PROC_FLAGS(p) |= SCHEME_PRIM_IS_BINARY_INLINED; + scheme_add_global_constant("bitwise-ior", p, env); + + p = scheme_make_folding_prim(bitwise_xor, "bitwise-xor", 1, -1, 1); + SCHEME_PRIM_PROC_FLAGS(p) |= SCHEME_PRIM_IS_BINARY_INLINED; + scheme_add_global_constant("bitwise-xor", p, env); + + p = scheme_make_folding_prim(bitwise_not, "bitwise-not", 1, 1, 1); + SCHEME_PRIM_PROC_FLAGS(p) |= SCHEME_PRIM_IS_UNARY_INLINED; + scheme_add_global_constant("bitwise-not", p, env); + + p = scheme_make_folding_prim(scheme_bitwise_shift, "arithmetic-shift", 2, 2, 1); + SCHEME_PRIM_PROC_FLAGS(p) |= SCHEME_PRIM_IS_BINARY_INLINED; + scheme_add_global_constant("arithmetic-shift", p, env); + scheme_add_global_constant("gcd", scheme_make_folding_prim(gcd, "gcd", diff --git a/src/mzscheme/src/portfun.c b/src/mzscheme/src/portfun.c index 18698fcade..e6cb953b42 100644 --- a/src/mzscheme/src/portfun.c +++ b/src/mzscheme/src/portfun.c @@ -153,6 +153,8 @@ Scheme_Object *scheme_write_proc, *scheme_display_proc, *scheme_print_proc; void scheme_init_port_fun(Scheme_Env *env) { + Scheme_Object *p; + #ifdef MZ_PRECISE_GC register_traversers(); #endif @@ -551,11 +553,11 @@ scheme_init_port_fun(Scheme_Env *env) "peek-byte-or-special", 0, 3), env); - scheme_add_global_constant("eof-object?", - scheme_make_folding_prim(eof_object_p, - "eof-object?", - 1, 1, 1), - env); + + p = scheme_make_folding_prim(eof_object_p, "eof-object?", 1, 1, 1); + SCHEME_PRIM_PROC_FLAGS(p) |= SCHEME_PRIM_IS_UNARY_INLINED; + scheme_add_global_constant("eof-object?", p, env); + scheme_add_global_constant("byte-ready?", scheme_make_noncm_prim(byte_ready_p, "byte-ready?",
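
The inlined fast paths above all lean on MzScheme's fixnum encoding: a small
integer v is stored as (v << 1) | 1, so the low tag bit is always set. Below is
a minimal C sketch of that arithmetic, assuming the usual two's-complement long
representation; the FIX/UNFIX macros and fix_* helpers are hypothetical
illustrations, not MzScheme API, and the JIT emits the equivalent register
operations rather than calling anything like this.

#include <stdio.h>

typedef long fixnum;                          /* tagged representation */

#define FIX(v)   (((fixnum)(v) << 1) | 0x1)   /* like scheme_make_integer */
#define UNFIX(t) ((t) >> 1)

/* bitwise-and / bitwise-ior: the tag bit is 1 in both operands, so the
   AND/OR result is already correctly tagged -- no fixup needed. */
static fixnum fix_and(fixnum a, fixnum b) { return a & b; }
static fixnum fix_ior(fixnum a, fixnum b) { return a | b; }

/* bitwise-xor: XOR of two set tag bits would clear the tag, so clear one
   operand's tag first (as the generated code does with jit_andi_ul). */
static fixnum fix_xor(fixnum a, fixnum b) { return a ^ (b & ~0x1L); }

/* arithmetic-shift by n in [-MAX_TRY_SHIFT, MAX_TRY_SHIFT]: right shifts
   always fit; left shifts are checked for overflow by shifting back and
   comparing, mirroring the jit_bner_p guard before the slow path. */
static int fix_shift(fixnum a, int n, fixnum *out)
{
  if (n <= 0) {
    *out = (a >> -n) | 0x1;       /* shift the tagged value, restore tag */
    return 1;
  } else {
    fixnum v = a & ~0x1L;         /* drop the tag before shifting left */
    fixnum r = v << n;
    if ((r >> n) != v)
      return 0;                   /* overflow: a real implementation would
                                     take the slow (bignum) path here */
    *out = r | 0x1;
    return 1;
  }
}

int main(void)
{
  fixnum a = FIX(12), b = FIX(10), r;
  printf("and = %ld\n", UNFIX(fix_and(a, b)));   /* 8 */
  printf("ior = %ld\n", UNFIX(fix_ior(a, b)));   /* 14 */
  printf("xor = %ld\n", UNFIX(fix_xor(a, b)));   /* 6 */
  if (fix_shift(a, 3, &r))
    printf("shift = %ld\n", UNFIX(r));           /* 96 */
  return 0;
}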