From 4041e65a867ec47140a0e6aa412ab4ef9a04e1a9 Mon Sep 17 00:00:00 2001 From: Matthew Flatt Date: Sat, 17 Nov 2012 08:00:03 -0700 Subject: [PATCH] JIT simplification The scheme_generate_arith() function effectively had its own copy of the general scheme_generate_two_args() function that predates the general one. Using scheme_generate_two_args() instead simplifies and clarifies the code. --- src/racket/src/jit.h | 2 + src/racket/src/jitarith.c | 137 +++++++------------------------------ src/racket/src/jitinline.c | 99 +++++++++++++++++---------- 3 files changed, 88 insertions(+), 150 deletions(-) diff --git a/src/racket/src/jit.h b/src/racket/src/jit.h index 8fb6235cbe..e417152811 100644 --- a/src/racket/src/jit.h +++ b/src/racket/src/jit.h @@ -1237,6 +1237,8 @@ int scheme_generate_cons_alloc(mz_jit_state *jitter, int rev, int inline_retry, int scheme_generate_struct_alloc(mz_jit_state *jitter, int num_args, int inline_slow, int pop_and_jump, int is_tail, int multi_ok, int dest); +int scheme_generate_two_args(Scheme_Object *rand1, Scheme_Object *rand2, mz_jit_state *jitter, + int order_matters, int skipped); /**********************************************************************/ /* jitalloc */ diff --git a/src/racket/src/jitarith.c b/src/racket/src/jitarith.c index 8360ec6b4c..604b4d41c3 100644 --- a/src/racket/src/jitarith.c +++ b/src/racket/src/jitarith.c @@ -845,7 +845,7 @@ int scheme_generate_arith(mz_jit_state *jitter, Scheme_Object *rator, Scheme_Obj { GC_CAN_IGNORE jit_insn *ref, *ref2, *ref3, *ref4, *refd = NULL, *refdt = NULL; GC_CAN_IGNORE jit_insn *refslow; - int skipped, simple_rand, simple_rand2, reversed = 0; + int reversed = 0; int has_fixnum_fast = 1, has_flonum_fast = 1; int inlined_flonum1, inlined_flonum2; @@ -1081,75 +1081,39 @@ int scheme_generate_arith(mz_jit_state *jitter, Scheme_Object *rator, Scheme_Obj /* Unary subtract */ reversed = 1; } - + if (rand2) { - simple_rand = (scheme_ok_to_move_local(rand) - || SCHEME_INTP(rand)); - 
simple_rand2 = (SAME_TYPE(SCHEME_TYPE(rand2), scheme_local_type) - && (SCHEME_GET_LOCAL_TYPE(rand2) != SCHEME_LOCAL_TYPE_FLONUM)); - if (simple_rand && simple_rand2) { - if (mz_CURRENT_REG_STATUS_VALID() - && (jitter->r0_status >= 0) - && !(SAME_TYPE(SCHEME_TYPE(rand), scheme_local_type) - && SCHEME_LOCAL_POS(rand) == SCHEME_LOCAL_POS(rand2))) { - /* prefer to evaluate the rand2 second, so that we can use R0 if - it's helpful to set up R1 as rand */ - simple_rand = 0; - } else - simple_rand2 = 0; + int dir; + dir = scheme_generate_two_args(rand, rand2, jitter, 0, orig_args); + CHECK_LIMIT(); + /* Since we want rand in R1 and rand2 in R0, direction is backwards: */ + if (dir > 0) { + Scheme_Object *t = rand2; + rand2 = rand; + rand = t; + cmp = -cmp; + reversed = !reversed; } } else { - simple_rand = 0; - simple_rand2 = 0; - } - - if (rand2 && !simple_rand && !simple_rand2) - skipped = orig_args - 1; - else - skipped = orig_args; - - mz_runstack_skipped(jitter, skipped); - - if (rand2 && !simple_rand && !simple_rand2) { - mz_runstack_skipped(jitter, 1); + mz_runstack_skipped(jitter, orig_args); scheme_generate_non_tail(rand, jitter, 0, 1, 0); /* sync'd later */ CHECK_LIMIT(); - mz_runstack_unskipped(jitter, 1); - mz_rs_dec(1); + mz_runstack_unskipped(jitter, orig_args); CHECK_RUNSTACK_OVERFLOW(); - mz_runstack_pushed(jitter, 1); - mz_rs_str(JIT_R0); } /* not sync'd... */ - if (simple_rand2) { - if (SAME_TYPE(SCHEME_TYPE(rand), scheme_local_type)) - scheme_generate(rand, jitter, 0, 0, 0, JIT_R1, NULL); /* sync'd below */ - else { - scheme_generate_non_tail(rand, jitter, 0, 1, 0); /* sync'd below */ - CHECK_LIMIT(); - jit_movr_p(JIT_R1, JIT_R0); - } - CHECK_LIMIT(); - scheme_generate(rand2, jitter, 0, 0, 0, JIT_R0, NULL); /* sync'd below */ - } else { - scheme_generate_non_tail(rand2 ? 
rand2 : rand, jitter, 0, 1, 0); /* sync'd below */ - } - CHECK_LIMIT(); - /* sync'd in three branches below */ + /* two arguments: rand2 in R0, and rand in R1 */ + /* one argument: rand in R0 */ - /* rand2 in R0, and rand in R1 unless it's simple */ - - if (simple_rand || simple_rand2) { + if (rand2) { int va; - if (simple_rand && SCHEME_INTP(rand)) { - (void)jit_movi_p(JIT_R1, rand); - va = JIT_R0; + if (SCHEME_INTP(rand)) { + va = JIT_R0; /* check only rand2 */ + } else if (SCHEME_INTP(rand2)) { + va = JIT_R1; /* check only rand */ } else { - if (simple_rand) { - scheme_generate(rand, jitter, 0, 0, 0, JIT_R1, NULL); - } if (!unsafe_fx && !unsafe_fl) { /* check both fixnum bits at once by ANDing into R2: */ jit_andr_ul(JIT_R2, JIT_R0, JIT_R1); @@ -1186,7 +1150,8 @@ int scheme_generate_arith(mz_jit_state *jitter, Scheme_Object *rator, Scheme_Obj } /* Slow path */ - refslow = generate_arith_slow_path(jitter, rator, &ref, &ref4, for_branch, branch_short, orig_args, reversed, arith, 0, 0, dest); + refslow = generate_arith_slow_path(jitter, rator, &ref, &ref4, for_branch, branch_short, + orig_args, reversed, arith, 0, 0, dest); if (has_fixnum_fast) { __START_TINY_JUMPS_IF_COMPACT__(1); @@ -1199,57 +1164,6 @@ int scheme_generate_arith(mz_jit_state *jitter, Scheme_Object *rator, Scheme_Obj ref4 = NULL; } CHECK_LIMIT(); - } else if (rand2) { - /* Move rand result back into R1 */ - mz_rs_ldr(JIT_R1); - mz_rs_inc(1); - mz_runstack_popped(jitter, 1); - - if (!unsafe_fx && !unsafe_fl) { - mz_rs_sync(); - - /* check both fixnum bits at once by ANDing into R2: */ - jit_andr_ul(JIT_R2, JIT_R0, JIT_R1); - __START_TINY_JUMPS_IF_COMPACT__(1); - ref2 = jit_bmsi_ul(jit_forward(), JIT_R2, 0x1); - __END_TINY_JUMPS_IF_COMPACT__(1); - CHECK_LIMIT(); - } else { - if (for_branch) mz_rs_sync(); - ref2 = NULL; - CHECK_LIMIT(); - } - - if (unsafe_fl || (!unsafe_fx && has_flonum_fast && can_fast_double(arith, cmp, 1))) { - /* Maybe they're both doubles... 
*/ - if (unsafe_fl) mz_rs_sync(); - generate_double_arith(jitter, rator, arith, cmp, reversed, 1, 0, &refd, &refdt, - for_branch, branch_short, unsafe_fl, 0, unbox, dest); - CHECK_LIMIT(); - } - - if (!unsafe_fx && !unsafe_fl) { - if (!has_fixnum_fast) { - __START_TINY_JUMPS_IF_COMPACT__(1); - mz_patch_branch(ref2); - __END_TINY_JUMPS_IF_COMPACT__(1); - } - - /* Slow path */ - refslow = generate_arith_slow_path(jitter, rator, &ref, &ref4, for_branch, branch_short, orig_args, reversed, arith, 0, 0, dest); - - if (has_fixnum_fast) { - /* Fixnum branch: */ - __START_TINY_JUMPS_IF_COMPACT__(1); - mz_patch_branch(ref2); - __END_TINY_JUMPS_IF_COMPACT__(1); - } - CHECK_LIMIT(); - } else { - refslow = overflow_refslow; - ref = NULL; - ref4 = NULL; - } } else { /* Only one argument: */ if (!unsafe_fx && !unsafe_fl) { @@ -1284,7 +1198,8 @@ int scheme_generate_arith(mz_jit_state *jitter, Scheme_Object *rator, Scheme_Obj } /* Slow path */ - refslow = generate_arith_slow_path(jitter, rator, &ref, &ref4, for_branch, branch_short, orig_args, reversed, arith, 1, v, dest); + refslow = generate_arith_slow_path(jitter, rator, &ref, &ref4, for_branch, branch_short, + orig_args, reversed, arith, 1, v, dest); if (has_fixnum_fast) { __START_TINY_JUMPS_IF_COMPACT__(1); @@ -1299,8 +1214,6 @@ int scheme_generate_arith(mz_jit_state *jitter, Scheme_Object *rator, Scheme_Obj } CHECK_LIMIT(); - - mz_runstack_unskipped(jitter, skipped); } __START_SHORT_JUMPS__(branch_short); diff --git a/src/racket/src/jitinline.c b/src/racket/src/jitinline.c index b5b57b5fda..8bf749ad42 100644 --- a/src/racket/src/jitinline.c +++ b/src/racket/src/jitinline.c @@ -124,9 +124,6 @@ static Scheme_Object *cont_mark_set_first_try_fast(Scheme_Object *cms, Scheme_Ob return ts_extract_one_cc_mark_to_tag(nullableCms, key, prompt_tag); } -static int generate_two_args(Scheme_Object *rand1, Scheme_Object *rand2, mz_jit_state *jitter, - int order_matters, int skipped); - static int check_val_struct_prim(Scheme_Object *p, int 
arity) { if (p && SCHEME_PRIMP(p)) { @@ -388,7 +385,7 @@ static int generate_inlined_struct_op(int kind, mz_jit_state *jitter, LOG_IT(("inlined struct op\n")); if (!rand2) { - generate_two_args(rator, rand, jitter, 1, 1); /* sync'd below */ + scheme_generate_two_args(rator, rand, jitter, 1, 1); /* sync'd below */ CHECK_LIMIT(); } else { Scheme_Object *args[3]; @@ -1821,7 +1818,22 @@ int scheme_generate_inlined_unary(mz_jit_state *jitter, Scheme_App2_Rec *app, in return 0; } -static int generate_two_args(Scheme_Object *rand1, Scheme_Object *rand2, mz_jit_state *jitter, +static int already_in_register(Scheme_Object *o, mz_jit_state *jitter) +{ + if (SAME_TYPE(SCHEME_TYPE(o), scheme_local_type)) { + if (mz_CURRENT_REG_STATUS_VALID()) { + int pos; + pos = mz_remap(SCHEME_LOCAL_POS(o)); + if ((pos == jitter->r0_status) + || (pos == jitter->r1_status)) + return 1; + } + } + + return 0; +} + +int scheme_generate_two_args(Scheme_Object *rand1, Scheme_Object *rand2, mz_jit_state *jitter, int order_matters, int skipped) /* de-sync's rs. Results go into R0 and R1. If !order_matters, and if only the @@ -1878,8 +1890,13 @@ static int generate_two_args(Scheme_Object *rand1, Scheme_Object *rand2, mz_jit_ scheme_generate_non_tail(rand2, jitter, 0, 1, 0); /* no sync... */ CHECK_LIMIT(); - jit_movr_p(JIT_R1, JIT_R0); - mz_rs_ldr(JIT_R0); + if (order_matters) { + jit_movr_p(JIT_R1, JIT_R0); + mz_rs_ldr(JIT_R0); + } else { + mz_rs_ldr(JIT_R1); + direction = -1; + } mz_runstack_unskipped(jitter, skipped-1); mz_rs_inc(1); @@ -1888,16 +1905,22 @@ static int generate_two_args(Scheme_Object *rand1, Scheme_Object *rand2, mz_jit_ } else { mz_runstack_skipped(jitter, skipped); - if (simple2) { - scheme_generate(rand2, jitter, 0, 0, 0, JIT_R1, NULL); /* no sync... */ - CHECK_LIMIT(); + if (simple2 && !order_matters && already_in_register(rand1, jitter)) { + scheme_generate(rand1, jitter, 0, 0, 0, JIT_R1, NULL); /* no sync... 
*/ + scheme_generate(rand2, jitter, 0, 0, 0, JIT_R0, NULL); /* no sync... */ + direction = -1; } else { - scheme_generate_non_tail(rand2, jitter, 0, 1, 0); /* no sync... */ - CHECK_LIMIT(); - jit_movr_p(JIT_R1, JIT_R0); - } + if (simple2) { + scheme_generate(rand2, jitter, 0, 0, 0, JIT_R1, NULL); /* no sync... */ + CHECK_LIMIT(); + } else { + scheme_generate_non_tail(rand2, jitter, 0, 1, 0); /* no sync... */ + CHECK_LIMIT(); + jit_movr_p(JIT_R1, JIT_R0); + } - scheme_generate(rand1, jitter, 0, 0, 0, JIT_R0, NULL); /* no sync... */ + scheme_generate(rand1, jitter, 0, 0, 0, JIT_R0, NULL); /* no sync... */ + } CHECK_LIMIT(); mz_runstack_unskipped(jitter, skipped); @@ -1918,7 +1941,7 @@ static int generate_binary_char(mz_jit_state *jitter, Scheme_App3_Rec *app, r1 = app->rand1; r2 = app->rand2; - direction = generate_two_args(r1, r2, jitter, 1, 2); + direction = scheme_generate_two_args(r1, r2, jitter, 0, 2); CHECK_LIMIT(); mz_rs_sync(); @@ -2266,7 +2289,7 @@ int scheme_generate_inlined_binary(mz_jit_state *jitter, Scheme_App3_Rec *app, i __END_SHORT_JUMPS__(branch_short); } else { /* Two complex expressions: */ - generate_two_args(a2, a1, jitter, 0, 2); + scheme_generate_two_args(a2, a1, jitter, 0, 2); CHECK_LIMIT(); if (need_sync) mz_rs_sync(); @@ -2298,7 +2321,7 @@ int scheme_generate_inlined_binary(mz_jit_state *jitter, Scheme_App3_Rec *app, i } else if (IS_NAMED_PRIM(rator, "equal?")) { GC_CAN_IGNORE jit_insn *ref_f, *ref_d, *refr; - generate_two_args(app->rand1, app->rand2, jitter, 0, 2); + scheme_generate_two_args(app->rand1, app->rand2, jitter, 0, 2); CHECK_LIMIT(); mz_rs_sync(); @@ -2340,7 +2363,7 @@ int scheme_generate_inlined_binary(mz_jit_state *jitter, Scheme_App3_Rec *app, i GC_CAN_IGNORE jit_insn *ref_f1, *ref_f2, *ref_f3, *ref_f4, *ref_f5; GC_CAN_IGNORE jit_insn *ref_d1, *ref_d2, *ref_t1; - generate_two_args(app->rand1, app->rand2, jitter, 0, 2); + scheme_generate_two_args(app->rand1, app->rand2, jitter, 0, 2); CHECK_LIMIT(); if (need_sync) 
mz_rs_sync(); @@ -2717,7 +2740,7 @@ int scheme_generate_inlined_binary(mz_jit_state *jitter, Scheme_App3_Rec *app, i && (SCHEME_INT_VAL(app->rand2) >= 0)); if (!simple) { - generate_two_args(app->rand1, app->rand2, jitter, 1, 2); + scheme_generate_two_args(app->rand1, app->rand2, jitter, 1, 2); CHECK_LIMIT(); if (!unsafe || can_chaperone) @@ -2835,7 +2858,7 @@ int scheme_generate_inlined_binary(mz_jit_state *jitter, Scheme_App3_Rec *app, i is_f64 = IS_NAMED_PRIM(rator, "unsafe-f64vector-ref"); jitter->unbox = 0; /* no unboxing of vector and index arguments */ - generate_two_args(app->rand1, app->rand2, jitter, 1, 2); + scheme_generate_two_args(app->rand1, app->rand2, jitter, 1, 2); jitter->unbox = unbox; CHECK_LIMIT(); @@ -2871,7 +2894,7 @@ int scheme_generate_inlined_binary(mz_jit_state *jitter, Scheme_App3_Rec *app, i is_u = IS_NAMED_PRIM(rator, "unsafe-u16vector-ref"); - generate_two_args(app->rand1, app->rand2, jitter, 1, 2); + scheme_generate_two_args(app->rand1, app->rand2, jitter, 1, 2); jit_ldxi_p(JIT_R0, JIT_R0, (intptr_t)&(((Scheme_Structure *)0x0)->slots[0])); jit_ldxi_p(JIT_R0, JIT_R0, (intptr_t)&SCHEME_CPTR_VAL(0x0)); @@ -2887,7 +2910,7 @@ int scheme_generate_inlined_binary(mz_jit_state *jitter, Scheme_App3_Rec *app, i return 1; } else if (IS_NAMED_PRIM(rator, "list-ref") || IS_NAMED_PRIM(rator, "list-tail")) { - generate_two_args(app->rand1, app->rand2, jitter, 1, 2); + scheme_generate_two_args(app->rand1, app->rand2, jitter, 1, 2); mz_rs_sync(); if (IS_NAMED_PRIM(rator, "list-ref")) @@ -2921,7 +2944,7 @@ int scheme_generate_inlined_binary(mz_jit_state *jitter, Scheme_App3_Rec *app, i } } - generate_two_args(app->rand1, app->rand2, jitter, 1, 2); + scheme_generate_two_args(app->rand1, app->rand2, jitter, 1, 2); if (IS_NAMED_PRIM(rator, "unsafe-list-ref")) (void)jit_calli(sjc.list_ref_code); @@ -2939,7 +2962,7 @@ int scheme_generate_inlined_binary(mz_jit_state *jitter, Scheme_App3_Rec *app, i LOG_IT(("inlined set-mcar!\n")); - 
generate_two_args(app->rand1, app->rand2, jitter, 1, 2); + scheme_generate_two_args(app->rand1, app->rand2, jitter, 1, 2); CHECK_LIMIT(); mz_rs_sync_fail_branch(); @@ -2974,7 +2997,7 @@ int scheme_generate_inlined_binary(mz_jit_state *jitter, Scheme_App3_Rec *app, i LOG_IT(("inlined unsafe-set-mcar!\n")); - generate_two_args(app->rand1, app->rand2, jitter, 1, 2); + scheme_generate_two_args(app->rand1, app->rand2, jitter, 1, 2); CHECK_LIMIT(); if (set_mcar) (void)jit_stxi_p(&((Scheme_Simple_Object *)0x0)->u.pair_val.car, JIT_R0, JIT_R1); @@ -2994,7 +3017,7 @@ int scheme_generate_inlined_binary(mz_jit_state *jitter, Scheme_App3_Rec *app, i unsafe = IS_NAMED_PRIM(rator, "unsafe-set-box!"); - generate_two_args(app->rand1, app->rand2, jitter, 1, 2); + scheme_generate_two_args(app->rand1, app->rand2, jitter, 1, 2); CHECK_LIMIT(); mz_rs_sync(); __START_TINY_JUMPS__(1); @@ -3028,7 +3051,7 @@ int scheme_generate_inlined_binary(mz_jit_state *jitter, Scheme_App3_Rec *app, i } else if (IS_NAMED_PRIM(rator, "unsafe-set-box*!")) { LOG_IT(("inlined unsafe-set-box*!\n")); - generate_two_args(app->rand1, app->rand2, jitter, 1, 2); + scheme_generate_two_args(app->rand1, app->rand2, jitter, 1, 2); CHECK_LIMIT(); (void)jit_stxi_p(&SCHEME_BOX_VAL(0x0), JIT_R0, JIT_R1); @@ -3041,7 +3064,7 @@ int scheme_generate_inlined_binary(mz_jit_state *jitter, Scheme_App3_Rec *app, i int dir, known_list; LOG_IT(("inlined cons\n")); - dir = generate_two_args(app->rand1, app->rand2, jitter, 0, 2); + dir = scheme_generate_two_args(app->rand1, app->rand2, jitter, 0, 2); CHECK_LIMIT(); mz_rs_sync(); @@ -3055,7 +3078,7 @@ int scheme_generate_inlined_binary(mz_jit_state *jitter, Scheme_App3_Rec *app, i int dir; LOG_IT(("inlined unsafe-cons-list\n")); - dir = generate_two_args(app->rand1, app->rand2, jitter, 0, 2); + dir = scheme_generate_two_args(app->rand1, app->rand2, jitter, 0, 2); CHECK_LIMIT(); mz_rs_sync(); @@ -3063,7 +3086,7 @@ int scheme_generate_inlined_binary(mz_jit_state *jitter, Scheme_App3_Rec 
*app, i } else if (IS_NAMED_PRIM(rator, "mcons")) { LOG_IT(("inlined mcons\n")); - generate_two_args(app->rand1, app->rand2, jitter, 1, 2); + scheme_generate_two_args(app->rand1, app->rand2, jitter, 1, 2); CHECK_LIMIT(); mz_rs_sync(); @@ -3092,7 +3115,7 @@ int scheme_generate_inlined_binary(mz_jit_state *jitter, Scheme_App3_Rec *app, i } else if (IS_NAMED_PRIM(rator, "list")) { LOG_IT(("inlined list\n")); - generate_two_args(app->rand1, app->rand2, jitter, 1, 2); + scheme_generate_two_args(app->rand1, app->rand2, jitter, 1, 2); CHECK_LIMIT(); mz_rs_dec(1); @@ -3120,7 +3143,7 @@ int scheme_generate_inlined_binary(mz_jit_state *jitter, Scheme_App3_Rec *app, i LOG_IT(("inlined make-rectangular\n")); - generate_two_args(app->rand1, app->rand2, jitter, 1, 2); + scheme_generate_two_args(app->rand1, app->rand2, jitter, 1, 2); CHECK_LIMIT(); mz_rs_sync(); @@ -3177,7 +3200,7 @@ int scheme_generate_inlined_binary(mz_jit_state *jitter, Scheme_App3_Rec *app, i LOG_IT(("inlined make-rectangular\n")); - generate_two_args(app->rand1, app->rand2, jitter, 1, 2); + scheme_generate_two_args(app->rand1, app->rand2, jitter, 1, 2); CHECK_LIMIT(); mz_rs_sync(); @@ -3198,7 +3221,7 @@ int scheme_generate_inlined_binary(mz_jit_state *jitter, Scheme_App3_Rec *app, i } else if (IS_NAMED_PRIM(rator, "unsafe-make-flrectangular")) { LOG_IT(("inlined make-rectangular\n")); - generate_two_args(app->rand1, app->rand2, jitter, 1, 2); + scheme_generate_two_args(app->rand1, app->rand2, jitter, 1, 2); CHECK_LIMIT(); allocate_rectangular(jitter, dest); @@ -3207,7 +3230,7 @@ int scheme_generate_inlined_binary(mz_jit_state *jitter, Scheme_App3_Rec *app, i } else if (IS_NAMED_PRIM(rator, "procedure-arity-includes?")) { LOG_IT(("inlined procedure-arity-includes?\n")); - generate_two_args(app->rand1, app->rand2, jitter, 1, 2); + scheme_generate_two_args(app->rand1, app->rand2, jitter, 1, 2); CHECK_LIMIT(); mz_rs_sync(); @@ -3242,7 +3265,7 @@ int scheme_generate_inlined_binary(mz_jit_state *jitter, 
Scheme_App3_Rec *app, i LOG_IT(("inlined continuation-mark-set-first\n")); - generate_two_args(app->rand1, app->rand2, jitter, 1, 2); + scheme_generate_two_args(app->rand1, app->rand2, jitter, 1, 2); CHECK_LIMIT(); /* R0 has the first argument, R1 has the second argument */ @@ -3982,7 +4005,7 @@ static int generate_vector_alloc(mz_jit_state *jitter, Scheme_Object *rator, mz_runstack_unskipped(jitter, 1); c = 1; } else if (app3) { - generate_two_args(app3->rand1, app3->rand2, jitter, 1, 2); /* sync'd below */ + scheme_generate_two_args(app3->rand1, app3->rand2, jitter, 1, 2); /* sync'd below */ c = 2; } else { c = app->num_args;