diff --git a/collects/compiler/decompile.ss b/collects/compiler/decompile.ss index 15461c8866..250fc75867 100644 --- a/collects/compiler/decompile.ss +++ b/collects/compiler/decompile.ss @@ -292,7 +292,7 @@ (let ([vars (for/list ([i (in-range num-params)] [type (in-list arg-types)]) (gensym (format "~a~a-" - (if (eq? type 'ref) "argbox" "arg") + (case type [(ref) "argbox"] [(flonum) "argfl"] [else "arg"]) i)))] [rest-vars (if rest? (list (gensym 'rest)) null)] [captures (map (lambda (v) @@ -351,6 +351,7 @@ (if (and (symbol? (car a)) (case (length a) [(2) (memq (car a) '(unsafe-flabs + unsafe-flsqrt unsafe-fx->fl))] [(3) (memq (car a) '(unsafe-fl+ unsafe-fl- unsafe-fl* unsafe-fl/ unsafe-fl< unsafe-fl> diff --git a/collects/compiler/main.ss b/collects/compiler/main.ss index 3ef078a905..94fff4300c 100644 --- a/collects/compiler/main.ss +++ b/collects/compiler/main.ss @@ -130,7 +130,7 @@ [("--expand") ,(lambda (f) 'expand) ((,(format "Write macro-expanded Scheme source(s) to stdout") ""))] - [("--decompile") + [("-r" "--decompile") ,(lambda (f) 'decompile) ((,(format "Write quasi-Scheme for ~a file(s) to stdout" (extract-suffix append-zo-suffix)) ""))] [("-z" "--zo") @@ -457,14 +457,15 @@ (for ([zo-file source-files]) (let ([zo-file (path->complete-path zo-file)]) (let-values ([(base name dir?) (split-path zo-file)]) - (parameterize ([current-load-relative-directory base] - [print-graph #t]) - (pretty-print - (decompile - (call-with-input-file* - zo-file - (lambda (in) - (zo-parse in))))))))))] + (let ([alt-file (build-path base "compiled" (path-add-suffix name #".zo"))]) + (parameterize ([current-load-relative-directory base] + [print-graph #t]) + (pretty-print + (decompile + (call-with-input-file* + (if (file-exists? alt-file) alt-file zo-file) + (lambda (in) + (zo-parse in)))))))))))] [(make-zo) (let ([n (make-base-empty-namespace)] [mc (dynamic-require 'compiler/cm 'managed-compile-zo)] diff --git a/collects/compiler/zo-marshal.ss b/collects/compiler/zo-marshal.ss index 49669966a9..dce8346914 100644 --- a/collects/compiler/zo-marshal.ss +++ b/collects/compiler/zo-marshal.ss @@ -664,14 +664,14 @@ (list->vector (append (vector->list closure-map) - (let ([v (make-vector (ceiling (/ num-params BITS_PER_MZSHORT)))]) + (let ([v (make-vector (ceiling (/ (* 2 num-params) BITS_PER_MZSHORT)))]) (for ([t (in-list param-types)] [i (in-naturals)]) (when (eq? t 'ref) - (let ([pos (quotient i BITS_PER_MZSHORT)]) + (let ([pos (quotient (* 2 i) BITS_PER_MZSHORT)]) (vector-set! v pos (bitwise-ior (vector-ref v pos) - (arithmetic-shift 1 (modulo i BITS_PER_MZSHORT))))))) + (arithmetic-shift 1 (modulo (* 2 i) BITS_PER_MZSHORT))))))) (vector->list v)))) closure-map)) l)] diff --git a/collects/compiler/zo-parse.ss b/collects/compiler/zo-parse.ss index 61cecff581..c995346f7b 100644 --- a/collects/compiler/zo-parse.ss +++ b/collects/compiler/zo-parse.ss @@ -138,12 +138,13 @@ (if (zero? (bitwise-and flags CLOS_HAS_REF_ARGS)) (for/list ([i (in-range num-params)]) 'val) (for/list ([i (in-range num-params)]) - (if (bitwise-bit-set? - (vector-ref closed-over - (+ closure-size (quotient i BITS_PER_MZSHORT))) - (remainder i BITS_PER_MZSHORT)) - 'ref - 'val))))]) + (let ([byte (vector-ref closed-over + (+ closure-size (quotient (* 2 i) BITS_PER_MZSHORT)))]) + (if (bitwise-bit-set? byte (remainder (* 2 i) BITS_PER_MZSHORT)) + 'ref + (if (bitwise-bit-set? byte (add1 (remainder (* 2 i) BITS_PER_MZSHORT))) + 'flonum + 'val))))))]) (make-lam name (append (if (zero? (bitwise-and flags flags CLOS_PRESERVES_MARKS)) null '(preserves-marks)) diff --git a/collects/scribblings/guide/performance.scrbl b/collects/scribblings/guide/performance.scrbl index 72f11e1422..6adecb9fa6 100644 --- a/collects/scribblings/guide/performance.scrbl +++ b/collects/scribblings/guide/performance.scrbl @@ -270,15 +270,16 @@ boxing and unboxing intermediate results. Expressions involving a combination of unchecked flonum operations, @scheme[unsafe-fx->fl], constants, and variable references are optimized to avoid boxing. When such a result is bound with @scheme[let] and then consumed by another -unchecked flonum operation, the result is similarly unboxed, unless it -is captured in a closure. The bytecode decompiler (see @secref[#:doc -'(lib "scribblings/mzc/mzc.scrbl") "decompile"] annotates combinations -where the JIT can avoid boxes with @schemeidfont{#%flonum}, +unchecked flonum operation, the result is similarly unboxed. Finally, +the compiler can detect some flonum-valued loop accumulators. The +bytecode decompiler (see @secref[#:doc '(lib +"scribblings/mzc/mzc.scrbl") "decompile"] annotates combinations where +the JIT can avoid boxes with @schemeidfont{#%flonum}, @schemeidfont{#%as-flonum}, and @schemeidfont{#%from-flonum}. See also @secref["unchecked-unsafe"], especially the warnings about unsafety. -@margin-note{Unboxing of local bindings is not supported by the JIT for -PowerPC.} +@margin-note{Unboxing of local bindings and accumualtors is not +supported by the JIT for PowerPC.} @; ---------------------------------------------------------------------- diff --git a/collects/tests/mzscheme/optimize.ss b/collects/tests/mzscheme/optimize.ss index 9b01b837d5..79e0a6bccf 100644 --- a/collects/tests/mzscheme/optimize.ss +++ b/collects/tests/mzscheme/optimize.ss @@ -989,6 +989,6 @@ (err/rt-test (cwv-2-5-f (lambda () 1) (lambda (y z) (+ y 2))) exn:fail:contract:arity?) (err/rt-test (cwv-2-5-f (lambda () (values 1 2 3)) (lambda (y z) (+ y 2))) exn:fail:contract:arity?) -;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (report-errs) diff --git a/collects/tests/mzscheme/unsafe.ss b/collects/tests/mzscheme/unsafe.ss index b799ba5d95..00c98c371f 100644 --- a/collects/tests/mzscheme/unsafe.ss +++ b/collects/tests/mzscheme/unsafe.ss @@ -224,4 +224,39 @@ (void)) +;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Interaction of unboxing, closures, etc. +(let ([f (lambda (x) + (let ([x (unsafe-fl+ x 1.0)]) + (let loop ([v 0.0][n 10000]) + (if (zero? n) + v + (loop (unsafe-fl+ v x) + (- n 1))))))]) + (test 20000.0 f 1.0)) +(let ([f (lambda (x) + (let ([x (unsafe-fl+ x 1.0)]) + (let loop ([v 0.0][n 10000][q 2.0]) + (if (zero? n) + (unsafe-fl+ v q) + (loop (unsafe-fl+ v x) + (- n 1) + (unsafe-fl- 0.0 q))))))]) + (test 20002.0 f 1.0)) +(let ([f (lambda (x) + (let loop ([a 0.0][v 0.0][n 1000000]) + (if (zero? n) + v + (if (odd? n) + (let ([b (unsafe-fl+ a a)]) + (loop b v (sub1 n))) + ;; First arg is un place, but may need re-boxing + (loop a + (unsafe-fl+ v x) + (- n 1))))))]) + (test 500000.0 f 1.0)) + +;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + (report-errs) diff --git a/src/mzscheme/src/cstartup.inc b/src/mzscheme/src/cstartup.inc index c06141b3a2..a757dff6a2 100644 --- a/src/mzscheme/src/cstartup.inc +++ b/src/mzscheme/src/cstartup.inc @@ -1,5 +1,5 @@ { - static MZCOMPILED_STRING_FAR unsigned char expr[] = {35,126,7,52,46,50,46,51,46,54,50,0,0,0,1,0,0,3,0,12,0, + static MZCOMPILED_STRING_FAR unsigned char expr[] = {35,126,7,52,46,50,46,51,46,55,50,0,0,0,1,0,0,3,0,12,0, 19,0,23,0,28,0,31,0,36,0,43,0,50,0,63,0,67,0,72,0,78, 0,92,0,106,0,109,0,115,0,119,0,121,0,132,0,134,0,148,0,155,0, 177,0,179,0,193,0,4,1,33,1,44,1,55,1,65,1,101,1,134,1,167, @@ -99,7 +99,7 @@ EVAL_ONE_SIZED_STR((char *)expr, 2018); } { - static MZCOMPILED_STRING_FAR unsigned char expr[] = {35,126,7,52,46,50,46,51,46,54,62,0,0,0,1,0,0,13,0,18,0, + static MZCOMPILED_STRING_FAR unsigned char expr[] = {35,126,7,52,46,50,46,51,46,55,62,0,0,0,1,0,0,13,0,18,0, 35,0,50,0,68,0,84,0,94,0,112,0,132,0,148,0,166,0,197,0,226, 0,248,0,6,1,12,1,26,1,31,1,41,1,49,1,77,1,109,1,115,1, 160,1,205,1,229,1,12,2,14,2,71,2,161,3,202,3,37,5,141,5,245, @@ -428,7 +428,7 @@ EVAL_ONE_SIZED_STR((char *)expr, 6834); } { - static MZCOMPILED_STRING_FAR unsigned char expr[] = {35,126,7,52,46,50,46,51,46,54,8,0,0,0,1,0,0,6,0,19,0, + static MZCOMPILED_STRING_FAR unsigned char expr[] = {35,126,7,52,46,50,46,51,46,55,8,0,0,0,1,0,0,6,0,19,0, 34,0,48,0,62,0,76,0,118,0,0,0,38,1,0,0,65,113,117,111,116, 101,29,94,2,1,67,35,37,117,116,105,108,115,11,29,94,2,1,69,35,37, 110,101,116,119,111,114,107,11,29,94,2,1,68,35,37,112,97,114,97,109,122, @@ -447,7 +447,7 @@ EVAL_ONE_SIZED_STR((char *)expr, 331); } { - static MZCOMPILED_STRING_FAR unsigned char expr[] = {35,126,7,52,46,50,46,51,46,54,55,0,0,0,1,0,0,11,0,38,0, + static MZCOMPILED_STRING_FAR unsigned char expr[] = {35,126,7,52,46,50,46,51,46,55,55,0,0,0,1,0,0,11,0,38,0, 44,0,57,0,66,0,73,0,95,0,117,0,143,0,155,0,173,0,193,0,205, 0,221,0,244,0,0,1,31,1,38,1,43,1,48,1,53,1,58,1,67,1, 72,1,76,1,84,1,93,1,101,1,146,1,166,1,195,1,226,1,26,2,36, diff --git a/src/mzscheme/src/env.c b/src/mzscheme/src/env.c index c9975b27fe..6ed199f85a 100644 --- a/src/mzscheme/src/env.c +++ b/src/mzscheme/src/env.c @@ -3422,6 +3422,12 @@ void scheme_optimize_mutated(Optimize_Info *info, int pos) register_use(info, pos, 0x1); } +void scheme_optimize_produces_flonum(Optimize_Info *info, int pos) +/* pos must be in immediate frame */ +{ + register_use(info, pos, 0x4); +} + Scheme_Object *scheme_optimize_reverse(Optimize_Info *info, int pos, int unless_mutated) /* pos is in new-frame counts, and we want to produce an old-frame reference if it's not mutated */ @@ -3458,7 +3464,7 @@ int scheme_optimize_is_used(Optimize_Info *info, int pos) return 0; } -int scheme_optimize_is_mutated(Optimize_Info *info, int pos) +static int check_use(Optimize_Info *info, int pos, int flag) /* pos is in new-frame counts */ { while (1) { @@ -3468,29 +3474,28 @@ int scheme_optimize_is_mutated(Optimize_Info *info, int pos) info = info->next; } - if (info->use && (info->use[pos] & 0x1)) + if (info->use && (info->use[pos] & flag)) return 1; return 0; } -int scheme_optimize_is_unbox_arg(Optimize_Info *info, int pos) +int scheme_optimize_is_mutated(Optimize_Info *info, int pos) /* pos is in new-frame counts */ { - while (1) { - if (pos < info->new_frame) - break; - pos -= info->new_frame; - info = info->next; - } + return check_use(info, pos, 0x1); +} - if (info->use && (info->use[pos] & 0x2)) { - /* make sure it's not captured by a closure */ - if (!info->stat_dists || (info->sd_depths[pos] < 2)) - return 1; - } +int scheme_optimize_is_flonum_arg(Optimize_Info *info, int pos, int depth) +/* pos is in new-frame counts */ +{ + return check_use(info, pos, 0x2); +} - return 0; +int scheme_optimize_is_flonum_valued(Optimize_Info *info, int pos) +/* pos is in new-frame counts */ +{ + return check_use(info, pos, 0x4); } int scheme_optimize_any_uses(Optimize_Info *info, int start_pos, int end_pos) @@ -3935,13 +3940,27 @@ static int resolve_info_lookup(Resolve_Info *info, int pos, int *flags, Scheme_O boxmap = (mzshort *)ca[3]; vec = scheme_make_vector(sz + 1, NULL); for (i = 0; i < sz; i++) { + int boxed = 0, flonumed = 0, flags = 0; + + if (boxmap) { + int byte = boxmap[(2 * i) / BITS_PER_MZSHORT]; + if (byte & ((mzshort)1 << ((2 * i) & (BITS_PER_MZSHORT - 1)))) + boxed = 1; + if (byte & ((mzshort)2 << ((2 * i) & (BITS_PER_MZSHORT - 1)))) { + flonumed = 1; + flags = SCHEME_LOCAL_FLONUM; + } + } + loc = scheme_make_local(scheme_local_type, posmap[i] + offset + shifted, - 0); - if (boxmap) { - if (boxmap[i / BITS_PER_MZSHORT] & ((mzshort)1 << (i & (BITS_PER_MZSHORT - 1)))) - loc = scheme_box(loc); - } + flags); + + if (boxed) + loc = scheme_box(loc); + else if (flonumed) + loc = scheme_make_vector(1, loc); + SCHEME_VEC_ELS(vec)[i+1] = loc; } SCHEME_VEC_ELS(vec)[0] = ca[2]; diff --git a/src/mzscheme/src/eval.c b/src/mzscheme/src/eval.c index ef66ce2105..713e17c5e5 100644 --- a/src/mzscheme/src/eval.c +++ b/src/mzscheme/src/eval.c @@ -1316,6 +1316,8 @@ static Scheme_Object *resolve_application(Scheme_Object *o, Resolve_Info *orig_i loc = SCHEME_VEC_ELS(additions)[i+1]; if (SCHEME_BOXP(loc)) loc = SCHEME_BOX_VAL(loc); + else if (SCHEME_VECTORP(loc)) + loc = SCHEME_VEC_ELS(loc)[0]; app2->args[i + 1] = loc; } for (i = 1; i < n; i++) { @@ -1402,6 +1404,8 @@ static Scheme_Object *resolve_application2(Scheme_Object *o, Resolve_Info *orig_ loc = SCHEME_VEC_ELS(additions)[i+1]; if (SCHEME_BOXP(loc)) loc = SCHEME_BOX_VAL(loc); + else if (SCHEME_VECTORP(loc)) + loc = SCHEME_VEC_ELS(loc)[0]; app2->args[i + 1] = loc; } app2->args[0] = rator; @@ -1416,6 +1420,8 @@ static Scheme_Object *resolve_application2(Scheme_Object *o, Resolve_Info *orig_ loc = SCHEME_VEC_ELS(additions)[1]; if (SCHEME_BOXP(loc)) loc = SCHEME_BOX_VAL(loc); + else if (SCHEME_VECTORP(loc)) + loc = SCHEME_VEC_ELS(loc)[0]; app2->rand1 = loc; app2->rand2 = app->rand; return resolve_application3((Scheme_Object *)app2, orig_info, 2 + rdelta); @@ -1503,6 +1509,8 @@ static Scheme_Object *resolve_application3(Scheme_Object *o, Resolve_Info *orig_ loc = SCHEME_VEC_ELS(additions)[i+1]; if (SCHEME_BOXP(loc)) loc = SCHEME_BOX_VAL(loc); + else if (SCHEME_VECTORP(loc)) + loc = SCHEME_VEC_ELS(loc)[0]; app2->args[i + 1] = loc; } app2->args[0] = rator; @@ -2451,6 +2459,82 @@ Scheme_Object *optimize_for_inline(Optimize_Info *info, Scheme_Object *le, int a return NULL; } +int scheme_is_flonum_expression(Scheme_Object *expr, Optimize_Info *info) +{ + if (scheme_expr_produces_flonum(expr)) + return 1; + + if (SAME_TYPE(SCHEME_TYPE(expr), scheme_local_type)) { + if (scheme_optimize_is_flonum_valued(info, SCHEME_LOCAL_POS(expr))) + return 1; + } + + return 0; +} + +static void register_flonum_argument_types(Scheme_App_Rec *app, Scheme_App2_Rec *app2, Scheme_App3_Rec *app3, + Optimize_Info *info) +{ + Scheme_Object *rator, *rand, *le; + int n, i; + + if (app) { + rator = app->args[0]; + n = app->num_args; + } else if (app2) { + rator = app2->rator; + n = 1; + } else { + rator = app3->rator; + n = 2; + } + + if (SAME_TYPE(SCHEME_TYPE(rator), scheme_local_type)) { + rator = scheme_optimize_reverse(info, SCHEME_LOCAL_POS(rator), 1); + if (rator) { + int offset, single_use; + le = scheme_optimize_info_lookup(info, SCHEME_LOCAL_POS(rator), &offset, &single_use, 0, 0); + if (le && SAME_TYPE(SCHEME_TYPE(le), scheme_compiled_unclosed_procedure_type)) { + Scheme_Closure_Data *data = (Scheme_Closure_Data *)le; + char *map; + int ok; + + map = scheme_get_closure_flonum_map(data, n, &ok); + + if (ok) { + for (i = 0; i < n; i++) { + int is_flonum; + + if (app) + rand = app->args[i+1]; + else if (app2) + rand = app2->rand; + else { + if (!i) + rand = app3->rand1; + else + rand = app3->rand2; + } + + is_flonum = scheme_is_flonum_expression(rand, info); + if (is_flonum) { + if (!map) { + map = MALLOC_N_ATOMIC(char, n); + memset(map, 1, n); + } + } + if (map && !is_flonum) + map[i] = 0; + } + + if (map) + scheme_set_closure_flonum_map(data, map); + } + } + } + } +} + char *scheme_optimize_context_to_string(Scheme_Object *context) { if (context) { @@ -2604,22 +2688,25 @@ static int purely_functional_primitive(Scheme_Object *rator, int n) int scheme_wants_flonum_arguments(Scheme_Object *rator) { - if (SCHEME_PRIMP(rator) - && (SCHEME_PRIM_PROC_FLAGS(rator) & SCHEME_PRIM_IS_UNSAFE_FUNCTIONAL)) { - if (IS_NAMED_PRIM(rator, "unsafe-flabs") - || IS_NAMED_PRIM(rator, "unsafe-fl+") - || IS_NAMED_PRIM(rator, "unsafe-fl-") - || IS_NAMED_PRIM(rator, "unsafe-fl*") - || IS_NAMED_PRIM(rator, "unsafe-fl/") - || IS_NAMED_PRIM(rator, "unsafe-fl<") - || IS_NAMED_PRIM(rator, "unsafe-fl<=") - || IS_NAMED_PRIM(rator, "unsafe-fl=") - || IS_NAMED_PRIM(rator, "unsafe-fl>") - || IS_NAMED_PRIM(rator, "unsafe-fl>=") - || IS_NAMED_PRIM(rator, "unsafe-flvector-set!") - || IS_NAMED_PRIM(rator, "unsafe-flvector-ref") - || IS_NAMED_PRIM(rator, "unsafe-fx->fl")) - return 1; + if (SCHEME_PRIMP(rator)) { + if (SCHEME_PRIM_PROC_FLAGS(rator) & SCHEME_PRIM_IS_UNSAFE_FUNCTIONAL) { + if (IS_NAMED_PRIM(rator, "unsafe-flabs") + || IS_NAMED_PRIM(rator, "unsafe-fl+") + || IS_NAMED_PRIM(rator, "unsafe-fl-") + || IS_NAMED_PRIM(rator, "unsafe-fl*") + || IS_NAMED_PRIM(rator, "unsafe-fl/") + || IS_NAMED_PRIM(rator, "unsafe-fl<") + || IS_NAMED_PRIM(rator, "unsafe-fl<=") + || IS_NAMED_PRIM(rator, "unsafe-fl=") + || IS_NAMED_PRIM(rator, "unsafe-fl>") + || IS_NAMED_PRIM(rator, "unsafe-fl>=") + || IS_NAMED_PRIM(rator, "unsafe-flvector-ref") + || IS_NAMED_PRIM(rator, "unsafe-fx->fl")) + return 1; + } else if (SCHEME_PRIM_PROC_FLAGS(rator) & SCHEME_PRIM_IS_NARY_INLINED) { + if (IS_NAMED_PRIM(rator, "unsafe-flvector-set!")) + return 1; + } } return 0; @@ -2725,6 +2812,10 @@ int scheme_expr_produces_flonum(Scheme_Object *expr) return produces_unboxed(app->rator); } break; + default: + if (SCHEME_FLOATP(expr)) + return 1; + break; } return 0; } @@ -2925,6 +3016,8 @@ static Scheme_Object *optimize_application(Scheme_Object *o, Optimize_Info *info if (!app->num_args && SAME_OBJ(app->args[0], scheme_list_proc)) return scheme_null; + register_flonum_argument_types(app, NULL, NULL, info); + return check_unbox_rotation((Scheme_Object *)app, app->args[0], app->num_args, info); } @@ -3041,6 +3134,8 @@ static Scheme_Object *optimize_application2(Scheme_Object *o, Optimize_Info *inf info->single_result = -info->single_result; } + register_flonum_argument_types(NULL, app, NULL, info); + return check_unbox_rotation((Scheme_Object *)app, app->rator, 1, info); } @@ -3174,6 +3269,8 @@ static Scheme_Object *optimize_application3(Scheme_Object *o, Optimize_Info *inf info->single_result = -info->single_result; } + register_flonum_argument_types(NULL, NULL, app, info); + return check_unbox_rotation((Scheme_Object *)app, app->rator, 2, info); } @@ -10931,10 +11028,10 @@ int scheme_validate_rator_wants_box(Scheme_Object *app_rator, int pos, return 0; } - if (SCHEME_CLOSURE_DATA_FLAGS(data) & CLOS_HAS_REF_ARGS) { + if (SCHEME_CLOSURE_DATA_FLAGS(data) & CLOS_HAS_TYPED_ARGS) { if (pos < data->num_params) { - int bit = ((mzshort)1 << (pos & (BITS_PER_MZSHORT - 1))); - if (data->closure_map[data->closure_size + (pos / BITS_PER_MZSHORT)] & bit) + int bit = ((mzshort)1 << ((2 * pos) & (BITS_PER_MZSHORT - 1))); + if (data->closure_map[data->closure_size + ((2 * pos) / BITS_PER_MZSHORT)] & bit) return 1; } } @@ -10971,7 +11068,7 @@ void scheme_validate_closure(Mz_CPort *port, Scheme_Object *expr, cnt = data->num_params; base = sz - cnt; - if (SCHEME_CLOSURE_DATA_FLAGS(data) & CLOS_HAS_REF_ARGS) { + if (SCHEME_CLOSURE_DATA_FLAGS(data) & CLOS_HAS_TYPED_ARGS) { base2 = data->closure_size; for (i = 0; i < cnt; i++) { new_stack[base + i] = closure_stack[base2 + i]; @@ -11008,11 +11105,11 @@ static void validate_unclosed_procedure(Mz_CPort *port, Scheme_Object *expr, int self_pos) { Scheme_Closure_Data *data = (Scheme_Closure_Data *)expr; - int i, cnt, q, p, sz, base, vld, self_pos_in_closure = -1; + int i, cnt, q, p, sz, base, vld, self_pos_in_closure = -1, typed_arg = 0; mzshort *map; char *closure_stack; - if (SCHEME_CLOSURE_DATA_FLAGS(data) & CLOS_HAS_REF_ARGS) { + if (SCHEME_CLOSURE_DATA_FLAGS(data) & CLOS_HAS_TYPED_ARGS) { sz = data->closure_size + data->num_params; } else { sz = data->closure_size; @@ -11024,14 +11121,18 @@ static void validate_unclosed_procedure(Mz_CPort *port, Scheme_Object *expr, else closure_stack = NULL; - if (SCHEME_CLOSURE_DATA_FLAGS(data) & CLOS_HAS_REF_ARGS) { + if (SCHEME_CLOSURE_DATA_FLAGS(data) & CLOS_HAS_TYPED_ARGS) { cnt = data->num_params; base = sz - cnt; for (i = 0; i < cnt; i++) { - int bit = ((mzshort)1 << (i & (BITS_PER_MZSHORT - 1))); - if (map[data->closure_size + (i / BITS_PER_MZSHORT)] & bit) + int bit = ((mzshort)1 << ((2 * i) & (BITS_PER_MZSHORT - 1))); + if (map[data->closure_size + ((2 * i) / BITS_PER_MZSHORT)] & bit) { vld = VALID_BOX; - else + typed_arg = 1; + } else if (map[data->closure_size + ((2 * i) / BITS_PER_MZSHORT)] & (bit << 1)) { + vld = VALID_FLONUM; + typed_arg = 1; + } else vld = VALID_VAL; closure_stack[i + base] = vld; } @@ -11057,7 +11158,7 @@ static void validate_unclosed_procedure(Mz_CPort *port, Scheme_Object *expr, closure_stack[i + base] = vld; } - if (SCHEME_CLOSURE_DATA_FLAGS(data) & CLOS_HAS_REF_ARGS) { + if (typed_arg) { if ((proc_with_refs_ok != 1) && !argument_to_arity_error(app_rator, proc_with_refs_ok)) scheme_ill_formed_code(port); diff --git a/src/mzscheme/src/fun.c b/src/mzscheme/src/fun.c index 980b4b9e3c..90949e9164 100644 --- a/src/mzscheme/src/fun.c +++ b/src/mzscheme/src/fun.c @@ -958,10 +958,12 @@ void scheme_delay_load_closure(Scheme_Closure_Data *data) before a closure mapping is resolved. */ typedef struct { MZTAG_IF_REQUIRED - int *local_flags; + int *local_flags; /* for arguments from compile pass, flonum info updated in optimize pass */ mzshort base_closure_size; /* doesn't include top-level (if any) */ mzshort *base_closure_map; - short has_tl, body_size; + char *flonum_map; /* NULL when has_flomap set => no flonums */ + char has_tl, has_flomap; + short body_size; } Closure_Info; Scheme_Object * @@ -1001,6 +1003,11 @@ scheme_optimize_closure_compilation(Scheme_Object *_data, Optimize_Info *info, i code = scheme_optimize_expr(data->code, info, 0); + for (i = 0; i < data->num_params; i++) { + if (scheme_optimize_is_flonum_arg(info, i, 1)) + cl->local_flags[i] |= SCHEME_WAS_FLONUM_ARGUMENT; + } + if (info->single_result) SCHEME_CLOSURE_DATA_FLAGS(data) |= CLOS_SINGLE_RESULT; else if (SCHEME_CLOSURE_DATA_FLAGS(data) & CLOS_SINGLE_RESULT) @@ -1038,12 +1045,51 @@ scheme_optimize_closure_compilation(Scheme_Object *_data, Optimize_Info *info, i return (Scheme_Object *)data; } +char *scheme_get_closure_flonum_map(Scheme_Closure_Data *data, int arg_n, int *ok) +{ + Closure_Info *cl = (Closure_Info *)data->closure_map; + + if ((SCHEME_CLOSURE_DATA_FLAGS(data) & CLOS_HAS_REST) + || (arg_n != data->num_params)) { + *ok = 0; + return NULL; + } + + if (cl->has_flomap && !cl->flonum_map) { + *ok = 0; + return NULL; + } + + *ok = 1; + return cl->flonum_map; +} + +void scheme_set_closure_flonum_map(Scheme_Closure_Data *data, char *flonum_map) +{ + Closure_Info *cl = (Closure_Info *)data->closure_map; + int i; + + if (!cl->flonum_map) { + cl->has_flomap = 1; + cl->flonum_map = flonum_map; + } + + for (i = data->num_params; i--; ) { + if (flonum_map[i]) break; + } + + if (i < 0) { + cl->flonum_map = NULL; + } +} + Scheme_Object *scheme_clone_closure_compilation(int dup_ok, Scheme_Object *_data, Optimize_Info *info, int delta, int closure_depth) { Scheme_Closure_Data *data, *data2; Scheme_Object *body; Closure_Info *cl; int *flags, sz; + char *flonum_map; data = (Scheme_Closure_Data *)_data; @@ -1067,6 +1113,13 @@ Scheme_Object *scheme_clone_closure_compilation(int dup_ok, Scheme_Object *_data memcpy(flags, cl->local_flags, sz); cl->local_flags = flags; + if (cl->flonum_map) { + sz = data2->num_params; + flonum_map = (char *)scheme_malloc_atomic(sz); + memcpy(flonum_map, cl->flonum_map, sz); + cl->flonum_map = flonum_map; + } + return (Scheme_Object *)data2; } @@ -1211,7 +1264,7 @@ int scheme_closure_argument_flags(Scheme_Closure_Data *data, int i) XFORM_NONGCING static int boxmap_size(int n) { - return (n + (BITS_PER_MZSHORT - 1)) / BITS_PER_MZSHORT; + return ((2 * n) + (BITS_PER_MZSHORT - 1)) / BITS_PER_MZSHORT; } static mzshort *allocate_boxmap(int n) @@ -1226,14 +1279,14 @@ static mzshort *allocate_boxmap(int n) return boxmap; } -XFORM_NONGCING static void boxmap_set(mzshort *boxmap, int j) +XFORM_NONGCING static void boxmap_set(mzshort *boxmap, int j, int bit, int delta) { - boxmap[j / BITS_PER_MZSHORT] |= ((mzshort)1 << (j & (BITS_PER_MZSHORT - 1))); + boxmap[delta + ((2 * j) / BITS_PER_MZSHORT)] |= ((mzshort)bit << ((2 * j) & (BITS_PER_MZSHORT - 1))); } -XFORM_NONGCING static int boxmap_get(mzshort *boxmap, int j) +XFORM_NONGCING static int boxmap_get(mzshort *boxmap, int j, int bit) { - if (boxmap[j / BITS_PER_MZSHORT] & ((mzshort)1 << (j & (BITS_PER_MZSHORT - 1)))) + if (boxmap[(2 * j) / BITS_PER_MZSHORT] & ((mzshort)bit << ((2 * j) & (BITS_PER_MZSHORT - 1)))) return 1; else return 0; @@ -1245,9 +1298,9 @@ scheme_resolve_closure_compilation(Scheme_Object *_data, Resolve_Info *info, Scheme_Object *precomputed_lift) { Scheme_Closure_Data *data; - int i, closure_size, offset, np, num_params; + int i, closure_size, offset, np, num_params, expanded_already = 0; int has_tl, convert_size, need_lift; - mzshort *oldpos, *closure_map; + mzshort *oldpos, *closure_map, *new_closure_map; Closure_Info *cl; Resolve_Info *new_info; Scheme_Object *lifted, *result, *lifteds = NULL; @@ -1275,7 +1328,25 @@ scheme_resolve_closure_compilation(Scheme_Object *_data, Resolve_Info *info, closure. */ closure_size = data->closure_size; + if (cl->flonum_map) { + int at_least_one = 0; + for (i = data->num_params; i--; ) { + if (cl->flonum_map[i]) { + if (cl->local_flags[i] & SCHEME_WAS_FLONUM_ARGUMENT) + at_least_one = 1; + else + cl->flonum_map[i] = 0; + } + } + if (at_least_one) { + closure_size += boxmap_size(data->num_params + closure_size); + expanded_already = 1; + } else + cl->flonum_map = NULL; + } closure_map = (mzshort *)scheme_malloc_atomic(sizeof(mzshort) * closure_size); + if (cl->flonum_map) + memset(closure_map, 0, sizeof(mzshort) * closure_size); has_tl = cl->has_tl; if (has_tl && !can_lift) @@ -1302,14 +1373,28 @@ scheme_resolve_closure_compilation(Scheme_Object *_data, Resolve_Info *info, } } else { closure_map[offset] = li; - if (convert && (flags & SCHEME_INFO_BOXED)) { - /* The only problem with a boxed variable is that + if (convert && (flags & (SCHEME_INFO_BOXED | SCHEME_INFO_FLONUM_ARG))) { + /* The only problem with a boxed/flonum variable is that it's more difficult to validate. We have to track which arguments are boxes. And the resulting procedure must be used only in application positions. */ if (!convert_boxes) convert_boxes = allocate_boxmap(cl->base_closure_size); - boxmap_set(convert_boxes, offset); + boxmap_set(convert_boxes, offset, (flags & SCHEME_INFO_BOXED) ? 1 : 2, 0); + } else { + /* Currently, we only need flonum information as a closure type */ + if (flags & SCHEME_INFO_FLONUM_ARG) { + if (!expanded_already) { + closure_size += boxmap_size(data->num_params + closure_size); + new_closure_map = (mzshort *)scheme_malloc_atomic(sizeof(mzshort) * closure_size); + memset(new_closure_map, 0, sizeof(mzshort) * closure_size); + memcpy(new_closure_map, closure_map, sizeof(mzshort) * data->closure_size); + closure_map = new_closure_map; + expanded_already = 1; + } + boxmap_set(closure_map, data->num_params + offset, + (flags & SCHEME_INFO_BOXED) ? 1 : 2, data->closure_size); + } } offset++; } @@ -1318,7 +1403,7 @@ scheme_resolve_closure_compilation(Scheme_Object *_data, Resolve_Info *info, /* Add bindings introduced by closure conversion. The `captured' table maps old positions to new positions. */ while (lifteds) { - int j, cnt, boxed; + int j, cnt, boxed, flonumed; Scheme_Object *vec, *loc; if (!captured) { @@ -1326,8 +1411,12 @@ scheme_resolve_closure_compilation(Scheme_Object *_data, Resolve_Info *info, for (i = 0; i < offset; i++) { int cp; cp = i; - if (convert_boxes && boxmap_get(convert_boxes, i)) - cp = -(cp + 1); + if (convert_boxes) { + if (boxmap_get(convert_boxes, i, 1)) + cp = -((2 * cp) + 1); + else if (boxmap_get(convert_boxes, i, 2)) + cp = -((2 * cp) + 2); + } scheme_hash_set(captured, scheme_make_integer(closure_map[i]), scheme_make_integer(cp)); } } @@ -1341,15 +1430,24 @@ scheme_resolve_closure_compilation(Scheme_Object *_data, Resolve_Info *info, if (SCHEME_BOXP(loc)) { loc = SCHEME_BOX_VAL(loc); boxed = 1; - } else + flonumed = 0; + } else if (SCHEME_VECTORP(loc)) { + loc = SCHEME_VEC_ELS(loc)[0]; boxed = 0; + flonumed = 1; + } else { + boxed = 0; + flonumed = 0; + } i = SCHEME_LOCAL_POS(loc); if (!scheme_hash_get(captured, scheme_make_integer(i))) { /* Need to capture an extra binding: */ int cp; cp = captured->count; if (boxed) - cp = -(cp + 1); + cp = -((2 * cp) + 1); + else if (flonumed) + cp = -((2 * cp) + 2); scheme_hash_set(captured, scheme_make_integer(i), scheme_make_integer(cp)); } } @@ -1370,11 +1468,19 @@ scheme_resolve_closure_compilation(Scheme_Object *_data, Resolve_Info *info, cp = SCHEME_INT_VAL(captured->vals[j]); old_pos = SCHEME_INT_VAL(captured->keys[j]); if (cp < 0) { - /* Boxed */ - cp = -(cp + 1); + /* Boxed or flonum */ + int bit; + cp = -cp; + if (cp & 0x1) { + cp = (cp - 1) / 2; + bit = 1; + } else { + cp = (cp - 2) / 2; + bit = 2; + } if (!convert_boxes) convert_boxes = allocate_boxmap(offset); - boxmap_set(convert_boxes, cp); + boxmap_set(convert_boxes, cp, bit, 0); } closure_map[cp] = old_pos; } @@ -1391,11 +1497,11 @@ scheme_resolve_closure_compilation(Scheme_Object *_data, Resolve_Info *info, convert_size = offset; if (convert_boxes) - new_boxes_size = boxmap_size(convert_size + data->num_params); + new_boxes_size = boxmap_size(convert_size + data->num_params + (has_tl ? 1 : 0)); else new_boxes_size = 0; - if (has_tl || convert_boxes) { + if (has_tl || convert_boxes || cl->flonum_map) { int sz; sz = ((has_tl ? sizeof(mzshort) : 0) + new_boxes_size * sizeof(mzshort)); closure_map = (mzshort *)scheme_malloc_atomic(sz); @@ -1433,7 +1539,7 @@ scheme_resolve_closure_compilation(Scheme_Object *_data, Resolve_Info *info, if (!just_compute_lift) { data->closure_size = closure_size; if (convert && convert_boxes) - SCHEME_CLOSURE_DATA_FLAGS(data) |= CLOS_HAS_REF_ARGS; + SCHEME_CLOSURE_DATA_FLAGS(data) |= CLOS_HAS_TYPED_ARGS; } /* Set up environment mapping, initialized for arguments: */ @@ -1453,11 +1559,18 @@ scheme_resolve_closure_compilation(Scheme_Object *_data, Resolve_Info *info, cl->base_closure_size + data->num_params); for (i = 0; i < data->num_params; i++) { scheme_resolve_info_add_mapping(new_info, i, i + closure_size + convert_size, - ((cl->local_flags[i] & SCHEME_WAS_SET_BANGED) - ? SCHEME_INFO_BOXED - : 0), + (((cl->local_flags[i] & SCHEME_WAS_SET_BANGED) + ? SCHEME_INFO_BOXED + : 0) + | ((cl->flonum_map && cl->flonum_map[i]) + ? SCHEME_INFO_FLONUM_ARG + : 0)), NULL); + if (cl->flonum_map && cl->flonum_map[i]) + boxmap_set(closure_map, i + convert_size, 2, data->closure_size); } + if (expanded_already && !just_compute_lift) + SCHEME_CLOSURE_DATA_FLAGS(data) |= CLOS_HAS_TYPED_ARGS; } /* Extend mapping to go from old locations on the stack (as if bodies were @@ -1502,13 +1615,23 @@ scheme_resolve_closure_compilation(Scheme_Object *_data, Resolve_Info *info, if (SCHEME_BOXP(loc)) { if (!boxmap) boxmap = allocate_boxmap(sz); - boxmap_set(boxmap, j); + boxmap_set(boxmap, j, 1, 0); loc = SCHEME_BOX_VAL(loc); + } else if (SCHEME_VECTORP(loc)) { + if (!boxmap) + boxmap = allocate_boxmap(sz); + boxmap_set(boxmap, j, 2, 0); + loc = SCHEME_VEC_ELS(loc)[0]; } loc = scheme_hash_get(captured, scheme_make_integer(SCHEME_LOCAL_POS(loc))); cp = SCHEME_INT_VAL(loc); - if (cp < 0) - cp = -(cp + 1); + if (cp < 0) { + cp = -cp; + if (cp & 0x1) + cp = (cp - 1) / 2; + else + cp = (cp - 2) / 2; + } cmap[j] = cp + (has_tl && convert ? 1 : 0); } @@ -8481,8 +8604,8 @@ static Scheme_Object *write_compiled_closure(Scheme_Object *obj) } svec_size = data->closure_size; - if (SCHEME_CLOSURE_DATA_FLAGS(data) & CLOS_HAS_REF_ARGS) { - svec_size += (data->num_params + BITS_PER_MZSHORT - 1) / BITS_PER_MZSHORT; + if (SCHEME_CLOSURE_DATA_FLAGS(data) & CLOS_HAS_TYPED_ARGS) { + svec_size += ((2 * (data->num_params + data->closure_size)) + BITS_PER_MZSHORT - 1) / BITS_PER_MZSHORT; } if (SCHEME_RPAIRP(data->code)) { @@ -8572,7 +8695,7 @@ static Scheme_Object *write_compiled_closure(Scheme_Object *obj) data->closure_map), ds); - if (SCHEME_CLOSURE_DATA_FLAGS(data) & CLOS_HAS_REF_ARGS) + if (SCHEME_CLOSURE_DATA_FLAGS(data) & CLOS_HAS_TYPED_ARGS) l = CONS(scheme_make_integer(data->closure_size), l); @@ -8622,7 +8745,7 @@ static Scheme_Object *read_compiled_closure(Scheme_Object *obj) obj = SCHEME_CDR(obj); /* v is an svector or an integer... */ - if (SCHEME_CLOSURE_DATA_FLAGS(data) & CLOS_HAS_REF_ARGS) { + if (SCHEME_CLOSURE_DATA_FLAGS(data) & CLOS_HAS_TYPED_ARGS) { if (!SCHEME_INTP(v)) return NULL; data->closure_size = SCHEME_INT_VAL(v); @@ -8635,7 +8758,7 @@ static Scheme_Object *read_compiled_closure(Scheme_Object *obj) if (!SAME_TYPE(scheme_svector_type, SCHEME_TYPE(v))) return NULL; - if (!(SCHEME_CLOSURE_DATA_FLAGS(data) & CLOS_HAS_REF_ARGS)) + if (!(SCHEME_CLOSURE_DATA_FLAGS(data) & CLOS_HAS_TYPED_ARGS)) data->closure_size = SCHEME_SVEC_LEN(v); data->closure_map = SCHEME_SVEC_VEC(v); diff --git a/src/mzscheme/src/jit.c b/src/mzscheme/src/jit.c index ba4c8f1b4b..4fa480e26b 100644 --- a/src/mzscheme/src/jit.c +++ b/src/mzscheme/src/jit.c @@ -100,6 +100,10 @@ END_XFORM_ARITH; # define USE_TINY_JUMPS #endif +#if defined(MZ_PRECISE_GC) && defined(MZ_USE_JIT_I386) +# define USE_FLONUM_UNBOXING +#endif + #define JIT_NOT_RET JIT_R1 #if JIT_NOT_RET == JIT_RET Fix me! See use. @@ -203,6 +207,7 @@ typedef struct { int rs_virtual_offset; int unbox, unbox_depth; int flostack_offset, flostack_space; + int self_restart_offset, self_restart_space; } mz_jit_state; #define mz_RECORD_STATUS(s) (jitter->status_at_ptr = _jit.x.pc, jitter->reg_status = (s)) @@ -233,6 +238,11 @@ static int generate_two_args(Scheme_Object *rand1, Scheme_Object *rand2, mz_jit_ static int is_simple(Scheme_Object *obj, int depth, int just_markless, mz_jit_state *jitter, int stack_start); static int lambda_has_been_jitted(Scheme_Native_Closure_Data *ndata); +static int can_unbox(Scheme_Object *obj, int fuel, int regs); +#ifdef USE_FLONUM_UNBOXING +static int generate_flonum_local_unboxing(mz_jit_state *jitter, int push); +#endif + #ifdef MZ_PRECISE_GC static void register_traversers(void); static void release_native_code(void *fnlized, void *p); @@ -982,6 +992,7 @@ static void mz_runstack_closure_pushed(mz_jit_state *jitter, int a, int flags) /* closures are never popped; they go away due to returns or tail calls */ } +#ifdef USE_FLONUM_UNBOXING static void mz_runstack_flonum_pushed(mz_jit_state *jitter, int pos) { jitter->depth += 1; @@ -993,6 +1004,7 @@ static void mz_runstack_flonum_pushed(mz_jit_state *jitter, int pos) jitter->need_set_rs = 1; /* flonums are never popped; they go away due to returns or tail calls */ } +#endif static void mz_runstack_popped(mz_jit_state *jitter, int n) { @@ -1070,11 +1082,13 @@ static int mz_flostack_save(mz_jit_state *jitter, int *pos) return jitter->flostack_space; } -static void mz_flostack_restore(mz_jit_state *jitter, int space, int pos) +static void mz_flostack_restore(mz_jit_state *jitter, int space, int pos, int gen) { if (space != jitter->flostack_space) { - int delta = jitter->flostack_space - space; - jit_addi_p(JIT_SP, JIT_SP, delta * sizeof(double)); + if (gen) { + int delta = jitter->flostack_space - space; + jit_addi_p(JIT_SP, JIT_SP, delta * sizeof(double)); + } jitter->flostack_space = space; } jitter->flostack_offset = pos; @@ -1142,6 +1156,7 @@ static int mz_is_closure(mz_jit_state *jitter, int i, int arity, int *_flags) return 0; } +#ifdef USE_FLONUM_UNBOXING static int mz_flonum_pos(mz_jit_state *jitter, int i) { int j = i, p = jitter->num_mappings, c; @@ -1170,8 +1185,10 @@ static int mz_flonum_pos(mz_jit_state *jitter, int i) } --p; } + scheme_signal_error("internal error: flonum position not found"); return 0; } +#endif static int stack_safety(mz_jit_state *jitter, int cnt, int offset) /* de-sync'd rs ok */ @@ -1710,6 +1727,21 @@ static Scheme_Object *make_two_element_ivector(Scheme_Object *a, Scheme_Object * /* bytecode properties */ /*========================================================================*/ +#ifdef USE_FLONUM_UNBOXING +static int check_closure_flonum_bit(Scheme_Closure_Data *data, int pos, int delta) +{ + int bit; + pos += delta; + bit = ((mzshort)2 << ((2 * pos) & (BITS_PER_MZSHORT - 1))); + if (data->closure_map[data->closure_size + ((2 * pos) / BITS_PER_MZSHORT)] & bit) + return 1; + else + return 0; +} +# define CLOSURE_ARGUMENT_IS_FLONUM(data, pos) check_closure_flonum_bit(data, pos, 0) +# define CLOSURE_CONTENT_IS_FLONUM(data, pos) check_closure_flonum_bit(data, pos, data->num_params) +#endif + #ifdef NEED_LONG_JUMPS static int is_short(Scheme_Object *obj, int fuel) { @@ -3067,12 +3099,16 @@ static int generate_non_tail_call(mz_jit_state *jitter, int num_rands, int direc } static int generate_self_tail_call(Scheme_Object *rator, mz_jit_state *jitter, int num_rands, jit_insn *slow_code, - int args_already_in_place) + int args_already_in_place, Scheme_App_Rec *app, Scheme_Object **alt_rands) /* Last argument is in R0 */ { jit_insn *refslow, *refagain; int i, jmp_tiny, jmp_short; int closure_size = jitter->self_closure_size; + int space, offset, arg_offset, arg_tmp_offset; +#ifdef USE_FLONUM_UNBOXING + Scheme_Object *rand; +#endif #ifdef JIT_PRECISE_GC closure_size += 1; /* Skip procedure pointer, too */ @@ -3095,27 +3131,148 @@ static int generate_self_tail_call(Scheme_Object *rator, mz_jit_state *jitter, i __END_TINY_OR_SHORT_JUMPS__(jmp_tiny, jmp_short); + arg_tmp_offset = offset = jitter->flostack_offset; + space = jitter->flostack_space; + + arg_offset = 1; + /* Copy args to runstack after closure data: */ mz_ld_runstack_base_alt(JIT_R2); jit_subi_p(JIT_R2, JIT_RUNSTACK_BASE_OR_ALT(JIT_R2), WORDS_TO_BYTES(num_rands + closure_size + args_already_in_place)); - if (num_rands) { - jit_stxi_p(WORDS_TO_BYTES(num_rands - 1 + closure_size + args_already_in_place), JIT_R2, JIT_R0); - for (i = num_rands - 1; i--; ) { - jit_ldxi_p(JIT_R1, JIT_RUNSTACK, WORDS_TO_BYTES(i)); - jit_stxi_p(WORDS_TO_BYTES(i + closure_size + args_already_in_place), JIT_R2, JIT_R1); - CHECK_LIMIT(); + for (i = num_rands; i--; ) { + int already_loaded = (i == num_rands - 1); +#ifdef USE_FLONUM_UNBOXING + int is_flonum, already_unboxed = 0; + if ((SCHEME_CLOSURE_DATA_FLAGS(jitter->self_data) & CLOS_HAS_TYPED_ARGS) + && CLOSURE_ARGUMENT_IS_FLONUM(jitter->self_data, i + args_already_in_place)) { + is_flonum = 1; + rand = (alt_rands + ? alt_rands[i+1+args_already_in_place] + : app->args[i+1+args_already_in_place]); + if (can_unbox(rand, 5, JIT_FPR_NUM-1)) { + int aoffset; + aoffset = JIT_FRAME_FLONUM_OFFSET - (arg_tmp_offset * sizeof(double)); + jit_ldxi_d_fppush(JIT_FPR0, JIT_FP, aoffset); + --arg_tmp_offset; + already_unboxed = 1; + if (!already_loaded && !SAME_TYPE(SCHEME_TYPE(rand), scheme_local_type)) { + already_loaded = 1; + (void)jit_movi_p(JIT_R0, NULL); + } + } + } else + is_flonum = 0; +#endif + if (!already_loaded) + jit_ldxi_p(JIT_R0, JIT_RUNSTACK, WORDS_TO_BYTES(i)); + jit_stxi_p(WORDS_TO_BYTES(i + closure_size + args_already_in_place), JIT_R2, JIT_R0); +#ifdef USE_FLONUM_UNBOXING + if (is_flonum) { + int aoffset; + if (!already_unboxed) + jit_ldxi_d_fppush(JIT_FPR0, JIT_R0, &((Scheme_Double *)0x0)->double_val); + aoffset = JIT_FRAME_FLONUM_OFFSET - (arg_offset * sizeof(double)); + (void)jit_stxi_d_fppop(aoffset, JIT_FP, JIT_FPR0); + arg_offset++; } +#endif + CHECK_LIMIT(); } jit_movr_p(JIT_RUNSTACK, JIT_R2); + mz_flostack_restore(jitter, jitter->self_restart_space, jitter->self_restart_offset, 1); + /* Now jump: */ (void)jit_jmpi(jitter->self_restart_code); CHECK_LIMIT(); - + /* Slow path: */ __START_TINY_OR_SHORT_JUMPS__(jmp_tiny, jmp_short); mz_patch_branch(refslow); __END_TINY_OR_SHORT_JUMPS__(jmp_tiny, jmp_short); + + jitter->flostack_offset = offset; + jitter->flostack_space = space; + +#ifdef USE_FLONUM_UNBOXING + /* Need to box any arguments that we have only in flonum form */ + if (SCHEME_CLOSURE_DATA_FLAGS(jitter->self_data) & CLOS_HAS_TYPED_ARGS) { + arg_tmp_offset = offset; + for (i = num_rands; i--; ) { + if (CLOSURE_ARGUMENT_IS_FLONUM(jitter->self_data, i + args_already_in_place)) { + rand = (alt_rands + ? alt_rands[i+1+args_already_in_place] + : app->args[i+1+args_already_in_place]); + if (can_unbox(rand, 5, JIT_FPR_NUM-1) + && (!SAME_TYPE(SCHEME_TYPE(rand), scheme_local_type) + || (SCHEME_GET_LOCAL_FLAGS(rand) == SCHEME_LOCAL_FLONUM))) { + int aoffset = JIT_FRAME_FLONUM_OFFSET - (arg_tmp_offset * sizeof(double)); + GC_CAN_IGNORE jit_insn *iref; + if (i != num_rands - 1) + mz_pushr_p(JIT_R0); + if (SAME_TYPE(SCHEME_TYPE(rand), scheme_local_type)) { + /* have to check for an existing box */ + if (i != num_rands - 1) + mz_rs_ldxi(JIT_R0, i+1); + mz_rs_sync(); + __START_TINY_JUMPS__(1); + iref = jit_bnei_p(jit_forward(), JIT_R0, NULL); + __END_TINY_JUMPS__(1); + } else + iref = NULL; + jit_movi_l(JIT_R0, aoffset); + mz_rs_sync(); + (void)jit_calli(box_flonum_from_stack_code); + if (i != num_rands - 1) + mz_rs_stxi(i+1, JIT_R0); + if (SAME_TYPE(SCHEME_TYPE(rand), scheme_local_type)) { + __START_TINY_JUMPS__(1); + mz_patch_branch(iref); + __END_TINY_JUMPS__(1); + } + CHECK_LIMIT(); + if (i != num_rands - 1) + mz_popr_p(JIT_R0); + --arg_tmp_offset; + } + } + } + + /* Arguments already in place may also need to be boxed. */ + arg_tmp_offset = jitter->self_restart_offset; + for (i = 0; i < args_already_in_place; i++) { + if (CLOSURE_ARGUMENT_IS_FLONUM(jitter->self_data, i)) { + GC_CAN_IGNORE jit_insn *iref; + mz_pushr_p(JIT_R0); + mz_ld_runstack_base_alt(JIT_R2); + jit_subi_p(JIT_R2, JIT_RUNSTACK_BASE_OR_ALT(JIT_R2), WORDS_TO_BYTES(num_rands + closure_size + args_already_in_place)); + jit_ldxi_p(JIT_R0, JIT_R2, WORDS_TO_BYTES(i+closure_size)); + mz_rs_sync(); + __START_TINY_JUMPS__(1); + iref = jit_bnei_p(jit_forward(), JIT_R0, NULL); + __END_TINY_JUMPS__(1); + { + int aoffset; + aoffset = JIT_FRAME_FLONUM_OFFSET - (arg_tmp_offset * sizeof(double)); + jit_ldxi_d_fppush(JIT_FPR0, JIT_FP, aoffset); + (void)jit_calli(box_flonum_from_stack_code); + mz_ld_runstack_base_alt(JIT_R2); + jit_subi_p(JIT_R2, JIT_RUNSTACK_BASE_OR_ALT(JIT_R2), WORDS_TO_BYTES(num_rands + closure_size + args_already_in_place)); + jit_stxi_p(WORDS_TO_BYTES(i+closure_size), JIT_R2, JIT_R0); + } + __START_TINY_JUMPS__(1); + mz_patch_branch(iref); + __END_TINY_JUMPS__(1); + mz_popr_p(JIT_R0); + CHECK_LIMIT(); + --arg_tmp_offset; + } + } + } +#endif + + mz_flostack_restore(jitter, 0, 0, 1); + generate_pause_for_gc_and_retry(jitter, 0, /* in short jumps */ JIT_R0, /* expose R0 to GC */ @@ -3127,7 +3284,7 @@ static int generate_self_tail_call(Scheme_Object *rator, mz_jit_state *jitter, i mz_set_local_p(JIT_R2, JIT_LOCAL2); } - jit_stxi_p(WORDS_TO_BYTES(num_rands - 1), JIT_RUNSTACK, JIT_R0); + mz_rs_stxi(num_rands - 1, JIT_R0); generate(rator, jitter, 0, 0, JIT_V1); CHECK_LIMIT(); mz_rs_sync(); @@ -3520,7 +3677,29 @@ static int generate_app(Scheme_App_Rec *app, Scheme_Object **alt_rands, int num_ CHECK_LIMIT(); need_safety = 0; } - generate_non_tail(arg, jitter, 0, !need_non_tail, 0); /* sync'd below */ +#ifdef USE_FLONUM_UNBOXING + if (direct_self + && is_tail + && (SCHEME_CLOSURE_DATA_FLAGS(jitter->self_data) & CLOS_HAS_TYPED_ARGS) + && (CLOSURE_ARGUMENT_IS_FLONUM(jitter->self_data, i+args_already_in_place)) + && can_unbox(arg, 5, JIT_FPR_NUM-1)) { + jitter->unbox++; + generate(arg, jitter, 0, 0, JIT_R0); + --jitter->unbox; + CHECK_LIMIT(); + generate_flonum_local_unboxing(jitter, 0); + CHECK_LIMIT(); + if (SAME_TYPE(SCHEME_TYPE(arg), scheme_local_type)) { + /* Also local Scheme_Object view, in case a box has been allocated */ + int apos; + apos = mz_remap(SCHEME_LOCAL_POS(arg)); + mz_rs_ldxi(JIT_R0, apos); + } else { + (void)jit_movi_p(JIT_R0, NULL); + } + } else +#endif + generate_non_tail(arg, jitter, 0, !need_non_tail, 0); /* sync'd below */ RESUME_JIT_DATA(); CHECK_LIMIT(); if ((i == num_rands - 1) && !direct_prim && !reorder_ok && !direct_self && !proc_already_in_place) { @@ -3581,7 +3760,7 @@ static int generate_app(Scheme_App_Rec *app, Scheme_Object **alt_rands, int num_ && (num_rands >= MAX_SHARED_CALL_RANDS)) { LOG_IT(("<-many args\n")); if (is_tail) { - mz_flostack_restore(jitter, 0, 0); + mz_flostack_restore(jitter, 0, 0, 1); if (direct_prim) { generate_direct_prim_tail_call(jitter, num_rands); } else { @@ -3606,7 +3785,6 @@ static int generate_app(Scheme_App_Rec *app, Scheme_Object **alt_rands, int num_ void *code; int dp = (direct_prim ? 1 : (direct_native ? (1 + direct_native + (nontail_self ? 1 : 0)) : 0)); if (is_tail) { - mz_flostack_restore(jitter, 0, 0); if (!shared_tail_code[dp][num_rands]) { code = generate_shared_call(num_rands, jitter, multi_ok, is_tail, direct_prim, direct_native, 0); shared_tail_code[dp][num_rands] = code; @@ -3614,9 +3792,10 @@ static int generate_app(Scheme_App_Rec *app, Scheme_Object **alt_rands, int num_ code = shared_tail_code[dp][num_rands]; if (direct_self) { LOG_IT(("<-self\n")); - generate_self_tail_call(rator, jitter, num_rands, code, args_already_in_place); + generate_self_tail_call(rator, jitter, num_rands, code, args_already_in_place, app, alt_rands); CHECK_LIMIT(); } else { + mz_flostack_restore(jitter, 0, 0, 1); LOG_IT(("<-tail\n")); if (args_already_in_place) { jit_movi_l(JIT_R2, args_already_in_place); @@ -7113,6 +7292,62 @@ int generate_inlined_test(mz_jit_state *jitter, Scheme_Object *obj, int branch_s return 0; } +/*========================================================================*/ +/* flonum boxing */ +/*========================================================================*/ + +#ifdef USE_FLONUM_UNBOXING + +static int generate_flonum_local_boxing(mz_jit_state *jitter, int pos, int local_pos, int target) +{ + int offset; + offset = mz_flonum_pos(jitter, local_pos); + offset = JIT_FRAME_FLONUM_OFFSET - (offset * sizeof(double)); + if (jitter->unbox) { + int fpr0; + fpr0 = JIT_FPR(jitter->unbox_depth); + jit_ldxi_d_fppush(fpr0, JIT_FP, offset); + } else { + GC_CAN_IGNORE jit_insn *ref; + mz_rs_sync(); + __START_TINY_JUMPS__(1); + ref = jit_bnei_p(jit_forward(), target, NULL); + __END_TINY_JUMPS__(1); + CHECK_LIMIT(); + jit_movi_l(JIT_R0, offset); + (void)jit_calli(box_flonum_from_stack_code); + mz_rs_stxi(pos, JIT_R0); + __START_TINY_JUMPS__(1); + mz_patch_branch(ref); + __END_TINY_JUMPS__(1); + } + + return 1; +} + +static int generate_flonum_local_unboxing(mz_jit_state *jitter, int push) +/* Move FPR0 onto C stack */ +{ + int offset; + + if (jitter->flostack_offset == jitter->flostack_space) { + int space = 4 * sizeof(double); + jitter->flostack_space += 4; + jit_subi_l(JIT_SP, JIT_SP, space); + } + + jitter->flostack_offset += 1; + if (push) + mz_runstack_flonum_pushed(jitter, jitter->flostack_offset); + CHECK_LIMIT(); + + offset = JIT_FRAME_FLONUM_OFFSET - (jitter->flostack_offset * sizeof(double)); + (void)jit_stxi_d_fppop(offset, JIT_FP, JIT_FPR0); + + return 1; +} + +#endif /*========================================================================*/ /* lambda codegen */ @@ -7215,6 +7450,32 @@ static int generate_closure_fill(Scheme_Closure_Data *data, return 1; } +static int generate_closure_prep(Scheme_Closure_Data *data, mz_jit_state *jitter) +{ + int retval = 0; +#ifdef USE_FLONUM_UNBOXING + /* Ensure that flonums are boxed */ + int j, size, pos; + mzshort *map; + + if (SCHEME_CLOSURE_DATA_FLAGS(data) & CLOS_HAS_TYPED_ARGS) { + size = data->closure_size; + map = data->closure_map; + for (j = 0; j < size; j++) { + if (CLOSURE_CONTENT_IS_FLONUM(data, j)) { + pos = mz_remap(map[j]); + jit_ldxi_p(JIT_R1, JIT_RUNSTACK, WORDS_TO_BYTES(pos)); + generate_flonum_local_boxing(jitter, pos, map[j], JIT_R0); + CHECK_LIMIT(); + retval = 1; + } + } + } +#endif + + return retval; +} + Scheme_Native_Closure_Data *scheme_generate_case_lambda(Scheme_Case_Lambda *c) { Scheme_Closure_Data *data; @@ -7347,18 +7608,22 @@ static int generate_non_tail(Scheme_Object *obj, mz_jit_state *jitter, int multi_ok, int mark_pos_ends, int ignored) /* de-sync's rs */ { + int flostack, flostack_pos; + if (is_simple(obj, INIT_SIMPLE_DEPTH, 0, jitter, 0)) { /* Simple; doesn't change the stack or set marks: */ int v; FOR_LOG(jitter->log_depth++); + flostack = mz_flostack_save(jitter, &flostack_pos); v = generate(obj, jitter, 0, multi_ok, ignored ? -1 : JIT_R0); + CHECK_LIMIT(); + mz_flostack_restore(jitter, flostack, flostack_pos, 1); FOR_LOG(--jitter->log_depth); return v; } { int amt, need_ends = 1, using_local1 = 0; - int flostack, flostack_pos; START_JIT_DATA(); /* Might change the stack or marks: */ @@ -7394,7 +7659,7 @@ static int generate_non_tail(Scheme_Object *obj, mz_jit_state *jitter, RESUME_JIT_DATA(); CHECK_LIMIT(); - mz_flostack_restore(jitter, flostack, flostack_pos); + mz_flostack_restore(jitter, flostack, flostack_pos, 1); amt = mz_runstack_restored(jitter); if (amt) { mz_rs_inc(amt); @@ -7516,7 +7781,7 @@ static int generate(Scheme_Object *obj, mz_jit_state *jitter, int is_tail, int m unless the flag is SCHEME_LOCAL_FLONUM */ int pos, flonum; START_JIT_DATA(); -#if defined(CAN_INLINE_ALLOC) && defined(JIT_FRAME_FLONUM_OFFSET) +#ifdef USE_FLONUM_UNBOXING flonum = (SCHEME_GET_LOCAL_FLAGS(obj) == SCHEME_LOCAL_FLONUM); #else flonum = 0; @@ -7536,29 +7801,9 @@ static int generate(Scheme_Object *obj, mz_jit_state *jitter, int is_tail, int m } CHECK_LIMIT(); if (flonum && !result_ignored) { -#ifdef JIT_FRAME_FLONUM_OFFSET - int offset; - offset = mz_flonum_pos(jitter, SCHEME_LOCAL_POS(obj)); - offset = JIT_FRAME_FLONUM_OFFSET - (offset * sizeof(double)); - if (jitter->unbox) { - int fpr0; - fpr0 = JIT_FPR(jitter->unbox_depth); - jit_ldxi_d_fppush(fpr0, JIT_FP, offset); - } else { - GC_CAN_IGNORE jit_insn *ref; - mz_rs_sync(); - __START_TINY_JUMPS__(1); - ref = jit_bnei_p(jit_forward(), target, NULL); - __END_TINY_JUMPS__(1); - CHECK_LIMIT(); - jit_movi_l(JIT_R0, offset); - (void)jit_calli(box_flonum_from_stack_code); - mz_rs_stxi(pos, JIT_R0); - __START_TINY_JUMPS__(1); - mz_patch_branch(ref); - __END_TINY_JUMPS__(1); - CHECK_LIMIT(); - } +#ifdef USE_FLONUM_UNBOXING + generate_flonum_local_boxing(jitter, pos, SCHEME_LOCAL_POS(obj), target); + CHECK_LIMIT(); #endif } else { if (jitter->unbox) generate_unboxing(jitter); @@ -7790,10 +8035,13 @@ static int generate(Scheme_Object *obj, mz_jit_state *jitter, int is_tail, int m /* R0 is space left (in bytes), R2 is argc */ jit_lshi_l(JIT_R2, JIT_R2, JIT_LOG_WORD_SIZE); if (is_tail) { + int fpos, fstack; + fstack = mz_flostack_save(jitter, &fpos); __END_SHORT_JUMPS__(1); - mz_flostack_restore(jitter, 0, 0); + mz_flostack_restore(jitter, 0, 0, 1); (void)jit_bltr_ul(app_values_tail_slow_code, JIT_R0, JIT_R2); __START_SHORT_JUMPS__(1); + mz_flostack_restore(jitter, fstack, fpos, 0); ref5 = 0; } else { GC_CAN_IGNORE jit_insn *refok; @@ -8097,8 +8345,8 @@ static int generate(Scheme_Object *obj, mz_jit_state *jitter, int is_tail, int m g1 = generate(branch->tbranch, jitter, is_tail, multi_ok, orig_target); RESUME_JIT_DATA(); CHECK_LIMIT(); - mz_flostack_restore(jitter, flostack, flostack_pos); amt = mz_runstack_restored(jitter); + mz_flostack_restore(jitter, flostack, flostack_pos, g1 != 2); if (g1 != 2) { if (!is_tail) { if (amt) @@ -8149,8 +8397,8 @@ static int generate(Scheme_Object *obj, mz_jit_state *jitter, int is_tail, int m g2 = generate(branch->fbranch, jitter, is_tail, multi_ok, orig_target); RESUME_JIT_DATA(); CHECK_LIMIT(); - mz_flostack_restore(jitter, flostack, flostack_pos); amt = mz_runstack_restored(jitter); + mz_flostack_restore(jitter, flostack, flostack_pos, g2 != 2); if (g2 != 2) { if (!is_tail) { if (amt) @@ -8189,6 +8437,9 @@ static int generate(Scheme_Object *obj, mz_jit_state *jitter, int is_tail, int m LOG_IT(("lambda\n")); mz_rs_sync(); + + generate_closure_prep(data, jitter); + CHECK_LIMIT(); /* Allocate closure */ generate_closure(data, jitter, 1); @@ -8328,7 +8579,7 @@ static int generate(Scheme_Object *obj, mz_jit_state *jitter, int is_tail, int m case scheme_letrec_type: { Scheme_Letrec *l = (Scheme_Letrec *)obj; - int i, nsrs; + int i, nsrs, prepped = 0; START_JIT_DATA(); LOG_IT(("letrec...\n")); @@ -8343,10 +8594,16 @@ static int generate(Scheme_Object *obj, mz_jit_state *jitter, int is_tail, int m jit_stxi_p(WORDS_TO_BYTES(i), JIT_RUNSTACK, JIT_R0); } + for (i = 0; i < l->count; i++) { + if (generate_closure_prep((Scheme_Closure_Data *)l->procs[i], jitter)) + prepped = 1; + CHECK_LIMIT(); + } + /* Close them: */ for (i = l->count; i--; ) { - if (i != l->count - 1) { - /* Last one we created is still in JIT_R0: */ + /* Last one we created may still be in JIT_R0: */ + if (prepped || (i != l->count - 1)) { jit_ldxi_p(JIT_R0, JIT_RUNSTACK, WORDS_TO_BYTES(i)); } generate_closure_fill((Scheme_Closure_Data *)l->procs[i], jitter); @@ -8390,13 +8647,13 @@ static int generate(Scheme_Object *obj, mz_jit_state *jitter, int is_tail, int m mz_runstack_skipped(jitter, 1); -#if defined(CAN_INLINE_ALLOC) && defined(JIT_FRAME_FLONUM_OFFSET) +#ifdef USE_FLONUM_UNBOXING flonum = SCHEME_LET_EVAL_TYPE(lv) & LET_ONE_FLONUM; + if (flonum) + jitter->unbox++; #else flonum = 0; #endif - if (flonum) - jitter->unbox++; PAUSE_JIT_DATA(); generate_non_tail(lv->value, jitter, 0, 1, 0); /* no sync */ @@ -8409,18 +8666,10 @@ static int generate(Scheme_Object *obj, mz_jit_state *jitter, int is_tail, int m CHECK_RUNSTACK_OVERFLOW(); if (flonum) { -#if defined(JIT_FRAME_FLONUM_OFFSET) - int offset; +#ifdef USE_FLONUM_UNBOXING --jitter->unbox; - if (jitter->flostack_offset == jitter->flostack_space) { - int space = 4 * sizeof(double); - jitter->flostack_space += 4; - jit_subi_l(JIT_SP, JIT_SP, space); - } - jitter->flostack_offset += 1; - mz_runstack_flonum_pushed(jitter, jitter->flostack_offset); - offset = JIT_FRAME_FLONUM_OFFSET - (jitter->flostack_offset * sizeof(double)); - (void)jit_stxi_d_fppop(offset, JIT_FP, JIT_FPR0); + generate_flonum_local_unboxing(jitter, 1); + CHECK_LIMIT(); (void)jit_movi_p(JIT_R0, NULL); #endif } else { @@ -10068,7 +10317,7 @@ static int do_generate_closure(mz_jit_state *jitter, void *_data) /* A tail call with arity checking can start here. (This is a little reundant checking when `code' is the - etry point, but that's the slow path anyway.) */ + entry point, but that's the slow path anyway.) */ has_rest = ((SCHEME_CLOSURE_DATA_FLAGS(data) & CLOS_HAS_REST) ? 1 : 0); is_method = ((SCHEME_CLOSURE_DATA_FLAGS(data) & CLOS_IS_METHOD) ? 1 : 0); @@ -10172,6 +10421,24 @@ static int do_generate_closure(mz_jit_state *jitter, void *_data) } } +#ifdef USE_FLONUM_UNBOXING + /* Unpack flonum arguments */ + if (SCHEME_CLOSURE_DATA_FLAGS(data) & CLOS_HAS_TYPED_ARGS) { + for (i = data->num_params; i--; ) { + if (CLOSURE_ARGUMENT_IS_FLONUM(data, i)) { + mz_rs_ldxi(JIT_R1, i); + jit_ldxi_d_fppush(JIT_FPR0, JIT_R1, &((Scheme_Double *)0x0)->double_val); + generate_flonum_local_unboxing(jitter, 1); + CHECK_LIMIT(); + } else { + mz_runstack_pushed(jitter, 1); + } + } + jitter->self_pos = 0; + jitter->depth = 0; + } +#endif + #ifdef JIT_PRECISE_GC /* Keeping the native-closure code pointer on the runstack ensures that the code won't be GCed while we're running it. If the @@ -10229,16 +10496,41 @@ static int do_generate_closure(mz_jit_state *jitter, void *_data) if (SAME_OBJ(lr->procs[pos], (Scheme_Object *)data)) { self_pos = i; } - } else - mz_runstack_pushed(jitter, 1); + } else { +#ifdef USE_FLONUM_UNBOXING + if ((SCHEME_CLOSURE_DATA_FLAGS(data) & CLOS_HAS_TYPED_ARGS) + && (CLOSURE_CONTENT_IS_FLONUM(data, i))) { + mz_rs_ldxi(JIT_R1, i); + jit_ldxi_d_fppush(JIT_FPR0, JIT_R1, &((Scheme_Double *)0x0)->double_val); + generate_flonum_local_unboxing(jitter, 1); + CHECK_LIMIT(); + } else +#endif + mz_runstack_pushed(jitter, 1); + } } if ((self_pos >= 0) && !has_rest) { jitter->self_pos = self_pos; jitter->self_closure_size = data->closure_size; } } else { - mz_runstack_pushed(jitter, cnt); - +#ifdef USE_FLONUM_UNBOXING + /* Unpack flonum closure data */ + if (SCHEME_CLOSURE_DATA_FLAGS(data) & CLOS_HAS_TYPED_ARGS) { + for (i = data->closure_size; i--; ) { + if (CLOSURE_CONTENT_IS_FLONUM(data, i)) { + mz_rs_ldxi(JIT_R1, i); + jit_ldxi_d_fppush(JIT_FPR0, JIT_R1, &((Scheme_Double *)0x0)->double_val); + generate_flonum_local_unboxing(jitter, 1); + CHECK_LIMIT(); + } else { + mz_runstack_pushed(jitter, 1); + } + } + } else +#endif + mz_runstack_pushed(jitter, cnt); + /* A define-values context? */ if (data->context && SAME_TYPE(SCHEME_TYPE(data->context), scheme_toplevel_type)) { jitter->self_toplevel_pos = SCHEME_TOPLEVEL_POS(data->context); @@ -10255,6 +10547,8 @@ static int do_generate_closure(mz_jit_state *jitter, void *_data) jitter->self_data = data; jitter->self_restart_code = jit_get_ip().ptr; + jitter->self_restart_space = jitter->flostack_space; + jitter->self_restart_offset = jitter->flostack_offset; if (!has_rest) jitter->self_nontail_code = tail_code; @@ -10272,7 +10566,7 @@ static int do_generate_closure(mz_jit_state *jitter, void *_data) /* r == 2 => tail call performed */ if (r != 2) { - mz_flostack_restore(jitter, 0, 0); + mz_flostack_restore(jitter, 0, 0, 1); jit_movr_p(JIT_RET, JIT_R0); mz_pop_threadlocal(); mz_pop_locals(); diff --git a/src/mzscheme/src/mzmark.c b/src/mzscheme/src/mzmark.c index cb2e36a299..af47b7fc7a 100644 --- a/src/mzscheme/src/mzmark.c +++ b/src/mzscheme/src/mzmark.c @@ -3144,6 +3144,7 @@ static int mark_closure_info_MARK(void *p) { gcMARK(i->local_flags); gcMARK(i->base_closure_map); + gcMARK(i->flonum_map); return gcBYTES_TO_WORDS(sizeof(Closure_Info)); @@ -3154,6 +3155,7 @@ static int mark_closure_info_FIXUP(void *p) { gcFIXUP(i->local_flags); gcFIXUP(i->base_closure_map); + gcFIXUP(i->flonum_map); return gcBYTES_TO_WORDS(sizeof(Closure_Info)); diff --git a/src/mzscheme/src/mzmarksrc.c b/src/mzscheme/src/mzmarksrc.c index 393e0772ef..18311f222f 100644 --- a/src/mzscheme/src/mzmarksrc.c +++ b/src/mzscheme/src/mzmarksrc.c @@ -1272,6 +1272,7 @@ mark_closure_info { gcMARK(i->local_flags); gcMARK(i->base_closure_map); + gcMARK(i->flonum_map); size: gcBYTES_TO_WORDS(sizeof(Closure_Info)); diff --git a/src/mzscheme/src/schpriv.h b/src/mzscheme/src/schpriv.h index ca9e5985ce..f2cbb2d300 100644 --- a/src/mzscheme/src/schpriv.h +++ b/src/mzscheme/src/schpriv.h @@ -1890,7 +1890,7 @@ typedef struct Scheme_Comp_Env } Scheme_Comp_Env; #define CLOS_HAS_REST 1 -#define CLOS_HAS_REF_ARGS 2 +#define CLOS_HAS_TYPED_ARGS 2 #define CLOS_PRESERVES_MARKS 4 #define CLOS_SFS 8 #define CLOS_IS_METHOD 16 @@ -2014,7 +2014,8 @@ typedef struct Scheme_Closure_Data mzshort num_params; /* includes collecting arg if has_rest */ mzshort max_let_depth; mzshort closure_size; - mzshort *closure_map; /* actually a Closure_Info* until resolved; if CLOS_HAS_REF_ARGS, followed by bit array */ + mzshort *closure_map; /* actually a Closure_Info* until resolved; if CLOS_HAS_TYPED_ARGS, + followed by bit array with 2 bits per args then per closed-over */ Scheme_Object *code; Scheme_Object *name; /* name or (vector name src line col pos span generated?) */ #ifdef MZ_USE_JIT @@ -2290,11 +2291,17 @@ Scheme_Object *scheme_optimize_info_lookup(Optimize_Info *info, int pos, int *cl void scheme_optimize_info_used_top(Optimize_Info *info); void scheme_optimize_mutated(Optimize_Info *info, int pos); +void scheme_optimize_produces_flonum(Optimize_Info *info, int pos); Scheme_Object *scheme_optimize_reverse(Optimize_Info *info, int pos, int unless_mutated); int scheme_optimize_is_used(Optimize_Info *info, int pos); int scheme_optimize_any_uses(Optimize_Info *info, int start_pos, int end_pos); int scheme_optimize_is_mutated(Optimize_Info *info, int pos); -int scheme_optimize_is_unbox_arg(Optimize_Info *info, int pos); +int scheme_optimize_is_flonum_arg(Optimize_Info *info, int pos, int depth); +int scheme_optimize_is_flonum_valued(Optimize_Info *info, int pos); + +int scheme_is_flonum_expression(Scheme_Object *expr, Optimize_Info *info); +char *scheme_get_closure_flonum_map(Scheme_Closure_Data *data, int arg_n, int *ok); +void scheme_set_closure_flonum_map(Scheme_Closure_Data *data, char *flonum_map); Scheme_Object *scheme_optimize_clone(int dup_ok, Scheme_Object *obj, Optimize_Info *info, int delta, int closure_depth); Scheme_Object *scheme_optimize_shift(Scheme_Object *obj, int delta, int after_depth); diff --git a/src/mzscheme/src/schvers.h b/src/mzscheme/src/schvers.h index 106c44555d..3cec7839a6 100644 --- a/src/mzscheme/src/schvers.h +++ b/src/mzscheme/src/schvers.h @@ -13,12 +13,12 @@ consistently.) */ -#define MZSCHEME_VERSION "4.2.3.6" +#define MZSCHEME_VERSION "4.2.3.7" #define MZSCHEME_VERSION_X 4 #define MZSCHEME_VERSION_Y 2 #define MZSCHEME_VERSION_Z 3 -#define MZSCHEME_VERSION_W 6 +#define MZSCHEME_VERSION_W 7 #define MZSCHEME_VERSION_MAJOR ((MZSCHEME_VERSION_X * 100) + MZSCHEME_VERSION_Y) #define MZSCHEME_VERSION_MINOR ((MZSCHEME_VERSION_Z * 1000) + MZSCHEME_VERSION_W) diff --git a/src/mzscheme/src/syntax.c b/src/mzscheme/src/syntax.c index 1ee0a7dfbb..46180fc1f5 100644 --- a/src/mzscheme/src/syntax.c +++ b/src/mzscheme/src/syntax.c @@ -940,14 +940,14 @@ static void define_values_validate(Scheme_Object *data, Mz_CPort *port, } } if (data) { - if (SCHEME_CLOSURE_DATA_FLAGS(data) & CLOS_HAS_REF_ARGS) { + if (SCHEME_CLOSURE_DATA_FLAGS(data) & CLOS_HAS_TYPED_ARGS) { int sz; sz = data->num_params; a = MALLOC_N_ATOMIC(mzshort, (sz + 1)); a[0] = -sz; for (i = 0; i < sz; i++) { - int bit = ((mzshort)1 << (i & (BITS_PER_MZSHORT - 1))); - if (data->closure_map[data->closure_size + (i / BITS_PER_MZSHORT)] & bit) + int bit = ((mzshort)1 << ((2 * i) & (BITS_PER_MZSHORT - 1))); + if (data->closure_map[data->closure_size + ((2 * i) / BITS_PER_MZSHORT)] & bit) a[i + 1] = 1; else a[i + 1] = 0; @@ -3216,6 +3216,10 @@ scheme_optimize_lets(Scheme_Object *form, Optimize_Info *info, int for_inline, i did_set_value = 1; } else if (value && !is_rec) { int cnt; + + if (scheme_expr_produces_flonum(value)) + scheme_optimize_produces_flonum(body_info, pos); + cnt = ((pre_body->flags[0] & SCHEME_USE_COUNT_MASK) >> SCHEME_USE_COUNT_SHIFT); if (cnt == 1) { /* used only once; we may be able to shift the expression to the use @@ -3411,7 +3415,7 @@ scheme_optimize_lets(Scheme_Object *form, Optimize_Info *info, int for_inline, i } else { for (j = pre_body->count; j--; ) { pre_body->flags[j] |= SCHEME_WAS_USED; - if (scheme_optimize_is_unbox_arg(body_info, pos+j)) + if (scheme_optimize_is_flonum_arg(body_info, pos+j, 0)) pre_body->flags[j] |= SCHEME_WAS_FLONUM_ARGUMENT; } info->size += 1;