expander: enable performance measurements permanently

Change the expander-performance macro so that it's a very low cost if
not enabled on startup. An extra JIT specialization reduces the cost
further, since the enabled state is known by JIT time.
This commit is contained in:
Matthew Flatt 2018-08-12 11:05:20 -06:00
parent 7cbeebbb89
commit eb73837baf
6 changed files with 1145 additions and 308 deletions

View File

@ -32,166 +32,154 @@
;; Beware that `body ...` is not in tail position when ;; Beware that `body ...` is not in tail position when
;; performance measurement is enabled. ;; performance measurement is enabled.
;; ------------------------------------------------------------ (provide performance-region)
;; Re-export this submodule to enable performance measurements
(module measure-mode racket/base (define log-performance? (and (environment-variables-ref
(provide performance-region) (current-environment-variables)
#"PLT_EXPAND_TIMES")
#t))
(define-syntax-rule (performance-region [tag0-expr tag-expr ...] body ...) (define-syntax-rule (performance-region [tag0-expr tag-expr ...] body ...)
(begin (begin
(start-performance-region tag0-expr tag-expr ...) (when log-performance?
(begin0 (start-performance-region tag0-expr tag-expr ...))
(let () body ...) (begin0
(end-performance-region)))) (let () body ...)
(when log-performance?
(end-performance-region)))))
(define region-stack #f) (define region-stack #f)
(define accums (make-hasheq)) (define accums (make-hasheq))
(struct region (path (struct region (path
[start #:mutable] ; start time [start #:mutable] ; start time
[start-memory #:mutable] ; memory allocated before start time [start-memory #:mutable] ; memory allocated before start time
[as-nested #:mutable] ; time accumulated for nested regions [as-nested #:mutable] ; time accumulated for nested regions
[as-nested-memory #:mutable])) ; ditto, for memory [as-nested-memory #:mutable])) ; ditto, for memory
(struct stat ([msecs #:mutable] [memory #:mutable] [count #:mutable])) (struct stat ([msecs #:mutable] [memory #:mutable] [count #:mutable]))
(define stat-key (gensym)) (define stat-key (gensym))
(define-logger performance) (define (start-performance-region . path)
(set! region-stack (cons (region (if region-stack
;; Replace '_ elements:
(let loop ([path path]
[enclosing-path (region-path (car region-stack))])
(if (null? path)
null
(cons (if (and (eq? '_ (car path))
(pair? enclosing-path))
(car enclosing-path)
(car path))
(loop (cdr path)
(if (pair? enclosing-path)
(cdr enclosing-path)
null)))))
path)
(current-inexact-milliseconds)
(current-memory-use 'cumulative)
0.0
0)
region-stack)))
(define (start-performance-region . path) (define (end-performance-region)
(set! region-stack (cons (region (if region-stack (define now (current-inexact-milliseconds))
;; Replace '_ elements: (define now-memory (current-memory-use 'cumulative))
(let loop ([path path] (define r (car region-stack))
[enclosing-path (region-path (car region-stack))]) (set! region-stack (cdr region-stack))
(if (null? path)
null
(cons (if (and (eq? '_ (car path))
(pair? enclosing-path))
(car enclosing-path)
(car path))
(loop (cdr path)
(if (pair? enclosing-path)
(cdr enclosing-path)
null)))))
path)
(current-inexact-milliseconds)
(current-memory-use 'cumulative)
0.0
0)
region-stack)))
(define (end-performance-region) (define full-delta (- now (region-start r)))
(define now (current-inexact-milliseconds)) (define delta (- full-delta (region-as-nested r)))
(define now-memory (current-memory-use 'cumulative))
(define r (car region-stack))
(set! region-stack (cdr region-stack))
(define full-delta (- now (region-start r))) (define full-delta-memory (- now-memory (region-start-memory r)))
(define delta (- full-delta (region-as-nested r))) (define delta-memory (- full-delta-memory (region-as-nested-memory r)))
(define full-delta-memory (- now-memory (region-start-memory r))) (let loop ([accums accums] [path (region-path r)])
(define delta-memory (- full-delta-memory (region-as-nested-memory r))) (define key (car path))
(let ([accum (or (hash-ref accums key #f)
(let ([accum (make-hasheq)])
(hash-set! accums key accum)
accum))])
(define s (or (hash-ref accum stat-key #f)
(let ([s (stat 0.0 0 0)])
(hash-set! accum stat-key s)
s)))
(set-stat-msecs! s (+ delta (stat-msecs s)))
(set-stat-memory! s (+ delta-memory (stat-memory s)))
(when (null? (cdr path))
(set-stat-count! s (add1 (stat-count s))))
(unless (null? (cdr path))
(loop accum (cdr path)))))
(let loop ([accums accums] [path (region-path r)]) (when region-stack
(define key (car path)) (set-region-as-nested! (car region-stack)
(let ([accum (or (hash-ref accums key #f) (+ (region-as-nested (car region-stack))
(let ([accum (make-hasheq)]) full-delta))
(hash-set! accums key accum) (set-region-as-nested-memory! (car region-stack)
accum))]) (+ (region-as-nested-memory (car region-stack))
(define s (or (hash-ref accum stat-key #f) full-delta-memory))))
(let ([s (stat 0.0 0 0)])
(hash-set! accum stat-key s)
s)))
(set-stat-msecs! s (+ delta (stat-msecs s)))
(set-stat-memory! s (+ delta-memory (stat-memory s)))
(when (null? (cdr path))
(set-stat-count! s (add1 (stat-count s))))
(unless (null? (cdr path))
(loop accum (cdr path)))))
(when region-stack (when log-performance?
(set-region-as-nested! (car region-stack) (void
(+ (region-as-nested (car region-stack)) (plumber-add-flush! (current-plumber)
full-delta)) (lambda (h)
(set-region-as-nested-memory! (car region-stack) (define (whole-len s)
(+ (region-as-nested-memory (car region-stack)) (caar (or (regexp-match-positions #rx"[.]" s) '(0))))
full-delta-memory)))) (define (kb b)
(define s (number->string (quotient b 1024)))
(void (plumber-add-flush! (current-plumber) (list->string
(lambda (h) (for/fold ([l null]) ([c (in-list (reverse (string->list s)))]
(define (whole-len s) [i (in-naturals)])
(caar (or (regexp-match-positions #rx"[.]" s) '(0)))) (cond
(define (kb b) [(and (positive? i) (zero? (modulo i 3)))
(define s (number->string (quotient b 1024))) (list* c #\, l)]
(list->string [else (cons c l)]))))
(for/fold ([l null]) ([c (in-list (reverse (string->list s)))] (define-values (label-max-len value-max-len memory-max-len count-max-len)
[i (in-naturals)]) (let loop ([accums accums] [label-len 6] [value-len 5] [memory-len 4] [count-len 5] [indent 2])
(cond (for/fold ([label-len label-len]
[(and (positive? i) (zero? (modulo i 3))) [value-len value-len]
(list* c #\, l)] [memory-len memory-len]
[else (cons c l)])))) [count-len count-len])
(define-values (label-max-len value-max-len memory-max-len count-max-len) ([(k v) (in-hash accums)])
(let loop ([accums accums] [label-len 6] [value-len 5] [memory-len 4] [count-len 5] [indent 2]) (cond
(for/fold ([label-len label-len] [(eq? k stat-key)
[value-len value-len] (values label-len
[memory-len memory-len] (max value-len (whole-len (format "~a" (stat-msecs v))))
[count-len count-len]) (max memory-len (string-length (format "~a" (kb (stat-memory v)))))
([(k v) (in-hash accums)]) (max count-len (string-length (format "~a" (stat-count v)))))]
(cond [else (loop v
[(eq? k stat-key) (max label-len (+ indent (string-length (format "~a" k))))
(values label-len value-len
(max value-len (whole-len (format "~a" (stat-msecs v)))) memory-len
(max memory-len (string-length (format "~a" (kb (stat-memory v))))) count-len
(max count-len (string-length (format "~a" (stat-count v)))))] (+ 2 indent))]))))
[else (loop v (log-error "REGION ~aMSECS ~aMEMK ~aCOUNT"
(max label-len (+ indent (string-length (format "~a" k)))) (make-string (- (+ label-max-len value-max-len) 11)
value-len #\space)
memory-len (make-string (- memory-max-len 4)
count-len #\space)
(+ 2 indent))])))) (make-string (- count-max-len 5)
(log-performance-info "REGION ~aMSECS ~aMEMK ~aCOUNT" #\space))
(make-string (- (+ label-max-len value-max-len) 11) (let loop ([name #f] [accums accums] [indent ""] [newline? #t])
#\space) (when name
(make-string (- memory-max-len 4) (define v (hash-ref accums stat-key))
#\space) (log-error "~a~a ~a~a ~a~a ~a~a"
(make-string (- count-max-len 5) indent
#\space)) name
(let loop ([name #f] [accums accums] [indent ""] [newline? #t]) (make-string (+ (- label-max-len (string-length (format "~a" name)) (string-length indent))
(when name (- value-max-len (whole-len (format "~a" (stat-msecs v)))))
(define v (hash-ref accums stat-key)) #\space)
(log-performance-info "~a~a ~a~a ~a~a ~a~a" (regexp-replace #rx"[.](..).*" (format "~a00" (stat-msecs v)) ".\\1")
indent (make-string (- memory-max-len (string-length (format "~a" (kb (stat-memory v)))))
name #\space)
(make-string (+ (- label-max-len (string-length (format "~a" name)) (string-length indent)) (kb (stat-memory v))
(- value-max-len (whole-len (format "~a" (stat-msecs v))))) (make-string (- count-max-len (string-length (format "~a" (stat-count v))))
#\space) #\space)
(regexp-replace #rx"[.](..).*" (format "~a00" (stat-msecs v)) ".\\1") (stat-count v)))
(make-string (- memory-max-len (string-length (format "~a" (kb (stat-memory v))))) (define keys (sort (for/list ([k (in-hash-keys accums)] #:when (not (eq? k stat-key))) k)
#\space) >
(kb (stat-memory v)) #:key (lambda (key) (stat-msecs (hash-ref (hash-ref accums key) stat-key)))))
(make-string (- count-max-len (string-length (format "~a" (stat-count v)))) (for ([k (in-list keys)]
#\space) [i (in-naturals)])
(stat-count v))) (when (and newline? (positive? i)) (log-error ""))
(define keys (sort (for/list ([k (in-hash-keys accums)] #:when (not (eq? k stat-key))) k) (loop k (hash-ref accums k) (string-append indent " ") #f)))))))
>
#:key (lambda (key) (stat-msecs (hash-ref (hash-ref accums key) stat-key)))))
(for ([k (in-list keys)]
[i (in-naturals)])
(when (and newline? (positive? i)) (log-performance-info ""))
(loop k (hash-ref accums k) (string-append indent " ") #f)))))))
;; ------------------------------------------------------------
;; Re-export this submodule to disable measurements
(module no-measure-mode racket/base
(provide performance-region)
(define-syntax-rule (performance-region [tag0-expr tag-expr ...] body ...)
(let () body ...)))
;; ------------------------------------------------------------
;; Select whether to measure (has overhead) or not:
(require (submod "." no-measure-mode))

View File

@ -489,39 +489,41 @@ Scheme_Object *scheme_extract_closure_local(Scheme_Object *obj, mz_jit_state *ji
} }
Scheme_Object *scheme_specialize_to_constant(Scheme_Object *obj, mz_jit_state *jitter, int extra_push) Scheme_Object *scheme_specialize_to_constant(Scheme_Object *obj, mz_jit_state *jitter, int extra_push, int extract_static)
{ {
Scheme_Object *c; Scheme_Object *c;
if (PAST_LIMIT()) return obj; if (PAST_LIMIT()) return obj;
if (!jitter->nc) return obj; /* We can always specialize static toplevel references */
if (extract_static
&& SAME_TYPE(SCHEME_TYPE(obj), scheme_static_toplevel_type)
&& (SCHEME_TOPLEVEL_FLAGS(obj) & SCHEME_TOPLEVEL_FLAGS_MASK) >= SCHEME_TOPLEVEL_FIXED) {
c = SCHEME_STATIC_TOPLEVEL_PREFIX(obj)->a[SCHEME_TOPLEVEL_POS(obj)];
c = ((Scheme_Bucket *)c)->val;
if (c)
return c;
}
if (SCHEME_NATIVE_LAMBDA_FLAGS(jitter->nc->code) & NATIVE_SPECIALIZED) { if (jitter->nc) {
if (SAME_TYPE(SCHEME_TYPE(obj), scheme_local_type)) { if (SCHEME_NATIVE_LAMBDA_FLAGS(jitter->nc->code) & NATIVE_SPECIALIZED) {
c = scheme_extract_closure_local(obj, jitter, extra_push, 1); if (SAME_TYPE(SCHEME_TYPE(obj), scheme_local_type)) {
if (c) { c = scheme_extract_closure_local(obj, jitter, extra_push, 1);
MZ_ASSERT(SCHEME_TYPE(c) != scheme_prefix_type); if (c) {
return c; MZ_ASSERT(SCHEME_TYPE(c) != scheme_prefix_type);
}
}
if (SAME_TYPE(SCHEME_TYPE(obj), scheme_toplevel_type)
&& (SCHEME_TOPLEVEL_FLAGS(obj) & SCHEME_TOPLEVEL_FLAGS_MASK) >= SCHEME_TOPLEVEL_FIXED) {
c = scheme_extract_global(obj, jitter->nc, 0);
if (c) {
c = ((Scheme_Bucket *)c)->val;
if (c)
return c; return c;
}
} }
}
if (SAME_TYPE(SCHEME_TYPE(obj), scheme_static_toplevel_type) if (SAME_TYPE(SCHEME_TYPE(obj), scheme_toplevel_type)
&& (SCHEME_TOPLEVEL_FLAGS(obj) & SCHEME_TOPLEVEL_FLAGS_MASK) >= SCHEME_TOPLEVEL_FIXED) { && (SCHEME_TOPLEVEL_FLAGS(obj) & SCHEME_TOPLEVEL_FLAGS_MASK) >= SCHEME_TOPLEVEL_FIXED) {
c = SCHEME_STATIC_TOPLEVEL_PREFIX(obj)->a[SCHEME_TOPLEVEL_POS(obj)]; c = scheme_extract_global(obj, jitter->nc, 0);
c = ((Scheme_Bucket *)c)->val; if (c) {
if (c) c = ((Scheme_Bucket *)c)->val;
return c; if (c)
return c;
}
}
} }
} }
@ -547,7 +549,7 @@ int scheme_native_closure_preserves_marks(Scheme_Object *p)
int scheme_is_noncm(Scheme_Object *a, mz_jit_state *jitter, int depth, int stack_start) int scheme_is_noncm(Scheme_Object *a, mz_jit_state *jitter, int depth, int stack_start)
{ {
a = scheme_specialize_to_constant(a, jitter, stack_start); a = scheme_specialize_to_constant(a, jitter, stack_start, 0);
if (SCHEME_PRIMP(a)) { if (SCHEME_PRIMP(a)) {
int opts; int opts;
@ -677,7 +679,8 @@ int scheme_is_simple(Scheme_Object *obj, int depth, int just_markless, mz_jit_st
{ {
Scheme_Object *rator; Scheme_Object *rator;
rator = scheme_specialize_to_constant(((Scheme_App_Rec *)obj)->args[0], jitter, rator = scheme_specialize_to_constant(((Scheme_App_Rec *)obj)->args[0], jitter,
stack_start + ((Scheme_App_Rec *)obj)->num_args); stack_start + ((Scheme_App_Rec *)obj)->num_args,
0);
if (scheme_inlined_nary_prim(rator, obj, jitter) if (scheme_inlined_nary_prim(rator, obj, jitter)
&& !SAME_OBJ(rator, scheme_values_proc)) && !SAME_OBJ(rator, scheme_values_proc))
return 1; return 1;
@ -690,7 +693,7 @@ int scheme_is_simple(Scheme_Object *obj, int depth, int just_markless, mz_jit_st
case scheme_application2_type: case scheme_application2_type:
{ {
Scheme_Object *rator; Scheme_Object *rator;
rator = scheme_specialize_to_constant(((Scheme_App2_Rec *)obj)->rator, jitter, stack_start + 1); rator = scheme_specialize_to_constant(((Scheme_App2_Rec *)obj)->rator, jitter, stack_start + 1, 0);
if (scheme_inlined_unary_prim(rator, obj, jitter)) if (scheme_inlined_unary_prim(rator, obj, jitter))
return 1; return 1;
else if (just_markless) { else if (just_markless) {
@ -701,7 +704,7 @@ int scheme_is_simple(Scheme_Object *obj, int depth, int just_markless, mz_jit_st
case scheme_application3_type: case scheme_application3_type:
{ {
Scheme_Object *rator; Scheme_Object *rator;
rator = scheme_specialize_to_constant(((Scheme_App3_Rec *)obj)->rator, jitter, stack_start + 2); rator = scheme_specialize_to_constant(((Scheme_App3_Rec *)obj)->rator, jitter, stack_start + 2, 0);
if (scheme_inlined_binary_prim(rator, obj, jitter) if (scheme_inlined_binary_prim(rator, obj, jitter)
&& !SAME_OBJ(rator, scheme_values_proc)) && !SAME_OBJ(rator, scheme_values_proc))
return 1; return 1;
@ -991,7 +994,7 @@ int scheme_native_closure_is_single_result(Scheme_Object *rator)
static int produces_single_value(Scheme_Object *rator, int num_args, mz_jit_state *jitter) static int produces_single_value(Scheme_Object *rator, int num_args, mz_jit_state *jitter)
{ {
rator = scheme_specialize_to_constant(rator, jitter, num_args); rator = scheme_specialize_to_constant(rator, jitter, num_args, 1);
if (SAME_TYPE(SCHEME_TYPE(rator), scheme_native_closure_type)) if (SAME_TYPE(SCHEME_TYPE(rator), scheme_native_closure_type))
return scheme_native_closure_is_single_result(rator); return scheme_native_closure_is_single_result(rator);
@ -2104,7 +2107,7 @@ int scheme_generate(Scheme_Object *obj, mz_jit_state *jitter, int is_tail, int w
} }
#endif #endif
obj = scheme_specialize_to_constant(obj, jitter, 0); obj = scheme_specialize_to_constant(obj, jitter, 0, 0);
orig_target = target; orig_target = target;
result_ignored = (target < 0); result_ignored = (target < 0);
@ -2504,8 +2507,8 @@ int scheme_generate(Scheme_Object *obj, mz_jit_state *jitter, int is_tail, int w
v = SCHEME_PTR1_VAL(obj); v = SCHEME_PTR1_VAL(obj);
p = SCHEME_PTR2_VAL(obj); p = SCHEME_PTR2_VAL(obj);
v = scheme_specialize_to_constant(v, jitter, 0); v = scheme_specialize_to_constant(v, jitter, 0, 1);
p = scheme_specialize_to_constant(p, jitter, 0); p = scheme_specialize_to_constant(p, jitter, 0, 1);
if (is_single_valued(p, jitter)) { if (is_single_valued(p, jitter)) {
/* We might discover late that `v` produces a single value, /* We might discover late that `v` produces a single value,
@ -2965,7 +2968,15 @@ int scheme_generate(Scheme_Object *obj, mz_jit_state *jitter, int is_tail, int w
} }
case scheme_branch_type: case scheme_branch_type:
{ {
return generate_branch(obj, jitter, is_tail, wcm_may_replace, multi_ok, orig_target, result_ignored, for_branch); Scheme_Branch_Rec *branch = (Scheme_Branch_Rec *)obj;
Scheme_Object *tst;
tst = scheme_specialize_to_constant(branch->test, jitter, 0, 1);
if (SCHEME_TYPE(tst) > _scheme_values_types_) {
return scheme_generate((SCHEME_TRUEP(tst) ? branch->tbranch : branch->fbranch),
jitter, is_tail, wcm_may_replace,
multi_ok, orig_target, for_branch, for_values);
} else
return generate_branch(obj, jitter, is_tail, wcm_may_replace, multi_ok, orig_target, result_ignored, for_branch);
} }
case scheme_lambda_type: case scheme_lambda_type:
{ {

View File

@ -1612,7 +1612,7 @@ int scheme_jit_check_closure_extflonum_bit(Scheme_Lambda *data, int pos, int del
Scheme_Object *scheme_extract_global(Scheme_Object *o, Scheme_Native_Closure *nc, int local_only); Scheme_Object *scheme_extract_global(Scheme_Object *o, Scheme_Native_Closure *nc, int local_only);
Scheme_Object *scheme_extract_closure_local(Scheme_Object *obj, mz_jit_state *jitter, int extra_push, int get_constant); Scheme_Object *scheme_extract_closure_local(Scheme_Object *obj, mz_jit_state *jitter, int extra_push, int get_constant);
Scheme_Object *scheme_specialize_to_constant(Scheme_Object *obj, mz_jit_state *jitter, int extra_push); Scheme_Object *scheme_specialize_to_constant(Scheme_Object *obj, mz_jit_state *jitter, int extra_push, int extract_static);
void scheme_jit_register_traversers(void); void scheme_jit_register_traversers(void);
#ifdef MZ_USE_LWC #ifdef MZ_USE_LWC

View File

@ -1818,7 +1818,7 @@ int scheme_generate_app(Scheme_App_Rec *app, Scheme_Object **alt_rands, int num_
rator = (alt_rands ? alt_rands[0] : app->args[0]); rator = (alt_rands ? alt_rands[0] : app->args[0]);
rator = scheme_specialize_to_constant(rator, jitter, num_pushes); rator = scheme_specialize_to_constant(rator, jitter, num_pushes, 0);
if (no_call == 2) { if (no_call == 2) {
direct_prim = 1; direct_prim = 1;

View File

@ -1271,7 +1271,7 @@ int scheme_generate_inlined_unary(mz_jit_state *jitter, Scheme_App2_Rec *app, in
{ {
Scheme_Object *rator = app->rator; Scheme_Object *rator = app->rator;
rator = scheme_specialize_to_constant(rator, jitter, 1); rator = scheme_specialize_to_constant(rator, jitter, 1, 0);
{ {
int k; int k;
@ -2577,8 +2577,8 @@ int scheme_generate_two_args(Scheme_Object *rand1, Scheme_Object *rand2, mz_jit_
{ {
int simple1, simple2, direction = 1; int simple1, simple2, direction = 1;
rand1 = scheme_specialize_to_constant(rand1, jitter, skipped); rand1 = scheme_specialize_to_constant(rand1, jitter, skipped, 1);
rand2 = scheme_specialize_to_constant(rand2, jitter, skipped); rand2 = scheme_specialize_to_constant(rand2, jitter, skipped, 1);
simple1 = scheme_is_relatively_constant_and_avoids_r1(rand1, rand2); simple1 = scheme_is_relatively_constant_and_avoids_r1(rand1, rand2);
simple2 = scheme_is_relatively_constant_and_avoids_r1(rand2, rand1); simple2 = scheme_is_relatively_constant_and_avoids_r1(rand2, rand1);
@ -3042,7 +3042,7 @@ int scheme_generate_inlined_binary(mz_jit_state *jitter, Scheme_App3_Rec *app, i
{ {
Scheme_Object *rator = app->rator; Scheme_Object *rator = app->rator;
rator = scheme_specialize_to_constant(rator, jitter, 2); rator = scheme_specialize_to_constant(rator, jitter, 2, 0);
if (SCHEME_PRIMP(rator) && IS_NAMED_PRIM(rator, "ptr-ref")) { if (SCHEME_PRIMP(rator) && IS_NAMED_PRIM(rator, "ptr-ref")) {
Scheme_App_Rec *app2; Scheme_App_Rec *app2;
@ -4549,7 +4549,7 @@ int scheme_generate_inlined_nary(mz_jit_state *jitter, Scheme_App_Rec *app, int
{ {
Scheme_Object *rator = app->args[0]; Scheme_Object *rator = app->args[0];
rator = scheme_specialize_to_constant(rator, jitter, app->num_args); rator = scheme_specialize_to_constant(rator, jitter, app->num_args, 0);
if (!for_branch) { if (!for_branch) {
int k; int k;

File diff suppressed because it is too large Load Diff