expander: enable performance measurements permanently

Change the expander-performance macro so that it's a very low cost if
not enabled on startup. An extra JIT specialization reduces the cost
further, since the enabled state is known by JIT time.
This commit is contained in:
Matthew Flatt 2018-08-12 11:05:20 -06:00
parent 7cbeebbb89
commit eb73837baf
6 changed files with 1145 additions and 308 deletions

View File

@ -32,166 +32,154 @@
;; Beware that `body ...` is not in tail position when
;; performance measurement is enabled.
;; ------------------------------------------------------------
;; Re-export this submodule to enable performance measurements
(provide performance-region)
(module measure-mode racket/base
(provide performance-region)
(define log-performance? (and (environment-variables-ref
(current-environment-variables)
#"PLT_EXPAND_TIMES")
#t))
(define-syntax-rule (performance-region [tag0-expr tag-expr ...] body ...)
(begin
(start-performance-region tag0-expr tag-expr ...)
(begin0
(let () body ...)
(end-performance-region))))
(define-syntax-rule (performance-region [tag0-expr tag-expr ...] body ...)
(begin
(when log-performance?
(start-performance-region tag0-expr tag-expr ...))
(begin0
(let () body ...)
(when log-performance?
(end-performance-region)))))
(define region-stack #f)
(define accums (make-hasheq))
(define region-stack #f)
(define accums (make-hasheq))
(struct region (path
[start #:mutable] ; start time
[start-memory #:mutable] ; memory allocated before start time
[as-nested #:mutable] ; time accumulated for nested regions
[as-nested-memory #:mutable])) ; ditto, for memory
(struct stat ([msecs #:mutable] [memory #:mutable] [count #:mutable]))
(struct region (path
[start #:mutable] ; start time
[start-memory #:mutable] ; memory allocated before start time
[as-nested #:mutable] ; time accumulated for nested regions
[as-nested-memory #:mutable])) ; ditto, for memory
(struct stat ([msecs #:mutable] [memory #:mutable] [count #:mutable]))
(define stat-key (gensym))
(define stat-key (gensym))
(define-logger performance)
(define (start-performance-region . path)
(set! region-stack (cons (region (if region-stack
;; Replace '_ elements:
(let loop ([path path]
[enclosing-path (region-path (car region-stack))])
(if (null? path)
null
(cons (if (and (eq? '_ (car path))
(pair? enclosing-path))
(car enclosing-path)
(car path))
(loop (cdr path)
(if (pair? enclosing-path)
(cdr enclosing-path)
null)))))
path)
(current-inexact-milliseconds)
(current-memory-use 'cumulative)
0.0
0)
region-stack)))
(define (start-performance-region . path)
(set! region-stack (cons (region (if region-stack
;; Replace '_ elements:
(let loop ([path path]
[enclosing-path (region-path (car region-stack))])
(if (null? path)
null
(cons (if (and (eq? '_ (car path))
(pair? enclosing-path))
(car enclosing-path)
(car path))
(loop (cdr path)
(if (pair? enclosing-path)
(cdr enclosing-path)
null)))))
path)
(current-inexact-milliseconds)
(current-memory-use 'cumulative)
0.0
0)
region-stack)))
(define (end-performance-region)
(define now (current-inexact-milliseconds))
(define now-memory (current-memory-use 'cumulative))
(define r (car region-stack))
(set! region-stack (cdr region-stack))
(define (end-performance-region)
(define now (current-inexact-milliseconds))
(define now-memory (current-memory-use 'cumulative))
(define r (car region-stack))
(set! region-stack (cdr region-stack))
(define full-delta (- now (region-start r)))
(define delta (- full-delta (region-as-nested r)))
(define full-delta (- now (region-start r)))
(define delta (- full-delta (region-as-nested r)))
(define full-delta-memory (- now-memory (region-start-memory r)))
(define delta-memory (- full-delta-memory (region-as-nested-memory r)))
(define full-delta-memory (- now-memory (region-start-memory r)))
(define delta-memory (- full-delta-memory (region-as-nested-memory r)))
(let loop ([accums accums] [path (region-path r)])
(define key (car path))
(let ([accum (or (hash-ref accums key #f)
(let ([accum (make-hasheq)])
(hash-set! accums key accum)
accum))])
(define s (or (hash-ref accum stat-key #f)
(let ([s (stat 0.0 0 0)])
(hash-set! accum stat-key s)
s)))
(set-stat-msecs! s (+ delta (stat-msecs s)))
(set-stat-memory! s (+ delta-memory (stat-memory s)))
(when (null? (cdr path))
(set-stat-count! s (add1 (stat-count s))))
(unless (null? (cdr path))
(loop accum (cdr path)))))
(let loop ([accums accums] [path (region-path r)])
(define key (car path))
(let ([accum (or (hash-ref accums key #f)
(let ([accum (make-hasheq)])
(hash-set! accums key accum)
accum))])
(define s (or (hash-ref accum stat-key #f)
(let ([s (stat 0.0 0 0)])
(hash-set! accum stat-key s)
s)))
(set-stat-msecs! s (+ delta (stat-msecs s)))
(set-stat-memory! s (+ delta-memory (stat-memory s)))
(when (null? (cdr path))
(set-stat-count! s (add1 (stat-count s))))
(unless (null? (cdr path))
(loop accum (cdr path)))))
(when region-stack
(set-region-as-nested! (car region-stack)
(+ (region-as-nested (car region-stack))
full-delta))
(set-region-as-nested-memory! (car region-stack)
(+ (region-as-nested-memory (car region-stack))
full-delta-memory))))
(when region-stack
(set-region-as-nested! (car region-stack)
(+ (region-as-nested (car region-stack))
full-delta))
(set-region-as-nested-memory! (car region-stack)
(+ (region-as-nested-memory (car region-stack))
full-delta-memory))))
(void (plumber-add-flush! (current-plumber)
(lambda (h)
(define (whole-len s)
(caar (or (regexp-match-positions #rx"[.]" s) '(0))))
(define (kb b)
(define s (number->string (quotient b 1024)))
(list->string
(for/fold ([l null]) ([c (in-list (reverse (string->list s)))]
[i (in-naturals)])
(cond
[(and (positive? i) (zero? (modulo i 3)))
(list* c #\, l)]
[else (cons c l)]))))
(define-values (label-max-len value-max-len memory-max-len count-max-len)
(let loop ([accums accums] [label-len 6] [value-len 5] [memory-len 4] [count-len 5] [indent 2])
(for/fold ([label-len label-len]
[value-len value-len]
[memory-len memory-len]
[count-len count-len])
([(k v) (in-hash accums)])
(cond
[(eq? k stat-key)
(values label-len
(max value-len (whole-len (format "~a" (stat-msecs v))))
(max memory-len (string-length (format "~a" (kb (stat-memory v)))))
(max count-len (string-length (format "~a" (stat-count v)))))]
[else (loop v
(max label-len (+ indent (string-length (format "~a" k))))
value-len
memory-len
count-len
(+ 2 indent))]))))
(log-performance-info "REGION ~aMSECS ~aMEMK ~aCOUNT"
(make-string (- (+ label-max-len value-max-len) 11)
#\space)
(make-string (- memory-max-len 4)
#\space)
(make-string (- count-max-len 5)
#\space))
(let loop ([name #f] [accums accums] [indent ""] [newline? #t])
(when name
(define v (hash-ref accums stat-key))
(log-performance-info "~a~a ~a~a ~a~a ~a~a"
indent
name
(make-string (+ (- label-max-len (string-length (format "~a" name)) (string-length indent))
(- value-max-len (whole-len (format "~a" (stat-msecs v)))))
#\space)
(regexp-replace #rx"[.](..).*" (format "~a00" (stat-msecs v)) ".\\1")
(make-string (- memory-max-len (string-length (format "~a" (kb (stat-memory v)))))
#\space)
(kb (stat-memory v))
(make-string (- count-max-len (string-length (format "~a" (stat-count v))))
#\space)
(stat-count v)))
(define keys (sort (for/list ([k (in-hash-keys accums)] #:when (not (eq? k stat-key))) k)
>
#:key (lambda (key) (stat-msecs (hash-ref (hash-ref accums key) stat-key)))))
(for ([k (in-list keys)]
[i (in-naturals)])
(when (and newline? (positive? i)) (log-performance-info ""))
(loop k (hash-ref accums k) (string-append indent " ") #f)))))))
;; ------------------------------------------------------------
;; Re-export this submodule to disable measurements
(module no-measure-mode racket/base
(provide performance-region)
(define-syntax-rule (performance-region [tag0-expr tag-expr ...] body ...)
(let () body ...)))
;; ------------------------------------------------------------
;; Select whether to measure (has overhead) or not:
(require (submod "." no-measure-mode))
(when log-performance?
(void
(plumber-add-flush! (current-plumber)
(lambda (h)
(define (whole-len s)
(caar (or (regexp-match-positions #rx"[.]" s) '(0))))
(define (kb b)
(define s (number->string (quotient b 1024)))
(list->string
(for/fold ([l null]) ([c (in-list (reverse (string->list s)))]
[i (in-naturals)])
(cond
[(and (positive? i) (zero? (modulo i 3)))
(list* c #\, l)]
[else (cons c l)]))))
(define-values (label-max-len value-max-len memory-max-len count-max-len)
(let loop ([accums accums] [label-len 6] [value-len 5] [memory-len 4] [count-len 5] [indent 2])
(for/fold ([label-len label-len]
[value-len value-len]
[memory-len memory-len]
[count-len count-len])
([(k v) (in-hash accums)])
(cond
[(eq? k stat-key)
(values label-len
(max value-len (whole-len (format "~a" (stat-msecs v))))
(max memory-len (string-length (format "~a" (kb (stat-memory v)))))
(max count-len (string-length (format "~a" (stat-count v)))))]
[else (loop v
(max label-len (+ indent (string-length (format "~a" k))))
value-len
memory-len
count-len
(+ 2 indent))]))))
(log-error "REGION ~aMSECS ~aMEMK ~aCOUNT"
(make-string (- (+ label-max-len value-max-len) 11)
#\space)
(make-string (- memory-max-len 4)
#\space)
(make-string (- count-max-len 5)
#\space))
(let loop ([name #f] [accums accums] [indent ""] [newline? #t])
(when name
(define v (hash-ref accums stat-key))
(log-error "~a~a ~a~a ~a~a ~a~a"
indent
name
(make-string (+ (- label-max-len (string-length (format "~a" name)) (string-length indent))
(- value-max-len (whole-len (format "~a" (stat-msecs v)))))
#\space)
(regexp-replace #rx"[.](..).*" (format "~a00" (stat-msecs v)) ".\\1")
(make-string (- memory-max-len (string-length (format "~a" (kb (stat-memory v)))))
#\space)
(kb (stat-memory v))
(make-string (- count-max-len (string-length (format "~a" (stat-count v))))
#\space)
(stat-count v)))
(define keys (sort (for/list ([k (in-hash-keys accums)] #:when (not (eq? k stat-key))) k)
>
#:key (lambda (key) (stat-msecs (hash-ref (hash-ref accums key) stat-key)))))
(for ([k (in-list keys)]
[i (in-naturals)])
(when (and newline? (positive? i)) (log-error ""))
(loop k (hash-ref accums k) (string-append indent " ") #f)))))))

View File

@ -489,39 +489,41 @@ Scheme_Object *scheme_extract_closure_local(Scheme_Object *obj, mz_jit_state *ji
}
Scheme_Object *scheme_specialize_to_constant(Scheme_Object *obj, mz_jit_state *jitter, int extra_push)
Scheme_Object *scheme_specialize_to_constant(Scheme_Object *obj, mz_jit_state *jitter, int extra_push, int extract_static)
{
Scheme_Object *c;
if (PAST_LIMIT()) return obj;
if (!jitter->nc) return obj;
/* We can always specialize static toplevel references */
if (extract_static
&& SAME_TYPE(SCHEME_TYPE(obj), scheme_static_toplevel_type)
&& (SCHEME_TOPLEVEL_FLAGS(obj) & SCHEME_TOPLEVEL_FLAGS_MASK) >= SCHEME_TOPLEVEL_FIXED) {
c = SCHEME_STATIC_TOPLEVEL_PREFIX(obj)->a[SCHEME_TOPLEVEL_POS(obj)];
c = ((Scheme_Bucket *)c)->val;
if (c)
return c;
}
if (SCHEME_NATIVE_LAMBDA_FLAGS(jitter->nc->code) & NATIVE_SPECIALIZED) {
if (SAME_TYPE(SCHEME_TYPE(obj), scheme_local_type)) {
c = scheme_extract_closure_local(obj, jitter, extra_push, 1);
if (c) {
MZ_ASSERT(SCHEME_TYPE(c) != scheme_prefix_type);
return c;
}
}
if (SAME_TYPE(SCHEME_TYPE(obj), scheme_toplevel_type)
&& (SCHEME_TOPLEVEL_FLAGS(obj) & SCHEME_TOPLEVEL_FLAGS_MASK) >= SCHEME_TOPLEVEL_FIXED) {
c = scheme_extract_global(obj, jitter->nc, 0);
if (c) {
c = ((Scheme_Bucket *)c)->val;
if (c)
if (jitter->nc) {
if (SCHEME_NATIVE_LAMBDA_FLAGS(jitter->nc->code) & NATIVE_SPECIALIZED) {
if (SAME_TYPE(SCHEME_TYPE(obj), scheme_local_type)) {
c = scheme_extract_closure_local(obj, jitter, extra_push, 1);
if (c) {
MZ_ASSERT(SCHEME_TYPE(c) != scheme_prefix_type);
return c;
}
}
}
if (SAME_TYPE(SCHEME_TYPE(obj), scheme_static_toplevel_type)
&& (SCHEME_TOPLEVEL_FLAGS(obj) & SCHEME_TOPLEVEL_FLAGS_MASK) >= SCHEME_TOPLEVEL_FIXED) {
c = SCHEME_STATIC_TOPLEVEL_PREFIX(obj)->a[SCHEME_TOPLEVEL_POS(obj)];
c = ((Scheme_Bucket *)c)->val;
if (c)
return c;
if (SAME_TYPE(SCHEME_TYPE(obj), scheme_toplevel_type)
&& (SCHEME_TOPLEVEL_FLAGS(obj) & SCHEME_TOPLEVEL_FLAGS_MASK) >= SCHEME_TOPLEVEL_FIXED) {
c = scheme_extract_global(obj, jitter->nc, 0);
if (c) {
c = ((Scheme_Bucket *)c)->val;
if (c)
return c;
}
}
}
}
@ -547,7 +549,7 @@ int scheme_native_closure_preserves_marks(Scheme_Object *p)
int scheme_is_noncm(Scheme_Object *a, mz_jit_state *jitter, int depth, int stack_start)
{
a = scheme_specialize_to_constant(a, jitter, stack_start);
a = scheme_specialize_to_constant(a, jitter, stack_start, 0);
if (SCHEME_PRIMP(a)) {
int opts;
@ -677,7 +679,8 @@ int scheme_is_simple(Scheme_Object *obj, int depth, int just_markless, mz_jit_st
{
Scheme_Object *rator;
rator = scheme_specialize_to_constant(((Scheme_App_Rec *)obj)->args[0], jitter,
stack_start + ((Scheme_App_Rec *)obj)->num_args);
stack_start + ((Scheme_App_Rec *)obj)->num_args,
0);
if (scheme_inlined_nary_prim(rator, obj, jitter)
&& !SAME_OBJ(rator, scheme_values_proc))
return 1;
@ -690,7 +693,7 @@ int scheme_is_simple(Scheme_Object *obj, int depth, int just_markless, mz_jit_st
case scheme_application2_type:
{
Scheme_Object *rator;
rator = scheme_specialize_to_constant(((Scheme_App2_Rec *)obj)->rator, jitter, stack_start + 1);
rator = scheme_specialize_to_constant(((Scheme_App2_Rec *)obj)->rator, jitter, stack_start + 1, 0);
if (scheme_inlined_unary_prim(rator, obj, jitter))
return 1;
else if (just_markless) {
@ -701,7 +704,7 @@ int scheme_is_simple(Scheme_Object *obj, int depth, int just_markless, mz_jit_st
case scheme_application3_type:
{
Scheme_Object *rator;
rator = scheme_specialize_to_constant(((Scheme_App3_Rec *)obj)->rator, jitter, stack_start + 2);
rator = scheme_specialize_to_constant(((Scheme_App3_Rec *)obj)->rator, jitter, stack_start + 2, 0);
if (scheme_inlined_binary_prim(rator, obj, jitter)
&& !SAME_OBJ(rator, scheme_values_proc))
return 1;
@ -991,7 +994,7 @@ int scheme_native_closure_is_single_result(Scheme_Object *rator)
static int produces_single_value(Scheme_Object *rator, int num_args, mz_jit_state *jitter)
{
rator = scheme_specialize_to_constant(rator, jitter, num_args);
rator = scheme_specialize_to_constant(rator, jitter, num_args, 1);
if (SAME_TYPE(SCHEME_TYPE(rator), scheme_native_closure_type))
return scheme_native_closure_is_single_result(rator);
@ -2104,7 +2107,7 @@ int scheme_generate(Scheme_Object *obj, mz_jit_state *jitter, int is_tail, int w
}
#endif
obj = scheme_specialize_to_constant(obj, jitter, 0);
obj = scheme_specialize_to_constant(obj, jitter, 0, 0);
orig_target = target;
result_ignored = (target < 0);
@ -2504,8 +2507,8 @@ int scheme_generate(Scheme_Object *obj, mz_jit_state *jitter, int is_tail, int w
v = SCHEME_PTR1_VAL(obj);
p = SCHEME_PTR2_VAL(obj);
v = scheme_specialize_to_constant(v, jitter, 0);
p = scheme_specialize_to_constant(p, jitter, 0);
v = scheme_specialize_to_constant(v, jitter, 0, 1);
p = scheme_specialize_to_constant(p, jitter, 0, 1);
if (is_single_valued(p, jitter)) {
/* We might discover late that `v` produces a single value,
@ -2965,7 +2968,15 @@ int scheme_generate(Scheme_Object *obj, mz_jit_state *jitter, int is_tail, int w
}
case scheme_branch_type:
{
return generate_branch(obj, jitter, is_tail, wcm_may_replace, multi_ok, orig_target, result_ignored, for_branch);
Scheme_Branch_Rec *branch = (Scheme_Branch_Rec *)obj;
Scheme_Object *tst;
tst = scheme_specialize_to_constant(branch->test, jitter, 0, 1);
if (SCHEME_TYPE(tst) > _scheme_values_types_) {
return scheme_generate((SCHEME_TRUEP(tst) ? branch->tbranch : branch->fbranch),
jitter, is_tail, wcm_may_replace,
multi_ok, orig_target, for_branch, for_values);
} else
return generate_branch(obj, jitter, is_tail, wcm_may_replace, multi_ok, orig_target, result_ignored, for_branch);
}
case scheme_lambda_type:
{

View File

@ -1612,7 +1612,7 @@ int scheme_jit_check_closure_extflonum_bit(Scheme_Lambda *data, int pos, int del
Scheme_Object *scheme_extract_global(Scheme_Object *o, Scheme_Native_Closure *nc, int local_only);
Scheme_Object *scheme_extract_closure_local(Scheme_Object *obj, mz_jit_state *jitter, int extra_push, int get_constant);
Scheme_Object *scheme_specialize_to_constant(Scheme_Object *obj, mz_jit_state *jitter, int extra_push);
Scheme_Object *scheme_specialize_to_constant(Scheme_Object *obj, mz_jit_state *jitter, int extra_push, int extract_static);
void scheme_jit_register_traversers(void);
#ifdef MZ_USE_LWC

View File

@ -1818,7 +1818,7 @@ int scheme_generate_app(Scheme_App_Rec *app, Scheme_Object **alt_rands, int num_
rator = (alt_rands ? alt_rands[0] : app->args[0]);
rator = scheme_specialize_to_constant(rator, jitter, num_pushes);
rator = scheme_specialize_to_constant(rator, jitter, num_pushes, 0);
if (no_call == 2) {
direct_prim = 1;

View File

@ -1271,7 +1271,7 @@ int scheme_generate_inlined_unary(mz_jit_state *jitter, Scheme_App2_Rec *app, in
{
Scheme_Object *rator = app->rator;
rator = scheme_specialize_to_constant(rator, jitter, 1);
rator = scheme_specialize_to_constant(rator, jitter, 1, 0);
{
int k;
@ -2577,8 +2577,8 @@ int scheme_generate_two_args(Scheme_Object *rand1, Scheme_Object *rand2, mz_jit_
{
int simple1, simple2, direction = 1;
rand1 = scheme_specialize_to_constant(rand1, jitter, skipped);
rand2 = scheme_specialize_to_constant(rand2, jitter, skipped);
rand1 = scheme_specialize_to_constant(rand1, jitter, skipped, 1);
rand2 = scheme_specialize_to_constant(rand2, jitter, skipped, 1);
simple1 = scheme_is_relatively_constant_and_avoids_r1(rand1, rand2);
simple2 = scheme_is_relatively_constant_and_avoids_r1(rand2, rand1);
@ -3042,7 +3042,7 @@ int scheme_generate_inlined_binary(mz_jit_state *jitter, Scheme_App3_Rec *app, i
{
Scheme_Object *rator = app->rator;
rator = scheme_specialize_to_constant(rator, jitter, 2);
rator = scheme_specialize_to_constant(rator, jitter, 2, 0);
if (SCHEME_PRIMP(rator) && IS_NAMED_PRIM(rator, "ptr-ref")) {
Scheme_App_Rec *app2;
@ -4549,7 +4549,7 @@ int scheme_generate_inlined_nary(mz_jit_state *jitter, Scheme_App_Rec *app, int
{
Scheme_Object *rator = app->args[0];
rator = scheme_specialize_to_constant(rator, jitter, app->num_args);
rator = scheme_specialize_to_constant(rator, jitter, app->num_args, 0);
if (!for_branch) {
int k;

File diff suppressed because it is too large Load Diff