statistical profiler, core and text interface part done
svn: r14340 original commit: 96d7291bfb1e4f4a600ecda14f314ccccfa2ff48
This commit is contained in:
commit
e29c411d53
258
collects/profile/analyzer.ss
Normal file
258
collects/profile/analyzer.ss
Normal file
|
@ -0,0 +1,258 @@
|
|||
#lang scheme/base
|
||||
|
||||
;; Analyzer for the sampler results
|
||||
|
||||
(provide analyze-samples)
|
||||
|
||||
(require scheme/list)
|
||||
|
||||
;; An encapsulation of an analyzed profile call graph:
|
||||
;; - total-time: the total time observed in msec (this is generally different
|
||||
;; than the time it took to run the profile).
|
||||
;; - sample-number: the number of samples taken.
|
||||
;; - thread-times: a list of (<thread-id> . msec) for the time spent in
|
||||
;; observed threads.
|
||||
;; - nodes: the list of call-graph nodes sorted by their total time.
|
||||
;; - *-node: a special node that is connected as a "caller" for all toplevel
|
||||
;; functions and a "callee" for all leaf functions. It will also be
|
||||
;; identifiable by having both id and src fields being #f. Can be used to
|
||||
;; start a graph traversal from the top or the bottom.
|
||||
(provide (struct-out profile))
|
||||
(define-struct profile
|
||||
(total-time cpu-time sample-number thread-times nodes *-node))
|
||||
|
||||
;; An entry for a single profiled function:
|
||||
;; - id, src: the corresponding values from `continuation-mark-set->context'.
|
||||
;; - thread-ids: the list of thread identifiers this function has been seen in.
|
||||
;; - total: total msecs it participated in (= time in it, including callees).
|
||||
;; - self: msecs where it was at the top of the stack (= time in its own code).
|
||||
;; - callers, callees: a list of `edge' values for the time spent while it was
|
||||
;; called by the repective <node>, or it called it, sorted in decreasing msec
|
||||
;; time.
|
||||
;; Note that the sum of caller/callee edges including the special `*-node'
|
||||
;; should be equal to the `total' time. So the edge from/to the `*-node' can
|
||||
;; be used to get the time spent as a leaf or as a root divided by the number
|
||||
;; of time the function appeared on the stack: so this value can be displayed
|
||||
;; in the call-graph and the numbers will sum up nicely to a 100%.
|
||||
(provide (struct-out node))
|
||||
(define-struct node (id src thread-ids total self callers callees)
|
||||
#:mutable
|
||||
#:property prop:custom-write
|
||||
(lambda (node o w?) (fprintf o "#<node:~s>" (or (node-id node) '???))))
|
||||
|
||||
;; An edge representing function calls between two nodes:
|
||||
;; - time: the total time spent while the call was somewhere on the stack.
|
||||
;; - caller, callee: the two relevant `node' values.
|
||||
;; - caller-time, callee-time: the time that the caller/callee spent in this
|
||||
;; call (different from the above time because each stack sample's time is
|
||||
;; divided by the number of times the caller/callee appears in that slice).
|
||||
(provide (struct-out edge))
|
||||
(define-struct edge (total caller caller-time callee callee-time)
|
||||
#:mutable
|
||||
#:property prop:custom-write
|
||||
(lambda (edge o w?)
|
||||
(fprintf o "#<edge:~s-~s>"
|
||||
(or (node-id (edge-caller edge)) '???)
|
||||
(or (node-id (edge-callee edge)) '???))))
|
||||
|
||||
(define with-hash:not-found (gensym))
|
||||
(define-syntax-rule (with-hash <hash> <key> <expr>)
|
||||
(let ([t <hash>] [k <key>])
|
||||
(let ([v (hash-ref t k with-hash:not-found)])
|
||||
(if (eq? v with-hash:not-found)
|
||||
(let ([v <expr>]) (hash-set! t k v) v)
|
||||
v))))
|
||||
|
||||
;; This function analyzes the output of the sampler. Returns a `profile'
|
||||
;; struct holding a list of `node' values, each one representing a node in the
|
||||
;; call graph, with the relevant information filled in. The results are sorted
|
||||
;; by the total time.
|
||||
(define (analyze-samples cpu-time+samples)
|
||||
(define cpu-time (car cpu-time+samples))
|
||||
(define samples (cdr cpu-time+samples))
|
||||
(define samples-by-thread
|
||||
(let ([by-thread (split-by-thread samples)])
|
||||
(for ([samples (in-vector by-thread)] [i (in-naturals 0)])
|
||||
(vector-set! by-thread i (get-times samples)))
|
||||
by-thread))
|
||||
(define id+src->node-hash (make-hasheq))
|
||||
(define (id+src->node id+src)
|
||||
(with-hash id+src->node-hash id+src
|
||||
(make-node (car id+src) (cdr id+src) '() 0 0 '() '())))
|
||||
;; special node that is the caller of toplevels and callee of leaves
|
||||
(define *-node (id+src->node '(#f . #f)))
|
||||
(define call->edge
|
||||
(let ([t (make-hasheq)])
|
||||
(lambda (ler lee)
|
||||
(with-hash (with-hash t ler (make-hasheq)) lee
|
||||
(let ([e (make-edge 0 ler 0 lee 0)])
|
||||
(set-node-callers! lee (cons e (node-callers lee)))
|
||||
(set-node-callees! ler (cons e (node-callees ler)))
|
||||
e)))))
|
||||
(define total-time 0)
|
||||
(define thread-times (make-vector (vector-length samples-by-thread) 0))
|
||||
(for ([thread-samples (in-vector samples-by-thread)]
|
||||
[thread-id (in-naturals 0)]
|
||||
#:when #t
|
||||
[sample (in-list thread-samples)])
|
||||
(define msecs (car sample))
|
||||
(define (connect ler lee ler# lee#)
|
||||
(define edge (call->edge ler lee))
|
||||
(set-edge-caller-time! edge (+ (edge-caller-time edge) (/ msecs lee#)))
|
||||
(set-edge-callee-time! edge (+ (edge-callee-time edge) (/ msecs ler#)))
|
||||
edge)
|
||||
(define stack ; the stack snapshot, translated to `node' values
|
||||
(for/list ([id+src (in-list (cdr sample))])
|
||||
(let* ([node (id+src->node id+src)] [tids (node-thread-ids node)])
|
||||
(unless (memq thread-id tids)
|
||||
(set-node-thread-ids! node (cons thread-id tids)))
|
||||
node)))
|
||||
(define counts (get-counts stack))
|
||||
(define stack+counts (map (lambda (x) (assq x counts)) stack))
|
||||
(define edges
|
||||
(if (null? stack)
|
||||
'()
|
||||
(append (let ([first (car stack+counts)] [last (last stack+counts)])
|
||||
(list (connect *-node (car last) 1 (cdr last))
|
||||
(connect (car first) *-node (cdr first) 1)))
|
||||
(for/list ([callee (in-list stack+counts)]
|
||||
[caller (in-list (cdr stack+counts))])
|
||||
(connect (car caller) (car callee)
|
||||
(cdr caller) (cdr callee))))))
|
||||
(set! total-time (+ msecs total-time))
|
||||
(for ([p (in-list counts)])
|
||||
(set-node-total! (car p) (+ msecs (node-total (car p)))))
|
||||
(for ([e (remove-duplicates edges eq?)])
|
||||
(set-edge-total! e (+ msecs (edge-total e))))
|
||||
(vector-set! thread-times thread-id
|
||||
(+ msecs (vector-ref thread-times thread-id)))
|
||||
(when (pair? stack)
|
||||
(set-node-self! (car stack) (+ (node-self (car stack)) msecs))))
|
||||
(set-node-total! *-node total-time)
|
||||
;; convert the nodes from the hash to a list, do a topological sort, and then
|
||||
;; sort by total time (combining both guarantees(?) sensible order)
|
||||
(let* ([nodes (hash-map id+src->node-hash (lambda (k v) v))]
|
||||
[nodes (topological-sort *-node (length nodes))]
|
||||
[nodes (sort (remq *-node nodes) > #:key node-total)])
|
||||
;; sort all the edges in the nodes according to total time
|
||||
(for ([n (in-list nodes)])
|
||||
(set-node-callees! n (sort (node-callees n) > #:key edge-callee-time))
|
||||
(set-node-callers! n (sort (node-callers n) > #:key edge-caller-time)))
|
||||
(make-profile
|
||||
total-time
|
||||
cpu-time
|
||||
(length samples)
|
||||
(for/list ([time (in-vector thread-times)] [n (in-naturals 0)])
|
||||
(cons n time))
|
||||
nodes
|
||||
*-node)))
|
||||
|
||||
;; A simple topological sort of nodes using BFS, starting from node `x' which
|
||||
;; will be the special *-node. Note that `expected-length' is only given for
|
||||
;; the sanity check in the end.
|
||||
(define (topological-sort x expected-length)
|
||||
(define sorted
|
||||
(let loop ([todo (list x)] [seen (list x)])
|
||||
(if (null? todo)
|
||||
'()
|
||||
(let* ([next (append-map (lambda (x) (map edge-caller (node-callers x)))
|
||||
todo)]
|
||||
[next (remove-duplicates next)]
|
||||
[next (remq* seen next)])
|
||||
(append todo (loop next (append next seen)))))))
|
||||
(if (= expected-length (length sorted))
|
||||
sorted
|
||||
(error 'topological "internal error")))
|
||||
|
||||
;; Groups raw samples by their thread-id, returns a vector with a field for
|
||||
;; each thread id holding the sample data for that thread. The samples in
|
||||
;; these are reversed (so they'll be sorted going forward in time).
|
||||
(define (split-by-thread samples)
|
||||
(define threads
|
||||
(make-vector (add1 (for/fold ([n -1]) ([sample (in-list samples)])
|
||||
(max (car sample) n)))
|
||||
'()))
|
||||
(for ([sample (in-list samples)])
|
||||
(let ([id (car sample)] [data (cdr sample)])
|
||||
(vector-set! threads id (cons data (vector-ref threads id)))))
|
||||
threads)
|
||||
#|
|
||||
(equal? (split-by-thread '())
|
||||
'#())
|
||||
(equal? (split-by-thread '([0 x]))
|
||||
'#([(x)]))
|
||||
(equal? (split-by-thread '([0 x] [0 y] [0 z]))
|
||||
'#([(z) (y) (x)]))
|
||||
(equal? (split-by-thread '([0 x] [1 y] [2 z]))
|
||||
'#([(x)] [(y)] [(z)]))
|
||||
(equal? (split-by-thread '([0 x1] [1 y1] [0 x2] [2 z1] [0 x3] [2 z2]))
|
||||
'#([(x3) (x2) (x1)] [(y1)] [(z2) (z1)]))
|
||||
|#
|
||||
|
||||
;; gets a list of thread-id and data for that thread beginning with the
|
||||
;; millisecond count, and returns a similar list where the samples begin with
|
||||
;; the time spent for that sample. The time spent is taken as half of the two
|
||||
;; touching ranges; for example, if there are three samples showing snapshot
|
||||
;; times of 10, 20, 60, then the middle one is assumed to have a time of 25.
|
||||
;; For the first and last samples, the time is twice the half of the single
|
||||
;; touching range -- with this example, this would be 10 for the first and 40
|
||||
;; for the last. If there is a thread with just one sample, it is dropped.
|
||||
(define (get-times samples)
|
||||
(cond
|
||||
;; nothing to do
|
||||
[(null? samples) '()]
|
||||
;; throw out a single sample
|
||||
[(null? (cdr samples)) '()]
|
||||
[else (let loop ([samples samples]
|
||||
[prevs (cons #f (map car samples))]
|
||||
[r '()])
|
||||
(if (null? samples)
|
||||
(reverse r)
|
||||
(let* ([prev (car prevs)]
|
||||
[cur (caar samples)]
|
||||
[data (cdar samples)]
|
||||
[prevs (cdr prevs)]
|
||||
[samples (cdr samples)]
|
||||
[next (and (pair? samples) (caar samples))])
|
||||
(loop samples prevs
|
||||
(cons (cons (if next
|
||||
;; not the last: there must be a next
|
||||
(if prev (/ (- next prev) 2) (- next cur))
|
||||
;; last one: there must be a prev
|
||||
(- cur prev))
|
||||
data)
|
||||
r)))))]))
|
||||
#|
|
||||
(equal? (get-times '())
|
||||
'())
|
||||
(equal? (get-times '([10 a]))
|
||||
'())
|
||||
(equal? (get-times '([10 a] [20 b]))
|
||||
'([10 a] [10 b]))
|
||||
(equal? (get-times '([10 a] [20 b] [60 c]))
|
||||
'([10 a] [25 b] [40 c]))
|
||||
(equal? (get-times '([10 a] [20 b] [30 c] [40 d]))
|
||||
'([10 a] [10 b] [10 c] [10 d]))
|
||||
(equal? (get-times '([10 a] [20 b] [60 c] [80 d]))
|
||||
'([10 a] [25 b] [30 c] [20 d]))
|
||||
|#
|
||||
|
||||
;; returns a list of (cons item occurrences) for the items in l
|
||||
(define (get-counts l)
|
||||
(let loop ([l l] [r '()])
|
||||
(if (null? l)
|
||||
r
|
||||
(let ([1st (car l)])
|
||||
(let loop* ([l1 '()] [c 1] [l (cdr l)])
|
||||
(cond [(null? l) (loop l1 (cons (cons 1st c) r))]
|
||||
[(eq? 1st (car l)) (loop* l1 (add1 c) (cdr l))]
|
||||
[else (loop* (cons (car l) l1) c (cdr l))]))))))
|
||||
#|
|
||||
(equal? (get-counts '()) '())
|
||||
(equal? (get-counts '(1)) '([1 . 1]))
|
||||
(equal? (get-counts '(1 1 1)) '([1 . 3]))
|
||||
(define (set=? xs ys) (null? (append (remove* xs ys) (remove* ys xs))))
|
||||
(set=? (get-counts '(1 2 3)) '([1 . 1] [2 . 1] [3 . 1]))
|
||||
(set=? (get-counts '(1 2 2 3 3 3)) '([1 . 1] [2 . 2] [3 . 3]))
|
||||
(set=? (get-counts '(3 1 2 3 2 3)) '([1 . 1] [2 . 2] [3 . 3]))
|
||||
|#
|
69
collects/profile/main.ss
Normal file
69
collects/profile/main.ss
Normal file
|
@ -0,0 +1,69 @@
|
|||
#lang scheme/base
|
||||
|
||||
(require "sampler.ss" (except-in "analyzer.ss" profile)
|
||||
(prefix-in text: "render-text.ss")
|
||||
(for-syntax scheme/base))
|
||||
|
||||
(define (profile-thunk thunk
|
||||
#:delay [delay 0.05]
|
||||
#:repeat [rpt 1]
|
||||
#:threads [threads? #f]
|
||||
#:render [renderer text:render])
|
||||
(define cust (and threads? (make-custodian (current-custodian))))
|
||||
(define sampler (create-sampler (if threads?
|
||||
(list cust (current-thread))
|
||||
(current-thread))
|
||||
delay))
|
||||
(define (run) (for ([i (in-range rpt)]) (thunk)))
|
||||
(with-handlers ([void (lambda (e)
|
||||
(fprintf (current-error-port)
|
||||
"profiled thunk error: ~a\n"
|
||||
(if (exn? e)
|
||||
(exn-message e)
|
||||
(format "~e" e))))])
|
||||
(if threads?
|
||||
(parameterize ([current-custodian cust]) (run))
|
||||
(run)))
|
||||
(sampler 'stop)
|
||||
(renderer (analyze-samples (sampler 'get-snapshots))))
|
||||
|
||||
(define-syntax (profile stx)
|
||||
(syntax-case stx ()
|
||||
[(_ x ...)
|
||||
(let loop ([expr #f] [kwds '()] [xs (syntax->list #'(x ...))])
|
||||
(cond
|
||||
[(null? xs)
|
||||
(if expr
|
||||
(with-syntax ([expr expr] [kwds (reverse kwds)])
|
||||
#'(profile-thunk (lambda () expr) . kwds))
|
||||
(raise-syntax-error 'profile "missing expression" stx))]
|
||||
[(keyword? (syntax-e (car xs)))
|
||||
(if (pair? (cdr xs))
|
||||
(loop expr (list* (cadr xs) (car xs) kwds) (cddr xs))
|
||||
;; let #%app throw the error
|
||||
(loop expr (cons (car xs) kwds) (cdr xs)))]
|
||||
[expr (raise-syntax-error 'profile "redundant expresion given"
|
||||
stx (car xs))]
|
||||
[else (loop (car xs) kwds (cdr xs))]))]))
|
||||
|
||||
|
||||
(define (fib1 n) (if (<= n 1) n (+ (fib1 (- n 1)) (fib1 (- n 2)))))
|
||||
(define (fib22 n) (if (<= n 2) 1 (+ (fib22 (- n 1)) (fib22 (- n 2)))))
|
||||
(define (fib3 n)
|
||||
(for ([i (in-range 100000000)]) (* i 3))
|
||||
(if (<= n 2) 1 (+ (fib22 (- n 1)) (fib22 (- n 2)))))
|
||||
(define (fibs n) (+ (fib1 n) (fib22 n) (fib3 n)))
|
||||
|
||||
(define (foo n)
|
||||
(define ch (make-channel))
|
||||
(define (bg-fib) (channel-put ch (fib1 n)))
|
||||
(thread bg-fib)
|
||||
(list (fib22 n) (channel-get ch)))
|
||||
|
||||
(require "render-graphviz.ss")
|
||||
|
||||
(profile (fibs 35)
|
||||
;(dynamic-require '(lib "scribblings/reference/reference.scrbl") #f)
|
||||
;(foo 35)
|
||||
;#:render render
|
||||
#:threads #t)
|
41
collects/profile/render-graphviz.ss
Normal file
41
collects/profile/render-graphviz.ss
Normal file
|
@ -0,0 +1,41 @@
|
|||
#lang scheme/base
|
||||
|
||||
(provide render)
|
||||
|
||||
(require "analyzer.ss" "utils.ss")
|
||||
|
||||
(define (render profile
|
||||
#:hide-self [hide-self% 1/100]
|
||||
#:hide-subs [hide-subs% 2/100])
|
||||
(define *-node (profile-*-node profile))
|
||||
(define hidden (get-hidden profile hide-self% hide-subs%))
|
||||
(define nodes (remq* hidden (profile-nodes profile)))
|
||||
(define total-time (profile-total-time profile))
|
||||
(define node->
|
||||
(let ([t (make-hasheq)])
|
||||
(for ([node (in-list nodes)] [idx (in-naturals 1)])
|
||||
(let ([id (node-id node)] [src (node-src node)])
|
||||
(hash-set! t node
|
||||
(list (format "node~a" idx)
|
||||
(if id
|
||||
(format "~a" id)
|
||||
(regexp-replace #rx"^.*/"
|
||||
(format-source (node-src node))
|
||||
""))))))
|
||||
(lambda (mode node)
|
||||
((case mode [(index) car] [(label) cadr]) (hash-ref t node)))))
|
||||
(define max-self%
|
||||
(/ (for/fold ([m 0]) ([node (in-list nodes)]) (max m (node-self node)))
|
||||
total-time))
|
||||
(printf "digraph Profile {\n")
|
||||
(for ([node (in-list nodes)])
|
||||
(define self% (/ (node-self node) total-time))
|
||||
(printf "~a [" (node-> 'index node))
|
||||
(printf "label=~s, " (node-> 'label node))
|
||||
(printf "fillcolor=\"1,~a,1\", " (exact->inexact (/ self% max-self%)))
|
||||
(printf "style=filled];\n")
|
||||
(for ([edge (in-list (node-callees node))])
|
||||
(define callee (edge-callee edge))
|
||||
(unless (or (eq? *-node callee) (memq callee hidden))
|
||||
(printf "~a -> ~a;\n" (node-> 'index node) (node-> 'index callee)))))
|
||||
(printf "}\n"))
|
137
collects/profile/render-text.ss
Normal file
137
collects/profile/render-text.ss
Normal file
|
@ -0,0 +1,137 @@
|
|||
#lang at-exp scheme/base
|
||||
|
||||
(provide render)
|
||||
|
||||
(require "analyzer.ss" "utils.ss" scheme/list)
|
||||
|
||||
(define (f:msec msec)
|
||||
(number->string (round (inexact->exact msec))))
|
||||
|
||||
(define (f:msec* msec)
|
||||
(string-append (f:msec msec) "ms"))
|
||||
|
||||
(define (display-table aligns table)
|
||||
;; * thunks are used for cells that are ignored when inspecting widths
|
||||
;; * chars are used for filler cells
|
||||
(define (display-line strings)
|
||||
(printf "~a\n" (regexp-replace #rx" +$" (apply string-append strings) "")))
|
||||
(let ([widths (let loop ([table table])
|
||||
(let ([table (filter pair? table)])
|
||||
(if (null? table) '()
|
||||
(cons (apply max (filter-map
|
||||
(lambda (x)
|
||||
(and (string? (car x))
|
||||
(string-length (car x))))
|
||||
table))
|
||||
(loop (map cdr table))))))])
|
||||
(for ([row (in-list table)])
|
||||
(display-line
|
||||
(for/list ([cell (in-list row)]
|
||||
[width (in-list widths)]
|
||||
[align (in-list aligns)])
|
||||
(let* ([cell (cond [(char? cell) (make-string width cell)]
|
||||
[(procedure? cell) (cell)]
|
||||
[else cell])]
|
||||
[pad (make-string (max 0 (- width (string-length cell)))
|
||||
#\space)])
|
||||
(case align
|
||||
[(l) (string-append cell pad)]
|
||||
[(r) (string-append pad cell)]
|
||||
[else (error 'internal-error "poof")])))))))
|
||||
|
||||
(define (render profile
|
||||
#:truncate-source [truncate-source 50]
|
||||
#:hide-self [hide-self% 1/100]
|
||||
#:hide-subs [hide-subs% 2/100])
|
||||
(define (show . xs)
|
||||
(let loop ([x xs])
|
||||
(cond [(or (not x) (null? x) (void? x)) (void)]
|
||||
[(pair? x) (loop (car x)) (loop (cdr x))]
|
||||
[else (display x)]))
|
||||
(newline))
|
||||
(define total-time (profile-total-time profile))
|
||||
(define cpu-time (profile-cpu-time profile))
|
||||
(define sample-number (profile-sample-number profile))
|
||||
(define granularity (/ total-time sample-number))
|
||||
(define threads+times (profile-thread-times profile))
|
||||
(define *-node (profile-*-node profile))
|
||||
(define hidden (get-hidden profile hide-self% hide-subs%))
|
||||
(define nodes (remq* hidden (profile-nodes profile)))
|
||||
(define node->
|
||||
(let ([t (make-hasheq)])
|
||||
(for ([node (in-list nodes)] [idx (in-naturals 1)])
|
||||
(let ([index (format "[~a]" idx)]
|
||||
[label (format "~a" (or (node-id node) '???))])
|
||||
(hash-set! t node (list index label @string-append{@label @index}))))
|
||||
(lambda (mode node)
|
||||
((case mode [(index) car] [(label) cadr] [(sub-label) caddr])
|
||||
(hash-ref t node)))))
|
||||
(define (sep ch) (list ch ch ch ch ch ch ch ch ch ch))
|
||||
(define =sep (sep #\=))
|
||||
(define -sep (sep #\-))
|
||||
@show{
|
||||
Profiling results
|
||||
-----------------
|
||||
Total cpu time observed: @f:msec*[total-time] (out of @f:msec*[cpu-time])
|
||||
Number of samples taken: @sample-number (once every @f:msec*[granularity])
|
||||
}
|
||||
(when (> (length threads+times) 1)
|
||||
@show{ Threads observed: @(length threads+times)})
|
||||
(when (pair? hidden)
|
||||
(let* ([hidden (length hidden)]
|
||||
[nodes (length (profile-nodes profile))]
|
||||
[self% @string-append{self<@(format-percent (or hide-self% 0))}]
|
||||
[subs% @string-append{local<@(format-percent (or hide-subs% 0))}]
|
||||
[%s (cond [(not hide-self%) subs%]
|
||||
[(not hide-subs%) self%]
|
||||
[else @string-append{@self% and @subs%}])])
|
||||
@show{ (Hiding functions with @|%s|: @|hidden| of @nodes hidden)}))
|
||||
(newline)
|
||||
(display-table
|
||||
'(r l r l l r l l l r l l)
|
||||
(append*
|
||||
`(,=sep
|
||||
(" " " " " "" " " " " "" " " " " Caller")
|
||||
("Idx" " " "To""tal " " " "Se""lf " " " "Name+src" "Local%")
|
||||
(" " " " " ms""(pct)" " " "ms""(pct)" " " " Callee")
|
||||
,=sep)
|
||||
(for/list ([node (in-list nodes)])
|
||||
(define index (node-> 'index node))
|
||||
(define name (node-> 'label node))
|
||||
(define total (node-total node))
|
||||
(define totalS (f:msec total))
|
||||
(define total% @string-append{(@(format-percent (/ total total-time)))})
|
||||
(define self (node-self node))
|
||||
(define selfS (f:msec self))
|
||||
(define self% @string-append{(@(format-percent (/ self total-time)))})
|
||||
(define name+src
|
||||
(let* ([src (format-source (node-src node))]
|
||||
[src-len (string-length src)]
|
||||
[name-len (string-length name)])
|
||||
(string-append
|
||||
name " "
|
||||
;; truncate-source only truncates the source
|
||||
(let* ([n (and truncate-source
|
||||
((+ src-len name-len 1) . - . truncate-source))]
|
||||
[n (and n (positive? n) (- src-len n 3))])
|
||||
(cond [(not n) src]
|
||||
[(n . <= . 0) "..."]
|
||||
[else (string-append "..."
|
||||
(substring src (- src-len n)))])))))
|
||||
(define (sub get-edges get-node get-node-time)
|
||||
(for*/list ([edge (in-list (get-edges node))]
|
||||
[sub (in-list (list (get-node edge)))] ; <-- hack...
|
||||
#:when (not (or (eq? *-node sub) ; <-- ...for this
|
||||
(memq sub hidden))))
|
||||
(define name (node-> 'sub-label sub))
|
||||
(define local% (format-percent (/ (get-node-time edge) total)))
|
||||
`("" "" "" "" "" "" "" ""
|
||||
,(string-append " " name) ,local%
|
||||
"" "")))
|
||||
`(,@(reverse (sub node-callers edge-caller edge-caller-time))
|
||||
(,(node-> 'index node)
|
||||
" " ,totalS ,total%
|
||||
" " ,selfS ,self%
|
||||
" " ,(lambda () name+src))
|
||||
,@(sub node-callees edge-callee edge-callee-time)
|
||||
,-sep)))))
|
134
collects/profile/sampler.ss
Normal file
134
collects/profile/sampler.ss
Normal file
|
@ -0,0 +1,134 @@
|
|||
#lang scheme/base
|
||||
|
||||
;; The core profiler sample collector
|
||||
;; (This module is a private tool for collecting profiling data, and should not
|
||||
;; be used as is.)
|
||||
|
||||
(provide create-sampler)
|
||||
|
||||
;; create-sampler : creates a sample collector thread, which tracks the given
|
||||
;; `to-track' value every `delay' seconds.
|
||||
;; * The input value can be either a thread (track just that thread), a
|
||||
;; custodian (track all threads managed by the custodian), or a list of
|
||||
;; threads and/or custodians. If a custodian is given, it must be
|
||||
;; subordinate to the current custodian.
|
||||
;; * The collected values are (<thread-id> <thread-time> . <stack>), where
|
||||
;; - The <thread-id> is an integer number identifying the thread, starting
|
||||
;; from 0. If the collected data has thread ids in a 0..N range
|
||||
;; (exclusive) , then there were N threads that were observed. (This can
|
||||
;; be relevant when tracking a custodian, where threads can change
|
||||
;; dynamically.)
|
||||
;; - The <thread-time> is the result of `current-process-milliseconds' for
|
||||
;; the thread that this sample came from. Note that these numbers will not
|
||||
;; start at 0, since threads are likely to run before a sample is
|
||||
;; collected.
|
||||
;; - Finally, the <stack> part is a snapshot of the thread's stack, as
|
||||
;; grabbed by `continuation-mark-set->context'. The the values in these
|
||||
;; snapshots are interned to reduce memory load.
|
||||
;; The results are collected sequentially, so they're always sorted from the
|
||||
;; newest to the oldest. Remember that these results should be considered
|
||||
;; private for the profiler collection, and can change when more information
|
||||
;; needs to be collected.
|
||||
;; * Returns a "controller" function that accepts messages to control the
|
||||
;; sampler thread. The current set of messages that the controller
|
||||
;; understands are:
|
||||
;; - 'pause and 'resume: stops or resumes collecting samples. These messages
|
||||
;; can be nested. Note that the thread will continue running it just won't
|
||||
;; collect snapshots.
|
||||
;; - 'stop: kills the collector thread. Should be called when you have your
|
||||
;; data. (There is no message to start a new sampler thread, although
|
||||
;; adding one will not be difficult.)
|
||||
;; - 'set-tracked! <new>: changes the thread/s and/or custodian/s to track.
|
||||
;; - 'set-delay! <new>: changes the sampling delay. This means that we won't
|
||||
;; have a direct correlation between the number of samples and the time
|
||||
;; they represent -- but the samples are statistical snapshots anyway, and
|
||||
;; the results are not formulated in terms of time spent. (The time spent
|
||||
;; could be added of course, but it is best to do that in terms of the
|
||||
;; start/stop times)
|
||||
;; - 'get-snapshots: returns the currently collected list of snapshots. Note
|
||||
;; that this can be called multiple times, each will return the data that
|
||||
;; is collected up to that point in time.
|
||||
(define (create-sampler to-track delay)
|
||||
;; the collected data
|
||||
(define snapshots '())
|
||||
(define cust (current-custodian))
|
||||
;; intern the entries (which are (cons id/#f srcloc/#f))
|
||||
(define entry-table (make-hash))
|
||||
(define (intern-entry entry)
|
||||
(let* ([key (or (cdr entry) (car entry))]
|
||||
[en (hash-ref entry-table key #f)])
|
||||
(if en
|
||||
;; ELI: is this sanity check needed?
|
||||
;; (if (equal? en entry)
|
||||
;; en
|
||||
;; (error 'profile "internal error: assumption invalid"))
|
||||
en
|
||||
(begin (hash-set! entry-table key entry) entry))))
|
||||
(define (validate to-track who)
|
||||
(let loop ([t to-track])
|
||||
(cond
|
||||
[(thread? t)]
|
||||
[(list? t) (for-each loop t)]
|
||||
[(not (custodian? t))
|
||||
(raise-type-error
|
||||
who "thread, custodian, or a list of threads/csutodians" to-track)]
|
||||
;; test that it's subordinate
|
||||
[(with-handlers ([exn:fail:contract? (lambda (_) #t)])
|
||||
(custodian-managed-list t cust) #f)
|
||||
(error who "got a custodian that is not subordinate to current")])))
|
||||
(define paused 0)
|
||||
(define thread-id
|
||||
(let ([next-id 0] [t (make-weak-hasheq)])
|
||||
(lambda (thd)
|
||||
(or (hash-ref t thd #f)
|
||||
(let ([id next-id])
|
||||
(set! next-id (add1 next-id))
|
||||
(hash-set! t thd id)
|
||||
id)))))
|
||||
(define (sampler)
|
||||
(sleep delay)
|
||||
(when (paused . <= . 0)
|
||||
(let loop ([t to-track])
|
||||
(cond [(thread? t)
|
||||
(unless (eq? t sampler-thread)
|
||||
(set! snapshots
|
||||
(cons (list* (thread-id t)
|
||||
(current-process-milliseconds t)
|
||||
(map intern-entry
|
||||
(continuation-mark-set->context
|
||||
(continuation-marks t))))
|
||||
snapshots)))]
|
||||
[(custodian? t) (for-each loop (custodian-managed-list t cust))]
|
||||
;; cannot assume that it's a list: we might get other values from
|
||||
;; a custodian managed list
|
||||
[(list? t) (for-each loop t)])))
|
||||
(sampler))
|
||||
(define cpu-time 0)
|
||||
(define start-time (current-process-milliseconds))
|
||||
(define (add-time)
|
||||
(when (paused . <= . 0)
|
||||
(let ([cur (current-process-milliseconds)])
|
||||
(set! cpu-time (+ cpu-time (- cur start-time)))
|
||||
(set! start-time cur))))
|
||||
(define (ignore-time)
|
||||
(when (paused . <= . 0)
|
||||
(set! start-time (current-process-milliseconds))))
|
||||
(define sampler-thread
|
||||
(begin (validate to-track 'create-sampler)
|
||||
(thread sampler)))
|
||||
;; use a sema to avoid mutations from different threads, the sampler thread
|
||||
;; is only reading these values so it doesn't use it.
|
||||
(define sema (make-semaphore 1))
|
||||
(define (sampler-controller msg [arg #f])
|
||||
(define-syntax-rule (w/sema body ...)
|
||||
(call-with-semaphore sema (lambda () body ...)))
|
||||
(case msg
|
||||
[(pause) (w/sema (add-time) (set! paused (add1 paused)))]
|
||||
[(resume) (w/sema (set! paused (sub1 paused)) (ignore-time))]
|
||||
[(stop) (kill-thread sampler-thread) (add-time) (set! paused +inf.0)]
|
||||
[(set-tracked!) (validate arg 'sampler-controller)
|
||||
(w/sema (set! to-track arg))]
|
||||
[(set-delay!) (w/sema (set! delay arg))]
|
||||
[(get-snapshots) (add-time) (cons cpu-time snapshots)]
|
||||
[else (error 'sampler-controller "unknown message: ~e" msg)]))
|
||||
sampler-controller)
|
156
collects/profile/scribblings/analyzer.scrbl
Normal file
156
collects/profile/scribblings/analyzer.scrbl
Normal file
|
@ -0,0 +1,156 @@
|
|||
#lang scribble/doc
|
||||
|
||||
@(require scribble/manual
|
||||
(for-label scheme profile/analyzer))
|
||||
|
||||
@title[#:tag "analyzer"]{Analyzing Profile Data}
|
||||
|
||||
@defmodule[profile/analyzer]
|
||||
|
||||
Once a profile run is done, and the results are collected, the next
|
||||
step is to analyze the data. In this step the sample time are
|
||||
computed and summed, a call-graph representing the observed function
|
||||
calls is built, and per-node and per-edge information is created.
|
||||
This is the job of the main function provided by
|
||||
@scheme[profile/analyzer].
|
||||
|
||||
@defproc[(analyze-samples [raw-sample-data any/c])
|
||||
profile?]{
|
||||
|
||||
This function consumes the raw result of the
|
||||
@seclink["sampler"]{sampler} (which is given in an undocumented form),
|
||||
analyzes it, and returns a @scheme[profile] value holding the analyzed
|
||||
results. Without this function, the results of the sampler are
|
||||
meaningless.}
|
||||
|
||||
|
||||
@defstruct[profile ([total-time exact-nonnegative-integer?]
|
||||
[cpu-time exact-nonnegative-integer?]
|
||||
[sample-number exact-nonnegative-integer?]
|
||||
[thread-times (listof (cons exact-nonnegative-integer?
|
||||
exact-nonnegative-integer?))]
|
||||
[nodes (listof node?)]
|
||||
[*-node node?])]{
|
||||
|
||||
Represents the analyzed profile result.
|
||||
|
||||
@itemize[
|
||||
|
||||
@item{@scheme[total-time] is the total observed time (in milliseconds)
|
||||
included in the profile. This is different than the actual time the
|
||||
profiling took, due to unaccounted-for time spent in untracked
|
||||
threads. (E.g., the sampler thread itself.)}
|
||||
|
||||
@item{@scheme[cpu-time] is the actual cpu time consumed by the process
|
||||
during the profiler's work.}
|
||||
|
||||
@item{@scheme[sample-number] holds the number of samples taken during
|
||||
the profile. This can be used to compute the average time frame
|
||||
each of the input samples represented.}
|
||||
|
||||
@item{@scheme[thread-times] holds an association list mapping thread
|
||||
identifiers to cpu time for the corresponding threads. As samples
|
||||
are collected, each thread that is observed is assigned a small
|
||||
integer identifier. These identifers are listed for each function
|
||||
call, and the total time spent in each thread is in this field.}
|
||||
|
||||
@item{@scheme[nodes] is a list of nodes representing all observed
|
||||
functions. These nodes are the components of the call-graph that
|
||||
the analyzer assembles (see the @scheme[edge] field). The nodes are
|
||||
sorted by decreasing total amount of time (time spent either in the
|
||||
function or in its callees), and by a top-to-bottom topological
|
||||
sort.}
|
||||
|
||||
@item{@scheme[*-node] holds a ``special'' node value that is
|
||||
constructed for every graph. This node is used as the caller for
|
||||
all top-level function nodes and as the callee for all leaf nodes.
|
||||
It can therefore be used to start a scan of the call graph. In
|
||||
addition, the times associated with it's "callers and callees"
|
||||
actually represent the time these functions spent being the root of
|
||||
the computation or its leaf. (This can be different from a node's
|
||||
``self'' time, since it is divided by the number of instances a
|
||||
function had on the stack for every sample --- so for recursive
|
||||
functions this value is different from.)}
|
||||
|
||||
]}
|
||||
|
||||
|
||||
@defstruct[node ([id (or/c #f symbol?)]
|
||||
[src (or/c #f srcloc?)]
|
||||
[thread-ids (listof exact-nonnegative-integer?)]
|
||||
[total exact-nonnegative-integer?]
|
||||
[self exact-nonnegative-integer?]
|
||||
[callers (listof edge?)]
|
||||
[callees (listof edge?)])]{
|
||||
|
||||
Represents a function call node in the call graph of an analyzed
|
||||
profile result.
|
||||
|
||||
@itemize[
|
||||
|
||||
@item{The @scheme[id] and @scheme[src] field hold a symbol naming the
|
||||
function and/or its source location as a @scheme[srcloc] value.
|
||||
This is the same as the results of
|
||||
@scheme[continuation-mark-set->context], so at most of of these can
|
||||
be @scheme[#f], except for the special @scheme[*-node] (see the
|
||||
@scheme[profile] struct) that can be identified by both of these
|
||||
being @scheme[#f].}
|
||||
|
||||
@item{@scheme[thread-ids] holds a list of thread identifiers that were
|
||||
observed executing this function.}
|
||||
|
||||
@item{@scheme[total] holds the total time (in milliseconds) where this
|
||||
function was anywhere on the stack. It is common to see a few
|
||||
toplevel functions that have close to a 100% total time, but
|
||||
otherwise small @scheme[self] times --- these functions are the ones
|
||||
that derive the work that was done, but they don't do any hard work
|
||||
directly.}
|
||||
|
||||
@item{@scheme[self] holds the total time (in milliseconds) where this
|
||||
function was observed as the leaf of the stack. It represents the
|
||||
actual work done by this function, rather than @scheme[total] that
|
||||
represents the work done by both the function and its callees.}
|
||||
|
||||
@item{@scheme[callers] and @scheme[callees] hold the list of caller
|
||||
and callee nodes. The nodes are not actually held in these lists,
|
||||
instead, @scheme[edge] values are used --- and provide information
|
||||
specific to an edge in the call-graph.}
|
||||
|
||||
]}
|
||||
|
||||
|
||||
@defstruct[edge ([total exact-nonnegative-integer?]
|
||||
[caller node?]
|
||||
[caller-time exact-nonnegative-integer?]
|
||||
[callee node?]
|
||||
[callee-time exact-nonnegative-integer?])]{
|
||||
|
||||
Represents an edge between two function call nodes in the call graph
|
||||
of an analyzed profile result.
|
||||
|
||||
@itemize[
|
||||
|
||||
@item{@scheme[total] is analogous to the @scheme[total] field of a
|
||||
@scheme[node] value: the total time that this edge was anywhere on
|
||||
the stack.}
|
||||
|
||||
@item{@scheme[caller] and @scheme[callee] hold the two nodes that are
|
||||
connected by this edge.}
|
||||
|
||||
@item{@scheme[caller-time] and @scheme[callee-time] hold the time
|
||||
spent on this edge from the caller's or the callee's perspective.
|
||||
These times are different from each other (as well as from the total
|
||||
time) because the sums that make them are each divided by the number
|
||||
of times the caller or the callee was on the stack.
|
||||
|
||||
To understand this difference, consider a stack snapshot holding
|
||||
@tt{A @'rarr B @'rarr B @'rarr B @'rarr A}, and representing a
|
||||
second of observed cpu time. For this sample, the @tt{A @'rarr B}
|
||||
edge is charged by a whole second for its total time (the same goes
|
||||
for the @tt{A @'rarr A} edge, for example). Its caller time is
|
||||
charged 1/2 second because @tt{A} appears twice in this stack
|
||||
snapshot (in the other half, @tt{A} is chared for being a leaf ---
|
||||
the caller of the special @scheme[*-node]), and its callee time is
|
||||
charged 1/3 respectively.}
|
||||
|
||||
]}
|
3
collects/profile/scribblings/info.ss
Normal file
3
collects/profile/scribblings/info.ss
Normal file
|
@ -0,0 +1,3 @@
|
|||
#lang setup/infotab
|
||||
|
||||
(define scribblings '(("profile.scrbl" () (tool-library))))
|
25
collects/profile/scribblings/profile.scrbl
Normal file
25
collects/profile/scribblings/profile.scrbl
Normal file
|
@ -0,0 +1,25 @@
|
|||
#lang scribble/doc
|
||||
|
||||
@(require scribble/manual
|
||||
(for-label scheme))
|
||||
|
||||
@title{Statistical Profiler}
|
||||
|
||||
The @scheme[profile] collection implements a statistical profiler.
|
||||
The profiling is done by running a background thread that collects
|
||||
stack snapshots via @scheme[continuation-mark-set->context], meaning
|
||||
that the result is an estimate of the execution costs and it is
|
||||
limited to the kind of information that
|
||||
@scheme[continuation-mark-set->context] produces (most notably being
|
||||
limited to functions calls, and subject to compiler optimizations);
|
||||
but the result is often useful. In practice, since this method does
|
||||
not require recompilation of your source and has very little runtime
|
||||
overhead, it can be used for longer runs which compensates for these
|
||||
limits.
|
||||
|
||||
@table-of-contents[]
|
||||
|
||||
@include-section["toplevel.scrbl"]
|
||||
@include-section["sampler.scrbl"]
|
||||
@include-section["analyzer.scrbl"]
|
||||
@include-section["renderers.scrbl"]
|
129
collects/profile/scribblings/renderers.scrbl
Normal file
129
collects/profile/scribblings/renderers.scrbl
Normal file
|
@ -0,0 +1,129 @@
|
|||
#lang scribble/doc
|
||||
|
||||
@(require scribble/manual
|
||||
(for-label scheme
|
||||
profile/analyzer
|
||||
(prefix-in text: profile/render-text)
|
||||
(prefix-in graphviz: profile/render-graphviz)))
|
||||
|
||||
@title[#:tag "renderers"]{Profile Renderers}
|
||||
|
||||
After collecting the profile samples and analyzing the data, the last
|
||||
aspect of profiling is to render the results. The profile collection
|
||||
provides several renderers, each providing a rendering function that
|
||||
consumes a @scheme[profile] instance. See the
|
||||
@seclink["analyzer"]{analyzer} section for a description of the
|
||||
@scheme[profile] struct if you want to implement your own renderer.
|
||||
|
||||
@;--------------------------------------------------------------------
|
||||
@section{Textual Rendering}
|
||||
|
||||
@defmodule[profile/render-text]
|
||||
|
||||
@defproc[(render
|
||||
[profile-data profile?]
|
||||
[#:truncate-source truncate-source exact-nonnegative-integer? 50]
|
||||
[#:hide-self hide-self% (between/c 0 1) 1/100]
|
||||
[#:hide-subs hide-subs% (between/c 0 1) 2/100])
|
||||
void?]{
|
||||
|
||||
Prints the given @scheme[profile] results as a textual table.
|
||||
|
||||
The printout begins with some general facts about the profile, and
|
||||
then a table that represents the call-graph is printed. Each row in
|
||||
this table looks like:
|
||||
|
||||
@verbatim[#:indent 2]{
|
||||
B [M1] M2%
|
||||
[N1] N2(N3%) N4(N5%) A ...path/to/source.ss:12:34
|
||||
C [M3] M4%}
|
||||
|
||||
Where actual numbers appear in the printout. The meaning of the
|
||||
numbers and labels is as follows:
|
||||
@itemize{
|
||||
@item{@tt{A} --- the name of the function that this node represents,
|
||||
followed by the source location for the function if it is known.
|
||||
The name can be ``???'' for functions with no identifier, but in
|
||||
this case the source location will identify them.}
|
||||
@item{@tt{N1} --- an index number associated with this node. This is
|
||||
important in references to this function, since the symbolic names
|
||||
are not unique (and some can be missing). The number itself has no
|
||||
significance, it simply goes from 1 up.}
|
||||
@item{@tt{N2} --- the time (in milliseconds) that this function has
|
||||
been anywhere in a stack snapshot. This is the total time that the
|
||||
execution was somewhere in this function or in its callees.
|
||||
(Corresponds to the @scheme[node-total] field.)}
|
||||
@item{@tt{N3} --- this is the percentage of the node's total time
|
||||
(@tt{N2}) from the total observed time of the profile. An entry
|
||||
with a 100% refers to a function that was active throughout the
|
||||
whole execution.}
|
||||
@item{@tt{N4} --- the time (in milliseconds) that this function has
|
||||
been at the top of the stack snapshot. This is the time that this
|
||||
function consumed doing work itself rather than calling other
|
||||
functions. (Corresponds to the @scheme[node-self] field.)}
|
||||
@item{@tt{N5} --- this is the percentage of @tt{N4} out of the total
|
||||
observed time of the profile. Functions with high values here can
|
||||
be good candidates for optimization, But, of course, they can
|
||||
represent doing real work due to one of its callers that need to be
|
||||
optimized.}
|
||||
@item{@tt{B} and @tt{C} --- these are labels for the callers and
|
||||
callees of the function. Any number of callers and callees can
|
||||
appear here (including 0). The function itself can also appear in
|
||||
both places if it is (non-tail) recursive.}
|
||||
@item{@tt{M1} and @tt{M3} --- the index numbers for @tt{B} and
|
||||
@tt{C}. They can be used to disambiguate functions with the same
|
||||
name, as well as a quick way to find the corresponding entry in the
|
||||
table.}
|
||||
@item{@tt{M2} and @tt{M4} --- the percentages of the time @tt{A} spent
|
||||
being called by @tt{B} and calling @tt{C}. These percentages
|
||||
represent the time that this edge was found on a stack snapshot,
|
||||
divided by the number of occurrences of @tt{A} on the same snapshot.
|
||||
The number is the percentage of these times out of @tt{N2}, the
|
||||
total time @tt{A} has been active.
|
||||
|
||||
The total percentages for the all caller and for all callees should
|
||||
be close to 100% minus the time @tt{A} was the leaf or the root.
|
||||
|
||||
These values correspond to the @scheme[edge-caller-time] and
|
||||
@scheme[edge-callee-time] fields; see the documentation for further
|
||||
details.}
|
||||
}
|
||||
|
||||
The function has a few keyword arguments to customize its output:
|
||||
@itemize[
|
||||
|
||||
@item{The @scheme[truncate-source] argument determines the length that
|
||||
the source string should take (together with its label).}
|
||||
|
||||
@item{@scheme[hide-self%] and @scheme[hide-subs%] control hiding some
|
||||
of the nodes. A node is hidden if its self time (@tt{N3} in the
|
||||
above example) is smaller than @scheme[hide-self%] @emph{and} if all
|
||||
places where it occurs as a caller or a callee have percentages that
|
||||
are smaller than @scheme[hide-subs%]. The reason for requiring both
|
||||
conditions is to avoid having ``dangling references'' to hidden
|
||||
nodes.}
|
||||
|
||||
]}
|
||||
|
||||
|
||||
@;--------------------------------------------------------------------
|
||||
@section{Graph Rendering}
|
||||
|
||||
@defmodule[profile/render-graphviz]
|
||||
|
||||
@defproc[(render
|
||||
[profile-data profile?]
|
||||
[#:hide-self hide-self% (between/c 0 1) 1/100]
|
||||
[#:hide-subs hide-subs% (between/c 0 1) 2/100])
|
||||
void?]{
|
||||
|
||||
Prints the given @scheme[profile] results as a Graphviz directed
|
||||
graph.
|
||||
|
||||
This is an experimental module, provided mostly as a proof-of-concept.
|
||||
It renders the profile's call-graph as a graph representation for one
|
||||
of the Graphviz tools to render. Nodes are colored according to their
|
||||
`self' percentages, and edges.
|
||||
|
||||
The keyword arguments control hiding nodes in the same way as with the
|
||||
textual renderer.}
|
60
collects/profile/scribblings/sampler.scrbl
Normal file
60
collects/profile/scribblings/sampler.scrbl
Normal file
|
@ -0,0 +1,60 @@
|
|||
#lang scribble/doc
|
||||
|
||||
@(require scribble/manual
|
||||
(for-label scheme profile/sampler profile/analyzer))
|
||||
|
||||
@title[#:tag "sampler"]{Collecting Profile Information}
|
||||
|
||||
@defmodule[profile/sampler]
|
||||
|
||||
@defproc[(create-sampler [to-track (or/c thread? custodian?
|
||||
(listof (or/c thread? custodian?)))]
|
||||
[delay nonnegative-number?])
|
||||
((symbol?) (any/c) . ->* . any/c)]{
|
||||
|
||||
Creates a sample collector thread, which tracks the given
|
||||
@scheme[to-track] value every @scheme[delay] seconds. The
|
||||
@scheme[to-track] value can be either a thread (track just that
|
||||
thread), a custodian (track all threads managed by the custodian), or
|
||||
a list of threads and/or custodians. If a custodian is given, it must
|
||||
be subordinate to the current custodian.
|
||||
|
||||
The resulting value is a controller function, which consumes a message
|
||||
consisting of a symbol and an optional argument, and can affect the
|
||||
sampler. The following messages are currently supported:
|
||||
@itemize[
|
||||
|
||||
@item{@scheme['pause] and @scheme['resume] will stop or resume data
|
||||
collection. These messages can be nested. Note that the thread will
|
||||
continue running it will just stop collecting snapshots.}
|
||||
|
||||
@item{@scheme['stop] kills the controlled thread. It should be called
|
||||
when no additional data should be collected. (This is currently
|
||||
irreversible: there is no message to start a new sampler thread.)}
|
||||
|
||||
@item{@scheme['set-tracked!] with a value will change the tracked
|
||||
objects (initially specified as the @scheme[to-track] argument) to
|
||||
the given value.}
|
||||
|
||||
@item{@scheme['set-tracked!] with a value will change the delay that
|
||||
the sampler us taking between snapshots. Note that although
|
||||
changing this means that the snapshots are not uniformly
|
||||
distributed, the results will still be sensible --- this is because
|
||||
the cpu time between samples is taken into account when the
|
||||
resulting data is analyzed.}
|
||||
|
||||
@item{Finally, a @scheme['get-snapshots] message will make the
|
||||
controller return the currently collected data. Note that this can
|
||||
be called multiple times, each call will return the data thatis
|
||||
collected up to that point in time. In addition, it can be (and
|
||||
usually is) called after the sampler was stopped.
|
||||
|
||||
The value that is returned should be considered as an undocumented
|
||||
internal detail of the profiler, to be sent to
|
||||
@scheme[analyze-samples] for analysis. The reason this is not done
|
||||
automatically, is that a future extension might allow you to combine
|
||||
several sampler results, making it possible to combine a profile
|
||||
analysis from several individual runs, possibly from different
|
||||
machines.}
|
||||
|
||||
]}
|
67
collects/profile/scribblings/toplevel.scrbl
Normal file
67
collects/profile/scribblings/toplevel.scrbl
Normal file
|
@ -0,0 +1,67 @@
|
|||
#lang scribble/doc
|
||||
|
||||
@(require scribble/manual
|
||||
(for-label scheme profile profile/sampler
|
||||
(only-in profile/analyzer analyze-samples)
|
||||
(prefix-in text: profile/render-text)))
|
||||
|
||||
@title{Toplevel Interface}
|
||||
|
||||
@defmodule[profile]
|
||||
|
||||
This module provides one procedure and one macroxs that are convenient
|
||||
high-level entry points for timing expressions. This hides the
|
||||
details that are available through other parts of the library, and is
|
||||
intended as a convenient tool for profiling code.
|
||||
|
||||
@defproc[(profile-thunk [thunk (-> any/c)]
|
||||
[#:delay delay nonnegative-number? 0.05]
|
||||
[#:repeat iterations exact-nonnegative-integer? 1]
|
||||
[#:threads threads? any/c #f]
|
||||
[#:render renderer text:render])
|
||||
void?]{
|
||||
|
||||
Executes the given thunk while collecting profiling data, and render
|
||||
this data when done. Keyword arguments can customize the profiling:
|
||||
@itemize[
|
||||
|
||||
@item{The profiler works by @scheme[create-sampler] starting a
|
||||
``sampler'' thread whose job is to collect stack samples
|
||||
periodically (using @scheme[continuation-mark-set->context]).
|
||||
@scheme[delay] determines the amount of time the sampler
|
||||
@scheme[sleep]s for between samples. Note that this is will be
|
||||
close, but not identical to, the frequency in which data is actually
|
||||
sampled.}
|
||||
|
||||
@item{When the profiled computation takes a short amount of time, the
|
||||
collected data will not be accurate. In this case, you can specify
|
||||
an @scheme[iterations] argument to repeat the evaluation a number of
|
||||
times which will improve the accuracy of the resulting report.}
|
||||
|
||||
@item{Normally, the sampler collects snapshots of the
|
||||
@scheme[current-thread]'s stack. If there is some computation that
|
||||
happens on a different thread, that work will not be reflected in
|
||||
the results: the only effect will be suspiciously small value for
|
||||
the observed time, because the collected data is taking into account
|
||||
the cpu time that the thread actually performed (it uses
|
||||
@scheme[current-process-milliseconds] with the running thread as an
|
||||
argument). Specifying a non-@scheme[#f] value for the
|
||||
@scheme[threads?] argument will arrange for all threads that are
|
||||
started during the evaluation to be tracked. Note that this means
|
||||
that the computation will actually run in a new sub-custodian, as
|
||||
this is the only way to be able to track such threads.}
|
||||
|
||||
@item{Once the computation has finished, the sampler is stopped, and
|
||||
the accumulated data is collected. It is then analyzed by
|
||||
@scheme[analyze-samples], and the analyzed profile data is fed into
|
||||
a renderer. Use an identity function (@scheme[values]) to get the
|
||||
analyzed result, and render it yourself, or use one of the existing
|
||||
renderers (see @secref["renderers"]).}
|
||||
|
||||
]}
|
||||
|
||||
@defform[(profile expr keyword-arguments ...)]{
|
||||
|
||||
A macro version of @scheme[profile-thunk]. The keyword arguments can
|
||||
be specified in the same was as for a function call: they can appear
|
||||
before and/or after the expression to be profiled.}
|
37
collects/profile/utils.ss
Normal file
37
collects/profile/utils.ss
Normal file
|
@ -0,0 +1,37 @@
|
|||
#lang scheme/base
|
||||
|
||||
(provide format-percent format-source get-hidden)
|
||||
(require "analyzer.ss")
|
||||
|
||||
(define (format-percent percent)
|
||||
(let ([percent (inexact->exact (round (* percent 1000)))])
|
||||
(format "~a.~a%" (quotient percent 10) (modulo percent 10))))
|
||||
|
||||
(define (format-source src)
|
||||
(if src
|
||||
(format "~a:~a"
|
||||
(srcloc-source src)
|
||||
(if (srcloc-line src)
|
||||
(format "~a:~a" (srcloc-line src) (srcloc-column src))
|
||||
(format "#~a" (srcloc-position src))))
|
||||
"(unknown source)"))
|
||||
|
||||
;; hide a node if its self time is smaller than the self threshold *and* all of
|
||||
;; its edges are below the sub-node threshold too -- this avoids confusing
|
||||
;; output where a node does not have an entry but appears as a caller/callee.
|
||||
(define (get-hidden profile hide-self% hide-subs%)
|
||||
(define self% (or hide-self% 0))
|
||||
(define subs% (or hide-subs% 0))
|
||||
(define total-time (profile-total-time profile))
|
||||
(define (hide? node)
|
||||
(define (hide-sub? get-subs edge-sub edge-sub-time)
|
||||
(define %s
|
||||
(map (lambda (edge)
|
||||
(let ([total (node-total (edge-sub edge))])
|
||||
(if (zero? total) 0 (/ (edge-sub-time edge) total))))
|
||||
(get-subs node)))
|
||||
(subs% . >= . (apply max %s)))
|
||||
(and (self% . >= . (/ (node-self node) total-time))
|
||||
(hide-sub? node-callees edge-callee edge-caller-time)
|
||||
(hide-sub? node-callers edge-caller edge-callee-time)))
|
||||
(if (or (> self% 0) (> subs% 0)) (filter hide? (profile-nodes profile)) '()))
|
104
collects/tests/profile/main.ss
Normal file
104
collects/tests/profile/main.ss
Normal file
|
@ -0,0 +1,104 @@
|
|||
#lang scheme/base
|
||||
|
||||
(require tests/eli-tester profile/analyzer scheme/match scheme/list)
|
||||
|
||||
(define A '(A . #f))
|
||||
(define B '(B . #f))
|
||||
(define C '(C . #f))
|
||||
|
||||
(define (analyze cpu+lists)
|
||||
(profile->sexpr
|
||||
(analyze-samples
|
||||
(cons (car cpu+lists)
|
||||
(map (lambda (x) (append (take x 2) (reverse (drop x 2))))
|
||||
(reverse (cdr cpu+lists)))))))
|
||||
|
||||
(define (profile->sexpr prof)
|
||||
(define (node-id* node)
|
||||
(or (node-id node) (if (node-src node) '??? '*)))
|
||||
(define (edges->sexprs node get get-time)
|
||||
(for/list ([edge (get node)])
|
||||
`(,(node-id* (edge-caller edge)) -> ,(node-id* (edge-callee edge))
|
||||
time= ,(get-time edge)
|
||||
total= ,(edge-time edge))))
|
||||
(define (node->sexpr node)
|
||||
`(,(node-id* node)
|
||||
total= ,(node-total node)
|
||||
self= ,(node-self node)
|
||||
callers: ,@(edges->sexprs node node-callers edge-caller-time)
|
||||
callees: ,@(edges->sexprs node node-callees edge-callee-time)
|
||||
threads= ,(node-thread-ids node)))
|
||||
`(total= ,(profile-total-time prof)
|
||||
samples= ,(profile-sample-number prof)
|
||||
cpu= ,(profile-cpu-time prof)
|
||||
thread-times= ,(profile-thread-times prof)
|
||||
,@(map node->sexpr (cons (profile-*-node prof) (profile-nodes prof)))))
|
||||
|
||||
(test
|
||||
|
||||
(match (analyze `(10
|
||||
[0 0 ,A]
|
||||
[0 1 ,A]))
|
||||
[`(total= 2 samples= 2 cpu= 10 thread-times= ([0 . 2])
|
||||
[* total= 2 self= 0
|
||||
callers: [A -> * time= 2 total= 2]
|
||||
callees: [* -> A time= 2 total= 2]
|
||||
threads= ()]
|
||||
[A total= 2 self= 2
|
||||
callers: [* -> A time= 2 total= 2]
|
||||
callees: [A -> * time= 2 total= 2]
|
||||
threads= (0)])
|
||||
'ok]
|
||||
[bad (error 'test ">>> ~s" bad)])
|
||||
|
||||
(match (analyze `(10
|
||||
[0 0 ,A ,B ,A]
|
||||
[0 1 ,A ,B ,A]))
|
||||
[`(total= 2 samples= 2 cpu= 10 thread-times= ([0 . 2])
|
||||
[* total= 2 self= 0
|
||||
callers: [A -> * time= 2 total= 2]
|
||||
callees: [* -> A time= 2 total= 2]
|
||||
threads= ()]
|
||||
[A total= 2 self= 2
|
||||
callers: [B -> A time= 2/2 total= 2]
|
||||
[* -> A time= 2/2 total= 2]
|
||||
callees: [A -> B time= 2/2 total= 2]
|
||||
[A -> * time= 2/2 total= 2]
|
||||
threads= (0)]
|
||||
[B total= 2 self= 0
|
||||
callers: [A -> B time= 2 total= 2]
|
||||
callees: [B -> A time= 2 total= 2]
|
||||
threads= (0)])
|
||||
'ok]
|
||||
[bad (error 'test ">>> ~s" bad)])
|
||||
|
||||
(match (analyze `(10
|
||||
[0 0 ,A ,B ,A]
|
||||
[0 1 ,A ,C ,A]
|
||||
[0 2 ,A ,C ,A]
|
||||
[0 3 ,A ,C ,A]))
|
||||
[`(total= 4 samples= 4 cpu= 10 thread-times= ([0 . 4])
|
||||
[* total= 4 self= 0
|
||||
callers: [A -> * time= 4 total= 4]
|
||||
callees: [* -> A time= 4 total= 4]
|
||||
threads= ()]
|
||||
[A total= 4 self= 4
|
||||
callers: [* -> A time= 4/2 total= 4]
|
||||
[C -> A time= 3/2 total= 3]
|
||||
[B -> A time= 1/2 total= 1]
|
||||
callees: [A -> * time= 4/2 total= 4]
|
||||
[A -> C time= 3/2 total= 3]
|
||||
[A -> B time= 1/2 total= 1]
|
||||
threads= (0)]
|
||||
[C total= 3 self= 0
|
||||
callers: [A -> C time= 3 total= 3]
|
||||
callees: [C -> A time= 3 total= 3]
|
||||
threads= (0)]
|
||||
[B total= 1 self= 0
|
||||
callers: [A -> B time= 1 total= 1]
|
||||
callees: [B -> A time= 1 total= 1]
|
||||
threads= (0)])
|
||||
'ok]
|
||||
[bad (error 'test ">>> ~s" bad)])
|
||||
|
||||
)
|
Loading…
Reference in New Issue
Block a user