#lang scheme/base ;; Analyzer for the sampler results (provide analyze-samples) (require scheme/list) ;; An encapsulation of an analyzed profile call graph: ;; - total-time: the total time observed in msec (this is generally different ;; than the time it took to run the profile). ;; - sample-number: the number of samples taken. ;; - thread-times: a list of ( . msec) for the time spent in ;; observed threads. ;; - nodes: the list of call-graph nodes sorted by their total time. ;; - *-node: a special node that is connected as a "caller" for all toplevel ;; functions and a "callee" for all leaf functions. It will also be ;; identifiable by having both id and src fields being #f. Can be used to ;; start a graph traversal from the top or the bottom. (provide (struct-out profile)) (define-struct profile (total-time cpu-time sample-number thread-times nodes *-node)) ;; An entry for a single profiled function: ;; - id, src: the corresponding values from `continuation-mark-set->context'. ;; - thread-ids: the list of thread identifiers this function has been seen in. ;; - total: total msecs it participated in (= time in it, including callees). ;; - self: msecs where it was at the top of the stack (= time in its own code). ;; - callers, callees: a list of `edge' values for the time spent while it was ;; called by the repective , or it called it, sorted in decreasing msec ;; time. ;; Note that the sum of caller/callee edges including the special `*-node' ;; should be equal to the `total' time. So the edge from/to the `*-node' can ;; be used to get the time spent as a leaf or as a root divided by the number ;; of time the function appeared on the stack: so this value can be displayed ;; in the call-graph and the numbers will sum up nicely to a 100%. (provide (struct-out node)) (define-struct node (id src thread-ids total self callers callees) #:mutable #:property prop:custom-write (lambda (node o w?) (fprintf o "#" (or (node-id node) '???)))) ;; An edge representing function calls between two nodes: ;; - time: the total time spent while the call was somewhere on the stack. ;; - caller, callee: the two relevant `node' values. ;; - caller-time, callee-time: the time that the caller/callee spent in this ;; call (different from the above time because each stack sample's time is ;; divided by the number of times the caller/callee appears in that slice). (provide (struct-out edge)) (define-struct edge (total caller caller-time callee callee-time) #:mutable #:property prop:custom-write (lambda (edge o w?) (fprintf o "#" (or (node-id (edge-caller edge)) '???) (or (node-id (edge-callee edge)) '???)))) (define with-hash:not-found (gensym)) (define-syntax-rule (with-hash ) (let ([t ] [k ]) (let ([v (hash-ref t k with-hash:not-found)]) (if (eq? v with-hash:not-found) (let ([v ]) (hash-set! t k v) v) v)))) ;; This function analyzes the output of the sampler. Returns a `profile' ;; struct holding a list of `node' values, each one representing a node in the ;; call graph, with the relevant information filled in. The results are sorted ;; using a topological sort from the top, and by the total time for nodes at ;; the same level. (define (analyze-samples cpu-time+samples) (define cpu-time (car cpu-time+samples)) (define samples (cdr cpu-time+samples)) (define samples-by-thread (let ([by-thread (split-by-thread samples)]) (for ([samples (in-vector by-thread)] [i (in-naturals 0)]) (vector-set! by-thread i (get-times samples))) by-thread)) (define id+src->node-hash (make-hasheq)) (define (id+src->node id+src) (with-hash id+src->node-hash id+src (make-node (car id+src) (cdr id+src) '() 0 0 '() '()))) ;; special node that is the caller of toplevels and callee of leaves (define *-node (id+src->node '(#f . #f))) (define call->edge (let ([t (make-hasheq)]) (lambda (ler lee) (with-hash (with-hash t ler (make-hasheq)) lee (let ([e (make-edge 0 ler 0 lee 0)]) (set-node-callers! lee (cons e (node-callers lee))) (set-node-callees! ler (cons e (node-callees ler))) e))))) (define total-time 0) (define thread-times (make-vector (vector-length samples-by-thread) 0)) (for ([thread-samples (in-vector samples-by-thread)] [thread-id (in-naturals 0)] #:when #t [sample (in-list thread-samples)]) (define msecs (car sample)) (define (connect ler lee ler# lee#) (define edge (call->edge ler lee)) (set-edge-caller-time! edge (+ (edge-caller-time edge) (/ msecs lee#))) (set-edge-callee-time! edge (+ (edge-callee-time edge) (/ msecs ler#))) edge) (define stack ; the stack snapshot, translated to `node' values (for/list ([id+src (in-list (cdr sample))]) (let* ([node (id+src->node id+src)] [tids (node-thread-ids node)]) (unless (memq thread-id tids) (set-node-thread-ids! node (cons thread-id tids))) node))) (define counts (get-counts stack)) (define stack+counts (map (lambda (x) (assq x counts)) stack)) (define edges (if (null? stack) '() (append (let ([first (car stack+counts)] [last (last stack+counts)]) (list (connect *-node (car last) 1 (cdr last)) (connect (car first) *-node (cdr first) 1))) (for/list ([callee (in-list stack+counts)] [caller (in-list (cdr stack+counts))]) (connect (car caller) (car callee) (cdr caller) (cdr callee)))))) (set! total-time (+ msecs total-time)) (for ([p (in-list counts)]) (set-node-total! (car p) (+ msecs (node-total (car p))))) (for ([e (remove-duplicates edges eq?)]) (set-edge-total! e (+ msecs (edge-total e)))) (vector-set! thread-times thread-id (+ msecs (vector-ref thread-times thread-id))) (when (pair? stack) (set-node-self! (car stack) (+ (node-self (car stack)) msecs)))) (set-node-total! *-node total-time) ;; convert the nodes from the hash to a list, do a topological sort, and then ;; sort by total time (combining both guarantees(?) sensible order) (let ([nodes (remq *-node (topological-sort *-node (lambda (nodes) (sort nodes > #:key node-total))))]) ;; sort all the edges in the nodes according to total time (for ([n (in-list nodes)]) (set-node-callees! n (sort (node-callees n) > #:key edge-callee-time)) (set-node-callers! n (sort (node-callers n) > #:key edge-caller-time))) (make-profile total-time cpu-time (length samples) (for/list ([time (in-vector thread-times)] [n (in-naturals 0)]) (cons n time)) nodes *-node))) ;; A simple topological sort of nodes using BFS, starting from node `x' which ;; will be the special *-node. `subsort' is a `resolver' function to sort ;; nodes on the same level. (define (topological-sort x subsort) (let loop ([todo (list x)] [seen (list x)]) (if (null? todo) '() (let* ([next (append-map (lambda (x) (subsort (map edge-callee (node-callees x)))) todo)] [next (remq* seen (remove-duplicates next))]) (append todo (loop next (append next seen))))))) #| (define (node id) (make-node id #f '() 0 0 '() '())) (define (X . -> . Y) (let ([e (make-edge 0 X 0 Y 0)]) (set-node-callers! Y (cons e (node-callers Y))) (set-node-callees! X (cons e (node-callees X))))) (define A (node 'A)) (define B (node 'B)) (define C (node 'C)) (A . -> . B) (B . -> . C) (topological-sort A 3) |# ;; Groups raw samples by their thread-id, returns a vector with a field for ;; each thread id holding the sample data for that thread. The samples in ;; these are reversed (so they'll be sorted going forward in time). (define (split-by-thread samples) (define threads (make-vector (add1 (for/fold ([n -1]) ([sample (in-list samples)]) (max (car sample) n))) '())) (for ([sample (in-list samples)]) (let ([id (car sample)] [data (cdr sample)]) (vector-set! threads id (cons data (vector-ref threads id))))) threads) #| (equal? (split-by-thread '()) '#()) (equal? (split-by-thread '([0 x])) '#([(x)])) (equal? (split-by-thread '([0 x] [0 y] [0 z])) '#([(z) (y) (x)])) (equal? (split-by-thread '([0 x] [1 y] [2 z])) '#([(x)] [(y)] [(z)])) (equal? (split-by-thread '([0 x1] [1 y1] [0 x2] [2 z1] [0 x3] [2 z2])) '#([(x3) (x2) (x1)] [(y1)] [(z2) (z1)])) |# ;; gets a list of thread-id and data for that thread beginning with the ;; millisecond count, and returns a similar list where the samples begin with ;; the time spent for that sample. The time spent is taken as half of the two ;; touching ranges; for example, if there are three samples showing snapshot ;; times of 10, 20, 60, then the middle one is assumed to have a time of 25. ;; For the first and last samples, the time is twice the half of the single ;; touching range -- with this example, this would be 10 for the first and 40 ;; for the last. If there is a thread with just one sample, it is dropped. (define (get-times samples) (cond ;; nothing to do [(null? samples) '()] ;; throw out a single sample [(null? (cdr samples)) '()] [else (let loop ([samples samples] [prevs (cons #f (map car samples))] [r '()]) (if (null? samples) (reverse r) (let* ([prev (car prevs)] [cur (caar samples)] [data (cdar samples)] [prevs (cdr prevs)] [samples (cdr samples)] [next (and (pair? samples) (caar samples))]) (loop samples prevs (cons (cons (if next ;; not the last: there must be a next (if prev (/ (- next prev) 2) (- next cur)) ;; last one: there must be a prev (- cur prev)) data) r)))))])) #| (equal? (get-times '()) '()) (equal? (get-times '([10 a])) '()) (equal? (get-times '([10 a] [20 b])) '([10 a] [10 b])) (equal? (get-times '([10 a] [20 b] [60 c])) '([10 a] [25 b] [40 c])) (equal? (get-times '([10 a] [20 b] [30 c] [40 d])) '([10 a] [10 b] [10 c] [10 d])) (equal? (get-times '([10 a] [20 b] [60 c] [80 d])) '([10 a] [25 b] [30 c] [20 d])) |# ;; returns a list of (cons item occurrences) for the items in l (define (get-counts l) (let loop ([l l] [r '()]) (if (null? l) r (let ([1st (car l)]) (let loop* ([l1 '()] [c 1] [l (cdr l)]) (cond [(null? l) (loop l1 (cons (cons 1st c) r))] [(eq? 1st (car l)) (loop* l1 (add1 c) (cdr l))] [else (loop* (cons (car l) l1) c (cdr l))])))))) #| (equal? (get-counts '()) '()) (equal? (get-counts '(1)) '([1 . 1])) (equal? (get-counts '(1 1 1)) '([1 . 3])) (define (set=? xs ys) (null? (append (remove* xs ys) (remove* ys xs)))) (set=? (get-counts '(1 2 3)) '([1 . 1] [2 . 1] [3 . 1])) (set=? (get-counts '(1 2 2 3 3 3)) '([1 . 1] [2 . 2] [3 . 3])) (set=? (get-counts '(3 1 2 3 2 3)) '([1 . 1] [2 . 2] [3 . 3])) |#