#lang racket/base (require "structs.rkt" racket/list) ;; Format a percent number, possibly doing the division too. If we do the ;; division, then be careful: if we're dividing by zero, then make the result ;; zero. This is useful if the total time is zero because we didn't see any ;; activity (for example, the profiled code is just doing a `sleep'), in which ;; case all times will be 0. (provide format-percent) (define format-percent (case-lambda [(percent) (define p (inexact->exact (round (* percent 1000)))) (format "~a.~a%" (quotient p 10) (modulo p 10))] [(x y) (format-percent (if (zero? y) 0 (/ x y)))])) (provide format-source) (define (format-source src) (if src (format "~a:~a" (srcloc-source src) (if (srcloc-line src) (format "~a:~a" (srcloc-line src) (srcloc-column src)) (format "#~a" (srcloc-position src)))) "(unknown source)")) ;; Hide a node if its self time is smaller than the self threshold *and* all of ;; its edges are below the sub-node threshold too -- this avoids confusing ;; output where a node does not have an entry but appears as a caller/callee. (provide get-hidden) (define (get-hidden profile hide-self% hide-subs%) (define self% (or hide-self% 0)) (define subs% (or hide-subs% 0)) (define total-time (profile-total-time profile)) (define (hide? node) (define (hide-sub? get-subs edge-sub edge-sub-time) (define %s (map (λ (edge) (define total (node-total (edge-sub edge))) (if (zero? total) 0 (/ (edge-sub-time edge) total))) (get-subs node))) (subs% . >= . (apply max %s))) (and (self% . >= . (/ (node-self node) total-time)) (hide-sub? node-callees edge-callee edge-caller-time) (hide-sub? node-callers edge-caller edge-callee-time))) (cond [(and (<= self% 0) (<= subs% 0)) '()] [(zero? total-time) (profile-nodes profile)] [else (filter hide? (profile-nodes profile))])) ;; A topological sort of nodes, starting from node `root' (which will be given ;; as the special *-node). The result is a list of node lists, each one ;; corresponds to one level. Conceptually, the root node is always the only ;; item in the first level, so it is not included in the result. This is done ;; by assigning layers to nodes in a similar way to section 9.1 of "Graph ;; Drawing: Algorithms for the Visualization of Graphs" by Tollis, Di Battista, ;; Eades, and Tamassia. It uses a technique similar to the one described in ;; section 9.4 for removing cycles in the input graph, but improved by the fact ;; that we have weights on input/output edges (this is the only point that is ;; specific to the fact that it's a profiler graph). Note that this is useful ;; for a graphical rendering of the results, but it's also useful to sort the ;; results in a way that makes more sense. (provide topological-sort) (define (topological-sort root) ;; Make `nodes+io-times' map a node to an mcons of total input and total ;; output times ignoring edges to/from the *-node and self edges, the order ;; is the reverse of how we scan the graph (define (get-node+io node) (define (sum node-callers/lees edge-caller/lee edge-callee/ler-time) (for/fold ([sum 0]) ([e (in-list (node-callers/lees node))]) (define n (edge-caller/lee e)) (if (or (eq? n node) (eq? n root)) sum (+ sum (edge-callee/ler-time e))))) (cons node (mcons (sum node-callers edge-caller edge-callee-time) (sum node-callees edge-callee edge-caller-time)))) (define nodes+io-times (let loop ([todo (list root)] [r '()]) (if (pair? todo) (let* ([cur (car todo)] [todo (cdr todo)] [r (if (eq? cur root) r (cons (get-node+io cur) r))]) (loop (append todo ; append new things in the end, so it's a BFS (filter-map (λ (e) (define lee (edge-callee e)) (and (not (memq lee todo)) (not (assq lee r)) lee)) (node-callees cur))) r)) ;; note: the result does not include the root node r))) ;; Now create a linear order similar to the way section 9.4 describes, except ;; that this uses the total caller/callee times to get an even better ;; ordering (also, look for sources and sinks in every step). Note that the ;; list we scan is in reverse order. (define acyclic-order (let loop ([todo nodes+io-times] [rev-left '()] [right '()]) ;; heuristic for best sources: the ones with the lowest intime/outtime (define (best-sources) (let loop ([todo todo] [r '()] [best #f]) (if (null? todo) r (let* ([1st (car todo)] [rest (cdr todo)] [ratio (/ (mcar (cdr 1st)) (mcdr (cdr 1st)))]) (if (or (not best) (ratio . < . best)) (loop rest (list 1st) ratio) (loop rest (if (ratio . > . best) r (cons 1st r)) best)))))) (if (pair? todo) (let* ([sinks (filter (λ (x) (zero? (mcdr (cdr x)))) todo)] [todo (remq* sinks todo)] [sources (filter (λ (x) (zero? (mcar (cdr x)))) todo)] ;; if we have no sources and sinks, use the heuristic [sources (if (and (null? sinks) (null? sources)) (best-sources) sources)] [todo (remq* sources todo)] [sinks (map car sinks)] [sources (map car sources)]) ;; remove the source and sink times from the rest (for* ([nodes (in-list (list sources sinks))] [n (in-list nodes)]) (for ([e (in-list (node-callees n))]) (define x (assq (edge-callee e) todo)) (when x (set-mcar! (cdr x) (- (mcar (cdr x)) (edge-callee-time e))))) (for ([e (in-list (node-callers n))]) (define x (assq (edge-caller e) todo)) (when x (set-mcdr! (cdr x) (- (mcdr (cdr x)) (edge-caller-time e)))))) (loop todo (append (reverse sources) rev-left) (append sinks right))) ;; all done, get the order (append (reverse rev-left) right)))) ;; We're done, so make `t' map nodes to their callers with only edges that ;; are consistent with this ordering (define t (let ([t (make-hasheq)]) (let loop ([nodes acyclic-order]) (when (pair? nodes) (define ler (car nodes)) (define rest (cdr nodes)) (unless (hash-ref t ler #f) (hash-set! t ler '())) (for ([e (in-list (node-callees ler))]) (define lee (edge-callee e)) (when (memq lee rest) ; only consistent edges ;; note that we connect each pair of nodes at most once, and ;; never a node with itself (hash-set! t lee (cons ler (hash-ref t lee '()))))) (loop rest))) t)) ;; finally, assign layers using the simple method from section 9.1: sources ;; are at 0, and other nodes are placed at one layer after their parents (define height 0) (for ([node (in-list acyclic-order)]) (let loop ([node node]) (define x (hash-ref t node)) (if (number? x) x (let ([max (add1 (for/fold ([m -1]) ([ler (in-list x)]) (max m (loop ler))))]) (when (max . > . height) (set! height max)) (hash-set! t node max) max)))) (define layers (make-vector (add1 height) '())) (for ([node (in-list acyclic-order)]) (unless (eq? node root) ; filter out the root (define l (hash-ref t node)) (vector-set! layers l (cons node (vector-ref layers l))))) ;; in almost all cases, the root is the full first layer (in a few cases it ;; can be there with another node, eg (* -> A 2-> B 3-> A)), but be safe and ;; look for any empty layer (filter pair? (vector->list layers))) ;; gets a list of thread-id and data for that thread beginning with the ;; millisecond count, and returns a similar list where the samples begin with ;; the time spent for that sample. The time spent is taken as half of the two ;; touching ranges; for example, if there are three samples showing snapshot ;; times of 10, 20, 60, then the middle one is assumed to have a time of 25. ;; For the first and last samples, the time is twice the half of the single ;; touching range -- with this example, this would be 10 for the first and 40 ;; for the last. If there is a thread with just one sample, it is dropped. (provide get-times) (define (get-times samples) (cond ;; nothing to do [(null? samples) '()] ;; throw out a single sample [(null? (cdr samples)) '()] [else (let loop ([samples samples] [prevs (cons #f (map car samples))] [r '()]) (if (null? samples) (reverse r) (let* ([prev (car prevs)] [cur (caar samples)] [data (cdar samples)] [prevs (cdr prevs)] [samples (cdr samples)] [next (and (pair? samples) (caar samples))]) (loop samples prevs (cons (cons (if next ;; not the last: there must be a next (if prev (/ (- next prev) 2) (- next cur)) ;; last one: there must be a prev (- cur prev)) data) r)))))])) (module+ test (require rackunit) (check-equal? (get-times '()) '()) (check-equal? (get-times '([10 a])) '()) (check-equal? (get-times '([10 a] [20 b])) '([10 a] [10 b])) (check-equal? (get-times '([10 a] [20 b] [60 c])) '([10 a] [25 b] [40 c])) (check-equal? (get-times '([10 a] [20 b] [30 c] [40 d])) '([10 a] [10 b] [10 c] [10 d])) (check-equal? (get-times '([10 a] [20 b] [60 c] [80 d])) '([10 a] [25 b] [30 c] [20 d])))