From e29c411d539ccac30d7ba97d6683524a5588c0e9 Mon Sep 17 00:00:00 2001 From: Eli Barzilay Date: Mon, 30 Mar 2009 00:15:38 +0000 Subject: [PATCH] statistical profiler, core and text interface part done svn: r14340 original commit: 96d7291bfb1e4f4a600ecda14f314ccccfa2ff48 --- collects/profile/analyzer.ss | 258 +++++++++++++++++++ collects/profile/main.ss | 69 +++++ collects/profile/render-graphviz.ss | 41 +++ collects/profile/render-text.ss | 137 ++++++++++ collects/profile/sampler.ss | 134 ++++++++++ collects/profile/scribblings/analyzer.scrbl | 156 +++++++++++ collects/profile/scribblings/info.ss | 3 + collects/profile/scribblings/profile.scrbl | 25 ++ collects/profile/scribblings/renderers.scrbl | 129 ++++++++++ collects/profile/scribblings/sampler.scrbl | 60 +++++ collects/profile/scribblings/toplevel.scrbl | 67 +++++ collects/profile/utils.ss | 37 +++ collects/tests/profile/main.ss | 104 ++++++++ 13 files changed, 1220 insertions(+) create mode 100644 collects/profile/analyzer.ss create mode 100644 collects/profile/main.ss create mode 100644 collects/profile/render-graphviz.ss create mode 100644 collects/profile/render-text.ss create mode 100644 collects/profile/sampler.ss create mode 100644 collects/profile/scribblings/analyzer.scrbl create mode 100644 collects/profile/scribblings/info.ss create mode 100644 collects/profile/scribblings/profile.scrbl create mode 100644 collects/profile/scribblings/renderers.scrbl create mode 100644 collects/profile/scribblings/sampler.scrbl create mode 100644 collects/profile/scribblings/toplevel.scrbl create mode 100644 collects/profile/utils.ss create mode 100644 collects/tests/profile/main.ss diff --git a/collects/profile/analyzer.ss b/collects/profile/analyzer.ss new file mode 100644 index 0000000..4da0faa --- /dev/null +++ b/collects/profile/analyzer.ss @@ -0,0 +1,258 @@ +#lang scheme/base + +;; Analyzer for the sampler results + +(provide analyze-samples) + +(require scheme/list) + +;; An encapsulation of an analyzed profile call graph: +;; - total-time: the total time observed in msec (this is generally different +;; than the time it took to run the profile). +;; - sample-number: the number of samples taken. +;; - thread-times: a list of ( . msec) for the time spent in +;; observed threads. +;; - nodes: the list of call-graph nodes sorted by their total time. +;; - *-node: a special node that is connected as a "caller" for all toplevel +;; functions and a "callee" for all leaf functions. It will also be +;; identifiable by having both id and src fields being #f. Can be used to +;; start a graph traversal from the top or the bottom. +(provide (struct-out profile)) +(define-struct profile + (total-time cpu-time sample-number thread-times nodes *-node)) + +;; An entry for a single profiled function: +;; - id, src: the corresponding values from `continuation-mark-set->context'. +;; - thread-ids: the list of thread identifiers this function has been seen in. +;; - total: total msecs it participated in (= time in it, including callees). +;; - self: msecs where it was at the top of the stack (= time in its own code). +;; - callers, callees: a list of `edge' values for the time spent while it was +;; called by the repective , or it called it, sorted in decreasing msec +;; time. +;; Note that the sum of caller/callee edges including the special `*-node' +;; should be equal to the `total' time. So the edge from/to the `*-node' can +;; be used to get the time spent as a leaf or as a root divided by the number +;; of time the function appeared on the stack: so this value can be displayed +;; in the call-graph and the numbers will sum up nicely to a 100%. +(provide (struct-out node)) +(define-struct node (id src thread-ids total self callers callees) + #:mutable + #:property prop:custom-write + (lambda (node o w?) (fprintf o "#" (or (node-id node) '???)))) + +;; An edge representing function calls between two nodes: +;; - time: the total time spent while the call was somewhere on the stack. +;; - caller, callee: the two relevant `node' values. +;; - caller-time, callee-time: the time that the caller/callee spent in this +;; call (different from the above time because each stack sample's time is +;; divided by the number of times the caller/callee appears in that slice). +(provide (struct-out edge)) +(define-struct edge (total caller caller-time callee callee-time) + #:mutable + #:property prop:custom-write + (lambda (edge o w?) + (fprintf o "#" + (or (node-id (edge-caller edge)) '???) + (or (node-id (edge-callee edge)) '???)))) + +(define with-hash:not-found (gensym)) +(define-syntax-rule (with-hash ) + (let ([t ] [k ]) + (let ([v (hash-ref t k with-hash:not-found)]) + (if (eq? v with-hash:not-found) + (let ([v ]) (hash-set! t k v) v) + v)))) + +;; This function analyzes the output of the sampler. Returns a `profile' +;; struct holding a list of `node' values, each one representing a node in the +;; call graph, with the relevant information filled in. The results are sorted +;; by the total time. +(define (analyze-samples cpu-time+samples) + (define cpu-time (car cpu-time+samples)) + (define samples (cdr cpu-time+samples)) + (define samples-by-thread + (let ([by-thread (split-by-thread samples)]) + (for ([samples (in-vector by-thread)] [i (in-naturals 0)]) + (vector-set! by-thread i (get-times samples))) + by-thread)) + (define id+src->node-hash (make-hasheq)) + (define (id+src->node id+src) + (with-hash id+src->node-hash id+src + (make-node (car id+src) (cdr id+src) '() 0 0 '() '()))) + ;; special node that is the caller of toplevels and callee of leaves + (define *-node (id+src->node '(#f . #f))) + (define call->edge + (let ([t (make-hasheq)]) + (lambda (ler lee) + (with-hash (with-hash t ler (make-hasheq)) lee + (let ([e (make-edge 0 ler 0 lee 0)]) + (set-node-callers! lee (cons e (node-callers lee))) + (set-node-callees! ler (cons e (node-callees ler))) + e))))) + (define total-time 0) + (define thread-times (make-vector (vector-length samples-by-thread) 0)) + (for ([thread-samples (in-vector samples-by-thread)] + [thread-id (in-naturals 0)] + #:when #t + [sample (in-list thread-samples)]) + (define msecs (car sample)) + (define (connect ler lee ler# lee#) + (define edge (call->edge ler lee)) + (set-edge-caller-time! edge (+ (edge-caller-time edge) (/ msecs lee#))) + (set-edge-callee-time! edge (+ (edge-callee-time edge) (/ msecs ler#))) + edge) + (define stack ; the stack snapshot, translated to `node' values + (for/list ([id+src (in-list (cdr sample))]) + (let* ([node (id+src->node id+src)] [tids (node-thread-ids node)]) + (unless (memq thread-id tids) + (set-node-thread-ids! node (cons thread-id tids))) + node))) + (define counts (get-counts stack)) + (define stack+counts (map (lambda (x) (assq x counts)) stack)) + (define edges + (if (null? stack) + '() + (append (let ([first (car stack+counts)] [last (last stack+counts)]) + (list (connect *-node (car last) 1 (cdr last)) + (connect (car first) *-node (cdr first) 1))) + (for/list ([callee (in-list stack+counts)] + [caller (in-list (cdr stack+counts))]) + (connect (car caller) (car callee) + (cdr caller) (cdr callee)))))) + (set! total-time (+ msecs total-time)) + (for ([p (in-list counts)]) + (set-node-total! (car p) (+ msecs (node-total (car p))))) + (for ([e (remove-duplicates edges eq?)]) + (set-edge-total! e (+ msecs (edge-total e)))) + (vector-set! thread-times thread-id + (+ msecs (vector-ref thread-times thread-id))) + (when (pair? stack) + (set-node-self! (car stack) (+ (node-self (car stack)) msecs)))) + (set-node-total! *-node total-time) + ;; convert the nodes from the hash to a list, do a topological sort, and then + ;; sort by total time (combining both guarantees(?) sensible order) + (let* ([nodes (hash-map id+src->node-hash (lambda (k v) v))] + [nodes (topological-sort *-node (length nodes))] + [nodes (sort (remq *-node nodes) > #:key node-total)]) + ;; sort all the edges in the nodes according to total time + (for ([n (in-list nodes)]) + (set-node-callees! n (sort (node-callees n) > #:key edge-callee-time)) + (set-node-callers! n (sort (node-callers n) > #:key edge-caller-time))) + (make-profile + total-time + cpu-time + (length samples) + (for/list ([time (in-vector thread-times)] [n (in-naturals 0)]) + (cons n time)) + nodes + *-node))) + +;; A simple topological sort of nodes using BFS, starting from node `x' which +;; will be the special *-node. Note that `expected-length' is only given for +;; the sanity check in the end. +(define (topological-sort x expected-length) + (define sorted + (let loop ([todo (list x)] [seen (list x)]) + (if (null? todo) + '() + (let* ([next (append-map (lambda (x) (map edge-caller (node-callers x))) + todo)] + [next (remove-duplicates next)] + [next (remq* seen next)]) + (append todo (loop next (append next seen))))))) + (if (= expected-length (length sorted)) + sorted + (error 'topological "internal error"))) + +;; Groups raw samples by their thread-id, returns a vector with a field for +;; each thread id holding the sample data for that thread. The samples in +;; these are reversed (so they'll be sorted going forward in time). +(define (split-by-thread samples) + (define threads + (make-vector (add1 (for/fold ([n -1]) ([sample (in-list samples)]) + (max (car sample) n))) + '())) + (for ([sample (in-list samples)]) + (let ([id (car sample)] [data (cdr sample)]) + (vector-set! threads id (cons data (vector-ref threads id))))) + threads) +#| +(equal? (split-by-thread '()) + '#()) +(equal? (split-by-thread '([0 x])) + '#([(x)])) +(equal? (split-by-thread '([0 x] [0 y] [0 z])) + '#([(z) (y) (x)])) +(equal? (split-by-thread '([0 x] [1 y] [2 z])) + '#([(x)] [(y)] [(z)])) +(equal? (split-by-thread '([0 x1] [1 y1] [0 x2] [2 z1] [0 x3] [2 z2])) + '#([(x3) (x2) (x1)] [(y1)] [(z2) (z1)])) +|# + +;; gets a list of thread-id and data for that thread beginning with the +;; millisecond count, and returns a similar list where the samples begin with +;; the time spent for that sample. The time spent is taken as half of the two +;; touching ranges; for example, if there are three samples showing snapshot +;; times of 10, 20, 60, then the middle one is assumed to have a time of 25. +;; For the first and last samples, the time is twice the half of the single +;; touching range -- with this example, this would be 10 for the first and 40 +;; for the last. If there is a thread with just one sample, it is dropped. +(define (get-times samples) + (cond + ;; nothing to do + [(null? samples) '()] + ;; throw out a single sample + [(null? (cdr samples)) '()] + [else (let loop ([samples samples] + [prevs (cons #f (map car samples))] + [r '()]) + (if (null? samples) + (reverse r) + (let* ([prev (car prevs)] + [cur (caar samples)] + [data (cdar samples)] + [prevs (cdr prevs)] + [samples (cdr samples)] + [next (and (pair? samples) (caar samples))]) + (loop samples prevs + (cons (cons (if next + ;; not the last: there must be a next + (if prev (/ (- next prev) 2) (- next cur)) + ;; last one: there must be a prev + (- cur prev)) + data) + r)))))])) +#| +(equal? (get-times '()) + '()) +(equal? (get-times '([10 a])) + '()) +(equal? (get-times '([10 a] [20 b])) + '([10 a] [10 b])) +(equal? (get-times '([10 a] [20 b] [60 c])) + '([10 a] [25 b] [40 c])) +(equal? (get-times '([10 a] [20 b] [30 c] [40 d])) + '([10 a] [10 b] [10 c] [10 d])) +(equal? (get-times '([10 a] [20 b] [60 c] [80 d])) + '([10 a] [25 b] [30 c] [20 d])) +|# + +;; returns a list of (cons item occurrences) for the items in l +(define (get-counts l) + (let loop ([l l] [r '()]) + (if (null? l) + r + (let ([1st (car l)]) + (let loop* ([l1 '()] [c 1] [l (cdr l)]) + (cond [(null? l) (loop l1 (cons (cons 1st c) r))] + [(eq? 1st (car l)) (loop* l1 (add1 c) (cdr l))] + [else (loop* (cons (car l) l1) c (cdr l))])))))) +#| +(equal? (get-counts '()) '()) +(equal? (get-counts '(1)) '([1 . 1])) +(equal? (get-counts '(1 1 1)) '([1 . 3])) +(define (set=? xs ys) (null? (append (remove* xs ys) (remove* ys xs)))) +(set=? (get-counts '(1 2 3)) '([1 . 1] [2 . 1] [3 . 1])) +(set=? (get-counts '(1 2 2 3 3 3)) '([1 . 1] [2 . 2] [3 . 3])) +(set=? (get-counts '(3 1 2 3 2 3)) '([1 . 1] [2 . 2] [3 . 3])) +|# diff --git a/collects/profile/main.ss b/collects/profile/main.ss new file mode 100644 index 0000000..d6767d0 --- /dev/null +++ b/collects/profile/main.ss @@ -0,0 +1,69 @@ +#lang scheme/base + +(require "sampler.ss" (except-in "analyzer.ss" profile) + (prefix-in text: "render-text.ss") + (for-syntax scheme/base)) + +(define (profile-thunk thunk + #:delay [delay 0.05] + #:repeat [rpt 1] + #:threads [threads? #f] + #:render [renderer text:render]) + (define cust (and threads? (make-custodian (current-custodian)))) + (define sampler (create-sampler (if threads? + (list cust (current-thread)) + (current-thread)) + delay)) + (define (run) (for ([i (in-range rpt)]) (thunk))) + (with-handlers ([void (lambda (e) + (fprintf (current-error-port) + "profiled thunk error: ~a\n" + (if (exn? e) + (exn-message e) + (format "~e" e))))]) + (if threads? + (parameterize ([current-custodian cust]) (run)) + (run))) + (sampler 'stop) + (renderer (analyze-samples (sampler 'get-snapshots)))) + +(define-syntax (profile stx) + (syntax-case stx () + [(_ x ...) + (let loop ([expr #f] [kwds '()] [xs (syntax->list #'(x ...))]) + (cond + [(null? xs) + (if expr + (with-syntax ([expr expr] [kwds (reverse kwds)]) + #'(profile-thunk (lambda () expr) . kwds)) + (raise-syntax-error 'profile "missing expression" stx))] + [(keyword? (syntax-e (car xs))) + (if (pair? (cdr xs)) + (loop expr (list* (cadr xs) (car xs) kwds) (cddr xs)) + ;; let #%app throw the error + (loop expr (cons (car xs) kwds) (cdr xs)))] + [expr (raise-syntax-error 'profile "redundant expresion given" + stx (car xs))] + [else (loop (car xs) kwds (cdr xs))]))])) + + +(define (fib1 n) (if (<= n 1) n (+ (fib1 (- n 1)) (fib1 (- n 2))))) +(define (fib22 n) (if (<= n 2) 1 (+ (fib22 (- n 1)) (fib22 (- n 2))))) +(define (fib3 n) + (for ([i (in-range 100000000)]) (* i 3)) + (if (<= n 2) 1 (+ (fib22 (- n 1)) (fib22 (- n 2))))) +(define (fibs n) (+ (fib1 n) (fib22 n) (fib3 n))) + +(define (foo n) + (define ch (make-channel)) + (define (bg-fib) (channel-put ch (fib1 n))) + (thread bg-fib) + (list (fib22 n) (channel-get ch))) + +(require "render-graphviz.ss") + +(profile (fibs 35) + ;(dynamic-require '(lib "scribblings/reference/reference.scrbl") #f) + ;(foo 35) + ;#:render render + #:threads #t) diff --git a/collects/profile/render-graphviz.ss b/collects/profile/render-graphviz.ss new file mode 100644 index 0000000..2c6e5c8 --- /dev/null +++ b/collects/profile/render-graphviz.ss @@ -0,0 +1,41 @@ +#lang scheme/base + +(provide render) + +(require "analyzer.ss" "utils.ss") + +(define (render profile + #:hide-self [hide-self% 1/100] + #:hide-subs [hide-subs% 2/100]) + (define *-node (profile-*-node profile)) + (define hidden (get-hidden profile hide-self% hide-subs%)) + (define nodes (remq* hidden (profile-nodes profile))) + (define total-time (profile-total-time profile)) + (define node-> + (let ([t (make-hasheq)]) + (for ([node (in-list nodes)] [idx (in-naturals 1)]) + (let ([id (node-id node)] [src (node-src node)]) + (hash-set! t node + (list (format "node~a" idx) + (if id + (format "~a" id) + (regexp-replace #rx"^.*/" + (format-source (node-src node)) + "")))))) + (lambda (mode node) + ((case mode [(index) car] [(label) cadr]) (hash-ref t node))))) + (define max-self% + (/ (for/fold ([m 0]) ([node (in-list nodes)]) (max m (node-self node))) + total-time)) + (printf "digraph Profile {\n") + (for ([node (in-list nodes)]) + (define self% (/ (node-self node) total-time)) + (printf "~a [" (node-> 'index node)) + (printf "label=~s, " (node-> 'label node)) + (printf "fillcolor=\"1,~a,1\", " (exact->inexact (/ self% max-self%))) + (printf "style=filled];\n") + (for ([edge (in-list (node-callees node))]) + (define callee (edge-callee edge)) + (unless (or (eq? *-node callee) (memq callee hidden)) + (printf "~a -> ~a;\n" (node-> 'index node) (node-> 'index callee))))) + (printf "}\n")) diff --git a/collects/profile/render-text.ss b/collects/profile/render-text.ss new file mode 100644 index 0000000..0688f8f --- /dev/null +++ b/collects/profile/render-text.ss @@ -0,0 +1,137 @@ +#lang at-exp scheme/base + +(provide render) + +(require "analyzer.ss" "utils.ss" scheme/list) + +(define (f:msec msec) + (number->string (round (inexact->exact msec)))) + +(define (f:msec* msec) + (string-append (f:msec msec) "ms")) + +(define (display-table aligns table) + ;; * thunks are used for cells that are ignored when inspecting widths + ;; * chars are used for filler cells + (define (display-line strings) + (printf "~a\n" (regexp-replace #rx" +$" (apply string-append strings) ""))) + (let ([widths (let loop ([table table]) + (let ([table (filter pair? table)]) + (if (null? table) '() + (cons (apply max (filter-map + (lambda (x) + (and (string? (car x)) + (string-length (car x)))) + table)) + (loop (map cdr table))))))]) + (for ([row (in-list table)]) + (display-line + (for/list ([cell (in-list row)] + [width (in-list widths)] + [align (in-list aligns)]) + (let* ([cell (cond [(char? cell) (make-string width cell)] + [(procedure? cell) (cell)] + [else cell])] + [pad (make-string (max 0 (- width (string-length cell))) + #\space)]) + (case align + [(l) (string-append cell pad)] + [(r) (string-append pad cell)] + [else (error 'internal-error "poof")]))))))) + +(define (render profile + #:truncate-source [truncate-source 50] + #:hide-self [hide-self% 1/100] + #:hide-subs [hide-subs% 2/100]) + (define (show . xs) + (let loop ([x xs]) + (cond [(or (not x) (null? x) (void? x)) (void)] + [(pair? x) (loop (car x)) (loop (cdr x))] + [else (display x)])) + (newline)) + (define total-time (profile-total-time profile)) + (define cpu-time (profile-cpu-time profile)) + (define sample-number (profile-sample-number profile)) + (define granularity (/ total-time sample-number)) + (define threads+times (profile-thread-times profile)) + (define *-node (profile-*-node profile)) + (define hidden (get-hidden profile hide-self% hide-subs%)) + (define nodes (remq* hidden (profile-nodes profile))) + (define node-> + (let ([t (make-hasheq)]) + (for ([node (in-list nodes)] [idx (in-naturals 1)]) + (let ([index (format "[~a]" idx)] + [label (format "~a" (or (node-id node) '???))]) + (hash-set! t node (list index label @string-append{@label @index})))) + (lambda (mode node) + ((case mode [(index) car] [(label) cadr] [(sub-label) caddr]) + (hash-ref t node))))) + (define (sep ch) (list ch ch ch ch ch ch ch ch ch ch)) + (define =sep (sep #\=)) + (define -sep (sep #\-)) + @show{ + Profiling results + ----------------- + Total cpu time observed: @f:msec*[total-time] (out of @f:msec*[cpu-time]) + Number of samples taken: @sample-number (once every @f:msec*[granularity]) + } + (when (> (length threads+times) 1) + @show{ Threads observed: @(length threads+times)}) + (when (pair? hidden) + (let* ([hidden (length hidden)] + [nodes (length (profile-nodes profile))] + [self% @string-append{self<@(format-percent (or hide-self% 0))}] + [subs% @string-append{local<@(format-percent (or hide-subs% 0))}] + [%s (cond [(not hide-self%) subs%] + [(not hide-subs%) self%] + [else @string-append{@self% and @subs%}])]) + @show{ (Hiding functions with @|%s|: @|hidden| of @nodes hidden)})) + (newline) + (display-table + '(r l r l l r l l l r l l) + (append* + `(,=sep + (" " " " " "" " " " " "" " " " " Caller") + ("Idx" " " "To""tal " " " "Se""lf " " " "Name+src" "Local%") + (" " " " " ms""(pct)" " " "ms""(pct)" " " " Callee") + ,=sep) + (for/list ([node (in-list nodes)]) + (define index (node-> 'index node)) + (define name (node-> 'label node)) + (define total (node-total node)) + (define totalS (f:msec total)) + (define total% @string-append{(@(format-percent (/ total total-time)))}) + (define self (node-self node)) + (define selfS (f:msec self)) + (define self% @string-append{(@(format-percent (/ self total-time)))}) + (define name+src + (let* ([src (format-source (node-src node))] + [src-len (string-length src)] + [name-len (string-length name)]) + (string-append + name " " + ;; truncate-source only truncates the source + (let* ([n (and truncate-source + ((+ src-len name-len 1) . - . truncate-source))] + [n (and n (positive? n) (- src-len n 3))]) + (cond [(not n) src] + [(n . <= . 0) "..."] + [else (string-append "..." + (substring src (- src-len n)))]))))) + (define (sub get-edges get-node get-node-time) + (for*/list ([edge (in-list (get-edges node))] + [sub (in-list (list (get-node edge)))] ; <-- hack... + #:when (not (or (eq? *-node sub) ; <-- ...for this + (memq sub hidden)))) + (define name (node-> 'sub-label sub)) + (define local% (format-percent (/ (get-node-time edge) total))) + `("" "" "" "" "" "" "" "" + ,(string-append " " name) ,local% + "" ""))) + `(,@(reverse (sub node-callers edge-caller edge-caller-time)) + (,(node-> 'index node) + " " ,totalS ,total% + " " ,selfS ,self% + " " ,(lambda () name+src)) + ,@(sub node-callees edge-callee edge-callee-time) + ,-sep))))) diff --git a/collects/profile/sampler.ss b/collects/profile/sampler.ss new file mode 100644 index 0000000..2f0ef36 --- /dev/null +++ b/collects/profile/sampler.ss @@ -0,0 +1,134 @@ +#lang scheme/base + +;; The core profiler sample collector +;; (This module is a private tool for collecting profiling data, and should not +;; be used as is.) + +(provide create-sampler) + +;; create-sampler : creates a sample collector thread, which tracks the given +;; `to-track' value every `delay' seconds. +;; * The input value can be either a thread (track just that thread), a +;; custodian (track all threads managed by the custodian), or a list of +;; threads and/or custodians. If a custodian is given, it must be +;; subordinate to the current custodian. +;; * The collected values are ( . ), where +;; - The is an integer number identifying the thread, starting +;; from 0. If the collected data has thread ids in a 0..N range +;; (exclusive) , then there were N threads that were observed. (This can +;; be relevant when tracking a custodian, where threads can change +;; dynamically.) +;; - The is the result of `current-process-milliseconds' for +;; the thread that this sample came from. Note that these numbers will not +;; start at 0, since threads are likely to run before a sample is +;; collected. +;; - Finally, the part is a snapshot of the thread's stack, as +;; grabbed by `continuation-mark-set->context'. The the values in these +;; snapshots are interned to reduce memory load. +;; The results are collected sequentially, so they're always sorted from the +;; newest to the oldest. Remember that these results should be considered +;; private for the profiler collection, and can change when more information +;; needs to be collected. +;; * Returns a "controller" function that accepts messages to control the +;; sampler thread. The current set of messages that the controller +;; understands are: +;; - 'pause and 'resume: stops or resumes collecting samples. These messages +;; can be nested. Note that the thread will continue running it just won't +;; collect snapshots. +;; - 'stop: kills the collector thread. Should be called when you have your +;; data. (There is no message to start a new sampler thread, although +;; adding one will not be difficult.) +;; - 'set-tracked! : changes the thread/s and/or custodian/s to track. +;; - 'set-delay! : changes the sampling delay. This means that we won't +;; have a direct correlation between the number of samples and the time +;; they represent -- but the samples are statistical snapshots anyway, and +;; the results are not formulated in terms of time spent. (The time spent +;; could be added of course, but it is best to do that in terms of the +;; start/stop times) +;; - 'get-snapshots: returns the currently collected list of snapshots. Note +;; that this can be called multiple times, each will return the data that +;; is collected up to that point in time. +(define (create-sampler to-track delay) + ;; the collected data + (define snapshots '()) + (define cust (current-custodian)) + ;; intern the entries (which are (cons id/#f srcloc/#f)) + (define entry-table (make-hash)) + (define (intern-entry entry) + (let* ([key (or (cdr entry) (car entry))] + [en (hash-ref entry-table key #f)]) + (if en + ;; ELI: is this sanity check needed? + ;; (if (equal? en entry) + ;; en + ;; (error 'profile "internal error: assumption invalid")) + en + (begin (hash-set! entry-table key entry) entry)))) + (define (validate to-track who) + (let loop ([t to-track]) + (cond + [(thread? t)] + [(list? t) (for-each loop t)] + [(not (custodian? t)) + (raise-type-error + who "thread, custodian, or a list of threads/csutodians" to-track)] + ;; test that it's subordinate + [(with-handlers ([exn:fail:contract? (lambda (_) #t)]) + (custodian-managed-list t cust) #f) + (error who "got a custodian that is not subordinate to current")]))) + (define paused 0) + (define thread-id + (let ([next-id 0] [t (make-weak-hasheq)]) + (lambda (thd) + (or (hash-ref t thd #f) + (let ([id next-id]) + (set! next-id (add1 next-id)) + (hash-set! t thd id) + id))))) + (define (sampler) + (sleep delay) + (when (paused . <= . 0) + (let loop ([t to-track]) + (cond [(thread? t) + (unless (eq? t sampler-thread) + (set! snapshots + (cons (list* (thread-id t) + (current-process-milliseconds t) + (map intern-entry + (continuation-mark-set->context + (continuation-marks t)))) + snapshots)))] + [(custodian? t) (for-each loop (custodian-managed-list t cust))] + ;; cannot assume that it's a list: we might get other values from + ;; a custodian managed list + [(list? t) (for-each loop t)]))) + (sampler)) + (define cpu-time 0) + (define start-time (current-process-milliseconds)) + (define (add-time) + (when (paused . <= . 0) + (let ([cur (current-process-milliseconds)]) + (set! cpu-time (+ cpu-time (- cur start-time))) + (set! start-time cur)))) + (define (ignore-time) + (when (paused . <= . 0) + (set! start-time (current-process-milliseconds)))) + (define sampler-thread + (begin (validate to-track 'create-sampler) + (thread sampler))) + ;; use a sema to avoid mutations from different threads, the sampler thread + ;; is only reading these values so it doesn't use it. + (define sema (make-semaphore 1)) + (define (sampler-controller msg [arg #f]) + (define-syntax-rule (w/sema body ...) + (call-with-semaphore sema (lambda () body ...))) + (case msg + [(pause) (w/sema (add-time) (set! paused (add1 paused)))] + [(resume) (w/sema (set! paused (sub1 paused)) (ignore-time))] + [(stop) (kill-thread sampler-thread) (add-time) (set! paused +inf.0)] + [(set-tracked!) (validate arg 'sampler-controller) + (w/sema (set! to-track arg))] + [(set-delay!) (w/sema (set! delay arg))] + [(get-snapshots) (add-time) (cons cpu-time snapshots)] + [else (error 'sampler-controller "unknown message: ~e" msg)])) + sampler-controller) diff --git a/collects/profile/scribblings/analyzer.scrbl b/collects/profile/scribblings/analyzer.scrbl new file mode 100644 index 0000000..375c395 --- /dev/null +++ b/collects/profile/scribblings/analyzer.scrbl @@ -0,0 +1,156 @@ +#lang scribble/doc + +@(require scribble/manual + (for-label scheme profile/analyzer)) + +@title[#:tag "analyzer"]{Analyzing Profile Data} + +@defmodule[profile/analyzer] + +Once a profile run is done, and the results are collected, the next +step is to analyze the data. In this step the sample time are +computed and summed, a call-graph representing the observed function +calls is built, and per-node and per-edge information is created. +This is the job of the main function provided by +@scheme[profile/analyzer]. + +@defproc[(analyze-samples [raw-sample-data any/c]) + profile?]{ + +This function consumes the raw result of the +@seclink["sampler"]{sampler} (which is given in an undocumented form), +analyzes it, and returns a @scheme[profile] value holding the analyzed +results. Without this function, the results of the sampler are +meaningless.} + + +@defstruct[profile ([total-time exact-nonnegative-integer?] + [cpu-time exact-nonnegative-integer?] + [sample-number exact-nonnegative-integer?] + [thread-times (listof (cons exact-nonnegative-integer? + exact-nonnegative-integer?))] + [nodes (listof node?)] + [*-node node?])]{ + +Represents the analyzed profile result. + +@itemize[ + +@item{@scheme[total-time] is the total observed time (in milliseconds) + included in the profile. This is different than the actual time the + profiling took, due to unaccounted-for time spent in untracked + threads. (E.g., the sampler thread itself.)} + +@item{@scheme[cpu-time] is the actual cpu time consumed by the process + during the profiler's work.} + +@item{@scheme[sample-number] holds the number of samples taken during + the profile. This can be used to compute the average time frame + each of the input samples represented.} + +@item{@scheme[thread-times] holds an association list mapping thread + identifiers to cpu time for the corresponding threads. As samples + are collected, each thread that is observed is assigned a small + integer identifier. These identifers are listed for each function + call, and the total time spent in each thread is in this field.} + +@item{@scheme[nodes] is a list of nodes representing all observed + functions. These nodes are the components of the call-graph that + the analyzer assembles (see the @scheme[edge] field). The nodes are + sorted by decreasing total amount of time (time spent either in the + function or in its callees), and by a top-to-bottom topological + sort.} + +@item{@scheme[*-node] holds a ``special'' node value that is + constructed for every graph. This node is used as the caller for + all top-level function nodes and as the callee for all leaf nodes. + It can therefore be used to start a scan of the call graph. In + addition, the times associated with it's "callers and callees" + actually represent the time these functions spent being the root of + the computation or its leaf. (This can be different from a node's + ``self'' time, since it is divided by the number of instances a + function had on the stack for every sample --- so for recursive + functions this value is different from.)} + +]} + + +@defstruct[node ([id (or/c #f symbol?)] + [src (or/c #f srcloc?)] + [thread-ids (listof exact-nonnegative-integer?)] + [total exact-nonnegative-integer?] + [self exact-nonnegative-integer?] + [callers (listof edge?)] + [callees (listof edge?)])]{ + +Represents a function call node in the call graph of an analyzed +profile result. + +@itemize[ + +@item{The @scheme[id] and @scheme[src] field hold a symbol naming the + function and/or its source location as a @scheme[srcloc] value. + This is the same as the results of + @scheme[continuation-mark-set->context], so at most of of these can + be @scheme[#f], except for the special @scheme[*-node] (see the + @scheme[profile] struct) that can be identified by both of these + being @scheme[#f].} + +@item{@scheme[thread-ids] holds a list of thread identifiers that were + observed executing this function.} + +@item{@scheme[total] holds the total time (in milliseconds) where this + function was anywhere on the stack. It is common to see a few + toplevel functions that have close to a 100% total time, but + otherwise small @scheme[self] times --- these functions are the ones + that derive the work that was done, but they don't do any hard work + directly.} + +@item{@scheme[self] holds the total time (in milliseconds) where this + function was observed as the leaf of the stack. It represents the + actual work done by this function, rather than @scheme[total] that + represents the work done by both the function and its callees.} + +@item{@scheme[callers] and @scheme[callees] hold the list of caller + and callee nodes. The nodes are not actually held in these lists, + instead, @scheme[edge] values are used --- and provide information + specific to an edge in the call-graph.} + +]} + + +@defstruct[edge ([total exact-nonnegative-integer?] + [caller node?] + [caller-time exact-nonnegative-integer?] + [callee node?] + [callee-time exact-nonnegative-integer?])]{ + +Represents an edge between two function call nodes in the call graph +of an analyzed profile result. + +@itemize[ + +@item{@scheme[total] is analogous to the @scheme[total] field of a + @scheme[node] value: the total time that this edge was anywhere on + the stack.} + +@item{@scheme[caller] and @scheme[callee] hold the two nodes that are + connected by this edge.} + +@item{@scheme[caller-time] and @scheme[callee-time] hold the time + spent on this edge from the caller's or the callee's perspective. + These times are different from each other (as well as from the total + time) because the sums that make them are each divided by the number + of times the caller or the callee was on the stack. + + To understand this difference, consider a stack snapshot holding + @tt{A @'rarr B @'rarr B @'rarr B @'rarr A}, and representing a + second of observed cpu time. For this sample, the @tt{A @'rarr B} + edge is charged by a whole second for its total time (the same goes + for the @tt{A @'rarr A} edge, for example). Its caller time is + charged 1/2 second because @tt{A} appears twice in this stack + snapshot (in the other half, @tt{A} is chared for being a leaf --- + the caller of the special @scheme[*-node]), and its callee time is + charged 1/3 respectively.} + +]} diff --git a/collects/profile/scribblings/info.ss b/collects/profile/scribblings/info.ss new file mode 100644 index 0000000..860b926 --- /dev/null +++ b/collects/profile/scribblings/info.ss @@ -0,0 +1,3 @@ +#lang setup/infotab + +(define scribblings '(("profile.scrbl" () (tool-library)))) diff --git a/collects/profile/scribblings/profile.scrbl b/collects/profile/scribblings/profile.scrbl new file mode 100644 index 0000000..ef93db5 --- /dev/null +++ b/collects/profile/scribblings/profile.scrbl @@ -0,0 +1,25 @@ +#lang scribble/doc + +@(require scribble/manual + (for-label scheme)) + +@title{Statistical Profiler} + +The @scheme[profile] collection implements a statistical profiler. +The profiling is done by running a background thread that collects +stack snapshots via @scheme[continuation-mark-set->context], meaning +that the result is an estimate of the execution costs and it is +limited to the kind of information that +@scheme[continuation-mark-set->context] produces (most notably being +limited to functions calls, and subject to compiler optimizations); +but the result is often useful. In practice, since this method does +not require recompilation of your source and has very little runtime +overhead, it can be used for longer runs which compensates for these +limits. + +@table-of-contents[] + +@include-section["toplevel.scrbl"] +@include-section["sampler.scrbl"] +@include-section["analyzer.scrbl"] +@include-section["renderers.scrbl"] diff --git a/collects/profile/scribblings/renderers.scrbl b/collects/profile/scribblings/renderers.scrbl new file mode 100644 index 0000000..b00bd96 --- /dev/null +++ b/collects/profile/scribblings/renderers.scrbl @@ -0,0 +1,129 @@ +#lang scribble/doc + +@(require scribble/manual + (for-label scheme + profile/analyzer + (prefix-in text: profile/render-text) + (prefix-in graphviz: profile/render-graphviz))) + +@title[#:tag "renderers"]{Profile Renderers} + +After collecting the profile samples and analyzing the data, the last +aspect of profiling is to render the results. The profile collection +provides several renderers, each providing a rendering function that +consumes a @scheme[profile] instance. See the +@seclink["analyzer"]{analyzer} section for a description of the +@scheme[profile] struct if you want to implement your own renderer. + +@;-------------------------------------------------------------------- +@section{Textual Rendering} + +@defmodule[profile/render-text] + +@defproc[(render + [profile-data profile?] + [#:truncate-source truncate-source exact-nonnegative-integer? 50] + [#:hide-self hide-self% (between/c 0 1) 1/100] + [#:hide-subs hide-subs% (between/c 0 1) 2/100]) + void?]{ + +Prints the given @scheme[profile] results as a textual table. + +The printout begins with some general facts about the profile, and +then a table that represents the call-graph is printed. Each row in +this table looks like: + +@verbatim[#:indent 2]{ + B [M1] M2% + [N1] N2(N3%) N4(N5%) A ...path/to/source.ss:12:34 + C [M3] M4%} + +Where actual numbers appear in the printout. The meaning of the +numbers and labels is as follows: +@itemize{ +@item{@tt{A} --- the name of the function that this node represents, + followed by the source location for the function if it is known. + The name can be ``???'' for functions with no identifier, but in + this case the source location will identify them.} +@item{@tt{N1} --- an index number associated with this node. This is + important in references to this function, since the symbolic names + are not unique (and some can be missing). The number itself has no + significance, it simply goes from 1 up.} +@item{@tt{N2} --- the time (in milliseconds) that this function has + been anywhere in a stack snapshot. This is the total time that the + execution was somewhere in this function or in its callees. + (Corresponds to the @scheme[node-total] field.)} +@item{@tt{N3} --- this is the percentage of the node's total time + (@tt{N2}) from the total observed time of the profile. An entry + with a 100% refers to a function that was active throughout the + whole execution.} +@item{@tt{N4} --- the time (in milliseconds) that this function has + been at the top of the stack snapshot. This is the time that this + function consumed doing work itself rather than calling other + functions. (Corresponds to the @scheme[node-self] field.)} +@item{@tt{N5} --- this is the percentage of @tt{N4} out of the total + observed time of the profile. Functions with high values here can + be good candidates for optimization, But, of course, they can + represent doing real work due to one of its callers that need to be + optimized.} +@item{@tt{B} and @tt{C} --- these are labels for the callers and + callees of the function. Any number of callers and callees can + appear here (including 0). The function itself can also appear in + both places if it is (non-tail) recursive.} +@item{@tt{M1} and @tt{M3} --- the index numbers for @tt{B} and + @tt{C}. They can be used to disambiguate functions with the same + name, as well as a quick way to find the corresponding entry in the + table.} +@item{@tt{M2} and @tt{M4} --- the percentages of the time @tt{A} spent + being called by @tt{B} and calling @tt{C}. These percentages + represent the time that this edge was found on a stack snapshot, + divided by the number of occurrences of @tt{A} on the same snapshot. + The number is the percentage of these times out of @tt{N2}, the + total time @tt{A} has been active. + + The total percentages for the all caller and for all callees should + be close to 100% minus the time @tt{A} was the leaf or the root. + + These values correspond to the @scheme[edge-caller-time] and + @scheme[edge-callee-time] fields; see the documentation for further + details.} +} + +The function has a few keyword arguments to customize its output: +@itemize[ + +@item{The @scheme[truncate-source] argument determines the length that + the source string should take (together with its label).} + +@item{@scheme[hide-self%] and @scheme[hide-subs%] control hiding some + of the nodes. A node is hidden if its self time (@tt{N3} in the + above example) is smaller than @scheme[hide-self%] @emph{and} if all + places where it occurs as a caller or a callee have percentages that + are smaller than @scheme[hide-subs%]. The reason for requiring both + conditions is to avoid having ``dangling references'' to hidden + nodes.} + +]} + + +@;-------------------------------------------------------------------- +@section{Graph Rendering} + +@defmodule[profile/render-graphviz] + +@defproc[(render + [profile-data profile?] + [#:hide-self hide-self% (between/c 0 1) 1/100] + [#:hide-subs hide-subs% (between/c 0 1) 2/100]) + void?]{ + +Prints the given @scheme[profile] results as a Graphviz directed +graph. + +This is an experimental module, provided mostly as a proof-of-concept. +It renders the profile's call-graph as a graph representation for one +of the Graphviz tools to render. Nodes are colored according to their +`self' percentages, and edges. + +The keyword arguments control hiding nodes in the same way as with the +textual renderer.} diff --git a/collects/profile/scribblings/sampler.scrbl b/collects/profile/scribblings/sampler.scrbl new file mode 100644 index 0000000..1140e9c --- /dev/null +++ b/collects/profile/scribblings/sampler.scrbl @@ -0,0 +1,60 @@ +#lang scribble/doc + +@(require scribble/manual + (for-label scheme profile/sampler profile/analyzer)) + +@title[#:tag "sampler"]{Collecting Profile Information} + +@defmodule[profile/sampler] + +@defproc[(create-sampler [to-track (or/c thread? custodian? + (listof (or/c thread? custodian?)))] + [delay nonnegative-number?]) + ((symbol?) (any/c) . ->* . any/c)]{ + +Creates a sample collector thread, which tracks the given +@scheme[to-track] value every @scheme[delay] seconds. The +@scheme[to-track] value can be either a thread (track just that +thread), a custodian (track all threads managed by the custodian), or +a list of threads and/or custodians. If a custodian is given, it must +be subordinate to the current custodian. + +The resulting value is a controller function, which consumes a message +consisting of a symbol and an optional argument, and can affect the +sampler. The following messages are currently supported: +@itemize[ + +@item{@scheme['pause] and @scheme['resume] will stop or resume data +collection. These messages can be nested. Note that the thread will +continue running it will just stop collecting snapshots.} + +@item{@scheme['stop] kills the controlled thread. It should be called + when no additional data should be collected. (This is currently + irreversible: there is no message to start a new sampler thread.)} + +@item{@scheme['set-tracked!] with a value will change the tracked + objects (initially specified as the @scheme[to-track] argument) to + the given value.} + +@item{@scheme['set-tracked!] with a value will change the delay that + the sampler us taking between snapshots. Note that although + changing this means that the snapshots are not uniformly + distributed, the results will still be sensible --- this is because + the cpu time between samples is taken into account when the + resulting data is analyzed.} + +@item{Finally, a @scheme['get-snapshots] message will make the + controller return the currently collected data. Note that this can + be called multiple times, each call will return the data thatis + collected up to that point in time. In addition, it can be (and + usually is) called after the sampler was stopped. + + The value that is returned should be considered as an undocumented + internal detail of the profiler, to be sent to + @scheme[analyze-samples] for analysis. The reason this is not done + automatically, is that a future extension might allow you to combine + several sampler results, making it possible to combine a profile + analysis from several individual runs, possibly from different + machines.} + +]} diff --git a/collects/profile/scribblings/toplevel.scrbl b/collects/profile/scribblings/toplevel.scrbl new file mode 100644 index 0000000..5f1f20d --- /dev/null +++ b/collects/profile/scribblings/toplevel.scrbl @@ -0,0 +1,67 @@ +#lang scribble/doc + +@(require scribble/manual + (for-label scheme profile profile/sampler + (only-in profile/analyzer analyze-samples) + (prefix-in text: profile/render-text))) + +@title{Toplevel Interface} + +@defmodule[profile] + +This module provides one procedure and one macroxs that are convenient +high-level entry points for timing expressions. This hides the +details that are available through other parts of the library, and is +intended as a convenient tool for profiling code. + +@defproc[(profile-thunk [thunk (-> any/c)] + [#:delay delay nonnegative-number? 0.05] + [#:repeat iterations exact-nonnegative-integer? 1] + [#:threads threads? any/c #f] + [#:render renderer text:render]) + void?]{ + +Executes the given thunk while collecting profiling data, and render +this data when done. Keyword arguments can customize the profiling: +@itemize[ + +@item{The profiler works by @scheme[create-sampler] starting a + ``sampler'' thread whose job is to collect stack samples + periodically (using @scheme[continuation-mark-set->context]). + @scheme[delay] determines the amount of time the sampler + @scheme[sleep]s for between samples. Note that this is will be + close, but not identical to, the frequency in which data is actually + sampled.} + +@item{When the profiled computation takes a short amount of time, the + collected data will not be accurate. In this case, you can specify + an @scheme[iterations] argument to repeat the evaluation a number of + times which will improve the accuracy of the resulting report.} + +@item{Normally, the sampler collects snapshots of the + @scheme[current-thread]'s stack. If there is some computation that + happens on a different thread, that work will not be reflected in + the results: the only effect will be suspiciously small value for + the observed time, because the collected data is taking into account + the cpu time that the thread actually performed (it uses + @scheme[current-process-milliseconds] with the running thread as an + argument). Specifying a non-@scheme[#f] value for the + @scheme[threads?] argument will arrange for all threads that are + started during the evaluation to be tracked. Note that this means + that the computation will actually run in a new sub-custodian, as + this is the only way to be able to track such threads.} + +@item{Once the computation has finished, the sampler is stopped, and + the accumulated data is collected. It is then analyzed by + @scheme[analyze-samples], and the analyzed profile data is fed into + a renderer. Use an identity function (@scheme[values]) to get the + analyzed result, and render it yourself, or use one of the existing + renderers (see @secref["renderers"]).} + +]} + +@defform[(profile expr keyword-arguments ...)]{ + +A macro version of @scheme[profile-thunk]. The keyword arguments can +be specified in the same was as for a function call: they can appear +before and/or after the expression to be profiled.} diff --git a/collects/profile/utils.ss b/collects/profile/utils.ss new file mode 100644 index 0000000..0228ba4 --- /dev/null +++ b/collects/profile/utils.ss @@ -0,0 +1,37 @@ +#lang scheme/base + +(provide format-percent format-source get-hidden) +(require "analyzer.ss") + +(define (format-percent percent) + (let ([percent (inexact->exact (round (* percent 1000)))]) + (format "~a.~a%" (quotient percent 10) (modulo percent 10)))) + +(define (format-source src) + (if src + (format "~a:~a" + (srcloc-source src) + (if (srcloc-line src) + (format "~a:~a" (srcloc-line src) (srcloc-column src)) + (format "#~a" (srcloc-position src)))) + "(unknown source)")) + +;; hide a node if its self time is smaller than the self threshold *and* all of +;; its edges are below the sub-node threshold too -- this avoids confusing +;; output where a node does not have an entry but appears as a caller/callee. +(define (get-hidden profile hide-self% hide-subs%) + (define self% (or hide-self% 0)) + (define subs% (or hide-subs% 0)) + (define total-time (profile-total-time profile)) + (define (hide? node) + (define (hide-sub? get-subs edge-sub edge-sub-time) + (define %s + (map (lambda (edge) + (let ([total (node-total (edge-sub edge))]) + (if (zero? total) 0 (/ (edge-sub-time edge) total)))) + (get-subs node))) + (subs% . >= . (apply max %s))) + (and (self% . >= . (/ (node-self node) total-time)) + (hide-sub? node-callees edge-callee edge-caller-time) + (hide-sub? node-callers edge-caller edge-callee-time))) + (if (or (> self% 0) (> subs% 0)) (filter hide? (profile-nodes profile)) '())) diff --git a/collects/tests/profile/main.ss b/collects/tests/profile/main.ss new file mode 100644 index 0000000..4d577fc --- /dev/null +++ b/collects/tests/profile/main.ss @@ -0,0 +1,104 @@ +#lang scheme/base + +(require tests/eli-tester profile/analyzer scheme/match scheme/list) + +(define A '(A . #f)) +(define B '(B . #f)) +(define C '(C . #f)) + +(define (analyze cpu+lists) + (profile->sexpr + (analyze-samples + (cons (car cpu+lists) + (map (lambda (x) (append (take x 2) (reverse (drop x 2)))) + (reverse (cdr cpu+lists))))))) + +(define (profile->sexpr prof) + (define (node-id* node) + (or (node-id node) (if (node-src node) '??? '*))) + (define (edges->sexprs node get get-time) + (for/list ([edge (get node)]) + `(,(node-id* (edge-caller edge)) -> ,(node-id* (edge-callee edge)) + time= ,(get-time edge) + total= ,(edge-time edge)))) + (define (node->sexpr node) + `(,(node-id* node) + total= ,(node-total node) + self= ,(node-self node) + callers: ,@(edges->sexprs node node-callers edge-caller-time) + callees: ,@(edges->sexprs node node-callees edge-callee-time) + threads= ,(node-thread-ids node))) + `(total= ,(profile-total-time prof) + samples= ,(profile-sample-number prof) + cpu= ,(profile-cpu-time prof) + thread-times= ,(profile-thread-times prof) + ,@(map node->sexpr (cons (profile-*-node prof) (profile-nodes prof))))) + +(test + + (match (analyze `(10 + [0 0 ,A] + [0 1 ,A])) + [`(total= 2 samples= 2 cpu= 10 thread-times= ([0 . 2]) + [* total= 2 self= 0 + callers: [A -> * time= 2 total= 2] + callees: [* -> A time= 2 total= 2] + threads= ()] + [A total= 2 self= 2 + callers: [* -> A time= 2 total= 2] + callees: [A -> * time= 2 total= 2] + threads= (0)]) + 'ok] + [bad (error 'test ">>> ~s" bad)]) + + (match (analyze `(10 + [0 0 ,A ,B ,A] + [0 1 ,A ,B ,A])) + [`(total= 2 samples= 2 cpu= 10 thread-times= ([0 . 2]) + [* total= 2 self= 0 + callers: [A -> * time= 2 total= 2] + callees: [* -> A time= 2 total= 2] + threads= ()] + [A total= 2 self= 2 + callers: [B -> A time= 2/2 total= 2] + [* -> A time= 2/2 total= 2] + callees: [A -> B time= 2/2 total= 2] + [A -> * time= 2/2 total= 2] + threads= (0)] + [B total= 2 self= 0 + callers: [A -> B time= 2 total= 2] + callees: [B -> A time= 2 total= 2] + threads= (0)]) + 'ok] + [bad (error 'test ">>> ~s" bad)]) + + (match (analyze `(10 + [0 0 ,A ,B ,A] + [0 1 ,A ,C ,A] + [0 2 ,A ,C ,A] + [0 3 ,A ,C ,A])) + [`(total= 4 samples= 4 cpu= 10 thread-times= ([0 . 4]) + [* total= 4 self= 0 + callers: [A -> * time= 4 total= 4] + callees: [* -> A time= 4 total= 4] + threads= ()] + [A total= 4 self= 4 + callers: [* -> A time= 4/2 total= 4] + [C -> A time= 3/2 total= 3] + [B -> A time= 1/2 total= 1] + callees: [A -> * time= 4/2 total= 4] + [A -> C time= 3/2 total= 3] + [A -> B time= 1/2 total= 1] + threads= (0)] + [C total= 3 self= 0 + callers: [A -> C time= 3 total= 3] + callees: [C -> A time= 3 total= 3] + threads= (0)] + [B total= 1 self= 0 + callers: [A -> B time= 1 total= 1] + callees: [B -> A time= 1 total= 1] + threads= (0)]) + 'ok] + [bad (error 'test ">>> ~s" bad)]) + + )