
Should become its own package once we split up the collects. Requested by Eli, to reduce the dependencies of the profiler collection.
240 lines
10 KiB
Racket
240 lines
10 KiB
Racket
#lang racket/base
|
|
|
|
(require racket/list unstable/list racket/match racket/contract racket/string
|
|
racket/set racket/dict
|
|
profile/structs profile/utils
|
|
"utils.rkt" "dot.rkt")
|
|
|
|
(provide boundary-view)
|
|
|
|
;; Boundary View
|
|
;; Shows contract costs associated with each each boundary (see definition
|
|
;; below) and, when possible, which call edges cross contract boundaries.
|
|
;; Because of the limitations of the profiler, some pieces of information
|
|
;; are conservative / best effort.
|
|
|
|
|
|
(define (prune-module-name name)
|
|
(define (p n) (regexp-replace #rx"^.*/" n ""))
|
|
(cond [(string? name) (p name)]
|
|
[(path? name) (p (path->string name))]
|
|
;; submodule path
|
|
[(list? name) (list 'submod
|
|
(p (path->string (first name)))
|
|
(second name))]))
|
|
|
|
;; A boundary is, at a high-level, a profile node (contracted function) and a
|
|
;; (potentially empty) set of profile edges (incoming call edges that cross the
|
|
;; contract boundary).
|
|
;; A given node can be part of multiple boundaries if it enters into contracts
|
|
;; with multiple parties, in which case it is be part of one boundary per
|
|
;; party, and the incoming edges all come from that party.
|
|
;; Contract checking time can be assigned to boundaries, but not to individual
|
|
;; edges (from our perspective, the edges in a boundary can't be distinguished)
|
|
;; Edges are found by looking at the regular time profile for callers to the
|
|
;; node that originate from the contract's other party. Due to limitations in
|
|
;; the profiler (tail calls, mostly), the set of edges is an approximation
|
|
;; (both over- and under-).
|
|
(struct boundary (contracted-node edges blame time)
|
|
#:transparent) ;; TODO for debugging
|
|
|
|
;; Rendering options.
|
|
;; For large programs (e.g. Acquire), enabling module clustering when showing
|
|
;; all functions (not just contracted ones) makes renderings undecipherable.
|
|
;; Also, most non-dot Graphviz renderers cope poorly with our graphs. fdp is
|
|
;; probably the next best one.
|
|
;; This is less of a problem now that we never show non-contracted nodes.
|
|
|
|
;; draw borders to group functions from the same module
|
|
(define show-module-clustering? #t)
|
|
|
|
(define boundary-graph-dot-file
|
|
(string-append output-file-prefix "boundary-graph.dot"))
|
|
(define contract-key-file
|
|
(string-append output-file-prefix "contract-key.txt"))
|
|
|
|
(define (boundary-view correlated)
|
|
(match-define (contract-profile
|
|
total-time n-samples n-contract-samples
|
|
live-contract-samples all-blames regular-profile)
|
|
correlated)
|
|
|
|
(define all-contracts (remove-duplicates (map blame-contract all-blames)))
|
|
|
|
;; On the graph, contracts are abbreviated with a key ([1], [2], etc.).
|
|
;; The mapping from keys to actual contracts is printed separately.
|
|
;; TODO only have keys for contracts that actually show up in the profile, and only show these
|
|
(define contracts->keys (map cons all-contracts (range (length all-contracts))))
|
|
|
|
;; For each blame, find the edges (from the profile graph) that cross
|
|
;; contract boundaries.
|
|
;; Each blame may belong to multiple boundaries (multiple contracted
|
|
;; functions from the same module called from the same other party).
|
|
(define no-profile-nodes
|
|
(make-hash)) ; we may want to re-use nodes for multiple blames
|
|
(define all-boundaries
|
|
(for/list ([b (in-list all-blames)])
|
|
;; First, find the contracted function in the profile graph.
|
|
;; Some functions are not in the profile graph. We create nodes for them,
|
|
;; which won't be connected to the actual profile graph, but will show up
|
|
;; in the output. (See no-profile-nodes above.)
|
|
(define contracted-function
|
|
(or (for/first ([n (in-list (profile-nodes regular-profile))]
|
|
;; Matching is overly permissive (relies only on
|
|
;; function name). If two functions in the same module
|
|
;; have the same name, results may be bogus.
|
|
;; TODO fix this
|
|
;; Refining results using source locations would be
|
|
;; tricky: blame struct and profiler don't store source
|
|
;; information in the same format (path with <collects>
|
|
;; vs full path, and pointing to defined identifier vs
|
|
;; pointing to the whole definition).
|
|
;; TODO check at least for inclusion of blame source in
|
|
;; profiler source. not 100%, but can get us closer
|
|
;; The current matching also won't work for contracted
|
|
;; anonymous functions. (Probably rare, though.)
|
|
#:when (equal? (blame-value b) (node-id n)))
|
|
n)
|
|
;; function is not in the profile. create a node for it (that will
|
|
;; not be connected to the profile graph), or reuse an existing
|
|
;; node if we created one for that id
|
|
(hash-ref! no-profile-nodes
|
|
(blame-value b)
|
|
(node (blame-value b)
|
|
#f ; no source ; TODO could we find it?
|
|
'(0) ; dummy thread-ids
|
|
0 0 ; if not in the profile, no time is a safe bet
|
|
'() '())))) ; no known callers or callees
|
|
;; Out of the callers of contracted-function, look for the ones that are
|
|
;; in the negative module. Those edges are the boundary edges.
|
|
(define caller-module (blame-negative b))
|
|
(define boundary-edges
|
|
(for*/list ([e (node-callers contracted-function)]
|
|
[src (in-value (node-src (edge-caller e)))]
|
|
#:when (and src (equal? caller-module (srcloc-source src))))
|
|
e))
|
|
(define time-spent ; TODO probably more efficient to group ahead of time
|
|
(samples-time (for/list ([s (in-list live-contract-samples)]
|
|
#:when (equal? (car s) b))
|
|
s)))
|
|
(boundary contracted-function boundary-edges b time-spent)))
|
|
|
|
(with-output-to-file boundary-graph-dot-file
|
|
#:exists 'replace
|
|
(lambda () (render all-boundaries contracts->keys)))
|
|
(render-dot boundary-graph-dot-file)
|
|
;; print contract key
|
|
;; TODO find a way to add to pdf
|
|
;; TODO also to add to pdf: show proportion of time spent in contracts
|
|
;; otherwise we have no idea if we're looking at a case where contracts are
|
|
;; bottlenecks or not
|
|
(with-output-to-file contract-key-file
|
|
#:exists 'replace
|
|
(lambda ()
|
|
(for ([contract+key (in-list contracts->keys)])
|
|
(printf "[~a] = ~a\n"
|
|
(cdr contract+key)
|
|
(car contract+key))))))
|
|
|
|
|
|
;; given a profile node, return all the boundaries centered there
|
|
(define (node->boundaries node all-boundaries)
|
|
(for/list ([b (in-list all-boundaries)]
|
|
#:when (eq? node (boundary-contracted-node b)))
|
|
b))
|
|
|
|
(define (node->module node) ; approximate. if no source info, useless
|
|
(define src (node-src node))
|
|
(and src (srcloc-source src)))
|
|
|
|
(define (node-location node)
|
|
(define id (node-id node))
|
|
(define src (node-src node))
|
|
(format "~a~a~a"
|
|
(if id (format "~a" id) "")
|
|
(if (and id src) "\n" "")
|
|
(if src
|
|
(prune-module-name (format-source (node-src node)))
|
|
"")))
|
|
|
|
(define (summarize-boundary b contracts->keys)
|
|
(format "\n[~a] @ ~a : ~ams"
|
|
;; show the contract key
|
|
(dict-ref contracts->keys (blame-contract (boundary-blame b)))
|
|
(prune-module-name (blame-negative (boundary-blame b)))
|
|
(boundary-time b)))
|
|
|
|
|
|
;; Inspired by ../render-graphviz.rkt
|
|
(define (render all-boundaries contracts->keys)
|
|
|
|
(define total-contract-time
|
|
(max 1e-20 (for/sum ([b (in-list all-boundaries)]) (boundary-time b))))
|
|
(define max-self%
|
|
(/ (for/fold ([m 0]) ([b (in-list all-boundaries)])
|
|
(max m (boundary-time b)))
|
|
total-contract-time))
|
|
|
|
;; All boundary-related nodes, which includes both actual boundary nodes and
|
|
;; callers that call boundary nodes across boundary edges.
|
|
(define nodes
|
|
(set->list
|
|
(for/fold ([nodes (set)])
|
|
([b (in-list all-boundaries)])
|
|
(for/fold ([nodes* (set-add nodes (boundary-contracted-node b))])
|
|
([e (in-list (boundary-edges b))])
|
|
(set-add nodes* (edge-caller e))))))
|
|
(define nodes->names (for/hash ([n nodes]) (values n (gensym))))
|
|
|
|
(printf "digraph Profile {\n")
|
|
(printf "splines=\"true\"\n") ; polyline kinda works too, maybe
|
|
|
|
;; cluster nodes per module, to show boundaries
|
|
(for ([module-nodes (in-list (group-by equal? nodes #:key node->module))]
|
|
[cluster-idx (in-naturals 1)])
|
|
(define known-module? (node->module (first module-nodes)))
|
|
;; don't cluster nodes for which we have no module info
|
|
(when (and known-module? show-module-clustering?)
|
|
(printf "subgraph cluster_~a {\n" cluster-idx)
|
|
(printf "penwidth=3.0\n")
|
|
(printf "graph[shape=\"ellipse\"]\n"))
|
|
|
|
;; render the cluster's nodes
|
|
(for ([node (in-list module-nodes)])
|
|
(define boundaries (node->boundaries node all-boundaries))
|
|
(define self% (/ (for/sum ([b (in-list boundaries)]) (boundary-time b))
|
|
total-contract-time))
|
|
(define label
|
|
(format
|
|
"~a\n~ams~a"
|
|
(node-location node)
|
|
(node-self node) ; raw running time (not contracts)
|
|
;; Display a summary of each boundary, which includes which contract
|
|
;; is used, the negative party and contract time spent.
|
|
(string-join
|
|
(for/list ([b (in-list boundaries)])
|
|
(summarize-boundary b contracts->keys))
|
|
"")))
|
|
(printf "~a [label=~s, style=filled" (dict-ref nodes->names node) label)
|
|
;; Boundary nodes are boxes, caller nodes are ovals.
|
|
(unless (null? boundaries)
|
|
(printf ", shape=\"box\", fillcolor=\"1,~a,1\""
|
|
(/ self% max-self% 1.0)))
|
|
(printf "];\n"))
|
|
|
|
(when (and known-module? show-module-clustering?)
|
|
(printf "}\n")))
|
|
|
|
;; draw edges
|
|
;; needs to be done after the clusters, otherwise any node mentioned in an
|
|
;; edge printed inside a cluster is considered to be in the cluster, which
|
|
;; messes things up
|
|
(for* ([node (in-list nodes)]
|
|
[boundary (in-list (node->boundaries node all-boundaries))]
|
|
[edge (in-list (boundary-edges boundary))])
|
|
(printf "~a -> ~a;\n"
|
|
(dict-ref nodes->names (edge-caller edge))
|
|
(dict-ref nodes->names node)))
|
|
|
|
(printf "}\n"))
|