#lang scheme/base

;; Analyzer for the sampler results

(provide analyze-samples)

(require scheme/list)

;; An encapsulation of an analyzed profile call graph:
;; - total-time: the total time observed in msec (this is generally different
;;   than the time it took to run the profile).
;; - sample-number: the number of samples taken.
;; - thread-times: a list of (<thread-id> . msec) for the time spent in
;;   observed threads.
;; - nodes: the list of call-graph nodes sorted by their total time.
;; - *-node: a special node that is connected as a "caller" for all toplevel
;;   functions and a "callee" for all leaf functions.  It will also be
;;   identifiable by having both id and src fields being #f.  Can be used to
;;   start a graph traversal from the top or the bottom.
(provide (struct-out profile))
(define-struct profile
  (total-time cpu-time sample-number thread-times nodes *-node))

;; An entry for a single profiled function:
;; - id, src: the corresponding values from `continuation-mark-set->context'.
;; - thread-ids: the list of thread identifiers this function has been seen in.
;; - total: total msecs it participated in (= time in it, including callees).
;; - self: msecs where it was at the top of the stack (= time in its own code).
;; - callers, callees: a list of `edge' values for the time spent while it was
;;   called by the repective <node>, or it called it, sorted in decreasing msec
;;   time.
;; Note that the sum of caller/callee edges including the special `*-node'
;; should be equal to the `total' time.  So the edge from/to the `*-node' can
;; be used to get the time spent as a leaf or as a root divided by the number
;; of time the function appeared on the stack: so this value can be displayed
;; in the call-graph and the numbers will sum up nicely to a 100%.
(provide (struct-out node))
(define-struct node (id src thread-ids total self callers callees)
  #:mutable
  #:property prop:custom-write
  (lambda (node o w?) (fprintf o "#<node:~s>" (or (node-id node) '???))))

;; An edge representing function calls between two nodes:
;; - time: the total time spent while the call was somewhere on the stack.
;; - caller, callee: the two relevant `node' values.
;; - caller-time, callee-time: the time that the caller/callee spent in this
;;   call (different from the above time because each stack sample's time is
;;   divided by the number of times the caller/callee appears in that slice).
(provide (struct-out edge))
(define-struct edge (total caller caller-time callee callee-time)
  #:mutable
  #:property prop:custom-write
  (lambda (edge o w?)
    (fprintf o "#<edge:~s-~s>"
             (or (node-id (edge-caller edge)) '???)
             (or (node-id (edge-callee edge)) '???))))

(define with-hash:not-found (gensym))
(define-syntax-rule (with-hash <hash> <key> <expr>)
  (let ([t <hash>] [k <key>])
    (let ([v (hash-ref t k with-hash:not-found)])
      (if (eq? v with-hash:not-found)
        (let ([v <expr>]) (hash-set! t k v) v)
        v))))

;; This function analyzes the output of the sampler.  Returns a `profile'
;; struct holding a list of `node' values, each one representing a node in the
;; call graph, with the relevant information filled in.  The results are sorted
;; using a topological sort from the top, and by the total time for nodes at
;; the same level.
(define (analyze-samples cpu-time+samples)
  (define cpu-time (car cpu-time+samples))
  (define samples  (cdr cpu-time+samples))
  (define samples-by-thread
    (let ([by-thread (split-by-thread samples)])
      (for ([samples (in-vector by-thread)] [i (in-naturals 0)])
        (vector-set! by-thread i (get-times samples)))
      by-thread))
  (define id+src->node-hash (make-hasheq))
  (define (id+src->node id+src)
    (with-hash id+src->node-hash id+src
               (make-node (car id+src) (cdr id+src) '() 0 0 '() '())))
  ;; special node that is the caller of toplevels and callee of leaves
  (define *-node (id+src->node '(#f . #f)))
  (define call->edge
    (let ([t (make-hasheq)])
      (lambda (ler lee)
        (with-hash (with-hash t ler (make-hasheq)) lee
                   (let ([e (make-edge 0 ler 0 lee 0)])
                     (set-node-callers! lee (cons e (node-callers lee)))
                     (set-node-callees! ler (cons e (node-callees ler)))
                     e)))))
  (define total-time 0)
  (define thread-times (make-vector (vector-length samples-by-thread) 0))
  (for ([thread-samples (in-vector samples-by-thread)]
        [thread-id (in-naturals 0)]
        #:when #t
        [sample (in-list thread-samples)])
    (define msecs (car sample))
    (define (connect ler lee ler# lee#)
      (define edge (call->edge ler lee))
      (set-edge-caller-time! edge (+ (edge-caller-time edge) (/ msecs lee#)))
      (set-edge-callee-time! edge (+ (edge-callee-time edge) (/ msecs ler#)))
      edge)
    (define stack ; the stack snapshot, translated to `node' values
      (for/list ([id+src (in-list (cdr sample))])
        (let* ([node (id+src->node id+src)] [tids (node-thread-ids node)])
          (unless (memq thread-id tids)
            (set-node-thread-ids! node (cons thread-id tids)))
          node)))
    (define counts (get-counts stack))
    (define stack+counts (map (lambda (x) (assq x counts)) stack))
    (define edges
      (if (null? stack)
        '()
        (append (let ([first (car stack+counts)] [last (last stack+counts)])
                  (list (connect *-node (car last) 1 (cdr last))
                        (connect (car first) *-node (cdr first) 1)))
                (for/list ([callee (in-list stack+counts)]
                           [caller (in-list (cdr stack+counts))])
                  (connect (car caller) (car callee)
                           (cdr caller) (cdr callee))))))
    (set! total-time (+ msecs total-time))
    (for ([p (in-list counts)])
      (set-node-total! (car p) (+ msecs (node-total (car p)))))
    (for ([e (remove-duplicates edges eq?)])
      (set-edge-total! e (+ msecs (edge-total e))))
    (vector-set! thread-times thread-id
                 (+ msecs (vector-ref thread-times thread-id)))
    (when (pair? stack)
      (set-node-self! (car stack) (+ (node-self (car stack)) msecs))))
  (set-node-total! *-node total-time)
  ;; convert the nodes from the hash to a list, do a topological sort, and then
  ;; sort by total time (combining both guarantees(?) sensible order)
  (let ([nodes (remq *-node (topological-sort
                             *-node
                             (lambda (nodes)
                               (sort nodes > #:key node-total))))])
    ;; sort all the edges in the nodes according to total time
    (for ([n (in-list nodes)])
      (set-node-callees! n (sort (node-callees n) > #:key edge-callee-time))
      (set-node-callers! n (sort (node-callers n) > #:key edge-caller-time)))
    (make-profile
     total-time
     cpu-time
     (length samples)
     (for/list ([time (in-vector thread-times)] [n (in-naturals 0)])
       (cons n time))
     nodes
     *-node)))

;; A simple topological sort of nodes using BFS, starting from node `x' which
;; will be the special *-node.  `subsort' is a `resolver' function to sort
;; nodes on the same level.
(define (topological-sort x subsort)
  (let loop ([todo (list x)] [seen (list x)])
    (if (null? todo)
      '()
      (let* ([next (append-map (lambda (x)
                                 (subsort (map edge-callee (node-callees x))))
                               todo)]
             [next (remq* seen (remove-duplicates next))])
        (append todo (loop next (append next seen)))))))
#|
(define (node id) (make-node id #f '() 0 0 '() '()))
(define (X . -> . Y)
  (let ([e (make-edge 0 X 0 Y 0)])
    (set-node-callers! Y (cons e (node-callers Y)))
    (set-node-callees! X (cons e (node-callees X)))))
(define A (node 'A))
(define B (node 'B))
(define C (node 'C))
(A . -> . B)
(B . -> . C)
(topological-sort A 3)
|#

;; Groups raw samples by their thread-id, returns a vector with a field for
;; each thread id holding the sample data for that thread.  The samples in
;; these are reversed (so they'll be sorted going forward in time).
(define (split-by-thread samples)
  (define threads
    (make-vector (add1 (for/fold ([n -1]) ([sample (in-list samples)])
                         (max (car sample) n)))
                 '()))
  (for ([sample (in-list samples)])
    (let ([id (car sample)] [data (cdr sample)])
      (vector-set! threads id (cons data (vector-ref threads id)))))
  threads)
#|
(equal? (split-by-thread '())
        '#())
(equal? (split-by-thread '([0 x]))
        '#([(x)]))
(equal? (split-by-thread '([0 x] [0 y] [0 z]))
        '#([(z) (y) (x)]))
(equal? (split-by-thread '([0 x] [1 y] [2 z]))
        '#([(x)] [(y)] [(z)]))
(equal? (split-by-thread '([0 x1] [1 y1] [0 x2] [2 z1] [0 x3] [2 z2]))
        '#([(x3) (x2) (x1)] [(y1)] [(z2) (z1)]))
|#

;; gets a list of thread-id and data for that thread beginning with the
;; millisecond count, and returns a similar list where the samples begin with
;; the time spent for that sample.  The time spent is taken as half of the two
;; touching ranges; for example, if there are three samples showing snapshot
;; times of 10, 20, 60, then the middle one is assumed to have a time of 25.
;; For the first and last samples, the time is twice the half of the single
;; touching range -- with this example, this would be 10 for the first and 40
;; for the last.  If there is a thread with just one sample, it is dropped.
(define (get-times samples)
  (cond
    ;; nothing to do
    [(null? samples) '()]
    ;; throw out a single sample
    [(null? (cdr samples)) '()]
    [else (let loop ([samples samples]
                     [prevs   (cons #f (map car samples))]
                     [r       '()])
            (if (null? samples)
              (reverse r)
              (let* ([prev (car prevs)]
                     [cur  (caar samples)]
                     [data (cdar samples)]
                     [prevs (cdr prevs)]
                     [samples (cdr samples)]
                     [next (and (pair? samples) (caar samples))])
                (loop samples prevs
                      (cons (cons (if next
                                    ;; not the last: there must be a next
                                    (if prev (/ (- next prev) 2) (- next cur))
                                    ;; last one: there must be a prev
                                    (- cur prev))
                                  data)
                            r)))))]))
#|
(equal? (get-times '())
        '())
(equal? (get-times '([10 a]))
        '())
(equal? (get-times '([10 a] [20 b]))
        '([10 a] [10 b]))
(equal? (get-times '([10 a] [20 b] [60 c]))
        '([10 a] [25 b] [40 c]))
(equal? (get-times '([10 a] [20 b] [30 c] [40 d]))
        '([10 a] [10 b] [10 c] [10 d]))
(equal? (get-times '([10 a] [20 b] [60 c] [80 d]))
        '([10 a] [25 b] [30 c] [20 d]))
|#

;; returns a list of (cons item occurrences) for the items in l
(define (get-counts l)
  (let loop ([l l] [r '()])
    (if (null? l)
      r
      (let ([1st (car l)])
        (let loop* ([l1 '()] [c 1] [l (cdr l)])
          (cond [(null? l) (loop l1 (cons (cons 1st c) r))]
                [(eq? 1st (car l)) (loop* l1 (add1 c) (cdr l))]
                [else (loop* (cons (car l) l1) c (cdr l))]))))))
#|
(equal? (get-counts '()) '())
(equal? (get-counts '(1)) '([1 . 1]))
(equal? (get-counts '(1 1 1)) '([1 . 3]))
(define (set=? xs ys) (null? (append (remove* xs ys) (remove* ys xs))))
(set=? (get-counts '(1 2 3)) '([1 . 1] [2 . 1] [3 . 1]))
(set=? (get-counts '(1 2 2 3 3 3)) '([1 . 1] [2 . 2] [3 . 3]))
(set=? (get-counts '(3 1 2 3 2 3)) '([1 . 1] [2 . 2] [3 . 3]))
|#