improve hash microbenchmarks

This commit is contained in:
Matthew Flatt 2020-01-11 09:52:36 -07:00
parent e71963c48b
commit b8398f796c
7 changed files with 325 additions and 38 deletions

View File

@ -0,0 +1,7 @@
#lang racket/base
(require "set.rkt"
"ref.rkt"
"remove.rkt"
"iterate.rkt"
"subset.rkt")

View File

@ -1,15 +1,21 @@
#lang racket/base
(require (for-syntax racket/base))
(provide K Q M L N I
(provide H K J Q M L N I
times
unknown
with-hash-variants
make-large-equal-key/share1
make-large-equal-key/share2)
;; Iterations for slow nested things
(define H 10)
;; Iterations for nested things
(define K 100)
(define K (* H 10))
;; Iterations for fast nested things
(define J (* K 5))
;; Iterations for slow things:
(define Q 100000)
@ -55,7 +61,7 @@
(syntax-case (car body) (quote)
[(quote sym)
(identifier? #'sym)
(cons #`(quote #,(string->symbol (format "~a:~a" prefix (syntax-e #'sym))))
(cons #`(quote #,(string->symbol (format "~a:~a" (syntax-e #'sym) prefix)))
(loop (cdr body)))]
[#:only
(if (eq? prefix (syntax-e (cadr body)))

View File

@ -1,7 +1,7 @@
#lang racket/base
(require "config.rkt")
'eq:keys
'iterate-keys:eq
(times
(let ([ht (for/hasheq ([i (in-range K)])
(values i i))])
@ -10,7 +10,16 @@
(for/fold ([v #f]) ([k (in-hash-keys ht)])
k)))))
'eq:vals
'iterate-vals:eq#t
(times
(let ([ht (for/hasheq ([i (in-range K)])
(values i #t))])
(for ([i (in-range Q)])
(void
(for/fold ([v #f]) ([v (in-hash-values ht)])
v)))))
'iterate-vals:eq
(times
(let ([ht (for/hasheq ([i (in-range K)])
(values i i))])
@ -19,27 +28,36 @@
(for/fold ([v #f]) ([v (in-hash-values ht)])
v)))))
'eq:keys-unsafe
'iterate-unsafe-keys:eq
(times
(let ([ht (for/hasheq ([i (in-range K)])
(let ([ht (for/hasheq ([i (in-range J)])
(values i i))])
(void
(for ([i (in-range Q)])
(for/fold ([v #f]) ([k (in-immutable-hash-keys ht)])
k)))))
'eq:vals-unsafe
'iterate-unsafe-vals:eq#t
(times
(let ([ht (for/hasheq ([i (in-range K)])
(let ([ht (for/hasheq ([i (in-range J)])
(values i #t))])
(for ([i (in-range Q)])
(void
(for/fold ([v #f]) ([v (in-immutable-hash-values ht)])
v)))))
'iterate-unsafe-vals:eq
(times
(let ([ht (for/hasheq ([i (in-range J)])
(values i i))])
(for ([i (in-range Q)])
(void
(for/fold ([v #f]) ([v (in-immutable-hash-values ht)])
v)))))
'eq:for-each
'for-each:eq
(times
(let ([ht (for/hasheq ([i (in-range K)])
(let ([ht (for/hasheq ([i (in-range J)])
(values i i))])
(for ([i (in-range Q)])
(hash-for-each ht (lambda (k v) 'ok)))))

View File

@ -3,7 +3,7 @@
(with-hash-variants
'hash-removes
'removes
(times
(let ([ht (FOR/HASH ([i (in-range 100)])
(values (MAKE-KEY i) (MAKE-VAL i)))])

View File

@ -11,7 +11,7 @@
(with-hash-variants
'add-to-empty
'set-in-empty
(times
(let loop ([ht EMPTY] [i L])
(if (zero? i)
@ -19,7 +19,7 @@
(loop (hash-set ht KEY (MAKE-VAL 'true))
(sub1 i)))))
'add-many
'set-many
(times
(for ([i (in-range Q)])
(let loop ([ht EMPTY] [i K])
@ -28,7 +28,7 @@
(loop (hash-set ht (MAKE-KEY i) (MAKE-VAL 'true))
(sub1 i))))))
'add-many-in-order
'set-many-in-order
(times
(for ([i (in-range Q)])
(let loop ([ht EMPTY] [l shuffled])
@ -37,7 +37,7 @@
(loop (hash-set ht (car l) (MAKE-VAL 'true))
(cdr l))))))
'add-same
'set-same
(times
(for ([i (in-range Q)])
(let loop ([ht EMPTY] [i K])

View File

@ -33,31 +33,34 @@
(when v
(error "failed")))
'eq:subset-shared-small
'subset-lil-shared:eq
(times
(let* ([sub-ht (gen 6)]
[ht (gen-more 3 sub-ht)])
(check-true
(for/and ([i (in-range M)])
(hash-keys-subset? sub-ht ht)))))
(for ([i (in-range H)])
(check-true
(for/and ([i (in-range M)])
(hash-keys-subset? sub-ht ht))))))
'eq:subset-unshared-small
'subset-lil-unshared:eq
(times
(let ([ht (gen 6)]
[sub-ht (gen 3)])
(check-true
(for/and ([i (in-range M)])
(hash-keys-subset? sub-ht ht)))))
(for ([i (in-range H)])
(check-true
(for/and ([i (in-range M)])
(hash-keys-subset? sub-ht ht))))))
'eq:not-subset-unshared-small
'subset-lil-not:eq
(times
(let ([ht (gen 6)]
[sub-ht (gen 3)])
(check-false
(for/or ([i (in-range L)])
(hash-keys-subset? ht sub-ht)))))
(for ([i (in-range H)])
(check-false
(for/or ([i (in-range L)])
(hash-keys-subset? ht sub-ht))))))
'eq:subset-shared-medium+small
'subset-med+lil-shared:eq
(times
(let* ([sub-ht (gen 10)]
[ht (gen-more 1 sub-ht)])
@ -65,7 +68,7 @@
(for/and ([i (in-range L)])
(hash-keys-subset? sub-ht ht)))))
'eq:subset-shared-medium+medium
'subset-med+med-shared:eq
(times
(let* ([sub-ht (gen 10)]
[ht (gen-more 10 sub-ht)])
@ -73,14 +76,15 @@
(for/and ([i (in-range L)])
(hash-keys-subset? sub-ht ht)))))
'eq:subset-same-large
'subset-big-same:eq
(times
(let* ([sub-ht (gen 100)])
(check-true
(for/and ([i (in-range L)])
(hash-keys-subset? sub-ht sub-ht)))))
(for ([i (in-range H)])
(let* ([sub-ht (gen 100)])
(check-true
(for/and ([i (in-range L)])
(hash-keys-subset? sub-ht sub-ht))))))
'eq:subset-shared-large+small
'subset-big+lil-shared:eq
(times
(let* ([sub-ht (gen 100)]
[ht (gen-more 3 sub-ht)])
@ -88,7 +92,7 @@
(for/and ([i (in-range L)])
(hash-keys-subset? sub-ht ht)))))
'eq:subset-shared-large+medium
'subset-big+med-shared:eq
(times
(let* ([sub-ht (gen 100)]
[ht (gen-more 10 sub-ht)])
@ -98,7 +102,7 @@
;; This one amounts to a test of how fast the subset
;; operation iterates internally:
'eq:subset-unshared-large
'subset-big-unshared:eq
(times
(let* ([sub-ht (gen 100)]
[ht (gen 100)])

View File

@ -0,0 +1,252 @@
#lang racket/base
(require racket/format)
(provide parse-times)
(define (parse-times in report
#:init-impl [init-impl #f]
#:drop-runs [drop-runs 0]
#:accum [accum cons]
#:base [base '()]
#:rel-stdev? [rel-stdev? #f]
#:gc? [gc? #f])
(let loop ([group #f] [impl init-impl] [times null])
(define l (read-line in))
(cond
[(eof-object? l)
(accum (summary report group impl times drop-runs rel-stdev?)
base)]
[else
(cond
[(regexp-match #rx"== ([a-zA-Z0-9-]) ==" l)
=> (lambda (m)
(define first (when group
(summary report group impl times drop-runs rel-stdev?)))
(define rest (loop #f (cadr m) '()))
(if group
(accum first rest)
rest))]
[(regexp-match #rx"'([#:+a-zA-Z0-9-]+)" l)
=> (lambda (m)
(define first (when group
(summary report group impl times drop-runs rel-stdev?)))
(define rest (loop (cadr m) impl '()))
(if group
(accum first rest)
rest))]
[(and gc?
(regexp-match #rx"elapsed cpu time, including ([0-9.]+)s collecting" l))
=> (lambda (m)
(loop group impl (cons (inexact->exact (floor (* 1000 (string->number (cadr m)))))
times)))]
[(regexp-match #rx"cpu (?:time|msec): ([0-9]+)" l)
=> (lambda (m)
(loop group impl (cons (string->number (cadr m)) times)))]
[(or (regexp-match #rx"([0-9.]+)(?:s| secs) (?:CPU time|elapsed cpu time|cpu time)" l)
(regexp-match #rx"elapsed ([0-9.]+)s" l))
=> (lambda (m)
(loop group impl (cons (inexact->exact (floor (* 1000 (string->number (cadr m)))))
times)))]
[(regexp-match #rx"([0-9.]+)u ([0-9.]+)s" l) ; [0-9]+:[0-9.]+ [0-9.]+%" l)
=> (lambda (m)
(loop group impl (cons (inexact->exact (floor (* 1000 (+ (string->number (cadr m))
(string->number (caddr m))))))
times)))]
[else (loop group impl times)])])))
(define (summary report group impl times drop-runs rel-stdev?)
(let* ([times (reverse times)]
[times (if ((length times) . > . drop-runs)
(list-tail times drop-runs)
'())])
(define median (if (null? times)
"???"
(list-ref (sort times <) (quotient (length times) 2))))
(define avg (if (null? times)
"???"
(quotient (apply + times) (length times))))
(define dev (cond
[(null? times) "???"]
[(null? (cdr times)) (if rel-stdev?
"0%"
"±0")]
[else
(let ([stdev (sqrt (/ (for/sum ([t (in-list times)])
(expt (- t avg) 2))
(sub1 (length times))))])
(cond
[rel-stdev?
(string-append
(~r #:precision '(= 2)
(* 100
(/ stdev
avg)))
"%")]
[else
(format "±~a" (inexact->exact (round stdev)))]))]))
(report group impl (number->string median) (number->string avg) dev)))
(module+ main
(require racket/cmdline)
(define NAME-WIDTH 25)
(define TIME-WIDTH 8)
(define RSD-WIDTH 10)
(define line-format "~a~a ~a ~a\n")
(define init-impl #f)
(define drop-runs 0)
(define gc? #f)
(define sort? #f)
(define chars-per-unit #f)
(define inputs null)
(define lines null)
(command-line
#:once-each
[("--impl") name "Input is for implementation <name>"
(set! init-impl name)]
[("--drop") n "Drop first <n> runs"
(set! drop-runs (string->number n))]
[("--width") w "Name width as <w>"
(set! NAME-WIDTH (string->number w))]
[("--gc") "Collect GC times"
(set! gc? #t)]
[("--sort") "Sort by average"
(set! sort? #t)]
[("--bars") n "Plot comparative with <n> characters per unit"
(set! chars-per-unit (string->number n))]
#:multi
[("++in") path "Read from <path> for comparative"
(set! inputs (cons path inputs))]
#:args ()
(void))
(define (pad s n
#:right? [right? #f])
(let ([s (format "~a" s)])
(define padding (make-string (max 0 (- n (string-length s))) #\space))
(if right?
(string-append s padding)
(string-append padding s))))
(cond
[(null? inputs)
;; stdin, single-implement mode
(define (report group impl median avg dev)
(printf line-format
(pad (if impl
(format "~a ~a:" group impl)
(format "~a:" group))
NAME-WIDTH)
(pad median TIME-WIDTH)
(pad avg TIME-WIDTH)
(pad dev RSD-WIDTH)))
(define (save-or-report group impl median avg dev)
(cond
[sort?
(set! lines (cons (list group impl median avg dev)
lines))]
[else (report group impl median avg dev)]))
(printf line-format
(pad "" NAME-WIDTH)
(pad "median" TIME-WIDTH)
(pad "mean" TIME-WIDTH)
(pad "stdev" RSD-WIDTH))
(parse-times (current-input-port)
#:init-impl init-impl
#:drop-runs drop-runs
save-or-report
#:accum void
#:gc? gc?)
(when sort?
(for ([l (in-list (sort lines <
#:key (lambda (p)
(string->number (list-ref p 3)))))])
(apply report l)))]
[else
;; multi-implementation comparison mode
(struct entry (median average stddev))
(define groups (make-hash))
(define list-groups null)
(define (report group impl med avg dev)
(unless (hash-ref groups group #f)
(hash-set! groups group #t)
(set! list-groups (cons group list-groups)))
(cons group (entry med avg dev)))
(define name+timess
(for/list ([input (in-list (reverse inputs))])
(define-values (base name-path dir?) (split-path input))
(define name (path->string name-path))
(define times
(call-with-input-file*
input
(lambda (i)
(parse-times i
#:init-impl name
#:drop-runs drop-runs
report
#:accum (lambda (p ht) (hash-set ht (car p) (cdr p)))
#:base #hash()
#:gc? gc?))))
(list name times)))
(define (format-number n)
(if chars-per-unit
(let ([n (inexact->exact (round (* n chars-per-unit)))])
(if (n . < . chars-per-unit)
(make-string n #\=)
(string-append (make-string (sub1 chars-per-unit) #\=)
"|"
(make-string (- n chars-per-unit) #\=))))
(~r #:precision '(= 2) n)))
(define SEP 2)
(define widths
(for/list ([name+times (in-list name+timess)])
(for/fold ([n (string-length (car name+times))]) ([group (in-list list-groups)])
(define base-e (hash-ref (list-ref (car name+timess) 1) group))
(define base (string->number (entry-median base-e)))
(define v (string->number (entry-median (hash-ref (list-ref name+times 1) group))))
(max n (string-length (format-number (/ v base)))))))
(display (pad "" NAME-WIDTH))
(for ([name+times (in-list name+timess)]
[width (in-list widths)])
(define name (car name+times))
(display (make-string SEP #\space))
(display (pad name width #:right? chars-per-unit)))
(newline)
(for ([group (in-list (reverse list-groups))])
(define e (hash-ref (list-ref (car name+timess) 1) group))
(define base (string->number (entry-median e)))
(display (pad (format "~a:" group)
NAME-WIDTH))
(display (make-string SEP #\space))
(display (pad (format-number 1.0)
(car widths)
#:right? chars-per-unit))
(for ([name+times (in-list (cdr name+timess))]
[width (in-list (cdr widths))])
(define name (car name+times))
(define v (string->number (entry-median (hash-ref (list-ref name+times 1) group))))
(display (make-string SEP #\space))
(display (pad (format-number (/ v base))
width
#:right? chars-per-unit)))
(newline))]))