Fixes, docs, and API changes for `math/statistics'
* Fixed and added tests for `quantile' and `median', documented them * Added `sort-samples', documented it * Removed `real-quantile' and `real-median' (too many design choices right now; will revisit when implementing Kernel Density Estimators) * Documented `absdev' and `absdev/median' * Fixed `update-statistics*': now uses O(1) space as advertised (if the sequences of values and weights both use O(1) space) * Changed types of binning functions: allows using #:key in the future (when TR supports function type cases that differ only by keyword argument types better), places optional weights at the end like other statistics functions * Clarified binning docs about sort stability and half-open intervals
This commit is contained in:
parent
9c70f3373d
commit
9865182df4
|
@ -12,7 +12,7 @@
|
|||
sample-bin-compact
|
||||
sample-bin-total
|
||||
bin-samples
|
||||
bin-weighted-samples)
|
||||
bin-samples/key)
|
||||
|
||||
;; ===================================================================================================
|
||||
;; Hashing
|
||||
|
@ -100,65 +100,71 @@
|
|||
(cond [(pred? x) (loop (rest xs) (cons x ys))]
|
||||
[else (values (reverse ys) xs)])])))
|
||||
|
||||
(: bin-samples
|
||||
(All (A B)
|
||||
(case-> ((Sequenceof A) (A A -> Any) (Sequenceof A) -> (Listof (sample-bin A A)))
|
||||
((Sequenceof B) (B B -> Any) (Sequenceof A) (A -> B) -> (Listof (sample-bin A B))))))
|
||||
(define bin-samples
|
||||
(case-lambda
|
||||
[(bnds lte? xs) (bin-samples bnds lte? xs (λ: ([x : A]) x))]
|
||||
[(bnds lte? xs key)
|
||||
(let* ([bnds (sort (sequence->list bnds) (λ: ([b1 : B] [b2 : B]) (and (lte? b1 b2) #t)))]
|
||||
[xs (sequence->list xs)]
|
||||
[xks (map (λ: ([x : A]) (cons x (key x))) xs)]
|
||||
[xks (sort xks (λ: ([xk1 : (Pair A B)] [xk2 : (Pair A B)])
|
||||
(and (lte? (cdr xk1) (cdr xk2)) #t)))])
|
||||
(cond
|
||||
[(empty? bnds)
|
||||
(cond [(empty? xks) empty]
|
||||
[else (define min (cdr (first xks)))
|
||||
(define max (cdr (last xks)))
|
||||
(list (sample-bin min max (map (inst car A B) xks) #f))])]
|
||||
[else
|
||||
(let: loop : (Listof (sample-bin A B)) ([min : (U #f B) #f]
|
||||
[max : B (first bnds)]
|
||||
[bnds : (Listof B) (rest bnds)]
|
||||
[xks : (Listof (Pair A B)) xks]
|
||||
[bins : (Listof (sample-bin A B)) empty])
|
||||
(let-values ([(yks xks) (list-split-after xks (λ: ([xk : (Pair A B)])
|
||||
(lte? (cdr xk) max)))])
|
||||
(define maybe-bin
|
||||
(cond [min (list (sample-bin min max (map (inst car A B) yks) #f))]
|
||||
[(empty? yks) empty]
|
||||
[else (list (sample-bin (cdr (first yks)) max (map (inst car A B) yks) #f))]))
|
||||
(cond [(empty? bnds)
|
||||
(cond [(empty? xks) (reverse (append maybe-bin bins))]
|
||||
[else
|
||||
(define bin2
|
||||
(sample-bin max (cdr (last xks)) (map (inst car A B) xks) #f))
|
||||
(reverse (append (cons bin2 maybe-bin) bins))])]
|
||||
[else
|
||||
(loop max (first bnds) (rest bnds) xks (append maybe-bin bins))])))]))]))
|
||||
(: bin-unweighted-samples/key
|
||||
(All (A B) ((Sequenceof B) (B B -> Any) (A -> B) (Sequenceof A) -> (Listof (sample-bin A B)))))
|
||||
(define (bin-unweighted-samples/key bnds lte? key xs)
|
||||
(: lt? (B B -> Boolean))
|
||||
(define (lt? x1 x2)
|
||||
(and (lte? x1 x2) (not (lte? x2 x1))))
|
||||
(let* ([bnds (sort (sequence->list bnds) lt?)]
|
||||
[xs (sequence->list xs)]
|
||||
[xks (map (λ: ([x : A]) (cons x (key x))) xs)]
|
||||
[xks (sort xks (λ: ([xk1 : (Pair A B)] [xk2 : (Pair A B)])
|
||||
(lt? (cdr xk1) (cdr xk2))))])
|
||||
(cond
|
||||
[(empty? bnds)
|
||||
(cond [(empty? xks) empty]
|
||||
[else (define min (cdr (first xks)))
|
||||
(define max (cdr (last xks)))
|
||||
(list (sample-bin min max (map (inst car A B) xks) #f))])]
|
||||
[else
|
||||
(let: loop : (Listof (sample-bin A B)) ([min : (U #f B) #f]
|
||||
[max : B (first bnds)]
|
||||
[bnds : (Listof B) (rest bnds)]
|
||||
[xks : (Listof (Pair A B)) xks]
|
||||
[bins : (Listof (sample-bin A B)) empty])
|
||||
(let-values ([(yks xks) (list-split-after xks (λ: ([xk : (Pair A B)])
|
||||
(lte? (cdr xk) max)))])
|
||||
(define maybe-bin
|
||||
(cond [min (list (sample-bin min max (map (inst car A B) yks) #f))]
|
||||
[(empty? yks) empty]
|
||||
[else (list (sample-bin (cdr (first yks)) max (map (inst car A B) yks) #f))]))
|
||||
(cond [(empty? bnds)
|
||||
(cond [(empty? xks) (reverse (append maybe-bin bins))]
|
||||
[else
|
||||
(define bin2
|
||||
(sample-bin max (cdr (last xks)) (map (inst car A B) xks) #f))
|
||||
(reverse (append (cons bin2 maybe-bin) bins))])]
|
||||
[else
|
||||
(loop max (first bnds) (rest bnds) xks (append maybe-bin bins))])))])))
|
||||
|
||||
(: bin-weighted-samples
|
||||
(All (A B) (case-> ((Sequenceof A) (A A -> Any) (Sequenceof A) (U #f (Sequenceof Real))
|
||||
-> (Listof (sample-bin A A)))
|
||||
((Sequenceof B) (B B -> Any) (Sequenceof A) (U #f (Sequenceof Real)) (A -> B)
|
||||
-> (Listof (sample-bin A B))))))
|
||||
(define bin-weighted-samples
|
||||
(case-lambda
|
||||
[(bnds lte? xs ws)
|
||||
(cond [ws (bin-weighted-samples bnds lte? xs ws (λ: ([x : A]) x))]
|
||||
[else (bin-samples bnds lte? xs)])]
|
||||
[(bnds lte? xs ws key)
|
||||
(cond [ws (let-values ([(xs ws) (sequences->weighted-samples 'bin-samples xs ws)])
|
||||
(define xws (map (inst cons A Nonnegative-Real) xs ws))
|
||||
(define xw-key (λ: ([xw : (Pair A Nonnegative-Real)]) (key (car xw))))
|
||||
(map (λ: ([bin : (sample-bin (Pair A Nonnegative-Real) B)])
|
||||
(define xws (sample-bin-values bin))
|
||||
(sample-bin (sample-bin-min bin)
|
||||
(sample-bin-max bin)
|
||||
(map (inst car A Nonnegative-Real) xws)
|
||||
(map (inst cdr A Nonnegative-Real) xws)))
|
||||
(bin-samples bnds lte? xws xw-key)))]
|
||||
[else (bin-samples bnds lte? xs key)])]))
|
||||
(: bin-weighted-samples/key
|
||||
(All (A B) ((Sequenceof B) (B B -> Any) (A -> B) (Sequenceof A) (Sequenceof Real)
|
||||
-> (Listof (sample-bin A B)))))
|
||||
(define (bin-weighted-samples/key bnds lte? key xs ws)
|
||||
(let-values ([(xs ws) (sequences->weighted-samples 'bin-samples/key xs ws)])
|
||||
(define xws (map (inst cons A Nonnegative-Real) xs ws))
|
||||
(define xw-key (λ: ([xw : (Pair A Nonnegative-Real)]) (key (car xw))))
|
||||
(map (λ: ([bin : (sample-bin (Pair A Nonnegative-Real) B)])
|
||||
(define xws (sample-bin-values bin))
|
||||
(sample-bin (sample-bin-min bin)
|
||||
(sample-bin-max bin)
|
||||
(map (inst car A Nonnegative-Real) xws)
|
||||
(map (inst cdr A Nonnegative-Real) xws)))
|
||||
(bin-unweighted-samples/key bnds lte? xw-key xws))))
|
||||
|
||||
(: bin-samples/key
|
||||
(All (A B)
|
||||
(case-> ((Sequenceof B) (B B -> Any) (A -> B) (Sequenceof A) -> (Listof (sample-bin A B)))
|
||||
((Sequenceof B) (B B -> Any) (A -> B) (Sequenceof A) (U #f (Sequenceof Real))
|
||||
-> (Listof (sample-bin A B))))))
|
||||
(define (bin-samples/key bnds lte? key xs [ws #f])
|
||||
(cond [ws (bin-weighted-samples/key bnds lte? key xs ws)]
|
||||
[else (bin-unweighted-samples/key bnds lte? key xs)]))
|
||||
|
||||
(: bin-samples
|
||||
(All (A) (case-> ((Sequenceof A) (A A -> Any) (Sequenceof A) -> (Listof (sample-bin A A)))
|
||||
((Sequenceof A) (A A -> Any) (Sequenceof A) (Sequenceof Real)
|
||||
-> (Listof (sample-bin A A))))))
|
||||
(define (bin-samples bnds lte? xs [ws #f])
|
||||
(bin-samples/key bnds lte? (λ: ([x : A]) x) xs ws))
|
||||
|
|
|
@ -1,69 +1,80 @@
|
|||
#lang typed/racket/base
|
||||
|
||||
(require racket/sequence
|
||||
racket/list
|
||||
racket/fixnum
|
||||
"../../base.rkt"
|
||||
"quickselect.rkt"
|
||||
"statistics-utils.rkt")
|
||||
|
||||
(provide quantile
|
||||
(provide sort-samples
|
||||
quantile
|
||||
median
|
||||
real-quantile
|
||||
real-median
|
||||
absolute-deviation/median
|
||||
absolute-deviation)
|
||||
absdev/median
|
||||
absdev)
|
||||
|
||||
(: sort-weighted-samples
|
||||
(All (A) (Symbol (A A -> Any) (Sequenceof A) (Sequenceof Real)
|
||||
-> (Values (Listof A) (Listof Nonnegative-Real)))))
|
||||
(define (sort-weighted-samples name lt? xs ws)
|
||||
(let-values ([(xs ws) (sequences->weighted-samples name xs ws)])
|
||||
(define xws ((inst sort (Pair A Nonnegative-Real) Real)
|
||||
(map (inst cons A Nonnegative-Real) xs ws)
|
||||
(λ: ([xw1 : (Pair A Nonnegative-Real)]
|
||||
[xw2 : (Pair A Nonnegative-Real)])
|
||||
(and (lt? (car xw1) (car xw2)) #t))))
|
||||
(values (map (inst car A Nonnegative-Real) xws)
|
||||
(map (inst cdr A Nonnegative-Real) xws))))
|
||||
|
||||
(: sort-samples (All (A) (case-> ((A A -> Any) (Sequenceof A) -> (Listof A))
|
||||
((A A -> Any) (Sequenceof A) (U #f (Sequenceof Real))
|
||||
-> (Values (Listof A) (Listof Nonnegative-Real))))))
|
||||
(define sort-samples
|
||||
(case-lambda
|
||||
[(lt? xs)
|
||||
(sort (sequence->list xs)
|
||||
(λ: ([x1 : A] [x2 : A])
|
||||
(and (lt? x1 x2) #t)))]
|
||||
[(lt? xs ws)
|
||||
(cond [ws (sort-weighted-samples 'sort-samples lt? xs ws)]
|
||||
[else (define ys (sort (sequence->list xs)
|
||||
(λ: ([x1 : A] [x2 : A])
|
||||
(and (lt? x1 x2) #t))))
|
||||
(values ys (make-list (length ys) 1))])]))
|
||||
|
||||
(: quantile (All (A) (case-> (Real (A A -> Any) (Sequenceof A) -> A)
|
||||
(Real (A A -> Any) (Sequenceof A) (Option (Sequenceof Real)) -> A))))
|
||||
(Real (A A -> Any) (Sequenceof A) (U #f (Sequenceof Real)) -> A))))
|
||||
(define (quantile p lt? xs [ws #f])
|
||||
(cond [(or (p . < . 0) (p . > . 1))
|
||||
(raise-argument-error 'quantile "Real in [0,1]" 0 p < xs ws)]
|
||||
(raise-argument-error 'quantile "Real in [0,1]" 0 p lt? xs ws)]
|
||||
[ws
|
||||
(let-values ([(xs ws) (sequences->weighted-samples 'median xs ws)])
|
||||
(define xws ((inst sort (Pair A Nonnegative-Real) Real)
|
||||
(map (inst cons A Nonnegative-Real) xs ws)
|
||||
(λ: ([xw1 : (Pair A Nonnegative-Real)]
|
||||
[xw2 : (Pair A Nonnegative-Real)])
|
||||
(if (lt? (car xw1) (car xw2)) #t #f))))
|
||||
(let ([xs (map (inst car A Nonnegative-Real) xws)]
|
||||
[ws (map (inst cdr A Nonnegative-Real) xws)])
|
||||
(define total-w (sum ws))
|
||||
(cond [(zero? total-w)
|
||||
(raise-argument-error 'quantile "weights with positive sum" 3 p lt? xs ws)]
|
||||
[else
|
||||
(define thresh (* p total-w))
|
||||
(let loop ([xs (cdr xs)] [ws (cdr ws)] [x (car xs)] [s (car ws)])
|
||||
(cond [(s . > . thresh) x]
|
||||
[(null? xs) x]
|
||||
[else (loop (cdr xs) (cdr ws) (car xs) (+ s (car ws)))]))])))]
|
||||
(let-values ([(xs ws) (sort-weighted-samples 'quantile lt? xs ws)])
|
||||
(define total-w (sum ws))
|
||||
(cond [(zero? total-w)
|
||||
(raise-argument-error 'quantile "weights with positive sum" 3 p lt? xs ws)]
|
||||
[else
|
||||
(let loop ([xs (cdr xs)] [ws (cdr ws)] [x (car xs)] [s (car ws)])
|
||||
(cond [((/ s total-w) . >= . p) x]
|
||||
[(null? xs) x]
|
||||
[else (loop (cdr xs) (cdr ws) (car xs) (+ s (car ws)))]))]))]
|
||||
[else
|
||||
(let ([xs (sequence->vector xs)])
|
||||
(define n (vector-length xs))
|
||||
(cond [(n . fx<= . 0) (raise-argument-error 'quantile "nonempty Sequence" 2 p lt? xs)]
|
||||
[else (kth-value! xs (min (- n 1) (exact-ceiling (* p n))) lt?)]))]))
|
||||
[else
|
||||
(define i (max 0 (- (exact-ceiling (* p n)) 1)))
|
||||
(kth-value! xs i lt?)]))]))
|
||||
|
||||
(: median (All (A) (case-> ((A A -> Any) (Sequenceof A) -> A)
|
||||
((A A -> Any) (Sequenceof A) (Option (Sequenceof Real)) -> A))))
|
||||
((A A -> Any) (Sequenceof A) (U #f (Sequenceof Real)) -> A))))
|
||||
(define (median lt? xs [ws #f])
|
||||
(cond [ws (quantile 1/2 lt? xs ws)]
|
||||
[else
|
||||
(let ([xs (sequence->vector xs)])
|
||||
(define n (vector-length xs))
|
||||
(cond [(n . fx<= . 0) (raise-argument-error 'median "nonempty Sequence" xs)]
|
||||
[else (kth-value! xs (quotient n 2) lt?)]))]))
|
||||
(quantile 1/2 lt? xs ws))
|
||||
|
||||
(: real-quantile (case-> (Real (Sequenceof Real) -> Real)
|
||||
(Real (Sequenceof Real) (Option (Sequenceof Real)) -> Real)))
|
||||
(define (real-quantile p xs [ws #f])
|
||||
(quantile p < xs ws))
|
||||
;; ===================================================================================================
|
||||
;; Absolute deviation
|
||||
|
||||
(: real-median (case-> ((Sequenceof Real) -> Real)
|
||||
((Sequenceof Real) (Option (Sequenceof Real)) -> Real)))
|
||||
(define (real-median xs [ws #f])
|
||||
(median < xs ws))
|
||||
|
||||
(: absolute-deviation* (Symbol Real (Sequenceof Real) (Option (Sequenceof Real)) -> Nonnegative-Real))
|
||||
(define (absolute-deviation* name m xs ws)
|
||||
(: absdev* (Symbol Real (Sequenceof Real) (Option (Sequenceof Real)) -> Nonnegative-Real))
|
||||
(define (absdev* name m xs ws)
|
||||
(define-values (axs n)
|
||||
(cond [ws (let-values ([(xs ws) (sequences->weighted-samples name xs ws)])
|
||||
(values (map (λ: ([x : Real] [w : Real]) (* w (abs (- x m)))) xs ws)
|
||||
|
@ -75,14 +86,14 @@
|
|||
[else
|
||||
(max 0 (/ (sum axs) n))]))
|
||||
|
||||
(: absolute-deviation/median
|
||||
(: absdev/median
|
||||
(case-> (Real (Sequenceof Real) -> Nonnegative-Real)
|
||||
(Real (Sequenceof Real) (Option (Sequenceof Real)) -> Nonnegative-Real)))
|
||||
(define (absolute-deviation/median m xs [ws #f])
|
||||
(absolute-deviation* 'absolute-deviation/median m xs ws))
|
||||
|
||||
(: absolute-deviation
|
||||
(define (absdev/median m xs [ws #f])
|
||||
(absdev* 'absdev/median m xs ws))
|
||||
|
||||
(: absdev
|
||||
(case-> ((Sequenceof Real) -> Nonnegative-Real)
|
||||
((Sequenceof Real) (Option (Sequenceof Real)) -> Nonnegative-Real)))
|
||||
(define (absolute-deviation xs [ws #f])
|
||||
(absolute-deviation* 'absolute-deviation (real-median xs ws) xs ws))
|
||||
(define (absdev xs [ws #f])
|
||||
(absdev* 'absdev (median < xs ws) xs ws))
|
||||
|
|
|
@ -60,11 +60,12 @@
|
|||
(case-> (statistics (Sequenceof Real) -> statistics)
|
||||
(statistics (Sequenceof Real) (Option (Sequenceof Real)) -> statistics)))
|
||||
(define (update-statistics* e xs [ws #f])
|
||||
(cond [ws (let-values ([(xs ws) (sequences->weighted-samples 'update-statistics* xs ws)])
|
||||
(for/fold: ([e : statistics e]) ([x (in-list xs)] [w (in-list ws)])
|
||||
(update-statistics e x w)))]
|
||||
[else (for/fold: ([e : statistics e]) ([x xs])
|
||||
(update-statistics e x 1.0))]))
|
||||
(cond [ws
|
||||
(for/fold: ([e : statistics e]) ([x xs] [w ws])
|
||||
(update-statistics e x w))]
|
||||
[else
|
||||
(for/fold: ([e : statistics e]) ([x xs])
|
||||
(update-statistics e x 1.0))]))
|
||||
|
||||
;; ===================================================================================================
|
||||
|
||||
|
|
|
@ -9,6 +9,10 @@
|
|||
|
||||
;; ===================================================================================================
|
||||
|
||||
(: lte?->lt? (All (A) ((A A -> Any) -> (A A -> Boolean))))
|
||||
(define ((lte?->lt? lte?) x1 x2)
|
||||
(and (lte? x1 x2) (not (lte? x2 x1))))
|
||||
|
||||
(: find-near-pow2 (Real -> Nonnegative-Exact-Rational))
|
||||
(define (find-near-pow2 x)
|
||||
(expt 2 (max -1073 (min 1023 (exact-round (/ (log (abs x)) (fllog 2.0)))))))
|
||||
|
|
|
@ -5,10 +5,11 @@
|
|||
(for-label racket/base racket/promise racket/list
|
||||
math plot
|
||||
(only-in typed/racket/base
|
||||
ann
|
||||
ann inst
|
||||
Flonum Real Boolean Any Listof Integer case-> -> U
|
||||
Sequenceof Positive-Flonum Nonnegative-Flonum
|
||||
HashTable Positive-Integer Nonnegative-Real Values))
|
||||
Sequenceof Positive-Flonum Nonnegative-Flonum Symbol
|
||||
HashTable Positive-Integer Nonnegative-Real Values
|
||||
String))
|
||||
"utils.rkt")
|
||||
|
||||
@(define typed-eval (make-math-eval))
|
||||
|
@ -34,89 +35,6 @@ See @secref{stats:expected-values} for a discussion.
|
|||
|
||||
@local-table-of-contents[]
|
||||
|
||||
@section{Counting and Binning}
|
||||
|
||||
@defproc*[([(samples->hash [xs (Sequenceof A)]) (HashTable A Positive-Integer)]
|
||||
[(samples->hash [xs (Sequenceof A)] [ws (U #f (Sequenceof Real))])
|
||||
(HashTable A Nonnegative-Real)])]{
|
||||
@examples[#:eval typed-eval
|
||||
(samples->hash '(1 2 3 4 4))
|
||||
(samples->hash '(1 1 2 3 4) '(1/2 1/2 1 1 2))]
|
||||
}
|
||||
|
||||
@defproc*[([(count-samples [xs (Sequenceof A)]) (Values (Listof A) (Listof Positive-Integer))]
|
||||
[(count-samples [xs (Sequenceof A)] [ws (U #f (Sequenceof Real))])
|
||||
(Values (Listof A) (Listof Nonnegative-Real))])]{
|
||||
@examples[#:eval typed-eval
|
||||
(count-samples '(1 2 3 4 4))
|
||||
(count-samples '(1 1 2 3 4) '(1/2 1/2 1 1 2))]
|
||||
}
|
||||
|
||||
@defstruct*[sample-bin ([min B]
|
||||
[max B]
|
||||
[values (Listof A)]
|
||||
[weights (U #f (Listof Nonnegative-Real))])]{
|
||||
Represents a @italic{bin}, or a group of samples within an interval in a total order.
|
||||
The values and bounds have a different type to allow @racket[bin-samples] and
|
||||
@racket[bin-weighted-samples] to group elements based on a function of their values (a @racket[key],
|
||||
like in @racket[sort]).
|
||||
}
|
||||
|
||||
@defproc*[([(bin-samples [bounds (Sequenceof A)]
|
||||
[lte? (A A -> Any)]
|
||||
[xs (Sequenceof A)])
|
||||
(Listof (sample-bin A A))]
|
||||
[(bin-samples [bounds (Sequenceof B)]
|
||||
[lte? (B B -> Any)]
|
||||
[xs (Sequenceof A)]
|
||||
[key (A -> B)])
|
||||
(Listof (sample-bin A B))])]{
|
||||
Like @racket[(sort xs lte? #:key key)], but additionally groups samples into bins.
|
||||
Keys are always cached, and @racket[bounds] is sorted before binning.
|
||||
|
||||
If @racket[n = (length bounds)], then @racket[bin-samples] returns @italic{at least} @racket[(- n 1)]
|
||||
bins, one for each pair of adjacent (sorted) bounds.
|
||||
If some values in @racket[xs] are less than the smallest bound, they are grouped into a single bin in
|
||||
front.
|
||||
If some are greater than the largest bound, they are grouped into a single bin at the end.
|
||||
|
||||
@examples[#:eval typed-eval
|
||||
(bin-samples '() <= '(0 1 2 3 4 5 6))
|
||||
(bin-samples '(3) <= '(0 1 2 3 4 5 6))
|
||||
(bin-samples '(2 4) <= '(0 1 2 3 4 5 6))]
|
||||
|
||||
Note that @racket[bin-samples] always returns bins with @racket[#f] weights, meaning they contain
|
||||
unweighted samples.
|
||||
}
|
||||
|
||||
@defproc*[([(bin-weighted-samples [xs (Sequenceof A)]
|
||||
[ws (Sequenceof Real)]
|
||||
[bounds (Sequenceof A)]
|
||||
[lte? (A A -> Any)])
|
||||
(Listof (sample-bin A A))]
|
||||
[(bin-weighted-samples [xs (Sequenceof A)]
|
||||
[ws (Sequenceof Real)]
|
||||
[bounds (Sequenceof B)]
|
||||
[lte? (B B -> Any)]
|
||||
[key (A -> B)])
|
||||
(Listof (sample-bin A B))])]{
|
||||
Like @racket[bin-samples], but for weighted samples.
|
||||
}
|
||||
|
||||
@defproc[(sample-bin-compact [bin (sample-bin A B)]) (sample-bin A B)]{
|
||||
Compacts @racket[bin] by applying @racket[count-samples] to its values and weights.
|
||||
@examples[#:eval typed-eval
|
||||
(sample-bin-compact (sample-bin 1 4 '(1 2 3 4 4) #f))]
|
||||
}
|
||||
|
||||
@defproc[(sample-bin-total [bin (sample-bin A B)]) Nonnegative-Real]{
|
||||
If @racket[(sample-bin-weights bin)] is @racket[#f], returns the number of samples in @racket[bin];
|
||||
otherwise, returns the sum of their weights.
|
||||
@examples[#:eval typed-eval
|
||||
(sample-bin-total (sample-bin 1 4 '(1 2 3 4 4) #f))
|
||||
(sample-bin-total (sample-bin-compact (sample-bin 1 4 '(1 2 3 4 4) #f)))]
|
||||
}
|
||||
|
||||
@section[#:tag "stats:expected-values"]{Expected Values}
|
||||
|
||||
Functions documented in this section that compute higher central moments, such as @racket[variance],
|
||||
|
@ -206,7 +124,7 @@ using known mean @racket[mean].
|
|||
@section[#:tag "stats:running"]{Running Expected Values}
|
||||
|
||||
The @racket[statistics] object allows computing the sample minimum, maximum, count, mean, variance,
|
||||
skewness, and excess kurtosis of any number of samples in O(1) space.
|
||||
skewness, and excess kurtosis of a sequence of samples in O(1) space.
|
||||
|
||||
To use it, start with @racket[empty-statistics], then use @racket[update-statistics] to obtain a
|
||||
new statistics object with updated values. Use @racket[statistics-min], @racket[statistics-mean],
|
||||
|
@ -326,6 +244,148 @@ Like @racket[covariance] and @racket[correlation], but computed using known mean
|
|||
@racket[μx] and @racket[μy].
|
||||
}
|
||||
|
||||
@section{Counting and Binning}
|
||||
|
||||
@defproc*[([(samples->hash [xs (Sequenceof A)]) (HashTable A Positive-Integer)]
|
||||
[(samples->hash [xs (Sequenceof A)] [ws (U #f (Sequenceof Real))])
|
||||
(HashTable A Nonnegative-Real)])]{
|
||||
@examples[#:eval typed-eval
|
||||
(samples->hash '(1 2 3 4 4))
|
||||
(samples->hash '(1 1 2 3 4) '(1/2 1/2 1 1 2))]
|
||||
}
|
||||
|
||||
@defproc*[([(count-samples [xs (Sequenceof A)]) (Values (Listof A) (Listof Positive-Integer))]
|
||||
[(count-samples [xs (Sequenceof A)] [ws (U #f (Sequenceof Real))])
|
||||
(Values (Listof A) (Listof Nonnegative-Real))])]{
|
||||
@examples[#:eval typed-eval
|
||||
(count-samples '(1 2 3 4 4))
|
||||
(count-samples '(1 1 2 3 4) '(1/2 1/2 1 1 2))]
|
||||
}
|
||||
|
||||
@defstruct*[sample-bin ([min B]
|
||||
[max B]
|
||||
[values (Listof A)]
|
||||
[weights (U #f (Listof Nonnegative-Real))])]{
|
||||
Represents a @italic{bin}, or a group of samples within an interval in a total order.
|
||||
The values and bounds have a different type to allow @racket[bin-samples/key]
|
||||
to group elements based on a function of their values.
|
||||
}
|
||||
|
||||
@defproc[(bin-samples [bounds (Sequenceof A)]
|
||||
[lte? (A A -> Any)]
|
||||
[xs (Sequenceof A)]
|
||||
[ws (U #f (Sequenceof Real))])
|
||||
(Listof (sample-bin A A))]{
|
||||
Like @racket[(sort xs lte?)], but additionally groups samples into bins.
|
||||
The bins' @racket[bounds] are sorted before binning @racket[xs].
|
||||
|
||||
If @racket[n = (length bounds)], then @racket[bin-samples] returns @italic{at least} @racket[(- n 1)]
|
||||
bins, one for each pair of adjacent (sorted) bounds.
|
||||
If some values in @racket[xs] are less than the smallest bound, they are grouped into a single bin in
|
||||
front.
|
||||
If some are greater than the largest bound, they are grouped into a single bin at the end.
|
||||
|
||||
@examples[#:eval typed-eval
|
||||
(bin-samples '() <= '(0 1 2 3 4 5 6))
|
||||
(bin-samples '(3) <= '(0 1 2 3 4 5 6))
|
||||
(bin-samples '(2 4) <= '(0 1 2 3 4 5 6))
|
||||
(bin-samples '(2 4) <=
|
||||
'(0 1 2 3 4 5 6)
|
||||
'(10 20 30 40 50 60 70))]
|
||||
|
||||
If @racket[lte?] is a less-than-or-equal relation, the bins represent half-open intervals
|
||||
(@racket[min], @racket[max]] (except possibly the first, which may be closed).
|
||||
If @racket[lte?] is a less-than relation, the bins represent half-open intervals
|
||||
[@racket[min], @racket[max]) (except possibly the last, which may be closed).
|
||||
In either case, the sorts applied to @racket[bounds] and @racket[xs] are stable.
|
||||
|
||||
Because intervals used in probability measurements are normally open on the left, prefer to use
|
||||
less-than-or-equal relations for @racket[lte?].
|
||||
|
||||
If @racket[ws] is @racket[#f], @racket[bin-samples] returns bins with @racket[#f] weights.
|
||||
}
|
||||
|
||||
@defproc[(bin-samples/key [bounds (Sequenceof B)]
|
||||
[lte? (B B -> Any)]
|
||||
[key (A -> B)]
|
||||
[xs (Sequenceof A)]
|
||||
[ws (U #f (Sequenceof Real))])
|
||||
(Listof (sample-bin A B))]{
|
||||
Like @racket[(sort xs lte? #:key key #:cache-keys? #t)], but additionally groups samples into bins.
|
||||
@examples[#:eval typed-eval
|
||||
(bin-samples/key '(2 4) <= (inst car Real String)
|
||||
'((1 . "1") (2 . "2") (3 . "3") (4 . "4") (5 . "5")))]
|
||||
See @racket[bin-samples] for the simpler, one-type variant.
|
||||
}
|
||||
|
||||
@defproc[(sample-bin-compact [bin (sample-bin A B)]) (sample-bin A B)]{
|
||||
Compacts @racket[bin] by applying @racket[count-samples] to its values and weights.
|
||||
@examples[#:eval typed-eval
|
||||
(sample-bin-compact (sample-bin 1 4 '(1 2 3 4 4) #f))]
|
||||
}
|
||||
|
||||
@defproc[(sample-bin-total [bin (sample-bin A B)]) Nonnegative-Real]{
|
||||
If @racket[(sample-bin-weights bin)] is @racket[#f], returns the number of samples in @racket[bin];
|
||||
otherwise, returns the sum of their weights.
|
||||
@examples[#:eval typed-eval
|
||||
(sample-bin-total (sample-bin 1 4 '(1 2 3 4 4) #f))
|
||||
(sample-bin-total (sample-bin-compact (sample-bin 1 4 '(1 2 3 4 4) #f)))]
|
||||
}
|
||||
|
||||
@section{Order Statistics}
|
||||
|
||||
@defproc*[([(sort-samples [lt? (A A -> Any)] [xs (Sequenceof A)]) (Listof A)]
|
||||
[(sort-samples [lt? (A A -> Any)]
|
||||
[xs (Sequenceof A)]
|
||||
[ws (U #f (Sequenceof Real))])
|
||||
(Values (Listof A) (Listof Nonnegative-Real))])]{
|
||||
Sorts possibly weighted samples according to @racket[lt?], which is assumed to define a total
|
||||
order over the elements in @racket[xs].
|
||||
@examples[#:eval typed-eval
|
||||
(sort-samples < '(5 2 3 1))
|
||||
(sort-samples < '(5 2 3 1) '(50 20 30 10))
|
||||
(sort-samples < '(5 2 3 1) #f)]
|
||||
Because @racket[sort-samples] is defined in terms of @racket[sort], the sort is only guaranteed
|
||||
to be stable if @racket[lt?] is strictly a less-than relation.
|
||||
}
|
||||
|
||||
@defproc[(median [lt? (A A -> Any)] [xs (Sequenceof A)] [ws (U #f (Sequenceof Real)) #f]) A]{
|
||||
Equivalent to @racket[(quantile 1/2 lt? xs ws)].
|
||||
}
|
||||
|
||||
@defproc[(quantile [p Real]
|
||||
[lt? (A A -> Any)]
|
||||
[xs (Sequenceof A)]
|
||||
[ws (U #f (Sequenceof Real)) #f])
|
||||
A]{
|
||||
Computes the inverse of the empirical @tech{cdf} represented by the samples @racket[xs],
|
||||
which are optionally weighted by @racket[ws].
|
||||
|
||||
@examples[#:eval typed-eval
|
||||
(quantile 0 < '(1 3 5))
|
||||
(quantile 0.5 < '(1 2 3 4))
|
||||
(quantile 0.5 < '(1 2 3 4) '(0.25 0.20 0.20 0.35))]
|
||||
|
||||
If @racket[p = 0], @racket[quantile] returns the smallest element of @racket[xs] under the
|
||||
ordering relation @racket[lt?]. If @racket[p = 1], it returns the largest element.
|
||||
|
||||
For weighted samples, @racket[quantile] sorts @racket[xs] and @racket[ws] together
|
||||
(using @racket[sort-samples]), then finds the least @racket[x] for which the proportion of its
|
||||
cumulative weight is greater than or equal to @racket[p].
|
||||
|
||||
For unweighted samples, @racket[quantile] uses the quickselect algorithm to find the element that
|
||||
would be at index @racket[(ceiling (- (* p n) 1))] if @racket[xs] were sorted, where @racket[n]
|
||||
is the length of @racket[xs].
|
||||
}
|
||||
|
||||
@defproc[(absdev [xs (Sequenceof Real)] [ws (U #f (Sequenceof Real)) #f]) Nonnegative-Real]{
|
||||
Computes the average absolute difference between the elements in @racket[xs] and
|
||||
@racket[(median < xs ws)]. If @racket[ws] is not @racket[#f], it is a weighted average.
|
||||
}
|
||||
|
||||
@defproc[(absdev/median [median Real] [xs (Sequenceof Real)] [ws (U #f (Sequenceof Real)) #f])
|
||||
Nonnegative-Real]{
|
||||
Like @racket[(absdev xs ws)], but computed using known median @racket[median].
|
||||
}
|
||||
|
||||
@(close-eval typed-eval)
|
||||
|
|
|
@ -92,3 +92,26 @@
|
|||
(list (sample-bin 1 3 '(1 1 2 2 2 3) #f)
|
||||
(sample-bin 3 8 '(4 5 5 5 5 6 7 8) #f)
|
||||
(sample-bin 8 9 '(9 9) #f)))
|
||||
|
||||
(check-equal?
|
||||
(bin-samples '(3 8) <=
|
||||
'(1 1 2 2 2 3 4 5 5 5 5 6 7 8 9 9)
|
||||
'(1 1.1 2 2.1 2.2 3 4 5 5.1 5.2 5.3 6 7 8 9 9.1))
|
||||
(list (sample-bin 1 3 '(1 1 2 2 2 3) '(1 1.1 2 2.1 2.2 3))
|
||||
(sample-bin 3 8 '(4 5 5 5 5 6 7 8) '(4 5 5.1 5.2 5.3 6 7 8))
|
||||
(sample-bin 8 9 '(9 9) '(9 9.1))))
|
||||
|
||||
(check-equal?
|
||||
(bin-samples/key '(2 4) <= (inst car Real Symbol)
|
||||
'((1 . a) (2 . b) (3 . c) (4 . d) (5 . e)))
|
||||
(list (sample-bin 1 2 '((1 . a) (2 . b)) #f)
|
||||
(sample-bin 2 4 '((3 . c) (4 . d)) #f)
|
||||
(sample-bin 4 5 '((5 . e)) #f)))
|
||||
|
||||
(for: ([p '(0 1/6 2/6 3/6 4/6 5/6 6/6)])
|
||||
(check-equal? (quantile p < '(1 2 3) '(1 1 1))
|
||||
(quantile p < '(1 2 3))))
|
||||
|
||||
(for: ([p '(0 1/8 2/8 3/8 4/8 5/8 6/8 7/8 8/8)])
|
||||
(check-equal? (quantile p < '(1 2 3 4) '(1 1 1 1))
|
||||
(quantile p < '(1 2 3 4))))
|
||||
|
|
Loading…
Reference in New Issue
Block a user