From 2d34811ab644a76372d64a6cafa6a48ebc3b6e6e Mon Sep 17 00:00:00 2001 From: Neil Toronto Date: Tue, 27 Nov 2012 13:37:07 -0700 Subject: [PATCH] Finished `math/distributions' documentation! Fixed a few limit cases in some distributions (e.g. (uniform-dist 0 0) didn't act like a delta distribution, (beta-dist 0 0) and (beta-dist +inf.0 +inf.0) pretended to be defined by unique limits even though they can't be) Made integer distributions' pdfs return +nan.0 when given non-integers Added "private/statistics/counting.rkt", for hashing and binning samples Added `flvector-sums' (cumulative sums with single rounding error) Added `flinteger?', `flnan?' and `flrational?', which are faster than their non-flonum counterparts (at least in Typed Racket; haven't tested untyped) --- .../private/distributions/bernoulli-dist.rkt | 4 +- .../math/private/distributions/beta-dist.rkt | 4 + .../private/distributions/binomial-dist.rkt | 21 +- .../private/distributions/discrete-dist.rkt | 10 +- .../private/distributions/dist-struct.rkt | 21 +- .../private/distributions/geometric-dist.rkt | 15 +- .../distributions/impl/beta-inv-cdf.rkt | 5 +- .../private/distributions/impl/beta-pdf.rkt | 19 +- .../distributions/impl/binomial-pdf.rkt | 18 +- .../private/distributions/poisson-dist.rkt | 3 +- .../private/distributions/triangle-dist.rkt | 5 +- .../private/distributions/truncated-dist.rkt | 11 +- .../private/distributions/uniform-dist.rkt | 4 +- .../math/private/flonum/flonum-functions.rkt | 20 +- .../private/functions/incomplete-beta.rkt | 8 +- collects/math/private/statistics/counting.rkt | 61 ++ .../private/statistics/statistics-utils.rkt | 25 +- collects/math/private/vector/flvector.rkt | 42 +- .../math/scribblings/math-distributions.scrbl | 569 +++++++++++++++--- collects/math/statistics.rkt | 6 +- 20 files changed, 707 insertions(+), 164 deletions(-) create mode 100644 collects/math/private/statistics/counting.rkt diff --git a/collects/math/private/distributions/bernoulli-dist.rkt b/collects/math/private/distributions/bernoulli-dist.rkt index d50c094ec5..11c019cbbe 100644 --- a/collects/math/private/distributions/bernoulli-dist.rkt +++ b/collects/math/private/distributions/bernoulli-dist.rkt @@ -19,10 +19,10 @@ (cond [(not (flprobability? q)) +nan.0] [log? (cond [(fl= k 0.0) (fllog1p (- q))] [(fl= k 1.0) (fllog q)] - [else 0.0])] + [else +nan.0])] [else (cond [(fl= k 0.0) (fl- 1.0 q)] [(fl= k 1.0) q] - [else 0.0])])) + [else +nan.0])])) (: flbernoulli-cdf (Flonum Flonum Any Any -> Flonum)) (define (flbernoulli-cdf q k log? 1-p?) diff --git a/collects/math/private/distributions/beta-dist.rkt b/collects/math/private/distributions/beta-dist.rkt index 50e4bec761..0af8ff42bd 100644 --- a/collects/math/private/distributions/beta-dist.rkt +++ b/collects/math/private/distributions/beta-dist.rkt @@ -38,6 +38,10 @@ (: flbeta-sample (Flonum Flonum Integer -> FlVector)) (define (flbeta-sample a b n) (cond [(n . < . 0) (raise-argument-error 'flbeta-sample "Natural" 2 a b n)] + [(or (a . fl< . 0.0) (b . fl< . 0.0) + (and (fl= a 0.0) (fl= b 0.0)) + (and (fl= a +inf.0) (fl= b +inf.0))) + (build-flvector n (λ (_) +nan.0))] [else (define xs (flgamma-sample a 1.0 n)) (define ys (flgamma-sample b 1.0 n)) diff --git a/collects/math/private/distributions/binomial-dist.rkt b/collects/math/private/distributions/binomial-dist.rkt index 01997ded23..f663b308cb 100644 --- a/collects/math/private/distributions/binomial-dist.rkt +++ b/collects/math/private/distributions/binomial-dist.rkt @@ -25,29 +25,30 @@ (: flbinomial-cdf (Flonum Flonum Flonum Any Any -> Flonum)) (define (flbinomial-cdf n q k log? 1-p?) - (cond [(n . fl< . 0.0) +nan.0] - [(not (integer? n)) +nan.0] - [(or (q . < . 0.0) (q . > . 1.0)) +nan.0] + (cond [(or (not (flinteger? n)) (n . fl< . 0.0) + (q . fl< . 0.0) (q . fl> . 1.0)) + +nan.0] [else (let ([k (flfloor k)]) - (cond [log? (fllog-beta-inc (+ k 1.0) (- n k) q (not 1-p?) #t)] - [else (flbeta-inc (+ k 1.0) (- n k) q (not 1-p?) #t)]))])) + (cond [log? (fllog-beta-inc (fl+ k 1.0) (fl- n k) q (not 1-p?) #t)] + [else (flbeta-inc (fl+ k 1.0) (fl- n k) q (not 1-p?) #t)]))])) (: flbinomial-inv-cdf (Flonum Flonum Flonum Any Any -> Flonum)) (define (flbinomial-inv-cdf n q p log? 1-p?) - (cond [(n . fl< . 0.0) +nan.0] - [(not (integer? n)) +nan.0] - [(not (flprobability? p log?)) +nan.0] + (cond [(or (not (flinteger? n)) (n . fl< . 0.0) + (q . fl< . 0.0) (q . fl> . 1.0) + (not (flprobability? p log?))) + +nan.0] [(flprobability-one? p log? 1-p?) n] [(flprobability-zero? p log? 1-p?) 0.0] [1-p? - (define z (flnormal-inv-cdf (fl* n q) (flsqrt (* n q (fl- 1.0 q))) p log? 1-p?)) + (define z (flnormal-inv-cdf (fl* n q) (flsqrt (* n q (fl- 1.0 q))) p log? #t)) (flfind-least-integer (λ: ([k : Flonum]) ((flbinomial-cdf n q k log? 1-p?) . fl< . p)) 0.0 n (flmax 0.0 (flmin n z)))] [else - (define z (flnormal-inv-cdf (fl* n q) (flsqrt (* n q (fl- 1.0 q))) p log? 1-p?)) + (define z (flnormal-inv-cdf (fl* n q) (flsqrt (* n q (fl- 1.0 q))) p log? #f)) (flfind-least-integer (λ: ([k : Flonum]) ((flbinomial-cdf n q k log? 1-p?) . fl>= . p)) 0.0 n diff --git a/collects/math/private/distributions/discrete-dist.rkt b/collects/math/private/distributions/discrete-dist.rkt index d4386258e5..cc5a7a5cc7 100644 --- a/collects/math/private/distributions/discrete-dist.rkt +++ b/collects/math/private/distributions/discrete-dist.rkt @@ -11,16 +11,16 @@ (provide Discrete-Dist discrete-dist discrete-dist-values - discrete-dist-weights) + discrete-dist-probs) (begin-encourage-inline (struct: (In Out) discrete-dist-struct dist ([values : (Listof Out)] - [weights : (Listof Positive-Flonum)]) + [probs : (Listof Positive-Flonum)]) #:property prop:custom-print-quotable 'never #:property prop:custom-write (λ (v port mode) (pretty-print-constructor - 'discrete-dist (list (discrete-dist-struct-values v) (discrete-dist-struct-weights v)) + 'discrete-dist (list (discrete-dist-struct-values v) (discrete-dist-struct-probs v)) port mode))) (define-type (Discrete-Dist A) (discrete-dist-struct A A)) @@ -28,8 +28,8 @@ (: discrete-dist-values (All (A) ((Discrete-Dist A) -> (Listof A)))) (define (discrete-dist-values d) (discrete-dist-struct-values d)) - (: discrete-dist-weights (All (A) ((Discrete-Dist A) -> (Listof Positive-Flonum)))) - (define (discrete-dist-weights d) (discrete-dist-struct-weights d)) + (: discrete-dist-probs (All (A) ((Discrete-Dist A) -> (Listof Positive-Flonum)))) + (define (discrete-dist-probs d) (discrete-dist-struct-probs d)) ) diff --git a/collects/math/private/distributions/dist-struct.rkt b/collects/math/private/distributions/dist-struct.rkt index 71d6d3b9e1..b2262ef39a 100644 --- a/collects/math/private/distributions/dist-struct.rkt +++ b/collects/math/private/distributions/dist-struct.rkt @@ -18,8 +18,8 @@ (struct-out dist) (struct-out ordered-dist) Real-Dist - dist-median sample - real-dist-prob) + dist-median + pdf sample cdf inv-cdf real-dist-prob) (define-type (PDF In) (case-> (In -> Flonum) @@ -60,6 +60,11 @@ (: dist-median (All (In Out) ((ordered-dist In Out) -> Out))) (define (dist-median d) (force (ordered-dist-median d))) + (: pdf (All (In Out) (case-> ((dist In Out) In -> Flonum) + ((dist In Out) In Any -> Flonum)))) + (define (pdf d v [log? #f]) + ((dist-pdf d) v log?)) + (: sample (All (In Out) (case-> ((dist In Out) -> Out) ((dist In Out) Integer -> (Listof Out))))) (define sample @@ -67,6 +72,18 @@ [(d) ((dist-sample d))] [(d n) ((dist-sample d) n)])) + (: cdf (All (In Out) (case-> ((ordered-dist In Out) In -> Flonum) + ((ordered-dist In Out) In Any -> Flonum) + ((ordered-dist In Out) In Any Any -> Flonum)))) + (define (cdf d v [log? #f] [1-p? #f]) + ((ordered-dist-cdf d) v log? 1-p?)) + + (: inv-cdf (All (In Out) (case-> ((ordered-dist In Out) Real -> Out) + ((ordered-dist In Out) Real Any -> Out) + ((ordered-dist In Out) Real Any Any -> Out)))) + (define (inv-cdf d p [log? #f] [1-p? #f]) + ((ordered-dist-inv-cdf d) p log? 1-p?)) + ) (: real-dist-prob* (Real-Dist Flonum Flonum Any -> Flonum)) diff --git a/collects/math/private/distributions/geometric-dist.rkt b/collects/math/private/distributions/geometric-dist.rkt index 7ae905c414..4b289b29f6 100644 --- a/collects/math/private/distributions/geometric-dist.rkt +++ b/collects/math/private/distributions/geometric-dist.rkt @@ -16,15 +16,15 @@ (: flgeometric-pdf (Flonum Flonum Any -> Flonum)) (define (flgeometric-pdf q k log?) - (cond [(or (q . fl<= . 0.0) (q . fl>= . 1.0)) + (cond [(not (flinteger? k)) +nan.0] + [(or (q . fl<= . 0.0) (q . fl>= . 1.0)) (cond [(fl= q 1.0) (cond [(fl= k 0.0) (if log? 0.0 1.0)] [else (if log? -inf.0 0.0)])] [(fl= q 0.0) (if log? -inf.0 0.0)] [else +nan.0])] [(k . fl< . 0.0) (if log? -inf.0 0.0)] - [(integer? k) (cond [log? (fl+ (fllog q) (fl* k (fllog1p (- q))))] - [else (fl* q (flexp (fl* k (fllog1p (- q)))))])] - [else (if log? -inf.0 0.0)])) + [log? (fl+ (fllog q) (fl* k (fllog1p (- q))))] + [else (fl* q (flexp (fl* k (fllog1p (- q)))))])) (: flgeometric-cdf (Flonum Flonum Any Any -> Flonum)) (define (flgeometric-cdf q k log? 1-p?) @@ -34,9 +34,10 @@ [else +nan.0])] [(k . fl< . 0.0) (flprobability 0.0 log? 1-p?)] [else - (define log-1-p (fl* (fl+ (flfloor k) 1.0) (fllog1p (- q)))) - (cond [1-p? (if log? log-1-p (exp log-1-p))] - [else (if log? (lg1- log-1-p) (- (flexpm1 log-1-p)))])])) + (let ([k (flfloor k)]) + (define log-1-p (fl* (fl+ k 1.0) (fllog1p (- q)))) + (cond [1-p? (if log? log-1-p (exp log-1-p))] + [else (if log? (lg1- log-1-p) (- (flexpm1 log-1-p)))]))])) (: flgeometric-inv-cdf (Flonum Flonum Any Any -> Flonum)) (define (flgeometric-inv-cdf q p log? 1-p?) diff --git a/collects/math/private/distributions/impl/beta-inv-cdf.rkt b/collects/math/private/distributions/impl/beta-inv-cdf.rkt index d3f47aff87..9ac4fef278 100644 --- a/collects/math/private/distributions/impl/beta-inv-cdf.rkt +++ b/collects/math/private/distributions/impl/beta-inv-cdf.rkt @@ -164,9 +164,10 @@ (: flbeta-inv-log-cdf-limits (Flonum Flonum Flonum -> Flonum)) (define (flbeta-inv-log-cdf-limits a b log-p) (cond [(not (and (log-p . fl<= . 0.0) (a . fl>= . 0.0) (b . fl>= . 0.0))) +nan.0] + [(or (and (fl= a 0.0) (fl= b 0.0)) + (and (fl= a +inf.0) (fl= b +inf.0))) + +nan.0] [(fl= log-p -inf.0) 0.0] - [(and (fl= a 0.0) (fl= b 0.0)) (if (log-p . fl< . (fllog 0.5)) 0.0 1.0)] - [(and (fl= a +inf.0) (fl= b +inf.0)) 0.5] [(fl= a +inf.0) 1.0] [(fl= b +inf.0) 0.0] [(fl= a 0.0) 0.0] diff --git a/collects/math/private/distributions/impl/beta-pdf.rkt b/collects/math/private/distributions/impl/beta-pdf.rkt index 019cb445fe..193e35340d 100644 --- a/collects/math/private/distributions/impl/beta-pdf.rkt +++ b/collects/math/private/distributions/impl/beta-pdf.rkt @@ -7,18 +7,17 @@ (: flbeta-log-pdf (Flonum Flonum Flonum -> Flonum)) (define (flbeta-log-pdf a b x) - (cond [(or (a . fl<= . 0.0) (b . fl<= . 0.0)) - (cond [(or (a . fl< . 0.0) (b . fl< . 0.0)) - +nan.0] - [(and (fl= a 0.0) (fl= b 0.0)) - (if (or (fl= x 0.0) (fl= x 1.0)) +inf.0 -inf.0)] - [(fl= a 0.0) - (if (fl= x 0.0) +inf.0 0.0)] - [else - (if (fl= x 1.0) +inf.0 0.0)])] + (cond [(or (a . fl< . 0.0) (b . fl< . 0.0) + (and (fl= a 0.0) (fl= b 0.0)) + (and (fl= a +inf.0) (fl= b +inf.0))) + +nan.0] + [(or (fl= a 0.0) (fl= b +inf.0)) + (if (fl= x 0.0) +inf.0 -inf.0)] + [(or (fl= b 0.0) (fl= a +inf.0)) + (if (fl= x 1.0) +inf.0 -inf.0)] [(or (x . fl< . 0.0) (x . fl> . 1.0)) -inf.0] [else (flsum (list (fl* (fl- a 1.0) (fllog x)) (fl* (fl- b 1.0) (fllog1p (- x))) - (fl- 0.0 (fllog-beta a b))))])) + (- (fllog-beta a b))))])) diff --git a/collects/math/private/distributions/impl/binomial-pdf.rkt b/collects/math/private/distributions/impl/binomial-pdf.rkt index 9d87ff88e3..02c09d383a 100644 --- a/collects/math/private/distributions/impl/binomial-pdf.rkt +++ b/collects/math/private/distributions/impl/binomial-pdf.rkt @@ -18,11 +18,13 @@ (max 1.0 (flexpt 2.0 (flfloor (/ (fllog d) (fllog 2.0)))))) (: flbinomial-pdf (Flonum Flonum Flonum -> Flonum)) -;; Maximum error grows without bound in the size of `n', but is reasonable for xxx +;; Maximum error grows without bound in the size of `n' (todo: figure out what it's reasonable for) (define (flbinomial-pdf n p k) - (cond [(or (not (integer? n)) (n . fl< . 0.0)) +nan.0] - [(or (p . fl< . 0.0) (p . fl> . 1.0)) +nan.0] - [(or (not (integer? k)) (k . fl< . 0.0) (k . fl> . n)) 0.0] + (cond [(or (not (integer? n)) (n . fl< . 0.0) + (p . fl< . 0.0) (p . fl> . 1.0) + (not (integer? k))) + +nan.0] + [(or (k . fl< . 0.0) (k . fl> . n)) 0.0] [(fl= p 0.0) (if (fl= k 0.0) 1.0 0.0)] [(fl= p 1.0) (if (fl= k n) 1.0 0.0)] [(fl= k 0.0) (flexpt1p (- p) n)] @@ -66,9 +68,11 @@ (: flbinomial-log-pdf (Flonum Flonum Flonum -> Flonum)) ;; Maximum error grows without bound in `n', but is <= 4 ulps for n <= 1e4 (define (flbinomial-log-pdf n p k) - (cond [(or (not (integer? n)) (n . fl< . 0.0)) +nan.0] - [(or (p . fl< . 0.0) (p . fl> . 1.0)) +nan.0] - [(or (not (integer? k)) (k . fl< . 0.0) (k . fl> . n)) -inf.0] + (cond [(or (not (flinteger? n)) (n . fl< . 0.0) + (p . fl< . 0.0) (p . fl> . 1.0) + (not (flinteger? k))) + +nan.0] + [(or (k . fl< . 0.0) (k . fl> . n)) -inf.0] [(fl= p 0.0) (if (fl= k 0.0) 0.0 -inf.0)] [(fl= p 1.0) (if (fl= k n) 0.0 -inf.0)] [(fl= k 0.0) (fl* n (fllog1p (- p)))] diff --git a/collects/math/private/distributions/poisson-dist.rkt b/collects/math/private/distributions/poisson-dist.rkt index fd7e6299df..6b2ea84408 100644 --- a/collects/math/private/distributions/poisson-dist.rkt +++ b/collects/math/private/distributions/poisson-dist.rkt @@ -21,8 +21,7 @@ (: flpoisson-pdf (Flonum Flonum Any -> Flonum)) (define (flpoisson-pdf l k log?) - (cond [(l . fl< . 0.0) +nan.0] - [(not (integer? k)) (if log? -inf.0 0.0)] + (cond [(or (l . fl< . 0.0) (not (integer? k))) +nan.0] [log? (impl:flpoisson-log-pdf l k)] [else (impl:flpoisson-pdf l k)])) diff --git a/collects/math/private/distributions/triangle-dist.rkt b/collects/math/private/distributions/triangle-dist.rkt index 7aa8fd9cd5..79a2d75152 100644 --- a/collects/math/private/distributions/triangle-dist.rkt +++ b/collects/math/private/distributions/triangle-dist.rkt @@ -27,6 +27,7 @@ (fl* (fl- b a) (fl- c a)))] [(x . fl< . b) (fl/ (fl* 2.0 (fl- b x)) (fl* (fl- b a) (fl- b c)))] + [(= x a b) +inf.0] [else 0.0])) (if log? (fllog p) p)) @@ -41,12 +42,12 @@ (fl- 1.0 (fl/ (fl* b-x b-x) (fl* (fl- b a) (fl- b c))))] [else 1.0])) - (cond [1-p? (if log? (fllog (fl- 1.0 q)) (fl- 1.0 q))] + (cond [1-p? (if log? (fllog1p (- q)) (fl- 1.0 q))] [else (if log? (fllog q) q)])) (: unsafe-fltriangle-inv-cdf (Float Float Float Float Any Any -> Float)) (define (unsafe-fltriangle-inv-cdf a b c q log? 1-p?) - (let ([q (cond [1-p? (if log? (fl- 1.0 (flexp q)) (fl- 1.0 q))] + (let ([q (cond [1-p? (if log? (- (flexpm1 q)) (fl- 1.0 q))] [else (if log? (flexp q) q)])]) (cond [(q . fl< . 0.0) +nan.0] [(q . fl= . 0.0) a] diff --git a/collects/math/private/distributions/truncated-dist.rkt b/collects/math/private/distributions/truncated-dist.rkt index 3e33375f97..52fdfa9a13 100644 --- a/collects/math/private/distributions/truncated-dist.rkt +++ b/collects/math/private/distributions/truncated-dist.rkt @@ -88,15 +88,8 @@ (min b (max a x))]))) (: sample-single (-> Flonum)) - (define sample-single - (cond [(log-P_a . 0.5)))] - [else - (define (sample-single) - (define x (orig-sample)) - (cond [(and (a . fl<= . x) (x . fl<= . b)) x] - [else (sample-single)])) - sample-single])) + (define (sample-single) + (inv-cdf (fl* 0.5 (random)) #f ((random) . fl> . 0.5))) (: sample (Sample Flonum)) (define sample diff --git a/collects/math/private/distributions/uniform-dist.rkt b/collects/math/private/distributions/uniform-dist.rkt index 70b5205e8d..5992cafece 100644 --- a/collects/math/private/distributions/uniform-dist.rkt +++ b/collects/math/private/distributions/uniform-dist.rkt @@ -25,12 +25,12 @@ (define (unsafe-fluniform-cdf a b x log? 1-p?) (cond [1-p? (define q (cond [(x . fl< . a) 1.0] - [(x . fl> . b) 0.0] + [(x . fl>= . b) 0.0] [else (fl/ (fl- b x) (fl- b a))])) (if log? (fllog q) q)] [else (define q (cond [(x . fl< . a) 0.0] - [(x . fl> . b) 1.0] + [(x . fl>= . b) 1.0] [else (fl/ (fl- x a) (fl- b a))])) (if log? (fllog q) q)])) diff --git a/collects/math/private/flonum/flonum-functions.rkt b/collects/math/private/flonum/flonum-functions.rkt index ef8e88bffa..0fa00e37c6 100644 --- a/collects/math/private/flonum/flonum-functions.rkt +++ b/collects/math/private/flonum/flonum-functions.rkt @@ -9,7 +9,7 @@ (provide (all-from-out racket/flonum) fl - flsubnormal? + flsubnormal? flrational? flnan? flinteger? flnext* flprev* flulp-error float-complex? (rename-out [inline-number->float-complex number->float-complex]) @@ -37,6 +37,18 @@ (and ((flabs x) . fl<= . +max-subnormal.0) (not (fl= x 0.0)))) + (define flrational? + (λ: ([x : Flonum]) + (and (x . fl> . -inf.0) (x . fl< . +inf.0)))) + + (define flnan? + (λ: ([x : Flonum]) + (not (and (x . fl>= . -inf.0) (x . fl<= . +inf.0))))) + + (define flinteger? + (λ: ([x : Flonum]) + (fl= x (fltruncate x)))) + (: flsubnormal-next* (Flonum -> Flonum)) (define (flsubnormal-next* x) (fl/ (fl+ (fl* x (flexpt 2.0 1022.0)) epsilon.0) @@ -194,13 +206,13 @@ (: flcscpix (Flonum -> Flonum)) (define (flcscpix x) - (cond [(and (not (zero? x)) (integer? x)) +nan.0] + (cond [(and (not (zero? x)) (flinteger? x)) +nan.0] [else (/ 1.0 (flsinpix x))])) (: flsecpix (Flonum -> Flonum)) (define (flsecpix x) - (cond [(and (x . fl> . 0.0) (integer? (fl- x 0.5))) +nan.0] - [(and (x . fl< . 0.0) (integer? (fl+ x 0.5))) +nan.0] + (cond [(and (x . fl> . 0.0) (flinteger? (fl- x 0.5))) +nan.0] + [(and (x . fl< . 0.0) (flinteger? (fl+ x 0.5))) +nan.0] [else (/ 1.0 (flcospix x))])) (: flcotpix (Flonum -> Flonum)) diff --git a/collects/math/private/functions/incomplete-beta.rkt b/collects/math/private/functions/incomplete-beta.rkt index 5c6dd9f782..edd73866ee 100644 --- a/collects/math/private/functions/incomplete-beta.rkt +++ b/collects/math/private/functions/incomplete-beta.rkt @@ -170,10 +170,12 @@ ACM Transactions on Mathematical Software, 1992, vol 18, no 3, pp 360--373. (: flbeta-regularized-limits (Flonum Flonum Flonum -> Flonum)) (define (flbeta-regularized-limits a b x) - (cond [(or (x . fl< . 0.0) (x . fl> . 1.0) (a . fl< . 0.0) (b . fl< . 0.0)) +nan.0] + (cond [(or (x . fl< . 0.0) (x . fl> . 1.0) + (a . fl< . 0.0) (b . fl< . 0.0) + (and (fl= a 0.0) (fl= b 0.0)) + (and (fl= a +inf.0) (fl= b +inf.0))) + +nan.0] [(fl= x 1.0) 1.0] - [(and (fl= a 0.0) (fl= b 0.0)) 0.5] - [(and (fl= a +inf.0) (fl= b +inf.0)) (if (x . fl< . 0.5) 0.0 1.0)] [(fl= a +inf.0) 0.0] [(fl= b +inf.0) 1.0] [(fl= a 0.0) 1.0] diff --git a/collects/math/private/statistics/counting.rkt b/collects/math/private/statistics/counting.rkt new file mode 100644 index 0000000000..dc77d31d1d --- /dev/null +++ b/collects/math/private/statistics/counting.rkt @@ -0,0 +1,61 @@ +#lang typed/racket/base + +(require racket/unsafe/ops + racket/list + racket/sequence) + +(provide samples->immutable-hash samples->hash + count-samples + (struct-out sample-bin) + Real-Bin + bin-real-samples) + +(struct: (A) sample-bin ([min : A] [max : A] [values : (Listof A)]) #:transparent) + +(define-type Real-Bin (sample-bin Real)) + +(: samples->immutable-hash (All (A) ((Sequenceof A) -> (HashTable A Positive-Integer)))) +(define (samples->immutable-hash xs) + (define: h : (HashTable A Positive-Integer) (make-immutable-hash null)) + (for/fold: ([h : (HashTable A Positive-Integer) h]) ([x : A xs]) + (hash-set h x (unsafe-fx+ 1 (hash-ref h x (λ () 0)))))) + +(: samples->hash (All (A) ((Sequenceof A) -> (HashTable A Positive-Integer)))) +(define (samples->hash xs) + (define: h : (HashTable A Positive-Integer) (make-hash null)) + (for: ([x : A xs]) + (hash-set! h x (unsafe-fx+ 1 (hash-ref h x (λ () 0))))) + h) + +(: count-samples (All (A) ((Sequenceof A) -> (Values (Listof A) (Listof Positive-Integer))))) +(define (count-samples xs) + (define h (samples->hash xs)) + (define xws (hash-map h (λ: ([x : A] [c : Positive-Integer]) (cons x c)))) + (values (map (λ: ([xw : (Pair A Positive-Integer)]) (car xw)) xws) + (map (λ: ([xw : (Pair A Positive-Integer)]) (cdr xw)) xws))) + +(: bin-real-samples ((Sequenceof Real) (Sequenceof Real) -> (Listof Real-Bin))) +(define (bin-real-samples bin-bounds xs) + (let* ([bin-bounds (list* -inf.0 +inf.0 (sequence->list bin-bounds))] + [bin-bounds (filter (λ: ([x : Real]) (not (eqv? x +nan.0))) + (remove-duplicates bin-bounds))] + [bin-bounds (sort bin-bounds <)] + [x-min (first bin-bounds)] + [x-max (last bin-bounds)] + [xs (sequence->list xs)] + [xs (filter (λ: ([x : Real]) (<= x-min x x-max)) xs)] + [xs (sort xs <)]) + (define-values (res rest-xs) + (for/fold: ([res : (Listof Real-Bin) empty] + [xs : (Listof Real) xs] + ) ([x1 (in-list bin-bounds)] + [x2 (in-list (rest bin-bounds))]) + (define-values (lst rest-xs) + (let: loop : (Values (Listof Real) (Listof Real)) ([lst : (Listof Real) empty] + [xs : (Listof Real) xs]) + (if (and (not (empty? xs)) (<= x1 (first xs) x2)) + (loop (cons (first xs) lst) (rest xs)) + (values lst xs)))) + (cond [(empty? lst) (values res rest-xs)] + [else (values (cons (sample-bin x1 x2 (reverse lst)) res) rest-xs)]))) + (reverse res))) diff --git a/collects/math/private/statistics/statistics-utils.rkt b/collects/math/private/statistics/statistics-utils.rkt index c651989a0c..e56799e94a 100644 --- a/collects/math/private/statistics/statistics-utils.rkt +++ b/collects/math/private/statistics/statistics-utils.rkt @@ -27,6 +27,27 @@ ;; =================================================================================================== +(: find-near-pow2 (Real -> Nonnegative-Exact-Rational)) +(define (find-near-pow2 x) + (expt 2 (max -1073 (min 1023 (exact-round (/ (log (abs x)) (fllog 2.0))))))) + +(: weights->list (Symbol (Sequenceof Real) -> (Listof Nonnegative-Real))) +(define (weights->list name w-seq) + (for/list: : (Listof Nonnegative-Real) ([w w-seq]) + (cond [(w . >= . 0) w] + [else (raise-argument-error name "(Sequenceof Nonnegative-Real)" w-seq)]))) + +(: weights->normalized-weights (Symbol (Sequenceof Real) -> (Listof Nonnegative-Flonum))) +(define (weights->normalized-weights name ws) + (let ([ws (weights->list name ws)]) + (when (empty? ws) (raise-argument-error name "nonempty (Sequenceof Real)" ws)) + (define max-w (find-near-pow2 (apply max ws))) + (let ([ws (map (λ: ([w : Real]) (/ w max-w)) ws)]) + (define total-w (sum ws)) + (map (λ: ([w : Real]) (assert (fl (/ w total-w)) nonnegative?)) ws)))) + +;; =================================================================================================== + (: check-lengths! (All (A B) (Symbol String A B Index Index -> Void))) (define (check-lengths! name what xs ys m n) (unless (= m n) (error name "~a must be the same length; given ~e (length ~a) and ~e (length ~a)" @@ -46,10 +67,6 @@ (define nonnegative? (λ: ([x : Real]) (not (negative? x)))) -(: find-near-pow2 (Nonnegative-Real -> Nonnegative-Exact-Rational)) -(define (find-near-pow2 x) - (expt 2 (max -1073 (min 1023 (exact-round (/ (log x) (fllog 2.0))))))) - (: sequences->normalized-weighted-samples (All (A) (Symbol (Sequenceof A) (Sequenceof Real) -> (Values (Listof A) (Listof Positive-Flonum))))) diff --git a/collects/math/private/vector/flvector.rkt b/collects/math/private/vector/flvector.rkt index 978278a596..1c89c3c7dc 100644 --- a/collects/math/private/vector/flvector.rkt +++ b/collects/math/private/vector/flvector.rkt @@ -1,8 +1,9 @@ #lang typed/racket/base -(require racket/flonum +(require racket/fixnum racket/string (for-syntax racket/base syntax/parse) + "../../flonum.rkt" "../unsafe.rkt" "flvector-syntax.rkt") @@ -47,7 +48,9 @@ flvector< flvector<= flvector> - flvector>=) + flvector>= + ;; + flvector-sums) ;; =================================================================================================== ;; flvector-copy @@ -256,3 +259,38 @@ (define flvector<= (lift-comparison 'flvector<= fl<=)) (define flvector> (lift-comparison 'flvector> fl>)) (define flvector>= (lift-comparison 'flvector>= fl>=)) + +;; =================================================================================================== + +(: flvector-sums (FlVector -> FlVector)) +(define (flvector-sums xs) + (define n (flvector-length xs)) + (define rs (make-flvector n)) + (define ss (make-flvector n)) + (let j-loop ([#{j : Nonnegative-Fixnum} 0] + [#{m : Nonnegative-Fixnum} 0]) + (cond + [(j . fx< . n) + (define x (unsafe-flvector-ref xs j)) + (let p-loop ([#{p : Nonnegative-Fixnum} 0] + [#{x : Flonum} x] + [#{s : Flonum} 0.0] + [#{i : Nonnegative-Fixnum} 0]) + (cond + [(p . fx< . m) + (define r (unsafe-flvector-ref rs p)) + (let-values ([(x r) (if ((flabs x) . fl< . (flabs r)) (values r x) (values x r))]) + (define z (fl+ x r)) + (define-values (hi lo) + (cond [(flrational? z) (values z (fl- r (fl- z x)))] + [else (values x r)])) + (cond [(fl= lo 0.0) + (p-loop (unsafe-fx+ p 1) hi s i)] + [else + (unsafe-flvector-set! rs i lo) + (p-loop (unsafe-fx+ p 1) hi (fl+ s lo) (unsafe-fx+ i 1))]))] + [else + (unsafe-flvector-set! rs i x) + (unsafe-flvector-set! ss j (fl+ s x)) + (j-loop (fx+ j 1) (unsafe-fx+ i 1))]))] + [else ss]))) diff --git a/collects/math/scribblings/math-distributions.scrbl b/collects/math/scribblings/math-distributions.scrbl index 78d3b9d1f3..00306ee24a 100644 --- a/collects/math/scribblings/math-distributions.scrbl +++ b/collects/math/scribblings/math-distributions.scrbl @@ -2,10 +2,11 @@ @(require scribble/eval racket/sandbox - (for-label racket/base racket/promise + (for-label racket/base racket/promise racket/list math plot (only-in typed/racket/base - Flonum Real Boolean Any Listof Integer case-> -> Promise)) + Flonum Real Boolean Any Listof Integer case-> -> Promise U + Sequenceof Positive-Flonum Nonnegative-Flonum)) "utils.rkt") @(define untyped-eval (make-untyped-math-eval)) @@ -16,20 +17,31 @@ @defmodule[math/distributions] +The @racketmodname[math/distributions] module exports the following: +@itemlist[#:style 'ordered + @item{@tech{Distribution objects}, which represent probability distributions} + @item{Functions that operate on distribution objects} + @item{The low-level flonum functions used to define distribution objects}] + +@bold{Performance Warning:} Using distribution objects in untyped Racket is currently 25-50 times +slower than using them in Typed Racket, due to the overhead of checking higher-order contracts. +We are working on it. + +For now, if you need speed, either use the @racketmodname[typed/racket] language, or use just the +low-level flonum functions, which are documented in @secref{dist:flonum}. + @local-table-of-contents[] -@section[#:tag "dist:intro"]{Introduction} - -The @racketmodname[math/distributions] module exports distribution objects and functions that -operate on them. +@section[#:tag "dist:dist-objects"]{Distribution Objects} A @deftech{distribution object} represents a probability distribution over a common domain, such as the real numbers, integers, or a set of symbols. Their constructors correspond with distribution families, such as the family of normal distributions. -A distribution object has a density function (a @deftech{pdf}) and a procedure to generate random -samples. An @italic{ordered} distribution object additionally has a cumulative distribution function -(a @deftech{cdf}), and its generalized inverse (an @deftech{inverse cdf}). +A distribution object, or a value of type @racket[dist], has a density function (a @deftech{pdf}) +and a procedure to generate random samples. An @italic{ordered} distribution object, or a value +of type @racket[ordered-dist], additionally has a cumulative distribution function (a @deftech{cdf}), +and its generalized inverse (an @deftech{inverse cdf}). The following example creates an ordered distribution object representing a normal distribution with mean 2 and standard deviation 5, computes an approximation of the probability of the @@ -38,14 +50,16 @@ half-open interval (1/2,1], and computes another approximation from random sampl (define d (normal-dist 2 5)) (real-dist-prob d 0.5 1.0) (define xs (sample d 10000)) - (fl (/ (count (λ (x) (and (1/2 . < . x) (x . <= . 1))) xs) - (length xs)))] + (eval:alts + (fl (/ (count (λ (x) (and (1/2 . < . x) (x . <= . 1))) xs) + (length xs))) + (eval:result @racketresultfont{0.0391}))] This plots the pdf and a kernel density estimate of the pdf from random samples: @interaction[#:eval untyped-eval (plot (list (function (dist-pdf d) #:color 0 #:style 'dot) (density xs)) - #:x-label "x" #:y-label "N(2,5)")] + #:x-label "x" #:y-label "density of N(2,5)")] There are also higher-order distributions, which take other distributions as constructor arguments. For example, the truncated distribution family returns a distribution like its @@ -57,34 +71,32 @@ the probabilities within the interval: (real-dist-prob d-trunc 0.5 1.0) (plot (list (function (dist-pdf d-trunc) #:color 0 #:style 'dot) (density (sample d-trunc 1000))) - #:x-label "x" #:y-label "T(N(2,5),-∞,5)")] + #:x-label "x" #:y-label "density of T(N(2,5),-∞,5)")] Because real distributions' cdfs represent the probability P[@italic{X} ≤ @italic{x}], they are -right-continuous: +right-continuous (i.e. continuous @italic{from the right}): @interaction[#:eval untyped-eval (define d (geometric-dist 0.4)) - (define cdf (dist-cdf d)) (plot (for/list ([i (in-range -1 7)]) (define i+1-ε (flprev (+ i 1.0))) - (list (lines (list (vector i (cdf i)) - (vector i+1-ε (cdf i+1-ε))) + (list (lines (list (vector i (cdf d i)) + (vector i+1-ε (cdf d i+1-ε))) #:width 2) - (points (list (vector i (cdf i))) + (points (list (vector i (cdf d i))) #:sym 'fullcircle5 #:color 1) - (points (list (vector i+1-ε (cdf i+1-ε))) + (points (list (vector i+1-ε (cdf d i+1-ε))) #:sym 'fullcircle5 #:color 1 #:fill-color 0))) #:x-min -0.5 #:x-max 6.5 #:y-min -0.05 #:y-max 1 #:x-label "x" #:y-label "P[X ≤ x]")] For convenience, cdfs are defined over the extended reals regardless of their distribution's support, but their inverses return values only within the support: @interaction[#:eval untyped-eval - (define inv-cdf (dist-inv-cdf d)) - (cdf +inf.0) - (cdf 1.5) - (cdf -inf.0) - (inv-cdf (cdf +inf.0)) - (inv-cdf (cdf 1.5)) - (inv-cdf (cdf -inf.0))] + (cdf d +inf.0) + (cdf d 1.5) + (cdf d -inf.0) + (inv-cdf d (cdf d +inf.0)) + (inv-cdf d (cdf d 1.5)) + (inv-cdf d (cdf d -inf.0))] A distribution's inverse cdf is defined on the interval [0,1] and is always left-continuous, except possibly at 0 when its support is bounded on the left (as with @racket[geometric-dist]). @@ -92,43 +104,50 @@ Every pdf and cdf can return log densities and log probabilities, in case densit are too small to represent as flonums (i.e. are less than @racket[+min.0]): @interaction[#:eval untyped-eval (define d (normal-dist)) - (define pdf (dist-pdf d)) - (define cdf (dist-cdf d)) - (pdf 40.0) - (cdf -40.0) - (pdf 40.0 #t) - (cdf -40.0 #t)] + (pdf d 40.0) + (cdf d -40.0) + (pdf d 40.0 #t) + (cdf d -40.0 #t)] Additionally, every cdf can return upper-tail probabilities, which are always more accurate when lower-tail probabilities are greater than @racket[0.5]: @interaction[#:eval untyped-eval - (cdf 20.0) - (cdf 20.0 #f #t)] -Upper-tail probabilities can also be returned as logs in case probabilities are too small: + (cdf d 20.0) + (cdf d 20.0 #f #t)] +Upper-tail probabilities can also be returned as log probabilities in case probabilities are too +small: @interaction[#:eval untyped-eval - (cdf 40.0) - (cdf 40.0 #f #t) - (cdf 40.0 #t #t)] + (cdf d 40.0) + (cdf d 40.0 #f #t) + (cdf d 40.0 #t #t)] Inverse cdfs accept log probabilities and upper-tail probabilities. The functions @racket[lg+] and @racket[lgsum], as well as others in @racketmodname[math/flonum], perform arithmetic on log probabilities. +When distribution object constructors receive parameters outside their domains, they return +@deftech{undefined distributions}, or distributions whose functions all return @racket[+nan.0]: +@interaction[#:eval untyped-eval + (pdf (gamma-dist -1 2) 2) + (sample (poisson-dist -2)) + (cdf (beta-dist 0 0) 1/2) + (inv-cdf (geometric-dist 1.1) 0.2)] + @section{Basic Distribution Types and Operations} @defform[(PDF In)]{ The type of probability density functions, or @tech{pdfs}, defined as @racketblock[(case-> (In -> Flonum) (In Any -> Flonum))] -The second argument defaults to @racket[#f]. When the second argument is not @racket[#f], a -pdf returns a log density. +For any function of this type, the second argument should default to @racket[#f]. When not +@racket[#f], the function should return a log density. } @defform[(Sample Out)]{ The type of a distribution's sampling procedure, defined as @racketblock[(case-> (-> Out) (Integer -> (Listof Out)))] -When given a nonnegative integer @racket[n] as an argument, a sampling procedure returns a list of -random samples of length @racket[n]. +When given a nonnegative integer @racket[n] as an argument, a sampling procedure should return +a length-@racket[n] list of independent, random samples. } @defstruct*[dist ([pdf (PDF In)] [sample (Sample Out)])]{ @@ -138,7 +157,19 @@ type a distribution returns as random samples. @examples[#:eval untyped-eval (dist? (discrete-dist '(a b c))) - (dist? (normal-dist))] + (dist? (normal-dist)) + ((dist-pdf (normal-dist)) 0) + ((dist-sample (normal-dist)))] +See @racket[pdf] and @racket[sample] for uncurried forms of @racket[dist-pdf] and +@racket[dist-sample]. +} + +@defproc[(pdf [d (dist In Out)] [v In] [log? Any #f]) Flonum]{ +An uncurried form of @racket[dist-pdf]. When @racket[log?] is not @racket[#f], returns a +log density. +@examples[#:eval untyped-eval + (pdf (discrete-dist '(a b c) '(1 2 3)) 'a) + (pdf (discrete-dist '(a b c) '(1 2 3)) 'a #t)] } @defproc*[([(sample [d (dist In Out)]) Out] @@ -156,12 +187,8 @@ The type of cumulative distribution functions, or @tech{cdfs}, defined as @racketblock[(case-> (In -> Flonum) (In Any -> Flonum) (In Any Any -> Flonum))] -Both optional arguments default to @racket[#f]. - -Suppose @racket[cdf : (CDF Real)], and @racket[p = (cdf x log? 1-p?)]. The flonum @racket[p] -represents a probability. If @racket[log?] is a true value, @racket[p] is a log probability. -If @racket[1-p?] is a true value, @racket[p] is an upper-tail probability or upper-tail -log probability. +For any function of this type, both optional arguments should default to @racket[#f], and be +interpreted as specified in the description of @racket[cdf]. } @defform[(Inverse-CDF Out)]{ @@ -169,12 +196,8 @@ The type of inverse cumulative distribution functions, or @tech{inverse cdfs}, d @racketblock[(case-> (Real -> Out) (Real Any -> Out) (Real Any Any -> Out))] -Both optional arguments default to @racket[#f]. - -Suppose @racket[inv-cdf : (Inverse-CDF Flonum)], and @racket[x = (inv-cdf p log? 1-p?)]. -If @racket[log?] is a true value, @racket[inv-cdf] interprets @racket[p] as a log probability. -If @racket[1-p?] is a true value, @racket[inv-cdf] interprets @racket[p] as an upper-tail probability -or upper-tail log probability. +For any function of this type, both optional arguments should default to @racket[#f], and be +interpreted as specified in the description of @racket[inv-cdf]. } @defstruct*[(ordered-dist dist) ([cdf (CDF In)] @@ -185,17 +208,16 @@ or upper-tail log probability. The parent type of @italic{ordered} @tech{distribution objects}. Similarly to @racket[dist], the @racket[In] type parameter is the data type an ordered distribution -accepts as arguments to its @tech{pdf}; it is also the data type its @tech{cdf} accepts. - -Also similarly to @racket[dist], the @racket[Out] type parameter is the data type an ordered -distribution returns as random samples; it is also the data type its @tech{inverse cdf} returns. +accepts as arguments to its @tech{pdf}, and the @racket[Out] type parameter is the data type an +ordered distribution returns as random samples. Additionally, its @tech{cdf} accepts values of type +@racket[In], and its @tech{inverse cdf} returns values of type @racket[Out]. @examples[#:eval untyped-eval (ordered-dist? (discrete-dist '(a b c))) (ordered-dist? (normal-dist))] The median is stored in an @racket[ordered-dist] to allow interval probabilities to be calculated -as accurately as possible. For example, for @racket[d = (normal-dist)], whose median is @racket[0.0], +accurately. For example, for @racket[d = (normal-dist)], whose median is @racket[0.0], @racket[(real-dist-prob d -2.0 -1.0)] is calculated using lower-tail probabilities, and @racket[(real-dist-prob d 1.0 2.0)] is calculated using upper-tail probabilities. } @@ -205,11 +227,31 @@ The parent type of real-valued distributions, such as any distribution returned @racket[normal-dist]. Equivalent to the type @racket[(ordered-dist Real Flonum)]. } +@defproc[(cdf [d (ordered-dist In Out)] [v In] [log? Any #f] [1-p? Any #f]) Flonum]{ +An uncurried form of @racket[dist-cdf]. + +When @racket[log?] is @racket[#f], @racket[cdf] returns a probability; otherwise, it returns a log +probability. + +When @racket[1-p?] is @racket[#f], @racket[cdf] returns a lower-tail probability or log probability +(depending on @racket[log?]); otherwise, it returns an upper-tail probability or log-probability. +} + +@defproc[(inv-cdf [d (ordered-dist In Out)] [p Real] [log? Any #f] [1-p? Any #f]) Out]{ +An uncurried form of @racket[dist-inv-cdf]. + +When @racket[log?] is @racket[#f], @racket[inv-cdf] interprets @racket[p] as a probability; otherwise, +it interprets @racket[p] as a log probability. + +When @racket[1-p?] is @racket[#f], @racket[inv-cdf] interprets @racket[p] as a lower-tail probability +or log probability (depending on @racket[log?]); otherwise, it interprets @racket[p] as an upper-tail +probability or log probability. +} + @defproc[(real-dist-prob [d Real-Dist] [a Real] [b Real] [log? Any #f] [1-p? Any #f]) Flonum]{ Computes the probability of the half-open interval (@racket[a], @racket[b]]. (If @racket[b < a], -the two endpoints are swapped first.) The @racket[log?] and @racket[1-p?] arguments determine how the -return value should be interpreted, just as the arguments to a function of type -@racket[(CDF Real Flonum)]. +the two endpoints are swapped first.) The @racket[log?] and @racket[1-p?] arguments determine the +meaning of the return value in the same way as the corresponding arguments to @racket[cdf]. } @deftogether[(@defproc[(dist-cdf [d (ordered-dist In Out)]) (CDF In)] @@ -228,25 +270,188 @@ The first four are synonyms for @racket[ordered-dist-cdf], @racket[ordered-dist- (dist-median (gamma-dist 4 2))] } -@section{Discrete Distributions} +@section{Finite Distribution Families} -@subsection{Discrete Distribution} +@subsection{Unordered Discrete Distributions} -@subsection{Categorical Distribution} +@deftogether[(@defform[(Discrete-Dist A)] + @defproc*[([(discrete-dist [xs (Sequenceof A)]) (Discrete-Dist A)] + [(discrete-dist [xs (Sequenceof A)] [ws (Sequenceof Real)]) + (Discrete-Dist A)])] + @defproc[(discrete-dist-values [d (Discrete-Dist A)]) (Listof A)] + @defproc[(discrete-dist-probs [d (Discrete-Dist A)]) (Listof Positive-Flonum)])]{ +Represents families of unordered, discrete distributions over values of type @racket[A], with equality +decided by @racket[equal?]. -@section{Integer Distributions} +The weights in @racket[ws] must be nonnegative, and are treated as unnormalized probabilities. +When @racket[ws] is not given, the values in @racket[xs] are assigned uniform probabilities. -@subsection{Bernoulli Distribution} +The type @racket[(Discrete-Dist A)] is a subtype of @racket[(dist A A)]. This means that discrete +distribution objects are unordered, and thus have only a @tech{pdf} and a procedure to generate +random samples. -@subsection{Binomial Distribution} +@examples[#:eval untyped-eval + (define xs '(a b c)) + (define d (discrete-dist xs '(2 5 3))) + (define n 500) + (define h (samples->hash (sample d n))) + (plot (list (discrete-histogram + (map vector xs (map (dist-pdf d) xs)) + #:x-min 0 #:skip 2 #:label "P[x]") + (discrete-histogram + (map vector xs (map (λ (x) (/ (hash-ref h x) n)) xs)) + #:x-min 1 #:skip 2 #:line-style 'dot #:alpha 0.5 + #:label "est. P[x]")))] +} -@subsection{Geometric Distribution} +@section{Integer Distribution Families} -@subsection{Poisson Distribution} +Mathematically, integer distributions are commonly defined in one of two ways: over extended reals, +or over extended integers. The most common definitions use the extended reals, so the following +@tech{distribution object} constructors return objects of type @racket[Real-Dist]. -@section{Real Distributions} +(Another reason is that the extended integers correspond with the type +@racket[(U Integer +inf.0 -inf.0)]. Values of this type are difficult to use as integers: in +practice, they are the same as values of type @racket[Real].) -@subsection{Beta Distribution} +This leaves us with a quandary and two design decisions users should be aware of. The quandary is +that, when an integer distribution is defined over the reals, it has a @tech{cdf}, but @italic{no +well-defined @tech{pdf}}: the pdf would be zero except at integer points, where it would be +undefined. + +Unfortunately, an integer distribution without a pdf is nearly useless. +@margin-note*{In measure-theory parlance, the pdfs are defined with respect to counting measure, + while the cdfs are defined with respect to Lebesgue measure.} +So the pdfs of these integer distributions are pdfs defined over integers, while their cdfs are +defined over reals. + +Most implementations, such as @hyperlink["http://www.r-project.org"]{R}'s, make the same design +choice. Unlike R's, this implementation's pdfs return @racket[+nan.0] when given non-integers, +for three reasons: +@itemlist[@item{Their domain of definition is the integers.} + @item{Applying an integer pdf to a non-integer almost certainly indicates a logic error, + which is harder to detect when a program returns an apparently sensible value.} + @item{If this design choice turns out to be wrong and we change pdfs to return + @racket[0.0], this should affect very few programs. A change from @racket[0.0] to + @racket[+nan.0] could break many programs.}] + +Integer distributions defined over the extended integers are not out of the question, and may +show up in future versions of @racketmodname[math/distributions] if there is a clear need. + +@subsection{Bernoulli Distributions} + +@margin-note{Wikipedia: + @hyperlink["http://wikipedia.org/wiki/Bernoulli_distribution"]{Bernoulli Distribution}.} +@deftogether[(@defidform[Bernoulli-Dist] + @defproc[(bernoulli-dist [prob Real]) Bernoulli-Dist] + @defproc[(bernoulli-dist-prob [d Bernoulli-Dist]) Flonum])]{ +Represents the Bernoulli distribution family parameterized by probability of success. + +@racket[(bernoulli-dist prob)] is equivalent to @racket[(binomial-dist 1 prob)], but operations +on it are faster. + +@examples[#:eval untyped-eval + (define d (bernoulli-dist 0.75)) + (map (dist-pdf d) '(0 1)) + (map (dist-cdf d) '(0 1)) + (define d (binomial-dist 1 0.75)) + (map (dist-pdf d) '(0 1)) + (map (dist-cdf d) '(0 1))] +} + +@subsection{Binomial Distributions} + +@margin-note{Wikipedia: + @hyperlink["http://wikipedia.org/wiki/Binomial_distribution"]{Binomial Distribution}.} +@deftogether[(@defidform[Binomial-Dist] + @defproc[(binomial-dist [count Real] [prob Real]) Binomial-Dist] + @defproc[(binomial-dist-count [d Binomial-Dist]) Flonum] + @defproc[(binomial-dist-prob [d Binomial-Dist]) Flonum])]{ +Represents the binomial distribution family parameterized by count (number of trials) and +probability of success. + +@examples[#:eval untyped-eval + (define d (binomial-dist 15 0.6)) + (plot (discrete-histogram + (map vector (build-list 16 values) (build-list 16 (dist-pdf d)))) + #:x-label "number of successes" #:y-label "probability") + (plot (function-interval (λ (x) 0) (dist-cdf d) -0.5 15.5) + #:x-label "at-most number of successes" #:y-label "probability")] +} + +@subsection{Geometric Distributions} + +@margin-note{Wikipedia: + @hyperlink["http://wikipedia.org/wiki/Geometric_distribution"]{Geometric Distribution}.} +@deftogether[(@defidform[Geometric-Dist] + @defproc[(geometric-dist [prob Real]) Geometric-Dist] + @defproc[(geometric-dist-prob [d Geometric-Dist]) Flonum])]{ +Represents the geometric distribution family parameterized by success probability. The random +variable is the number of failures before the first success, or equivalently, the index of the +first success starting from zero. + +@examples[#:eval untyped-eval + (define d (geometric-dist 0.25)) + (plot (discrete-histogram + (map vector (build-list 16 values) (build-list 16 (dist-pdf d)))) + #:x-label "first success index" #:y-label "probability") + (plot (function-interval (λ (x) 0) (dist-cdf d) -0.5 15.5) + #:x-label "at-most first success index" #:y-label "probability" + #:y-max 1)] +} + +@subsection{Poisson Distributions} + +@margin-note{Wikipedia: + @hyperlink["http://wikipedia.org/wiki/Poisson_distribution"]{Poisson Distribution}.} +@deftogether[(@defidform[Poisson-Dist] + @defproc[(poisson-dist [mean Real]) Poisson-Dist] + @defproc[(poisson-dist-mean [d Poisson-Dist]) Flonum])]{ +Represents the Poisson distribution family parameterized by the mean number of occurrences of +independent events. + +@examples[#:eval untyped-eval + (define d (poisson-dist 6.2)) + (plot (discrete-histogram + (map vector (build-list 16 values) (build-list 16 (dist-pdf d)))) + #:x-label "number of events" #:y-label "probability") + (plot (function-interval (λ (x) 0) (dist-cdf d) -0.5 15.5) + #:x-label "at-most number of events" #:y-label "probability" + #:y-max 1)] +} + +@section{Real Distribution Families} + +The @tech{distribution object} constructors documented in this section return unique, useful +distributions for the largest possible parameter domain. This usually means that they return +distributions for a larger domain than their mathematical counterparts are defined on. + +For example, those that have a scale parameter, such as @racket[cauchy-dist], +@racket[logistic-dist], @racket[exponential-dist] and @racket[normal-dist], are typically +undefined for a zero scale. However, in floating-point math, it is often useful to simulate +limits in finite time using special values like @racket[+inf.0]. Therefore, when a +scale-parameterized family's constructor receives @racket[0], it returns a distribution object +that behaves like a @racket[Delta-Dist]: +@interaction[#:eval untyped-eval + (pdf (normal-dist 1 0) 1) + (pdf (normal-dist 1 0) 1.0000001)] +Further, negative scales are accepted, even for @racket[exponential-dist], which results in a +distribution with positive scale reflected about zero. + +Some parameters' boundary values give rise to non-unique limits. Sometimes the ambiguity +can be resolved using necessary properties; see @racket[Gamma-Dist] for an example. When no +resolution exists, as with @racket[(beta-dist 0 0)], which puts an indeterminate probability on +the value @racket[0] and the rest on @racket[1], the constructor returns an +@tech{undefined distribution}. + +Some distribution object constructors attempt to return sensible distributions when given +special values such as @racket[+inf.0] as parameters. Do not count on these yet. + +Many distribution families, such as @racket[Gamma-Dist], can be parameterized on either scale +or rate (which is the reciprocal of scale). In all such cases, the implementations provided by +@racketmodname[math/distributions] are parameterized on scale. + +@subsection{Beta Distributions} @margin-note{Wikipedia: @hyperlink["http://wikipedia.org/wiki/Beta_distribution"]{Beta Distribution}.} @@ -254,7 +459,8 @@ The first four are synonyms for @racket[ordered-dist-cdf], @racket[ordered-dist- @defproc[(beta-dist [alpha Real] [beta Real]) Beta-Dist] @defproc[(beta-dist-alpha [d Beta-Dist]) Flonum] @defproc[(beta-dist-beta [d Beta-Dist]) Flonum])]{ -Represents the beta distribution family parameterized by two shape parameters, or pseudocounts. +Represents the beta distribution family parameterized by two shape parameters, or pseudocounts, +which must both be nonnegative. @examples[#:eval untyped-eval (plot (for/list ([α (in-list '(1 2 3 1/2))] @@ -270,9 +476,17 @@ Represents the beta distribution family parameterized by two shape parameters, o (function (dist-cdf (beta-dist α β)) #:color i #:label (format "Beta(~a,~a)" α β))) #:x-min 0 #:x-max 1 #:y-label "probability")] + +@racket[(beta-dist 0 0)] and @racket[(beta-dist +inf.0 +inf.0)] are @tech{undefined distributions}. + +When @racket[a = 0] or @racket[b = +inf.0], the returned distribution acts like +@racket[(delta-dist 0)]. + +When @racket[a = +inf.0] or @racket[b = 0], the returned distribution acts like +@racket[(delta-dist 1)]. } -@subsection{Cauchy Distribution} +@subsection{Cauchy Distributions} @margin-note{Wikipedia: @hyperlink["http://wikipedia.org/wiki/Cauchy_distribution"]{Cauchy Distribution}.} @@ -299,7 +513,7 @@ Represents the Cauchy distribution family parameterized by mode and scale. #:x-min -8 #:x-max 8 #:y-label "probability")] } -@subsection{Delta Distribution} +@subsection{Delta Distributions} @margin-note{Wikipedia: @hyperlink["http://wikipedia.org/wiki/Dirac_delta_function"]{Dirac Delta Function}.} @@ -323,7 +537,7 @@ When a distribution with a scale parameter has scale zero, it behaves like a del (plot (function (dist-cdf (normal-dist 0 0)) -1e-300 1e-300))] } -@subsection{Exponential Distribution} +@subsection{Exponential Distributions} @margin-note{Wikipedia: @hyperlink["http://wikipedia.org/wiki/Exponential_distribution"]{Exponential @@ -353,7 +567,7 @@ is the reciprocal of mean or scale. If you have rates, construct exponential dis #:legend-anchor 'bottom-right)] } -@subsection{Gamma Distribution} +@subsection{Gamma Distributions} @margin-note{Wikipedia: @hyperlink["http://wikipedia.org/wiki/Gamma_distribution"]{Gamma Distribution}.} @@ -361,7 +575,8 @@ is the reciprocal of mean or scale. If you have rates, construct exponential dis @defproc[(gamma-dist [shape Real 1] [scale Real 1]) Gamma-Dist] @defproc[(gamma-dist-shape [d Gamma-Dist]) Flonum] @defproc[(gamma-dist-scale [d Gamma-Dist]) Flonum])]{ -Represents the gamma distribution family parameterized by shape and scale. +Represents the gamma distribution family parameterized by shape and scale. The @racket[shape] +parameter must be nonnegative. @bold{Warning:} The gamma distribution family is often parameterized by shape and @italic{rate}, which is the reciprocal of scale. If you have rates, construct gamma distributions using @@ -383,9 +598,18 @@ which is the reciprocal of scale. If you have rates, construct gamma distributio #:color i #:label (format "Gamma(~a,~a)" k s))) #:x-min 0 #:x-max 15 #:y-label "probability" #:legend-anchor 'bottom-right)] + +The cdf of the gamma distribution with @racket[shape = 0] could return either @racket[0.0] or +@racket[1.0] at @racket[x = 0], depending on whether a double limit is taken with respect to +@racket[scale] or with respect to @racket[x] first. However the limits are taken, the cdf +must return @racket[1.0] for @racket[x > 0]. Because cdfs are right-continuous, the only correct +choice is +@interaction[#:eval untyped-eval + (cdf (gamma-dist 0 1) 0)] +Therefore, a gamma distribution with @racket[shape = 0] behaves like @racket[(delta-dist 0)]. } -@subsection{Logistic Distribution} +@subsection{Logistic Distributions} @margin-note{Wikipedia: @hyperlink["http://wikipedia.org/wiki/Logistic_distribution"]{Logistic Distribution}.} @@ -413,7 +637,7 @@ and scale. In this parameterization, the variance is @racket[(* 1/3 (sqr (* pi s #:x-min -8 #:x-max 8 #:y-label "probability")] } -@subsection{Normal Distribution} +@subsection{Normal Distributions} @margin-note{Wikipedia: @hyperlink["http://wikipedia.org/wiki/Normal_distribution"]{Normal Distribution}.} @@ -444,7 +668,7 @@ using #:x-min -5 #:x-max 5 #:y-label "probability")] } -@subsection{Triangular Distribution} +@subsection{Triangular Distributions} @margin-note{Wikipedia: @hyperlink["http://wikipedia.org/wiki/Triangular_distribution"]{Triangular @@ -467,7 +691,7 @@ before constructing the distribution object. [i (in-naturals)]) (function (dist-pdf (triangle-dist a b m)) #:color i #:label (format "Triangle(~a,~a,~a)" a b m))) - #:x-min -3 #:x-max 3 #:y-label "density") + #:x-min -3.5 #:x-max 3.5 #:y-label "density") (plot (for/list ([a (in-list '(-3 -1 -2))] [b (in-list '(0 1 3))] @@ -475,12 +699,44 @@ before constructing the distribution object. [i (in-naturals)]) (function (dist-cdf (triangle-dist a b m)) #:color i #:label (format "Triangle(~a,~a,~a)" a b m))) - #:x-min -3 #:x-max 3 #:y-label "probability")] + #:x-min -3.5 #:x-max 3.5 #:y-label "probability")] + +@racket[(triangle-dist c c c)] for any real @racket[c] behaves like a support-limited delta +distribution centered at @racket[c]. } -@subsection{Truncated Distribution} +@subsection{Truncated Distributions} -@subsection{Uniform Distribution} +@deftogether[(@defidform[Truncated-Dist] + @defproc*[([(truncated-dist [d Real-Dist]) Truncated-Dist] + [(truncated-dist [d Real-Dist] [max Real]) Truncated-Dist] + [(truncated-dist [d Real-Dist] [min Real] [max Real]) Truncated-Dist])] + @defproc[(truncated-dist-original [t Truncated-Dist]) Real-Dist] + @defproc[(truncated-dist-min [t Truncated-Dist]) Flonum] + @defproc[(truncated-dist-max [t Truncated-Dist]) Flonum])]{ +Represents distributions like @racket[d], but with zero density for @racket[x < min] and +for @racket[x > max]. The probability of the interval [@racket[min], @racket[max]] is renormalized +to one. + +@racket[(truncated-dist d)] is equivalent to @racket[(truncated-dist d -inf.0 +inf.0)]. +@racket[(truncated-dist d max)] is equivalent to @racket[(truncated-dist d -inf.0 max)]. +If @racket[min > max], they are swapped before constructing the distribution object. + +Samples are taken by applying the truncated distribution's @tech{inverse cdf} to uniform samples. + +@examples[#:eval untyped-eval + (define d (normal-dist)) + (define t (truncated-dist d -2 1)) + t + (plot (list (function (dist-pdf d) #:label "N(0,1)" #:color 0) + (function (dist-pdf t) #:label "T(N(0,1),-2,1)")) + #:x-min -3.5 #:x-max 3.5 #:y-label "density") + (plot (list (function (dist-cdf d) #:label "N(0,1)" #:color 0) + (function (dist-cdf t) #:label "T(N(0,1),-2,1)")) + #:x-min -3.5 #:x-max 3.5 #:y-label "probability")] +} + +@subsection{Uniform Distributions} @margin-note{ Wikipedia: @@ -497,21 +753,156 @@ Represents the uniform distribution family parameterized by minimum and maximum. @racket[(uniform-dist)] is equivalent to @racket[(uniform-dist 0 1)]. @racket[(uniform-dist max)] is equivalent to @racket[(uniform-dist 0 max)]. If @racket[max < min], they are swapped before constructing the distribution object. -@;{ + @examples[#:eval untyped-eval (plot (for/list ([a (in-list '(-3 -1 -2))] [b (in-list '(0 1 3))] [i (in-naturals)]) (function (dist-pdf (uniform-dist a b)) #:color i #:label (format "Uniform(~a,~a)" a b))) - #:x-min -3 #:x-max 3 #:y-label "density") + #:x-min -3.5 #:x-max 3.5 #:y-label "density") (plot (for/list ([a (in-list '(-3 -1 -2))] [b (in-list '(0 1 3))] [i (in-naturals)]) (function (dist-cdf (uniform-dist a b)) #:color i #:label (format "Uniform(~a,~a)" a b))) - #:x-min -3 #:x-max 3 #:y-label "probability")] + #:x-min -3.5 #:x-max 3.5 #:y-label "probability")] + +@racket[(uniform-dist x x)] for any real @racket[x] behaves like a support-limited delta +distribution centered at @racket[x]. } + +@section[#:tag "dist:flonum"]{Low-Level Distribution Functions} + +The following functions are provided for users who need lower overhead than that of +@tech{distribution objects}, such as (currently) untyped Racket users, and library writers who +are implementing their own distribution abstractions. + +Because applying these functions is meant to be fast, none of them have optional arguments. In +particular, the boolean flags @racket[log?] and @racket[1-p?] are always required. + +Every low-level function's argument list begins with the distribution family parameters. In the +case of @tech{pdfs} and @tech{cdfs}, these arguments are followed by a domain value and boolean +flags. In the case of @tech{inverse cdfs}, they are followed by a probability argument and boolean +flags. For sampling procedures, the distribution family parameters are followed by the requested +number of random samples. + +Generally, @racket[prob] is a probability parameter, @racket[k] is an integer domain value, +@racket[x] is a real domain value, @racket[p] is the probability argument to an inverse cdf, +and @racket[n] is the number of random samples. + +@subsection{Integer Distribution Functions} + +@deftogether[ +(@defproc[(flbernoulli-pdf [prob Flonum] [k Flonum] [log? Any]) Flonum] + @defproc[(flbernoulli-cdf [prob Flonum] [k Flonum] [log? Any] [1-p? Any]) Flonum] + @defproc[(flbernoulli-inv-cdf [prob Flonum] [p Flonum] [log? Any] [1-p? Any]) Flonum] + @defproc[(flbernoulli-sample [prob Flonum] [n Integer]) FlVector])]{ +Low-level flonum functions used to implement @racket[bernoulli-dist]. } + +@deftogether[ +(@defproc[(flbinomial-pdf [count Flonum] [prob Flonum] [k Flonum] [log? Any]) Flonum] + @defproc[(flbinomial-cdf [count Flonum] [prob Flonum] [k Flonum] [log? Any] [1-p? Any]) Flonum] + @defproc[(flbinomial-inv-cdf [count Flonum] [prob Flonum] [p Flonum] [log? Any] [1-p? Any]) Flonum] + @defproc[(flbinomial-sample [count Flonum] [prob Flonum] [n Integer]) FlVector])]{ +Low-level flonum functions used to implement @racket[binomial-dist]. +} + +@deftogether[ +(@defproc[(flgeometric-pdf [prob Flonum] [k Flonum] [log? Any]) Flonum] + @defproc[(flgeometric-cdf [prob Flonum] [k Flonum] [log? Any] [1-p? Any]) Flonum] + @defproc[(flgeometric-inv-cdf [prob Flonum] [p Flonum] [log? Any] [1-p? Any]) Flonum] + @defproc[(flgeometric-sample [prob Flonum] [n Integer]) FlVector])]{ +Low-level flonum functions used to implement @racket[geometric-dist]. +} + +@deftogether[ +(@defproc[(flpoisson-pdf [mean Flonum] [k Flonum] [log? Any]) Flonum] + @defproc[(flpoisson-cdf [mean Flonum] [k Flonum] [log? Any] [1-p? Any]) Flonum] + @defproc[(flpoisson-inv-cdf [mean Flonum] [p Flonum] [log? Any] [1-p? Any]) Flonum] + @defproc[(flpoisson-sample [mean Flonum] [n Integer]) FlVector] + @defproc[(flpoisson-median [mean Flonum]) Flonum])]{ +Low-level flonum functions used to implement @racket[poisson-dist]. + +@racket[(flpoisson-median mean)] runs faster than +@racket[(flpoisson-inv-cdf mean 0.5 #f #f)], significantly so when @racket[mean] is large. +} + +@subsection{Real Distribution Functions} + +@deftogether[ +(@defproc[(flbeta-pdf [alpha Flonum] [beta Flonum] [x Flonum] [log? Any]) Flonum] + @defproc[(flbeta-cdf [alpha Flonum] [beta Flonum] [x Flonum] [log? Any] [1-p? Any]) Flonum] + @defproc[(flbeta-inv-cdf [alpha Flonum] [beta Flonum] [p Flonum] [log? Any] [1-p? Any]) Flonum] + @defproc[(flbeta-sample [alpha Flonum] [beta Flonum] [n Integer]) FlVector])]{ +Low-level flonum functions used to implement @racket[beta-dist]. +} + +@deftogether[ +(@defproc[(flcauchy-pdf [mode Flonum] [scale Flonum] [x Flonum] [log? Any]) Flonum] + @defproc[(flcauchy-cdf [mode Flonum] [scale Flonum] [x Flonum] [log? Any] [1-p? Any]) Flonum] + @defproc[(flcauchy-inv-cdf [mode Flonum] [scale Flonum] [p Flonum] [log? Any] [1-p? Any]) Flonum] + @defproc[(flcauchy-sample [mode Flonum] [scale Flonum] [n Integer]) FlVector])]{ +Low-level flonum functions used to implement @racket[cauchy-dist]. +} + +@deftogether[ +(@defproc[(fldelta-pdf [mean Flonum] [x Flonum] [log? Any]) Flonum] + @defproc[(fldelta-cdf [mean Flonum] [x Flonum] [log? Any] [1-p? Any]) Flonum] + @defproc[(fldelta-inv-cdf [mean Flonum] [p Flonum] [log? Any] [1-p? Any]) Flonum])]{ +Low-level flonum functions used to implement @racket[delta-dist]. + +If you need delta-distributed random samples, use @racket[(make-flvector n mean)]. +} + +@deftogether[ +(@defproc[(flexponential-pdf [mean Flonum] [x Flonum] [log? Any]) Flonum] + @defproc[(flexponential-cdf [mean Flonum] [x Flonum] [log? Any] [1-p? Any]) Flonum] + @defproc[(flexponential-inv-cdf [mean Flonum] [p Flonum] [log? Any] [1-p? Any]) Flonum] + @defproc[(flexponential-sample [mean Flonum] [n Integer]) FlVector])]{ +Low-level flonum functions used to implement @racket[exponential-dist]. +} + +@deftogether[ +(@defproc[(flgamma-pdf [shape Flonum] [scale Flonum] [x Flonum] [log? Any]) Flonum] + @defproc[(flgamma-cdf [shape Flonum] [scale Flonum] [x Flonum] [log? Any] [1-p? Any]) Flonum] + @defproc[(flgamma-inv-cdf [shape Flonum] [scale Flonum] [p Flonum] [log? Any] [1-p? Any]) Flonum] + @defproc[(flgamma-sample [shape Flonum] [scale Flonum] [n Integer]) FlVector])]{ +Low-level flonum functions used to implement @racket[gamma-dist]. +} + +@deftogether[ +(@defproc[(fllogistic-pdf [mean Flonum] [scale Flonum] [x Flonum] [log? Any]) Flonum] + @defproc[(fllogistic-cdf [mean Flonum] [scale Flonum] [x Flonum] [log? Any] [1-p? Any]) Flonum] + @defproc[(fllogistic-inv-cdf [mean Flonum] [scale Flonum] [p Flonum] [log? Any] [1-p? Any]) Flonum] + @defproc[(fllogistic-sample [mean Flonum] [scale Flonum] [n Integer]) FlVector])]{ +Low-level flonum functions used to implement @racket[logistic-dist]. +} + +@deftogether[ +(@defproc[(flnormal-pdf [mean Flonum] [stddev Flonum] [x Flonum] [log? Any]) Flonum] + @defproc[(flnormal-cdf [mean Flonum] [stddev Flonum] [x Flonum] [log? Any] [1-p? Any]) Flonum] + @defproc[(flnormal-inv-cdf [mean Flonum] [stddev Flonum] [p Flonum] [log? Any] [1-p? Any]) Flonum] + @defproc[(flnormal-sample [mean Flonum] [stddev Flonum] [n Integer]) FlVector])]{ +Low-level flonum functions used to implement @racket[normal-dist]. +} + +@deftogether[ +(@defproc[(fltriangle-pdf [min Flonum] [max Flonum] [mode Flonum] [x Flonum] [log? Any]) Flonum] + @defproc[(fltriangle-cdf [min Flonum] [max Flonum] [mode Flonum] [x Flonum] [log? Any] [1-p? Any]) Flonum] + @defproc[(fltriangle-inv-cdf [min Flonum] [max Flonum] [mode Flonum] [p Flonum] [log? Any] [1-p? Any]) Flonum] + @defproc[(fltriangle-sample [min Flonum] [max Flonum] [mode Flonum] [n Integer]) FlVector])]{ +Low-level flonum functions used to implement @racket[triangle-dist]. +} + +@deftogether[ +(@defproc[(fluniform-pdf [min Flonum] [max Flonum] [x Flonum] [log? Any]) Flonum] + @defproc[(fluniform-cdf [min Flonum] [max Flonum] [x Flonum] [log? Any] [1-p? Any]) Flonum] + @defproc[(fluniform-inv-cdf [min Flonum] [max Flonum] [p Flonum] [log? Any] [1-p? Any]) Flonum] + @defproc[(fluniform-sample [min Flonum] [max Flonum] [n Integer]) FlVector])]{ +Low-level flonum functions used to implement @racket[uniform-dist]. +} + @(close-eval untyped-eval) diff --git a/collects/math/statistics.rkt b/collects/math/statistics.rkt index 829da980dd..895606b834 100644 --- a/collects/math/statistics.rkt +++ b/collects/math/statistics.rkt @@ -3,10 +3,12 @@ (require "private/statistics/statistics-struct.rkt" "private/statistics/expected-values.rkt" "private/statistics/correlation.rkt" - "private/statistics/order-statistics.rkt") + "private/statistics/order-statistics.rkt" + "private/statistics/counting.rkt") (provide (all-from-out "private/statistics/statistics-struct.rkt" "private/statistics/expected-values.rkt" "private/statistics/correlation.rkt" - "private/statistics/order-statistics.rkt")) + "private/statistics/order-statistics.rkt" + "private/statistics/counting.rkt"))