Finished `math/distributions' documentation!

Fixed a few limit cases in some distributions (e.g. (uniform-dist 0 0) didn't
act like a delta distribution, (beta-dist 0 0) and (beta-dist +inf.0 +inf.0)
pretended to be defined by unique limits even though they can't be)

Made integer distributions' pdfs return +nan.0 when given non-integers

Added "private/statistics/counting.rkt", for hashing and binning samples

Added `flvector-sums' (cumulative sums with single rounding error)

Added `flinteger?', `flnan?' and `flrational?', which are faster than their
non-flonum counterparts (at least in Typed Racket; haven't tested untyped)
This commit is contained in:
Neil Toronto 2012-11-27 13:37:07 -07:00
parent 3fdd834c6c
commit 2d34811ab6
20 changed files with 707 additions and 164 deletions

View File

@ -19,10 +19,10 @@
(cond [(not (flprobability? q)) +nan.0]
[log? (cond [(fl= k 0.0) (fllog1p (- q))]
[(fl= k 1.0) (fllog q)]
[else 0.0])]
[else +nan.0])]
[else (cond [(fl= k 0.0) (fl- 1.0 q)]
[(fl= k 1.0) q]
[else 0.0])]))
[else +nan.0])]))
(: flbernoulli-cdf (Flonum Flonum Any Any -> Flonum))
(define (flbernoulli-cdf q k log? 1-p?)

View File

@ -38,6 +38,10 @@
(: flbeta-sample (Flonum Flonum Integer -> FlVector))
(define (flbeta-sample a b n)
(cond [(n . < . 0) (raise-argument-error 'flbeta-sample "Natural" 2 a b n)]
[(or (a . fl< . 0.0) (b . fl< . 0.0)
(and (fl= a 0.0) (fl= b 0.0))
(and (fl= a +inf.0) (fl= b +inf.0)))
(build-flvector n (λ (_) +nan.0))]
[else
(define xs (flgamma-sample a 1.0 n))
(define ys (flgamma-sample b 1.0 n))

View File

@ -25,29 +25,30 @@
(: flbinomial-cdf (Flonum Flonum Flonum Any Any -> Flonum))
(define (flbinomial-cdf n q k log? 1-p?)
(cond [(n . fl< . 0.0) +nan.0]
[(not (integer? n)) +nan.0]
[(or (q . < . 0.0) (q . > . 1.0)) +nan.0]
(cond [(or (not (flinteger? n)) (n . fl< . 0.0)
(q . fl< . 0.0) (q . fl> . 1.0))
+nan.0]
[else
(let ([k (flfloor k)])
(cond [log? (fllog-beta-inc (+ k 1.0) (- n k) q (not 1-p?) #t)]
[else (flbeta-inc (+ k 1.0) (- n k) q (not 1-p?) #t)]))]))
(cond [log? (fllog-beta-inc (fl+ k 1.0) (fl- n k) q (not 1-p?) #t)]
[else (flbeta-inc (fl+ k 1.0) (fl- n k) q (not 1-p?) #t)]))]))
(: flbinomial-inv-cdf (Flonum Flonum Flonum Any Any -> Flonum))
(define (flbinomial-inv-cdf n q p log? 1-p?)
(cond [(n . fl< . 0.0) +nan.0]
[(not (integer? n)) +nan.0]
[(not (flprobability? p log?)) +nan.0]
(cond [(or (not (flinteger? n)) (n . fl< . 0.0)
(q . fl< . 0.0) (q . fl> . 1.0)
(not (flprobability? p log?)))
+nan.0]
[(flprobability-one? p log? 1-p?) n]
[(flprobability-zero? p log? 1-p?) 0.0]
[1-p?
(define z (flnormal-inv-cdf (fl* n q) (flsqrt (* n q (fl- 1.0 q))) p log? 1-p?))
(define z (flnormal-inv-cdf (fl* n q) (flsqrt (* n q (fl- 1.0 q))) p log? #t))
(flfind-least-integer
(λ: ([k : Flonum]) ((flbinomial-cdf n q k log? 1-p?) . fl< . p))
0.0 n
(flmax 0.0 (flmin n z)))]
[else
(define z (flnormal-inv-cdf (fl* n q) (flsqrt (* n q (fl- 1.0 q))) p log? 1-p?))
(define z (flnormal-inv-cdf (fl* n q) (flsqrt (* n q (fl- 1.0 q))) p log? #f))
(flfind-least-integer
(λ: ([k : Flonum]) ((flbinomial-cdf n q k log? 1-p?) . fl>= . p))
0.0 n

View File

@ -11,16 +11,16 @@
(provide Discrete-Dist
discrete-dist
discrete-dist-values
discrete-dist-weights)
discrete-dist-probs)
(begin-encourage-inline
(struct: (In Out) discrete-dist-struct dist ([values : (Listof Out)]
[weights : (Listof Positive-Flonum)])
[probs : (Listof Positive-Flonum)])
#:property prop:custom-print-quotable 'never
#:property prop:custom-write
(λ (v port mode)
(pretty-print-constructor
'discrete-dist (list (discrete-dist-struct-values v) (discrete-dist-struct-weights v))
'discrete-dist (list (discrete-dist-struct-values v) (discrete-dist-struct-probs v))
port mode)))
(define-type (Discrete-Dist A) (discrete-dist-struct A A))
@ -28,8 +28,8 @@
(: discrete-dist-values (All (A) ((Discrete-Dist A) -> (Listof A))))
(define (discrete-dist-values d) (discrete-dist-struct-values d))
(: discrete-dist-weights (All (A) ((Discrete-Dist A) -> (Listof Positive-Flonum))))
(define (discrete-dist-weights d) (discrete-dist-struct-weights d))
(: discrete-dist-probs (All (A) ((Discrete-Dist A) -> (Listof Positive-Flonum))))
(define (discrete-dist-probs d) (discrete-dist-struct-probs d))
)

View File

@ -18,8 +18,8 @@
(struct-out dist)
(struct-out ordered-dist)
Real-Dist
dist-median sample
real-dist-prob)
dist-median
pdf sample cdf inv-cdf real-dist-prob)
(define-type (PDF In)
(case-> (In -> Flonum)
@ -60,6 +60,11 @@
(: dist-median (All (In Out) ((ordered-dist In Out) -> Out)))
(define (dist-median d) (force (ordered-dist-median d)))
(: pdf (All (In Out) (case-> ((dist In Out) In -> Flonum)
((dist In Out) In Any -> Flonum))))
(define (pdf d v [log? #f])
((dist-pdf d) v log?))
(: sample (All (In Out) (case-> ((dist In Out) -> Out)
((dist In Out) Integer -> (Listof Out)))))
(define sample
@ -67,6 +72,18 @@
[(d) ((dist-sample d))]
[(d n) ((dist-sample d) n)]))
(: cdf (All (In Out) (case-> ((ordered-dist In Out) In -> Flonum)
((ordered-dist In Out) In Any -> Flonum)
((ordered-dist In Out) In Any Any -> Flonum))))
(define (cdf d v [log? #f] [1-p? #f])
((ordered-dist-cdf d) v log? 1-p?))
(: inv-cdf (All (In Out) (case-> ((ordered-dist In Out) Real -> Out)
((ordered-dist In Out) Real Any -> Out)
((ordered-dist In Out) Real Any Any -> Out))))
(define (inv-cdf d p [log? #f] [1-p? #f])
((ordered-dist-inv-cdf d) p log? 1-p?))
)
(: real-dist-prob* (Real-Dist Flonum Flonum Any -> Flonum))

View File

@ -16,15 +16,15 @@
(: flgeometric-pdf (Flonum Flonum Any -> Flonum))
(define (flgeometric-pdf q k log?)
(cond [(or (q . fl<= . 0.0) (q . fl>= . 1.0))
(cond [(not (flinteger? k)) +nan.0]
[(or (q . fl<= . 0.0) (q . fl>= . 1.0))
(cond [(fl= q 1.0) (cond [(fl= k 0.0) (if log? 0.0 1.0)]
[else (if log? -inf.0 0.0)])]
[(fl= q 0.0) (if log? -inf.0 0.0)]
[else +nan.0])]
[(k . fl< . 0.0) (if log? -inf.0 0.0)]
[(integer? k) (cond [log? (fl+ (fllog q) (fl* k (fllog1p (- q))))]
[else (fl* q (flexp (fl* k (fllog1p (- q)))))])]
[else (if log? -inf.0 0.0)]))
[log? (fl+ (fllog q) (fl* k (fllog1p (- q))))]
[else (fl* q (flexp (fl* k (fllog1p (- q)))))]))
(: flgeometric-cdf (Flonum Flonum Any Any -> Flonum))
(define (flgeometric-cdf q k log? 1-p?)
@ -34,9 +34,10 @@
[else +nan.0])]
[(k . fl< . 0.0) (flprobability 0.0 log? 1-p?)]
[else
(define log-1-p (fl* (fl+ (flfloor k) 1.0) (fllog1p (- q))))
(cond [1-p? (if log? log-1-p (exp log-1-p))]
[else (if log? (lg1- log-1-p) (- (flexpm1 log-1-p)))])]))
(let ([k (flfloor k)])
(define log-1-p (fl* (fl+ k 1.0) (fllog1p (- q))))
(cond [1-p? (if log? log-1-p (exp log-1-p))]
[else (if log? (lg1- log-1-p) (- (flexpm1 log-1-p)))]))]))
(: flgeometric-inv-cdf (Flonum Flonum Any Any -> Flonum))
(define (flgeometric-inv-cdf q p log? 1-p?)

View File

@ -164,9 +164,10 @@
(: flbeta-inv-log-cdf-limits (Flonum Flonum Flonum -> Flonum))
(define (flbeta-inv-log-cdf-limits a b log-p)
(cond [(not (and (log-p . fl<= . 0.0) (a . fl>= . 0.0) (b . fl>= . 0.0))) +nan.0]
[(or (and (fl= a 0.0) (fl= b 0.0))
(and (fl= a +inf.0) (fl= b +inf.0)))
+nan.0]
[(fl= log-p -inf.0) 0.0]
[(and (fl= a 0.0) (fl= b 0.0)) (if (log-p . fl< . (fllog 0.5)) 0.0 1.0)]
[(and (fl= a +inf.0) (fl= b +inf.0)) 0.5]
[(fl= a +inf.0) 1.0]
[(fl= b +inf.0) 0.0]
[(fl= a 0.0) 0.0]

View File

@ -7,18 +7,17 @@
(: flbeta-log-pdf (Flonum Flonum Flonum -> Flonum))
(define (flbeta-log-pdf a b x)
(cond [(or (a . fl<= . 0.0) (b . fl<= . 0.0))
(cond [(or (a . fl< . 0.0) (b . fl< . 0.0))
+nan.0]
[(and (fl= a 0.0) (fl= b 0.0))
(if (or (fl= x 0.0) (fl= x 1.0)) +inf.0 -inf.0)]
[(fl= a 0.0)
(if (fl= x 0.0) +inf.0 0.0)]
[else
(if (fl= x 1.0) +inf.0 0.0)])]
(cond [(or (a . fl< . 0.0) (b . fl< . 0.0)
(and (fl= a 0.0) (fl= b 0.0))
(and (fl= a +inf.0) (fl= b +inf.0)))
+nan.0]
[(or (fl= a 0.0) (fl= b +inf.0))
(if (fl= x 0.0) +inf.0 -inf.0)]
[(or (fl= b 0.0) (fl= a +inf.0))
(if (fl= x 1.0) +inf.0 -inf.0)]
[(or (x . fl< . 0.0) (x . fl> . 1.0))
-inf.0]
[else
(flsum (list (fl* (fl- a 1.0) (fllog x))
(fl* (fl- b 1.0) (fllog1p (- x)))
(fl- 0.0 (fllog-beta a b))))]))
(- (fllog-beta a b))))]))

View File

@ -18,11 +18,13 @@
(max 1.0 (flexpt 2.0 (flfloor (/ (fllog d) (fllog 2.0))))))
(: flbinomial-pdf (Flonum Flonum Flonum -> Flonum))
;; Maximum error grows without bound in the size of `n', but is reasonable for xxx
;; Maximum error grows without bound in the size of `n' (todo: figure out what it's reasonable for)
(define (flbinomial-pdf n p k)
(cond [(or (not (integer? n)) (n . fl< . 0.0)) +nan.0]
[(or (p . fl< . 0.0) (p . fl> . 1.0)) +nan.0]
[(or (not (integer? k)) (k . fl< . 0.0) (k . fl> . n)) 0.0]
(cond [(or (not (integer? n)) (n . fl< . 0.0)
(p . fl< . 0.0) (p . fl> . 1.0)
(not (integer? k)))
+nan.0]
[(or (k . fl< . 0.0) (k . fl> . n)) 0.0]
[(fl= p 0.0) (if (fl= k 0.0) 1.0 0.0)]
[(fl= p 1.0) (if (fl= k n) 1.0 0.0)]
[(fl= k 0.0) (flexpt1p (- p) n)]
@ -66,9 +68,11 @@
(: flbinomial-log-pdf (Flonum Flonum Flonum -> Flonum))
;; Maximum error grows without bound in `n', but is <= 4 ulps for n <= 1e4
(define (flbinomial-log-pdf n p k)
(cond [(or (not (integer? n)) (n . fl< . 0.0)) +nan.0]
[(or (p . fl< . 0.0) (p . fl> . 1.0)) +nan.0]
[(or (not (integer? k)) (k . fl< . 0.0) (k . fl> . n)) -inf.0]
(cond [(or (not (flinteger? n)) (n . fl< . 0.0)
(p . fl< . 0.0) (p . fl> . 1.0)
(not (flinteger? k)))
+nan.0]
[(or (k . fl< . 0.0) (k . fl> . n)) -inf.0]
[(fl= p 0.0) (if (fl= k 0.0) 0.0 -inf.0)]
[(fl= p 1.0) (if (fl= k n) 0.0 -inf.0)]
[(fl= k 0.0) (fl* n (fllog1p (- p)))]

View File

@ -21,8 +21,7 @@
(: flpoisson-pdf (Flonum Flonum Any -> Flonum))
(define (flpoisson-pdf l k log?)
(cond [(l . fl< . 0.0) +nan.0]
[(not (integer? k)) (if log? -inf.0 0.0)]
(cond [(or (l . fl< . 0.0) (not (integer? k))) +nan.0]
[log? (impl:flpoisson-log-pdf l k)]
[else (impl:flpoisson-pdf l k)]))

View File

@ -27,6 +27,7 @@
(fl* (fl- b a) (fl- c a)))]
[(x . fl< . b) (fl/ (fl* 2.0 (fl- b x))
(fl* (fl- b a) (fl- b c)))]
[(= x a b) +inf.0]
[else 0.0]))
(if log? (fllog p) p))
@ -41,12 +42,12 @@
(fl- 1.0 (fl/ (fl* b-x b-x)
(fl* (fl- b a) (fl- b c))))]
[else 1.0]))
(cond [1-p? (if log? (fllog (fl- 1.0 q)) (fl- 1.0 q))]
(cond [1-p? (if log? (fllog1p (- q)) (fl- 1.0 q))]
[else (if log? (fllog q) q)]))
(: unsafe-fltriangle-inv-cdf (Float Float Float Float Any Any -> Float))
(define (unsafe-fltriangle-inv-cdf a b c q log? 1-p?)
(let ([q (cond [1-p? (if log? (fl- 1.0 (flexp q)) (fl- 1.0 q))]
(let ([q (cond [1-p? (if log? (- (flexpm1 q)) (fl- 1.0 q))]
[else (if log? (flexp q) q)])])
(cond [(q . fl< . 0.0) +nan.0]
[(q . fl= . 0.0) a]

View File

@ -88,15 +88,8 @@
(min b (max a x))])))
(: sample-single (-> Flonum))
(define sample-single
(cond [(log-P_a<x<=b . fl< . (- (fllog 3.0)))
(λ () (inv-cdf (fl* 0.5 (random)) #f ((random) . fl> . 0.5)))]
[else
(define (sample-single)
(define x (orig-sample))
(cond [(and (a . fl<= . x) (x . fl<= . b)) x]
[else (sample-single)]))
sample-single]))
(define (sample-single)
(inv-cdf (fl* 0.5 (random)) #f ((random) . fl> . 0.5)))
(: sample (Sample Flonum))
(define sample

View File

@ -25,12 +25,12 @@
(define (unsafe-fluniform-cdf a b x log? 1-p?)
(cond [1-p? (define q
(cond [(x . fl< . a) 1.0]
[(x . fl> . b) 0.0]
[(x . fl>= . b) 0.0]
[else (fl/ (fl- b x) (fl- b a))]))
(if log? (fllog q) q)]
[else (define q
(cond [(x . fl< . a) 0.0]
[(x . fl> . b) 1.0]
[(x . fl>= . b) 1.0]
[else (fl/ (fl- x a) (fl- b a))]))
(if log? (fllog q) q)]))

View File

@ -9,7 +9,7 @@
(provide (all-from-out racket/flonum)
fl
flsubnormal?
flsubnormal? flrational? flnan? flinteger?
flnext* flprev*
flulp-error
float-complex? (rename-out [inline-number->float-complex number->float-complex])
@ -37,6 +37,18 @@
(and ((flabs x) . fl<= . +max-subnormal.0)
(not (fl= x 0.0))))
(define flrational?
(λ: ([x : Flonum])
(and (x . fl> . -inf.0) (x . fl< . +inf.0))))
(define flnan?
(λ: ([x : Flonum])
(not (and (x . fl>= . -inf.0) (x . fl<= . +inf.0)))))
(define flinteger?
(λ: ([x : Flonum])
(fl= x (fltruncate x))))
(: flsubnormal-next* (Flonum -> Flonum))
(define (flsubnormal-next* x)
(fl/ (fl+ (fl* x (flexpt 2.0 1022.0)) epsilon.0)
@ -194,13 +206,13 @@
(: flcscpix (Flonum -> Flonum))
(define (flcscpix x)
(cond [(and (not (zero? x)) (integer? x)) +nan.0]
(cond [(and (not (zero? x)) (flinteger? x)) +nan.0]
[else (/ 1.0 (flsinpix x))]))
(: flsecpix (Flonum -> Flonum))
(define (flsecpix x)
(cond [(and (x . fl> . 0.0) (integer? (fl- x 0.5))) +nan.0]
[(and (x . fl< . 0.0) (integer? (fl+ x 0.5))) +nan.0]
(cond [(and (x . fl> . 0.0) (flinteger? (fl- x 0.5))) +nan.0]
[(and (x . fl< . 0.0) (flinteger? (fl+ x 0.5))) +nan.0]
[else (/ 1.0 (flcospix x))]))
(: flcotpix (Flonum -> Flonum))

View File

@ -170,10 +170,12 @@ ACM Transactions on Mathematical Software, 1992, vol 18, no 3, pp 360--373.
(: flbeta-regularized-limits (Flonum Flonum Flonum -> Flonum))
(define (flbeta-regularized-limits a b x)
(cond [(or (x . fl< . 0.0) (x . fl> . 1.0) (a . fl< . 0.0) (b . fl< . 0.0)) +nan.0]
(cond [(or (x . fl< . 0.0) (x . fl> . 1.0)
(a . fl< . 0.0) (b . fl< . 0.0)
(and (fl= a 0.0) (fl= b 0.0))
(and (fl= a +inf.0) (fl= b +inf.0)))
+nan.0]
[(fl= x 1.0) 1.0]
[(and (fl= a 0.0) (fl= b 0.0)) 0.5]
[(and (fl= a +inf.0) (fl= b +inf.0)) (if (x . fl< . 0.5) 0.0 1.0)]
[(fl= a +inf.0) 0.0]
[(fl= b +inf.0) 1.0]
[(fl= a 0.0) 1.0]

View File

@ -0,0 +1,61 @@
#lang typed/racket/base
(require racket/unsafe/ops
racket/list
racket/sequence)
(provide samples->immutable-hash samples->hash
count-samples
(struct-out sample-bin)
Real-Bin
bin-real-samples)
(struct: (A) sample-bin ([min : A] [max : A] [values : (Listof A)]) #:transparent)
(define-type Real-Bin (sample-bin Real))
(: samples->immutable-hash (All (A) ((Sequenceof A) -> (HashTable A Positive-Integer))))
(define (samples->immutable-hash xs)
(define: h : (HashTable A Positive-Integer) (make-immutable-hash null))
(for/fold: ([h : (HashTable A Positive-Integer) h]) ([x : A xs])
(hash-set h x (unsafe-fx+ 1 (hash-ref h x (λ () 0))))))
(: samples->hash (All (A) ((Sequenceof A) -> (HashTable A Positive-Integer))))
(define (samples->hash xs)
(define: h : (HashTable A Positive-Integer) (make-hash null))
(for: ([x : A xs])
(hash-set! h x (unsafe-fx+ 1 (hash-ref h x (λ () 0)))))
h)
(: count-samples (All (A) ((Sequenceof A) -> (Values (Listof A) (Listof Positive-Integer)))))
(define (count-samples xs)
(define h (samples->hash xs))
(define xws (hash-map h (λ: ([x : A] [c : Positive-Integer]) (cons x c))))
(values (map (λ: ([xw : (Pair A Positive-Integer)]) (car xw)) xws)
(map (λ: ([xw : (Pair A Positive-Integer)]) (cdr xw)) xws)))
(: bin-real-samples ((Sequenceof Real) (Sequenceof Real) -> (Listof Real-Bin)))
(define (bin-real-samples bin-bounds xs)
(let* ([bin-bounds (list* -inf.0 +inf.0 (sequence->list bin-bounds))]
[bin-bounds (filter (λ: ([x : Real]) (not (eqv? x +nan.0)))
(remove-duplicates bin-bounds))]
[bin-bounds (sort bin-bounds <)]
[x-min (first bin-bounds)]
[x-max (last bin-bounds)]
[xs (sequence->list xs)]
[xs (filter (λ: ([x : Real]) (<= x-min x x-max)) xs)]
[xs (sort xs <)])
(define-values (res rest-xs)
(for/fold: ([res : (Listof Real-Bin) empty]
[xs : (Listof Real) xs]
) ([x1 (in-list bin-bounds)]
[x2 (in-list (rest bin-bounds))])
(define-values (lst rest-xs)
(let: loop : (Values (Listof Real) (Listof Real)) ([lst : (Listof Real) empty]
[xs : (Listof Real) xs])
(if (and (not (empty? xs)) (<= x1 (first xs) x2))
(loop (cons (first xs) lst) (rest xs))
(values lst xs))))
(cond [(empty? lst) (values res rest-xs)]
[else (values (cons (sample-bin x1 x2 (reverse lst)) res) rest-xs)])))
(reverse res)))

View File

@ -27,6 +27,27 @@
;; ===================================================================================================
(: find-near-pow2 (Real -> Nonnegative-Exact-Rational))
(define (find-near-pow2 x)
(expt 2 (max -1073 (min 1023 (exact-round (/ (log (abs x)) (fllog 2.0)))))))
(: weights->list (Symbol (Sequenceof Real) -> (Listof Nonnegative-Real)))
(define (weights->list name w-seq)
(for/list: : (Listof Nonnegative-Real) ([w w-seq])
(cond [(w . >= . 0) w]
[else (raise-argument-error name "(Sequenceof Nonnegative-Real)" w-seq)])))
(: weights->normalized-weights (Symbol (Sequenceof Real) -> (Listof Nonnegative-Flonum)))
(define (weights->normalized-weights name ws)
(let ([ws (weights->list name ws)])
(when (empty? ws) (raise-argument-error name "nonempty (Sequenceof Real)" ws))
(define max-w (find-near-pow2 (apply max ws)))
(let ([ws (map (λ: ([w : Real]) (/ w max-w)) ws)])
(define total-w (sum ws))
(map (λ: ([w : Real]) (assert (fl (/ w total-w)) nonnegative?)) ws))))
;; ===================================================================================================
(: check-lengths! (All (A B) (Symbol String A B Index Index -> Void)))
(define (check-lengths! name what xs ys m n)
(unless (= m n) (error name "~a must be the same length; given ~e (length ~a) and ~e (length ~a)"
@ -46,10 +67,6 @@
(define nonnegative? (λ: ([x : Real]) (not (negative? x))))
(: find-near-pow2 (Nonnegative-Real -> Nonnegative-Exact-Rational))
(define (find-near-pow2 x)
(expt 2 (max -1073 (min 1023 (exact-round (/ (log x) (fllog 2.0)))))))
(: sequences->normalized-weighted-samples
(All (A) (Symbol (Sequenceof A) (Sequenceof Real)
-> (Values (Listof A) (Listof Positive-Flonum)))))

View File

@ -1,8 +1,9 @@
#lang typed/racket/base
(require racket/flonum
(require racket/fixnum
racket/string
(for-syntax racket/base syntax/parse)
"../../flonum.rkt"
"../unsafe.rkt"
"flvector-syntax.rkt")
@ -47,7 +48,9 @@
flvector<
flvector<=
flvector>
flvector>=)
flvector>=
;;
flvector-sums)
;; ===================================================================================================
;; flvector-copy
@ -256,3 +259,38 @@
(define flvector<= (lift-comparison 'flvector<= fl<=))
(define flvector> (lift-comparison 'flvector> fl>))
(define flvector>= (lift-comparison 'flvector>= fl>=))
;; ===================================================================================================
(: flvector-sums (FlVector -> FlVector))
(define (flvector-sums xs)
(define n (flvector-length xs))
(define rs (make-flvector n))
(define ss (make-flvector n))
(let j-loop ([#{j : Nonnegative-Fixnum} 0]
[#{m : Nonnegative-Fixnum} 0])
(cond
[(j . fx< . n)
(define x (unsafe-flvector-ref xs j))
(let p-loop ([#{p : Nonnegative-Fixnum} 0]
[#{x : Flonum} x]
[#{s : Flonum} 0.0]
[#{i : Nonnegative-Fixnum} 0])
(cond
[(p . fx< . m)
(define r (unsafe-flvector-ref rs p))
(let-values ([(x r) (if ((flabs x) . fl< . (flabs r)) (values r x) (values x r))])
(define z (fl+ x r))
(define-values (hi lo)
(cond [(flrational? z) (values z (fl- r (fl- z x)))]
[else (values x r)]))
(cond [(fl= lo 0.0)
(p-loop (unsafe-fx+ p 1) hi s i)]
[else
(unsafe-flvector-set! rs i lo)
(p-loop (unsafe-fx+ p 1) hi (fl+ s lo) (unsafe-fx+ i 1))]))]
[else
(unsafe-flvector-set! rs i x)
(unsafe-flvector-set! ss j (fl+ s x))
(j-loop (fx+ j 1) (unsafe-fx+ i 1))]))]
[else ss])))

View File

@ -2,10 +2,11 @@
@(require scribble/eval
racket/sandbox
(for-label racket/base racket/promise
(for-label racket/base racket/promise racket/list
math plot
(only-in typed/racket/base
Flonum Real Boolean Any Listof Integer case-> -> Promise))
Flonum Real Boolean Any Listof Integer case-> -> Promise U
Sequenceof Positive-Flonum Nonnegative-Flonum))
"utils.rkt")
@(define untyped-eval (make-untyped-math-eval))
@ -16,20 +17,31 @@
@defmodule[math/distributions]
The @racketmodname[math/distributions] module exports the following:
@itemlist[#:style 'ordered
@item{@tech{Distribution objects}, which represent probability distributions}
@item{Functions that operate on distribution objects}
@item{The low-level flonum functions used to define distribution objects}]
@bold{Performance Warning:} Using distribution objects in untyped Racket is currently 25-50 times
slower than using them in Typed Racket, due to the overhead of checking higher-order contracts.
We are working on it.
For now, if you need speed, either use the @racketmodname[typed/racket] language, or use just the
low-level flonum functions, which are documented in @secref{dist:flonum}.
@local-table-of-contents[]
@section[#:tag "dist:intro"]{Introduction}
The @racketmodname[math/distributions] module exports distribution objects and functions that
operate on them.
@section[#:tag "dist:dist-objects"]{Distribution Objects}
A @deftech{distribution object} represents a probability distribution over a common domain,
such as the real numbers, integers, or a set of symbols. Their constructors correspond with
distribution families, such as the family of normal distributions.
A distribution object has a density function (a @deftech{pdf}) and a procedure to generate random
samples. An @italic{ordered} distribution object additionally has a cumulative distribution function
(a @deftech{cdf}), and its generalized inverse (an @deftech{inverse cdf}).
A distribution object, or a value of type @racket[dist], has a density function (a @deftech{pdf})
and a procedure to generate random samples. An @italic{ordered} distribution object, or a value
of type @racket[ordered-dist], additionally has a cumulative distribution function (a @deftech{cdf}),
and its generalized inverse (an @deftech{inverse cdf}).
The following example creates an ordered distribution object representing a normal distribution
with mean 2 and standard deviation 5, computes an approximation of the probability of the
@ -38,14 +50,16 @@ half-open interval (1/2,1], and computes another approximation from random sampl
(define d (normal-dist 2 5))
(real-dist-prob d 0.5 1.0)
(define xs (sample d 10000))
(fl (/ (count (λ (x) (and (1/2 . < . x) (x . <= . 1))) xs)
(length xs)))]
(eval:alts
(fl (/ (count (λ (x) (and (1/2 . < . x) (x . <= . 1))) xs)
(length xs)))
(eval:result @racketresultfont{0.0391}))]
This plots the pdf and a kernel density estimate of the pdf from random samples:
@interaction[#:eval untyped-eval
(plot (list (function (dist-pdf d) #:color 0 #:style 'dot)
(density xs))
#:x-label "x" #:y-label "N(2,5)")]
#:x-label "x" #:y-label "density of N(2,5)")]
There are also higher-order distributions, which take other distributions as constructor
arguments. For example, the truncated distribution family returns a distribution like its
@ -57,34 +71,32 @@ the probabilities within the interval:
(real-dist-prob d-trunc 0.5 1.0)
(plot (list (function (dist-pdf d-trunc) #:color 0 #:style 'dot)
(density (sample d-trunc 1000)))
#:x-label "x" #:y-label "T(N(2,5),-∞,5)")]
#:x-label "x" #:y-label "density of T(N(2,5),-∞,5)")]
Because real distributions' cdfs represent the probability P[@italic{X} ≤ @italic{x}], they are
right-continuous:
right-continuous (i.e. continuous @italic{from the right}):
@interaction[#:eval untyped-eval
(define d (geometric-dist 0.4))
(define cdf (dist-cdf d))
(plot (for/list ([i (in-range -1 7)])
(define i+1-ε (flprev (+ i 1.0)))
(list (lines (list (vector i (cdf i))
(vector i+1-ε (cdf i+1-ε)))
(list (lines (list (vector i (cdf d i))
(vector i+1-ε (cdf d i+1-ε)))
#:width 2)
(points (list (vector i (cdf i)))
(points (list (vector i (cdf d i)))
#:sym 'fullcircle5 #:color 1)
(points (list (vector i+1-ε (cdf i+1-ε)))
(points (list (vector i+1-ε (cdf d i+1-ε)))
#:sym 'fullcircle5 #:color 1 #:fill-color 0)))
#:x-min -0.5 #:x-max 6.5 #:y-min -0.05 #:y-max 1
#:x-label "x" #:y-label "P[X ≤ x]")]
For convenience, cdfs are defined over the extended reals regardless of their distribution's
support, but their inverses return values only within the support:
@interaction[#:eval untyped-eval
(define inv-cdf (dist-inv-cdf d))
(cdf +inf.0)
(cdf 1.5)
(cdf -inf.0)
(inv-cdf (cdf +inf.0))
(inv-cdf (cdf 1.5))
(inv-cdf (cdf -inf.0))]
(cdf d +inf.0)
(cdf d 1.5)
(cdf d -inf.0)
(inv-cdf d (cdf d +inf.0))
(inv-cdf d (cdf d 1.5))
(inv-cdf d (cdf d -inf.0))]
A distribution's inverse cdf is defined on the interval [0,1] and is always left-continuous,
except possibly at 0 when its support is bounded on the left (as with @racket[geometric-dist]).
@ -92,43 +104,50 @@ Every pdf and cdf can return log densities and log probabilities, in case densit
are too small to represent as flonums (i.e. are less than @racket[+min.0]):
@interaction[#:eval untyped-eval
(define d (normal-dist))
(define pdf (dist-pdf d))
(define cdf (dist-cdf d))
(pdf 40.0)
(cdf -40.0)
(pdf 40.0 #t)
(cdf -40.0 #t)]
(pdf d 40.0)
(cdf d -40.0)
(pdf d 40.0 #t)
(cdf d -40.0 #t)]
Additionally, every cdf can return upper-tail probabilities, which are always more accurate when
lower-tail probabilities are greater than @racket[0.5]:
@interaction[#:eval untyped-eval
(cdf 20.0)
(cdf 20.0 #f #t)]
Upper-tail probabilities can also be returned as logs in case probabilities are too small:
(cdf d 20.0)
(cdf d 20.0 #f #t)]
Upper-tail probabilities can also be returned as log probabilities in case probabilities are too
small:
@interaction[#:eval untyped-eval
(cdf 40.0)
(cdf 40.0 #f #t)
(cdf 40.0 #t #t)]
(cdf d 40.0)
(cdf d 40.0 #f #t)
(cdf d 40.0 #t #t)]
Inverse cdfs accept log probabilities and upper-tail probabilities.
The functions @racket[lg+] and @racket[lgsum], as well as others in @racketmodname[math/flonum],
perform arithmetic on log probabilities.
When distribution object constructors receive parameters outside their domains, they return
@deftech{undefined distributions}, or distributions whose functions all return @racket[+nan.0]:
@interaction[#:eval untyped-eval
(pdf (gamma-dist -1 2) 2)
(sample (poisson-dist -2))
(cdf (beta-dist 0 0) 1/2)
(inv-cdf (geometric-dist 1.1) 0.2)]
@section{Basic Distribution Types and Operations}
@defform[(PDF In)]{
The type of probability density functions, or @tech{pdfs}, defined as
@racketblock[(case-> (In -> Flonum)
(In Any -> Flonum))]
The second argument defaults to @racket[#f]. When the second argument is not @racket[#f], a
pdf returns a log density.
For any function of this type, the second argument should default to @racket[#f]. When not
@racket[#f], the function should return a log density.
}
@defform[(Sample Out)]{
The type of a distribution's sampling procedure, defined as
@racketblock[(case-> (-> Out)
(Integer -> (Listof Out)))]
When given a nonnegative integer @racket[n] as an argument, a sampling procedure returns a list of
random samples of length @racket[n].
When given a nonnegative integer @racket[n] as an argument, a sampling procedure should return
a length-@racket[n] list of independent, random samples.
}
@defstruct*[dist ([pdf (PDF In)] [sample (Sample Out)])]{
@ -138,7 +157,19 @@ type a distribution returns as random samples.
@examples[#:eval untyped-eval
(dist? (discrete-dist '(a b c)))
(dist? (normal-dist))]
(dist? (normal-dist))
((dist-pdf (normal-dist)) 0)
((dist-sample (normal-dist)))]
See @racket[pdf] and @racket[sample] for uncurried forms of @racket[dist-pdf] and
@racket[dist-sample].
}
@defproc[(pdf [d (dist In Out)] [v In] [log? Any #f]) Flonum]{
An uncurried form of @racket[dist-pdf]. When @racket[log?] is not @racket[#f], returns a
log density.
@examples[#:eval untyped-eval
(pdf (discrete-dist '(a b c) '(1 2 3)) 'a)
(pdf (discrete-dist '(a b c) '(1 2 3)) 'a #t)]
}
@defproc*[([(sample [d (dist In Out)]) Out]
@ -156,12 +187,8 @@ The type of cumulative distribution functions, or @tech{cdfs}, defined as
@racketblock[(case-> (In -> Flonum)
(In Any -> Flonum)
(In Any Any -> Flonum))]
Both optional arguments default to @racket[#f].
Suppose @racket[cdf : (CDF Real)], and @racket[p = (cdf x log? 1-p?)]. The flonum @racket[p]
represents a probability. If @racket[log?] is a true value, @racket[p] is a log probability.
If @racket[1-p?] is a true value, @racket[p] is an upper-tail probability or upper-tail
log probability.
For any function of this type, both optional arguments should default to @racket[#f], and be
interpreted as specified in the description of @racket[cdf].
}
@defform[(Inverse-CDF Out)]{
@ -169,12 +196,8 @@ The type of inverse cumulative distribution functions, or @tech{inverse cdfs}, d
@racketblock[(case-> (Real -> Out)
(Real Any -> Out)
(Real Any Any -> Out))]
Both optional arguments default to @racket[#f].
Suppose @racket[inv-cdf : (Inverse-CDF Flonum)], and @racket[x = (inv-cdf p log? 1-p?)].
If @racket[log?] is a true value, @racket[inv-cdf] interprets @racket[p] as a log probability.
If @racket[1-p?] is a true value, @racket[inv-cdf] interprets @racket[p] as an upper-tail probability
or upper-tail log probability.
For any function of this type, both optional arguments should default to @racket[#f], and be
interpreted as specified in the description of @racket[inv-cdf].
}
@defstruct*[(ordered-dist dist) ([cdf (CDF In)]
@ -185,17 +208,16 @@ or upper-tail log probability.
The parent type of @italic{ordered} @tech{distribution objects}.
Similarly to @racket[dist], the @racket[In] type parameter is the data type an ordered distribution
accepts as arguments to its @tech{pdf}; it is also the data type its @tech{cdf} accepts.
Also similarly to @racket[dist], the @racket[Out] type parameter is the data type an ordered
distribution returns as random samples; it is also the data type its @tech{inverse cdf} returns.
accepts as arguments to its @tech{pdf}, and the @racket[Out] type parameter is the data type an
ordered distribution returns as random samples. Additionally, its @tech{cdf} accepts values of type
@racket[In], and its @tech{inverse cdf} returns values of type @racket[Out].
@examples[#:eval untyped-eval
(ordered-dist? (discrete-dist '(a b c)))
(ordered-dist? (normal-dist))]
The median is stored in an @racket[ordered-dist] to allow interval probabilities to be calculated
as accurately as possible. For example, for @racket[d = (normal-dist)], whose median is @racket[0.0],
accurately. For example, for @racket[d = (normal-dist)], whose median is @racket[0.0],
@racket[(real-dist-prob d -2.0 -1.0)] is calculated using lower-tail probabilities, and
@racket[(real-dist-prob d 1.0 2.0)] is calculated using upper-tail probabilities.
}
@ -205,11 +227,31 @@ The parent type of real-valued distributions, such as any distribution returned
@racket[normal-dist]. Equivalent to the type @racket[(ordered-dist Real Flonum)].
}
@defproc[(cdf [d (ordered-dist In Out)] [v In] [log? Any #f] [1-p? Any #f]) Flonum]{
An uncurried form of @racket[dist-cdf].
When @racket[log?] is @racket[#f], @racket[cdf] returns a probability; otherwise, it returns a log
probability.
When @racket[1-p?] is @racket[#f], @racket[cdf] returns a lower-tail probability or log probability
(depending on @racket[log?]); otherwise, it returns an upper-tail probability or log-probability.
}
@defproc[(inv-cdf [d (ordered-dist In Out)] [p Real] [log? Any #f] [1-p? Any #f]) Out]{
An uncurried form of @racket[dist-inv-cdf].
When @racket[log?] is @racket[#f], @racket[inv-cdf] interprets @racket[p] as a probability; otherwise,
it interprets @racket[p] as a log probability.
When @racket[1-p?] is @racket[#f], @racket[inv-cdf] interprets @racket[p] as a lower-tail probability
or log probability (depending on @racket[log?]); otherwise, it interprets @racket[p] as an upper-tail
probability or log probability.
}
@defproc[(real-dist-prob [d Real-Dist] [a Real] [b Real] [log? Any #f] [1-p? Any #f]) Flonum]{
Computes the probability of the half-open interval (@racket[a], @racket[b]]. (If @racket[b < a],
the two endpoints are swapped first.) The @racket[log?] and @racket[1-p?] arguments determine how the
return value should be interpreted, just as the arguments to a function of type
@racket[(CDF Real Flonum)].
the two endpoints are swapped first.) The @racket[log?] and @racket[1-p?] arguments determine the
meaning of the return value in the same way as the corresponding arguments to @racket[cdf].
}
@deftogether[(@defproc[(dist-cdf [d (ordered-dist In Out)]) (CDF In)]
@ -228,25 +270,188 @@ The first four are synonyms for @racket[ordered-dist-cdf], @racket[ordered-dist-
(dist-median (gamma-dist 4 2))]
}
@section{Discrete Distributions}
@section{Finite Distribution Families}
@subsection{Discrete Distribution}
@subsection{Unordered Discrete Distributions}
@subsection{Categorical Distribution}
@deftogether[(@defform[(Discrete-Dist A)]
@defproc*[([(discrete-dist [xs (Sequenceof A)]) (Discrete-Dist A)]
[(discrete-dist [xs (Sequenceof A)] [ws (Sequenceof Real)])
(Discrete-Dist A)])]
@defproc[(discrete-dist-values [d (Discrete-Dist A)]) (Listof A)]
@defproc[(discrete-dist-probs [d (Discrete-Dist A)]) (Listof Positive-Flonum)])]{
Represents families of unordered, discrete distributions over values of type @racket[A], with equality
decided by @racket[equal?].
@section{Integer Distributions}
The weights in @racket[ws] must be nonnegative, and are treated as unnormalized probabilities.
When @racket[ws] is not given, the values in @racket[xs] are assigned uniform probabilities.
@subsection{Bernoulli Distribution}
The type @racket[(Discrete-Dist A)] is a subtype of @racket[(dist A A)]. This means that discrete
distribution objects are unordered, and thus have only a @tech{pdf} and a procedure to generate
random samples.
@subsection{Binomial Distribution}
@examples[#:eval untyped-eval
(define xs '(a b c))
(define d (discrete-dist xs '(2 5 3)))
(define n 500)
(define h (samples->hash (sample d n)))
(plot (list (discrete-histogram
(map vector xs (map (dist-pdf d) xs))
#:x-min 0 #:skip 2 #:label "P[x]")
(discrete-histogram
(map vector xs (map (λ (x) (/ (hash-ref h x) n)) xs))
#:x-min 1 #:skip 2 #:line-style 'dot #:alpha 0.5
#:label "est. P[x]")))]
}
@subsection{Geometric Distribution}
@section{Integer Distribution Families}
@subsection{Poisson Distribution}
Mathematically, integer distributions are commonly defined in one of two ways: over extended reals,
or over extended integers. The most common definitions use the extended reals, so the following
@tech{distribution object} constructors return objects of type @racket[Real-Dist].
@section{Real Distributions}
(Another reason is that the extended integers correspond with the type
@racket[(U Integer +inf.0 -inf.0)]. Values of this type are difficult to use as integers: in
practice, they are the same as values of type @racket[Real].)
@subsection{Beta Distribution}
This leaves us with a quandary and two design decisions users should be aware of. The quandary is
that, when an integer distribution is defined over the reals, it has a @tech{cdf}, but @italic{no
well-defined @tech{pdf}}: the pdf would be zero except at integer points, where it would be
undefined.
Unfortunately, an integer distribution without a pdf is nearly useless.
@margin-note*{In measure-theory parlance, the pdfs are defined with respect to counting measure,
while the cdfs are defined with respect to Lebesgue measure.}
So the pdfs of these integer distributions are pdfs defined over integers, while their cdfs are
defined over reals.
Most implementations, such as @hyperlink["http://www.r-project.org"]{R}'s, make the same design
choice. Unlike R's, this implementation's pdfs return @racket[+nan.0] when given non-integers,
for three reasons:
@itemlist[@item{Their domain of definition is the integers.}
@item{Applying an integer pdf to a non-integer almost certainly indicates a logic error,
which is harder to detect when a program returns an apparently sensible value.}
@item{If this design choice turns out to be wrong and we change pdfs to return
@racket[0.0], this should affect very few programs. A change from @racket[0.0] to
@racket[+nan.0] could break many programs.}]
Integer distributions defined over the extended integers are not out of the question, and may
show up in future versions of @racketmodname[math/distributions] if there is a clear need.
@subsection{Bernoulli Distributions}
@margin-note{Wikipedia:
@hyperlink["http://wikipedia.org/wiki/Bernoulli_distribution"]{Bernoulli Distribution}.}
@deftogether[(@defidform[Bernoulli-Dist]
@defproc[(bernoulli-dist [prob Real]) Bernoulli-Dist]
@defproc[(bernoulli-dist-prob [d Bernoulli-Dist]) Flonum])]{
Represents the Bernoulli distribution family parameterized by probability of success.
@racket[(bernoulli-dist prob)] is equivalent to @racket[(binomial-dist 1 prob)], but operations
on it are faster.
@examples[#:eval untyped-eval
(define d (bernoulli-dist 0.75))
(map (dist-pdf d) '(0 1))
(map (dist-cdf d) '(0 1))
(define d (binomial-dist 1 0.75))
(map (dist-pdf d) '(0 1))
(map (dist-cdf d) '(0 1))]
}
@subsection{Binomial Distributions}
@margin-note{Wikipedia:
@hyperlink["http://wikipedia.org/wiki/Binomial_distribution"]{Binomial Distribution}.}
@deftogether[(@defidform[Binomial-Dist]
@defproc[(binomial-dist [count Real] [prob Real]) Binomial-Dist]
@defproc[(binomial-dist-count [d Binomial-Dist]) Flonum]
@defproc[(binomial-dist-prob [d Binomial-Dist]) Flonum])]{
Represents the binomial distribution family parameterized by count (number of trials) and
probability of success.
@examples[#:eval untyped-eval
(define d (binomial-dist 15 0.6))
(plot (discrete-histogram
(map vector (build-list 16 values) (build-list 16 (dist-pdf d))))
#:x-label "number of successes" #:y-label "probability")
(plot (function-interval (λ (x) 0) (dist-cdf d) -0.5 15.5)
#:x-label "at-most number of successes" #:y-label "probability")]
}
@subsection{Geometric Distributions}
@margin-note{Wikipedia:
@hyperlink["http://wikipedia.org/wiki/Geometric_distribution"]{Geometric Distribution}.}
@deftogether[(@defidform[Geometric-Dist]
@defproc[(geometric-dist [prob Real]) Geometric-Dist]
@defproc[(geometric-dist-prob [d Geometric-Dist]) Flonum])]{
Represents the geometric distribution family parameterized by success probability. The random
variable is the number of failures before the first success, or equivalently, the index of the
first success starting from zero.
@examples[#:eval untyped-eval
(define d (geometric-dist 0.25))
(plot (discrete-histogram
(map vector (build-list 16 values) (build-list 16 (dist-pdf d))))
#:x-label "first success index" #:y-label "probability")
(plot (function-interval (λ (x) 0) (dist-cdf d) -0.5 15.5)
#:x-label "at-most first success index" #:y-label "probability"
#:y-max 1)]
}
@subsection{Poisson Distributions}
@margin-note{Wikipedia:
@hyperlink["http://wikipedia.org/wiki/Poisson_distribution"]{Poisson Distribution}.}
@deftogether[(@defidform[Poisson-Dist]
@defproc[(poisson-dist [mean Real]) Poisson-Dist]
@defproc[(poisson-dist-mean [d Poisson-Dist]) Flonum])]{
Represents the Poisson distribution family parameterized by the mean number of occurrences of
independent events.
@examples[#:eval untyped-eval
(define d (poisson-dist 6.2))
(plot (discrete-histogram
(map vector (build-list 16 values) (build-list 16 (dist-pdf d))))
#:x-label "number of events" #:y-label "probability")
(plot (function-interval (λ (x) 0) (dist-cdf d) -0.5 15.5)
#:x-label "at-most number of events" #:y-label "probability"
#:y-max 1)]
}
@section{Real Distribution Families}
The @tech{distribution object} constructors documented in this section return unique, useful
distributions for the largest possible parameter domain. This usually means that they return
distributions for a larger domain than their mathematical counterparts are defined on.
For example, those that have a scale parameter, such as @racket[cauchy-dist],
@racket[logistic-dist], @racket[exponential-dist] and @racket[normal-dist], are typically
undefined for a zero scale. However, in floating-point math, it is often useful to simulate
limits in finite time using special values like @racket[+inf.0]. Therefore, when a
scale-parameterized family's constructor receives @racket[0], it returns a distribution object
that behaves like a @racket[Delta-Dist]:
@interaction[#:eval untyped-eval
(pdf (normal-dist 1 0) 1)
(pdf (normal-dist 1 0) 1.0000001)]
Further, negative scales are accepted, even for @racket[exponential-dist], which results in a
distribution with positive scale reflected about zero.
Some parameters' boundary values give rise to non-unique limits. Sometimes the ambiguity
can be resolved using necessary properties; see @racket[Gamma-Dist] for an example. When no
resolution exists, as with @racket[(beta-dist 0 0)], which puts an indeterminate probability on
the value @racket[0] and the rest on @racket[1], the constructor returns an
@tech{undefined distribution}.
Some distribution object constructors attempt to return sensible distributions when given
special values such as @racket[+inf.0] as parameters. Do not count on these yet.
Many distribution families, such as @racket[Gamma-Dist], can be parameterized on either scale
or rate (which is the reciprocal of scale). In all such cases, the implementations provided by
@racketmodname[math/distributions] are parameterized on scale.
@subsection{Beta Distributions}
@margin-note{Wikipedia:
@hyperlink["http://wikipedia.org/wiki/Beta_distribution"]{Beta Distribution}.}
@ -254,7 +459,8 @@ The first four are synonyms for @racket[ordered-dist-cdf], @racket[ordered-dist-
@defproc[(beta-dist [alpha Real] [beta Real]) Beta-Dist]
@defproc[(beta-dist-alpha [d Beta-Dist]) Flonum]
@defproc[(beta-dist-beta [d Beta-Dist]) Flonum])]{
Represents the beta distribution family parameterized by two shape parameters, or pseudocounts.
Represents the beta distribution family parameterized by two shape parameters, or pseudocounts,
which must both be nonnegative.
@examples[#:eval untyped-eval
(plot (for/list ([α (in-list '(1 2 3 1/2))]
@ -270,9 +476,17 @@ Represents the beta distribution family parameterized by two shape parameters, o
(function (dist-cdf (beta-dist α β))
#:color i #:label (format "Beta(~a,~a)" α β)))
#:x-min 0 #:x-max 1 #:y-label "probability")]
@racket[(beta-dist 0 0)] and @racket[(beta-dist +inf.0 +inf.0)] are @tech{undefined distributions}.
When @racket[a = 0] or @racket[b = +inf.0], the returned distribution acts like
@racket[(delta-dist 0)].
When @racket[a = +inf.0] or @racket[b = 0], the returned distribution acts like
@racket[(delta-dist 1)].
}
@subsection{Cauchy Distribution}
@subsection{Cauchy Distributions}
@margin-note{Wikipedia:
@hyperlink["http://wikipedia.org/wiki/Cauchy_distribution"]{Cauchy Distribution}.}
@ -299,7 +513,7 @@ Represents the Cauchy distribution family parameterized by mode and scale.
#:x-min -8 #:x-max 8 #:y-label "probability")]
}
@subsection{Delta Distribution}
@subsection{Delta Distributions}
@margin-note{Wikipedia:
@hyperlink["http://wikipedia.org/wiki/Dirac_delta_function"]{Dirac Delta Function}.}
@ -323,7 +537,7 @@ When a distribution with a scale parameter has scale zero, it behaves like a del
(plot (function (dist-cdf (normal-dist 0 0)) -1e-300 1e-300))]
}
@subsection{Exponential Distribution}
@subsection{Exponential Distributions}
@margin-note{Wikipedia:
@hyperlink["http://wikipedia.org/wiki/Exponential_distribution"]{Exponential
@ -353,7 +567,7 @@ is the reciprocal of mean or scale. If you have rates, construct exponential dis
#:legend-anchor 'bottom-right)]
}
@subsection{Gamma Distribution}
@subsection{Gamma Distributions}
@margin-note{Wikipedia:
@hyperlink["http://wikipedia.org/wiki/Gamma_distribution"]{Gamma Distribution}.}
@ -361,7 +575,8 @@ is the reciprocal of mean or scale. If you have rates, construct exponential dis
@defproc[(gamma-dist [shape Real 1] [scale Real 1]) Gamma-Dist]
@defproc[(gamma-dist-shape [d Gamma-Dist]) Flonum]
@defproc[(gamma-dist-scale [d Gamma-Dist]) Flonum])]{
Represents the gamma distribution family parameterized by shape and scale.
Represents the gamma distribution family parameterized by shape and scale. The @racket[shape]
parameter must be nonnegative.
@bold{Warning:} The gamma distribution family is often parameterized by shape and @italic{rate},
which is the reciprocal of scale. If you have rates, construct gamma distributions using
@ -383,9 +598,18 @@ which is the reciprocal of scale. If you have rates, construct gamma distributio
#:color i #:label (format "Gamma(~a,~a)" k s)))
#:x-min 0 #:x-max 15 #:y-label "probability"
#:legend-anchor 'bottom-right)]
The cdf of the gamma distribution with @racket[shape = 0] could return either @racket[0.0] or
@racket[1.0] at @racket[x = 0], depending on whether a double limit is taken with respect to
@racket[scale] or with respect to @racket[x] first. However the limits are taken, the cdf
must return @racket[1.0] for @racket[x > 0]. Because cdfs are right-continuous, the only correct
choice is
@interaction[#:eval untyped-eval
(cdf (gamma-dist 0 1) 0)]
Therefore, a gamma distribution with @racket[shape = 0] behaves like @racket[(delta-dist 0)].
}
@subsection{Logistic Distribution}
@subsection{Logistic Distributions}
@margin-note{Wikipedia:
@hyperlink["http://wikipedia.org/wiki/Logistic_distribution"]{Logistic Distribution}.}
@ -413,7 +637,7 @@ and scale. In this parameterization, the variance is @racket[(* 1/3 (sqr (* pi s
#:x-min -8 #:x-max 8 #:y-label "probability")]
}
@subsection{Normal Distribution}
@subsection{Normal Distributions}
@margin-note{Wikipedia:
@hyperlink["http://wikipedia.org/wiki/Normal_distribution"]{Normal Distribution}.}
@ -444,7 +668,7 @@ using
#:x-min -5 #:x-max 5 #:y-label "probability")]
}
@subsection{Triangular Distribution}
@subsection{Triangular Distributions}
@margin-note{Wikipedia:
@hyperlink["http://wikipedia.org/wiki/Triangular_distribution"]{Triangular
@ -467,7 +691,7 @@ before constructing the distribution object.
[i (in-naturals)])
(function (dist-pdf (triangle-dist a b m)) #:color i
#:label (format "Triangle(~a,~a,~a)" a b m)))
#:x-min -3 #:x-max 3 #:y-label "density")
#:x-min -3.5 #:x-max 3.5 #:y-label "density")
(plot (for/list ([a (in-list '(-3 -1 -2))]
[b (in-list '(0 1 3))]
@ -475,12 +699,44 @@ before constructing the distribution object.
[i (in-naturals)])
(function (dist-cdf (triangle-dist a b m)) #:color i
#:label (format "Triangle(~a,~a,~a)" a b m)))
#:x-min -3 #:x-max 3 #:y-label "probability")]
#:x-min -3.5 #:x-max 3.5 #:y-label "probability")]
@racket[(triangle-dist c c c)] for any real @racket[c] behaves like a support-limited delta
distribution centered at @racket[c].
}
@subsection{Truncated Distribution}
@subsection{Truncated Distributions}
@subsection{Uniform Distribution}
@deftogether[(@defidform[Truncated-Dist]
@defproc*[([(truncated-dist [d Real-Dist]) Truncated-Dist]
[(truncated-dist [d Real-Dist] [max Real]) Truncated-Dist]
[(truncated-dist [d Real-Dist] [min Real] [max Real]) Truncated-Dist])]
@defproc[(truncated-dist-original [t Truncated-Dist]) Real-Dist]
@defproc[(truncated-dist-min [t Truncated-Dist]) Flonum]
@defproc[(truncated-dist-max [t Truncated-Dist]) Flonum])]{
Represents distributions like @racket[d], but with zero density for @racket[x < min] and
for @racket[x > max]. The probability of the interval [@racket[min], @racket[max]] is renormalized
to one.
@racket[(truncated-dist d)] is equivalent to @racket[(truncated-dist d -inf.0 +inf.0)].
@racket[(truncated-dist d max)] is equivalent to @racket[(truncated-dist d -inf.0 max)].
If @racket[min > max], they are swapped before constructing the distribution object.
Samples are taken by applying the truncated distribution's @tech{inverse cdf} to uniform samples.
@examples[#:eval untyped-eval
(define d (normal-dist))
(define t (truncated-dist d -2 1))
t
(plot (list (function (dist-pdf d) #:label "N(0,1)" #:color 0)
(function (dist-pdf t) #:label "T(N(0,1),-2,1)"))
#:x-min -3.5 #:x-max 3.5 #:y-label "density")
(plot (list (function (dist-cdf d) #:label "N(0,1)" #:color 0)
(function (dist-cdf t) #:label "T(N(0,1),-2,1)"))
#:x-min -3.5 #:x-max 3.5 #:y-label "probability")]
}
@subsection{Uniform Distributions}
@margin-note{
Wikipedia:
@ -497,21 +753,156 @@ Represents the uniform distribution family parameterized by minimum and maximum.
@racket[(uniform-dist)] is equivalent to @racket[(uniform-dist 0 1)].
@racket[(uniform-dist max)] is equivalent to @racket[(uniform-dist 0 max)]. If @racket[max < min],
they are swapped before constructing the distribution object.
@;{
@examples[#:eval untyped-eval
(plot (for/list ([a (in-list '(-3 -1 -2))]
[b (in-list '(0 1 3))]
[i (in-naturals)])
(function (dist-pdf (uniform-dist a b)) #:color i
#:label (format "Uniform(~a,~a)" a b)))
#:x-min -3 #:x-max 3 #:y-label "density")
#:x-min -3.5 #:x-max 3.5 #:y-label "density")
(plot (for/list ([a (in-list '(-3 -1 -2))]
[b (in-list '(0 1 3))]
[i (in-naturals)])
(function (dist-cdf (uniform-dist a b)) #:color i
#:label (format "Uniform(~a,~a)" a b)))
#:x-min -3 #:x-max 3 #:y-label "probability")]
#:x-min -3.5 #:x-max 3.5 #:y-label "probability")]
@racket[(uniform-dist x x)] for any real @racket[x] behaves like a support-limited delta
distribution centered at @racket[x].
}
@section[#:tag "dist:flonum"]{Low-Level Distribution Functions}
The following functions are provided for users who need lower overhead than that of
@tech{distribution objects}, such as (currently) untyped Racket users, and library writers who
are implementing their own distribution abstractions.
Because applying these functions is meant to be fast, none of them have optional arguments. In
particular, the boolean flags @racket[log?] and @racket[1-p?] are always required.
Every low-level function's argument list begins with the distribution family parameters. In the
case of @tech{pdfs} and @tech{cdfs}, these arguments are followed by a domain value and boolean
flags. In the case of @tech{inverse cdfs}, they are followed by a probability argument and boolean
flags. For sampling procedures, the distribution family parameters are followed by the requested
number of random samples.
Generally, @racket[prob] is a probability parameter, @racket[k] is an integer domain value,
@racket[x] is a real domain value, @racket[p] is the probability argument to an inverse cdf,
and @racket[n] is the number of random samples.
@subsection{Integer Distribution Functions}
@deftogether[
(@defproc[(flbernoulli-pdf [prob Flonum] [k Flonum] [log? Any]) Flonum]
@defproc[(flbernoulli-cdf [prob Flonum] [k Flonum] [log? Any] [1-p? Any]) Flonum]
@defproc[(flbernoulli-inv-cdf [prob Flonum] [p Flonum] [log? Any] [1-p? Any]) Flonum]
@defproc[(flbernoulli-sample [prob Flonum] [n Integer]) FlVector])]{
Low-level flonum functions used to implement @racket[bernoulli-dist].
}
@deftogether[
(@defproc[(flbinomial-pdf [count Flonum] [prob Flonum] [k Flonum] [log? Any]) Flonum]
@defproc[(flbinomial-cdf [count Flonum] [prob Flonum] [k Flonum] [log? Any] [1-p? Any]) Flonum]
@defproc[(flbinomial-inv-cdf [count Flonum] [prob Flonum] [p Flonum] [log? Any] [1-p? Any]) Flonum]
@defproc[(flbinomial-sample [count Flonum] [prob Flonum] [n Integer]) FlVector])]{
Low-level flonum functions used to implement @racket[binomial-dist].
}
@deftogether[
(@defproc[(flgeometric-pdf [prob Flonum] [k Flonum] [log? Any]) Flonum]
@defproc[(flgeometric-cdf [prob Flonum] [k Flonum] [log? Any] [1-p? Any]) Flonum]
@defproc[(flgeometric-inv-cdf [prob Flonum] [p Flonum] [log? Any] [1-p? Any]) Flonum]
@defproc[(flgeometric-sample [prob Flonum] [n Integer]) FlVector])]{
Low-level flonum functions used to implement @racket[geometric-dist].
}
@deftogether[
(@defproc[(flpoisson-pdf [mean Flonum] [k Flonum] [log? Any]) Flonum]
@defproc[(flpoisson-cdf [mean Flonum] [k Flonum] [log? Any] [1-p? Any]) Flonum]
@defproc[(flpoisson-inv-cdf [mean Flonum] [p Flonum] [log? Any] [1-p? Any]) Flonum]
@defproc[(flpoisson-sample [mean Flonum] [n Integer]) FlVector]
@defproc[(flpoisson-median [mean Flonum]) Flonum])]{
Low-level flonum functions used to implement @racket[poisson-dist].
@racket[(flpoisson-median mean)] runs faster than
@racket[(flpoisson-inv-cdf mean 0.5 #f #f)], significantly so when @racket[mean] is large.
}
@subsection{Real Distribution Functions}
@deftogether[
(@defproc[(flbeta-pdf [alpha Flonum] [beta Flonum] [x Flonum] [log? Any]) Flonum]
@defproc[(flbeta-cdf [alpha Flonum] [beta Flonum] [x Flonum] [log? Any] [1-p? Any]) Flonum]
@defproc[(flbeta-inv-cdf [alpha Flonum] [beta Flonum] [p Flonum] [log? Any] [1-p? Any]) Flonum]
@defproc[(flbeta-sample [alpha Flonum] [beta Flonum] [n Integer]) FlVector])]{
Low-level flonum functions used to implement @racket[beta-dist].
}
@deftogether[
(@defproc[(flcauchy-pdf [mode Flonum] [scale Flonum] [x Flonum] [log? Any]) Flonum]
@defproc[(flcauchy-cdf [mode Flonum] [scale Flonum] [x Flonum] [log? Any] [1-p? Any]) Flonum]
@defproc[(flcauchy-inv-cdf [mode Flonum] [scale Flonum] [p Flonum] [log? Any] [1-p? Any]) Flonum]
@defproc[(flcauchy-sample [mode Flonum] [scale Flonum] [n Integer]) FlVector])]{
Low-level flonum functions used to implement @racket[cauchy-dist].
}
@deftogether[
(@defproc[(fldelta-pdf [mean Flonum] [x Flonum] [log? Any]) Flonum]
@defproc[(fldelta-cdf [mean Flonum] [x Flonum] [log? Any] [1-p? Any]) Flonum]
@defproc[(fldelta-inv-cdf [mean Flonum] [p Flonum] [log? Any] [1-p? Any]) Flonum])]{
Low-level flonum functions used to implement @racket[delta-dist].
If you need delta-distributed random samples, use @racket[(make-flvector n mean)].
}
@deftogether[
(@defproc[(flexponential-pdf [mean Flonum] [x Flonum] [log? Any]) Flonum]
@defproc[(flexponential-cdf [mean Flonum] [x Flonum] [log? Any] [1-p? Any]) Flonum]
@defproc[(flexponential-inv-cdf [mean Flonum] [p Flonum] [log? Any] [1-p? Any]) Flonum]
@defproc[(flexponential-sample [mean Flonum] [n Integer]) FlVector])]{
Low-level flonum functions used to implement @racket[exponential-dist].
}
@deftogether[
(@defproc[(flgamma-pdf [shape Flonum] [scale Flonum] [x Flonum] [log? Any]) Flonum]
@defproc[(flgamma-cdf [shape Flonum] [scale Flonum] [x Flonum] [log? Any] [1-p? Any]) Flonum]
@defproc[(flgamma-inv-cdf [shape Flonum] [scale Flonum] [p Flonum] [log? Any] [1-p? Any]) Flonum]
@defproc[(flgamma-sample [shape Flonum] [scale Flonum] [n Integer]) FlVector])]{
Low-level flonum functions used to implement @racket[gamma-dist].
}
@deftogether[
(@defproc[(fllogistic-pdf [mean Flonum] [scale Flonum] [x Flonum] [log? Any]) Flonum]
@defproc[(fllogistic-cdf [mean Flonum] [scale Flonum] [x Flonum] [log? Any] [1-p? Any]) Flonum]
@defproc[(fllogistic-inv-cdf [mean Flonum] [scale Flonum] [p Flonum] [log? Any] [1-p? Any]) Flonum]
@defproc[(fllogistic-sample [mean Flonum] [scale Flonum] [n Integer]) FlVector])]{
Low-level flonum functions used to implement @racket[logistic-dist].
}
@deftogether[
(@defproc[(flnormal-pdf [mean Flonum] [stddev Flonum] [x Flonum] [log? Any]) Flonum]
@defproc[(flnormal-cdf [mean Flonum] [stddev Flonum] [x Flonum] [log? Any] [1-p? Any]) Flonum]
@defproc[(flnormal-inv-cdf [mean Flonum] [stddev Flonum] [p Flonum] [log? Any] [1-p? Any]) Flonum]
@defproc[(flnormal-sample [mean Flonum] [stddev Flonum] [n Integer]) FlVector])]{
Low-level flonum functions used to implement @racket[normal-dist].
}
@deftogether[
(@defproc[(fltriangle-pdf [min Flonum] [max Flonum] [mode Flonum] [x Flonum] [log? Any]) Flonum]
@defproc[(fltriangle-cdf [min Flonum] [max Flonum] [mode Flonum] [x Flonum] [log? Any] [1-p? Any]) Flonum]
@defproc[(fltriangle-inv-cdf [min Flonum] [max Flonum] [mode Flonum] [p Flonum] [log? Any] [1-p? Any]) Flonum]
@defproc[(fltriangle-sample [min Flonum] [max Flonum] [mode Flonum] [n Integer]) FlVector])]{
Low-level flonum functions used to implement @racket[triangle-dist].
}
@deftogether[
(@defproc[(fluniform-pdf [min Flonum] [max Flonum] [x Flonum] [log? Any]) Flonum]
@defproc[(fluniform-cdf [min Flonum] [max Flonum] [x Flonum] [log? Any] [1-p? Any]) Flonum]
@defproc[(fluniform-inv-cdf [min Flonum] [max Flonum] [p Flonum] [log? Any] [1-p? Any]) Flonum]
@defproc[(fluniform-sample [min Flonum] [max Flonum] [n Integer]) FlVector])]{
Low-level flonum functions used to implement @racket[uniform-dist].
}
@(close-eval untyped-eval)

View File

@ -3,10 +3,12 @@
(require "private/statistics/statistics-struct.rkt"
"private/statistics/expected-values.rkt"
"private/statistics/correlation.rkt"
"private/statistics/order-statistics.rkt")
"private/statistics/order-statistics.rkt"
"private/statistics/counting.rkt")
(provide (all-from-out
"private/statistics/statistics-struct.rkt"
"private/statistics/expected-values.rkt"
"private/statistics/correlation.rkt"
"private/statistics/order-statistics.rkt"))
"private/statistics/order-statistics.rkt"
"private/statistics/counting.rkt"))