Added HPD interval estimates and Monte Carlo variance

This commit is contained in:
Neil Toronto 2014-03-26 17:31:17 -06:00
parent 970eed82af
commit cd2632c186
6 changed files with 332 additions and 17 deletions

View File

@ -254,8 +254,8 @@ the two endpoints are swapped first.) The @racket[log?] and @racket[1-p?] argume
meaning of the return value in the same way as the corresponding arguments to @racket[cdf].
}
@defproc[(real-dist-hpd-interval [d Real-Dist] [α Real]) (Values Flonum Flonum)]{
Finds the smallest interval for which @racket[d] assigns probability @racket[α], if one exists.
@defproc[(real-dist-hpd-interval [d Real-Dist] [p Real]) (Values Flonum Flonum)]{
Finds the smallest interval for which @racket[d] assigns probability @racket[p], if one exists.
@examples[#:eval untyped-eval
(define d (beta-dist 3 2))
(define-values (x0 x1) (real-dist-hpd-interval d 0.8))

View File

@ -2,7 +2,9 @@
@(require scribble/eval
racket/sandbox
(for-label racket/base racket/promise
(for-label racket/base
racket/promise
racket/sequence
(except-in racket/list permutations) ; FIXME
math plot
(only-in typed/racket/base
@ -96,28 +98,28 @@ If @racket[ws] is not @racket[#f], they compute weighted variations of the same.
See @secref{stats:expected-values} for the meaning of the @racket[bias] keyword argument.
}
@deftogether[(@defproc[(variance/mean [mean Real]
@deftogether[(@defproc[(variance/mean [m Real]
[xs (Sequenceof Real)]
[ws (U #f (Sequenceof Real)) #f]
[#:bias bias (U #t #f Real) #f])
Nonnegative-Real]
@defproc[(stddev/mean [mean Real]
@defproc[(stddev/mean [m Real]
[xs (Sequenceof Real)]
[ws (U #f (Sequenceof Real)) #f]
[#:bias bias (U #t #f Real) #f])
Nonnegative-Real]
@defproc[(skewness/mean [mean Real]
@defproc[(skewness/mean [m Real]
[xs (Sequenceof Real)]
[ws (U #f (Sequenceof Real)) #f]
[#:bias bias (U #t #f Real) #f])
Real]
@defproc[(kurtosis/mean [mean Real]
@defproc[(kurtosis/mean [m Real]
[xs (Sequenceof Real)]
[ws (U #f (Sequenceof Real)) #f]
[#:bias bias (U #t #f Real) #f])
Nonnegative-Real])]{
Like @racket[variance], @racket[stddev], @racket[skewness] and @racket[kurtosis], but computed
using known mean @racket[mean].
using known mean @racket[m].
}
@section[#:tag "stats:running"]{Running Expected Values}
@ -226,22 +228,22 @@ Removing the correlation using importance weights:
See @secref{stats:expected-values} for the meaning of the @racket[bias] keyword argument.
}
@deftogether[(@defproc[(covariance/means [μx Real]
[μy Real]
@deftogether[(@defproc[(covariance/means [mx Real]
[my Real]
[xs (Sequenceof Real)]
[ys (Sequenceof Real)]
[ws (U #f (Sequenceof Real)) #f]
[#:bias bias (U #t #f Real) #f])
Real]
@defproc[(correlation/means [μx Real]
[μy Real]
@defproc[(correlation/means [mx Real]
[my Real]
[xs (Sequenceof Real)]
[ys (Sequenceof Real)]
[ws (U #f (Sequenceof Real)) #f]
[#:bias bias (U #t #f Real) #f])
Real])]{
Like @racket[covariance] and @racket[correlation], but computed using known means
@racket[μx] and @racket[μy].
@racket[mx] and @racket[my].
}
@section{Counting and Binning}
@ -393,4 +395,125 @@ Computes the average absolute difference between the elements in @racket[xs] and
Like @racket[(absdev xs ws)], but computed using known median @racket[median].
}
@deftogether[(@defproc[(hpd-interval [lt? (A A -> Any)]
[δ (A A -> Real)]
[p Real]
[xs (Sequenceof A)]
[ws (U #f (Sequenceof Real)) #f])
(Values A A)]
@defproc[(hpd-interval/sorted [δ (A A -> Real)]
[p Real]
[xs (Sequenceof A)]
[ws (U #f (Sequenceof Real)) #f])
(Values A A)])]{
Estimates the smallest interval for which the distribution that produced @racket[xs] (optionally
weighted by @racket[ws]) assigns probability @racket[p], which must be positive.
The type @racket[A] represents an ordered metric space with order @racket[lt?] and metric @racket[δ].
To compute an HPD interval from sorted samples, use @racket[hpd-interval/sorted].
You almost certainly want to use @racket[real-hpd-interval] or @racket[real-hpd-interval/sorted]
instead, which are defined in terms of these.
}
@deftogether[(@defproc[(real-hpd-interval [p Real]
[xs (Sequenceof Real)]
[ws (U #f (Sequenceof Real)) #f])
(Values Real Real)]
@defproc[(real-hpd-interval/sorted [p Real]
[xs (Sequenceof Real)]
[ws (U #f (Sequenceof Real)) #f])
(Values Real Real)])]{
Equivalent to @racket[(hpd-interval < - p xs ws)] and @racket[(hpd-interval/sorted - p xs ws)].
@examples[#:eval typed-eval
(define beta32 (beta-dist 3 2))
(real-dist-hpd-interval beta32 0.8)
(real-hpd-interval 0.8 (sample beta32 10000))]
}
@section{Simulations}
The functions in this section support Monte Carlo simulation; for example, quantifying uncertainty
about statistics estimated from samples.
@deftogether[(@defproc[(mc-variance [xs (Sequenceof Real)]
[ws (U #f (Sequenceof Real)) #f]
[#:bias bias (U #t #f Real) #f])
Nonnegative-Real]
@defproc[(mc-stddev [xs (Sequenceof Real)]
[ws (U #f (Sequenceof Real)) #f]
[#:bias bias (U #t #f Real) #f])
Nonnegative-Real])]{
Estimate the variance and standard deviation of expected values computed from random samples.
If @racket[xs] are random variable samples, then
@racketblock[(define θ (mean xs ws))]
is also a random variable sample.
These two values:
@racketblock[(mc-variance xs ws #:bias bias)
(mc-stddev xs ws #:bias bias)]
estimate the variance and standard deviation of @racket[θ].
The latter is simply the square root of the variance, and bias correction is applied as described in
@secref{stats:expected-values}.
Two different ways to estimate the standard deviation of a mean computed from 1000 samples are
@interaction[#:eval typed-eval
(mc-stddev (sample (normal-dist 0 1) 1000))
(stddev (for/list : (Listof Real) ([_ (in-range 100)])
(mean (sample (normal-dist 0 1) 1000))))]
Using @racket[mc-stddev] is 100 times faster in this case, and in most statistical applications,
replicating a sampling process 100 times is infeasible.
}
@deftogether[(@defproc[(mc-stddev/mean [m Real]
[xs (Sequenceof Real)]
[ws (U #f (Sequenceof Real)) #f]
[#:bias bias (U #t #f Real) #f])
Nonnegative-Real]
@defproc[(mc-variance/mean [m Real]
[xs (Sequenceof Real)]
[ws (U #f (Sequenceof Real)) #f]
[#:bias bias (U #t #f Real) #f])
Nonnegative-Real]
)]{
Use these in the exceedingly rare cases in which you know the mean @racket[m] but are estimating
uncertainty in an estimate of the mean anyway.
}
@defproc[(indicator [pred? (A -> Any)]) (A -> (U 0 1))]{
Converts a predicate into an indicator function.
@examples[#:eval typed-eval
(fl (mean (map (indicator (λ ([x : Real]) (< -inf.0 x -1)))
(sample (normal-dist 0 1) 5000))))
(real-dist-prob (normal-dist 0 1) -inf.0 -1)]
}
@defproc[(mc-probability [pred? (A -> Any)]
[xs (Sequenceof A)]
[ws (U #f (Sequenceof Real)) #f])
Nonnegative-Real]{
Estimates the probability of @racket[pred?] from possibly weighted samples.
Equivalent to @racket[(mean (sequence-map (indicator pred?) xs) ws)].
@examples[#:eval typed-eval
(fl (mc-probability (λ ([x : Real]) (< -inf.0 x -1))
(sample (normal-dist 0 1) 5000)))]
}
@defproc[(mc-prob-dist [pred? (A -> Any)]
[xs (Sequenceof A)]
[ws (U #f (Sequenceof Real)) #f])
Beta-Dist]{
Returns a beta distribution estimated from possibly weighted samples whose mean is
@racket[(mc-probability pred? xs ws)].
Computing a confidence interval for a probability whose endpoints are guaranteed to be
between @racket[0] and @racket[1]:
@interaction[#:eval typed-eval
(real-dist-hpd-interval
(mc-prob-dist (λ ([x : Real]) (< -inf.0 x -1))
(sample (normal-dist 0 1) 5000))
0.95)]
}
@(close-eval typed-eval)

View File

@ -0,0 +1,103 @@
#lang typed/racket
(require "../../flonum.rkt"
"order-statistics.rkt"
"statistics-utils.rkt")
(provide hpd-interval
hpd-interval/sorted
real-hpd-interval
real-hpd-interval/sorted)
(: unweighted-hpd-interval (All (A) (-> Symbol (-> A A Real) Real (Listof A) (Values A A))))
(define (unweighted-hpd-interval name metric α xs)
(define n (length xs))
(define m (exact-ceiling (* n α)))
(define as (take xs m))
(define bs (drop xs m))
(cond
[(empty? as) (error name "interval is empty; given ~e ~e" α xs)]
[(empty? bs) (values (first as) (last as))]
[else
(define a* (first as))
(define b* (first bs))
(define d* (abs (metric b* a*)))
(define-values (a b _)
(for/fold: ([a* : A a*] [b* : A b*] [d* : Real d*]) ([a (in-list as)]
[b (in-list bs)])
(define d (abs (metric b a)))
(if (d . < . d*) (values a b d) (values a* b* d*))))
(values a b)]))
(: weighted-hpd-interval (All (A) (-> Symbol (-> A A Real) Real (Listof A) (Listof Nonnegative-Real)
(Values A A))))
(define (weighted-hpd-interval name metric α xs ws)
(let* ([xs (list->vector xs)]
[ws (weights->normalized-weights name ws)]
[ws (flvector-sums (list->flvector ws))])
(define n (vector-length xs))
(: find-new-endpoint (Index Index -> (Option Index)))
;; Returns the next index after i1 for which the sum of weights >= α
(define (find-new-endpoint i0 i1)
(define w0 (flvector-ref ws i0))
(let loop ([i1 : Nonnegative-Fixnum i1])
(cond [(i1 . < . n)
(cond [((- (flvector-ref ws i1) w0) . >= . α) i1]
[else (loop (+ i1 1))])]
[else #f])))
(define i1 (find-new-endpoint 0 0))
(cond
[(not i1) (values (vector-ref xs 0) (vector-ref xs (- n 1)))]
[else
(define a* (vector-ref xs 0))
(define b* (vector-ref xs i1))
(define d* (abs (metric b* a*)))
(let loop ([i0 : Nonnegative-Fixnum 1] [i1 : Index i1] [a* a*] [b* b*] [d* d*])
;(printf "i0 = ~v i1 = ~v~na* = ~v b* = ~v~nd* = ~v~n" i0 i1 a* b* d*)
(cond [(i0 . >= . n) (values a* b*)]
[else
(let ([i1 (find-new-endpoint i0 i1)])
(cond [(not i1) (values a* b*)]
[else
;(printf "α = ~v~n~n" (- (flvector-ref ws i1) (flvector-ref ws i0)))
(define a (vector-ref xs i0))
(define b (vector-ref xs i1))
(define d (abs (metric b a)))
(cond [(d . < . d*) (loop (+ i0 1) i1 a b d)]
[else (loop (+ i0 1) i1 a* b* d*)])]))]))])))
(: hpd-interval/sorted (All (A) (->* [(-> A A Real) Real (Sequenceof A)]
[(Option (Sequenceof Real))]
(Values A A))))
(define (hpd-interval/sorted metric α xs [ws #f])
(cond [(or (α . <= . 0) (α . > . 1))
(raise-argument-error 'hpd-interval/sorted "Real in (0,1]" 1 metric α xs ws)]
[ws (let-values ([(xs ws) (sequences->weighted-samples 'sorted-hpd-interval xs ws)])
(weighted-hpd-interval 'hpd-interval/sorted metric α xs ws))]
[else (unweighted-hpd-interval 'hpd-interval/sorted metric α (sequence->list xs))]))
(: hpd-interval (All (A) (->* [(-> A A Any) (-> A A Real) Real (Sequenceof A)]
[(Option (Sequenceof Real))]
(Values A A))))
(define (hpd-interval lt? metric α xs [ws #f])
(cond [(or (α . <= . 0) (α . > . 1))
(raise-argument-error 'hpd-interval "Real in (0,1]" 2 lt? metric α xs ws)]
[ws (let-values ([(xs ws) (sort-samples lt? xs ws)])
(weighted-hpd-interval 'hpd-interval metric α xs ws))]
[else (let ([xs (sort-samples lt? xs)])
(unweighted-hpd-interval 'hpd-interval metric α xs))]))
(: real-hpd-interval/sorted (->* [Real (Sequenceof Real)]
[(Option (Sequenceof Real))]
(Values Real Real)))
(define (real-hpd-interval/sorted α xs [ws #f])
(hpd-interval/sorted - α xs ws))
(: real-hpd-interval (->* [Real (Sequenceof Real)]
[(Option (Sequenceof Real))]
(Values Real Real)))
(define (real-hpd-interval α xs [ws #f])
(hpd-interval < - α xs ws))

View File

@ -0,0 +1,85 @@
#lang typed/racket
(require "../../base.rkt"
"../distributions/beta-dist.rkt"
"expected-values.rkt"
"statistics-utils.rkt")
(provide mc-variance
mc-variance/mean
mc-stddev
mc-stddev/mean
indicator
mc-probability
mc-prob-dist)
;; ---------------------------------------------------------------------------------------------------
;; Monte Carlo variance and standard deviation
;; Correct MC variance for ratio importance method computation is from notes on a lecture series
;; Petri Koistinen. Monte Carlo Methods, with an emphasis on Bayesian computation. Summer 2010
(: mc-variance* (-> Symbol Real (Sequenceof Real) (Option (Sequenceof Real)) (U Boolean Real)
Nonnegative-Real))
(define (mc-variance* name m xs ws bias)
(define-values (xs^2 n)
(cond [ws (let-values ([(xs ws) (sequences->weighted-samples 'mc-variance xs ws)])
(values (map (λ: ([x : Real] [w : Real]) (sqr (* w (- x m)))) xs ws)
(max 0 (sum ws))))]
[else (let ([xs (sequence->list xs)])
(values (map (λ: ([x : Real]) (sqr (- x m))) xs)
(length xs)))]))
(cond [(zero? n) +nan.0]
[else
(define m2 (max 0 (/ (sum xs^2) (sqr n))))
(adjust-variance m2 n bias)]))
(: mc-variance/mean (->* [Real (Sequenceof Real)]
[(Option (Sequenceof Real)) #:bias (U Boolean Real)]
Nonnegative-Real))
(define (mc-variance/mean m xs [ws #f] #:bias [bias #f])
(mc-variance* 'mc-variance/mean m xs ws bias))
(: mc-variance (->* [(Sequenceof Real)]
[(Option (Sequenceof Real)) #:bias (U Boolean Real)]
Nonnegative-Real))
(define (mc-variance xs [ws #f] #:bias [bias #f])
(mc-variance* 'mc-variance (mean xs ws) xs ws bias))
(: mc-stddev/mean (->* [Real (Sequenceof Real)]
[(Option (Sequenceof Real)) #:bias (U Boolean Real)]
Nonnegative-Real))
(define (mc-stddev/mean m xs [ws #f] #:bias [bias #f])
(sqrt (mc-variance* 'mc-stddev/mean m xs ws bias)))
(: mc-stddev (->* [(Sequenceof Real)]
[(Option (Sequenceof Real)) #:bias (U Boolean Real)]
Nonnegative-Real))
(define (mc-stddev xs [ws #f] #:bias [bias #f])
(sqrt (mc-variance* 'mc-stddev (mean xs ws) xs ws bias)))
;; ---------------------------------------------------------------------------------------------------
;; Monte Carlo probabilities
(: indicator (All (A) ((A -> Any) -> (A -> (U 0 1)))))
(define ((indicator f) x) (if (f x) 1 0))
(: mc-probability (All (A) (->* [(A -> Any) (Sequenceof A)]
[(Option (Sequenceof Real))]
Nonnegative-Real)))
(define (mc-probability p? xs [ws #f])
(let-values ([(xs ws) (cond [ws (sequences->weighted-samples 'mc-prob xs ws)]
[else (values (sequence->list xs) #f)])])
(max 0 (mean (map ((inst indicator A) p?) xs) ws))))
(: mc-prob-dist (All (A) (->* [(A -> Any) (Sequenceof A)]
[(Option (Sequenceof Real))]
Beta-Dist)))
(define (mc-prob-dist p? xs [ws #f])
(let-values ([(xs ws) (cond [ws (sequences->weighted-samples 'mc-prob-dist xs ws)]
[else (values (sequence->list xs) #f)])])
(define n (length xs))
(define α
(cond [ws (* n (mean (map ((inst indicator A) p?) xs) ws))]
[else (count p? xs)]))
(beta-dist α (- n α))))

View File

@ -105,7 +105,7 @@
;; bias = #t Assume sum of weights is the count and correct for bias normally
;; bias = n Assume n actual samples; correct for bias
(: get-bias-adjustment (Nonnegative-Real (U #t Real) Positive-Real -> Positive-Real))
(: get-bias-adjustment (Nonnegative-Real (U #t Real) Nonnegative-Real -> Positive-Real))
(define (get-bias-adjustment c bias mn)
(define n (if (real? bias) bias c))
(if (n . > . mn) n +nan.0))

View File

@ -4,11 +4,15 @@
"private/statistics/expected-values.rkt"
"private/statistics/correlation.rkt"
"private/statistics/order-statistics.rkt"
"private/statistics/counting.rkt")
"private/statistics/counting.rkt"
"private/statistics/monte-carlo.rkt"
"private/statistics/hpd-interval.rkt")
(provide (all-from-out
"private/statistics/statistics-struct.rkt"
"private/statistics/expected-values.rkt"
"private/statistics/correlation.rkt"
"private/statistics/order-statistics.rkt"
"private/statistics/counting.rkt"))
"private/statistics/counting.rkt"
"private/statistics/monte-carlo.rkt"
"private/statistics/hpd-interval.rkt"))