From cd2632c18613fb3f19901b45b641288f6eddb697 Mon Sep 17 00:00:00 2001 From: Neil Toronto Date: Wed, 26 Mar 2014 17:31:17 -0600 Subject: [PATCH] Added HPD interval estimates and Monte Carlo variance --- .../math/scribblings/math-distributions.scrbl | 4 +- .../math/scribblings/math-statistics.scrbl | 147 ++++++++++++++++-- .../math/private/statistics/hpd-interval.rkt | 103 ++++++++++++ .../math/private/statistics/monte-carlo.rkt | 85 ++++++++++ .../private/statistics/statistics-utils.rkt | 2 +- pkgs/math-pkgs/math-lib/math/statistics.rkt | 8 +- 6 files changed, 332 insertions(+), 17 deletions(-) create mode 100644 pkgs/math-pkgs/math-lib/math/private/statistics/hpd-interval.rkt create mode 100644 pkgs/math-pkgs/math-lib/math/private/statistics/monte-carlo.rkt diff --git a/pkgs/math-pkgs/math-doc/math/scribblings/math-distributions.scrbl b/pkgs/math-pkgs/math-doc/math/scribblings/math-distributions.scrbl index bcadafe6b8..5f519c0eca 100644 --- a/pkgs/math-pkgs/math-doc/math/scribblings/math-distributions.scrbl +++ b/pkgs/math-pkgs/math-doc/math/scribblings/math-distributions.scrbl @@ -254,8 +254,8 @@ the two endpoints are swapped first.) The @racket[log?] and @racket[1-p?] argume meaning of the return value in the same way as the corresponding arguments to @racket[cdf]. } -@defproc[(real-dist-hpd-interval [d Real-Dist] [α Real]) (Values Flonum Flonum)]{ -Finds the smallest interval for which @racket[d] assigns probability @racket[α], if one exists. +@defproc[(real-dist-hpd-interval [d Real-Dist] [p Real]) (Values Flonum Flonum)]{ +Finds the smallest interval for which @racket[d] assigns probability @racket[p], if one exists. @examples[#:eval untyped-eval (define d (beta-dist 3 2)) (define-values (x0 x1) (real-dist-hpd-interval d 0.8)) diff --git a/pkgs/math-pkgs/math-doc/math/scribblings/math-statistics.scrbl b/pkgs/math-pkgs/math-doc/math/scribblings/math-statistics.scrbl index 75a70671a9..091befe20d 100644 --- a/pkgs/math-pkgs/math-doc/math/scribblings/math-statistics.scrbl +++ b/pkgs/math-pkgs/math-doc/math/scribblings/math-statistics.scrbl @@ -2,7 +2,9 @@ @(require scribble/eval racket/sandbox - (for-label racket/base racket/promise + (for-label racket/base + racket/promise + racket/sequence (except-in racket/list permutations) ; FIXME math plot (only-in typed/racket/base @@ -96,28 +98,28 @@ If @racket[ws] is not @racket[#f], they compute weighted variations of the same. See @secref{stats:expected-values} for the meaning of the @racket[bias] keyword argument. } -@deftogether[(@defproc[(variance/mean [mean Real] +@deftogether[(@defproc[(variance/mean [m Real] [xs (Sequenceof Real)] [ws (U #f (Sequenceof Real)) #f] [#:bias bias (U #t #f Real) #f]) Nonnegative-Real] - @defproc[(stddev/mean [mean Real] + @defproc[(stddev/mean [m Real] [xs (Sequenceof Real)] [ws (U #f (Sequenceof Real)) #f] [#:bias bias (U #t #f Real) #f]) Nonnegative-Real] - @defproc[(skewness/mean [mean Real] + @defproc[(skewness/mean [m Real] [xs (Sequenceof Real)] [ws (U #f (Sequenceof Real)) #f] [#:bias bias (U #t #f Real) #f]) Real] - @defproc[(kurtosis/mean [mean Real] + @defproc[(kurtosis/mean [m Real] [xs (Sequenceof Real)] [ws (U #f (Sequenceof Real)) #f] [#:bias bias (U #t #f Real) #f]) Nonnegative-Real])]{ Like @racket[variance], @racket[stddev], @racket[skewness] and @racket[kurtosis], but computed -using known mean @racket[mean]. +using known mean @racket[m]. } @section[#:tag "stats:running"]{Running Expected Values} @@ -226,22 +228,22 @@ Removing the correlation using importance weights: See @secref{stats:expected-values} for the meaning of the @racket[bias] keyword argument. } -@deftogether[(@defproc[(covariance/means [μx Real] - [μy Real] +@deftogether[(@defproc[(covariance/means [mx Real] + [my Real] [xs (Sequenceof Real)] [ys (Sequenceof Real)] [ws (U #f (Sequenceof Real)) #f] [#:bias bias (U #t #f Real) #f]) Real] - @defproc[(correlation/means [μx Real] - [μy Real] + @defproc[(correlation/means [mx Real] + [my Real] [xs (Sequenceof Real)] [ys (Sequenceof Real)] [ws (U #f (Sequenceof Real)) #f] [#:bias bias (U #t #f Real) #f]) Real])]{ Like @racket[covariance] and @racket[correlation], but computed using known means -@racket[μx] and @racket[μy]. +@racket[mx] and @racket[my]. } @section{Counting and Binning} @@ -392,5 +394,126 @@ Computes the average absolute difference between the elements in @racket[xs] and Nonnegative-Real]{ Like @racket[(absdev xs ws)], but computed using known median @racket[median]. } - + +@deftogether[(@defproc[(hpd-interval [lt? (A A -> Any)] + [δ (A A -> Real)] + [p Real] + [xs (Sequenceof A)] + [ws (U #f (Sequenceof Real)) #f]) + (Values A A)] + @defproc[(hpd-interval/sorted [δ (A A -> Real)] + [p Real] + [xs (Sequenceof A)] + [ws (U #f (Sequenceof Real)) #f]) + (Values A A)])]{ +Estimates the smallest interval for which the distribution that produced @racket[xs] (optionally +weighted by @racket[ws]) assigns probability @racket[p], which must be positive. +The type @racket[A] represents an ordered metric space with order @racket[lt?] and metric @racket[δ]. + +To compute an HPD interval from sorted samples, use @racket[hpd-interval/sorted]. + +You almost certainly want to use @racket[real-hpd-interval] or @racket[real-hpd-interval/sorted] +instead, which are defined in terms of these. +} + +@deftogether[(@defproc[(real-hpd-interval [p Real] + [xs (Sequenceof Real)] + [ws (U #f (Sequenceof Real)) #f]) + (Values Real Real)] + @defproc[(real-hpd-interval/sorted [p Real] + [xs (Sequenceof Real)] + [ws (U #f (Sequenceof Real)) #f]) + (Values Real Real)])]{ +Equivalent to @racket[(hpd-interval < - p xs ws)] and @racket[(hpd-interval/sorted - p xs ws)]. +@examples[#:eval typed-eval + (define beta32 (beta-dist 3 2)) + (real-dist-hpd-interval beta32 0.8) + (real-hpd-interval 0.8 (sample beta32 10000))] +} + +@section{Simulations} + +The functions in this section support Monte Carlo simulation; for example, quantifying uncertainty +about statistics estimated from samples. + +@deftogether[(@defproc[(mc-variance [xs (Sequenceof Real)] + [ws (U #f (Sequenceof Real)) #f] + [#:bias bias (U #t #f Real) #f]) + Nonnegative-Real] + @defproc[(mc-stddev [xs (Sequenceof Real)] + [ws (U #f (Sequenceof Real)) #f] + [#:bias bias (U #t #f Real) #f]) + Nonnegative-Real])]{ +Estimate the variance and standard deviation of expected values computed from random samples. + +If @racket[xs] are random variable samples, then +@racketblock[(define θ (mean xs ws))] +is also a random variable sample. +These two values: +@racketblock[(mc-variance xs ws #:bias bias) + (mc-stddev xs ws #:bias bias)] +estimate the variance and standard deviation of @racket[θ]. +The latter is simply the square root of the variance, and bias correction is applied as described in +@secref{stats:expected-values}. + + +Two different ways to estimate the standard deviation of a mean computed from 1000 samples are +@interaction[#:eval typed-eval + (mc-stddev (sample (normal-dist 0 1) 1000)) + (stddev (for/list : (Listof Real) ([_ (in-range 100)]) + (mean (sample (normal-dist 0 1) 1000))))] +Using @racket[mc-stddev] is 100 times faster in this case, and in most statistical applications, +replicating a sampling process 100 times is infeasible. +} + +@deftogether[(@defproc[(mc-stddev/mean [m Real] + [xs (Sequenceof Real)] + [ws (U #f (Sequenceof Real)) #f] + [#:bias bias (U #t #f Real) #f]) + Nonnegative-Real] + @defproc[(mc-variance/mean [m Real] + [xs (Sequenceof Real)] + [ws (U #f (Sequenceof Real)) #f] + [#:bias bias (U #t #f Real) #f]) + Nonnegative-Real] + )]{ +Use these in the exceedingly rare cases in which you know the mean @racket[m] but are estimating +uncertainty in an estimate of the mean anyway. +} + +@defproc[(indicator [pred? (A -> Any)]) (A -> (U 0 1))]{ +Converts a predicate into an indicator function. +@examples[#:eval typed-eval + (fl (mean (map (indicator (λ ([x : Real]) (< -inf.0 x -1))) + (sample (normal-dist 0 1) 5000)))) + (real-dist-prob (normal-dist 0 1) -inf.0 -1)] +} + +@defproc[(mc-probability [pred? (A -> Any)] + [xs (Sequenceof A)] + [ws (U #f (Sequenceof Real)) #f]) + Nonnegative-Real]{ +Estimates the probability of @racket[pred?] from possibly weighted samples. +Equivalent to @racket[(mean (sequence-map (indicator pred?) xs) ws)]. +@examples[#:eval typed-eval + (fl (mc-probability (λ ([x : Real]) (< -inf.0 x -1)) + (sample (normal-dist 0 1) 5000)))] +} + +@defproc[(mc-prob-dist [pred? (A -> Any)] + [xs (Sequenceof A)] + [ws (U #f (Sequenceof Real)) #f]) + Beta-Dist]{ +Returns a beta distribution estimated from possibly weighted samples whose mean is +@racket[(mc-probability pred? xs ws)]. + +Computing a confidence interval for a probability whose endpoints are guaranteed to be +between @racket[0] and @racket[1]: +@interaction[#:eval typed-eval + (real-dist-hpd-interval + (mc-prob-dist (λ ([x : Real]) (< -inf.0 x -1)) + (sample (normal-dist 0 1) 5000)) + 0.95)] +} + @(close-eval typed-eval) diff --git a/pkgs/math-pkgs/math-lib/math/private/statistics/hpd-interval.rkt b/pkgs/math-pkgs/math-lib/math/private/statistics/hpd-interval.rkt new file mode 100644 index 0000000000..0c574d511d --- /dev/null +++ b/pkgs/math-pkgs/math-lib/math/private/statistics/hpd-interval.rkt @@ -0,0 +1,103 @@ +#lang typed/racket + +(require "../../flonum.rkt" + "order-statistics.rkt" + "statistics-utils.rkt") + +(provide hpd-interval + hpd-interval/sorted + real-hpd-interval + real-hpd-interval/sorted) + +(: unweighted-hpd-interval (All (A) (-> Symbol (-> A A Real) Real (Listof A) (Values A A)))) +(define (unweighted-hpd-interval name metric α xs) + (define n (length xs)) + (define m (exact-ceiling (* n α))) + (define as (take xs m)) + (define bs (drop xs m)) + (cond + [(empty? as) (error name "interval is empty; given ~e ~e" α xs)] + [(empty? bs) (values (first as) (last as))] + [else + (define a* (first as)) + (define b* (first bs)) + (define d* (abs (metric b* a*))) + (define-values (a b _) + (for/fold: ([a* : A a*] [b* : A b*] [d* : Real d*]) ([a (in-list as)] + [b (in-list bs)]) + (define d (abs (metric b a))) + (if (d . < . d*) (values a b d) (values a* b* d*)))) + (values a b)])) + +(: weighted-hpd-interval (All (A) (-> Symbol (-> A A Real) Real (Listof A) (Listof Nonnegative-Real) + (Values A A)))) +(define (weighted-hpd-interval name metric α xs ws) + (let* ([xs (list->vector xs)] + [ws (weights->normalized-weights name ws)] + [ws (flvector-sums (list->flvector ws))]) + (define n (vector-length xs)) + + (: find-new-endpoint (Index Index -> (Option Index))) + ;; Returns the next index after i1 for which the sum of weights >= α + (define (find-new-endpoint i0 i1) + (define w0 (flvector-ref ws i0)) + (let loop ([i1 : Nonnegative-Fixnum i1]) + (cond [(i1 . < . n) + (cond [((- (flvector-ref ws i1) w0) . >= . α) i1] + [else (loop (+ i1 1))])] + [else #f]))) + + (define i1 (find-new-endpoint 0 0)) + (cond + [(not i1) (values (vector-ref xs 0) (vector-ref xs (- n 1)))] + [else + (define a* (vector-ref xs 0)) + (define b* (vector-ref xs i1)) + (define d* (abs (metric b* a*))) + (let loop ([i0 : Nonnegative-Fixnum 1] [i1 : Index i1] [a* a*] [b* b*] [d* d*]) + ;(printf "i0 = ~v i1 = ~v~na* = ~v b* = ~v~nd* = ~v~n" i0 i1 a* b* d*) + (cond [(i0 . >= . n) (values a* b*)] + [else + (let ([i1 (find-new-endpoint i0 i1)]) + (cond [(not i1) (values a* b*)] + [else + ;(printf "α = ~v~n~n" (- (flvector-ref ws i1) (flvector-ref ws i0))) + (define a (vector-ref xs i0)) + (define b (vector-ref xs i1)) + (define d (abs (metric b a))) + (cond [(d . < . d*) (loop (+ i0 1) i1 a b d)] + [else (loop (+ i0 1) i1 a* b* d*)])]))]))]))) + + +(: hpd-interval/sorted (All (A) (->* [(-> A A Real) Real (Sequenceof A)] + [(Option (Sequenceof Real))] + (Values A A)))) +(define (hpd-interval/sorted metric α xs [ws #f]) + (cond [(or (α . <= . 0) (α . > . 1)) + (raise-argument-error 'hpd-interval/sorted "Real in (0,1]" 1 metric α xs ws)] + [ws (let-values ([(xs ws) (sequences->weighted-samples 'sorted-hpd-interval xs ws)]) + (weighted-hpd-interval 'hpd-interval/sorted metric α xs ws))] + [else (unweighted-hpd-interval 'hpd-interval/sorted metric α (sequence->list xs))])) + +(: hpd-interval (All (A) (->* [(-> A A Any) (-> A A Real) Real (Sequenceof A)] + [(Option (Sequenceof Real))] + (Values A A)))) +(define (hpd-interval lt? metric α xs [ws #f]) + (cond [(or (α . <= . 0) (α . > . 1)) + (raise-argument-error 'hpd-interval "Real in (0,1]" 2 lt? metric α xs ws)] + [ws (let-values ([(xs ws) (sort-samples lt? xs ws)]) + (weighted-hpd-interval 'hpd-interval metric α xs ws))] + [else (let ([xs (sort-samples lt? xs)]) + (unweighted-hpd-interval 'hpd-interval metric α xs))])) + +(: real-hpd-interval/sorted (->* [Real (Sequenceof Real)] + [(Option (Sequenceof Real))] + (Values Real Real))) +(define (real-hpd-interval/sorted α xs [ws #f]) + (hpd-interval/sorted - α xs ws)) + +(: real-hpd-interval (->* [Real (Sequenceof Real)] + [(Option (Sequenceof Real))] + (Values Real Real))) +(define (real-hpd-interval α xs [ws #f]) + (hpd-interval < - α xs ws)) diff --git a/pkgs/math-pkgs/math-lib/math/private/statistics/monte-carlo.rkt b/pkgs/math-pkgs/math-lib/math/private/statistics/monte-carlo.rkt new file mode 100644 index 0000000000..7c9fcf421b --- /dev/null +++ b/pkgs/math-pkgs/math-lib/math/private/statistics/monte-carlo.rkt @@ -0,0 +1,85 @@ +#lang typed/racket + +(require "../../base.rkt" + "../distributions/beta-dist.rkt" + "expected-values.rkt" + "statistics-utils.rkt") + +(provide mc-variance + mc-variance/mean + mc-stddev + mc-stddev/mean + indicator + mc-probability + mc-prob-dist) + +;; --------------------------------------------------------------------------------------------------- +;; Monte Carlo variance and standard deviation + +;; Correct MC variance for ratio importance method computation is from notes on a lecture series +;; Petri Koistinen. Monte Carlo Methods, with an emphasis on Bayesian computation. Summer 2010 + +(: mc-variance* (-> Symbol Real (Sequenceof Real) (Option (Sequenceof Real)) (U Boolean Real) + Nonnegative-Real)) +(define (mc-variance* name m xs ws bias) + (define-values (xs^2 n) + (cond [ws (let-values ([(xs ws) (sequences->weighted-samples 'mc-variance xs ws)]) + (values (map (λ: ([x : Real] [w : Real]) (sqr (* w (- x m)))) xs ws) + (max 0 (sum ws))))] + [else (let ([xs (sequence->list xs)]) + (values (map (λ: ([x : Real]) (sqr (- x m))) xs) + (length xs)))])) + (cond [(zero? n) +nan.0] + [else + (define m2 (max 0 (/ (sum xs^2) (sqr n)))) + (adjust-variance m2 n bias)])) + +(: mc-variance/mean (->* [Real (Sequenceof Real)] + [(Option (Sequenceof Real)) #:bias (U Boolean Real)] + Nonnegative-Real)) +(define (mc-variance/mean m xs [ws #f] #:bias [bias #f]) + (mc-variance* 'mc-variance/mean m xs ws bias)) + +(: mc-variance (->* [(Sequenceof Real)] + [(Option (Sequenceof Real)) #:bias (U Boolean Real)] + Nonnegative-Real)) +(define (mc-variance xs [ws #f] #:bias [bias #f]) + (mc-variance* 'mc-variance (mean xs ws) xs ws bias)) + +(: mc-stddev/mean (->* [Real (Sequenceof Real)] + [(Option (Sequenceof Real)) #:bias (U Boolean Real)] + Nonnegative-Real)) +(define (mc-stddev/mean m xs [ws #f] #:bias [bias #f]) + (sqrt (mc-variance* 'mc-stddev/mean m xs ws bias))) + +(: mc-stddev (->* [(Sequenceof Real)] + [(Option (Sequenceof Real)) #:bias (U Boolean Real)] + Nonnegative-Real)) +(define (mc-stddev xs [ws #f] #:bias [bias #f]) + (sqrt (mc-variance* 'mc-stddev (mean xs ws) xs ws bias))) + +;; --------------------------------------------------------------------------------------------------- +;; Monte Carlo probabilities + +(: indicator (All (A) ((A -> Any) -> (A -> (U 0 1))))) +(define ((indicator f) x) (if (f x) 1 0)) + +(: mc-probability (All (A) (->* [(A -> Any) (Sequenceof A)] + [(Option (Sequenceof Real))] + Nonnegative-Real))) +(define (mc-probability p? xs [ws #f]) + (let-values ([(xs ws) (cond [ws (sequences->weighted-samples 'mc-prob xs ws)] + [else (values (sequence->list xs) #f)])]) + (max 0 (mean (map ((inst indicator A) p?) xs) ws)))) + +(: mc-prob-dist (All (A) (->* [(A -> Any) (Sequenceof A)] + [(Option (Sequenceof Real))] + Beta-Dist))) +(define (mc-prob-dist p? xs [ws #f]) + (let-values ([(xs ws) (cond [ws (sequences->weighted-samples 'mc-prob-dist xs ws)] + [else (values (sequence->list xs) #f)])]) + (define n (length xs)) + (define α + (cond [ws (* n (mean (map ((inst indicator A) p?) xs) ws))] + [else (count p? xs)])) + (beta-dist α (- n α)))) diff --git a/pkgs/math-pkgs/math-lib/math/private/statistics/statistics-utils.rkt b/pkgs/math-pkgs/math-lib/math/private/statistics/statistics-utils.rkt index 52c6f1d9bd..1feecb7054 100644 --- a/pkgs/math-pkgs/math-lib/math/private/statistics/statistics-utils.rkt +++ b/pkgs/math-pkgs/math-lib/math/private/statistics/statistics-utils.rkt @@ -105,7 +105,7 @@ ;; bias = #t Assume sum of weights is the count and correct for bias normally ;; bias = n Assume n actual samples; correct for bias -(: get-bias-adjustment (Nonnegative-Real (U #t Real) Positive-Real -> Positive-Real)) +(: get-bias-adjustment (Nonnegative-Real (U #t Real) Nonnegative-Real -> Positive-Real)) (define (get-bias-adjustment c bias mn) (define n (if (real? bias) bias c)) (if (n . > . mn) n +nan.0)) diff --git a/pkgs/math-pkgs/math-lib/math/statistics.rkt b/pkgs/math-pkgs/math-lib/math/statistics.rkt index 363fd3aa23..741d1a7651 100644 --- a/pkgs/math-pkgs/math-lib/math/statistics.rkt +++ b/pkgs/math-pkgs/math-lib/math/statistics.rkt @@ -4,11 +4,15 @@ "private/statistics/expected-values.rkt" "private/statistics/correlation.rkt" "private/statistics/order-statistics.rkt" - "private/statistics/counting.rkt") + "private/statistics/counting.rkt" + "private/statistics/monte-carlo.rkt" + "private/statistics/hpd-interval.rkt") (provide (all-from-out "private/statistics/statistics-struct.rkt" "private/statistics/expected-values.rkt" "private/statistics/correlation.rkt" "private/statistics/order-statistics.rkt" - "private/statistics/counting.rkt")) + "private/statistics/counting.rkt" + "private/statistics/monte-carlo.rkt" + "private/statistics/hpd-interval.rkt"))