From cd2632c18613fb3f19901b45b641288f6eddb697 Mon Sep 17 00:00:00 2001
From: Neil Toronto <neil.toronto@gmail.com>
Date: Wed, 26 Mar 2014 17:31:17 -0600
Subject: [PATCH] Added HPD interval estimates and Monte Carlo variance

---
 .../math/scribblings/math-distributions.scrbl |   4 +-
 .../math/scribblings/math-statistics.scrbl    | 147 ++++++++++++++++--
 .../math/private/statistics/hpd-interval.rkt  | 103 ++++++++++++
 .../math/private/statistics/monte-carlo.rkt   |  85 ++++++++++
 .../private/statistics/statistics-utils.rkt   |   2 +-
 pkgs/math-pkgs/math-lib/math/statistics.rkt   |   8 +-
 6 files changed, 332 insertions(+), 17 deletions(-)
 create mode 100644 pkgs/math-pkgs/math-lib/math/private/statistics/hpd-interval.rkt
 create mode 100644 pkgs/math-pkgs/math-lib/math/private/statistics/monte-carlo.rkt

diff --git a/pkgs/math-pkgs/math-doc/math/scribblings/math-distributions.scrbl b/pkgs/math-pkgs/math-doc/math/scribblings/math-distributions.scrbl
index bcadafe6b8..5f519c0eca 100644
--- a/pkgs/math-pkgs/math-doc/math/scribblings/math-distributions.scrbl
+++ b/pkgs/math-pkgs/math-doc/math/scribblings/math-distributions.scrbl
@@ -254,8 +254,8 @@ the two endpoints are swapped first.) The @racket[log?] and @racket[1-p?] argume
 meaning of the return value in the same way as the corresponding arguments to @racket[cdf].
 }
 
-@defproc[(real-dist-hpd-interval [d Real-Dist] [α Real]) (Values Flonum Flonum)]{
-Finds the smallest interval for which @racket[d] assigns probability @racket[α], if one exists.
+@defproc[(real-dist-hpd-interval [d Real-Dist] [p Real]) (Values Flonum Flonum)]{
+Finds the smallest interval for which @racket[d] assigns probability @racket[p], if one exists.
 @examples[#:eval untyped-eval
                  (define d (beta-dist 3 2))
                  (define-values (x0 x1) (real-dist-hpd-interval d 0.8))
diff --git a/pkgs/math-pkgs/math-doc/math/scribblings/math-statistics.scrbl b/pkgs/math-pkgs/math-doc/math/scribblings/math-statistics.scrbl
index 75a70671a9..091befe20d 100644
--- a/pkgs/math-pkgs/math-doc/math/scribblings/math-statistics.scrbl
+++ b/pkgs/math-pkgs/math-doc/math/scribblings/math-statistics.scrbl
@@ -2,7 +2,9 @@
 
 @(require scribble/eval
           racket/sandbox
-          (for-label racket/base racket/promise
+          (for-label racket/base
+                     racket/promise
+                     racket/sequence
                      (except-in racket/list permutations) ; FIXME
                      math plot
                      (only-in typed/racket/base
@@ -96,28 +98,28 @@ If @racket[ws] is not @racket[#f], they compute weighted variations of the same.
 See @secref{stats:expected-values} for the meaning of the @racket[bias] keyword argument.
 }
 
-@deftogether[(@defproc[(variance/mean [mean Real]
+@deftogether[(@defproc[(variance/mean [m Real]
                                       [xs (Sequenceof Real)]
                                       [ws (U #f (Sequenceof Real)) #f]
                                       [#:bias bias (U #t #f Real) #f])
                        Nonnegative-Real]
-              @defproc[(stddev/mean [mean Real]
+              @defproc[(stddev/mean [m Real]
                                     [xs (Sequenceof Real)]
                                     [ws (U #f (Sequenceof Real)) #f]
                                     [#:bias bias (U #t #f Real) #f])
                        Nonnegative-Real]
-              @defproc[(skewness/mean [mean Real]
+              @defproc[(skewness/mean [m Real]
                                       [xs (Sequenceof Real)]
                                       [ws (U #f (Sequenceof Real)) #f]
                                       [#:bias bias (U #t #f Real) #f])
                        Real]
-              @defproc[(kurtosis/mean [mean Real]
+              @defproc[(kurtosis/mean [m Real]
                                       [xs (Sequenceof Real)]
                                       [ws (U #f (Sequenceof Real)) #f]
                                       [#:bias bias (U #t #f Real) #f])
                        Nonnegative-Real])]{
 Like @racket[variance], @racket[stddev], @racket[skewness] and @racket[kurtosis], but computed
-using known mean @racket[mean].
+using known mean @racket[m].
 }
 
 @section[#:tag "stats:running"]{Running Expected Values}
@@ -226,22 +228,22 @@ Removing the correlation using importance weights:
 See @secref{stats:expected-values} for the meaning of the @racket[bias] keyword argument.
 }
 
-@deftogether[(@defproc[(covariance/means [μx Real]
-                                         [μy Real]
+@deftogether[(@defproc[(covariance/means [mx Real]
+                                         [my Real]
                                          [xs (Sequenceof Real)]
                                          [ys (Sequenceof Real)]
                                          [ws (U #f (Sequenceof Real)) #f]
                                          [#:bias bias (U #t #f Real) #f])
                        Real]
-              @defproc[(correlation/means [μx Real]
-                                          [μy Real]
+              @defproc[(correlation/means [mx Real]
+                                          [my Real]
                                           [xs (Sequenceof Real)]
                                           [ys (Sequenceof Real)]
                                           [ws (U #f (Sequenceof Real)) #f]
                                           [#:bias bias (U #t #f Real) #f])
                        Real])]{
 Like @racket[covariance] and @racket[correlation], but computed using known means
-@racket[μx] and @racket[μy].
+@racket[mx] and @racket[my].
 }
 
 @section{Counting and Binning}
@@ -392,5 +394,126 @@ Computes the average absolute difference between the elements in @racket[xs] and
          Nonnegative-Real]{
 Like @racket[(absdev xs ws)], but computed using known median @racket[median].
 }
-         
+
+@deftogether[(@defproc[(hpd-interval [lt? (A A -> Any)]
+                                     [δ (A A -> Real)]
+                                     [p Real]
+                                     [xs (Sequenceof A)]
+                                     [ws (U #f (Sequenceof Real)) #f])
+                       (Values A A)]
+              @defproc[(hpd-interval/sorted [δ (A A -> Real)]
+                                            [p Real]
+                                            [xs (Sequenceof A)]
+                                            [ws (U #f (Sequenceof Real)) #f])
+                       (Values A A)])]{
+Estimates the smallest interval for which the distribution that produced @racket[xs] (optionally
+weighted by @racket[ws]) assigns probability @racket[p], which must be positive.
+The type @racket[A] represents an ordered metric space with order @racket[lt?] and metric @racket[δ].
+
+To compute an HPD interval from sorted samples, use @racket[hpd-interval/sorted].
+
+You almost certainly want to use @racket[real-hpd-interval] or @racket[real-hpd-interval/sorted]
+instead, which are defined in terms of these.
+}
+
+@deftogether[(@defproc[(real-hpd-interval [p Real]
+                                          [xs (Sequenceof Real)]
+                                          [ws (U #f (Sequenceof Real)) #f])
+                       (Values Real Real)]
+              @defproc[(real-hpd-interval/sorted [p Real]
+                                                 [xs (Sequenceof Real)]
+                                                 [ws (U #f (Sequenceof Real)) #f])
+                       (Values Real Real)])]{
+Equivalent to @racket[(hpd-interval < - p xs ws)] and @racket[(hpd-interval/sorted - p xs ws)].
+@examples[#:eval typed-eval
+                 (define beta32 (beta-dist 3 2))
+                 (real-dist-hpd-interval beta32 0.8)
+                 (real-hpd-interval 0.8 (sample beta32 10000))]
+}
+
+@section{Simulations}
+
+The functions in this section support Monte Carlo simulation; for example, quantifying uncertainty
+about statistics estimated from samples.
+
+@deftogether[(@defproc[(mc-variance [xs (Sequenceof Real)]
+                                    [ws (U #f (Sequenceof Real)) #f]
+                                    [#:bias bias (U #t #f Real) #f])
+                       Nonnegative-Real]
+              @defproc[(mc-stddev [xs (Sequenceof Real)]
+                                  [ws (U #f (Sequenceof Real)) #f]
+                                  [#:bias bias (U #t #f Real) #f])
+                       Nonnegative-Real])]{
+Estimate the variance and standard deviation of expected values computed from random samples.
+
+If @racket[xs] are random variable samples, then
+@racketblock[(define θ (mean xs ws))]
+is also a random variable sample.
+These two values:
+@racketblock[(mc-variance xs ws #:bias bias)
+             (mc-stddev xs ws #:bias bias)]
+estimate the variance and standard deviation of @racket[θ].
+The latter is simply the square root of the variance, and bias correction is applied as described in
+@secref{stats:expected-values}.
+
+
+Two different ways to estimate the standard deviation of a mean computed from 1000 samples are
+@interaction[#:eval typed-eval
+                    (mc-stddev (sample (normal-dist 0 1) 1000))
+                    (stddev (for/list : (Listof Real) ([_  (in-range 100)])
+                              (mean (sample (normal-dist 0 1) 1000))))]
+Using @racket[mc-stddev] is 100 times faster in this case, and in most statistical applications,
+replicating a sampling process 100 times is infeasible.
+}
+
+@deftogether[(@defproc[(mc-stddev/mean [m Real]
+                                       [xs (Sequenceof Real)]
+                                       [ws (U #f (Sequenceof Real)) #f]
+                                       [#:bias bias (U #t #f Real) #f])
+                       Nonnegative-Real]
+              @defproc[(mc-variance/mean [m Real]
+                                         [xs (Sequenceof Real)]
+                                         [ws (U #f (Sequenceof Real)) #f]
+                                         [#:bias bias (U #t #f Real) #f])
+                       Nonnegative-Real]
+              )]{
+Use these in the exceedingly rare cases in which you know the mean @racket[m] but are estimating
+uncertainty in an estimate of the mean anyway.
+}
+
+@defproc[(indicator [pred? (A -> Any)]) (A -> (U 0 1))]{
+Converts a predicate into an indicator function.
+@examples[#:eval typed-eval                 
+                 (fl (mean (map (indicator (λ ([x : Real]) (< -inf.0 x -1)))
+                                (sample (normal-dist 0 1) 5000))))
+                 (real-dist-prob (normal-dist 0 1) -inf.0 -1)]
+}
+
+@defproc[(mc-probability [pred? (A -> Any)]
+                         [xs (Sequenceof A)]
+                         [ws (U #f (Sequenceof Real)) #f])
+         Nonnegative-Real]{
+Estimates the probability of @racket[pred?] from possibly weighted samples.
+Equivalent to @racket[(mean (sequence-map (indicator pred?) xs) ws)].
+@examples[#:eval typed-eval
+                 (fl (mc-probability (λ ([x : Real]) (< -inf.0 x -1))
+                                     (sample (normal-dist 0 1) 5000)))]
+}
+
+@defproc[(mc-prob-dist [pred? (A -> Any)]
+                       [xs (Sequenceof A)]
+                       [ws (U #f (Sequenceof Real)) #f])
+         Beta-Dist]{
+Returns a beta distribution estimated from possibly weighted samples whose mean is
+@racket[(mc-probability pred? xs ws)].
+
+Computing a confidence interval for a probability whose endpoints are guaranteed to be
+between @racket[0] and @racket[1]:
+@interaction[#:eval typed-eval
+                    (real-dist-hpd-interval
+                     (mc-prob-dist (λ ([x : Real]) (< -inf.0 x -1))
+                                   (sample (normal-dist 0 1) 5000))
+                     0.95)]
+}
+
 @(close-eval typed-eval)
diff --git a/pkgs/math-pkgs/math-lib/math/private/statistics/hpd-interval.rkt b/pkgs/math-pkgs/math-lib/math/private/statistics/hpd-interval.rkt
new file mode 100644
index 0000000000..0c574d511d
--- /dev/null
+++ b/pkgs/math-pkgs/math-lib/math/private/statistics/hpd-interval.rkt
@@ -0,0 +1,103 @@
+#lang typed/racket
+
+(require "../../flonum.rkt"
+         "order-statistics.rkt"
+         "statistics-utils.rkt")
+
+(provide hpd-interval
+         hpd-interval/sorted
+         real-hpd-interval
+         real-hpd-interval/sorted)
+
+(: unweighted-hpd-interval (All (A) (-> Symbol (-> A A Real) Real (Listof A) (Values A A))))
+(define (unweighted-hpd-interval name metric α xs)
+  (define n (length xs))
+  (define m (exact-ceiling (* n α)))
+  (define as (take xs m))
+  (define bs (drop xs m))
+  (cond
+    [(empty? as)  (error name "interval is empty; given ~e ~e" α xs)]
+    [(empty? bs)  (values (first as) (last as))]
+    [else
+     (define a* (first as))
+     (define b* (first bs))
+     (define d* (abs (metric b* a*)))
+     (define-values (a b _)
+       (for/fold: ([a* : A  a*] [b* : A  b*] [d* : Real  d*]) ([a  (in-list as)]
+                                                               [b  (in-list bs)])
+         (define d (abs (metric b a)))
+         (if (d . < . d*) (values a b d) (values a* b* d*))))
+     (values a b)]))
+
+(: weighted-hpd-interval (All (A) (-> Symbol (-> A A Real) Real (Listof A) (Listof Nonnegative-Real)
+                                      (Values A A))))
+(define (weighted-hpd-interval name metric α xs ws)
+  (let* ([xs  (list->vector xs)]
+         [ws  (weights->normalized-weights name ws)]
+         [ws  (flvector-sums (list->flvector ws))])
+    (define n (vector-length xs))
+    
+    (: find-new-endpoint (Index Index -> (Option Index)))
+    ;; Returns the next index after i1 for which the sum of weights >= α
+    (define (find-new-endpoint i0 i1)
+      (define w0 (flvector-ref ws i0))
+      (let loop ([i1 : Nonnegative-Fixnum  i1])
+        (cond [(i1 . < . n)
+               (cond [((- (flvector-ref ws i1) w0) . >= . α)  i1]
+                     [else  (loop (+ i1 1))])]
+              [else  #f])))
+    
+    (define i1 (find-new-endpoint 0 0))
+    (cond
+      [(not i1)  (values (vector-ref xs 0) (vector-ref xs (- n 1)))]
+      [else
+       (define a* (vector-ref xs 0))
+       (define b* (vector-ref xs i1))
+       (define d* (abs (metric b* a*)))
+       (let loop ([i0 : Nonnegative-Fixnum  1] [i1 : Index  i1] [a* a*] [b* b*] [d* d*])
+         ;(printf "i0 = ~v  i1 = ~v~na* = ~v  b* = ~v~nd* = ~v~n" i0 i1 a* b* d*)
+         (cond [(i0 . >= . n)  (values a* b*)]
+               [else
+                (let ([i1  (find-new-endpoint i0 i1)])
+                  (cond [(not i1)  (values a* b*)]
+                        [else
+                         ;(printf "α = ~v~n~n" (- (flvector-ref ws i1) (flvector-ref ws i0)))
+                         (define a (vector-ref xs i0))
+                         (define b (vector-ref xs i1))
+                         (define d (abs (metric b a)))
+                         (cond [(d . < . d*)  (loop (+ i0 1) i1 a b d)]
+                               [else          (loop (+ i0 1) i1 a* b* d*)])]))]))])))
+
+
+(: hpd-interval/sorted (All (A) (->* [(-> A A Real) Real (Sequenceof A)]
+                                     [(Option (Sequenceof Real))]
+                                     (Values A A))))
+(define (hpd-interval/sorted metric α xs [ws #f])
+  (cond [(or (α . <= . 0) (α . > . 1))
+         (raise-argument-error 'hpd-interval/sorted "Real in (0,1]" 1 metric α xs ws)]
+        [ws    (let-values ([(xs ws)  (sequences->weighted-samples 'sorted-hpd-interval xs ws)])
+                 (weighted-hpd-interval 'hpd-interval/sorted metric α xs ws))]
+        [else  (unweighted-hpd-interval 'hpd-interval/sorted metric α (sequence->list xs))]))
+
+(: hpd-interval (All (A) (->* [(-> A A Any) (-> A A Real) Real (Sequenceof A)]
+                              [(Option (Sequenceof Real))]
+                              (Values A A))))
+(define (hpd-interval lt? metric α xs [ws #f])
+  (cond [(or (α . <= . 0) (α . > . 1))
+         (raise-argument-error 'hpd-interval "Real in (0,1]" 2 lt? metric α xs ws)]
+        [ws    (let-values ([(xs ws)  (sort-samples lt? xs ws)])
+                 (weighted-hpd-interval 'hpd-interval metric α xs ws))]
+        [else  (let ([xs  (sort-samples lt? xs)])
+                 (unweighted-hpd-interval 'hpd-interval metric α xs))]))
+
+(: real-hpd-interval/sorted (->* [Real (Sequenceof Real)]
+                                 [(Option (Sequenceof Real))]
+                                 (Values Real Real)))
+(define (real-hpd-interval/sorted α xs [ws #f])
+  (hpd-interval/sorted - α xs ws))
+
+(: real-hpd-interval (->* [Real (Sequenceof Real)]
+                          [(Option (Sequenceof Real))]
+                          (Values Real Real)))
+(define (real-hpd-interval α xs [ws #f])
+  (hpd-interval < - α xs ws))
diff --git a/pkgs/math-pkgs/math-lib/math/private/statistics/monte-carlo.rkt b/pkgs/math-pkgs/math-lib/math/private/statistics/monte-carlo.rkt
new file mode 100644
index 0000000000..7c9fcf421b
--- /dev/null
+++ b/pkgs/math-pkgs/math-lib/math/private/statistics/monte-carlo.rkt
@@ -0,0 +1,85 @@
+#lang typed/racket
+
+(require "../../base.rkt"
+         "../distributions/beta-dist.rkt"
+         "expected-values.rkt"
+         "statistics-utils.rkt")
+
+(provide mc-variance
+         mc-variance/mean
+         mc-stddev
+         mc-stddev/mean
+         indicator
+         mc-probability
+         mc-prob-dist)
+
+;; ---------------------------------------------------------------------------------------------------
+;; Monte Carlo variance and standard deviation
+
+;; Correct MC variance for ratio importance method computation is from notes on a lecture series
+;; Petri Koistinen. Monte Carlo Methods, with an emphasis on Bayesian computation. Summer 2010
+
+(: mc-variance* (-> Symbol Real (Sequenceof Real) (Option (Sequenceof Real)) (U Boolean Real)
+                    Nonnegative-Real))
+(define (mc-variance* name m xs ws bias)
+  (define-values (xs^2 n)
+    (cond [ws  (let-values ([(xs ws)  (sequences->weighted-samples 'mc-variance xs ws)])
+                 (values (map (λ: ([x : Real] [w : Real]) (sqr (* w (- x m)))) xs ws)
+                         (max 0 (sum ws))))]
+          [else  (let ([xs  (sequence->list xs)])
+                   (values (map (λ: ([x : Real]) (sqr (- x m))) xs)
+                           (length xs)))]))
+  (cond [(zero? n)  +nan.0]
+        [else
+         (define m2 (max 0 (/ (sum xs^2) (sqr n))))
+         (adjust-variance m2 n bias)]))
+
+(: mc-variance/mean (->* [Real (Sequenceof Real)]
+                         [(Option (Sequenceof Real)) #:bias (U Boolean Real)]
+                         Nonnegative-Real))
+(define (mc-variance/mean m xs [ws #f] #:bias [bias #f])
+  (mc-variance* 'mc-variance/mean m xs ws bias))
+
+(: mc-variance (->* [(Sequenceof Real)]
+                    [(Option (Sequenceof Real)) #:bias (U Boolean Real)]
+                    Nonnegative-Real))
+(define (mc-variance xs [ws #f] #:bias [bias #f])
+  (mc-variance* 'mc-variance (mean xs ws) xs ws bias))
+
+(: mc-stddev/mean (->* [Real (Sequenceof Real)]
+                       [(Option (Sequenceof Real)) #:bias (U Boolean Real)]
+                       Nonnegative-Real))
+(define (mc-stddev/mean m xs [ws #f] #:bias [bias #f])
+  (sqrt (mc-variance* 'mc-stddev/mean m xs ws bias)))
+
+(: mc-stddev (->* [(Sequenceof Real)]
+                  [(Option (Sequenceof Real)) #:bias (U Boolean Real)]
+                  Nonnegative-Real))
+(define (mc-stddev xs [ws #f] #:bias [bias #f])
+  (sqrt (mc-variance* 'mc-stddev (mean xs ws) xs ws bias)))
+
+;; ---------------------------------------------------------------------------------------------------
+;; Monte Carlo probabilities
+
+(: indicator (All (A) ((A -> Any) -> (A -> (U 0 1)))))
+(define ((indicator f) x) (if (f x) 1 0))
+
+(: mc-probability (All (A) (->* [(A -> Any) (Sequenceof A)]
+                                [(Option (Sequenceof Real))]
+                                Nonnegative-Real)))
+(define (mc-probability p? xs [ws #f])
+  (let-values ([(xs ws)  (cond [ws    (sequences->weighted-samples 'mc-prob xs ws)]
+                               [else  (values (sequence->list xs) #f)])])
+    (max 0 (mean (map ((inst indicator A) p?) xs) ws))))
+
+(: mc-prob-dist (All (A) (->* [(A -> Any) (Sequenceof A)]
+                              [(Option (Sequenceof Real))]
+                              Beta-Dist)))
+(define (mc-prob-dist p? xs [ws #f])
+  (let-values ([(xs ws)  (cond [ws    (sequences->weighted-samples 'mc-prob-dist xs ws)]
+                               [else  (values (sequence->list xs) #f)])])
+    (define n (length xs))
+    (define α
+      (cond [ws    (* n (mean (map ((inst indicator A) p?) xs) ws))]
+            [else  (count p? xs)]))
+    (beta-dist α (- n α))))
diff --git a/pkgs/math-pkgs/math-lib/math/private/statistics/statistics-utils.rkt b/pkgs/math-pkgs/math-lib/math/private/statistics/statistics-utils.rkt
index 52c6f1d9bd..1feecb7054 100644
--- a/pkgs/math-pkgs/math-lib/math/private/statistics/statistics-utils.rkt
+++ b/pkgs/math-pkgs/math-lib/math/private/statistics/statistics-utils.rkt
@@ -105,7 +105,7 @@
 ;; bias = #t   Assume sum of weights is the count and correct for bias normally
 ;; bias = n    Assume n actual samples; correct for bias
 
-(: get-bias-adjustment (Nonnegative-Real (U #t Real) Positive-Real -> Positive-Real))
+(: get-bias-adjustment (Nonnegative-Real (U #t Real) Nonnegative-Real -> Positive-Real))
 (define (get-bias-adjustment c bias mn)
   (define n (if (real? bias) bias c))
   (if (n . > . mn) n +nan.0))
diff --git a/pkgs/math-pkgs/math-lib/math/statistics.rkt b/pkgs/math-pkgs/math-lib/math/statistics.rkt
index 363fd3aa23..741d1a7651 100644
--- a/pkgs/math-pkgs/math-lib/math/statistics.rkt
+++ b/pkgs/math-pkgs/math-lib/math/statistics.rkt
@@ -4,11 +4,15 @@
          "private/statistics/expected-values.rkt"
          "private/statistics/correlation.rkt"
          "private/statistics/order-statistics.rkt"
-         "private/statistics/counting.rkt")
+         "private/statistics/counting.rkt"
+         "private/statistics/monte-carlo.rkt"
+         "private/statistics/hpd-interval.rkt")
 
 (provide (all-from-out
           "private/statistics/statistics-struct.rkt"
           "private/statistics/expected-values.rkt"
           "private/statistics/correlation.rkt"
           "private/statistics/order-statistics.rkt"
-          "private/statistics/counting.rkt"))
+          "private/statistics/counting.rkt"
+          "private/statistics/monte-carlo.rkt"
+          "private/statistics/hpd-interval.rkt"))