Added `array->list-array'

Cleaned up expected value code a little

Refactored running statistics objects (hid private fields, added
`update-statistics*')

Documented expected value functions and running statistics

Removed `bfpsi0' from bigfloat tests (DrDr's libmpfr doesn't have it)

Commented out custodian shutdown callback that frees MPFR's cache
(something's broken)
This commit is contained in:
Neil Toronto 2012-12-06 22:51:39 -07:00
parent f607a3b061
commit 769e8b47ec
11 changed files with 292 additions and 61 deletions

View File

@ -45,4 +45,5 @@
array-all-and
array-all-or
array-axis-reduce
unsafe-array-axis-reduce)
unsafe-array-axis-reduce
array->list-array)

View File

@ -151,3 +151,15 @@
(: array-all-or (All (A B) ((Array A) -> (U A #f))))
(define (array-all-or arr)
(array-ref ((inst array-fold (U A #f)) arr array-axis-or) #()))
;; ===================================================================================================
;; Other folds
(: array->list-array (All (A) (case-> ((Array A) -> (Array (Listof A)))
((Array A) Integer -> (Array (Listof A))))))
(define (array->list-array arr [k 0])
(define dims (array-dims arr))
(cond [(and (k . >= . 0) (k . < . dims))
(unsafe-array-axis-reduce arr k (inst build-list A))]
[else
(raise-argument-error 'array->list-array (format "Index < ~a" dims) 1 arr k)]))

View File

@ -76,7 +76,7 @@
[(_ name type fail-thunk) (get-ffi-obj name mpfr-lib type fail-thunk)]))
(define mpfr-free-cache (get-mpfr-fun 'mpfr_free_cache (_fun -> _void)))
#;
(define mpfr-shutdown
(register-custodian-shutdown mpfr-free-cache (λ (free) (free))))

View File

@ -30,15 +30,23 @@
(define m2 (/ (sum zs) n))
(adjust-covariance m2 n bias))
(: covariance/means (Correlation/Means-Fun Real))
(: covariance/means (case-> (Real Real (Sequenceof Real) (Sequenceof Real)
[#:bias (U #t #f Real)] -> Real)
(Real Real (Sequenceof Real) (Sequenceof Real) (Option (Sequenceof Real))
[#:bias (U #t #f Real)] -> Real)))
(define (covariance/means mx my xs ys [ws #f] #:bias [bias #f])
(covariance* 'covariance/means mx my xs ys ws bias))
(: covariance (Correlation-Fun Real))
(: covariance (case-> ((Sequenceof Real) (Sequenceof Real) [#:bias (U #t #f Real)] -> Real)
((Sequenceof Real) (Sequenceof Real) (Option (Sequenceof Real))
[#:bias (U #t #f Real)] -> Real)))
(define (covariance xs ys [ws #f] #:bias [bias #f])
(covariance* 'covariance (mean xs ws) (mean ys ws) xs ys ws bias))
(: correlation/means (Correlation/Means-Fun Real))
(: correlation/means (case-> (Real Real (Sequenceof Real) (Sequenceof Real)
[#:bias (U #t #f Real)] -> Real)
(Real Real (Sequenceof Real) (Sequenceof Real) (Option (Sequenceof Real))
[#:bias (U #t #f Real)] -> Real)))
(define (correlation/means mx my xs ys [ws #f] #:bias [bias #f])
(define g (covariance/means mx my xs ys ws #:bias bias))
(define sx (stddev/mean mx xs ws #:bias bias))
@ -47,6 +55,8 @@
[(zero? sy) +nan.0]
[else (/ g sx sy)]))
(: correlation (Correlation-Fun Real))
(: correlation (case-> ((Sequenceof Real) (Sequenceof Real) [#:bias (U #t #f Real)] -> Real)
((Sequenceof Real) (Sequenceof Real) (Option (Sequenceof Real))
[#:bias (U #t #f Real)] -> Real)))
(define (correlation xs ys [ws #f] #:bias [bias #f])
(correlation/means (mean xs ws) (mean ys ws) xs ys ws #:bias bias))

View File

@ -80,34 +80,52 @@
(define m4 (sum xs^4))
(adjust-kurtosis (max 0 (/ (* (/ m4 m2) n) m2)) n bias)])]))
(: variance/mean (Moment/Mean-Fun Nonnegative-Real))
;; ===================================================================================================
;; User-facing functions
(: variance/mean (case-> (Real (Sequenceof Real) [#:bias (U #t #f Real)] -> Nonnegative-Real)
(Real (Sequenceof Real) (Option (Sequenceof Real)) [#:bias (U #t #f Real)]
-> Nonnegative-Real)))
(define (variance/mean m xs [ws #f] #:bias [bias #f])
(variance* 'variance/mean m xs ws bias))
(: variance (Moment-Fun Nonnegative-Real))
(: variance (case-> ((Sequenceof Real) [#:bias (U #t #f Real)] -> Nonnegative-Real)
((Sequenceof Real) (Option (Sequenceof Real)) [#:bias (U #t #f Real)]
-> Nonnegative-Real)))
(define (variance xs [ws #f] #:bias [bias #f])
(variance* 'variance (mean xs ws) xs ws bias))
(: stddev/mean (Moment/Mean-Fun Nonnegative-Real))
(: stddev/mean (case-> (Real (Sequenceof Real) [#:bias (U #t #f Real)] -> Nonnegative-Real)
(Real (Sequenceof Real) (Option (Sequenceof Real)) [#:bias (U #t #f Real)]
-> Nonnegative-Real)))
(define (stddev/mean m xs [ws #f] #:bias [bias #f])
(sqrt (variance* 'stddev/mean (mean xs ws) xs ws bias)))
(: stddev (Moment-Fun Nonnegative-Real))
(: stddev (case-> ((Sequenceof Real) [#:bias (U #t #f Real)] -> Nonnegative-Real)
((Sequenceof Real) (Option (Sequenceof Real)) [#:bias (U #t #f Real)]
-> Nonnegative-Real)))
(define (stddev xs [ws #f] #:bias [bias #f])
(sqrt (variance* 'stddev (mean xs ws) xs ws bias)))
(: skewness/mean (Moment/Mean-Fun Real))
(: skewness/mean (case-> (Real (Sequenceof Real) [#:bias (U #t #f Real)] -> Real)
(Real (Sequenceof Real) (Option (Sequenceof Real)) [#:bias (U #t #f Real)]
-> Real)))
(define (skewness/mean m xs [ws #f] #:bias [bias #f])
(skewness* 'skewness/mean m xs ws bias))
(: skewness (Moment-Fun Real))
(: skewness (case-> ((Sequenceof Real) [#:bias (U #t #f Real)] -> Real)
((Sequenceof Real) (Option (Sequenceof Real)) [#:bias (U #t #f Real)] -> Real)))
(define (skewness xs [ws #f] #:bias [bias #f])
(skewness* 'skewness (mean xs ws) xs ws bias))
(: kurtosis/mean (Moment/Mean-Fun Nonnegative-Real))
(: kurtosis/mean (case-> (Real (Sequenceof Real) [#:bias (U #t #f Real)] -> Nonnegative-Real)
(Real (Sequenceof Real) (Option (Sequenceof Real)) [#:bias (U #t #f Real)]
-> Nonnegative-Real)))
(define (kurtosis/mean m xs [ws #f] #:bias [bias #f])
(kurtosis* 'kurtosis/mean m xs ws bias))
(: kurtosis (Moment-Fun Nonnegative-Real))
(: kurtosis (case-> ((Sequenceof Real) [#:bias (U #t #f Real)] -> Nonnegative-Real)
((Sequenceof Real) (Option (Sequenceof Real)) [#:bias (U #t #f Real)]
-> Nonnegative-Real)))
(define (kurtosis xs [ws #f] #:bias [bias #f])
(kurtosis* 'kurtosis (mean xs ws) xs ws bias))

View File

@ -4,10 +4,14 @@
"../../flonum.rkt"
"statistics-utils.rkt")
(provide (struct-out statistics)
(provide statistics
statistics?
empty-statistics
update-statistics
sequence->statistics
update-statistics*
statistics-min
statistics-max
statistics-count
statistics-range
statistics-mean
statistics-variance
@ -15,15 +19,18 @@
statistics-skewness
statistics-kurtosis)
(struct: statistics ([min : Flonum]
[max : Flonum]
[count : Nonnegative-Flonum]
[m1 : Flonum]
[m2 : Nonnegative-Flonum]
[m3 : Flonum]
[m4 : Nonnegative-Flonum])
(struct: base-statistics
([min : Flonum]
[max : Flonum]
[count : Nonnegative-Flonum])
#:transparent)
(struct: statistics base-statistics
([m1 : Flonum]
[m2 : Nonnegative-Flonum]
[m3 : Flonum]
[m4 : Nonnegative-Flonum]))
(define empty-statistics (statistics +inf.0 -inf.0 0.0 0.0 0.0 0.0 0.0))
(: update-statistics (case-> (statistics Real -> statistics)
@ -49,17 +56,27 @@
0.0)])
(statistics (flmin x mn) (flmax x mx) w+n new-m1 new-m2 new-m3 new-m4))]))
(: sequence->statistics (case-> ((Sequenceof Real) -> statistics)
((Sequenceof Real) (Sequenceof Real) -> statistics)))
(define sequence->statistics
(case-lambda
[(xs) (for/fold: ([e : statistics empty-statistics]) ([x xs])
(update-statistics e x))]
[(xs ws) (for/fold: ([e : statistics empty-statistics]) ([x xs] [w ws])
(update-statistics e x w))]))
(: update-statistics*
(case-> (statistics (Sequenceof Real) -> statistics)
(statistics (Sequenceof Real) (Option (Sequenceof Real)) -> statistics)))
(define (update-statistics* e xs [ws #f])
(cond [ws (let-values ([(xs ws) (sequences->weighted-samples 'update-statistics* xs ws)])
(for/fold: ([e : statistics e]) ([x (in-list xs)] [w (in-list ws)])
(update-statistics e x w)))]
[else (for/fold: ([e : statistics e]) ([x xs])
(update-statistics e x 1.0))]))
;; ===================================================================================================
(: statistics-min (statistics -> Flonum))
(define (statistics-min e) (base-statistics-min e))
(: statistics-max (statistics -> Flonum))
(define (statistics-max e) (base-statistics-max e))
(: statistics-count (statistics -> Nonnegative-Flonum))
(define (statistics-count e) (base-statistics-count e))
(: statistics-range (statistics -> Nonnegative-Flonum))
(define (statistics-range e)
(define mn (statistics-min e))

View File

@ -7,24 +7,6 @@
(provide (all-defined-out))
(define-type (Moment-Fun T)
(case-> ((Sequenceof Real) [#:bias (U #t #f Real)] -> T)
((Sequenceof Real) (Option (Sequenceof Real)) [#:bias (U #t #f Real)] -> T)))
(define-type (Moment/Mean-Fun T)
(case-> (Real (Sequenceof Real) [#:bias (U #t #f Real)] -> T)
(Real (Sequenceof Real) (Option (Sequenceof Real)) [#:bias (U #t #f Real)] -> T)))
(define-type (Correlation-Fun T)
(case-> ((Sequenceof Real) (Sequenceof Real) [#:bias (U #t #f Real)] -> T)
((Sequenceof Real) (Sequenceof Real) (Option (Sequenceof Real))
[#:bias (U #t #f Real)] -> T)))
(define-type (Correlation/Means-Fun T)
(case-> (Real Real (Sequenceof Real) (Sequenceof Real) [#:bias (U #t #f Real)] -> T)
(Real Real (Sequenceof Real) (Sequenceof Real) (Option (Sequenceof Real))
[#:bias (U #t #f Real)] -> T)))
;; ===================================================================================================
(: find-near-pow2 (Real -> Nonnegative-Exact-Rational))

View File

@ -3,6 +3,7 @@
@(require scribble/eval
racket/sandbox
(for-label racket/base racket/vector racket/match racket/unsafe/ops racket/string
racket/list
math plot
(only-in typed/racket/base
ann inst : λ: define: make-predicate ->
@ -1377,6 +1378,9 @@ It therefore requires axis @racket[k] to have positive length.
(array-axis-fold arr 1 (inst cons Index (Listof Index)) empty)]
Notice that the second example returns an array of reversed lists.
This is therefore a left fold; see @racket[foldl].
If you need control over the order of evaluation in axis @racket[k]'s rows, see
@racket[array-axis-reduce].
}
@deftogether[(@defform*[((array-axis-sum arr k)
@ -1519,6 +1523,18 @@ and @racket[array-all-or] is defined similarly.
(array-all-or (array= arr (array 0)))]
}
@defproc[(array->list-array [arr (Array A)] [k Integer]) (Array (Listof A))]{
Returns an array of lists, computed as if by applying @racket[list] to the elements in each row of
axis @racket[k].
@examples[#:eval typed-eval
(define arr (index-array #(3 3)))
arr
(array->list-array arr 1)
(array-ref (array->list-array (array->list-array arr 1) 0) #())]
See @racket[mean] for more useful examples, and @racket[array-axis-reduce] for an example that shows
how @racket[array->list-array] is implemented.
}
@defproc[(array-axis-reduce [arr (Array A)] [k Integer] [h (Index (Integer -> A) -> B)]) (Array B)]{
Like @racket[array-axis-fold], but allows evaluation control (such as short-cutting @racket[and] and
@racket[or]) by moving the loop into @racket[h]. The result has the shape of @racket[arr], but with
@ -1538,6 +1554,14 @@ For example, summing the squares of the rows in axis @racket[1]:
(for/fold: ([s : Real 0]) ([jk (in-range dk)])
(+ s (sqr (proc jk))))))
(array-axis-sum (array-map sqr arr) 1)]
Transforming an axis into a list using @racket[array-axis-fold] and @racket[array-axis-reduce]:
@interaction[#:eval typed-eval
(array-map (inst reverse Index)
(array-axis-fold arr 1
(inst cons Index (Listof Index))
empty))
(array-axis-reduce arr 1 (inst build-list Index))]
The latter is essentially the definition of @racket[array->list-array].
Every fold, including @racket[array-axis-fold], is ultimately defined using
@racket[array-axis-reduce] or its unsafe counterpart.

View File

@ -6,39 +6,205 @@
math plot
(only-in typed/racket/base
Flonum Real Boolean Any Listof Integer case-> -> U
Sequenceof Positive-Flonum Nonnegative-Flonum))
Sequenceof Positive-Flonum Nonnegative-Flonum
Nonnegative-Real))
"utils.rkt")
@(define untyped-eval (make-untyped-math-eval))
@interaction-eval[#:eval untyped-eval (require racket/list)]
@(define typed-eval (make-math-eval))
@interaction-eval[#:eval typed-eval (require)]
@title[#:tag "stats"]{Statistical Functions}
@title[#:tag "stats"]{Statistics Functions}
@(author-neil)
@defmodule[math/statistics]
@bold{Warning:} The documentation for this module is delayed while the
types for weighted samples get reworked. Please wait.
This module exports functions that compute summary values for collections of data, or
@deftech{statistics}, such as means, standard devations, medians, and @italic{k}th order
statistics. It also exports functions for managing collections of sample values.
@;{something about accepting weighted samples whenever it makes sense
(time it doesn't make sense: autocorrelation)}
Most of the functions that compute statistics also accept a sequence of nonnegative reals
that correspond one-to-one with the data values.
These are used as weights; equivalently counts, pseudocounts or proportions.
While this makes it easy to work with weighted samples, it introduces some subtleties
in bias correction.
In particular, central moments must be computed without bias correction by default.
See @secref{stats:expected-values} for a discussion.
@local-table-of-contents[]
@section{Counting}
@defthing[samples->hash Any]{
This stub represents forthcoming documentation.
}
@defthing[count-samples Any]{
This stub represents forthcoming documentation.
}
@section{Expected Values}
@section[#:tag "stats:expected-values"]{Expected Values}
@section{Running Expected Values}
Functions documented in this section that compute higher central moments, such as @racket[variance],
@racket[stddev] and @racket[skewness], can optionally apply bias correction to their estimates.
For example, when @racket[variance] is given the argument @racket[#:bias #t], it
multiplies the result by @racket[(/ n (- n 1))], where @racket[n] is the number of samples.
The meaning of ``bias correction'' becomes less clear with weighted samples, however. Often, the
weights represent counts, so when moment-estimating functions receive @racket[#:bias #t], they
interpret it as ``use the sum of @racket[ws] for @racket[n].''
In the following example, the sample @racket[4] is first counted twice and then given weight
@racket[2]; therefore @racket[n = 5] in both cases:
@interaction[#:eval typed-eval
(variance '(1 2 3 4 4) #:bias #t)
(variance '(1 2 3 4) '(1 1 1 2) #:bias #t)]
However, sample weights often do not represent counts. For these cases, the @racket[#:bias]
keyword can be followed by a real-valued pseudocount, which is used for @racket[n]:
@interaction[#:eval typed-eval
(variance '(1 2 3 4) '(1/2 1/2 1/2 1) #:bias 5)]
Because the magnitude of the bias correction for weighted samples cannot be known without user
guidance, in all cases, the bias argument defaults to @racket[#f].
@defproc[(mean [xs (Sequenceof Real)] [ws (U #f (Sequenceof Real)) #f]) Real]{
When @racket[ws] is @racket[#f] (the default), returns the sample mean of the values in @racket[xs].
Otherwise, returns the weighted sample mean of the values in @racket[xs] with corresponding weights
@racket[ws].
@examples[#:eval typed-eval
(mean '(1 2 3 4 5))
(mean '(1 2 3 4 5) '(1 1 1 1 10.0))
(define d (normal-dist))
(mean (sample d 10000))
(define arr (array-strict (build-array #(5 1000) (λ (_) (sample d)))))
(array-map mean (array->list-array arr 1))]
}
@deftogether[(@defproc[(variance [xs (Sequenceof Real)]
[ws (U #f (Sequenceof Real)) #f]
[#:bias bias (U #t #f Real) #f])
Nonnegative-Real]
@defproc[(stddev [xs (Sequenceof Real)]
[ws (U #f (Sequenceof Real)) #f]
[#:bias bias (U #t #f Real) #f])
Nonnegative-Real]
@defproc[(skewness [xs (Sequenceof Real)]
[ws (U #f (Sequenceof Real)) #f]
[#:bias bias (U #t #f Real) #f])
Real]
@defproc[(kurtosis [xs (Sequenceof Real)]
[ws (U #f (Sequenceof Real)) #f]
[#:bias bias (U #t #f Real) #f])
Nonnegative-Real])]{
If @racket[ws] is @racket[#f], these compute the sample variance, standard deviation, skewness
and excess kurtosis the samples in @racket[xs].
If @racket[ws] is not @racket[#f], they compute weighted variations of the same.
@examples[#:eval typed-eval
(stddev '(1 2 3 4 5))
(stddev '(1 2 3 4 5) '(1 1 1 1 10))]
See @secref{stats:expected-values} for the meaning of the @racket[bias] keyword argument.
}
@deftogether[(@defproc[(variance/mean [mean Real]
[xs (Sequenceof Real)]
[ws (U #f (Sequenceof Real)) #f]
[#:bias bias (U #t #f Real) #f])
Nonnegative-Real]
@defproc[(stddev/mean [mean Real]
[xs (Sequenceof Real)]
[ws (U #f (Sequenceof Real)) #f]
[#:bias bias (U #t #f Real) #f])
Nonnegative-Real]
@defproc[(skewness/mean [mean Real]
[xs (Sequenceof Real)]
[ws (U #f (Sequenceof Real)) #f]
[#:bias bias (U #t #f Real) #f])
Real]
@defproc[(kurtosis/mean [mean Real]
[xs (Sequenceof Real)]
[ws (U #f (Sequenceof Real)) #f]
[#:bias bias (U #t #f Real) #f])
Nonnegative-Real])]{
Like @racket[variance], @racket[stddev], @racket[skewness] and @racket[kurtosis], but computed
using known mean @racket[mean].
}
@section[#:tag "stats:running"]{Running Expected Values}
The @racket[statistics] object allows computing the sample minimum, maximum, count, mean, variance,
skewness, and excess kurtosis of any number of samples in O(1) space.
To use it, start with @racket[empty-statistics], then use @racket[update-statistics] to obtain a
new statistics object with updated values. Use @racket[statistics-min], @racket[statistics-mean],
and similar functions to get the current estimates.
@examples[#:eval typed-eval
(let* ([s empty-statistics]
[s (update-statistics s 1)]
[s (update-statistics s 2)]
[s (update-statistics s 3)]
[s (update-statistics s 4 2)])
(values (statistics-mean s)
(statistics-stddev s #:bias #t)))]
@defstruct*[statistics ([min Flonum]
[max Flonum]
[count Nonnegative-Flonum])]{
Represents running statistics.
The @racket[min] and @racket[max] fields are the minimum and maximum
value observed so far, and the @racket[count] field is the total weight of the samples (which is the
number of samples if all samples are unweighted).
The remaining, hidden fields are used to compute moments, and their number and meaning may change in
future releases.
}
@defthing[empty-statistics statistics]{
The empty statistics object.
@examples[#:eval typed-eval
(statistics-min empty-statistics)
(statistics-max empty-statistics)
(statistics-range empty-statistics)
(statistics-count empty-statistics)
(statistics-mean empty-statistics)
(statistics-variance empty-statistics)
(statistics-skewness empty-statistics)
(statistics-kurtosis empty-statistics)]
}
@defproc[(update-statistics [s statistics] [x Real] [w Real 1.0]) statistics]{
Returns a new statistics object that includes @racket[x] in the computed statistics. If @racket[w]
is given, @racket[x] is weighted by @racket[w] in the moment computations.
}
@defproc[(update-statistics* [s statistics]
[xs (Sequenceof Real)]
[ws (U #f (Sequenceof Real)) #f])
statistics]{
Like @racket[update-statistics], but includes all of @racket[xs], possibly weighted by corresponding
elements in @racket[ws], in the returned statistics object.
@examples[#:eval typed-eval
(define s (update-statistics* empty-statistics '(1 2 3 4) '(1 1 1 2)))
(statistics-mean s)
(statistics-stddev s #:bias #t)]
}
@deftogether[(@defproc[(statistics-range [s statistics]) Nonnegative-Flonum]
@defproc[(statistics-mean [s statistics]) Flonum]
@defproc[(statistics-variance [s statistics] [#:bias bias (U #t #f Real) #f])
Nonnegative-Flonum]
@defproc[(statistics-stddev [s statistics] [#:bias bias (U #t #f Real) #f])
Nonnegative-Flonum]
@defproc[(statistics-skewness [s statistics] [#:bias bias (U #t #f Real) #f])
Flonum]
@defproc[(statistics-kurtosis [s statistics] [#:bias bias (U #t #f Real) #f])
Nonnegative-Flonum])]{
Compute the range, mean, variance, standard deviation, skewness, and excess kurtosis of the
observations summarized in @racket[s].
See @secref{stats:expected-values} for the meaning of the @racket[bias] keyword argument.
}
@section{Correlation}
@section{Order Statistics}
@(close-eval untyped-eval)
@(close-eval typed-eval)

View File

@ -17,7 +17,7 @@ for working with numbers and collections of numbers. These include
@item{Number-theoretic functions}
@item{Functional arrays for storing and operating on large rectangular data sets}
@item{Probability distributions}
@item{Statistical functions (currently undergoing refactoring)}
@item{Statistical functions (currently being documented)}
@item{Linear algebra functions (currently under review)}
]

View File

@ -389,6 +389,7 @@
+nan.bf)
(list +nan.bf +nan.bf +nan.bf -inf.bf -inf.bf +inf.bf +inf.bf 1.bf +inf.bf +inf.bf
+nan.bf))
#;
(list
bfpsi0
(list -inf.bf -1.bf -min.bf -0.bf 0.bf +min.bf