racket/collects/math/private/statistics/statistics-utils.rkt
Neil Toronto 9865182df4 Fixes, docs, and API changes for `math/statistics'
* Fixed and added tests for `quantile' and `median', documented them

* Added `sort-samples', documented it

* Removed `real-quantile' and `real-median' (too many design choices
  right now; will revisit when implementing Kernel Density Estimators)

* Documented `absdev' and `absdev/median'

* Fixed `update-statistics*': now uses O(1) space as advertised (if the
  sequences of values and weights both use O(1) space)

* Changed types of binning functions: allows using #:key in the future
  (when TR supports function type cases that differ only by keyword
  argument types better), places optional weights at the end like other
  statistics functions

* Clarified binning docs about sort stability and half-open intervals
2012-12-10 16:45:18 -07:00

142 lines
6.0 KiB
Racket

#lang typed/racket/base
(require racket/sequence
racket/list
"../../flonum.rkt"
"../../base.rkt")
(provide (all-defined-out))
;; ===================================================================================================
(: lte?->lt? (All (A) ((A A -> Any) -> (A A -> Boolean))))
(define ((lte?->lt? lte?) x1 x2)
(and (lte? x1 x2) (not (lte? x2 x1))))
(: find-near-pow2 (Real -> Nonnegative-Exact-Rational))
(define (find-near-pow2 x)
(expt 2 (max -1073 (min 1023 (exact-round (/ (log (abs x)) (fllog 2.0)))))))
(: weights->list (Symbol (Sequenceof Real) -> (Listof Nonnegative-Real)))
(define (weights->list name w-seq)
(for/list: : (Listof Nonnegative-Real) ([w w-seq])
(cond [(w . >= . 0) w]
[else (raise-argument-error name "(Sequenceof Nonnegative-Real)" w-seq)])))
(: weights->normalized-weights (Symbol (Sequenceof Real) -> (Listof Nonnegative-Flonum)))
(define (weights->normalized-weights name ws)
(let ([ws (weights->list name ws)])
(when (empty? ws) (raise-argument-error name "nonempty (Sequenceof Real)" ws))
(define max-w (find-near-pow2 (apply max ws)))
(let ([ws (map (λ: ([w : Real]) (/ w max-w)) ws)])
(define total-w (sum ws))
(map (λ: ([w : Real]) (assert (fl (/ w total-w)) nonnegative?)) ws))))
;; ===================================================================================================
(: check-lengths! (All (A B) (Symbol String A B Index Index -> Void)))
(define (check-lengths! name what xs ys m n)
(unless (= m n) (error name "~a must be the same length; given ~e (length ~a) and ~e (length ~a)"
what xs m ys n)))
(: sequences->weighted-samples
(All (A) (Symbol (Sequenceof A) (Sequenceof Real)
-> (Values (Listof A) (Listof Nonnegative-Real)))))
(define (sequences->weighted-samples name x-seq w-seq)
(define xs (sequence->list x-seq))
(define ws
(for/list: : (Listof Nonnegative-Real) ([w w-seq])
(cond [(w . >= . 0) w]
[else (raise-argument-error name "(Sequenceof Nonnegative-Real)" 1 x-seq w-seq)])))
(check-lengths! name "values and weights" xs ws (length xs) (length ws))
(values xs ws))
(define nonnegative? (λ: ([x : Real]) (not (negative? x))))
(: sequences->normalized-weighted-samples
(All (A) (Symbol (Sequenceof A) (Sequenceof Real)
-> (Values (Listof A) (Listof Positive-Flonum)))))
(define (sequences->normalized-weighted-samples name xs ws)
(let-values ([(xs ws) (sequences->weighted-samples name xs ws)])
(when (empty? xs) (raise-argument-error name "nonempty (Sequenceof A)" 0 xs ws))
(define max-w (find-near-pow2 (assert (apply max ws) nonnegative?)))
(let ([ws (map (λ: ([w : Nonnegative-Real]) (/ w max-w)) ws)])
(define total-w (sum ws))
(let loop ([xs xs]
[ws ws]
[#{new-xs : (Listof A)} empty]
[#{new-ws : (Listof Positive-Flonum)} empty])
(cond [(or (empty? xs) (empty? ws)) (values (reverse new-xs) (reverse new-ws))]
[else
(define w (fl (/ (first ws) total-w)))
(cond [(w . > . 0.0)
(loop (rest xs) (rest ws) (cons (first xs) new-xs) (cons w new-ws))]
[else
(loop (rest xs) (rest ws) new-xs new-ws)])])))))
(: sequence->normalized-weighted-samples
(All (A) (Symbol (Sequenceof A) -> (Values (Listof A) (Listof Positive-Flonum)))))
(define (sequence->normalized-weighted-samples name xs)
(let ([xs (sequence->list xs)])
(when (empty? xs) (raise-argument-error name "nonempty (Sequenceof A)" xs))
(define n (length xs))
(define w (assert (fl/ 1.0 (fl n)) positive?))
(values xs (build-list n (λ (i) w)))))
(: sequence->vector (All (A) ((Sequenceof A) -> (Vectorof A))))
(define (sequence->vector vs)
(for/vector: ([v vs]) : A v))
(: sequences->weighted-sample-vectors
(All (A) (Symbol (Sequenceof A) (Sequenceof Real)
-> (Values (Vectorof A) (Vectorof Nonnegative-Real)))))
(define (sequences->weighted-sample-vectors name x-seq w-seq)
(define xs (sequence->vector x-seq))
(define ws
(for/vector: ([w w-seq]) : Nonnegative-Real
(cond [(w . >= . 0) w]
[else (raise-argument-error name "(Sequenceof Nonnegative-Real)" 1 x-seq w-seq)])))
(check-lengths! name "values and weights" xs ws (vector-length xs) (vector-length ws))
(values xs ws))
;; ===================================================================================================
;; bias = #f Return the central moment
;; bias = #t Assume sum of weights is the count and correct for bias normally
;; bias = n Assume n actual samples; correct for bias
(: get-bias-adjustment (Nonnegative-Real (U #t Real) Positive-Real -> Positive-Real))
(define (get-bias-adjustment c bias mn)
(define n (if (real? bias) bias c))
(if (n . > . mn) n +nan.0))
(: adjust-variance (Nonnegative-Real Nonnegative-Real (U #t #f Real) -> Nonnegative-Real))
(define (adjust-variance m2 n bias)
(cond [bias
(let ([n (get-bias-adjustment n bias 1)])
(define c (max 0 (/ n (- n 1)))) ; max proves c >= 0
(* m2 c))]
[else m2]))
(: adjust-covariance (Real Nonnegative-Real (U #t #f Real) -> Real))
(define (adjust-covariance m2 n bias)
(cond [bias
(let ([n (get-bias-adjustment n bias 1)])
(* m2 (/ n (- n 1))))]
[else m2]))
(: adjust-skewness (Real Nonnegative-Real (U #t #f Real) -> Real))
(define (adjust-skewness g n bias)
(cond [bias
(let ([n (get-bias-adjustment n bias 2)])
(fl (* g (/ (sqrt (max 0 (* n (- n 1)))) (- n 2)))))]
[else (fl g)]))
(: adjust-kurtosis (Nonnegative-Real Nonnegative-Real (U #t #f Real) -> Nonnegative-Real))
(define (adjust-kurtosis g n bias)
(cond [bias
(let ([n (get-bias-adjustment n bias 3)])
(define c (max 0 (/ (- n 1) (* (- n 2) (- n 3))))) ; max proves c >= 0
(* (+ (* (+ n 1) g) 6) c))]
[else g]))