racket/collects/math/private/statistics/counting.rkt
Neil Toronto 9865182df4 Fixes, docs, and API changes for `math/statistics'
* Fixed and added tests for `quantile' and `median', documented them

* Added `sort-samples', documented it

* Removed `real-quantile' and `real-median' (too many design choices
  right now; will revisit when implementing Kernel Density Estimators)

* Documented `absdev' and `absdev/median'

* Fixed `update-statistics*': now uses O(1) space as advertised (if the
  sequences of values and weights both use O(1) space)

* Changed types of binning functions: allows using #:key in the future
  (when TR supports function type cases that differ only by keyword
  argument types better), places optional weights at the end like other
  statistics functions

* Clarified binning docs about sort stability and half-open intervals
2012-12-10 16:45:18 -07:00

171 lines
7.7 KiB
Racket

#lang typed/racket/base
(require racket/list
racket/sequence
"../../base.rkt"
"../unsafe.rkt"
"statistics-utils.rkt")
(provide samples->hash
count-samples
(struct-out sample-bin)
sample-bin-compact
sample-bin-total
bin-samples
bin-samples/key)
;; ===================================================================================================
;; Hashing
(: unweighted-samples->hash (All (A) ((Sequenceof A) -> (HashTable A Positive-Integer))))
(define (unweighted-samples->hash xs)
(define: h : (HashTable A Positive-Integer) (make-hash))
(for: ([x : A xs])
(hash-set! h x (unsafe-fx+ 1 (hash-ref h x (λ () 0)))))
h)
(: weighted-samples->hash (All (A) ((Sequenceof A) (Sequenceof Real)
-> (HashTable A Nonnegative-Real))))
(define (weighted-samples->hash xs ws)
(let-values ([(xs ws) (sequences->weighted-samples 'samples->hash xs ws)])
(define: h : (HashTable A Nonnegative-Real) (make-hash))
(for: ([x : A xs] [w : Nonnegative-Real ws])
(hash-set! h x (+ w (hash-ref h x (λ () 0)))))
h))
(: samples->hash
(All (A) (case-> ((Sequenceof A) -> (HashTable A Positive-Integer))
((Sequenceof A) (U #f (Sequenceof Real)) -> (HashTable A Nonnegative-Real)))))
(define samples->hash
(case-lambda
[(xs) (unweighted-samples->hash xs)]
[(xs ws) (cond [ws (weighted-samples->hash xs ws)]
[else (weighted-samples->hash xs (sequence-map (λ (_) 1) xs))])]))
;; ===================================================================================================
;; Counting
(: count-unweighted-samples
(All (A) ((Sequenceof A) -> (Values (Listof A) (Listof Positive-Integer)))))
(define (count-unweighted-samples xs)
(let ([xs (sequence->list xs)])
(define h (samples->hash xs))
(let ([xs (remove-duplicates xs)])
(values xs (map (λ: ([x : A]) (hash-ref h x)) xs)))))
(: count-weighted-samples
(All (A) ((Sequenceof A) (Sequenceof Real) -> (Values (Listof A) (Listof Nonnegative-Real)))))
(define (count-weighted-samples xs ws)
(let-values ([(xs ws) (sequences->weighted-samples 'count-samples xs ws)])
(define h (weighted-samples->hash xs ws))
(let ([xs (remove-duplicates xs)])
(values xs (map (λ: ([x : A]) (hash-ref h x)) xs)))))
(: count-samples
(All (A) (case-> ((Sequenceof A) -> (Values (Listof A) (Listof Positive-Integer)))
((Sequenceof A) (U #f (Sequenceof Real))
-> (Values (Listof A) (Listof Nonnegative-Real))))))
(define count-samples
(case-lambda
[(xs) (count-unweighted-samples xs)]
[(xs ws) (if ws (count-weighted-samples xs ws) (count-unweighted-samples xs))]))
;; ===================================================================================================
;; Bins
(struct: (A B) sample-bin
([min : B] [max : B] [values : (Listof A)] [weights : (U #f (Listof Nonnegative-Real))])
#:transparent)
(: sample-bin-compact (All (A B) ((sample-bin A B) -> (sample-bin A B))))
(define (sample-bin-compact bin)
(let-values ([(xs ws) (count-samples (sample-bin-values bin) (sample-bin-weights bin))])
(sample-bin (sample-bin-min bin) (sample-bin-max bin) xs ws)))
(: sample-bin-total (All (A B) ((sample-bin A B) -> Nonnegative-Real)))
(define (sample-bin-total bin)
(define ws (sample-bin-weights bin))
(if ws (assert (sum ws) nonnegative?) (length (sample-bin-values bin))))
;; ===================================================================================================
;; Binning
(: list-split-after (All (A) ((Listof A) (A -> Any) -> (Values (Listof A) (Listof A)))))
(define (list-split-after xs pred?)
(let: loop : (Values (Listof A) (Listof A)) ([xs : (Listof A) xs]
[ys : (Listof A) empty])
(cond [(empty? xs) (values (reverse ys) xs)]
[else
(define x (first xs))
(cond [(pred? x) (loop (rest xs) (cons x ys))]
[else (values (reverse ys) xs)])])))
(: bin-unweighted-samples/key
(All (A B) ((Sequenceof B) (B B -> Any) (A -> B) (Sequenceof A) -> (Listof (sample-bin A B)))))
(define (bin-unweighted-samples/key bnds lte? key xs)
(: lt? (B B -> Boolean))
(define (lt? x1 x2)
(and (lte? x1 x2) (not (lte? x2 x1))))
(let* ([bnds (sort (sequence->list bnds) lt?)]
[xs (sequence->list xs)]
[xks (map (λ: ([x : A]) (cons x (key x))) xs)]
[xks (sort xks (λ: ([xk1 : (Pair A B)] [xk2 : (Pair A B)])
(lt? (cdr xk1) (cdr xk2))))])
(cond
[(empty? bnds)
(cond [(empty? xks) empty]
[else (define min (cdr (first xks)))
(define max (cdr (last xks)))
(list (sample-bin min max (map (inst car A B) xks) #f))])]
[else
(let: loop : (Listof (sample-bin A B)) ([min : (U #f B) #f]
[max : B (first bnds)]
[bnds : (Listof B) (rest bnds)]
[xks : (Listof (Pair A B)) xks]
[bins : (Listof (sample-bin A B)) empty])
(let-values ([(yks xks) (list-split-after xks (λ: ([xk : (Pair A B)])
(lte? (cdr xk) max)))])
(define maybe-bin
(cond [min (list (sample-bin min max (map (inst car A B) yks) #f))]
[(empty? yks) empty]
[else (list (sample-bin (cdr (first yks)) max (map (inst car A B) yks) #f))]))
(cond [(empty? bnds)
(cond [(empty? xks) (reverse (append maybe-bin bins))]
[else
(define bin2
(sample-bin max (cdr (last xks)) (map (inst car A B) xks) #f))
(reverse (append (cons bin2 maybe-bin) bins))])]
[else
(loop max (first bnds) (rest bnds) xks (append maybe-bin bins))])))])))
(: bin-weighted-samples/key
(All (A B) ((Sequenceof B) (B B -> Any) (A -> B) (Sequenceof A) (Sequenceof Real)
-> (Listof (sample-bin A B)))))
(define (bin-weighted-samples/key bnds lte? key xs ws)
(let-values ([(xs ws) (sequences->weighted-samples 'bin-samples/key xs ws)])
(define xws (map (inst cons A Nonnegative-Real) xs ws))
(define xw-key (λ: ([xw : (Pair A Nonnegative-Real)]) (key (car xw))))
(map (λ: ([bin : (sample-bin (Pair A Nonnegative-Real) B)])
(define xws (sample-bin-values bin))
(sample-bin (sample-bin-min bin)
(sample-bin-max bin)
(map (inst car A Nonnegative-Real) xws)
(map (inst cdr A Nonnegative-Real) xws)))
(bin-unweighted-samples/key bnds lte? xw-key xws))))
(: bin-samples/key
(All (A B)
(case-> ((Sequenceof B) (B B -> Any) (A -> B) (Sequenceof A) -> (Listof (sample-bin A B)))
((Sequenceof B) (B B -> Any) (A -> B) (Sequenceof A) (U #f (Sequenceof Real))
-> (Listof (sample-bin A B))))))
(define (bin-samples/key bnds lte? key xs [ws #f])
(cond [ws (bin-weighted-samples/key bnds lte? key xs ws)]
[else (bin-unweighted-samples/key bnds lte? key xs)]))
(: bin-samples
(All (A) (case-> ((Sequenceof A) (A A -> Any) (Sequenceof A) -> (Listof (sample-bin A A)))
((Sequenceof A) (A A -> Any) (Sequenceof A) (Sequenceof Real)
-> (Listof (sample-bin A A))))))
(define (bin-samples bnds lte? xs [ws #f])
(bin-samples/key bnds lte? (λ: ([x : A]) x) xs ws))