
* Fixed and added tests for `quantile' and `median', documented them * Added `sort-samples', documented it * Removed `real-quantile' and `real-median' (too many design choices right now; will revisit when implementing Kernel Density Estimators) * Documented `absdev' and `absdev/median' * Fixed `update-statistics*': now uses O(1) space as advertised (if the sequences of values and weights both use O(1) space) * Changed types of binning functions: allows using #:key in the future (when TR supports function type cases that differ only by keyword argument types better), places optional weights at the end like other statistics functions * Clarified binning docs about sort stability and half-open intervals
171 lines
7.7 KiB
Racket
171 lines
7.7 KiB
Racket
#lang typed/racket/base
|
|
|
|
(require racket/list
|
|
racket/sequence
|
|
"../../base.rkt"
|
|
"../unsafe.rkt"
|
|
"statistics-utils.rkt")
|
|
|
|
(provide samples->hash
|
|
count-samples
|
|
(struct-out sample-bin)
|
|
sample-bin-compact
|
|
sample-bin-total
|
|
bin-samples
|
|
bin-samples/key)
|
|
|
|
;; ===================================================================================================
|
|
;; Hashing
|
|
|
|
(: unweighted-samples->hash (All (A) ((Sequenceof A) -> (HashTable A Positive-Integer))))
|
|
(define (unweighted-samples->hash xs)
|
|
(define: h : (HashTable A Positive-Integer) (make-hash))
|
|
(for: ([x : A xs])
|
|
(hash-set! h x (unsafe-fx+ 1 (hash-ref h x (λ () 0)))))
|
|
h)
|
|
|
|
(: weighted-samples->hash (All (A) ((Sequenceof A) (Sequenceof Real)
|
|
-> (HashTable A Nonnegative-Real))))
|
|
(define (weighted-samples->hash xs ws)
|
|
(let-values ([(xs ws) (sequences->weighted-samples 'samples->hash xs ws)])
|
|
(define: h : (HashTable A Nonnegative-Real) (make-hash))
|
|
(for: ([x : A xs] [w : Nonnegative-Real ws])
|
|
(hash-set! h x (+ w (hash-ref h x (λ () 0)))))
|
|
h))
|
|
|
|
(: samples->hash
|
|
(All (A) (case-> ((Sequenceof A) -> (HashTable A Positive-Integer))
|
|
((Sequenceof A) (U #f (Sequenceof Real)) -> (HashTable A Nonnegative-Real)))))
|
|
(define samples->hash
|
|
(case-lambda
|
|
[(xs) (unweighted-samples->hash xs)]
|
|
[(xs ws) (cond [ws (weighted-samples->hash xs ws)]
|
|
[else (weighted-samples->hash xs (sequence-map (λ (_) 1) xs))])]))
|
|
|
|
;; ===================================================================================================
|
|
;; Counting
|
|
|
|
(: count-unweighted-samples
|
|
(All (A) ((Sequenceof A) -> (Values (Listof A) (Listof Positive-Integer)))))
|
|
(define (count-unweighted-samples xs)
|
|
(let ([xs (sequence->list xs)])
|
|
(define h (samples->hash xs))
|
|
(let ([xs (remove-duplicates xs)])
|
|
(values xs (map (λ: ([x : A]) (hash-ref h x)) xs)))))
|
|
|
|
(: count-weighted-samples
|
|
(All (A) ((Sequenceof A) (Sequenceof Real) -> (Values (Listof A) (Listof Nonnegative-Real)))))
|
|
(define (count-weighted-samples xs ws)
|
|
(let-values ([(xs ws) (sequences->weighted-samples 'count-samples xs ws)])
|
|
(define h (weighted-samples->hash xs ws))
|
|
(let ([xs (remove-duplicates xs)])
|
|
(values xs (map (λ: ([x : A]) (hash-ref h x)) xs)))))
|
|
|
|
(: count-samples
|
|
(All (A) (case-> ((Sequenceof A) -> (Values (Listof A) (Listof Positive-Integer)))
|
|
((Sequenceof A) (U #f (Sequenceof Real))
|
|
-> (Values (Listof A) (Listof Nonnegative-Real))))))
|
|
(define count-samples
|
|
(case-lambda
|
|
[(xs) (count-unweighted-samples xs)]
|
|
[(xs ws) (if ws (count-weighted-samples xs ws) (count-unweighted-samples xs))]))
|
|
|
|
;; ===================================================================================================
|
|
;; Bins
|
|
|
|
(struct: (A B) sample-bin
|
|
([min : B] [max : B] [values : (Listof A)] [weights : (U #f (Listof Nonnegative-Real))])
|
|
#:transparent)
|
|
|
|
(: sample-bin-compact (All (A B) ((sample-bin A B) -> (sample-bin A B))))
|
|
(define (sample-bin-compact bin)
|
|
(let-values ([(xs ws) (count-samples (sample-bin-values bin) (sample-bin-weights bin))])
|
|
(sample-bin (sample-bin-min bin) (sample-bin-max bin) xs ws)))
|
|
|
|
(: sample-bin-total (All (A B) ((sample-bin A B) -> Nonnegative-Real)))
|
|
(define (sample-bin-total bin)
|
|
(define ws (sample-bin-weights bin))
|
|
(if ws (assert (sum ws) nonnegative?) (length (sample-bin-values bin))))
|
|
|
|
;; ===================================================================================================
|
|
;; Binning
|
|
|
|
(: list-split-after (All (A) ((Listof A) (A -> Any) -> (Values (Listof A) (Listof A)))))
|
|
(define (list-split-after xs pred?)
|
|
(let: loop : (Values (Listof A) (Listof A)) ([xs : (Listof A) xs]
|
|
[ys : (Listof A) empty])
|
|
(cond [(empty? xs) (values (reverse ys) xs)]
|
|
[else
|
|
(define x (first xs))
|
|
(cond [(pred? x) (loop (rest xs) (cons x ys))]
|
|
[else (values (reverse ys) xs)])])))
|
|
|
|
(: bin-unweighted-samples/key
|
|
(All (A B) ((Sequenceof B) (B B -> Any) (A -> B) (Sequenceof A) -> (Listof (sample-bin A B)))))
|
|
(define (bin-unweighted-samples/key bnds lte? key xs)
|
|
(: lt? (B B -> Boolean))
|
|
(define (lt? x1 x2)
|
|
(and (lte? x1 x2) (not (lte? x2 x1))))
|
|
(let* ([bnds (sort (sequence->list bnds) lt?)]
|
|
[xs (sequence->list xs)]
|
|
[xks (map (λ: ([x : A]) (cons x (key x))) xs)]
|
|
[xks (sort xks (λ: ([xk1 : (Pair A B)] [xk2 : (Pair A B)])
|
|
(lt? (cdr xk1) (cdr xk2))))])
|
|
(cond
|
|
[(empty? bnds)
|
|
(cond [(empty? xks) empty]
|
|
[else (define min (cdr (first xks)))
|
|
(define max (cdr (last xks)))
|
|
(list (sample-bin min max (map (inst car A B) xks) #f))])]
|
|
[else
|
|
(let: loop : (Listof (sample-bin A B)) ([min : (U #f B) #f]
|
|
[max : B (first bnds)]
|
|
[bnds : (Listof B) (rest bnds)]
|
|
[xks : (Listof (Pair A B)) xks]
|
|
[bins : (Listof (sample-bin A B)) empty])
|
|
(let-values ([(yks xks) (list-split-after xks (λ: ([xk : (Pair A B)])
|
|
(lte? (cdr xk) max)))])
|
|
(define maybe-bin
|
|
(cond [min (list (sample-bin min max (map (inst car A B) yks) #f))]
|
|
[(empty? yks) empty]
|
|
[else (list (sample-bin (cdr (first yks)) max (map (inst car A B) yks) #f))]))
|
|
(cond [(empty? bnds)
|
|
(cond [(empty? xks) (reverse (append maybe-bin bins))]
|
|
[else
|
|
(define bin2
|
|
(sample-bin max (cdr (last xks)) (map (inst car A B) xks) #f))
|
|
(reverse (append (cons bin2 maybe-bin) bins))])]
|
|
[else
|
|
(loop max (first bnds) (rest bnds) xks (append maybe-bin bins))])))])))
|
|
|
|
(: bin-weighted-samples/key
|
|
(All (A B) ((Sequenceof B) (B B -> Any) (A -> B) (Sequenceof A) (Sequenceof Real)
|
|
-> (Listof (sample-bin A B)))))
|
|
(define (bin-weighted-samples/key bnds lte? key xs ws)
|
|
(let-values ([(xs ws) (sequences->weighted-samples 'bin-samples/key xs ws)])
|
|
(define xws (map (inst cons A Nonnegative-Real) xs ws))
|
|
(define xw-key (λ: ([xw : (Pair A Nonnegative-Real)]) (key (car xw))))
|
|
(map (λ: ([bin : (sample-bin (Pair A Nonnegative-Real) B)])
|
|
(define xws (sample-bin-values bin))
|
|
(sample-bin (sample-bin-min bin)
|
|
(sample-bin-max bin)
|
|
(map (inst car A Nonnegative-Real) xws)
|
|
(map (inst cdr A Nonnegative-Real) xws)))
|
|
(bin-unweighted-samples/key bnds lte? xw-key xws))))
|
|
|
|
(: bin-samples/key
|
|
(All (A B)
|
|
(case-> ((Sequenceof B) (B B -> Any) (A -> B) (Sequenceof A) -> (Listof (sample-bin A B)))
|
|
((Sequenceof B) (B B -> Any) (A -> B) (Sequenceof A) (U #f (Sequenceof Real))
|
|
-> (Listof (sample-bin A B))))))
|
|
(define (bin-samples/key bnds lte? key xs [ws #f])
|
|
(cond [ws (bin-weighted-samples/key bnds lte? key xs ws)]
|
|
[else (bin-unweighted-samples/key bnds lte? key xs)]))
|
|
|
|
(: bin-samples
|
|
(All (A) (case-> ((Sequenceof A) (A A -> Any) (Sequenceof A) -> (Listof (sample-bin A A)))
|
|
((Sequenceof A) (A A -> Any) (Sequenceof A) (Sequenceof Real)
|
|
-> (Listof (sample-bin A A))))))
|
|
(define (bin-samples bnds lte? xs [ws #f])
|
|
(bin-samples/key bnds lte? (λ: ([x : A]) x) xs ws))
|