Fixes, docs, and API changes for `math/statistics'

* Fixed and added tests for `quantile' and `median', documented them * Added `sort-samples', documented it * Removed `real-quantile' and `real-median' (too many design choices right now; will revisit when implementing Kernel Density Estimators) * Documented `absdev' and `absdev/median' * Fixed `update-statistics*': now uses O(1) space as advertised (if the sequences of values and weights both use O(1) space) * Changed types of binning functions: allows using #:key in the future (when TR supports function type cases that differ only by keyword argument types better), places optional weights at the end like other statistics functions * Clarified binning docs about sort stability and half-open intervals
2012-12-10 16:39:36 -07:00 · 2012-12-10 16:39:36 -07:00 · 9865182df4
commit 9865182df4
parent 9c70f3373d
6 changed files with 309 additions and 204 deletions
--- a/collects/math/private/statistics/counting.rkt
+++ b/collects/math/private/statistics/counting.rkt
@ -12,7 +12,7 @@
         sample-bin-compact
         sample-bin-total
         bin-samples
-         bin-weighted-samples)
+         bin-samples/key)

 ;; ===================================================================================================
 ;; Hashing
@ -100,65 +100,71 @@
           (cond [(pred? x)  (loop (rest xs) (cons x ys))]
                 [else  (values (reverse ys) xs)])])))

-(: bin-samples
-   (All (A B)
-        (case-> ((Sequenceof A) (A A -> Any) (Sequenceof A) -> (Listof (sample-bin A A)))
-                ((Sequenceof B) (B B -> Any) (Sequenceof A) (A -> B) -> (Listof (sample-bin A B))))))
-(define bin-samples
-  (case-lambda
-    [(bnds lte? xs)  (bin-samples bnds lte? xs (λ: ([x : A]) x))]
-    [(bnds lte? xs key)
-     (let* ([bnds  (sort (sequence->list bnds) (λ: ([b1 : B] [b2 : B]) (and (lte? b1 b2) #t)))]
-            [xs    (sequence->list xs)]
-            [xks   (map (λ: ([x : A]) (cons x (key x))) xs)]
-            [xks   (sort xks (λ: ([xk1 : (Pair A B)] [xk2 : (Pair A B)])
-                               (and (lte? (cdr xk1) (cdr xk2)) #t)))])
-       (cond
-         [(empty? bnds)
-          (cond [(empty? xks)  empty]
-                [else  (define min (cdr (first xks)))
-                       (define max (cdr (last xks)))
-                       (list (sample-bin min max (map (inst car A B) xks) #f))])]
-         [else
-          (let: loop : (Listof (sample-bin A B)) ([min : (U #f B)  #f]
-                                                  [max : B  (first bnds)]
-                                                  [bnds : (Listof B)  (rest bnds)]
-                                                  [xks : (Listof (Pair A B))  xks]
-                                                  [bins : (Listof (sample-bin A B))  empty])
-            (let-values ([(yks xks)  (list-split-after xks (λ: ([xk : (Pair A B)])
-                                                             (lte? (cdr xk) max)))])
-              (define maybe-bin
-                (cond [min  (list (sample-bin min max (map (inst car A B) yks) #f))]
-                      [(empty? yks)  empty]
-                      [else  (list (sample-bin (cdr (first yks)) max (map (inst car A B) yks) #f))]))
-              (cond [(empty? bnds)
-                     (cond [(empty? xks)  (reverse (append maybe-bin bins))]
-                           [else
-                            (define bin2
-                              (sample-bin max (cdr (last xks)) (map (inst car A B) xks) #f))
-                            (reverse (append (cons bin2 maybe-bin) bins))])]
-                    [else
-                     (loop max (first bnds) (rest bnds) xks (append maybe-bin bins))])))]))]))
+(: bin-unweighted-samples/key
+   (All (A B) ((Sequenceof B) (B B -> Any) (A -> B) (Sequenceof A) -> (Listof (sample-bin A B)))))
+(define (bin-unweighted-samples/key bnds lte? key xs)
+  (: lt? (B B -> Boolean))
+  (define (lt? x1 x2)
+    (and (lte? x1 x2) (not (lte? x2 x1))))
+  (let* ([bnds  (sort (sequence->list bnds) lt?)]
+         [xs    (sequence->list xs)]
+         [xks   (map (λ: ([x : A]) (cons x (key x))) xs)]
+         [xks   (sort xks (λ: ([xk1 : (Pair A B)] [xk2 : (Pair A B)])
+                            (lt? (cdr xk1) (cdr xk2))))])
+    (cond
+      [(empty? bnds)
+       (cond [(empty? xks)  empty]
+             [else  (define min (cdr (first xks)))
+                    (define max (cdr (last xks)))
+                    (list (sample-bin min max (map (inst car A B) xks) #f))])]
+      [else
+       (let: loop : (Listof (sample-bin A B)) ([min : (U #f B)  #f]
+                                               [max : B  (first bnds)]
+                                               [bnds : (Listof B)  (rest bnds)]
+                                               [xks : (Listof (Pair A B))  xks]
+                                               [bins : (Listof (sample-bin A B))  empty])
+         (let-values ([(yks xks)  (list-split-after xks (λ: ([xk : (Pair A B)])
+                                                          (lte? (cdr xk) max)))])
+           (define maybe-bin
+             (cond [min  (list (sample-bin min max (map (inst car A B) yks) #f))]
+                   [(empty? yks)  empty]
+                   [else  (list (sample-bin (cdr (first yks)) max (map (inst car A B) yks) #f))]))
+           (cond [(empty? bnds)
+                  (cond [(empty? xks)  (reverse (append maybe-bin bins))]
+                        [else
+                         (define bin2
+                           (sample-bin max (cdr (last xks)) (map (inst car A B) xks) #f))
+                         (reverse (append (cons bin2 maybe-bin) bins))])]
+                 [else
+                  (loop max (first bnds) (rest bnds) xks (append maybe-bin bins))])))])))

-(: bin-weighted-samples
-   (All (A B) (case-> ((Sequenceof A) (A A -> Any) (Sequenceof A) (U #f (Sequenceof Real))
-                                      -> (Listof (sample-bin A A)))
-                      ((Sequenceof B) (B B -> Any) (Sequenceof A) (U #f (Sequenceof Real)) (A -> B)
-                                      -> (Listof (sample-bin A B))))))
-(define bin-weighted-samples
-  (case-lambda
-    [(bnds lte? xs ws)
-     (cond [ws  (bin-weighted-samples bnds lte? xs ws (λ: ([x : A]) x))]
-           [else  (bin-samples bnds lte? xs)])]
-    [(bnds lte? xs ws key)
-     (cond [ws  (let-values ([(xs ws)  (sequences->weighted-samples 'bin-samples xs ws)])
-                  (define xws (map (inst cons A Nonnegative-Real) xs ws))
-                  (define xw-key (λ: ([xw : (Pair A Nonnegative-Real)]) (key (car xw))))
-                  (map (λ: ([bin : (sample-bin (Pair A Nonnegative-Real) B)])
-                         (define xws (sample-bin-values bin))
-                         (sample-bin (sample-bin-min bin)
-                                     (sample-bin-max bin)
-                                     (map (inst car A Nonnegative-Real) xws)
-                                     (map (inst cdr A Nonnegative-Real) xws)))
-                       (bin-samples bnds lte? xws xw-key)))]
-           [else  (bin-samples bnds lte? xs key)])]))
+(: bin-weighted-samples/key
+   (All (A B) ((Sequenceof B) (B B -> Any) (A -> B) (Sequenceof A) (Sequenceof Real)
+                              -> (Listof (sample-bin A B)))))
+(define (bin-weighted-samples/key bnds lte? key xs ws)
+  (let-values ([(xs ws)  (sequences->weighted-samples 'bin-samples/key xs ws)])
+    (define xws (map (inst cons A Nonnegative-Real) xs ws))
+    (define xw-key (λ: ([xw : (Pair A Nonnegative-Real)]) (key (car xw))))
+    (map (λ: ([bin : (sample-bin (Pair A Nonnegative-Real) B)])
+           (define xws (sample-bin-values bin))
+           (sample-bin (sample-bin-min bin)
+                       (sample-bin-max bin)
+                       (map (inst car A Nonnegative-Real) xws)
+                       (map (inst cdr A Nonnegative-Real) xws)))
+         (bin-unweighted-samples/key bnds lte? xw-key xws))))
+
+(: bin-samples/key
+   (All (A B)
+        (case-> ((Sequenceof B) (B B -> Any) (A -> B) (Sequenceof A) -> (Listof (sample-bin A B)))
+                ((Sequenceof B) (B B -> Any) (A -> B) (Sequenceof A) (U #f (Sequenceof Real))
+                                -> (Listof (sample-bin A B))))))
+(define (bin-samples/key bnds lte? key xs [ws #f])
+  (cond [ws    (bin-weighted-samples/key bnds lte? key xs ws)]
+        [else  (bin-unweighted-samples/key bnds lte? key xs)]))
+
+(: bin-samples
+   (All (A) (case-> ((Sequenceof A) (A A -> Any) (Sequenceof A) -> (Listof (sample-bin A A)))
+                    ((Sequenceof A) (A A -> Any) (Sequenceof A) (Sequenceof Real)
+                                    -> (Listof (sample-bin A A))))))
+(define (bin-samples bnds lte? xs [ws #f])
+  (bin-samples/key bnds lte? (λ: ([x : A]) x) xs ws))
--- a/collects/math/private/statistics/order-statistics.rkt
+++ b/collects/math/private/statistics/order-statistics.rkt
@ -1,69 +1,80 @@
 #lang typed/racket/base

 (require racket/sequence
+         racket/list
         racket/fixnum
         "../../base.rkt"
         "quickselect.rkt"
         "statistics-utils.rkt")

-(provide quantile
+(provide sort-samples
+         quantile
         median
-         real-quantile
-         real-median
-         absolute-deviation/median
-         absolute-deviation)
+         absdev/median
+         absdev)
+
+(: sort-weighted-samples
+   (All (A) (Symbol (A A -> Any) (Sequenceof A) (Sequenceof Real)
+                    -> (Values (Listof A) (Listof Nonnegative-Real)))))
+(define (sort-weighted-samples name lt? xs ws)
+  (let-values ([(xs ws)  (sequences->weighted-samples name xs ws)])
+    (define xws ((inst sort (Pair A Nonnegative-Real) Real)
+                 (map (inst cons A Nonnegative-Real) xs ws)
+                 (λ: ([xw1 : (Pair A Nonnegative-Real)]
+                      [xw2 : (Pair A Nonnegative-Real)])
+                   (and (lt? (car xw1) (car xw2)) #t))))
+    (values (map (inst car A Nonnegative-Real) xws)
+            (map (inst cdr A Nonnegative-Real) xws))))
+
+(: sort-samples (All (A) (case-> ((A A -> Any) (Sequenceof A) -> (Listof A))
+                                 ((A A -> Any) (Sequenceof A) (U #f (Sequenceof Real))
+                                               -> (Values (Listof A) (Listof Nonnegative-Real))))))
+(define sort-samples
+  (case-lambda
+    [(lt? xs)
+     (sort (sequence->list xs)
+           (λ: ([x1 : A] [x2 : A])
+             (and (lt? x1 x2) #t)))]
+    [(lt? xs ws)
+     (cond [ws  (sort-weighted-samples 'sort-samples lt? xs ws)]
+           [else  (define ys (sort (sequence->list xs)
+                                   (λ: ([x1 : A] [x2 : A])
+                                     (and (lt? x1 x2) #t))))
+                  (values ys (make-list (length ys) 1))])]))

 (: quantile (All (A) (case-> (Real (A A -> Any) (Sequenceof A) -> A)
-                             (Real (A A -> Any) (Sequenceof A) (Option (Sequenceof Real)) -> A))))
+                             (Real (A A -> Any) (Sequenceof A) (U #f (Sequenceof Real)) -> A))))
 (define (quantile p lt? xs [ws #f])
  (cond [(or (p . < . 0) (p . > . 1))
-         (raise-argument-error 'quantile "Real in [0,1]" 0 p < xs ws)]
+         (raise-argument-error 'quantile "Real in [0,1]" 0 p lt? xs ws)]
        [ws
-         (let-values ([(xs ws)  (sequences->weighted-samples 'median xs ws)])
-           (define xws ((inst sort (Pair A Nonnegative-Real) Real)
-                        (map (inst cons A Nonnegative-Real) xs ws)
-                        (λ: ([xw1 : (Pair A Nonnegative-Real)]
-                             [xw2 : (Pair A Nonnegative-Real)])
-                          (if (lt? (car xw1) (car xw2)) #t #f))))
-           (let ([xs  (map (inst car A Nonnegative-Real) xws)]
-                 [ws  (map (inst cdr A Nonnegative-Real) xws)])
-             (define total-w (sum ws))
-             (cond [(zero? total-w)
-                    (raise-argument-error 'quantile "weights with positive sum" 3 p lt? xs ws)]
-                   [else
-                    (define thresh (* p total-w))
-                    (let loop ([xs (cdr xs)] [ws (cdr ws)] [x (car xs)] [s (car ws)])
-                      (cond [(s . > . thresh)  x]
-                            [(null? xs)  x]
-                            [else  (loop (cdr xs) (cdr ws) (car xs) (+ s (car ws)))]))])))]
+         (let-values ([(xs ws)  (sort-weighted-samples 'quantile lt? xs ws)])
+           (define total-w (sum ws))
+           (cond [(zero? total-w)
+                  (raise-argument-error 'quantile "weights with positive sum" 3 p lt? xs ws)]
+                 [else
+                  (let loop ([xs (cdr xs)] [ws (cdr ws)] [x (car xs)] [s (car ws)])
+                    (cond [((/ s total-w) . >= . p)  x]
+                          [(null? xs)  x]
+                          [else  (loop (cdr xs) (cdr ws) (car xs) (+ s (car ws)))]))]))]
        [else
         (let ([xs  (sequence->vector xs)])
           (define n (vector-length xs))
           (cond [(n . fx<= . 0)  (raise-argument-error 'quantile "nonempty Sequence" 2 p lt? xs)]
-                 [else  (kth-value! xs (min (- n 1) (exact-ceiling (* p n))) lt?)]))]))
+                 [else
+                  (define i (max 0 (- (exact-ceiling (* p n)) 1)))
+                  (kth-value! xs i lt?)]))]))

 (: median (All (A) (case-> ((A A -> Any) (Sequenceof A) -> A)
-                           ((A A -> Any) (Sequenceof A) (Option (Sequenceof Real)) -> A))))
+                           ((A A -> Any) (Sequenceof A) (U #f (Sequenceof Real)) -> A))))
 (define (median lt? xs [ws #f])
-  (cond [ws  (quantile 1/2 lt? xs ws)]
-        [else
-         (let ([xs  (sequence->vector xs)])
-           (define n (vector-length xs))
-           (cond [(n . fx<= . 0)  (raise-argument-error 'median "nonempty Sequence" xs)]
-                 [else  (kth-value! xs (quotient n 2) lt?)]))]))
+  (quantile 1/2 lt? xs ws))

-(: real-quantile (case-> (Real (Sequenceof Real) -> Real)
-                         (Real (Sequenceof Real) (Option (Sequenceof Real)) -> Real)))
-(define (real-quantile p xs [ws #f])
-  (quantile p < xs ws))
+;; ===================================================================================================
+;; Absolute deviation

-(: real-median (case-> ((Sequenceof Real) -> Real)
-                       ((Sequenceof Real) (Option (Sequenceof Real)) -> Real)))
-(define (real-median xs [ws #f])
-  (median < xs ws))
-
-(: absolute-deviation* (Symbol Real (Sequenceof Real) (Option (Sequenceof Real)) -> Nonnegative-Real))
-(define (absolute-deviation* name m xs ws)
+(: absdev* (Symbol Real (Sequenceof Real) (Option (Sequenceof Real)) -> Nonnegative-Real))
+(define (absdev* name m xs ws)
  (define-values (axs n)
    (cond [ws  (let-values ([(xs ws)  (sequences->weighted-samples name xs ws)])
                 (values (map (λ: ([x : Real] [w : Real]) (* w (abs (- x m)))) xs ws)
@ -75,14 +86,14 @@
        [else
         (max 0 (/ (sum axs) n))]))

-(: absolute-deviation/median
+(: absdev/median
   (case-> (Real (Sequenceof Real) -> Nonnegative-Real)
           (Real (Sequenceof Real) (Option (Sequenceof Real)) -> Nonnegative-Real)))
-(define (absolute-deviation/median m xs [ws #f])
-  (absolute-deviation* 'absolute-deviation/median m xs ws))
-  
-(: absolute-deviation
+(define (absdev/median m xs [ws #f])
+  (absdev* 'absdev/median m xs ws))
+
+(: absdev
   (case-> ((Sequenceof Real) -> Nonnegative-Real)
           ((Sequenceof Real) (Option (Sequenceof Real)) -> Nonnegative-Real)))
-(define (absolute-deviation xs [ws #f])
-  (absolute-deviation* 'absolute-deviation (real-median xs ws) xs ws))
+(define (absdev xs [ws #f])
+  (absdev* 'absdev (median < xs ws) xs ws))
--- a/collects/math/private/statistics/statistics-struct.rkt
+++ b/collects/math/private/statistics/statistics-struct.rkt
@ -60,11 +60,12 @@
   (case-> (statistics (Sequenceof Real) -> statistics)
           (statistics (Sequenceof Real) (Option (Sequenceof Real)) -> statistics)))
 (define (update-statistics* e xs [ws #f])
-  (cond [ws  (let-values ([(xs ws)  (sequences->weighted-samples 'update-statistics* xs ws)])
-               (for/fold: ([e : statistics  e]) ([x  (in-list xs)] [w  (in-list ws)])
-                 (update-statistics e x w)))]
-        [else  (for/fold: ([e : statistics  e]) ([x xs])
-                 (update-statistics e x 1.0))]))
+  (cond [ws
+         (for/fold: ([e : statistics  e]) ([x xs] [w ws])
+           (update-statistics e x w))]
+        [else
+         (for/fold: ([e : statistics  e]) ([x xs])
+           (update-statistics e x 1.0))]))

 ;; ===================================================================================================

--- a/collects/math/private/statistics/statistics-utils.rkt
+++ b/collects/math/private/statistics/statistics-utils.rkt
@ -9,6 +9,10 @@

 ;; ===================================================================================================

+(: lte?->lt? (All (A) ((A A -> Any) -> (A A -> Boolean))))
+(define ((lte?->lt? lte?) x1 x2)
+  (and (lte? x1 x2) (not (lte? x2 x1))))
+
 (: find-near-pow2 (Real -> Nonnegative-Exact-Rational))
 (define (find-near-pow2 x)
  (expt 2 (max -1073 (min 1023 (exact-round (/ (log (abs x)) (fllog 2.0)))))))
--- a/collects/math/scribblings/math-statistics.scrbl
+++ b/collects/math/scribblings/math-statistics.scrbl
@ -5,10 +5,11 @@
          (for-label racket/base racket/promise racket/list
                     math plot
                     (only-in typed/racket/base
-                              ann
+                              ann inst
                              Flonum Real Boolean Any Listof Integer case-> -> U
-                              Sequenceof Positive-Flonum Nonnegative-Flonum
-                              HashTable Positive-Integer Nonnegative-Real Values))
+                              Sequenceof Positive-Flonum Nonnegative-Flonum Symbol
+                              HashTable Positive-Integer Nonnegative-Real Values
+                              String))
          "utils.rkt")

@(define typed-eval (make-math-eval))
@ -34,89 +35,6 @@ See @secref{stats:expected-values} for a discussion.

@local-table-of-contents[]

-@section{Counting and Binning}
-
-@defproc*[([(samples->hash [xs (Sequenceof A)]) (HashTable A Positive-Integer)]
-           [(samples->hash [xs (Sequenceof A)] [ws (U #f (Sequenceof Real))])
-            (HashTable A Nonnegative-Real)])]{
-@examples[#:eval typed-eval
-                 (samples->hash '(1 2 3 4 4))
-                 (samples->hash '(1 1 2 3 4) '(1/2 1/2 1 1 2))]
-}
-
-@defproc*[([(count-samples [xs (Sequenceof A)]) (Values (Listof A) (Listof Positive-Integer))]
-           [(count-samples [xs (Sequenceof A)] [ws (U #f (Sequenceof Real))])
-            (Values (Listof A) (Listof Nonnegative-Real))])]{
-@examples[#:eval typed-eval
-                 (count-samples '(1 2 3 4 4))
-                 (count-samples '(1 1 2 3 4) '(1/2 1/2 1 1 2))]
-}
-
-@defstruct*[sample-bin ([min B]
-                        [max B]
-                        [values (Listof A)]
-                        [weights (U #f (Listof Nonnegative-Real))])]{
-Represents a @italic{bin}, or a group of samples within an interval in a total order.
-The values and bounds have a different type to allow @racket[bin-samples] and
-@racket[bin-weighted-samples] to group elements based on a function of their values (a @racket[key],
-like in @racket[sort]).
-}
-
-@defproc*[([(bin-samples [bounds (Sequenceof A)]
-                         [lte? (A A -> Any)]
-                         [xs (Sequenceof A)])
-            (Listof (sample-bin A A))]
-           [(bin-samples [bounds (Sequenceof B)]
-                         [lte? (B B -> Any)]
-                         [xs (Sequenceof A)]
-                         [key (A -> B)])
-            (Listof (sample-bin A B))])]{
-Like @racket[(sort xs lte? #:key key)], but additionally groups samples into bins.
-Keys are always cached, and @racket[bounds] is sorted before binning.
-
-If @racket[n = (length bounds)], then @racket[bin-samples] returns @italic{at least} @racket[(- n 1)]
-bins, one for each pair of adjacent (sorted) bounds.
-If some values in @racket[xs] are less than the smallest bound, they are grouped into a single bin in
-front.
-If some are greater than the largest bound, they are grouped into a single bin at the end.
-
-@examples[#:eval typed-eval
-                 (bin-samples '() <= '(0 1 2 3 4 5 6))
-                 (bin-samples '(3) <= '(0 1 2 3 4 5 6))
-                 (bin-samples '(2 4) <= '(0 1 2 3 4 5 6))]
-
-Note that @racket[bin-samples] always returns bins with @racket[#f] weights, meaning they contain
-unweighted samples.
-}
-
-@defproc*[([(bin-weighted-samples [xs (Sequenceof A)]
-                                  [ws (Sequenceof Real)]
-                                  [bounds (Sequenceof A)]
-                                  [lte? (A A -> Any)])
-            (Listof (sample-bin A A))]
-           [(bin-weighted-samples [xs (Sequenceof A)]
-                                  [ws (Sequenceof Real)]
-                                  [bounds (Sequenceof B)]
-                                  [lte? (B B -> Any)]
-                                  [key (A -> B)])
-            (Listof (sample-bin A B))])]{
-Like @racket[bin-samples], but for weighted samples.
-}
-
-@defproc[(sample-bin-compact [bin (sample-bin A B)]) (sample-bin A B)]{
-Compacts @racket[bin] by applying @racket[count-samples] to its values and weights.
-@examples[#:eval typed-eval
-                 (sample-bin-compact (sample-bin 1 4 '(1 2 3 4 4) #f))]
-}
-
-@defproc[(sample-bin-total [bin (sample-bin A B)]) Nonnegative-Real]{
-If @racket[(sample-bin-weights bin)] is @racket[#f], returns the number of samples in @racket[bin];
-otherwise, returns the sum of their weights.
-@examples[#:eval typed-eval
-                 (sample-bin-total (sample-bin 1 4 '(1 2 3 4 4) #f))
-                 (sample-bin-total (sample-bin-compact (sample-bin 1 4 '(1 2 3 4 4) #f)))]
-}
-
@section[#:tag "stats:expected-values"]{Expected Values}

 Functions documented in this section that compute higher central moments, such as @racket[variance],
@ -206,7 +124,7 @@ using known mean @racket[mean].
@section[#:tag "stats:running"]{Running Expected Values}

 The @racket[statistics] object allows computing the sample minimum, maximum, count, mean, variance,
-skewness, and excess kurtosis of any number of samples in O(1) space.
+skewness, and excess kurtosis of a sequence of samples in O(1) space.

 To use it, start with @racket[empty-statistics], then use @racket[update-statistics] to obtain a
 new statistics object with updated values. Use @racket[statistics-min], @racket[statistics-mean],
@ -326,6 +244,148 @@ Like @racket[covariance] and @racket[correlation], but computed using known mean
@racket[μx] and @racket[μy].
 }

+                              @section{Counting and Binning}
+
+@defproc*[([(samples->hash [xs (Sequenceof A)]) (HashTable A Positive-Integer)]
+           [(samples->hash [xs (Sequenceof A)] [ws (U #f (Sequenceof Real))])
+            (HashTable A Nonnegative-Real)])]{
+@examples[#:eval typed-eval
+                 (samples->hash '(1 2 3 4 4))
+                 (samples->hash '(1 1 2 3 4) '(1/2 1/2 1 1 2))]
+}
+
+@defproc*[([(count-samples [xs (Sequenceof A)]) (Values (Listof A) (Listof Positive-Integer))]
+           [(count-samples [xs (Sequenceof A)] [ws (U #f (Sequenceof Real))])
+            (Values (Listof A) (Listof Nonnegative-Real))])]{
+@examples[#:eval typed-eval
+                 (count-samples '(1 2 3 4 4))
+                 (count-samples '(1 1 2 3 4) '(1/2 1/2 1 1 2))]
+}
+
+@defstruct*[sample-bin ([min B]
+                        [max B]
+                        [values (Listof A)]
+                        [weights (U #f (Listof Nonnegative-Real))])]{
+Represents a @italic{bin}, or a group of samples within an interval in a total order.
+The values and bounds have a different type to allow @racket[bin-samples/key]
+to group elements based on a function of their values.
+}
+
+@defproc[(bin-samples [bounds (Sequenceof A)]
+                      [lte? (A A -> Any)]
+                      [xs (Sequenceof A)]
+                      [ws (U #f (Sequenceof Real))])
+         (Listof (sample-bin A A))]{
+Like @racket[(sort xs lte?)], but additionally groups samples into bins.
+The bins' @racket[bounds] are sorted before binning @racket[xs].
+
+If @racket[n = (length bounds)], then @racket[bin-samples] returns @italic{at least} @racket[(- n 1)]
+bins, one for each pair of adjacent (sorted) bounds.
+If some values in @racket[xs] are less than the smallest bound, they are grouped into a single bin in
+front.
+If some are greater than the largest bound, they are grouped into a single bin at the end.
+
+@examples[#:eval typed-eval
+                 (bin-samples '() <= '(0 1 2 3 4 5 6))
+                 (bin-samples '(3) <= '(0 1 2 3 4 5 6))
+                 (bin-samples '(2 4) <= '(0 1 2 3 4 5 6))
+                 (bin-samples '(2 4) <=
+                              '(0 1 2 3 4 5 6)
+                              '(10 20 30 40 50 60 70))]
+
+If @racket[lte?] is a less-than-or-equal relation, the bins represent half-open intervals
+(@racket[min], @racket[max]] (except possibly the first, which may be closed).
+If @racket[lte?] is a less-than relation, the bins represent half-open intervals
+[@racket[min], @racket[max]) (except possibly the last, which may be closed).
+In either case, the sorts applied to @racket[bounds] and @racket[xs] are stable.
+
+Because intervals used in probability measurements are normally open on the left, prefer to use
+less-than-or-equal relations for @racket[lte?].
+
+If @racket[ws] is @racket[#f], @racket[bin-samples] returns bins with @racket[#f] weights.
+}
+
+@defproc[(bin-samples/key [bounds (Sequenceof B)]
+                          [lte? (B B -> Any)]
+                          [key (A -> B)]
+                          [xs (Sequenceof A)]
+                          [ws (U #f (Sequenceof Real))])
+         (Listof (sample-bin A B))]{
+Like @racket[(sort xs lte? #:key key #:cache-keys? #t)], but additionally groups samples into bins.
+@examples[#:eval typed-eval
+                 (bin-samples/key '(2 4) <= (inst car Real String)
+                                  '((1 . "1") (2 . "2") (3 . "3") (4 . "4") (5 . "5")))]
+See @racket[bin-samples] for the simpler, one-type variant.
+}
+
+@defproc[(sample-bin-compact [bin (sample-bin A B)]) (sample-bin A B)]{
+Compacts @racket[bin] by applying @racket[count-samples] to its values and weights.
+@examples[#:eval typed-eval
+                 (sample-bin-compact (sample-bin 1 4 '(1 2 3 4 4) #f))]
+}
+
+@defproc[(sample-bin-total [bin (sample-bin A B)]) Nonnegative-Real]{
+If @racket[(sample-bin-weights bin)] is @racket[#f], returns the number of samples in @racket[bin];
+otherwise, returns the sum of their weights.
+@examples[#:eval typed-eval
+                 (sample-bin-total (sample-bin 1 4 '(1 2 3 4 4) #f))
+                 (sample-bin-total (sample-bin-compact (sample-bin 1 4 '(1 2 3 4 4) #f)))]
+}
+
@section{Order Statistics}

+@defproc*[([(sort-samples [lt? (A A -> Any)] [xs (Sequenceof A)]) (Listof A)]
+           [(sort-samples [lt? (A A -> Any)]
+                          [xs (Sequenceof A)]
+                          [ws (U #f (Sequenceof Real))])
+            (Values (Listof A) (Listof Nonnegative-Real))])]{
+Sorts possibly weighted samples according to @racket[lt?], which is assumed to define a total
+order over the elements in @racket[xs].
+@examples[#:eval typed-eval
+                 (sort-samples < '(5 2 3 1))
+                 (sort-samples < '(5 2 3 1) '(50 20 30 10))
+                 (sort-samples < '(5 2 3 1) #f)]
+Because @racket[sort-samples] is defined in terms of @racket[sort], the sort is only guaranteed
+to be stable if @racket[lt?] is strictly a less-than relation.
+}
+
+@defproc[(median [lt? (A A -> Any)] [xs (Sequenceof A)] [ws (U #f (Sequenceof Real)) #f]) A]{
+Equivalent to @racket[(quantile 1/2 lt? xs ws)].
+}
+
+@defproc[(quantile [p Real]
+                   [lt? (A A -> Any)]
+                   [xs (Sequenceof A)]
+                   [ws (U #f (Sequenceof Real)) #f])
+         A]{
+Computes the inverse of the empirical @tech{cdf} represented by the samples @racket[xs],
+which are optionally weighted by @racket[ws].
+
+@examples[#:eval typed-eval
+                 (quantile 0 < '(1 3 5))
+                 (quantile 0.5 < '(1 2 3 4))
+                 (quantile 0.5 < '(1 2 3 4) '(0.25 0.20 0.20 0.35))]
+
+If @racket[p = 0], @racket[quantile] returns the smallest element of @racket[xs] under the
+ordering relation @racket[lt?]. If @racket[p = 1], it returns the largest element.
+
+For weighted samples, @racket[quantile] sorts @racket[xs] and @racket[ws] together
+(using @racket[sort-samples]), then finds the least @racket[x] for which the proportion of its
+cumulative weight is greater than or equal to @racket[p].
+
+For unweighted samples, @racket[quantile] uses the quickselect algorithm to find the element that
+would be at index @racket[(ceiling (- (* p n) 1))] if @racket[xs] were sorted, where @racket[n]
+is the length of @racket[xs].
+}
+
+@defproc[(absdev [xs (Sequenceof Real)] [ws (U #f (Sequenceof Real)) #f]) Nonnegative-Real]{
+Computes the average absolute difference between the elements in @racket[xs] and
+@racket[(median < xs ws)]. If @racket[ws] is not @racket[#f], it is a weighted average.
+}
+
+@defproc[(absdev/median [median Real] [xs (Sequenceof Real)] [ws (U #f (Sequenceof Real)) #f])
+         Nonnegative-Real]{
+Like @racket[(absdev xs ws)], but computed using known median @racket[median].
+}
+         
@(close-eval typed-eval)
--- a/collects/math/tests/statistics-tests.rkt
+++ b/collects/math/tests/statistics-tests.rkt
@ -92,3 +92,26 @@
 (list (sample-bin 1 3 '(1 1 2 2 2 3) #f)
       (sample-bin 3 8 '(4 5 5 5 5 6 7 8) #f)
       (sample-bin 8 9 '(9 9) #f)))
+
+(check-equal?
+ (bin-samples '(3 8) <=
+              '(1 1 2 2 2 3 4 5 5 5 5 6 7 8 9 9)
+              '(1 1.1 2 2.1 2.2 3 4 5 5.1 5.2 5.3 6 7 8 9 9.1))
+ (list (sample-bin 1 3 '(1 1 2 2 2 3) '(1 1.1 2 2.1 2.2 3))
+       (sample-bin 3 8 '(4 5 5 5 5 6 7 8) '(4 5 5.1 5.2 5.3 6 7 8))
+       (sample-bin 8 9 '(9 9) '(9 9.1))))
+
+(check-equal?
+ (bin-samples/key '(2 4) <= (inst car Real Symbol)
+                  '((1 . a) (2 . b) (3 . c) (4 . d) (5 . e)))
+ (list (sample-bin 1 2 '((1 . a) (2 . b)) #f)
+       (sample-bin 2 4 '((3 . c) (4 . d)) #f)
+       (sample-bin 4 5 '((5 . e)) #f)))
+
+(for: ([p  '(0 1/6 2/6 3/6 4/6 5/6 6/6)])
+  (check-equal? (quantile p < '(1 2 3) '(1 1 1))
+                (quantile p < '(1 2 3))))
+
+(for: ([p  '(0 1/8 2/8 3/8 4/8 5/8 6/8 7/8 8/8)])
+  (check-equal? (quantile p < '(1 2 3 4) '(1 1 1 1))
+                (quantile p < '(1 2 3 4))))