diff --git a/collects/math/private/statistics/counting.rkt b/collects/math/private/statistics/counting.rkt index 51004b9d8f..69f4eeb578 100644 --- a/collects/math/private/statistics/counting.rkt +++ b/collects/math/private/statistics/counting.rkt @@ -74,13 +74,13 @@ ;; Bins (struct: (A B) sample-bin - ([values : (Listof A)] [weights : (U #f (Listof Nonnegative-Real))] [min : B] [max : B]) + ([min : B] [max : B] [values : (Listof A)] [weights : (U #f (Listof Nonnegative-Real))]) #:transparent) (: sample-bin-compact (All (A B) ((sample-bin A B) -> (sample-bin A B)))) (define (sample-bin-compact bin) (let-values ([(xs ws) (count-samples (sample-bin-values bin) (sample-bin-weights bin))]) - (sample-bin xs ws (sample-bin-min bin) (sample-bin-max bin)))) + (sample-bin (sample-bin-min bin) (sample-bin-max bin) xs ws))) (: sample-bin-total (All (A B) ((sample-bin A B) -> Nonnegative-Real))) (define (sample-bin-total bin) @@ -102,12 +102,12 @@ (: bin-samples (All (A B) - (case-> ((Sequenceof A) (Sequenceof A) (A A -> Any) -> (Listof (sample-bin A A))) - ((Sequenceof A) (Sequenceof B) (B B -> Any) (A -> B) -> (Listof (sample-bin A B)))))) + (case-> ((Sequenceof A) (A A -> Any) (Sequenceof A) -> (Listof (sample-bin A A))) + ((Sequenceof B) (B B -> Any) (Sequenceof A) (A -> B) -> (Listof (sample-bin A B)))))) (define bin-samples (case-lambda - [(xs bnds lte?) (bin-samples xs bnds lte? (λ: ([x : A]) x))] - [(xs bnds lte? key) + [(bnds lte? xs) (bin-samples bnds lte? xs (λ: ([x : A]) x))] + [(bnds lte? xs key) (let* ([bnds (sort (sequence->list bnds) (λ: ([b1 : B] [b2 : B]) (and (lte? b1 b2) #t)))] [xs (sequence->list xs)] [xks (map (λ: ([x : A]) (cons x (key x))) xs)] @@ -118,7 +118,7 @@ (cond [(empty? xks) empty] [else (define min (cdr (first xks))) (define max (cdr (last xks))) - (list (sample-bin (map (inst car A B) xks) #f min max))])] + (list (sample-bin min max (map (inst car A B) xks) #f))])] [else (let: loop : (Listof (sample-bin A B)) ([min : (U #f B) #f] [max : B (first bnds)] @@ -128,37 +128,37 @@ (let-values ([(yks xks) (list-split-after xks (λ: ([xk : (Pair A B)]) (lte? (cdr xk) max)))]) (define maybe-bin - (cond [min (list (sample-bin (map (inst car A B) yks) #f min max))] + (cond [min (list (sample-bin min max (map (inst car A B) yks) #f))] [(empty? yks) empty] - [else (list (sample-bin (map (inst car A B) yks) #f (cdr (first yks)) max))])) + [else (list (sample-bin (cdr (first yks)) max (map (inst car A B) yks) #f))])) (cond [(empty? bnds) (cond [(empty? xks) (reverse (append maybe-bin bins))] [else (define bin2 - (sample-bin (map (inst car A B) xks) #f max (cdr (last xks)))) + (sample-bin max (cdr (last xks)) (map (inst car A B) xks) #f)) (reverse (append (cons bin2 maybe-bin) bins))])] [else (loop max (first bnds) (rest bnds) xks (append maybe-bin bins))])))]))])) (: bin-weighted-samples - (All (A B) (case-> ((Sequenceof A) (U #f (Sequenceof Real)) (Sequenceof A) (A A -> Any) + (All (A B) (case-> ((Sequenceof A) (A A -> Any) (Sequenceof A) (U #f (Sequenceof Real)) -> (Listof (sample-bin A A))) - ((Sequenceof A) (U #f (Sequenceof Real)) (Sequenceof B) (B B -> Any) (A -> B) + ((Sequenceof B) (B B -> Any) (Sequenceof A) (U #f (Sequenceof Real)) (A -> B) -> (Listof (sample-bin A B)))))) (define bin-weighted-samples (case-lambda - [(xs ws bnds lte?) - (cond [ws (bin-weighted-samples xs ws bnds lte? (λ: ([x : A]) x))] - [else (bin-samples xs bnds lte?)])] - [(xs ws bnds lte? key) + [(bnds lte? xs ws) + (cond [ws (bin-weighted-samples bnds lte? xs ws (λ: ([x : A]) x))] + [else (bin-samples bnds lte? xs)])] + [(bnds lte? xs ws key) (cond [ws (let-values ([(xs ws) (sequences->weighted-samples 'bin-samples xs ws)]) (define xws (map (inst cons A Nonnegative-Real) xs ws)) (define xw-key (λ: ([xw : (Pair A Nonnegative-Real)]) (key (car xw)))) (map (λ: ([bin : (sample-bin (Pair A Nonnegative-Real) B)]) (define xws (sample-bin-values bin)) - (sample-bin (map (inst car A Nonnegative-Real) xws) - (map (inst cdr A Nonnegative-Real) xws) - (sample-bin-min bin) - (sample-bin-max bin))) - (bin-samples xws bnds lte? xw-key)))] - [else (bin-samples xs bnds lte? key)])])) + (sample-bin (sample-bin-min bin) + (sample-bin-max bin) + (map (inst car A Nonnegative-Real) xws) + (map (inst cdr A Nonnegative-Real) xws))) + (bin-samples bnds lte? xws xw-key)))] + [else (bin-samples bnds lte? xs key)])])) diff --git a/collects/math/scribblings/math-statistics.scrbl b/collects/math/scribblings/math-statistics.scrbl index c5d9f81aff..b052b638ce 100644 --- a/collects/math/scribblings/math-statistics.scrbl +++ b/collects/math/scribblings/math-statistics.scrbl @@ -52,20 +52,23 @@ See @secref{stats:expected-values} for a discussion. (count-samples '(1 1 2 3 4) '(1/2 1/2 1 1 2))] } -@defstruct*[sample-bin ([values (Listof A)] - [weights (U #f (Listof Nonnegative-Real))] - [min B] [max B])]{ +@defstruct*[sample-bin ([min B] + [max B] + [values (Listof A)] + [weights (U #f (Listof Nonnegative-Real))])]{ Represents a @italic{bin}, or a group of samples within an interval in a total order. The values and bounds have a different type to allow @racket[bin-samples] and @racket[bin-weighted-samples] to group elements based on a function of their values (a @racket[key], like in @racket[sort]). } -@defproc*[([(bin-samples [xs (Sequenceof A)] [bounds (Sequenceof A)] [lte? (A A -> Any)]) +@defproc*[([(bin-samples [bounds (Sequenceof A)] + [lte? (A A -> Any)] + [xs (Sequenceof A)]) (Listof (sample-bin A A))] - [(bin-samples [xs (Sequenceof A)] - [bounds (Sequenceof B)] + [(bin-samples [bounds (Sequenceof B)] [lte? (B B -> Any)] + [xs (Sequenceof A)] [key (A -> B)]) (Listof (sample-bin A B))])]{ Like @racket[(sort xs lte? #:key key)], but additionally groups samples into bins. @@ -78,9 +81,9 @@ front. If some are greater than the largest bound, they are grouped into a single bin at the end. @examples[#:eval typed-eval - (bin-samples '(0 1 2 3 4 5 6) '() <=) - (bin-samples '(0 1 2 3 4 5 6) '(3) <=) - (bin-samples '(0 1 2 3 4 5 6) '(2 4) <=)] + (bin-samples '() <= '(0 1 2 3 4 5 6)) + (bin-samples '(3) <= '(0 1 2 3 4 5 6)) + (bin-samples '(2 4) <= '(0 1 2 3 4 5 6))] Note that @racket[bin-samples] always returns bins with @racket[#f] weights, meaning they contain unweighted samples. @@ -103,15 +106,15 @@ Like @racket[bin-samples], but for weighted samples. @defproc[(sample-bin-compact [bin (sample-bin A B)]) (sample-bin A B)]{ Compacts @racket[bin] by applying @racket[count-samples] to its values and weights. @examples[#:eval typed-eval - (sample-bin-compact (sample-bin '(1 2 3 4 4) #f 1 4))] + (sample-bin-compact (sample-bin 1 4 '(1 2 3 4 4) #f))] } @defproc[(sample-bin-total [bin (sample-bin A B)]) Nonnegative-Real]{ If @racket[(sample-bin-weights bin)] is @racket[#f], returns the number of samples in @racket[bin]; otherwise, returns the sum of their weights. @examples[#:eval typed-eval - (sample-bin-total (sample-bin '(1 2 3 4 4) #f 1 4)) - (sample-bin-total (sample-bin-compact (sample-bin '(1 2 3 4 4) #f 1 4)))] + (sample-bin-total (sample-bin 1 4 '(1 2 3 4 4) #f)) + (sample-bin-total (sample-bin-compact (sample-bin 1 4 '(1 2 3 4 4) #f)))] } @section[#:tag "stats:expected-values"]{Expected Values} diff --git a/collects/math/tests/statistics-tests.rkt b/collects/math/tests/statistics-tests.rkt index b0816b2d6d..a1a80a390f 100644 --- a/collects/math/tests/statistics-tests.rkt +++ b/collects/math/tests/statistics-tests.rkt @@ -4,91 +4,91 @@ typed/rackunit) (check-equal? - (bin-samples '() '(5) <=) + (bin-samples '(5) <= '()) empty) (check-equal? - (bin-samples '(4) '(5) <=) - (list (sample-bin '(4) #f 4 5))) + (bin-samples '(5) <= '(4)) + (list (sample-bin 4 5 '(4) #f))) (check-equal? - (bin-samples '(5) '(5) <=) - (list (sample-bin '(5) #f 5 5))) + (bin-samples '(5) <= '(5)) + (list (sample-bin 5 5 '(5) #f))) (check-equal? - (bin-samples '(6) '(5) <=) - (list (sample-bin '(6) #f 5 6))) + (bin-samples '(5) <= '(6)) + (list (sample-bin 5 6 '(6) #f))) (check-equal? - (bin-samples '(4 5) '(5) <=) - (list (sample-bin '(4 5) #f 4 5))) + (bin-samples '(5) <= '(4 5)) + (list (sample-bin 4 5 '(4 5) #f))) (check-equal? - (bin-samples '(5 6) '(5) <=) - (list (sample-bin '(5) #f 5 5) - (sample-bin '(6) #f 5 6))) + (bin-samples '(5) <= '(5 6)) + (list (sample-bin 5 5 '(5) #f) + (sample-bin 5 6 '(6) #f))) (check-equal? - (bin-samples '(4 6) '(5) <=) - (list (sample-bin '(4) #f 4 5) - (sample-bin '(6) #f 5 6))) + (bin-samples '(5) <= '(4 6)) + (list (sample-bin 4 5 '(4) #f) + (sample-bin 5 6 '(6) #f))) (check-equal? - (bin-samples '(4 5 6) '(5) <=) - (list (sample-bin '(4 5) #f 4 5) - (sample-bin '(6) #f 5 6))) + (bin-samples '(5) <= '(4 5 6)) + (list (sample-bin 4 5 '(4 5) #f) + (sample-bin 5 6 '(6) #f))) (check-equal? - (bin-samples '() '(4 8) <=) - (list (sample-bin '() #f 4 8))) + (bin-samples '(4 8) <= '()) + (list (sample-bin 4 8 '() #f))) (check-equal? - (bin-samples '(2) '(4 8) <=) - (list (sample-bin '(2) #f 2 4) - (sample-bin '() #f 4 8))) + (bin-samples '(4 8) <= '(2)) + (list (sample-bin 2 4 '(2) #f) + (sample-bin 4 8 '() #f))) (check-equal? - (bin-samples '(4) '(4 8) <=) - (list (sample-bin '(4) #f 4 4) - (sample-bin '() #f 4 8))) + (bin-samples '(4 8) <= '(4)) + (list (sample-bin 4 4 '(4) #f) + (sample-bin 4 8 '() #f))) (check-equal? - (bin-samples '(6) '(4 8) <=) - (list (sample-bin '(6) #f 4 8))) + (bin-samples '(4 8) <= '(6)) + (list (sample-bin 4 8 '(6) #f))) (check-equal? - (bin-samples '(8) '(4 8) <=) - (list (sample-bin '(8) #f 4 8))) + (bin-samples '(4 8) <= '(8)) + (list (sample-bin 4 8 '(8) #f))) (check-equal? - (bin-samples '(10) '(4 8) <=) - (list (sample-bin '() #f 4 8) - (sample-bin '(10) #f 8 10))) + (bin-samples '(4 8) <= '(10)) + (list (sample-bin 4 8 '() #f) + (sample-bin 8 10 '(10) #f))) (check-equal? - (bin-samples '(4 8) '(4 8) <=) - (list (sample-bin '(4) #f 4 4) - (sample-bin '(8) #f 4 8))) + (bin-samples '(4 8) <= '(4 8)) + (list (sample-bin 4 4 '(4) #f) + (sample-bin 4 8 '(8) #f))) (check-equal? - (bin-samples '(4 10) '(4 8) <=) - (list (sample-bin '(4) #f 4 4) - (sample-bin '() #f 4 8) - (sample-bin '(10) #f 8 10))) + (bin-samples '(4 8) <= '(4 10)) + (list (sample-bin 4 4 '(4) #f) + (sample-bin 4 8 '() #f) + (sample-bin 8 10 '(10) #f))) (check-equal? - (bin-samples '(8 10) '(4 8) <=) - (list (sample-bin '(8) #f 4 8) - (sample-bin '(10) #f 8 10))) + (bin-samples '(4 8) <= '(8 10)) + (list (sample-bin 4 8 '(8) #f) + (sample-bin 8 10 '(10) #f))) (check-equal? - (bin-samples '(4 8 10) '(4 8) <=) - (list (sample-bin '(4) #f 4 4) - (sample-bin '(8) #f 4 8) - (sample-bin '(10) #f 8 10))) + (bin-samples '(4 8) <= '(4 8 10)) + (list (sample-bin 4 4 '(4) #f) + (sample-bin 4 8 '(8) #f) + (sample-bin 8 10 '(10) #f))) (check-equal? - (bin-samples '(1 1 2 2 2 3 4 5 5 5 5 6 7 8 9 9) '(3 8) <=) - (list (sample-bin '(1 1 2 2 2 3) #f 1 3) - (sample-bin '(4 5 5 5 5 6 7 8) #f 3 8) - (sample-bin '(9 9) #f 8 9))) + (bin-samples '(3 8) <= '(1 1 2 2 2 3 4 5 5 5 5 6 7 8 9 9)) + (list (sample-bin 1 3 '(1 1 2 2 2 3) #f) + (sample-bin 3 8 '(4 5 5 5 5 6 7 8) #f) + (sample-bin 8 9 '(9 9) #f)))