diff --git a/pkgs/racket-test-core/tests/racket/number.rktl b/pkgs/racket-test-core/tests/racket/number.rktl index 9502a901df..99ba500a1d 100644 --- a/pkgs/racket-test-core/tests/racket/number.rktl +++ b/pkgs/racket-test-core/tests/racket/number.rktl @@ -3,7 +3,7 @@ (Section 'numbers) -(require racket/extflonum racket/random) +(require racket/extflonum racket/random racket/list) (test #f number? 'a) (test #f complex? 'a) @@ -2514,15 +2514,21 @@ (parameterize ([current-pseudo-random-generator (vector->pseudo-random-generator #(3620087466 1904163406 3177592043 1406334318 257151704 3090455638))]) - (test 3 random-ref '(1 2 3 4 5)) - (test 10 random-ref '#(7 6 8 9 10)) - (test #\e random-ref "abcde")) + (test 1 random-ref '(1 2 3 4 5)) + (test 6 random-ref '#(7 6 8 9 10)) + (test #\a random-ref "abcde")) (parameterize ([current-pseudo-random-generator (vector->pseudo-random-generator #(3620087466 1904163406 3177592043 1406334318 257151704 3090455638))]) - (test '(3) random-sample '(1 2 3 4 5) 1) + (test '(1) random-sample '(1 2 3 4 5) 1) (test '(5 5 5) random-sample '(1 2 3 4 5) 3) - (test '(2 4 5) random-sample '(1 2 3 4 5) 3 #:replacement? #f)) + (test '(1 4 3) random-sample '(1 2 3 4 5) 3 #:replacement? #f) + ;; distribution is uniform + (test '(100077 100479 100375 99943 99869 100055 100482 99979 99405 99336) + values ; to avoid the whole pre-`length` list being printed if test fails + (map length (group-by values + (apply append (for/list ([i 10000]) + (random-sample (range 10) 100))))))) (test #t = 0 0) diff --git a/racket/collects/racket/random.rkt b/racket/collects/racket/random.rkt index 83955dbc2b..5bfe7beed8 100644 --- a/racket/collects/racket/random.rkt +++ b/racket/collects/racket/random.rkt @@ -20,23 +20,43 @@ (current-continuation-marks)))])) (define (random-ref seq [prng (current-pseudo-random-generator)]) - (sequence-ref seq (random (sequence-length seq)))) + (car (random-sample seq 1 prng))) (define (random-sample seq n [prng (current-pseudo-random-generator)] #:replacement? [replacement? #t]) - (cond [replacement? - (for/list ([i (in-range n)]) - (random-ref seq prng))] - [else - (unless (>= (sequence-length seq) n) - (raise-argument-error 'random-sample - "integer less than sequence length" - n)) - (define l (sequence-length seq)) - ;; sequences don't necessarily support removal, so instead sample - ;; indices without replacement, then index into the sequence - (let loop ([res-idx (set)]) - (cond [(= (set-count res-idx) n) ; we have all we need, we're done - (for/list ([i (in-set res-idx)]) (sequence-ref seq i))] - [else - (loop (set-add res-idx (random l)))]))])) + ;; doing reservoir sampling, to do a single pass over the sequence + ;; (some sequences may not like multiple passes) + (cond + [(not replacement?) + ;; Based on: http://rosettacode.org/wiki/Knuth's_algorithm_S#Racket + (define not-there (gensym)) + (define samples (make-vector n not-there)) + (for ([elt seq] + [i (in-naturals)]) + (cond [(< i n) ; we're not full, sample for sure + (vector-set! samples i elt)] + [(< (random (add1 i)) n) ; we've already seen n items; replace one? + (vector-set! samples (random n) elt)])) + ;; did we get enough? + (unless (for/and ([s (in-vector samples)]) + (not (eq? s not-there))) + (raise-argument-error 'random-sample + "integer less than sequence length" + n)) + (vector->list samples)] + [else + ;; similar to above, except each sample is independent + (define samples #f) + (for ([elt seq] + [i (in-naturals)]) + (cond [(= i 0) ; initialize samples + (set! samples (make-vector n elt))] + [else ; independently, maybe replace + (for ([j (in-range n)]) + (when (zero? (random (add1 i))) + (vector-set! samples j elt)))])) + (unless samples + (raise-argument-error 'random-sample + "non-empty sequence" + seq)) + (vector->list samples)]))