random-sample: use reservoir sampling to do a single pass over sequences.
Suggested by Eli.
This commit is contained in:
parent
31a9414983
commit
656044b8fc
|
@ -3,7 +3,7 @@
|
|||
|
||||
(Section 'numbers)
|
||||
|
||||
(require racket/extflonum racket/random)
|
||||
(require racket/extflonum racket/random racket/list)
|
||||
|
||||
(test #f number? 'a)
|
||||
(test #f complex? 'a)
|
||||
|
@ -2514,15 +2514,21 @@
|
|||
(parameterize ([current-pseudo-random-generator
|
||||
(vector->pseudo-random-generator
|
||||
#(3620087466 1904163406 3177592043 1406334318 257151704 3090455638))])
|
||||
(test 3 random-ref '(1 2 3 4 5))
|
||||
(test 10 random-ref '#(7 6 8 9 10))
|
||||
(test #\e random-ref "abcde"))
|
||||
(test 1 random-ref '(1 2 3 4 5))
|
||||
(test 6 random-ref '#(7 6 8 9 10))
|
||||
(test #\a random-ref "abcde"))
|
||||
(parameterize ([current-pseudo-random-generator
|
||||
(vector->pseudo-random-generator
|
||||
#(3620087466 1904163406 3177592043 1406334318 257151704 3090455638))])
|
||||
(test '(3) random-sample '(1 2 3 4 5) 1)
|
||||
(test '(1) random-sample '(1 2 3 4 5) 1)
|
||||
(test '(5 5 5) random-sample '(1 2 3 4 5) 3)
|
||||
(test '(2 4 5) random-sample '(1 2 3 4 5) 3 #:replacement? #f))
|
||||
(test '(1 4 3) random-sample '(1 2 3 4 5) 3 #:replacement? #f)
|
||||
;; distribution is uniform
|
||||
(test '(100077 100479 100375 99943 99869 100055 100482 99979 99405 99336)
|
||||
values ; to avoid the whole pre-`length` list being printed if test fails
|
||||
(map length (group-by values
|
||||
(apply append (for/list ([i 10000])
|
||||
(random-sample (range 10) 100)))))))
|
||||
|
||||
|
||||
(test #t = 0 0)
|
||||
|
|
|
@ -20,23 +20,43 @@
|
|||
(current-continuation-marks)))]))
|
||||
|
||||
(define (random-ref seq [prng (current-pseudo-random-generator)])
|
||||
(sequence-ref seq (random (sequence-length seq))))
|
||||
(car (random-sample seq 1 prng)))
|
||||
|
||||
(define (random-sample seq n [prng (current-pseudo-random-generator)]
|
||||
#:replacement? [replacement? #t])
|
||||
(cond [replacement?
|
||||
(for/list ([i (in-range n)])
|
||||
(random-ref seq prng))]
|
||||
[else
|
||||
(unless (>= (sequence-length seq) n)
|
||||
(raise-argument-error 'random-sample
|
||||
"integer less than sequence length"
|
||||
n))
|
||||
(define l (sequence-length seq))
|
||||
;; sequences don't necessarily support removal, so instead sample
|
||||
;; indices without replacement, then index into the sequence
|
||||
(let loop ([res-idx (set)])
|
||||
(cond [(= (set-count res-idx) n) ; we have all we need, we're done
|
||||
(for/list ([i (in-set res-idx)]) (sequence-ref seq i))]
|
||||
[else
|
||||
(loop (set-add res-idx (random l)))]))]))
|
||||
;; doing reservoir sampling, to do a single pass over the sequence
|
||||
;; (some sequences may not like multiple passes)
|
||||
(cond
|
||||
[(not replacement?)
|
||||
;; Based on: http://rosettacode.org/wiki/Knuth's_algorithm_S#Racket
|
||||
(define not-there (gensym))
|
||||
(define samples (make-vector n not-there))
|
||||
(for ([elt seq]
|
||||
[i (in-naturals)])
|
||||
(cond [(< i n) ; we're not full, sample for sure
|
||||
(vector-set! samples i elt)]
|
||||
[(< (random (add1 i)) n) ; we've already seen n items; replace one?
|
||||
(vector-set! samples (random n) elt)]))
|
||||
;; did we get enough?
|
||||
(unless (for/and ([s (in-vector samples)])
|
||||
(not (eq? s not-there)))
|
||||
(raise-argument-error 'random-sample
|
||||
"integer less than sequence length"
|
||||
n))
|
||||
(vector->list samples)]
|
||||
[else
|
||||
;; similar to above, except each sample is independent
|
||||
(define samples #f)
|
||||
(for ([elt seq]
|
||||
[i (in-naturals)])
|
||||
(cond [(= i 0) ; initialize samples
|
||||
(set! samples (make-vector n elt))]
|
||||
[else ; independently, maybe replace
|
||||
(for ([j (in-range n)])
|
||||
(when (zero? (random (add1 i)))
|
||||
(vector-set! samples j elt)))]))
|
||||
(unless samples
|
||||
(raise-argument-error 'random-sample
|
||||
"non-empty sequence"
|
||||
seq))
|
||||
(vector->list samples)]))
|
||||
|
|
Loading…
Reference in New Issue
Block a user