From 69e90b7ef3403d8d2b5950c724e52d69601fb322 Mon Sep 17 00:00:00 2001 From: Eli Barzilay Date: Fri, 6 Nov 2009 22:32:53 +0000 Subject: [PATCH] #:key argument to remove-duplicates svn: r16587 --- collects/scheme/list.ss | 57 ++++++++++++---------- collects/scribblings/reference/pairs.scrbl | 9 +++- collects/tests/mzscheme/list.ss | 5 +- 3 files changed, 44 insertions(+), 27 deletions(-) diff --git a/collects/scheme/list.ss b/collects/scheme/list.ss index 25820d9c9d..eb5b9d3848 100644 --- a/collects/scheme/list.ss +++ b/collects/scheme/list.ss @@ -188,38 +188,45 @@ ;; ls ;; (append l (car ls) (loop (cdr ls))))))])) -(define (remove-duplicates l [=? equal?]) +(define (remove-duplicates l [=? equal?] #:key [key #f]) + (define-syntax-rule (no-key x) x) (unless (list? l) (raise-type-error 'remove-duplicates "list" l)) (let ([h (cond [(< (length l) 40) #f] [(eq? =? eq?) (make-hasheq)] [(eq? =? equal?) (make-hash)] [else #f])]) (if h - ;; Using a hash table when the list is long enough and a using `equal?' - ;; or `eq?'. The length threshold (40) was determined by trying it out - ;; with lists of length n holding (random n) numbers. - (let loop ([l l]) - (if (null? l) - l - (let ([x (car l)] [l (cdr l)]) - (if (hash-ref h x #f) - (loop l) - (begin (hash-set! h x #t) (cons x (loop l))))))) + ;; Using a hash table when the list is long enough and when using + ;; `equal?' or `eq?'. The length threshold (40) was determined by + ;; trying it out with lists of length n holding (random n) numbers. + (let-syntax ([loop + (syntax-rules () + [(_ getkey) + (let loop ([l l]) + (if (null? l) + l + (let* ([x (car l)] [k (getkey x)] [l (cdr l)]) + (if (hash-ref h k #f) + (loop l) + (begin (hash-set! h k #t) + (cons x (loop l)))))))])]) + (if key (loop key) (loop no-key))) ;; plain n^2 list traversal (optimized for common cases) - (let-syntax ([loop (syntax-rules () - [(_ search) - (let loop ([l l] [seen null]) - (if (null? l) - l - (let ([x (car l)] [l (cdr l)]) - (if (search x seen) - (loop l seen) - (cons x (loop l (cons x seen)))))))])]) - (cond [(eq? =? equal?) (loop member)] - [(eq? =? eq?) (loop memq)] - [(eq? =? eqv?) (loop memv)] - [else (loop (lambda (x seen) - (ormap (lambda (y) (=? x y)) seen)))]))))) + (let ([key (or key (lambda (x) x))]) + (let-syntax ([loop (syntax-rules () + [(_ search) + (let loop ([l l] [seen null]) + (if (null? l) + l + (let* ([x (car l)] [k (key x)] [l (cdr l)]) + (if (search k seen) + (loop l seen) + (cons x (loop l (cons k seen)))))))])]) + (cond [(eq? =? equal?) (loop member)] + [(eq? =? eq?) (loop memq)] + [(eq? =? eqv?) (loop memv)] + [else (loop (lambda (x seen) + (ormap (lambda (y) (=? x y)) seen)))])))))) (define (filter-map f l . ls) (unless (and (procedure? f) (procedure-arity-includes? f (add1 (length ls)))) diff --git a/collects/scribblings/reference/pairs.scrbl b/collects/scribblings/reference/pairs.scrbl index 98158614b4..6b59f12b86 100644 --- a/collects/scribblings/reference/pairs.scrbl +++ b/collects/scribblings/reference/pairs.scrbl @@ -618,7 +618,10 @@ traversal. (flatten 'a) ]} -@defproc[(remove-duplicates [lst list?] [same? (any/c any/c . -> . any/c) equal?]) +@defproc[(remove-duplicates [lst list?] + [same? (any/c any/c . -> . any/c) equal?] + [#:key extract-key (any/c . -> . any/c) + (lambda (x) x)]) list?]{ Returns a list that has all items in @scheme[lst], but without @@ -627,6 +630,10 @@ of the list are equivalent. The resulting list is in the same order as @scheme[lst], and for any item that occurs multiple times, the first one is kept. +The @scheme[#:key] argument @scheme[extract-key] is used to extract a + key value from each list element, so two items are considered equal if + @scheme[(same? (extract-key x) (extract-key y))] is true. + @mz-examples[#:eval list-eval (remove-duplicates '(a b b a)) (remove-duplicates '(1 2 1.0 0)) diff --git a/collects/tests/mzscheme/list.ss b/collects/tests/mzscheme/list.ss index 891bdf4348..0aea0bd345 100644 --- a/collects/tests/mzscheme/list.ss +++ b/collects/tests/mzscheme/list.ss @@ -242,11 +242,14 @@ (test '(a b) rd '(a b a b a b)) (test '(a b) rd '(a a a b b b)) (test '(a b) rd '(a b b a)) ; keeps first occurrences + (test '("a" "b") rd '("a" "A" "b" "B" "a") #:key string-downcase) (let ([long (for/list ([i (in-range 300)]) i)]) (test long rd long) (test long rd (append long long)) (test long rd (append long (reverse long))) ; keeps first - (test long rd (append* (map (lambda (x) (list x x)) long))))) + (test long rd (append* (map (lambda (x) (list x x)) long))) + (test long rd (append long (map (lambda (x) (- x)) long)) #:key abs) + (test long rd (append long (map (lambda (x) (- x)) long)) = #:key abs))) ;; ---------- filter and filter-not ---------- (let ()