From 4d9751e98c9fbab1c3076cb3a77120c24352faec Mon Sep 17 00:00:00 2001 From: Vincent St-Amour Date: Thu, 16 Jul 2015 12:00:40 -0500 Subject: [PATCH] Move check-duplicate from unstable/list to racket/list. Make its interface uniform with remove-duplicates. --- .../scribblings/reference/pairs.scrbl | 23 ++++++++++ pkgs/racket-test-core/tests/racket/list.rktl | 9 ++++ racket/collects/racket/list.rkt | 44 +++++++++++++++++++ 3 files changed, 76 insertions(+) diff --git a/pkgs/racket-doc/scribblings/reference/pairs.scrbl b/pkgs/racket-doc/scribblings/reference/pairs.scrbl index 7625d31182..fa75e3b11e 100644 --- a/pkgs/racket-doc/scribblings/reference/pairs.scrbl +++ b/pkgs/racket-doc/scribblings/reference/pairs.scrbl @@ -1049,6 +1049,29 @@ traversal. (flatten 'a)]} +@defproc[(check-duplicate [lst list?] + [same? (any/c any/c . -> . any/c) equal?] + [#:key extract-key (-> any/c any/c) (lambda (x) x)]) + (or/c any/c #f)]{ + +Returns the first duplicate item in @racket[lst]. More precisely, it +returns the first @racket[_x] such that there was a previous +@racket[_y] where @racket[(same? (extract-key _x) (extract-key _y))]. + +The @racket[same?] argument should be an equivalence predicate such as +@racket[equal?] or @racket[eqv?] or a dictionary. +The procedures @racket[equal?], @racket[eqv?], and @racket[eq?] automatically +use a dictionary for speed. + +@examples[#:eval list-eval +(check-duplicate '(1 2 3 4)) +(check-duplicate '(1 2 3 2 1)) +(check-duplicate '((a 1) (b 2) (a 3)) #:key car) +(check-duplicate '(1 2 3 4 5 6) + (lambda (x y) (equal? (modulo x 3) (modulo y 3)))) +] +} + @defproc[(remove-duplicates [lst list?] [same? (any/c any/c . -> . any/c) equal?] [#:key extract-key (any/c . -> . any/c) diff --git a/pkgs/racket-test-core/tests/racket/list.rktl b/pkgs/racket-test-core/tests/racket/list.rktl index 2904637a6a..1a7adec316 100644 --- a/pkgs/racket-test-core/tests/racket/list.rktl +++ b/pkgs/racket-test-core/tests/racket/list.rktl @@ -328,6 +328,15 @@ (test `(,@fst ,@r2 ,@lst) add-between l x #:splice? #t #:before-first fst #:after-last lst #:before-last y))))) +;; ---------- check-duplicate ---------- + +(test #f check-duplicate '()) +(test 'a check-duplicate '(a a)) +(test 'a check-duplicate '(a b a)) +(test 'a check-duplicate '(a a b)) +(test '(a 3) check-duplicate '((a 1) (b 2) (a 3)) #:key car) +(test 4 check-duplicate '(1 2 3 4 5 6) (lambda (x y) (equal? (modulo x 3) (modulo y 3)))) + ;; ---------- remove-duplicates ---------- (let () (define rd remove-duplicates) diff --git a/racket/collects/racket/list.rkt b/racket/collects/racket/list.rkt index 6468cded09..c3745c6c00 100644 --- a/racket/collects/racket/list.rkt +++ b/racket/collects/racket/list.rkt @@ -27,6 +27,7 @@ flatten add-between remove-duplicates + check-duplicate filter-map count partition @@ -347,6 +348,49 @@ (cons x (loop l)))))))])]) (if key (loop key) (loop no-key)))]))) +;; check-duplicate : (listof X) +;; [(K K -> bool)] +;; #:key (X -> K) +;; -> X or #f +(define (check-duplicate items + [same? equal?] + #:key [key values]) + (cond [(eq? same? equal?) + (check-duplicate/t items key (make-hash))] + [(eq? same? eq?) + (check-duplicate/t items key (make-hasheq))] + [(eq? same? eqv?) + (check-duplicate/t items key (make-hasheqv))] + [else + (check-duplicate/list items key same?)])) +(define (check-duplicate/t items key table) + (let loop ([items items]) + (and (pair? items) + (let ([key-item (key (car items))]) + (if (hash-ref table key-item #f) + (car items) + (begin (hash-set! table key-item #t) + (loop (cdr items)))))))) +(define (check-duplicate/list items key same?) + (let loop ([items items] [sofar null]) + (and (pair? items) + (let ([key-item (key (car items))]) + (if (for/or ([prev (in-list sofar)]) + (same? key-item prev)) + (car items) + (loop (cdr items) (cons key-item sofar))))))) + +;; Eli: Just to have a record of this: my complaint about having this +;; code separately from `remove-duplicates' still stands. Specifically, +;; that function decides when to use a hash table to make things faster, +;; and this code would benefit from the same. It would be much better +;; to extend that function so it can be used for both tasks rather than +;; a new piece of code that does it (only do it in a worse way, re +;; performance). Doing this can also benefit `remove-duplicates' -- for +;; example, make it accept a container so that users can choose how +;; when/if to use a hash table. + + (define (check-filter-arguments who f l ls) (unless (procedure? f) (raise-argument-error who "procedure?" f))