Move check-duplicate from unstable/list to racket/list.

Make its interface uniform with remove-duplicates.
Vincent St-Amour 2015-07-16 12:00:40 -05:00
parent 60e7f1b7c7
commit 4d9751e98c
3 changed files with 76 additions and 0 deletions

@@ -1049,6 +1049,29 @@ traversal.
(flatten 'a)]}
@defproc[(check-duplicate [lst list?]
[same? (any/c any/c . -> . any/c) equal?]
[#:key extract-key (-> any/c any/c) (lambda (x) x)])
(or/c any/c #f)]{
Returns the first duplicate item in @racket[lst], or @racket[#f] if
there is none. More precisely, it returns the first @racket[_x] such
that there was a previous @racket[_y] where
@racket[(same? (extract-key _x) (extract-key _y))].
The @racket[same?] argument should be an equivalence predicate such as
@racket[equal?] or @racket[eqv?]. The procedures @racket[equal?],
@racket[eqv?], and @racket[eq?] automatically use a dictionary for speed.
@examples[#:eval list-eval
(check-duplicate '(1 2 3 4))
(check-duplicate '(1 2 3 2 1))
(check-duplicate '((a 1) (b 2) (a 3)) #:key car)
(check-duplicate '(1 2 3 4 5 6)
(lambda (x y) (equal? (modulo x 3) (modulo y 3))))
]
}
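
A minimal usage sketch (not part of the diff), assuming a Racket where
this commit's racket/list is available; check-keys and the sample
association lists are illustrative names only:

#lang racket/base
(require racket/list)

;; Reject an association list whose keys repeat, using #:key to compare
;; only the car of each entry (the default same? is equal?).
(define (check-keys alist)
  (define dup (check-duplicate alist #:key car))
  (when dup
    (error 'check-keys "duplicate key: ~e" (car dup)))
  alist)

(check-keys '((a 1) (b 2)))           ; => '((a 1) (b 2))
;; (check-keys '((a 1) (b 2) (a 3)))  ; error: duplicate key: 'a
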
@defproc[(remove-duplicates [lst list?]
[same? (any/c any/c . -> . any/c) equal?]
[#:key extract-key (any/c . -> . any/c)

@@ -328,6 +328,15 @@
(test `(,@fst ,@r2 ,@lst) add-between l x
#:splice? #t #:before-first fst #:after-last lst #:before-last y)))))
;; ---------- check-duplicate ----------
(test #f check-duplicate '())
(test 'a check-duplicate '(a a))
(test 'a check-duplicate '(a b a))
(test 'a check-duplicate '(a a b))
(test '(a 3) check-duplicate '((a 1) (b 2) (a 3)) #:key car)
(test 4 check-duplicate '(1 2 3 4 5 6) (lambda (x y) (equal? (modulo x 3) (modulo y 3))))
;; ---------- remove-duplicates ----------
(let ()
(define rd remove-duplicates)

@@ -27,6 +27,7 @@
flatten
add-between
remove-duplicates
check-duplicate
filter-map
count
partition
@@ -347,6 +348,49 @@
(cons x (loop l)))))))])])
(if key (loop key) (loop no-key)))])))
;; check-duplicate : (listof X)
;; [(K K -> bool)]
;; #:key (X -> K)
;; -> X or #f
(define (check-duplicate items
[same? equal?]
#:key [key values])
(cond [(eq? same? equal?)
(check-duplicate/t items key (make-hash))]
[(eq? same? eq?)
(check-duplicate/t items key (make-hasheq))]
[(eq? same? eqv?)
(check-duplicate/t items key (make-hasheqv))]
[else
(check-duplicate/list items key same?)]))
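;; check-duplicate/t : one pass over `items', recording each extracted
;; key in the mutable hash `table'; returns the first item whose key is
;; already present, or #f.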
(define (check-duplicate/t items key table)
(let loop ([items items])
(and (pair? items)
(let ([key-item (key (car items))])
(if (hash-ref table key-item #f)
(car items)
(begin (hash-set! table key-item #t)
(loop (cdr items))))))))
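;; check-duplicate/list : fallback for an arbitrary `same?'; compares
;; each item's key against every previously seen key, so it is
;; quadratic in the worst case.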
(define (check-duplicate/list items key same?)
(let loop ([items items] [sofar null])
(and (pair? items)
(let ([key-item (key (car items))])
(if (for/or ([prev (in-list sofar)])
(same? key-item prev))
(car items)
(loop (cdr items) (cons key-item sofar)))))))
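
A small sketch (not part of the diff) of the dispatch above: only a
same? that is eq? to equal?, eqv?, or eq? gets the hash-table path, so
an extensionally equivalent wrapper falls back to the pairwise scan in
check-duplicate/list. Both calls below return 3:

#lang racket/base
(require racket/list)

(check-duplicate '(1 2 3 3))                              ; hash-table path
(check-duplicate '(1 2 3 3) (lambda (x y) (equal? x y)))  ; pairwise scan
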
;; Eli: Just to have a record of this: my complaint about having this
;; code separately from `remove-duplicates' still stands. Specifically,
;; that function decides when to use a hash table to make things faster,
;; and this code would benefit from the same. It would be much better
;; to extend that function so it can be used for both tasks rather than
;; adding a new piece of code that does it (only in a worse way,
;; performance-wise). Doing this can also benefit `remove-duplicates' --
;; for example, make it accept a container so that users can choose
;; how/when/if to use a hash table.
(define (check-filter-arguments who f l ls)
(unless (procedure? f)
(raise-argument-error who "procedure?" f))