diff --git a/collects/scheme/list.ss b/collects/scheme/list.ss index db7e58d29c..472431e2da 100644 --- a/collects/scheme/list.ss +++ b/collects/scheme/list.ss @@ -14,7 +14,8 @@ append* flatten - add-between) + add-between + remove-duplicates) (define (first x) (if (and (pair? x) (list? x)) @@ -107,3 +108,72 @@ ;; (if (null? ls) ;; ls ;; (append l (car ls) (loop (cdr ls))))))])) + +(define (remove-duplicates l + #:test [=? equal?] + #:mode [mode 'naive] + #:keep [keep 'first] + #:ordered? [ordered? #t]) + (unless (list? l) (raise-type-error 'remove-duplicates "list" l)) + (unless (memq keep '(first last)) + (raise-type-error 'remove-duplicates "'first or 'last" keep)) + (case mode + ;; plain n^2 list traversal (optimized, since it's a common case) + [(naive) + (if (eq? 'first keep) + (let-syntax ([loop (syntax-rules () + [(_ search) + (let loop ([l l] [seen null]) + (if (null? l) + l + (let ([x (car l)] [l (cdr l)]) + (if (search x seen) + (loop l seen) + (cons x (loop l (cons x seen)))))))])]) + (cond [(eq? =? equal?) (loop member)] + [(eq? =? eq?) (loop memq)] + [else (loop (lambda (x seen) + (ormap (lambda (y) (=? x y)) seen)))])) + (let-syntax ([loop (syntax-rules () + [(_ search) + (let loop ([l l]) + (if (null? l) + l + (let ([x (car l)] [l (cdr l)]) + (if (search x l) + (loop l) + (cons x (loop l))))))])]) + (cond [(eq? =? equal?) (loop member)] + [(eq? =? eq?) (loop memq)] + [else (loop (lambda (x seen) + (ormap (lambda (y) (=? x y)) seen)))])))] + [(hash) + (let ([h (make-hash)]) + (if ordered? + (begin (for ([x l]) (hash-set! h x (add1 (hash-ref h x 0)))) + (filter (if (eq? 'first keep) + (lambda (x) (begin0 (hash-ref h x) (hash-set! h x #f))) + (lambda (x) + (let ([c (sub1 (hash-ref h x))]) + (hash-set! h x c) + (eq? 0 c)))) + l)) + ;; note: the hash entries always have the first occurrence as the key + ;; and the last one as the value + (begin (for ([x l]) (hash-set! h x x)) + (hash-map h (if (eq? 'first keep) + (lambda (x y) x) (lambda (x y) y))))))] + [(sorted) + (if (null? l) + l + (if (eq? 'last keep) + (let loop ([l l]) + (let ([x (car l)] [r (cdr l)]) + (cond [(null? r) l] + [(=? x (car r)) (loop r)] + [else (cons x (loop r))]))) + (let loop ([x (car l)] [l (cdr l)]) + (cond [(null? l) (list x)] + [(=? x (car l)) (loop x (cdr l))] + [else (cons x (loop (car l) (cdr l)))]))))] + [else (error 'remove-duplicates "unknown mode: ~e" mode)])) diff --git a/collects/tests/mzscheme/list.ss b/collects/tests/mzscheme/list.ss index 2e1b0f9d83..29cab89349 100644 --- a/collects/tests/mzscheme/list.ss +++ b/collects/tests/mzscheme/list.ss @@ -200,4 +200,36 @@ (test '(9 1 8 1 7) add-between '(9 8 7) 1) (test '(9 (1) 8) add-between '(9 8) '(1))) +;; ---------- remove-duplicates ---------- +(let () + (define rd remove-duplicates) + ;; basic 'naive tests + (test '() rd '()) + (test '(a) rd '(a a a a)) + (test '(a b) rd '(a b)) + (test '(a b) rd '(a b a b a b)) + (test '(a b) rd '(a a a b b b)) + (test '(a b) rd '(a b b a) #:keep 'first) + (test '(b a) rd '(a b b a) #:keep 'last) + ;; test with 'hash + (test '() rd '() #:mode 'hash) + (test '(a) rd '(a a a a) #:mode 'hash) + (test '(a b) rd '(a b) #:mode 'hash) + (test '(a b) rd '(a b a b a b) #:mode 'hash #:ordered? #t) + (test '(a b) rd '(a a a b b b) #:mode 'hash #:ordered? #t) + (test '(a b) rd '(a b b a) #:keep 'first #:mode 'hash #:ordered? #t) + (test '(b a) rd '(a b b a) #:keep 'last #:mode 'hash #:ordered? #t) + ;; test with 'sorted + (test '() rd '() #:mode 'sorted) + (test '(a) rd '(a a a a) #:mode 'sorted) + (test '(a b) rd '(a b) #:mode 'sorted) + (test '(a b a b a b) rd '(a b a b a b) #:mode 'sorted) + (test '(a b) rd '(a a a b b b) #:mode 'sorted) + (let ([a1 "a"] [a2 "a"] [b1 "b"] [b2 "b"]) + (test #t andmap eq? (list a1 b1) + (rd (list a1 a2 b1 b2) #:mode 'sorted #:keep 'first)) + (test #t andmap eq? (list a2 b2) + (rd (list a1 a2 b1 b2) #:mode 'sorted #:keep 'last))) + ) + (report-errs)