From 8e6b3e9ba0d52072645e4cae8c29783083a678fd Mon Sep 17 00:00:00 2001 From: Eli Barzilay Date: Mon, 21 Apr 2008 19:46:11 +0000 Subject: [PATCH] Simplified remove-duplicates interface, adjusted tests. svn: r9386 --- collects/scheme/list.ss | 109 ++++++++++++-------------------- collects/tests/mzscheme/list.ss | 28 ++------ 2 files changed, 47 insertions(+), 90 deletions(-) diff --git a/collects/scheme/list.ss b/collects/scheme/list.ss index 472431e2da..26d287215b 100644 --- a/collects/scheme/list.ss +++ b/collects/scheme/list.ss @@ -109,71 +109,44 @@ ;; ls ;; (append l (car ls) (loop (cdr ls))))))])) -(define (remove-duplicates l - #:test [=? equal?] - #:mode [mode 'naive] - #:keep [keep 'first] - #:ordered? [ordered? #t]) - (unless (list? l) (raise-type-error 'remove-duplicates "list" l)) - (unless (memq keep '(first last)) - (raise-type-error 'remove-duplicates "'first or 'last" keep)) - (case mode - ;; plain n^2 list traversal (optimized, since it's a common case) - [(naive) - (if (eq? 'first keep) - (let-syntax ([loop (syntax-rules () - [(_ search) - (let loop ([l l] [seen null]) - (if (null? l) - l - (let ([x (car l)] [l (cdr l)]) - (if (search x seen) - (loop l seen) - (cons x (loop l (cons x seen)))))))])]) - (cond [(eq? =? equal?) (loop member)] - [(eq? =? eq?) (loop memq)] - [else (loop (lambda (x seen) - (ormap (lambda (y) (=? x y)) seen)))])) - (let-syntax ([loop (syntax-rules () - [(_ search) - (let loop ([l l]) - (if (null? l) - l - (let ([x (car l)] [l (cdr l)]) - (if (search x l) - (loop l) - (cons x (loop l))))))])]) - (cond [(eq? =? equal?) (loop member)] - [(eq? =? eq?) (loop memq)] - [else (loop (lambda (x seen) - (ormap (lambda (y) (=? x y)) seen)))])))] - [(hash) - (let ([h (make-hash)]) - (if ordered? - (begin (for ([x l]) (hash-set! h x (add1 (hash-ref h x 0)))) - (filter (if (eq? 'first keep) - (lambda (x) (begin0 (hash-ref h x) (hash-set! h x #f))) - (lambda (x) - (let ([c (sub1 (hash-ref h x))]) - (hash-set! h x c) - (eq? 0 c)))) - l)) - ;; note: the hash entries always have the first occurrence as the key - ;; and the last one as the value - (begin (for ([x l]) (hash-set! h x x)) - (hash-map h (if (eq? 'first keep) - (lambda (x y) x) (lambda (x y) y))))))] - [(sorted) - (if (null? l) - l - (if (eq? 'last keep) - (let loop ([l l]) - (let ([x (car l)] [r (cdr l)]) - (cond [(null? r) l] - [(=? x (car r)) (loop r)] - [else (cons x (loop r))]))) - (let loop ([x (car l)] [l (cdr l)]) - (cond [(null? l) (list x)] - [(=? x (car l)) (loop x (cdr l))] - [else (cons x (loop (car l) (cdr l)))]))))] - [else (error 'remove-duplicates "unknown mode: ~e" mode)])) +;; utility: returns the length for a proper list, #f otherwise; does not handle +;; circular lists +(define (length? x) + (let loop ([x x] [n 0]) + (if (pair? x) + (loop (cdr x) (add1 n)) + (and (null? x) n)))) + +(define (remove-duplicates l [=? equal?]) + (let ([len (length? l)]) + (unless len (raise-type-error 'remove-duplicates "list" l)) + (let ([h (cond [(< len 40) #f] + [(eq? =? eq?) (make-hasheq)] + [(eq? =? equal?) (make-hash)] + [else #f])]) + (if h + ;; Using a hash table when the list is long enough and a using `equal?' + ;; or `eq?'. The length threshold (40) was determined by trying it out + ;; with lists of length n holding (random n) numbers. + (let loop ([l l]) + (if (null? l) + l + (let ([x (car l)] [l (cdr l)]) + (if (hash-ref h x #f) + (loop l) + (begin (hash-set! h x #t) (cons x (loop l))))))) + ;; plain n^2 list traversal (optimized for common cases) + (let-syntax ([loop (syntax-rules () + [(_ search) + (let loop ([l l] [seen null]) + (if (null? l) + l + (let ([x (car l)] [l (cdr l)]) + (if (search x seen) + (loop l seen) + (cons x (loop l (cons x seen)))))))])]) + (cond [(eq? =? equal?) (loop member)] + [(eq? =? eq?) (loop memq)] + [(eq? =? eqv?) (loop memv)] + [else (loop (lambda (x seen) + (ormap (lambda (y) (=? x y)) seen)))])))))) diff --git a/collects/tests/mzscheme/list.ss b/collects/tests/mzscheme/list.ss index 29cab89349..8d91fb3cbf 100644 --- a/collects/tests/mzscheme/list.ss +++ b/collects/tests/mzscheme/list.ss @@ -209,27 +209,11 @@ (test '(a b) rd '(a b)) (test '(a b) rd '(a b a b a b)) (test '(a b) rd '(a a a b b b)) - (test '(a b) rd '(a b b a) #:keep 'first) - (test '(b a) rd '(a b b a) #:keep 'last) - ;; test with 'hash - (test '() rd '() #:mode 'hash) - (test '(a) rd '(a a a a) #:mode 'hash) - (test '(a b) rd '(a b) #:mode 'hash) - (test '(a b) rd '(a b a b a b) #:mode 'hash #:ordered? #t) - (test '(a b) rd '(a a a b b b) #:mode 'hash #:ordered? #t) - (test '(a b) rd '(a b b a) #:keep 'first #:mode 'hash #:ordered? #t) - (test '(b a) rd '(a b b a) #:keep 'last #:mode 'hash #:ordered? #t) - ;; test with 'sorted - (test '() rd '() #:mode 'sorted) - (test '(a) rd '(a a a a) #:mode 'sorted) - (test '(a b) rd '(a b) #:mode 'sorted) - (test '(a b a b a b) rd '(a b a b a b) #:mode 'sorted) - (test '(a b) rd '(a a a b b b) #:mode 'sorted) - (let ([a1 "a"] [a2 "a"] [b1 "b"] [b2 "b"]) - (test #t andmap eq? (list a1 b1) - (rd (list a1 a2 b1 b2) #:mode 'sorted #:keep 'first)) - (test #t andmap eq? (list a2 b2) - (rd (list a1 a2 b1 b2) #:mode 'sorted #:keep 'last))) - ) + (test '(a b) rd '(a b b a)) ; keeps first occurrences + (let ([long (for/list ([i (in-range 300)]) i)]) + (test long rd long) + (test long rd (append long long)) + (test long rd (append long (reverse long))) ; keeps first + (test long rd (append* (map (lambda (x) (list x x)) long))))) (report-errs)