fixed sort to be stable, improved a little, properly test stability

svn: r8783
This commit is contained in:
Eli Barzilay 2008-02-24 16:04:17 +00:00
parent 1e002f2e5b
commit 2e418f3f46
2 changed files with 131 additions and 93 deletions

View File

@ -25,25 +25,6 @@
compose)
;; used by sort-internal; note that a and b are reversed, to we invert `less?'
;; test
(define (merge-sorted-lists! a b less?)
(define (loop r a b r-a?) ; r-a? for optimization -- is r connected to a?
(if (not (less? (mcar b) (mcar a)))
(begin (when r-a? (set-mcdr! r b))
(if (null? (mcdr b)) (set-mcdr! b a) (loop b a (mcdr b) #f)))
;; (car a) <= (car b)
(begin (unless r-a? (set-mcdr! r a))
(if (null? (mcdr a)) (set-mcdr! a b) (loop a (mcdr a) b #t)))))
(cond [(null? a) b]
[(null? b) a]
[(not (less? (mcar b) (mcar a)))
(if (null? (mcdr b)) (set-mcdr! b a) (loop b a (mcdr b) #f))
b]
[else ; (car a) <= (car b)
(if (null? (mcdr a)) (set-mcdr! a b) (loop a (mcdr a) b #t))
a]))
;; This is a destructive stable merge-sort, adapted from slib and improved by
;; Eli Barzilay
;; The original source said:
@ -51,97 +32,135 @@
;; by David H. D. Warren, and first used in the DEC-10 Prolog system.
;; R. A. O'Keefe adapted it to work destructively in Scheme.
;; but it's a plain destructive merge sort.
(define (sort-internal lst less? copy? who)
(define (step n)
;; lst is actually reversed when we get here, so all the `less?'
;; tests are surrounded by `not':
(define (sort-internal lst less? n)
(define (merge-sorted! a b)
(define (loop r a b r-a?) ; r-a? for optimization -- is r connected to a?
(if (less? (mcar b) (mcar a))
(begin (when r-a? (set-mcdr! r b))
(if (null? (mcdr b)) (set-mcdr! b a) (loop b a (mcdr b) #f)))
;; (car a) <= (car b)
(begin (unless r-a? (set-mcdr! r a))
(if (null? (mcdr a)) (set-mcdr! a b) (loop a (mcdr a) b #t)))))
(cond [(null? a) b]
[(null? b) a]
[(less? (mcar b) (mcar a))
(if (null? (mcdr b)) (set-mcdr! b a) (loop b a (mcdr b) #f))
b]
[else ; (car a) <= (car b)
(if (null? (mcdr a)) (set-mcdr! a b) (loop a (mcdr a) b #t))
a]))
(let step ([n n])
(cond [(> n 3) (let* (; let* not really needed with mzscheme's l->r eval
[j (quotient n 2)] [a (step j)] [b (step (- n j))])
(merge-sorted-lists! a b less?))]
(merge-sorted! a b less?))]
;; the following two cases are just explicit treatment of sublists
;; of length 2 and 3, could remove both (and use the above case for
;; n>1) and it would still work, except a little slower
[(= n 3) (let ([p lst] [p1 (mcdr lst)] [p2 (mcdr (mcdr lst))])
(let ([x (mcar p)] [y (mcar p1)] [z (mcar p2)])
(set! lst (mcdr p2))
(cond [(not (less? y x)) ; y x
(cond [(not (less? z y)) ; z y x
(cond [(less? y x) ; y x
(cond [(less? z y) ; z y x
(set-mcar! p z)
(set-mcar! p1 y)
(set-mcar! p2 x)]
[(not (less? z x)) ; y z x
[(less? z x) ; y z x
(set-mcar! p y)
(set-mcar! p1 z)
(set-mcar! p2 x)]
[else ; y x z
(set-mcar! p y)
(set-mcar! p1 x)])]
[(not (less? z x)) ; z x y
[(less? z x) ; z x y
(set-mcar! p z)
(set-mcar! p1 x)
(set-mcar! p2 y)]
[(not (less? z y)) ; x z y
[(less? z y) ; x z y
(set-mcar! p1 z)
(set-mcar! p2 y)])
(set-mcdr! p2 '())
p))]
[(= n 2) (let ([x (mcar lst)] [y (mcar (mcdr lst))] [p lst])
(set! lst (mcdr (mcdr lst)))
(when (not (less? y x)) (set-mcar! p y) (set-mcar! (mcdr p) x))
(when (less? y x) (set-mcar! p y) (set-mcar! (mcdr p) x))
(set-mcdr! (mcdr p) '())
p)]
[(= n 1) (let ([p lst])
(set! lst (mcdr lst))
(set-mcdr! p '())
p)]
[else '()]))
[else '()])))
(define (sort lst less?)
(unless (list? lst)
(raise-type-error 'sort "proper list" lst))
(unless (and (procedure? less?) (procedure-arity-includes? less? 2))
(raise-type-error 'sort "procedure of arity 2" less?))
(let ([n (length lst)])
(cond [(<= n 1) lst]
;; check if the list is already sorted
;; (which can be a common case, eg, directory lists).
[(let loop ([last (car lst)] [next (cdr lst)])
(or (null? next)
(and (not (less? (car next) last))
(loop (car next) (cdr next)))))
lst]
[else (set! lst
;; copy + reverse the list:
(let loop ([lst lst][a null])
(if (null? lst)
a
(loop (cdr lst)
(mcons (car lst) a)))))
;; Sort:
(let ([r (step n)])
;; copy + reverse the result:
(let loop ([r r][a null])
(if (null? r)
a
(loop (mcdr r) (cons (mcar r) a)))))])))
(define (sort lst less?) (sort-internal lst less? #t 'sort))
(cond
;; trivial case
[(< n 2) lst]
;; check if the list is already sorted
;; (which can be a common case, eg, directory lists).
[(let loop ([last (car lst)] [next (cdr lst)])
(or (null? next)
(and (not (less? (car next) last))
(loop (car next) (cdr next)))))
lst]
;; inlined cases, for optimization of short lists
[(< n 3)
(if (= n 2)
;; (because of the above test, we can assume that the input is
;; unsorted)
(list (cadr lst) (car lst))
(let ([a (car lst)] [b (cadr lst)] [c (caddr lst)])
;; General note: we need a stable sort, so we should always
;; compare (less? later-item earlier-item) since it gives more
;; information. A good way to see that we have good code is to
;; check that each permutation appears exactly once. This means
;; that n=4 will have 23 cases, so don't bother. (Homework: write
;; a macro to generate code for a specific N. Bonus: prove
;; correctness. Extra bonus: prove optimal solution. Extra extra
;; bonus: prove optimal solution exists, extract macro from
;; proof.)
(let ([a (car lst)] [b (cadr lst)] [c (caddr lst)])
(if (less? b a)
;; b<a
(if (less? c b)
(list c b a)
;; b<a, b<=c
(if (less? c a) (list b c a) (list b a c)))
;; a<=b, so c<b (b<=c is impossible due to above test)
(if (less? c a) (list c a b) (list a c b))))))]
[else (let (;; list->mlist
[mlst (let ([mlst (mcons (car lst) null)])
(let loop ([last mlst] [lst (cdr lst)])
(if (null? lst)
mlst
(let ([new (mcons (car lst) null)])
(set-mcdr! last new)
(loop new (cdr lst))))))])
;; mlist->list
(let loop ([r (sort-internal mlst less? n)])
(if (null? r)
r
(cons (mcar r) (loop (mcdr r))))))])))
(define (do-remove who item list equal?)
(unless (list? list)
(raise-type-error who "list" list))
(let loop ([list list])
(cond
[(null? list) null]
[(equal? item (car list)) (cdr list)]
[else (cons (car list) (loop (cdr list)))])))
(cond [(null? list) null]
[(equal? item (car list)) (cdr list)]
[else (cons (car list) (loop (cdr list)))])))
(define remove
(case-lambda
[(item list) (do-remove 'remove item list equal?)]
[(item list equal?)
(unless (and (procedure? equal?)
(procedure-arity-includes? equal? 2))
(raise-type-error 'remove "procedure (arity 2)" equal?))
(do-remove 'remove item list equal?)]))
(case-lambda
[(item list) (do-remove 'remove item list equal?)]
[(item list equal?)
(unless (and (procedure? equal?)
(procedure-arity-includes? equal? 2))
(raise-type-error 'remove "procedure (arity 2)" equal?))
(do-remove 'remove item list equal?)]))
(define (remq item list)
(do-remove 'remq item list eq?))
@ -156,18 +175,18 @@
(raise-type-error who "list" r))
(let rloop ([r r])
(cond
[(null? r) null]
[else (let ([first-r (car r)])
(let loop ([l-rest l])
(cond
[(null? l-rest) (cons first-r (rloop (cdr r)))]
[(equal? (car l-rest) first-r) (rloop (cdr r))]
[else (loop (cdr l-rest))])))])))
[(null? r) null]
[else (let ([first-r (car r)])
(let loop ([l-rest l])
(cond
[(null? l-rest) (cons first-r (rloop (cdr r)))]
[(equal? (car l-rest) first-r) (rloop (cdr r))]
[else (loop (cdr l-rest))])))])))
(define remove*
(case-lambda
[(l r) (do-remove* 'remove* l r equal?)]
[(l r equal?)
[(l r equal?)
(unless (and (procedure? equal?)
(procedure-arity-includes? equal? 2))
(raise-type-error 'remove* "procedure (arity 2)" equal?))

View File

@ -67,27 +67,46 @@
'("d" "f" "e" "c" "a" "c" "b")
string<?)
(let ()
(define (random-list n)
(define (car< x y) (< (car x) (car y)))
(define (random-list n range)
(let loop ([n n] [r '()])
(if (zero? n) r (loop (sub1 n) (cons (random 1000000) r)))))
(define (test-sort sort len times)
(if (zero? n) r (loop (sub1 n) (cons (list (random range)) r)))))
(define (test-sort len times)
(or (zero? times)
(and (let* ([rand (random-list len)]
[sorted (sort rand <)]
[same (sort rand (lambda (x y) #f))])
(and (let* ([rand (random-list len (if (even? times) 1000000 10))]
[orig< (lambda (x y) (memq y (cdr (memq x rand))))]
[sorted (sort rand car<)]
[l1 (reverse (cdr (reverse sorted)))]
[l2 (cdr sorted)])
(and (= (length sorted) (length rand))
;; sorted?
(andmap <=
(reverse (cdr (reverse sorted)))
(cdr sorted))
;; stable?
(equal? rand same)))
(test-sort sort len (sub1 times)))))
(test #t test-sort sort 1 10)
(test #t test-sort sort 2 10)
(test #t test-sort sort 10 100)
(test #t test-sort sort 100 100)
(test #t test-sort sort 1000 100))
(andmap (lambda (x1 x2)
(and (not (car< x2 x1)) ; sorted?
(or (car< x1 x2) (orig< x1 x2)))) ; stable?
l1 l2)))
(test-sort len (sub1 times)))))
(test #t test-sort 1 10)
(test #t test-sort 2 20)
(test #t test-sort 3 60)
(test #t test-sort 4 200)
(test #t test-sort 5 200)
(test #t test-sort 10 200)
(test #t test-sort 100 200)
(test #t test-sort 1000 200)
;; test stability
(test '((1) (2) (3 a) (3 b) (3 c)) sort '((3 a) (1) (3 b) (2) (3 c)) car<)
;; test short lists (+ stable)
(test '() sort '() car<)
(test '((1 1)) sort '((1 1)) car<)
(test '((1 2) (1 1)) sort '((1 2) (1 1)) car<)
(test '((1) (2)) sort '((2) (1)) car<)
(for-each (lambda (l) (test '((0 3) (1 1) (1 2)) sort l car<))
'(((1 1) (1 2) (0 3))
((1 1) (0 3) (1 2))
((0 3) (1 1) (1 2))))
(for-each (lambda (l) (test '((0 2) (0 3) (1 1)) sort l car<))
'(((1 1) (0 2) (0 3))
((0 2) (1 1) (0 3))
((0 2) (0 3) (1 1)))))
(let ([s (let loop ([n 1000])
(if (zero? n)