First version of a vector-based "half-copying" merge sort, which will end up
being more than twice faster than the current version. [Currently works only with 2^n lists, and otherwise broken -- committed to keep the development history in svn.] svn: r17001
This commit is contained in:
parent
f719aac2be
commit
a272c479a6
|
@ -4,23 +4,26 @@
|
||||||
|
|
||||||
(#%provide sort)
|
(#%provide sort)
|
||||||
|
|
||||||
;; This is a destructive stable merge-sort, adapted from slib and improved by
|
#|
|
||||||
;; Eli Barzilay.
|
|
||||||
;; The original source said:
|
|
||||||
;; It uses a version of merge-sort invented, to the best of my knowledge, by
|
|
||||||
;; David H. D. Warren, and first used in the DEC-10 Prolog system.
|
|
||||||
;; R. A. O'Keefe adapted it to work destructively in Scheme.
|
|
||||||
;; but it's a plain destructive merge sort, which I optimized further.
|
|
||||||
|
|
||||||
;; The source uses macros to optimize some common cases (eg, no `getkey'
|
Based on "Fast mergesort implementation based on half-copying merge algorithm",
|
||||||
;; function, or precompiled versions with inlinable common comparison
|
Cezary Juszczak, http://kicia.ift.uni.wroc.pl/algorytmy/mergesortpaper.pdf
|
||||||
;; predicates) -- they are local macros so they're not left in the compiled
|
Written in Scheme by Eli Barzilay. (Note: the reason for the seemingly
|
||||||
;; code.
|
redundant pointer arithmetic in that paper is dealing with cases of uneven
|
||||||
|
number of elements.)
|
||||||
|
|
||||||
|
The source uses macros to optimize some common cases (eg, no `getkey'
|
||||||
|
function, or precompiled versions with inlinable common comparison
|
||||||
|
predicates) -- they are local macros so they're not left in the compiled
|
||||||
|
code.
|
||||||
|
|
||||||
|
Note that there is no error checking on the arguments -- the `sort' function
|
||||||
|
that this module provide is then wrapped up by a keyworded version in
|
||||||
|
"scheme/private/list.ss", and that's what everybody sees. The wrapper is
|
||||||
|
doing these checks.
|
||||||
|
|
||||||
|
|#
|
||||||
|
|
||||||
;; Note that there is no error checking on the arguments -- the `sort' function
|
|
||||||
;; that this module provide is then wrapped up by a keyworded version in
|
|
||||||
;; "scheme/private/list.ss", and that's what everybody sees. The wrapper is
|
|
||||||
;; doing these checks.
|
|
||||||
|
|
||||||
(define sort (let ()
|
(define sort (let ()
|
||||||
|
|
||||||
|
@ -29,80 +32,57 @@
|
||||||
[(dr (foo . pattern) template)
|
[(dr (foo . pattern) template)
|
||||||
(define-syntax foo (syntax-rules () [(_ . pattern) template]))]))
|
(define-syntax foo (syntax-rules () [(_ . pattern) template]))]))
|
||||||
|
|
||||||
(define-syntax-rule (sort-internal-body lst *less? n has-getkey? getkey)
|
(define-syntax-rule (sort-internal-body v *<? n has-getkey? getkey)
|
||||||
(begin
|
(begin
|
||||||
(define-syntax-rule (less? x y)
|
(define-syntax-rule (<? x y)
|
||||||
(if has-getkey? (*less? (getkey x) (getkey y)) (*less? x y)))
|
(if has-getkey? (*<? (getkey x) (getkey y)) (*<? x y)))
|
||||||
(define (merge-sorted! a b)
|
(define-syntax-rule (ref n) (vector-ref v n))
|
||||||
;; r-a? for optimization -- is r connected to a?
|
(define-syntax-rule (set! n x) (vector-set! v n x))
|
||||||
(define (loop r a b r-a?)
|
|
||||||
(if (less? (mcar b) (mcar a))
|
(define (merge1 A1 A2 B1 B2 C1 C2)
|
||||||
(begin
|
(when (< C1 B1)
|
||||||
(when r-a? (set-mcdr! r b))
|
(if (< B1 B2)
|
||||||
(if (null? (mcdr b)) (set-mcdr! b a) (loop b a (mcdr b) #f)))
|
(if (<? (ref B1) (ref A1))
|
||||||
;; (car a) <= (car b)
|
(begin (set! C1 (ref B1))
|
||||||
(begin
|
(merge1 A1 A2 (add1 B1) B2 (add1 C1) C2))
|
||||||
(unless r-a? (set-mcdr! r a))
|
(begin (set! C1 (ref A1))
|
||||||
(if (null? (mcdr a)) (set-mcdr! a b) (loop a (mcdr a) b #t)))))
|
(merge1 (add1 A1) A2 B1 B2 (add1 C1) C2)))
|
||||||
(cond [(null? a) b]
|
(begin (set! C1 (ref A1))
|
||||||
[(null? b) a]
|
(merge1 (add1 A1) A2 B1 B2 (add1 C1) C2)))))
|
||||||
[(less? (mcar b) (mcar a))
|
(define (merge2 A1 A2 B1 B2 C1 C2)
|
||||||
(if (null? (mcdr b)) (set-mcdr! b a) (loop b a (mcdr b) #f))
|
(when (< C1 B1)
|
||||||
b]
|
(if (< B1 B2)
|
||||||
[else ; (car a) <= (car b)
|
(if (<? (ref A1) (ref B1))
|
||||||
(if (null? (mcdr a)) (set-mcdr! a b) (loop a (mcdr a) b #t))
|
(begin (set! C1 (ref A1))
|
||||||
a]))
|
(merge2 (add1 A1) A2 B1 B2 (add1 C1) C2))
|
||||||
(let step ([n n])
|
(begin (set! C1 (ref B1))
|
||||||
(cond [(> n 3)
|
(merge2 A1 A2 (add1 B1) B2 (add1 C1) C2)))
|
||||||
(let* (; let* not really needed with mzscheme's l->r eval
|
(begin (set! C1 (ref A1))
|
||||||
[j (quotient n 2)] [a (step j)] [b (step (- n j))])
|
(merge2 (add1 A1) A2 B1 B2 (add1 C1) C2)))))
|
||||||
(merge-sorted! a b))]
|
|
||||||
;; the following two cases are just explicit treatment of sublists
|
(define (copying-mergesort Alo Ahi Blo Bhi)
|
||||||
;; of length 2 and 3, could remove both (and use the above case for
|
(cond [(< Alo (sub1 Ahi))
|
||||||
;; n>1) and it would still work, except a little slower
|
(let ([Amid (/ (+ Alo Ahi) 2)] [Bmid (/ (+ Blo Bhi) 2)])
|
||||||
[(= n 3) (let ([p lst] [p1 (mcdr lst)] [p2 (mcdr (mcdr lst))])
|
(copying-mergesort Amid Ahi Bmid Bhi)
|
||||||
(let ([x (mcar p)] [y (mcar p1)] [z (mcar p2)])
|
(copying-mergesort Alo Amid Amid Ahi)
|
||||||
(set! lst (mcdr p2))
|
(merge1 Amid Ahi Bmid Bhi Blo Bhi))]
|
||||||
(cond [(less? y x) ; y x
|
[(= Alo (sub1 Ahi))
|
||||||
(cond [(less? z y) ; z y x
|
(set! Blo (ref Alo))]))
|
||||||
(set-mcar! p z)
|
|
||||||
(set-mcar! p1 y)
|
(define (mergesort Alo Ahi B1lo B1hi)
|
||||||
(set-mcar! p2 x)]
|
(let ([Amid (/ (+ Alo Ahi) 2)])
|
||||||
[(less? z x) ; y z x
|
(copying-mergesort Amid Ahi B1lo B1hi)
|
||||||
(set-mcar! p y)
|
(copying-mergesort Alo Amid Amid Ahi)
|
||||||
(set-mcar! p1 z)
|
(merge2 B1lo B1hi Amid Ahi Alo Ahi)))
|
||||||
(set-mcar! p2 x)]
|
|
||||||
[else ; y x z
|
(mergesort 0 n n (+ n (/ n 2)))))
|
||||||
(set-mcar! p y)
|
|
||||||
(set-mcar! p1 x)])]
|
|
||||||
[(less? z x) ; z x y
|
|
||||||
(set-mcar! p z)
|
|
||||||
(set-mcar! p1 x)
|
|
||||||
(set-mcar! p2 y)]
|
|
||||||
[(less? z y) ; x z y
|
|
||||||
(set-mcar! p1 z)
|
|
||||||
(set-mcar! p2 y)])
|
|
||||||
(set-mcdr! p2 '())
|
|
||||||
p))]
|
|
||||||
[(= n 2) (let ([x (mcar lst)] [y (mcar (mcdr lst))] [p lst])
|
|
||||||
(set! lst (mcdr (mcdr lst)))
|
|
||||||
(when (less? y x)
|
|
||||||
(set-mcar! p y)
|
|
||||||
(set-mcar! (mcdr p) x))
|
|
||||||
(set-mcdr! (mcdr p) '())
|
|
||||||
p)]
|
|
||||||
[(= n 1) (let ([p lst])
|
|
||||||
(set! lst (mcdr lst))
|
|
||||||
(set-mcdr! p '())
|
|
||||||
p)]
|
|
||||||
[else '()]))))
|
|
||||||
|
|
||||||
(define sort-internals (make-hasheq))
|
(define sort-internals (make-hasheq))
|
||||||
(define _
|
(define _
|
||||||
(let ()
|
(let ()
|
||||||
(define-syntax-rule (precomp less? more ...)
|
(define-syntax-rule (precomp <? more ...)
|
||||||
(let ([proc (lambda (lst n) (sort-internal-body lst less? n #f #f))])
|
(let ([proc (lambda (vec n) (sort-internal-body vec <? n #f #f))])
|
||||||
(hash-set! sort-internals less? proc)
|
(hash-set! sort-internals <? proc)
|
||||||
(hash-set! sort-internals more proc) ...))
|
(hash-set! sort-internals more proc) ...))
|
||||||
(precomp < <=)
|
(precomp < <=)
|
||||||
(precomp > >=)
|
(precomp > >=)
|
||||||
|
@ -112,44 +92,47 @@
|
||||||
|
|
||||||
(define sort-internal
|
(define sort-internal
|
||||||
(case-lambda
|
(case-lambda
|
||||||
[(less? lst n)
|
[(<? vec n)
|
||||||
(let ([si (hash-ref sort-internals less? #f)])
|
(let ([si (hash-ref sort-internals <? #f)])
|
||||||
(if si
|
(if si
|
||||||
;; use a precompiled function if found
|
;; use a precompiled function if found
|
||||||
(si lst n)
|
(si vec n)
|
||||||
;; otherwise, use the generic code
|
;; otherwise, use the generic code
|
||||||
(let () (sort-internal-body lst less? n #f #f))))]
|
(let () (sort-internal-body vec <? n #f #f))))]
|
||||||
[(less? lst n getkey)
|
[(<? vec n getkey)
|
||||||
(sort-internal-body lst less? n #t getkey)]))
|
(let () (sort-internal-body vec <? n #t getkey))]))
|
||||||
|
|
||||||
(define-syntax-rule (sort-body lst *less? has-getkey? getkey cache-keys?)
|
(define-syntax-rule (sort-body lst *<? has-getkey? getkey cache-keys?)
|
||||||
(let ([n (length lst)])
|
(let ([n (length lst)])
|
||||||
(define-syntax-rule (less? x y)
|
(define-syntax-rule (<? x y)
|
||||||
(if has-getkey? (*less? (getkey x) (getkey y)) (*less? x y)))
|
(if has-getkey? (*<? (getkey x) (getkey y)) (*<? x y)))
|
||||||
(cond
|
(cond
|
||||||
;; trivial case
|
;; trivial case
|
||||||
[(= n 0) lst]
|
[(= n 0) lst]
|
||||||
;; below we can assume a non-empty input list
|
;; below we can assume a non-empty input list
|
||||||
[cache-keys?
|
[cache-keys?
|
||||||
;; decorate while converting to an mlist, and undecorate when going
|
;; decorate while converting to a vector, and undecorate when going
|
||||||
;; back, always do this for consistency
|
;; back, always do this for consistency
|
||||||
(let (;; list -> decorated-mlist
|
(let ([vec (make-vector (+ n (/ n 2)))])
|
||||||
[mlst (let ([x (car lst)]) (mcons (cons (getkey x) x) null))])
|
;; list -> decorated-vector
|
||||||
(let loop ([last mlst] [lst (cdr lst)])
|
(let loop ([i 0] [lst lst])
|
||||||
(when (pair? lst)
|
(when (pair? lst)
|
||||||
(let ([new (let ([x (car lst)]) (mcons (cons (getkey x) x) null))])
|
(let ([x (car lst)])
|
||||||
(set-mcdr! last new)
|
(vector-set! vec i (cons (getkey x) x))
|
||||||
(loop new (cdr lst)))))
|
(loop (add1 i) (cdr lst)))))
|
||||||
;; decorated-mlist -> list
|
;; sort
|
||||||
(let loop ([r (sort-internal *less? mlst n car)])
|
(sort-internal *<? vec n car)
|
||||||
(if (null? r) r (cons (cdr (mcar r)) (loop (mcdr r))))))]
|
;; decorated-vector -> list
|
||||||
|
(let loop ([i n] [r '()])
|
||||||
|
(let ([i (sub1 i)])
|
||||||
|
(if (< i 0) r (loop i (cons (cdr (vector-ref vec i)) r))))))]
|
||||||
;; trivial cases
|
;; trivial cases
|
||||||
[(< n 2) lst]
|
[(< n 2) lst]
|
||||||
;; check if the list is already sorted (which can be common, eg,
|
;; check if the list is already sorted (which can be common, eg,
|
||||||
;; directory lists)
|
;; directory lists)
|
||||||
[(let loop ([last (car lst)] [next (cdr lst)])
|
[(let loop ([last (car lst)] [next (cdr lst)])
|
||||||
(or (null? next)
|
(or (null? next)
|
||||||
(and (not (less? (car next) last))
|
(and (not (<? (car next) last))
|
||||||
(loop (car next) (cdr next)))))
|
(loop (car next) (cdr next)))))
|
||||||
lst]
|
lst]
|
||||||
;; below we can assume an unsorted list
|
;; below we can assume an unsorted list
|
||||||
|
@ -161,43 +144,45 @@
|
||||||
(list (cadr lst) (car lst))
|
(list (cadr lst) (car lst))
|
||||||
(let ([a (car lst)] [b (cadr lst)] [c (caddr lst)])
|
(let ([a (car lst)] [b (cadr lst)] [c (caddr lst)])
|
||||||
;; General note: we need a stable sort, so we should always compare
|
;; General note: we need a stable sort, so we should always compare
|
||||||
;; (less? later-item earlier-item) since it gives more information.
|
;; (<? later-item earlier-item) since it gives more information. A
|
||||||
;; A good way to see that we have good code is to check that each
|
;; good way to see that we have good code is to check that each
|
||||||
;; permutation appears exactly once. This means that n=4 will have
|
;; permutation appears exactly once. This means that n=4 will have
|
||||||
;; 23 cases, so don't bother. (Homework: write a macro to generate
|
;; 23 cases, so don't bother. (Homework: write a macro to generate
|
||||||
;; code for a specific N. Bonus: prove correctness. Extra bonus:
|
;; code for a specific N. Bonus: prove correctness. Extra bonus:
|
||||||
;; prove optimal solution. Extra extra bonus: prove optimal
|
;; prove optimal solution. Extra extra bonus: prove optimal
|
||||||
;; solution exists, extract macro from proof.)
|
;; solution exists, extract macro from proof.)
|
||||||
(let ([a (car lst)] [b (cadr lst)] [c (caddr lst)])
|
(let ([a (car lst)] [b (cadr lst)] [c (caddr lst)])
|
||||||
(if (less? b a)
|
(if (<? b a)
|
||||||
;; b<a
|
;; b<a
|
||||||
(if (less? c b)
|
(if (<? c b)
|
||||||
(list c b a)
|
(list c b a)
|
||||||
;; b<a, b<=c
|
;; b<a, b<=c
|
||||||
(if (less? c a) (list b c a) (list b a c)))
|
(if (<? c a) (list b c a) (list b a c)))
|
||||||
;; a<=b, so c<b (b<=c is impossible due to above test)
|
;; a<=b, so c<b (b<=c is impossible due to above test)
|
||||||
(if (less? c a) (list c a b) (list a c b))))))]
|
(if (<? c a) (list c a b) (list a c b))))))]
|
||||||
[else (let (;; list -> mlist
|
[else (let ([vec (make-vector (+ n (/ n 2)))])
|
||||||
[mlst (mcons (car lst) null)])
|
;; list -> vector
|
||||||
(let loop ([last mlst] [lst (cdr lst)])
|
(let loop ([i 0] [lst lst])
|
||||||
(when (pair? lst)
|
(when (pair? lst)
|
||||||
(let ([new (mcons (car lst) null)])
|
(vector-set! vec i (car lst))
|
||||||
(set-mcdr! last new)
|
(loop (add1 i) (cdr lst))))
|
||||||
(loop new (cdr lst)))))
|
;; sort
|
||||||
;; mlist -> list
|
(if getkey
|
||||||
(let loop ([r (if getkey
|
(sort-internal *<? vec n getkey)
|
||||||
(sort-internal *less? mlst n getkey)
|
(sort-internal *<? vec n))
|
||||||
(sort-internal *less? mlst n))])
|
;; vector -> list
|
||||||
(if (null? r) r (cons (mcar r) (loop (mcdr r))))))])))
|
(let loop ([i n] [r '()])
|
||||||
|
(let ([i (sub1 i)])
|
||||||
|
(if (< i 0) r (loop i (cons (vector-ref vec i) r))))))])))
|
||||||
|
|
||||||
;; Finally, this is the provided `sort' value
|
;; Finally, this is the provided `sort' value
|
||||||
(case-lambda
|
(case-lambda
|
||||||
[(lst less?) (sort-body lst less? #f #f #f)]
|
[(lst <?) (sort-body lst <? #f #f #f)]
|
||||||
[(lst less? getkey)
|
[(lst <? getkey)
|
||||||
(if (and getkey (not (eq? values getkey)))
|
(if (and getkey (not (eq? values getkey)))
|
||||||
(sort lst less? getkey #f) (sort lst less?))]
|
(sort lst <? getkey #f) (sort lst <?))]
|
||||||
[(lst less? getkey cache-keys?)
|
[(lst <? getkey cache-keys?)
|
||||||
(if (and getkey (not (eq? values getkey)))
|
(if (and getkey (not (eq? values getkey)))
|
||||||
(sort-body lst less? #t getkey cache-keys?) (sort lst less?))])
|
(sort-body lst <? #t getkey cache-keys?) (sort lst <?))])
|
||||||
|
|
||||||
)))
|
)))
|
||||||
|
|
Loading…
Reference in New Issue
Block a user