racket/collects/scheme/private/sort.ss
2008-04-08 21:42:38 +00:00

204 lines
8.6 KiB
Scheme

(module sort '#%kernel
(#%require "small-scheme.ss" "define.ss" (for-syntax "stxcase-scheme.ss"))
(#%provide sort)
;; This is a destructive stable merge-sort, adapted from slib and improved by
;; Eli Barzilay.
;; The original source said:
;; It uses a version of merge-sort invented, to the best of my knowledge, by
;; David H. D. Warren, and first used in the DEC-10 Prolog system.
;; R. A. O'Keefe adapted it to work destructively in Scheme.
;; but it's a plain destructive merge sort, which I optimized further.
;; The source uses macros to optimize some common cases (eg, no `getkey'
;; function, or precompiled versions with inlinable common comparison
;; predicates) -- they are local macros so they're not left in the compiled
;; code.
;; Note that there is no error checking on the arguments -- the `sort' function
;; that this module provide is then wrapped up by a keyworded version in
;; "scheme/private/list.ss", and that's what everybody sees. The wrapper is
;; doing these checks.
(define sort (let ()
(define-syntax define-syntax-rule
(syntax-rules ()
[(dr (foo . pattern) template)
(define-syntax foo (syntax-rules () [(_ . pattern) template]))]))
(define-syntax-rule (sort-internal-body lst *less? n has-getkey? getkey)
(begin
(define-syntax-rule (less? x y)
(if has-getkey? (*less? (getkey x) (getkey y)) (*less? x y)))
(define (merge-sorted! a b)
;; r-a? for optimization -- is r connected to a?
(define (loop r a b r-a?)
(if (less? (mcar b) (mcar a))
(begin
(when r-a? (set-mcdr! r b))
(if (null? (mcdr b)) (set-mcdr! b a) (loop b a (mcdr b) #f)))
;; (car a) <= (car b)
(begin
(unless r-a? (set-mcdr! r a))
(if (null? (mcdr a)) (set-mcdr! a b) (loop a (mcdr a) b #t)))))
(cond [(null? a) b]
[(null? b) a]
[(less? (mcar b) (mcar a))
(if (null? (mcdr b)) (set-mcdr! b a) (loop b a (mcdr b) #f))
b]
[else ; (car a) <= (car b)
(if (null? (mcdr a)) (set-mcdr! a b) (loop a (mcdr a) b #t))
a]))
(let step ([n n])
(cond [(> n 3)
(let* (; let* not really needed with mzscheme's l->r eval
[j (quotient n 2)] [a (step j)] [b (step (- n j))])
(merge-sorted! a b))]
;; the following two cases are just explicit treatment of sublists
;; of length 2 and 3, could remove both (and use the above case for
;; n>1) and it would still work, except a little slower
[(= n 3) (let ([p lst] [p1 (mcdr lst)] [p2 (mcdr (mcdr lst))])
(let ([x (mcar p)] [y (mcar p1)] [z (mcar p2)])
(set! lst (mcdr p2))
(cond [(less? y x) ; y x
(cond [(less? z y) ; z y x
(set-mcar! p z)
(set-mcar! p1 y)
(set-mcar! p2 x)]
[(less? z x) ; y z x
(set-mcar! p y)
(set-mcar! p1 z)
(set-mcar! p2 x)]
[else ; y x z
(set-mcar! p y)
(set-mcar! p1 x)])]
[(less? z x) ; z x y
(set-mcar! p z)
(set-mcar! p1 x)
(set-mcar! p2 y)]
[(less? z y) ; x z y
(set-mcar! p1 z)
(set-mcar! p2 y)])
(set-mcdr! p2 '())
p))]
[(= n 2) (let ([x (mcar lst)] [y (mcar (mcdr lst))] [p lst])
(set! lst (mcdr (mcdr lst)))
(when (less? y x)
(set-mcar! p y)
(set-mcar! (mcdr p) x))
(set-mcdr! (mcdr p) '())
p)]
[(= n 1) (let ([p lst])
(set! lst (mcdr lst))
(set-mcdr! p '())
p)]
[else '()]))))
(define sort-internals (make-hasheq))
(define _
(let ()
(define-syntax-rule (precomp less? more ...)
(let ([proc (lambda (lst n) (sort-internal-body lst less? n #f #f))])
(hash-set! sort-internals less? proc)
(hash-set! sort-internals more proc) ...))
(precomp < <=)
(precomp > >=)
(precomp string<? string<=?)
(precomp string-ci<? string-ci<=?)
(precomp keyword<?)))
(define sort-internal
(case-lambda
[(less? lst n)
(let ([si (hash-ref sort-internals less? #f)])
(if si
;; use a precompiled function if found
(si lst n)
;; otherwise, use the generic code
(let () (sort-internal-body lst less? n #f #f))))]
[(less? lst n getkey)
(sort-internal-body lst less? n #t getkey)]))
(define-syntax-rule (sort-body lst *less? has-getkey? getkey cache-keys?)
(let ([n (length lst)])
(define-syntax-rule (less? x y)
(if has-getkey? (*less? (getkey x) (getkey y)) (*less? x y)))
(cond
;; trivial case
[(= n 0) lst]
;; below we can assume a non-empty input list
[cache-keys?
;; decorate while converting to an mlist, and undecorate when going
;; back, always do this for consistency
(let (;; list -> decorated-mlist
[mlst (let ([x (car lst)]) (mcons (cons (getkey x) x) null))])
(let loop ([last mlst] [lst (cdr lst)])
(when (pair? lst)
(let ([new (let ([x (car lst)]) (mcons (cons (getkey x) x) null))])
(set-mcdr! last new)
(loop new (cdr lst)))))
;; decorated-mlist -> list
(let loop ([r (sort-internal *less? mlst n car)])
(if (null? r) r (cons (cdr (mcar r)) (loop (mcdr r))))))]
;; trivial cases
[(< n 2) lst]
;; check if the list is already sorted (which can be common, eg,
;; directory lists)
[(let loop ([last (car lst)] [next (cdr lst)])
(or (null? next)
(and (not (less? (car next) last))
(loop (car next) (cdr next)))))
lst]
;; below we can assume an unsorted list
;; inlined case, for optimization of short lists
[(< n 3)
(if (= n 2)
;; (because of the above test, we can assume that the input is
;; unsorted)
(list (cadr lst) (car lst))
(let ([a (car lst)] [b (cadr lst)] [c (caddr lst)])
;; General note: we need a stable sort, so we should always compare
;; (less? later-item earlier-item) since it gives more information.
;; A good way to see that we have good code is to check that each
;; permutation appears exactly once. This means that n=4 will have
;; 23 cases, so don't bother. (Homework: write a macro to generate
;; code for a specific N. Bonus: prove correctness. Extra bonus:
;; prove optimal solution. Extra extra bonus: prove optimal
;; solution exists, extract macro from proof.)
(let ([a (car lst)] [b (cadr lst)] [c (caddr lst)])
(if (less? b a)
;; b<a
(if (less? c b)
(list c b a)
;; b<a, b<=c
(if (less? c a) (list b c a) (list b a c)))
;; a<=b, so c<b (b<=c is impossible due to above test)
(if (less? c a) (list c a b) (list a c b))))))]
[else (let (;; list -> mlist
[mlst (mcons (car lst) null)])
(let loop ([last mlst] [lst (cdr lst)])
(when (pair? lst)
(let ([new (mcons (car lst) null)])
(set-mcdr! last new)
(loop new (cdr lst)))))
;; mlist -> list
(let loop ([r (if getkey
(sort-internal *less? mlst n getkey)
(sort-internal *less? mlst n))])
(if (null? r) r (cons (mcar r) (loop (mcdr r))))))])))
;; Finally, this is the provided `sort' value
(case-lambda
[(lst less?) (sort-body lst less? #f #f #f)]
[(lst less? getkey)
(if (and getkey (not (eq? values getkey)))
(sort lst less? getkey #f) (sort lst less?))]
[(lst less? getkey cache-keys?)
(if (and getkey (not (eq? values getkey)))
(sort-body lst less? #t getkey cache-keys?) (sort lst less?))])
)))