diff --git a/collects/scheme/private/list.ss b/collects/scheme/private/list.ss
index 91c0819dcd..5810dde1a6 100644
--- a/collects/scheme/private/list.ss
+++ b/collects/scheme/private/list.ss
@@ -25,158 +25,19 @@
compose)
- (#%require (for-syntax "stxcase-scheme.ss"))
+ (#%require (rename "sort.ss" raw-sort sort)
+ (for-syntax "stxcase-scheme.ss"))
- ;; This is a destructive stable merge-sort, adapted from slib and improved by
- ;; Eli Barzilay
- ;; The original source said:
- ;; It uses a version of merge-sort invented, to the best of my knowledge,
- ;; by David H. D. Warren, and first used in the DEC-10 Prolog system.
- ;; R. A. O'Keefe adapted it to work destructively in Scheme.
- ;; but it's a plain destructive merge sort, which I optimized further.
- (define sort-internal
- (let ()
- (define-syntax sort-internal-body
- (syntax-rules ()
- [(_ lst less? n)
- (begin
- (define (merge-sorted! a b)
- ;; r-a? for optimization -- is r connected to a?
- (define (loop r a b r-a?)
- (if (less? (mcar b) (mcar a))
- (begin
- (when r-a? (set-mcdr! r b))
- (if (null? (mcdr b)) (set-mcdr! b a) (loop b a (mcdr b) #f)))
- ;; (car a) <= (car b)
- (begin
- (unless r-a? (set-mcdr! r a))
- (if (null? (mcdr a)) (set-mcdr! a b) (loop a (mcdr a) b #t)))))
- (cond [(null? a) b]
- [(null? b) a]
- [(less? (mcar b) (mcar a))
- (if (null? (mcdr b)) (set-mcdr! b a) (loop b a (mcdr b) #f))
- b]
- [else ; (car a) <= (car b)
- (if (null? (mcdr a)) (set-mcdr! a b) (loop a (mcdr a) b #t))
- a]))
- (let step ([n n])
- (cond [(> n 3)
- (let* (; let* not really needed with mzscheme's l->r eval
- [j (quotient n 2)] [a (step j)] [b (step (- n j))])
- (merge-sorted! a b))]
- ;; the following two cases are just explicit treatment of
- ;; sublists of length 2 and 3, could remove both (and use the
- ;; above case for n>1) and it would still work, except a
- ;; little slower
- [(= n 3) (let ([p lst] [p1 (mcdr lst)] [p2 (mcdr (mcdr lst))])
- (let ([x (mcar p)] [y (mcar p1)] [z (mcar p2)])
- (set! lst (mcdr p2))
- (cond [(less? y x) ; y x
- (cond [(less? z y) ; z y x
- (set-mcar! p z)
- (set-mcar! p1 y)
- (set-mcar! p2 x)]
- [(less? z x) ; y z x
- (set-mcar! p y)
- (set-mcar! p1 z)
- (set-mcar! p2 x)]
- [else ; y x z
- (set-mcar! p y)
- (set-mcar! p1 x)])]
- [(less? z x) ; z x y
- (set-mcar! p z)
- (set-mcar! p1 x)
- (set-mcar! p2 y)]
- [(less? z y) ; x z y
- (set-mcar! p1 z)
- (set-mcar! p2 y)])
- (set-mcdr! p2 '())
- p))]
- [(= n 2) (let ([x (mcar lst)] [y (mcar (mcdr lst))] [p lst])
- (set! lst (mcdr (mcdr lst)))
- (when (less? y x)
- (set-mcar! p y)
- (set-mcar! (mcdr p) x))
- (set-mcdr! (mcdr p) '())
- p)]
- [(= n 1) (let ([p lst])
- (set! lst (mcdr lst))
- (set-mcdr! p '())
- p)]
- [else '()])))]))
- (define sort-internals (make-hash-table))
- (define-syntax make-precompiled-sort
- (syntax-rules ()
- [(_ less?) (hash-table-put! sort-internals less?
- (lambda (lst n) (sort-internal-body lst less? n)))]))
- (define ((sort-internal* less?) lst n)
- (sort-internal-body lst less? n))
- (make-precompiled-sort <)
- (make-precompiled-sort string)
- (make-precompiled-sort string-ci)
- (make-precompiled-sort keyword)
- (hash-table-put! sort-internals <= (hash-table-get sort-internals <))
- (hash-table-put! sort-internals string<=? (hash-table-get sort-internals string))
- (hash-table-put! sort-internals string-ci<=? (hash-table-get sort-internals string-ci))
- (lambda (less? lst n)
- ((or (hash-table-get sort-internals less? #f)
- (sort-internal* less?))
- lst n))))
-
- (define (sort lst less?)
- (unless (list? lst)
- (raise-type-error 'sort "proper list" lst))
+ (provide sort)
+ (define (sort lst less? #:key [getkey #f] #:cache-keys [cache-keys? #f])
+ (unless (list? lst) (raise-type-error 'sort "proper list" lst))
(unless (and (procedure? less?) (procedure-arity-includes? less? 2))
(raise-type-error 'sort "procedure of arity 2" less?))
- (let ([n (length lst)])
- (cond
- ;; trivial case
- [(< n 2) lst]
- ;; check if the list is already sorted
- ;; (which can be a common case, eg, directory lists).
- [(let loop ([last (car lst)] [next (cdr lst)])
- (or (null? next)
- (and (not (less? (car next) last))
- (loop (car next) (cdr next)))))
- lst]
- ;; inlined cases, for optimization of short lists
- [(< n 3)
- (if (= n 2)
- ;; (because of the above test, we can assume that the input is
- ;; unsorted)
- (list (cadr lst) (car lst))
- (let ([a (car lst)] [b (cadr lst)] [c (caddr lst)])
- ;; General note: we need a stable sort, so we should always
- ;; compare (less? later-item earlier-item) since it gives more
- ;; information. A good way to see that we have good code is to
- ;; check that each permutation appears exactly once. This means
- ;; that n=4 will have 23 cases, so don't bother. (Homework: write
- ;; a macro to generate code for a specific N. Bonus: prove
- ;; correctness. Extra bonus: prove optimal solution. Extra extra
- ;; bonus: prove optimal solution exists, extract macro from
- ;; proof.)
- (let ([a (car lst)] [b (cadr lst)] [c (caddr lst)])
- (if (less? b a)
- ;; bmlist
- [mlst (let ([mlst (mcons (car lst) null)])
- (let loop ([last mlst] [lst (cdr lst)])
- (if (null? lst)
- mlst
- (let ([new (mcons (car lst) null)])
- (set-mcdr! last new)
- (loop new (cdr lst))))))])
- ;; mlist->list
- (let loop ([r (sort-internal less? mlst n)])
- (if (null? r)
- r
- (cons (mcar r) (loop (mcdr r))))))])))
+ (when (and getkey (not (and (procedure? getkey)
+ (procedure-arity-includes? getkey 1))))
+ (raise-type-error 'sort "procedure of arity 1" getkey))
+ ;; don't provide the extra args if not needed, it's a bit faster
+ (if getkey (raw-sort lst less? getkey cache-keys?) (raw-sort lst less?)))
(define (do-remove who item list equal?)
(unless (list? list)
diff --git a/collects/scheme/private/sort.ss b/collects/scheme/private/sort.ss
new file mode 100644
index 0000000000..462870fb2e
--- /dev/null
+++ b/collects/scheme/private/sort.ss
@@ -0,0 +1,201 @@
+(module sort "pre-base.ss"
+
+(provide sort)
+
+(#%require (for-syntax "stxcase-scheme.ss")
+ (for-syntax "pre-base.ss"))
+
+;; This is a destructive stable merge-sort, adapted from slib and improved by
+;; Eli Barzilay.
+;; The original source said:
+;; It uses a version of merge-sort invented, to the best of my knowledge, by
+;; David H. D. Warren, and first used in the DEC-10 Prolog system.
+;; R. A. O'Keefe adapted it to work destructively in Scheme.
+;; but it's a plain destructive merge sort, which I optimized further.
+
+;; The source uses macros to optimize some common cases (eg, no `getkey'
+;; function, or precompiled versions with inlinable common comparison
+;; predicates) -- they are local macros so they're not left in the compiled
+;; code.
+
+;; Note that there is no error checking on the arguments -- the `sort' function
+;; that this module provide is then wrapped up by a keyworded version in
+;; "scheme/private/list.ss", and that's what everybody sees. The wrapper is
+;; doing these checks.
+
+(define sort (let ()
+
+(define-syntax-rule (sort-internal-body lst *less? n has-getkey? getkey)
+ (begin
+ (define-syntax-rule (less? x y)
+ (if has-getkey? (*less? (getkey x) (getkey y)) (*less? x y)))
+ (define (merge-sorted! a b)
+ ;; r-a? for optimization -- is r connected to a?
+ (define (loop r a b r-a?)
+ (if (less? (mcar b) (mcar a))
+ (begin
+ (when r-a? (set-mcdr! r b))
+ (if (null? (mcdr b)) (set-mcdr! b a) (loop b a (mcdr b) #f)))
+ ;; (car a) <= (car b)
+ (begin
+ (unless r-a? (set-mcdr! r a))
+ (if (null? (mcdr a)) (set-mcdr! a b) (loop a (mcdr a) b #t)))))
+ (cond [(null? a) b]
+ [(null? b) a]
+ [(less? (mcar b) (mcar a))
+ (if (null? (mcdr b)) (set-mcdr! b a) (loop b a (mcdr b) #f))
+ b]
+ [else ; (car a) <= (car b)
+ (if (null? (mcdr a)) (set-mcdr! a b) (loop a (mcdr a) b #t))
+ a]))
+ (let step ([n n])
+ (cond [(> n 3)
+ (let* (; let* not really needed with mzscheme's l->r eval
+ [j (quotient n 2)] [a (step j)] [b (step (- n j))])
+ (merge-sorted! a b))]
+ ;; the following two cases are just explicit treatment of sublists
+ ;; of length 2 and 3, could remove both (and use the above case for
+ ;; n>1) and it would still work, except a little slower
+ [(= n 3) (let ([p lst] [p1 (mcdr lst)] [p2 (mcdr (mcdr lst))])
+ (let ([x (mcar p)] [y (mcar p1)] [z (mcar p2)])
+ (set! lst (mcdr p2))
+ (cond [(less? y x) ; y x
+ (cond [(less? z y) ; z y x
+ (set-mcar! p z)
+ (set-mcar! p1 y)
+ (set-mcar! p2 x)]
+ [(less? z x) ; y z x
+ (set-mcar! p y)
+ (set-mcar! p1 z)
+ (set-mcar! p2 x)]
+ [else ; y x z
+ (set-mcar! p y)
+ (set-mcar! p1 x)])]
+ [(less? z x) ; z x y
+ (set-mcar! p z)
+ (set-mcar! p1 x)
+ (set-mcar! p2 y)]
+ [(less? z y) ; x z y
+ (set-mcar! p1 z)
+ (set-mcar! p2 y)])
+ (set-mcdr! p2 '())
+ p))]
+ [(= n 2) (let ([x (mcar lst)] [y (mcar (mcdr lst))] [p lst])
+ (set! lst (mcdr (mcdr lst)))
+ (when (less? y x)
+ (set-mcar! p y)
+ (set-mcar! (mcdr p) x))
+ (set-mcdr! (mcdr p) '())
+ p)]
+ [(= n 1) (let ([p lst])
+ (set! lst (mcdr lst))
+ (set-mcdr! p '())
+ p)]
+ [else '()]))))
+
+(define sort-internals (make-hash-table))
+(define _
+ (let-syntax ([precomp
+ (syntax-rules ()
+ [(_ less? more ...)
+ (let ([proc (lambda (lst n)
+ (sort-internal-body lst less? n #f #f))])
+ (hash-table-put! sort-internals less? proc)
+ (hash-table-put! sort-internals more proc) ...)])])
+ (precomp < <=)
+ (precomp > >=)
+ (precomp string string<=?)
+ (precomp string-ci string-ci<=?)
+ (precomp keyword)))
+
+(define sort-internal
+ (case-lambda
+ [(less? lst n)
+ (let ([si (hash-table-get sort-internals less? #f)])
+ (if si
+ ;; use a precompiled function if found
+ (si lst n)
+ ;; otherwise, use the generic code
+ (let () (sort-internal-body lst less? n #f #f))))]
+ [(less? lst n getkey)
+ (sort-internal-body lst less? n #t getkey)]))
+
+(define-syntax-rule (sort-body lst *less? has-getkey? getkey cache-keys?)
+ (let ([n (length lst)])
+ (define-syntax-rule (less? x y)
+ (if has-getkey? (*less? (getkey x) (getkey y)) (*less? x y)))
+ (cond
+ ;; trivial case
+ [(= n 0) lst]
+ ;; below we can assume a non-empty input list
+ [cache-keys?
+ ;; decorate while converting to an mlist, and undecorate when going
+ ;; back, always do this for consistency
+ (let (;; list -> decorated-mlist
+ [mlst (let ([x (car lst)]) (mcons (cons (getkey x) x) null))])
+ (let loop ([last mlst] [lst (cdr lst)])
+ (when (pair? lst)
+ (let ([new (let ([x (car lst)]) (mcons (cons (getkey x) x) null))])
+ (set-mcdr! last new)
+ (loop new (cdr lst)))))
+ ;; decorated-mlist -> list
+ (let loop ([r (sort-internal *less? mlst n car)])
+ (if (null? r) r (cons (cdr (mcar r)) (loop (mcdr r))))))]
+ ;; trivial cases
+ [(< n 2) lst]
+ ;; check if the list is already sorted (which can be common, eg,
+ ;; directory lists)
+ [(let loop ([last (car lst)] [next (cdr lst)])
+ (or (null? next)
+ (and (not (less? (car next) last))
+ (loop (car next) (cdr next)))))
+ lst]
+ ;; below we can assume an unsorted list
+ ;; inlined case, for optimization of short lists
+ [(< n 3)
+ (if (= n 2)
+ ;; (because of the above test, we can assume that the input is
+ ;; unsorted)
+ (list (cadr lst) (car lst))
+ (let ([a (car lst)] [b (cadr lst)] [c (caddr lst)])
+ ;; General note: we need a stable sort, so we should always compare
+ ;; (less? later-item earlier-item) since it gives more information.
+ ;; A good way to see that we have good code is to check that each
+ ;; permutation appears exactly once. This means that n=4 will have
+ ;; 23 cases, so don't bother. (Homework: write a macro to generate
+ ;; code for a specific N. Bonus: prove correctness. Extra bonus:
+ ;; prove optimal solution. Extra extra bonus: prove optimal
+ ;; solution exists, extract macro from proof.)
+ (let ([a (car lst)] [b (cadr lst)] [c (caddr lst)])
+ (if (less? b a)
+ ;; b mlist
+ [mlst (mcons (car lst) null)])
+ (let loop ([last mlst] [lst (cdr lst)])
+ (when (pair? lst)
+ (let ([new (mcons (car lst) null)])
+ (set-mcdr! last new)
+ (loop new (cdr lst)))))
+ ;; mlist -> list
+ (let loop ([r (if getkey
+ (sort-internal *less? mlst n getkey)
+ (sort-internal *less? mlst n))])
+ (if (null? r) r (cons (mcar r) (loop (mcdr r))))))])))
+
+;; Finally, this is the provided `sort' value
+(case-lambda
+ [(lst less?) (sort-body lst less? #f #f #f)]
+ [(lst less? getkey)
+ (if (and getkey (not (eq? values getkey)))
+ (sort lst less? getkey #f) (sort lst less?))]
+ [(lst less? getkey cache-keys?)
+ (if (and getkey (not (eq? values getkey)))
+ (sort-body lst less? #t getkey cache-keys?) (sort lst less?))])
+
+)))
diff --git a/collects/scribblings/reference/pairs.scrbl b/collects/scribblings/reference/pairs.scrbl
index 281d81a59a..a50c01e303 100644
--- a/collects/scribblings/reference/pairs.scrbl
+++ b/collects/scribblings/reference/pairs.scrbl
@@ -326,7 +326,9 @@ Returns @scheme[(remove* v lst eq?)].}
Returns @scheme[(remove* v lst eqv?)].}
-@defproc[(sort [lst list?] [less-than? (any/c any/c . -> . any/c)])
+@defproc[(sort [lst list?] [less-than? (any/c any/c . -> . any/c)]
+ [#:key key (any/c . -> . any/c) values]
+ [#:cache-keys cache-keys boolean? #f])
list?]{
Returns a list sorted according to the @scheme[less-than?] procedure,
@@ -337,9 +339,28 @@ Returns a list sorted according to the @scheme[less-than?] procedure,
The sort is stable: if two elements of @scheme[lst] are ``equal''
(i.e., @scheme[proc] does not return a true value when given the pair
in either order), then the elements preserve their relative order
- from @scheme[lst] in the output list. You should therefore use
- @scheme[sort] with strict comparison functions (e.g., @scheme[<] or
- @scheme[string]; not @scheme[<=] or @scheme[string<=?]).}
+ from @scheme[lst] in the output list. To guarantee this, you should
+ use @scheme[sort] with a strict comparison functions (e.g.,
+ @scheme[<] or @scheme[string]; not @scheme[<=] or
+ @scheme[string<=?]).
+
+If a @scheme[key] argument is specified, it is used to extract key
+ values for comparison from the list elements. Specifying it is
+ roughly equivalent to using a comparison procedure such as
+ @scheme[(lambda (x y) (less-than? (key x) (key y)))]. The
+ @scheme[key] procedure is used on two items in every comparison,
+ which is fine for simple cheap accessor function; a
+ @scheme[cache-keys] argument can be specified as @scheme[#t] if you
+ want to minimize uses of the key (e.g., with
+ @scheme[file-or-directory-modify-seconds]). In this case, the
+ @scheme[key] function will be used exactly once on each of the items:
+ sorting will proceed by ``decorating'' the input list with key values
+ first, and ``undecorating'' the resulting list (this can be done
+ manually, but at a greater overhead). For example, specifying a
+ @scheme[key] as @scheme[(lambda (x) (random))] with caching will
+ assign a random number for each item in the list and sort it
+ according to these numbers, which will shuffle the list in a uniform
+ way.}
@; ----------------------------------------
@section{List Searching}
diff --git a/collects/tests/mzscheme/list.ss b/collects/tests/mzscheme/list.ss
index 6692d84243..be63a683c4 100644
--- a/collects/tests/mzscheme/list.ss
+++ b/collects/tests/mzscheme/list.ss
@@ -50,11 +50,18 @@
(define (random-list n range)
(let loop ([n n] [r '()])
(if (zero? n) r (loop (sub1 n) (cons (list (random range)) r)))))
+ (define (sort* lst)
+ (let ([s1 (sort lst car<)]
+ [s2 (sort lst < #:key car)]
+ [s3 (sort lst < #:key car #:cache-keys #t)])
+ (test #t andmap eq? s1 s2)
+ (test #t andmap eq? s1 s3)
+ s1))
(define (test-sort len times)
(or (zero? times)
(and (let* ([rand (random-list len (if (even? times) 1000000 10))]
[orig< (lambda (x y) (memq y (cdr (memq x rand))))]
- [sorted (sort rand car<)]
+ [sorted (sort* rand)]
[l1 (reverse (cdr (reverse sorted)))]
[l2 (cdr sorted)])
(and (= (length sorted) (length rand))
@@ -66,26 +73,74 @@
(test #t test-sort 1 10)
(test #t test-sort 2 20)
(test #t test-sort 3 60)
- (test #t test-sort 4 200)
- (test #t test-sort 5 200)
- (test #t test-sort 10 200)
- (test #t test-sort 100 200)
- (test #t test-sort 1000 200)
+ (test #t test-sort 4 100)
+ (test #t test-sort 5 100)
+ (test #t test-sort 10 100)
+ (test #t test-sort 100 100)
+ (test #t test-sort 1000 100)
;; test stability
- (test '((1) (2) (3 a) (3 b) (3 c)) sort '((3 a) (1) (3 b) (2) (3 c)) car<)
+ (test '((1) (2) (3 a) (3 b) (3 c)) sort* '((3 a) (1) (3 b) (2) (3 c)))
;; test short lists (+ stable)
- (test '() sort '() car<)
- (test '((1 1)) sort '((1 1)) car<)
- (test '((1 2) (1 1)) sort '((1 2) (1 1)) car<)
- (test '((1) (2)) sort '((2) (1)) car<)
- (for-each (lambda (l) (test '((0 3) (1 1) (1 2)) sort l car<))
+ (test '() sort* '())
+ (test '((1 1)) sort* '((1 1)))
+ (test '((1 2) (1 1)) sort* '((1 2) (1 1)))
+ (test '((1) (2)) sort* '((2) (1)))
+ (for-each (lambda (l) (test '((0 3) (1 1) (1 2)) sort* l))
'(((1 1) (1 2) (0 3))
((1 1) (0 3) (1 2))
((0 3) (1 1) (1 2))))
- (for-each (lambda (l) (test '((0 2) (0 3) (1 1)) sort l car<))
+ (for-each (lambda (l) (test '((0 2) (0 3) (1 1)) sort* l))
'(((1 1) (0 2) (0 3))
((0 2) (1 1) (0 3))
((0 2) (0 3) (1 1)))))
+;; test #:key and #:cache-keys
+(let ()
+ (define l '((0) (9) (1) (8) (2) (7) (3) (6) (4) (5)))
+ (define sorted '((0) (1) (2) (3) (4) (5) (6) (7) (8) (9)))
+ ;; can't use keyword args, so use values and the sort call
+ (test sorted values (sort l < #:key car))
+ (let ([c1 0] [c2 0] [touched '()])
+ (test sorted values
+ (sort l (lambda (x y) (set! c1 (add1 c1)) (< x y))
+ #:key (lambda (x)
+ (set! c2 (add1 c2))
+ (set! touched (cons x touched))
+ (car x))))
+ ;; test that the number of key uses is half the number of comparisons
+ (test #t = (* 2 c1) c2)
+ ;; and that this is larger than the number of items in the list
+ (test #t < (length l) c2)
+ ;; and that every item was touched
+ (test null remove* touched l))
+ (let ([c 0] [touched '()])
+ ;; now cache the keys
+ (test sorted values
+ (sort l <
+ #:key (lambda (x)
+ (set! c (add1 c))
+ (set! touched (cons x touched))
+ (car x))
+ #:cache-keys #t))
+ ;; test that the number of key uses is the same as the list length
+ (test #t = c (length l))
+ ;; and that every item was touched
+ (test null remove* touched l))
+ (let* ([c 0] [getkey (lambda (x) (set! c (add1 c)) x)])
+ ;; either way, we never use the key proc on no arguments
+ (test '() values (sort '() < #:key getkey #:cache-keys #f))
+ (test '() values (sort '() < #:key getkey #:cache-keys #t))
+ (test #t = c 0)
+ ;; we also don't use it for 1-arg lists
+ (test '(1) values (sort '(1) < #:key getkey #:cache-keys #f))
+ (test #t = c 0)
+ ;; but we do use it once if caching happens (it's a consistent interface)
+ (test '(1) values (sort '(1) < #:key getkey #:cache-keys #t))
+ (test #t = c 1)
+ ;; check a few other short lists
+ (test '(1 2) values (sort '(2 1) < #:key getkey #:cache-keys #t))
+ (test '(1 2 3) values (sort '(2 3 1) < #:key getkey #:cache-keys #t))
+ (test '(1 2 3 4) values (sort '(4 2 3 1) < #:key getkey #:cache-keys #t))
+ (test #t = c 10)))
;; ---------- take/drop ----------
(let ()