From bfc990e3c5c815212e48e9318c094f6b4107dcef Mon Sep 17 00:00:00 2001 From: Eli Barzilay Date: Tue, 1 Apr 2008 20:58:41 +0000 Subject: [PATCH] Added #:key and #:cache-keys to `sort', documented and tested. svn: r9128 --- collects/scheme/private/list.ss | 159 +--------------- collects/scheme/private/sort.ss | 201 +++++++++++++++++++++ collects/scribblings/reference/pairs.scrbl | 29 ++- collects/tests/mzscheme/list.ss | 81 +++++++-- 4 files changed, 304 insertions(+), 166 deletions(-) create mode 100644 collects/scheme/private/sort.ss diff --git a/collects/scheme/private/list.ss b/collects/scheme/private/list.ss index 91c0819dcd..5810dde1a6 100644 --- a/collects/scheme/private/list.ss +++ b/collects/scheme/private/list.ss @@ -25,158 +25,19 @@ compose) - (#%require (for-syntax "stxcase-scheme.ss")) + (#%require (rename "sort.ss" raw-sort sort) + (for-syntax "stxcase-scheme.ss")) - ;; This is a destructive stable merge-sort, adapted from slib and improved by - ;; Eli Barzilay - ;; The original source said: - ;; It uses a version of merge-sort invented, to the best of my knowledge, - ;; by David H. D. Warren, and first used in the DEC-10 Prolog system. - ;; R. A. O'Keefe adapted it to work destructively in Scheme. - ;; but it's a plain destructive merge sort, which I optimized further. - (define sort-internal - (let () - (define-syntax sort-internal-body - (syntax-rules () - [(_ lst less? n) - (begin - (define (merge-sorted! a b) - ;; r-a? for optimization -- is r connected to a? - (define (loop r a b r-a?) - (if (less? (mcar b) (mcar a)) - (begin - (when r-a? (set-mcdr! r b)) - (if (null? (mcdr b)) (set-mcdr! b a) (loop b a (mcdr b) #f))) - ;; (car a) <= (car b) - (begin - (unless r-a? (set-mcdr! r a)) - (if (null? (mcdr a)) (set-mcdr! a b) (loop a (mcdr a) b #t))))) - (cond [(null? a) b] - [(null? b) a] - [(less? (mcar b) (mcar a)) - (if (null? (mcdr b)) (set-mcdr! b a) (loop b a (mcdr b) #f)) - b] - [else ; (car a) <= (car b) - (if (null? (mcdr a)) (set-mcdr! a b) (loop a (mcdr a) b #t)) - a])) - (let step ([n n]) - (cond [(> n 3) - (let* (; let* not really needed with mzscheme's l->r eval - [j (quotient n 2)] [a (step j)] [b (step (- n j))]) - (merge-sorted! a b))] - ;; the following two cases are just explicit treatment of - ;; sublists of length 2 and 3, could remove both (and use the - ;; above case for n>1) and it would still work, except a - ;; little slower - [(= n 3) (let ([p lst] [p1 (mcdr lst)] [p2 (mcdr (mcdr lst))]) - (let ([x (mcar p)] [y (mcar p1)] [z (mcar p2)]) - (set! lst (mcdr p2)) - (cond [(less? y x) ; y x - (cond [(less? z y) ; z y x - (set-mcar! p z) - (set-mcar! p1 y) - (set-mcar! p2 x)] - [(less? z x) ; y z x - (set-mcar! p y) - (set-mcar! p1 z) - (set-mcar! p2 x)] - [else ; y x z - (set-mcar! p y) - (set-mcar! p1 x)])] - [(less? z x) ; z x y - (set-mcar! p z) - (set-mcar! p1 x) - (set-mcar! p2 y)] - [(less? z y) ; x z y - (set-mcar! p1 z) - (set-mcar! p2 y)]) - (set-mcdr! p2 '()) - p))] - [(= n 2) (let ([x (mcar lst)] [y (mcar (mcdr lst))] [p lst]) - (set! lst (mcdr (mcdr lst))) - (when (less? y x) - (set-mcar! p y) - (set-mcar! (mcdr p) x)) - (set-mcdr! (mcdr p) '()) - p)] - [(= n 1) (let ([p lst]) - (set! lst (mcdr lst)) - (set-mcdr! p '()) - p)] - [else '()])))])) - (define sort-internals (make-hash-table)) - (define-syntax make-precompiled-sort - (syntax-rules () - [(_ less?) (hash-table-put! sort-internals less? - (lambda (lst n) (sort-internal-body lst less? n)))])) - (define ((sort-internal* less?) lst n) - (sort-internal-body lst less? n)) - (make-precompiled-sort <) - (make-precompiled-sort stringmlist - [mlst (let ([mlst (mcons (car lst) null)]) - (let loop ([last mlst] [lst (cdr lst)]) - (if (null? lst) - mlst - (let ([new (mcons (car lst) null)]) - (set-mcdr! last new) - (loop new (cdr lst))))))]) - ;; mlist->list - (let loop ([r (sort-internal less? mlst n)]) - (if (null? r) - r - (cons (mcar r) (loop (mcdr r))))))]))) + (when (and getkey (not (and (procedure? getkey) + (procedure-arity-includes? getkey 1)))) + (raise-type-error 'sort "procedure of arity 1" getkey)) + ;; don't provide the extra args if not needed, it's a bit faster + (if getkey (raw-sort lst less? getkey cache-keys?) (raw-sort lst less?))) (define (do-remove who item list equal?) (unless (list? list) diff --git a/collects/scheme/private/sort.ss b/collects/scheme/private/sort.ss new file mode 100644 index 0000000000..462870fb2e --- /dev/null +++ b/collects/scheme/private/sort.ss @@ -0,0 +1,201 @@ +(module sort "pre-base.ss" + +(provide sort) + +(#%require (for-syntax "stxcase-scheme.ss") + (for-syntax "pre-base.ss")) + +;; This is a destructive stable merge-sort, adapted from slib and improved by +;; Eli Barzilay. +;; The original source said: +;; It uses a version of merge-sort invented, to the best of my knowledge, by +;; David H. D. Warren, and first used in the DEC-10 Prolog system. +;; R. A. O'Keefe adapted it to work destructively in Scheme. +;; but it's a plain destructive merge sort, which I optimized further. + +;; The source uses macros to optimize some common cases (eg, no `getkey' +;; function, or precompiled versions with inlinable common comparison +;; predicates) -- they are local macros so they're not left in the compiled +;; code. + +;; Note that there is no error checking on the arguments -- the `sort' function +;; that this module provide is then wrapped up by a keyworded version in +;; "scheme/private/list.ss", and that's what everybody sees. The wrapper is +;; doing these checks. + +(define sort (let () + +(define-syntax-rule (sort-internal-body lst *less? n has-getkey? getkey) + (begin + (define-syntax-rule (less? x y) + (if has-getkey? (*less? (getkey x) (getkey y)) (*less? x y))) + (define (merge-sorted! a b) + ;; r-a? for optimization -- is r connected to a? + (define (loop r a b r-a?) + (if (less? (mcar b) (mcar a)) + (begin + (when r-a? (set-mcdr! r b)) + (if (null? (mcdr b)) (set-mcdr! b a) (loop b a (mcdr b) #f))) + ;; (car a) <= (car b) + (begin + (unless r-a? (set-mcdr! r a)) + (if (null? (mcdr a)) (set-mcdr! a b) (loop a (mcdr a) b #t))))) + (cond [(null? a) b] + [(null? b) a] + [(less? (mcar b) (mcar a)) + (if (null? (mcdr b)) (set-mcdr! b a) (loop b a (mcdr b) #f)) + b] + [else ; (car a) <= (car b) + (if (null? (mcdr a)) (set-mcdr! a b) (loop a (mcdr a) b #t)) + a])) + (let step ([n n]) + (cond [(> n 3) + (let* (; let* not really needed with mzscheme's l->r eval + [j (quotient n 2)] [a (step j)] [b (step (- n j))]) + (merge-sorted! a b))] + ;; the following two cases are just explicit treatment of sublists + ;; of length 2 and 3, could remove both (and use the above case for + ;; n>1) and it would still work, except a little slower + [(= n 3) (let ([p lst] [p1 (mcdr lst)] [p2 (mcdr (mcdr lst))]) + (let ([x (mcar p)] [y (mcar p1)] [z (mcar p2)]) + (set! lst (mcdr p2)) + (cond [(less? y x) ; y x + (cond [(less? z y) ; z y x + (set-mcar! p z) + (set-mcar! p1 y) + (set-mcar! p2 x)] + [(less? z x) ; y z x + (set-mcar! p y) + (set-mcar! p1 z) + (set-mcar! p2 x)] + [else ; y x z + (set-mcar! p y) + (set-mcar! p1 x)])] + [(less? z x) ; z x y + (set-mcar! p z) + (set-mcar! p1 x) + (set-mcar! p2 y)] + [(less? z y) ; x z y + (set-mcar! p1 z) + (set-mcar! p2 y)]) + (set-mcdr! p2 '()) + p))] + [(= n 2) (let ([x (mcar lst)] [y (mcar (mcdr lst))] [p lst]) + (set! lst (mcdr (mcdr lst))) + (when (less? y x) + (set-mcar! p y) + (set-mcar! (mcdr p) x)) + (set-mcdr! (mcdr p) '()) + p)] + [(= n 1) (let ([p lst]) + (set! lst (mcdr lst)) + (set-mcdr! p '()) + p)] + [else '()])))) + +(define sort-internals (make-hash-table)) +(define _ + (let-syntax ([precomp + (syntax-rules () + [(_ less? more ...) + (let ([proc (lambda (lst n) + (sort-internal-body lst less? n #f #f))]) + (hash-table-put! sort-internals less? proc) + (hash-table-put! sort-internals more proc) ...)])]) + (precomp < <=) + (precomp > >=) + (precomp string decorated-mlist + [mlst (let ([x (car lst)]) (mcons (cons (getkey x) x) null))]) + (let loop ([last mlst] [lst (cdr lst)]) + (when (pair? lst) + (let ([new (let ([x (car lst)]) (mcons (cons (getkey x) x) null))]) + (set-mcdr! last new) + (loop new (cdr lst))))) + ;; decorated-mlist -> list + (let loop ([r (sort-internal *less? mlst n car)]) + (if (null? r) r (cons (cdr (mcar r)) (loop (mcdr r))))))] + ;; trivial cases + [(< n 2) lst] + ;; check if the list is already sorted (which can be common, eg, + ;; directory lists) + [(let loop ([last (car lst)] [next (cdr lst)]) + (or (null? next) + (and (not (less? (car next) last)) + (loop (car next) (cdr next))))) + lst] + ;; below we can assume an unsorted list + ;; inlined case, for optimization of short lists + [(< n 3) + (if (= n 2) + ;; (because of the above test, we can assume that the input is + ;; unsorted) + (list (cadr lst) (car lst)) + (let ([a (car lst)] [b (cadr lst)] [c (caddr lst)]) + ;; General note: we need a stable sort, so we should always compare + ;; (less? later-item earlier-item) since it gives more information. + ;; A good way to see that we have good code is to check that each + ;; permutation appears exactly once. This means that n=4 will have + ;; 23 cases, so don't bother. (Homework: write a macro to generate + ;; code for a specific N. Bonus: prove correctness. Extra bonus: + ;; prove optimal solution. Extra extra bonus: prove optimal + ;; solution exists, extract macro from proof.) + (let ([a (car lst)] [b (cadr lst)] [c (caddr lst)]) + (if (less? b a) + ;; b mlist + [mlst (mcons (car lst) null)]) + (let loop ([last mlst] [lst (cdr lst)]) + (when (pair? lst) + (let ([new (mcons (car lst) null)]) + (set-mcdr! last new) + (loop new (cdr lst))))) + ;; mlist -> list + (let loop ([r (if getkey + (sort-internal *less? mlst n getkey) + (sort-internal *less? mlst n))]) + (if (null? r) r (cons (mcar r) (loop (mcdr r))))))]))) + +;; Finally, this is the provided `sort' value +(case-lambda + [(lst less?) (sort-body lst less? #f #f #f)] + [(lst less? getkey) + (if (and getkey (not (eq? values getkey))) + (sort lst less? getkey #f) (sort lst less?))] + [(lst less? getkey cache-keys?) + (if (and getkey (not (eq? values getkey))) + (sort-body lst less? #t getkey cache-keys?) (sort lst less?))]) + +))) diff --git a/collects/scribblings/reference/pairs.scrbl b/collects/scribblings/reference/pairs.scrbl index 281d81a59a..a50c01e303 100644 --- a/collects/scribblings/reference/pairs.scrbl +++ b/collects/scribblings/reference/pairs.scrbl @@ -326,7 +326,9 @@ Returns @scheme[(remove* v lst eq?)].} Returns @scheme[(remove* v lst eqv?)].} -@defproc[(sort [lst list?] [less-than? (any/c any/c . -> . any/c)]) +@defproc[(sort [lst list?] [less-than? (any/c any/c . -> . any/c)] + [#:key key (any/c . -> . any/c) values] + [#:cache-keys cache-keys boolean? #f]) list?]{ Returns a list sorted according to the @scheme[less-than?] procedure, @@ -337,9 +339,28 @@ Returns a list sorted according to the @scheme[less-than?] procedure, The sort is stable: if two elements of @scheme[lst] are ``equal'' (i.e., @scheme[proc] does not return a true value when given the pair in either order), then the elements preserve their relative order - from @scheme[lst] in the output list. You should therefore use - @scheme[sort] with strict comparison functions (e.g., @scheme[<] or - @scheme[string