From c9383cffc2321de3408a63b9a2577a871337fa35 Mon Sep 17 00:00:00 2001 From: Vincent St-Amour Date: Mon, 21 Jun 2010 17:54:24 -0400 Subject: [PATCH] Propagated Eli's changes to reversecomplement and fasta to the typed versions. --- .../benchmarks/shootout/typed/fasta.rktl | 210 ++++++++++-------- .../shootout/typed/reversecomplement.rktl | 138 +++++++----- 2 files changed, 203 insertions(+), 145 deletions(-) diff --git a/collects/tests/racket/benchmarks/shootout/typed/fasta.rktl b/collects/tests/racket/benchmarks/shootout/typed/fasta.rktl index 0d2870b999..1fa91a92d5 100644 --- a/collects/tests/racket/benchmarks/shootout/typed/fasta.rktl +++ b/collects/tests/racket/benchmarks/shootout/typed/fasta.rktl @@ -3,113 +3,139 @@ ;; ;; fasta - benchmark ;; -;; Derived from the Chicken variant, which was -;; Contributed by Anthony Borla - -(require racket/cmdline) - -(define-type CumulativeTable (Listof (Pair Natural Float))) +;; Very loosely based on the Chicken variant by Anthony Borla, some +;; optimizations taken from the GCC version by Petr Prokhorenkov, and +;; additional heavy optimizations by Eli Barzilay (not really related to +;; the above two now). +;; +;; If you use some of these optimizations in other solutions, please +;; include a proper attribution to this Racket code. (define +alu+ - (bytes-append - #"GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG" - #"GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA" - #"CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT" - #"ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA" - #"GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG" - #"AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC" - #"AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA")) + (bytes-append #"GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG" + #"GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA" + #"CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT" + #"ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA" + #"GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG" + #"AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC" + #"AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA")) -(define +iub+ - (list - '(#\a . 0.27) '(#\c . 0.12) '(#\g . 0.12) '(#\t . 0.27) '(#\B . 0.02) - '(#\D . 0.02) '(#\H . 0.02) '(#\K . 0.02) '(#\M . 0.02) '(#\N . 0.02) - '(#\R . 0.02) '(#\S . 0.02) '(#\V . 0.02) '(#\W . 0.02) '(#\Y . 0.02))) +(define-type Table (Listof (List Char Float))) -(define +homosapien+ - (list - '(#\a . 0.3029549426680) '(#\c . 0.1979883004921) - '(#\g . 0.1975473066391) '(#\t . 0.3015094502008))) +(define IUB + '([#\a 0.27] [#\c 0.12] [#\g 0.12] [#\t 0.27] [#\B 0.02] + [#\D 0.02] [#\H 0.02] [#\K 0.02] [#\M 0.02] [#\N 0.02] + [#\R 0.02] [#\S 0.02] [#\V 0.02] [#\W 0.02] [#\Y 0.02])) -;; ------------- +(define HOMOSAPIEN + '([#\a 0.3029549426680] [#\c 0.1979883004921] + [#\g 0.1975473066391] [#\t 0.3015094502008])) -(define +line-size+ 60) +(define line-length 60) -;; ------------------------------- +;; ---------------------------------------- -(: make-random (Integer -> (Real -> Real))) -(define (make-random seed) - (let* ((ia 3877) (ic 29573) (im 139968) (last seed)) - (lambda: ((max : Real)) - (set! last (modulo (+ ic (* last ia)) im)) - (/ (* max last) im)))) +(require racket/cmdline racket/require (for-syntax racket/base) + (filtered-in (lambda (name) (regexp-replace #rx"unsafe-" name "")) + racket/unsafe/ops)) -;; ------------------------------- +;; ---------------------------------------- -(: make-cumulative-table ((Listof (Pair Char Float)) -> CumulativeTable)) -(define (make-cumulative-table frequency-table) - (let ([cumulative 0.0]) - (for/list: : CumulativeTable - ([x : (Pair Char Float) frequency-table]) - (set! cumulative (+ cumulative (cdr x))) - (cons (char->integer (car x)) cumulative)))) +(: repeat-fasta (String Integer Bytes -> Void)) +(define (repeat-fasta header N sequence) + (define out (current-output-port)) + (define len (bytes-length sequence)) + (define buf (make-bytes (+ len line-length))) + (bytes-copy! buf 0 sequence) + (bytes-copy! buf len sequence 0 line-length) + (display header out) + (let: loop : Void ([n : Integer N] [start : Integer 0]) + (when (fx> n 0) + (let ([end (fx+ start (fxmin n line-length))]) + (write-bytes buf out start end) + (newline) + (loop (fx- n line-length) (if (fx> end len) (fx- end len) end)))))) -;; ------------- +;; ---------------------------------------- -(define random-next (make-random 42)) -(define +segmarker+ ">") +(define IA 3877) +(define IC 29573) +(define IM 139968) +(define IM.0 (fx->fl IM)) -;; ------------- +(define-syntax-rule (define/IM (name id) E) + (begin (define V + (let: ([v : (Vectorof Integer) (make-vector IM)]) + (for ([id (in-range IM)]) (vector-set! v id E)) + v)) + (define-syntax-rule (name id) (vector-ref V id)))) -(: select-random (CumulativeTable -> Natural)) -(define (select-random cumulative-table) - (let ((rvalue (random-next 1.0))) - (let select-over-threshold ([table cumulative-table]) - (if (<= rvalue (cdar table)) - (caar table) - (select-over-threshold (cdr table)))))) +(define/IM (random-next cur) (fxmodulo (fx+ IC (fx* cur IA)) IM)) -;; ------------- +(: make-lookup-table (Table -> Bytes)) +(define (make-lookup-table frequency-table) + (define v (make-bytes IM)) + (let: loop : Void ([t : Table frequency-table] [c : Integer 0] [c. : Float 0.0]) + (unless (null? t) + (let* ([c1. (fl+ c. (fl* IM.0 (cadar t)))] + [c1 (assert (inexact->exact (flceiling c1.)) exact-integer?)] + [b (char->integer (caar t))]) + (for ([i (in-range c c1)]) (bytes-set! v i b)) + (loop (cdr t) c1 c1.)))) + v) -(: repeat-fasta (String String Integer Bytes Integer -> Void)) -(define (repeat-fasta id desc n_ sequence line-length) - (let ((seqlen (bytes-length sequence)) - (out (current-output-port))) - (display (string-append +segmarker+ id " " desc "\n") out) - (let: loop-o : Void - ((n : Integer n_) (k : Integer 0)) - (unless (<= n 0) - (let ((m (min n line-length))) - (let loop-i ((i 0) (k k)) - (if (>= i m) - (begin - (newline out) - (loop-o (- n line-length) k)) - (let ([k (if (= k seqlen) 0 k)]) - (write-byte (bytes-ref sequence k) out) - (loop-i (add1 i) (add1 k)))))))))) +(: random-fasta (String Integer Table Integer -> Integer)) +(define (random-fasta header N table R) + (define out (current-output-port)) + (define lookup-byte (make-lookup-table table)) + (: n-randoms (Integer Integer -> Integer)) + (define (n-randoms to R) + (let loop ([n 0] [R R]) + (if (fx< n to) + (let ([R (random-next R)]) + (bytes-set! buf n (bytes-ref lookup-byte R)) + (loop (fx+ n 1) R)) + (begin (write-bytes buf out 0 (fx+ to 1)) R)))) + (: make-line! (Bytes Integer Integer -> Integer)) + (define (make-line! buf start R) + (let ([end (fx+ start line-length)]) + (bytes-set! buf end LF) + (let loop ([n start] [R R]) + (if (fx< n end) + (let ([R (random-next R)]) + (bytes-set! buf n (bytes-ref lookup-byte R)) + (loop (fx+ n 1) R)) + R)))) + (: LF Integer) + (define LF (char->integer #\newline)) + (: buf Bytes) + (define buf (make-bytes (fx+ line-length 1))) + (: full-lines Integer) + (: last Integer) + (define-values (full-lines last) (quotient/remainder N line-length)) + (: C Bytes) + (define C + (let* ([len+1 (fx+ line-length 1)] + [buflen (fx* len+1 IM)] + [buf (make-bytes buflen)]) + (let loop ([R R] [i 0]) + (if (fx< i buflen) + (loop (make-line! buf i R) (fx+ i len+1)) + buf)))) + (bytes-set! buf line-length LF) + (display header out) + (let loop ([i full-lines] [R R]) + (if (fx> i IM) + (begin (display C out) (loop (fx- i IM) R)) + (let loop ([i i] [R R]) + (cond [(fx> i 0) (loop (fx- i 1) (n-randoms line-length R))] + [(fx> last 0) (bytes-set! buf last LF) (n-randoms last R)] + [else R]))))) -;; ------------- +;; ---------------------------------------- -(: random-fasta (String String Integer CumulativeTable Integer -> Void)) -(define (random-fasta id desc n_ cumulative-table line-length) - (let ((out (current-output-port))) - (display (string-append +segmarker+ id " " desc "\n") out) - (let: loop-o : Void ((n : Integer n_)) - (unless (<= n 0) - (for ([i (in-range (min n line-length))]) - (write-byte (select-random cumulative-table) out)) - (newline out) - (loop-o (- n line-length)))))) - -;; ------------------------------- - -(let ((n (command-line #:args (n) (assert (string->number (assert n string?)) exact-integer?)))) - - (repeat-fasta "ONE" "Homo sapiens alu" (* n 2) +alu+ +line-size+) - - (random-fasta "TWO" "IUB ambiguity codes" (* n 3) - (make-cumulative-table +iub+) +line-size+) - - (random-fasta "THREE" "Homo sapiens frequency" (* n 5) - (make-cumulative-table +homosapien+) +line-size+)) +(let ([n (command-line #:args (n) (assert (string->number (assert n string?)) exact-integer?))]) + (repeat-fasta ">ONE Homo sapiens alu\n" (* n 2) +alu+) + (random-fasta ">THREE Homo sapiens frequency\n" (* n 5) HOMOSAPIEN + (random-fasta ">TWO IUB ambiguity codes\n" (* n 3) IUB 42)) + (void)) diff --git a/collects/tests/racket/benchmarks/shootout/typed/reversecomplement.rktl b/collects/tests/racket/benchmarks/shootout/typed/reversecomplement.rktl index 02f1070332..caf99799d4 100644 --- a/collects/tests/racket/benchmarks/shootout/typed/reversecomplement.rktl +++ b/collects/tests/racket/benchmarks/shootout/typed/reversecomplement.rktl @@ -1,60 +1,92 @@ ;; The Computer Language Benchmarks Game ;; http://shootout.alioth.debian.org/ -(require scheme/cmdline) - (define translation (make-vector 128)) -(for: : Void - ([from-to : (List Symbol Symbol) - '([a t] - [c g] - [g c] - [t a] - [u a] - [m k] - [r y] - [w w] - [s s] - [y R] - [k M] - [v b] - [h d] - [d h] - [b v] - [n n])]) - (let ([char (lambda: ((sym : Symbol)) - (string-ref (symbol->string sym) 0))]) - (let ([from (char (car from-to))] - [to (char->integer (char-upcase (char (cadr from-to))))]) - (vector-set! translation (char->integer from) to) - (vector-set! translation (char->integer (char-upcase from)) to)))) +(for: ([from : Char (in-string "ACGTUMRWSYKVHDBN")] + [to : Char (in-string "TGCAAKYWSRMBDHVN")]) + (let ([to (char->integer to)]) + (vector-set! translation (char->integer from) to) + (vector-set! translation (char->integer (char-downcase from)) to))) -(: output ((Listof Bytes) -> Void)) -(define (output lines) - (let*: ([str : Bytes (apply bytes-append lines)] - [o : Output-Port (current-output-port)] - [len : Natural (bytes-length str)]) - (for: : Void - ([offset : Natural (in-range 0 len 60)]) - (write-bytes str o offset (min len (+ offset 60))) - (newline o)))) +(define I (current-input-port)) +(define O (current-output-port)) -(let ([in (current-input-port)]) - (let: loop : Void ([accum : (Listof Bytes) null]) - (let ([l (read-bytes-line in)]) - (if (eof-object? l) - (output accum) - (cond - [(regexp-match? #rx#"^>" l) - (output accum) - (printf "~a\n" l) - (loop null)] - [else - (let* ([len (bytes-length l)] - [dest (make-bytes len)]) - (for ([i (in-range len)]) - (bytes-set! dest - (- (- len i) 1) - (vector-ref translation (bytes-ref l i)))) - (loop (cons dest accum)))]))))) +(define marker (char->integer #\>)) + +(require racket/require (for-syntax racket/base) + (filtered-in (lambda (name) (regexp-replace #rx"unsafe-" name "")) + racket/unsafe/ops)) + +(define line-length 60) +(define buf-size (* 64 1024)) +(define out-buf ; so there's always enough room for newlines + (make-bytes (+ buf-size 1 (quotient buf-size line-length)))) +(define LF (char->integer #\newline)) + +#| +The basic idea is to read the input in chunks, and keep pointers to +them, then on output process each chunk to translate and reverse it +before dumping it out. +|# + +(define-type Chunk (Vector Integer Integer Bytes)) + +(: output ((Listof Chunk) -> Void)) +(define (output chunks) + (let: loop : Void + ([chunks : (Listof Chunk) chunks] [col : Integer line-length]) + (when (pair? chunks) + (let ([chunk (car chunks)]) + (let: ([start : Integer (vector-ref chunk 0)] + [end : Integer (vector-ref chunk 1)] + [in-buf : Bytes (vector-ref chunk 2)]) + (let: chunk-loop : Void + ([i : Integer end] [j : Integer 0] [col : Integer col]) + (if (fx> i start) + (let* ([i (fx- i 1)] [b (bytes-ref in-buf i)]) + (if (fx= b LF) + (chunk-loop i j col) + (let ([b (vector-ref translation b)]) + (if (fx= 0 col) + (begin (bytes-set! out-buf j LF) + (bytes-set! out-buf (fx+ j 1) b) + (chunk-loop i (fx+ j 2) (fx- line-length 1))) + (begin (bytes-set! out-buf j b) + (chunk-loop i (fx+ j 1) (fx- col 1))))))) + (begin (write-bytes out-buf O 0 j) + (loop (cdr chunks) col)))))))) + (newline O)) + +(define-syntax case-regexp-posns + (syntax-rules (=> else) + [(_ rx buf start [id B1 ...] [else B2 ...]) + (let ([m (regexp-match-positions rx buf start)]) + (if m (let ([id (assert (car m))]) B1 ...) (begin B2 ...)))])) + +(let ([m (regexp-match #rx"^([^\n]+)\n" I)]) (display (car (assert m)))) + +(let: loop : Void + ([buf : (U Bytes EOF) (read-bytes buf-size I)] + [start : Integer 0] + [chunks : (Listof Chunk) '()]) + (if (eof-object? buf) + (begin (output chunks) (void)) + (case-regexp-posns #rx">" buf start + [p1 (output (cons (ann (vector start (car p1) buf) + (Vector Integer Integer Bytes)) + chunks)) + (case-regexp-posns #rx"\n" buf (cdr p1) + [p2 (write-bytes buf O (car p1) (cdr p2)) + (loop buf (cdr p2) '())] + [else (write-bytes buf O (car p1)) + (let header-loop () + (let ([buf (assert (read-bytes buf-size I) bytes?)]) + (case-regexp-posns #rx"\n" buf 0 + [p2 (write-bytes buf O 0 (cdr p2)) + (loop buf (cdr p2) '())] + [else (write-bytes buf O) (header-loop)])))])] + [else (loop (read-bytes buf-size I) 0 + (cons (ann (vector start (bytes-length buf) buf) + (Vector Integer Integer Bytes)) + chunks))])))