Propagated Eli's changes to reversecomplement and fasta to the typed
versions.
This commit is contained in:
parent
4f501726fb
commit
c9383cffc2
|
@ -3,113 +3,139 @@
|
|||
;;
|
||||
;; fasta - benchmark
|
||||
;;
|
||||
;; Derived from the Chicken variant, which was
|
||||
;; Contributed by Anthony Borla
|
||||
|
||||
(require racket/cmdline)
|
||||
|
||||
(define-type CumulativeTable (Listof (Pair Natural Float)))
|
||||
;; Very loosely based on the Chicken variant by Anthony Borla, some
|
||||
;; optimizations taken from the GCC version by Petr Prokhorenkov, and
|
||||
;; additional heavy optimizations by Eli Barzilay (not really related to
|
||||
;; the above two now).
|
||||
;;
|
||||
;; If you use some of these optimizations in other solutions, please
|
||||
;; include a proper attribution to this Racket code.
|
||||
|
||||
(define +alu+
|
||||
(bytes-append
|
||||
#"GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG"
|
||||
#"GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA"
|
||||
#"CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT"
|
||||
#"ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA"
|
||||
#"GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG"
|
||||
#"AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC"
|
||||
#"AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA"))
|
||||
(bytes-append #"GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG"
|
||||
#"GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA"
|
||||
#"CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT"
|
||||
#"ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA"
|
||||
#"GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG"
|
||||
#"AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC"
|
||||
#"AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA"))
|
||||
|
||||
(define +iub+
|
||||
(list
|
||||
'(#\a . 0.27) '(#\c . 0.12) '(#\g . 0.12) '(#\t . 0.27) '(#\B . 0.02)
|
||||
'(#\D . 0.02) '(#\H . 0.02) '(#\K . 0.02) '(#\M . 0.02) '(#\N . 0.02)
|
||||
'(#\R . 0.02) '(#\S . 0.02) '(#\V . 0.02) '(#\W . 0.02) '(#\Y . 0.02)))
|
||||
(define-type Table (Listof (List Char Float)))
|
||||
|
||||
(define +homosapien+
|
||||
(list
|
||||
'(#\a . 0.3029549426680) '(#\c . 0.1979883004921)
|
||||
'(#\g . 0.1975473066391) '(#\t . 0.3015094502008)))
|
||||
(define IUB
|
||||
'([#\a 0.27] [#\c 0.12] [#\g 0.12] [#\t 0.27] [#\B 0.02]
|
||||
[#\D 0.02] [#\H 0.02] [#\K 0.02] [#\M 0.02] [#\N 0.02]
|
||||
[#\R 0.02] [#\S 0.02] [#\V 0.02] [#\W 0.02] [#\Y 0.02]))
|
||||
|
||||
;; -------------
|
||||
(define HOMOSAPIEN
|
||||
'([#\a 0.3029549426680] [#\c 0.1979883004921]
|
||||
[#\g 0.1975473066391] [#\t 0.3015094502008]))
|
||||
|
||||
(define +line-size+ 60)
|
||||
(define line-length 60)
|
||||
|
||||
;; -------------------------------
|
||||
;; ----------------------------------------
|
||||
|
||||
(: make-random (Integer -> (Real -> Real)))
|
||||
(define (make-random seed)
|
||||
(let* ((ia 3877) (ic 29573) (im 139968) (last seed))
|
||||
(lambda: ((max : Real))
|
||||
(set! last (modulo (+ ic (* last ia)) im))
|
||||
(/ (* max last) im))))
|
||||
(require racket/cmdline racket/require (for-syntax racket/base)
|
||||
(filtered-in (lambda (name) (regexp-replace #rx"unsafe-" name ""))
|
||||
racket/unsafe/ops))
|
||||
|
||||
;; -------------------------------
|
||||
;; ----------------------------------------
|
||||
|
||||
(: make-cumulative-table ((Listof (Pair Char Float)) -> CumulativeTable))
|
||||
(define (make-cumulative-table frequency-table)
|
||||
(let ([cumulative 0.0])
|
||||
(for/list: : CumulativeTable
|
||||
([x : (Pair Char Float) frequency-table])
|
||||
(set! cumulative (+ cumulative (cdr x)))
|
||||
(cons (char->integer (car x)) cumulative))))
|
||||
(: repeat-fasta (String Integer Bytes -> Void))
|
||||
(define (repeat-fasta header N sequence)
|
||||
(define out (current-output-port))
|
||||
(define len (bytes-length sequence))
|
||||
(define buf (make-bytes (+ len line-length)))
|
||||
(bytes-copy! buf 0 sequence)
|
||||
(bytes-copy! buf len sequence 0 line-length)
|
||||
(display header out)
|
||||
(let: loop : Void ([n : Integer N] [start : Integer 0])
|
||||
(when (fx> n 0)
|
||||
(let ([end (fx+ start (fxmin n line-length))])
|
||||
(write-bytes buf out start end)
|
||||
(newline)
|
||||
(loop (fx- n line-length) (if (fx> end len) (fx- end len) end))))))
|
||||
|
||||
;; -------------
|
||||
;; ----------------------------------------
|
||||
|
||||
(define random-next (make-random 42))
|
||||
(define +segmarker+ ">")
|
||||
(define IA 3877)
|
||||
(define IC 29573)
|
||||
(define IM 139968)
|
||||
(define IM.0 (fx->fl IM))
|
||||
|
||||
;; -------------
|
||||
(define-syntax-rule (define/IM (name id) E)
|
||||
(begin (define V
|
||||
(let: ([v : (Vectorof Integer) (make-vector IM)])
|
||||
(for ([id (in-range IM)]) (vector-set! v id E))
|
||||
v))
|
||||
(define-syntax-rule (name id) (vector-ref V id))))
|
||||
|
||||
(: select-random (CumulativeTable -> Natural))
|
||||
(define (select-random cumulative-table)
|
||||
(let ((rvalue (random-next 1.0)))
|
||||
(let select-over-threshold ([table cumulative-table])
|
||||
(if (<= rvalue (cdar table))
|
||||
(caar table)
|
||||
(select-over-threshold (cdr table))))))
|
||||
(define/IM (random-next cur) (fxmodulo (fx+ IC (fx* cur IA)) IM))
|
||||
|
||||
;; -------------
|
||||
(: make-lookup-table (Table -> Bytes))
|
||||
(define (make-lookup-table frequency-table)
|
||||
(define v (make-bytes IM))
|
||||
(let: loop : Void ([t : Table frequency-table] [c : Integer 0] [c. : Float 0.0])
|
||||
(unless (null? t)
|
||||
(let* ([c1. (fl+ c. (fl* IM.0 (cadar t)))]
|
||||
[c1 (assert (inexact->exact (flceiling c1.)) exact-integer?)]
|
||||
[b (char->integer (caar t))])
|
||||
(for ([i (in-range c c1)]) (bytes-set! v i b))
|
||||
(loop (cdr t) c1 c1.))))
|
||||
v)
|
||||
|
||||
(: repeat-fasta (String String Integer Bytes Integer -> Void))
|
||||
(define (repeat-fasta id desc n_ sequence line-length)
|
||||
(let ((seqlen (bytes-length sequence))
|
||||
(out (current-output-port)))
|
||||
(display (string-append +segmarker+ id " " desc "\n") out)
|
||||
(let: loop-o : Void
|
||||
((n : Integer n_) (k : Integer 0))
|
||||
(unless (<= n 0)
|
||||
(let ((m (min n line-length)))
|
||||
(let loop-i ((i 0) (k k))
|
||||
(if (>= i m)
|
||||
(begin
|
||||
(newline out)
|
||||
(loop-o (- n line-length) k))
|
||||
(let ([k (if (= k seqlen) 0 k)])
|
||||
(write-byte (bytes-ref sequence k) out)
|
||||
(loop-i (add1 i) (add1 k))))))))))
|
||||
(: random-fasta (String Integer Table Integer -> Integer))
|
||||
(define (random-fasta header N table R)
|
||||
(define out (current-output-port))
|
||||
(define lookup-byte (make-lookup-table table))
|
||||
(: n-randoms (Integer Integer -> Integer))
|
||||
(define (n-randoms to R)
|
||||
(let loop ([n 0] [R R])
|
||||
(if (fx< n to)
|
||||
(let ([R (random-next R)])
|
||||
(bytes-set! buf n (bytes-ref lookup-byte R))
|
||||
(loop (fx+ n 1) R))
|
||||
(begin (write-bytes buf out 0 (fx+ to 1)) R))))
|
||||
(: make-line! (Bytes Integer Integer -> Integer))
|
||||
(define (make-line! buf start R)
|
||||
(let ([end (fx+ start line-length)])
|
||||
(bytes-set! buf end LF)
|
||||
(let loop ([n start] [R R])
|
||||
(if (fx< n end)
|
||||
(let ([R (random-next R)])
|
||||
(bytes-set! buf n (bytes-ref lookup-byte R))
|
||||
(loop (fx+ n 1) R))
|
||||
R))))
|
||||
(: LF Integer)
|
||||
(define LF (char->integer #\newline))
|
||||
(: buf Bytes)
|
||||
(define buf (make-bytes (fx+ line-length 1)))
|
||||
(: full-lines Integer)
|
||||
(: last Integer)
|
||||
(define-values (full-lines last) (quotient/remainder N line-length))
|
||||
(: C Bytes)
|
||||
(define C
|
||||
(let* ([len+1 (fx+ line-length 1)]
|
||||
[buflen (fx* len+1 IM)]
|
||||
[buf (make-bytes buflen)])
|
||||
(let loop ([R R] [i 0])
|
||||
(if (fx< i buflen)
|
||||
(loop (make-line! buf i R) (fx+ i len+1))
|
||||
buf))))
|
||||
(bytes-set! buf line-length LF)
|
||||
(display header out)
|
||||
(let loop ([i full-lines] [R R])
|
||||
(if (fx> i IM)
|
||||
(begin (display C out) (loop (fx- i IM) R))
|
||||
(let loop ([i i] [R R])
|
||||
(cond [(fx> i 0) (loop (fx- i 1) (n-randoms line-length R))]
|
||||
[(fx> last 0) (bytes-set! buf last LF) (n-randoms last R)]
|
||||
[else R])))))
|
||||
|
||||
;; -------------
|
||||
;; ----------------------------------------
|
||||
|
||||
(: random-fasta (String String Integer CumulativeTable Integer -> Void))
|
||||
(define (random-fasta id desc n_ cumulative-table line-length)
|
||||
(let ((out (current-output-port)))
|
||||
(display (string-append +segmarker+ id " " desc "\n") out)
|
||||
(let: loop-o : Void ((n : Integer n_))
|
||||
(unless (<= n 0)
|
||||
(for ([i (in-range (min n line-length))])
|
||||
(write-byte (select-random cumulative-table) out))
|
||||
(newline out)
|
||||
(loop-o (- n line-length))))))
|
||||
|
||||
;; -------------------------------
|
||||
|
||||
(let ((n (command-line #:args (n) (assert (string->number (assert n string?)) exact-integer?))))
|
||||
|
||||
(repeat-fasta "ONE" "Homo sapiens alu" (* n 2) +alu+ +line-size+)
|
||||
|
||||
(random-fasta "TWO" "IUB ambiguity codes" (* n 3)
|
||||
(make-cumulative-table +iub+) +line-size+)
|
||||
|
||||
(random-fasta "THREE" "Homo sapiens frequency" (* n 5)
|
||||
(make-cumulative-table +homosapien+) +line-size+))
|
||||
(let ([n (command-line #:args (n) (assert (string->number (assert n string?)) exact-integer?))])
|
||||
(repeat-fasta ">ONE Homo sapiens alu\n" (* n 2) +alu+)
|
||||
(random-fasta ">THREE Homo sapiens frequency\n" (* n 5) HOMOSAPIEN
|
||||
(random-fasta ">TWO IUB ambiguity codes\n" (* n 3) IUB 42))
|
||||
(void))
|
||||
|
|
|
@ -1,60 +1,92 @@
|
|||
;; The Computer Language Benchmarks Game
|
||||
;; http://shootout.alioth.debian.org/
|
||||
|
||||
(require scheme/cmdline)
|
||||
|
||||
(define translation (make-vector 128))
|
||||
|
||||
(for: : Void
|
||||
([from-to : (List Symbol Symbol)
|
||||
'([a t]
|
||||
[c g]
|
||||
[g c]
|
||||
[t a]
|
||||
[u a]
|
||||
[m k]
|
||||
[r y]
|
||||
[w w]
|
||||
[s s]
|
||||
[y R]
|
||||
[k M]
|
||||
[v b]
|
||||
[h d]
|
||||
[d h]
|
||||
[b v]
|
||||
[n n])])
|
||||
(let ([char (lambda: ((sym : Symbol))
|
||||
(string-ref (symbol->string sym) 0))])
|
||||
(let ([from (char (car from-to))]
|
||||
[to (char->integer (char-upcase (char (cadr from-to))))])
|
||||
(vector-set! translation (char->integer from) to)
|
||||
(vector-set! translation (char->integer (char-upcase from)) to))))
|
||||
(for: ([from : Char (in-string "ACGTUMRWSYKVHDBN")]
|
||||
[to : Char (in-string "TGCAAKYWSRMBDHVN")])
|
||||
(let ([to (char->integer to)])
|
||||
(vector-set! translation (char->integer from) to)
|
||||
(vector-set! translation (char->integer (char-downcase from)) to)))
|
||||
|
||||
(: output ((Listof Bytes) -> Void))
|
||||
(define (output lines)
|
||||
(let*: ([str : Bytes (apply bytes-append lines)]
|
||||
[o : Output-Port (current-output-port)]
|
||||
[len : Natural (bytes-length str)])
|
||||
(for: : Void
|
||||
([offset : Natural (in-range 0 len 60)])
|
||||
(write-bytes str o offset (min len (+ offset 60)))
|
||||
(newline o))))
|
||||
(define I (current-input-port))
|
||||
(define O (current-output-port))
|
||||
|
||||
(let ([in (current-input-port)])
|
||||
(let: loop : Void ([accum : (Listof Bytes) null])
|
||||
(let ([l (read-bytes-line in)])
|
||||
(if (eof-object? l)
|
||||
(output accum)
|
||||
(cond
|
||||
[(regexp-match? #rx#"^>" l)
|
||||
(output accum)
|
||||
(printf "~a\n" l)
|
||||
(loop null)]
|
||||
[else
|
||||
(let* ([len (bytes-length l)]
|
||||
[dest (make-bytes len)])
|
||||
(for ([i (in-range len)])
|
||||
(bytes-set! dest
|
||||
(- (- len i) 1)
|
||||
(vector-ref translation (bytes-ref l i))))
|
||||
(loop (cons dest accum)))])))))
|
||||
(define marker (char->integer #\>))
|
||||
|
||||
(require racket/require (for-syntax racket/base)
|
||||
(filtered-in (lambda (name) (regexp-replace #rx"unsafe-" name ""))
|
||||
racket/unsafe/ops))
|
||||
|
||||
(define line-length 60)
|
||||
(define buf-size (* 64 1024))
|
||||
(define out-buf ; so there's always enough room for newlines
|
||||
(make-bytes (+ buf-size 1 (quotient buf-size line-length))))
|
||||
(define LF (char->integer #\newline))
|
||||
|
||||
#|
|
||||
The basic idea is to read the input in chunks, and keep pointers to
|
||||
them, then on output process each chunk to translate and reverse it
|
||||
before dumping it out.
|
||||
|#
|
||||
|
||||
(define-type Chunk (Vector Integer Integer Bytes))
|
||||
|
||||
(: output ((Listof Chunk) -> Void))
|
||||
(define (output chunks)
|
||||
(let: loop : Void
|
||||
([chunks : (Listof Chunk) chunks] [col : Integer line-length])
|
||||
(when (pair? chunks)
|
||||
(let ([chunk (car chunks)])
|
||||
(let: ([start : Integer (vector-ref chunk 0)]
|
||||
[end : Integer (vector-ref chunk 1)]
|
||||
[in-buf : Bytes (vector-ref chunk 2)])
|
||||
(let: chunk-loop : Void
|
||||
([i : Integer end] [j : Integer 0] [col : Integer col])
|
||||
(if (fx> i start)
|
||||
(let* ([i (fx- i 1)] [b (bytes-ref in-buf i)])
|
||||
(if (fx= b LF)
|
||||
(chunk-loop i j col)
|
||||
(let ([b (vector-ref translation b)])
|
||||
(if (fx= 0 col)
|
||||
(begin (bytes-set! out-buf j LF)
|
||||
(bytes-set! out-buf (fx+ j 1) b)
|
||||
(chunk-loop i (fx+ j 2) (fx- line-length 1)))
|
||||
(begin (bytes-set! out-buf j b)
|
||||
(chunk-loop i (fx+ j 1) (fx- col 1)))))))
|
||||
(begin (write-bytes out-buf O 0 j)
|
||||
(loop (cdr chunks) col))))))))
|
||||
(newline O))
|
||||
|
||||
(define-syntax case-regexp-posns
|
||||
(syntax-rules (=> else)
|
||||
[(_ rx buf start [id B1 ...] [else B2 ...])
|
||||
(let ([m (regexp-match-positions rx buf start)])
|
||||
(if m (let ([id (assert (car m))]) B1 ...) (begin B2 ...)))]))
|
||||
|
||||
(let ([m (regexp-match #rx"^([^\n]+)\n" I)]) (display (car (assert m))))
|
||||
|
||||
(let: loop : Void
|
||||
([buf : (U Bytes EOF) (read-bytes buf-size I)]
|
||||
[start : Integer 0]
|
||||
[chunks : (Listof Chunk) '()])
|
||||
(if (eof-object? buf)
|
||||
(begin (output chunks) (void))
|
||||
(case-regexp-posns #rx">" buf start
|
||||
[p1 (output (cons (ann (vector start (car p1) buf)
|
||||
(Vector Integer Integer Bytes))
|
||||
chunks))
|
||||
(case-regexp-posns #rx"\n" buf (cdr p1)
|
||||
[p2 (write-bytes buf O (car p1) (cdr p2))
|
||||
(loop buf (cdr p2) '())]
|
||||
[else (write-bytes buf O (car p1))
|
||||
(let header-loop ()
|
||||
(let ([buf (assert (read-bytes buf-size I) bytes?)])
|
||||
(case-regexp-posns #rx"\n" buf 0
|
||||
[p2 (write-bytes buf O 0 (cdr p2))
|
||||
(loop buf (cdr p2) '())]
|
||||
[else (write-bytes buf O) (header-loop)])))])]
|
||||
[else (loop (read-bytes buf-size I) 0
|
||||
(cons (ann (vector start (bytes-length buf) buf)
|
||||
(Vector Integer Integer Bytes))
|
||||
chunks))])))
|
||||
|
|
Loading…
Reference in New Issue
Block a user