More shootout optimization.
This commit is contained in:
parent
f2b7d1eca3
commit
2d3a713f0d
121
collects/tests/racket/benchmarks/shootout/fasta-new.rkt
Normal file
121
collects/tests/racket/benchmarks/shootout/fasta-new.rkt
Normal file
|
@ -0,0 +1,121 @@
|
|||
#lang racket/base
|
||||
|
||||
;;; The Computer Language Benchmarks Game
|
||||
;;; http://shootout.alioth.debian.org/
|
||||
|
||||
;;; Derived from C version by Joern Inge Vestgaarden
|
||||
;;; and Jorge Peixoto de Morais Neto
|
||||
;;; Contributed by Sam Tobin-Hochstadt
|
||||
|
||||
(require racket/cmdline racket/require (for-syntax racket/base) (only-in racket/flonum for/flvector)
|
||||
(filtered-in (λ (name) (regexp-replace #rx"unsafe-" name ""))
|
||||
racket/unsafe/ops))
|
||||
|
||||
(define +alu+
|
||||
(bytes-append #"GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG"
|
||||
#"GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA"
|
||||
#"CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT"
|
||||
#"ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA"
|
||||
#"GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG"
|
||||
#"AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC"
|
||||
#"AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA"))
|
||||
|
||||
(define (build-table t)
|
||||
(cons (apply bytes (map (compose char->integer car) t))
|
||||
(for/flvector ([i t]) (cdr i))))
|
||||
|
||||
(define IUB
|
||||
(build-table
|
||||
'([#\a . 0.27] [#\c . 0.12] [#\g . 0.12] [#\t . 0.27] [#\B . 0.02]
|
||||
[#\D . 0.02] [#\H . 0.02] [#\K . 0.02] [#\M . 0.02] [#\N . 0.02]
|
||||
[#\R . 0.02] [#\S . 0.02] [#\V . 0.02] [#\W . 0.02] [#\Y . 0.02])))
|
||||
|
||||
(define HOMOSAPIEN
|
||||
(build-table '([#\a . 0.3029549426680] [#\c . 0.1979883004921]
|
||||
[#\g . 0.1975473066391] [#\t . 0.3015094502008])))
|
||||
|
||||
;; -------------
|
||||
|
||||
(define line-length 60)
|
||||
|
||||
(define IA 3877)
|
||||
(define IC 29573)
|
||||
(define IM 139968)
|
||||
|
||||
;; -------------------------------
|
||||
|
||||
(define LAST 42)
|
||||
|
||||
;; -------------------------------
|
||||
|
||||
(define (make-cumulative-table frequency-table)
|
||||
(define bs (car frequency-table))
|
||||
(define ps (cdr frequency-table))
|
||||
(define len (bytes-length bs))
|
||||
(let loop ([i 0] [cum 0.0])
|
||||
(when (fx< i len)
|
||||
(define this (flvector-ref ps i))
|
||||
(define new (fl+ this cum))
|
||||
(flvector-set! ps i new)
|
||||
(loop (fx+ 1 i) new))))
|
||||
|
||||
;; -------------
|
||||
|
||||
(define (random-next max)
|
||||
(set! LAST (fxmodulo (fx+ IC (fx* LAST IA)) IM))
|
||||
(fl/ (fl* max (fx->fl LAST)) (fx->fl IM)))
|
||||
|
||||
;; -------------
|
||||
|
||||
(define (repeat-fasta s count)
|
||||
(define out (current-output-port))
|
||||
(define len (bytes-length s))
|
||||
(define s2 (make-bytes (fx+ len line-length)))
|
||||
(bytes-copy! s2 0 s 0 len)
|
||||
(bytes-copy! s2 len s 0 line-length)
|
||||
(let loop ([count count] [pos 0])
|
||||
(define line (fxmin line-length count))
|
||||
(write-bytes s2 out pos (fx+ pos line))
|
||||
(newline out)
|
||||
(define count* (fx- count line))
|
||||
(when (fx> count* 0)
|
||||
(define pos* (fx+ pos line))
|
||||
(loop count* (if (fx>= pos* len) (fx- pos* len) pos*)))))
|
||||
|
||||
|
||||
;; -------------
|
||||
|
||||
(define-syntax-rule (random-fasta genelist cnt)
|
||||
(let ()
|
||||
(define out (current-output-port))
|
||||
(define ps (cdr genelist))
|
||||
(define cs (car genelist))
|
||||
(let loop ([count cnt])
|
||||
(define line (fxmin line-length count))
|
||||
(define buf (make-bytes (fx+ 1 line-length)))
|
||||
(let inner ([pos 0])
|
||||
(define r (random-next 1.0))
|
||||
(define i (let wh ([i 0]) (if (fl< (flvector-ref ps i) r) (wh (fx+ i 1)) i)))
|
||||
(bytes-set! buf pos (bytes-ref cs i))
|
||||
(define pos+ (fx+ pos 1))
|
||||
(when (fx< pos+ line)
|
||||
(inner pos+)))
|
||||
(bytes-set! buf line (char->integer #\newline))
|
||||
(write-bytes buf out 0 (fx+ line 1))
|
||||
(define count- (fx- count line))
|
||||
(when (fx> count- 0)
|
||||
(loop count-)))))
|
||||
|
||||
;; -------------------------------
|
||||
|
||||
(define n (command-line #:args (n) (string->number n)))
|
||||
|
||||
(make-cumulative-table IUB)
|
||||
(make-cumulative-table HOMOSAPIEN)
|
||||
|
||||
(display ">ONE Homo sapiens alu\n")
|
||||
(repeat-fasta +alu+ (* n 2))
|
||||
(display ">TWO IUB ambiguity codes\n")
|
||||
(random-fasta IUB (* n 3))
|
||||
(display ">THREE Homo sapiens frequency\n")
|
||||
(random-fasta HOMOSAPIEN (* n 5))
|
|
@ -0,0 +1,53 @@
|
|||
#lang racket/base
|
||||
|
||||
;; The Computer Language Benchmarks Game
|
||||
;; http://shootout.alioth.debian.org/
|
||||
;;
|
||||
;; Based on a version by by Anthony Borla
|
||||
;; Parallelize by Sam Tobin-Hochstadt
|
||||
|
||||
(require racket/port racket/place)
|
||||
|
||||
;; -------------------------------
|
||||
|
||||
(define VARIANTS
|
||||
'(#"agggtaaa|tttaccct" #"[cgt]gggtaaa|tttaccc[acg]" #"a[act]ggtaaa|tttacc[agt]t"
|
||||
#"ag[act]gtaaa|tttac[agt]ct" #"agg[act]taaa|ttta[agt]cct" #"aggg[acg]aaa|ttt[cgt]ccct"
|
||||
#"agggt[cgt]aa|tt[acg]accct" #"agggta[cgt]a|t[acg]taccct" #"agggtaa[cgt]|[acg]ttaccct"))
|
||||
|
||||
|
||||
(define IUBS
|
||||
'((#"B" #"(c|g|t)") (#"D" #"(a|g|t)") (#"H" #"(a|c|t)")
|
||||
(#"K" #"(g|t)") (#"M" #"(a|c)") (#"N" #"(a|c|g|t)")
|
||||
(#"R" #"(a|g)") (#"S" #"(c|g)") (#"V" #"(a|c|g)")
|
||||
(#"W" #"(a|t)") (#"Y" #"(c|t)")))
|
||||
|
||||
;; -------------------------------
|
||||
|
||||
(define (main . _)
|
||||
;; Load sequence and record its length
|
||||
(define orig (port->bytes))
|
||||
(define filtered (regexp-replace* #rx#"(?:>.*?\n)|\n" orig #""))
|
||||
;; Perform regexp counts
|
||||
(define pls
|
||||
(for/list ([i (in-list VARIANTS)])
|
||||
(define p (place ch
|
||||
(define seq (place-channel-get ch))
|
||||
(define i (place-channel-get ch))
|
||||
(place-channel-put ch (length (regexp-match-positions* (byte-regexp i) seq)))))
|
||||
(place-channel-put p filtered)
|
||||
(place-channel-put p i)
|
||||
p))
|
||||
|
||||
;; Perform regexp replacements, and record sequence length
|
||||
(define replaced
|
||||
(for/fold ([sequence filtered]) ([IUB IUBS])
|
||||
(regexp-replace* (byte-regexp (car IUB)) sequence (cadr IUB))))
|
||||
(for ([p pls] [i VARIANTS]) (printf "~a ~a\n" i (place-channel-get p)))
|
||||
|
||||
;; Print statistics
|
||||
(printf "\n~a\n~a\n~a\n"
|
||||
(bytes-length orig)
|
||||
(bytes-length filtered)
|
||||
(bytes-length replaced)))
|
||||
(provide main)
|
Loading…
Reference in New Issue
Block a user