racket/collects/tests/mzscheme/benchmarks/shootout/k-nucleotide.ss
2008-02-24 21:27:36 +00:00

62 lines
1.8 KiB
Scheme

;; The Computer Language Shootout
;; http://shootout.alioth.debian.org/
(module k-nucleotide mzscheme
(require mzlib/list
mzlib/string
(only srfi/13 string-pad-right))
(define (all-counts len dna)
(let ([table (make-hash-table)]
[seq (make-string len)])
(let loop ([s (- (string-length dna) len)])
(string-copy! seq 0 dna s (+ s len))
(let ([key (string->symbol seq)])
(let ([cnt (hash-table-get table key 0)])
(hash-table-put! table key (add1 cnt))))
(unless (zero? s)
(loop (sub1 s))))
table))
(define (write-freqs table)
(let* ([content (hash-table-map table cons)]
[total (exact->inexact (apply + (map cdr content)))])
(for-each
(lambda (a)
(printf "~a ~a\n"
(car a)
(real->decimal-string (* 100 (/ (cdr a) total)) 3)))
(sort content (lambda (a b) (> (cdr a) (cdr b)))))))
(define (write-one-freq table key)
(let ([cnt (hash-table-get table key 0)])
(printf "~a\t~a\n" cnt key)))
(define dna
(begin
;; Skip to ">THREE ..."
(regexp-match #rx#"(?m:^>THREE.*$)" (current-input-port))
(let ([s (open-output-string)])
;; Copy everything but newlines to s:
(let loop ()
(when (regexp-match #rx#"\n" (current-input-port) 0 #f s)
(loop)))
;; Extract the string from s:
(string-upcase (get-output-string s)))))
;; 1-nucleotide counts:
(write-freqs (all-counts 1 dna))
(newline)
;; 2-nucleotide counts:
(write-freqs (all-counts 2 dna))
(newline)
;; Specific sequences:
(for-each (lambda (seq)
(write-one-freq (all-counts (string-length seq) dna)
(string->symbol seq)))
'("GGT" "GGTA" "GGTATT" "GGTATTTTAATT" "GGTATTTTAATTTATAGT"))
)