racket/collects/tests/mzscheme/benchmarks/shootout/k-nucleotide.ss
Matthew Flatt b8672845fc shootout benchmarks
svn: r10839
2008-07-19 02:40:34 +00:00

56 lines
1.6 KiB
Scheme

;; The Computer Language Shootout
;; http://shootout.alioth.debian.org/
#lang scheme/base
(define (all-counts len dna)
(let ([table (make-hasheq)]
[seq (make-string len)])
(for ([s (in-range (- (string-length dna) len) -1 -1)])
(string-copy! seq 0 dna s (+ s len))
(let ([key (string->symbol seq)])
(let ([cnt (hash-ref table key 0)])
(hash-set! table key (add1 cnt)))))
table))
(define (write-freqs table)
(let* ([content (hash-map table cons)]
[total (exact->inexact (apply + (map cdr content)))])
(for-each
(lambda (a)
(printf "~a ~a\n"
(car a)
(real->decimal-string (* 100 (/ (cdr a) total)) 3)))
(sort content (lambda (a b) (> (cdr a) (cdr b)))))))
(define (write-one-freq table key)
(let ([cnt (hash-ref table key 0)])
(printf "~a\t~a\n" cnt key)))
(define dna
(let ([in (current-input-port)])
;; Skip to ">THREE ..."
(regexp-match #rx#"(?m:^>THREE.*$)" in)
(let ([s (open-output-string)])
;; Copy everything but newlines to s:
(let loop ()
(when (regexp-match #rx#"\n" in 0 #f s)
(loop)))
;; Extract the string from s:
(string-upcase (get-output-string s)))))
;; 1-nucleotide counts:
(write-freqs (all-counts 1 dna))
(newline)
;; 2-nucleotide counts:
(write-freqs (all-counts 2 dna))
(newline)
;; Specific sequences:
(for-each (lambda (seq)
(write-one-freq (all-counts (string-length seq) dna)
(string->symbol seq)))
'("GGT" "GGTA" "GGTATT" "GGTATTTTAATT" "GGTATTTTAATTTATAGT"))