62 lines
1.8 KiB
Scheme
62 lines
1.8 KiB
Scheme
;; The Computer Language Shootout
|
|
;; http://shootout.alioth.debian.org/
|
|
|
|
(module k-nucleotide mzscheme
|
|
(require mzlib/list
|
|
mzlib/string
|
|
(only srfi/13 string-pad-right))
|
|
|
|
(define (all-counts len dna)
|
|
(let ([table (make-hash-table)]
|
|
[seq (make-string len)])
|
|
(let loop ([s (- (string-length dna) len)])
|
|
(string-copy! seq 0 dna s (+ s len))
|
|
(let ([key (string->symbol seq)])
|
|
(let ([cnt (hash-table-get table key 0)])
|
|
(hash-table-put! table key (add1 cnt))))
|
|
(unless (zero? s)
|
|
(loop (sub1 s))))
|
|
table))
|
|
|
|
(define (write-freqs table)
|
|
(let* ([content (hash-table-map table cons)]
|
|
[total (exact->inexact (apply + (map cdr content)))])
|
|
(for-each
|
|
(lambda (a)
|
|
(printf "~a ~a\n"
|
|
(car a)
|
|
(real->decimal-string (* 100 (/ (cdr a) total)) 3)))
|
|
(sort content (lambda (a b) (> (cdr a) (cdr b)))))))
|
|
|
|
(define (write-one-freq table key)
|
|
(let ([cnt (hash-table-get table key 0)])
|
|
(printf "~a\t~a\n" cnt key)))
|
|
|
|
(define dna
|
|
(begin
|
|
;; Skip to ">THREE ..."
|
|
(regexp-match #rx#"(?m:^>THREE.*$)" (current-input-port))
|
|
(let ([s (open-output-string)])
|
|
;; Copy everything but newlines to s:
|
|
(let loop ()
|
|
(when (regexp-match #rx#"\n" (current-input-port) 0 #f s)
|
|
(loop)))
|
|
;; Extract the string from s:
|
|
(string-upcase (get-output-string s)))))
|
|
|
|
;; 1-nucleotide counts:
|
|
(write-freqs (all-counts 1 dna))
|
|
(newline)
|
|
|
|
;; 2-nucleotide counts:
|
|
(write-freqs (all-counts 2 dna))
|
|
(newline)
|
|
|
|
;; Specific sequences:
|
|
(for-each (lambda (seq)
|
|
(write-one-freq (all-counts (string-length seq) dna)
|
|
(string->symbol seq)))
|
|
'("GGT" "GGTA" "GGTATT" "GGTATTTTAATT" "GGTATTTTAATTTATAGT"))
|
|
|
|
)
|