56 lines
1.6 KiB
Scheme
56 lines
1.6 KiB
Scheme
;; The Computer Language Shootout
|
|
;; http://shootout.alioth.debian.org/
|
|
|
|
#lang scheme/base
|
|
|
|
(define (all-counts len dna)
|
|
(let ([table (make-hasheq)]
|
|
[seq (make-string len)])
|
|
(for ([s (in-range (- (string-length dna) len) -1 -1)])
|
|
(string-copy! seq 0 dna s (+ s len))
|
|
(let ([key (string->symbol seq)])
|
|
(let ([cnt (hash-ref table key 0)])
|
|
(hash-set! table key (add1 cnt)))))
|
|
table))
|
|
|
|
(define (write-freqs table)
|
|
(let* ([content (hash-map table cons)]
|
|
[total (exact->inexact (apply + (map cdr content)))])
|
|
(for-each
|
|
(lambda (a)
|
|
(printf "~a ~a\n"
|
|
(car a)
|
|
(real->decimal-string (* 100 (/ (cdr a) total)) 3)))
|
|
(sort content (lambda (a b) (> (cdr a) (cdr b)))))))
|
|
|
|
(define (write-one-freq table key)
|
|
(let ([cnt (hash-ref table key 0)])
|
|
(printf "~a\t~a\n" cnt key)))
|
|
|
|
(define dna
|
|
(let ([in (current-input-port)])
|
|
;; Skip to ">THREE ..."
|
|
(regexp-match #rx#"(?m:^>THREE.*$)" in)
|
|
(let ([s (open-output-string)])
|
|
;; Copy everything but newlines to s:
|
|
(let loop ()
|
|
(when (regexp-match #rx#"\n" in 0 #f s)
|
|
(loop)))
|
|
;; Extract the string from s:
|
|
(string-upcase (get-output-string s)))))
|
|
|
|
;; 1-nucleotide counts:
|
|
(write-freqs (all-counts 1 dna))
|
|
(newline)
|
|
|
|
;; 2-nucleotide counts:
|
|
(write-freqs (all-counts 2 dna))
|
|
(newline)
|
|
|
|
;; Specific sequences:
|
|
(for-each (lambda (seq)
|
|
(write-one-freq (all-counts (string-length seq) dna)
|
|
(string->symbol seq)))
|
|
'("GGT" "GGTA" "GGTATT" "GGTATTTTAATT" "GGTATTTTAATTTATAGT"))
|
|
|