(module read-lines mzscheme (require (lib "etc.ss") (lib "pregexp.ss") "util.ss") (provide read-lines) (define read-lines (opt-lambda (file caption [offset #f]) (template caption (get-the-lines file offset)))) (define (semi-flatten lst) (if (null? lst) '() (cons (caar lst) (cons (cadar lst) (semi-flatten (cdr lst)))))) (define temp-anchor `(A ((NAME "temp")) "")) (define (spacify s) (if (and (string? s) (string=? s "")) " " ; to appease IE s)) (define (template caption lines) `(TABLE ((CELLPADDING "0") (CELLSPACING "0")) (B ,(color-with "blue" caption)) (P) (PRE ((STYLE "font-family:monospace")) ; use
's instead of newlines, for Opera ; don't put in a
for the temp-anchor, which wasn't a line in the source ,@(semi-flatten (map (lambda (ln) (if (eq? ln temp-anchor) `(,ln "") `(,(spacify ln) (BR)))) lines))))) (define eoregexp "($|\\s|(\\.(\\s|$))|>)") (define trailing-regexp (pregexp "[\\s>)(\"]")) (define url-regexp-base (string-append "://([^\\s]*)" eoregexp)) (define (make-url-regexp ty) (pregexp (string-append ty url-regexp-base))) (define http-regexp (make-url-regexp "http")) (define cheap-http-regexp (regexp "http://")) (define (http-format url) `(A ((HREF ,url)) ,url)) (define ftp-regexp (make-url-regexp "ftp")) (define cheap-ftp-regexp (regexp "ftp://")) (define ftp-format http-format) (define email-regexp (let ([chars "[^\\s)(<>\"']"]) (pregexp (string-append chars "+" "@" chars "{3,}")))) (define cheap-email-regexp (regexp "@")) (define (email-format addr) `(A ((HREF ,(string-append "mailto:" addr))) ,addr)) (define (rtrim s) (let* ([presult (pregexp-replace* trailing-regexp s "")] [plen (string-length presult)] [qlen (sub1 plen)]) (if (and (> qlen 0) (char=? (string-ref presult qlen) #\.)) (substring presult 0 qlen) presult))) (define (process-for-urls line) (let loop ([built-line line]) (let ([curr-len (string-length built-line)]) (let-values ([(raw-indices formatter) (let regexp-loop ([regexps (list http-regexp ftp-regexp email-regexp)] [cheap-regexps (list cheap-http-regexp cheap-ftp-regexp cheap-email-regexp)] [formats (list http-format ftp-format email-format)]) (if (null? regexps) (values #f #f) (let* ([curr-regexp (car regexps)] [curr-cheap-regexp (car cheap-regexps)] [curr-formatter (car formats)] [match-indices (and (regexp-match-positions curr-cheap-regexp built-line) (pregexp-match-positions curr-regexp built-line))]) (if match-indices (values match-indices curr-formatter) (regexp-loop (cdr regexps) (cdr cheap-regexps) (cdr formats))))))]) (if raw-indices (let* ([indices (car raw-indices)] [string-start (car indices)] [string-end (cdr indices)] [raw-item (substring built-line string-start string-end)] [raw-item-len (string-length raw-item)] [item (rtrim raw-item)] [item-len (string-length item)]) `(TT ,(substring built-line 0 string-start) ,(formatter item) ,(substring raw-item ; text removed by rtrim item-len raw-item-len) ,(loop (substring built-line string-end curr-len)))) built-line))))) (define (process-for-keywords line) (let ([len (string-length line)]) (if (and (> len 3) (char=? (string-ref line 0) #\>)) (let* ([rest-of-line (substring line 1 len)] [port (open-input-string rest-of-line)] [dist (with-handlers ([exn:fail:read? (lambda (x) #f)]) (read port) (let-values ([(_1 _2 pos) (port-next-location port)]) pos))]) (if dist `(div (b ">" ,(color-highlight (substring line 1 dist))) ,(substring line dist len)) line)) #f))) ; format line for doc.txt files (define (process-doc-line line) (let ([key-result (process-for-keywords line)]) (if key-result key-result (process-for-urls line)))) (define (get-the-lines file offset) (let* ([port (open-input-file file 'text)] [doc-txt? (let ([len (string-length file)]) (string=? (substring file (- len 7) len) "doc.txt"))] [process-line (if doc-txt? process-doc-line (lambda (x) x))] [lines (let loop ([lines '()]) (let ([line (read-line port)]) (if (eof-object? line) (begin (close-input-port port) (reverse lines)) (loop (cons line lines)))))]) (if offset (let loop ([lines lines] [count 0]) (if (null? lines) '() (let ([len (add1 (string-length (car lines)))]) ; add1 because newline in source omitted (if (>= count offset) (cons temp-anchor (if doc-txt? (map process-doc-line lines) lines)) (cons (process-line (car lines)) (loop (cdr lines) (+ count len))))))) (map process-line lines)))))