batch io revised with X-expression reader, plus tests

This commit is contained in:
Matthias Felleisen 2013-08-11 15:19:08 -04:00
parent f3444c7e51
commit 2996b5ffc4
2 changed files with 349 additions and 76 deletions

View File

@ -1,22 +1,13 @@
#lang racket/base #lang racket
(require racket/function
racket/file
racket/format
racket/string
racket/local
(for-syntax racket/base
syntax/parse)
htdp/error
(rename-in lang/prim (first-order->higher-order f2h))
"private/csv/csv.rkt")
;; todo? ;; todo?
;; -- export tokenization? ;; -- export tokenization?
;; I am tryiing to use these lists to automate the documentation of the (require (rename-in lang/prim (first-order->higher-order f2h)))
;; functions but my scribble skills are insufficient and my time is running
;; out. ;; I am trying to use these lists to automate the documentation of the functions
;; but my scribble skills are insufficient and my time is running out.
(module devices racket/base (module devices racket/base
(provide *input-devices* *output-devices*) (provide *input-devices* *output-devices*)
(define *input-devices* `((stdin ,current-input-port) (standard-in ,current-input-port))) (define *input-devices* `((stdin ,current-input-port) (standard-in ,current-input-port)))
@ -31,44 +22,128 @@
;; all reader functions consume the name of a file f: ;; all reader functions consume the name of a file f:
;; -- f must be a file name (string) in the same folder as the program ;; -- f must be a file name (string) in the same folder as the program
read-file ;; String -> String ;; String -> String
;; read the specified file as a string ;; read the specified file as a string
read-file
read-1strings ;; String -> [Listof 1String] ;; String -> [Listof 1String]
;; read the specified file as a list of 1strings (characters) ;; read the specified file as a list of 1strings (characters)
read-1strings
read-lines ;; String -> [Listof String] ;; String -> [Listof String]
;; read the specified file as a list of strings, one per line ;; read the specified file as a list of strings, one per line
read-lines
read-words ;; String -> [Listof String] ;; String -> [Listof String]
;; read the specified file as a list of white-space separated tokens ;; read the specified file as a list of white-space separated tokens
read-words
read-words/line ;; String -> [Listof [Listof String]] ;; String -> [Listof [Listof String]]
;; read the specified file as a list of lines, each line as a list of words ;; read the specified file as a list of lines, each line as a list of words
read-words/line
read-words-and-numbers/line ;; String -> [Listof [Listof (Union Number String)]] ;; String -> [Listof [Listof (Union Number String)]]
;; read the specified file as a list of lines, each line as a list of words and numbers ;; read the specified file as a list of lines, each line as a list of words and numbers
read-words-and-numbers/line
read-csv-file ;; String -> [Listof [Listof (U Any)]] ;; String -> [Listof [Listof (U Any)]]
;; -- f must be formated as a a file with comma-separated values (Any) ;; -- f must be formated as a a file with comma-separated values (Any)
;; read the specified file as a list of lists---one per line---of values (Any) ;; read the specified file as a list of lists---one per line---of values (Any)
read-csv-file
write-file ;; String String -> String ;; String String -> String
;; (write-file filename str) writes str to filename; ;; (write-file filename str) writes str to filename;
;; produces the file name as a confirmation that the write succeeded ;; produces the file name as a confirmation that the write succeeded
write-file)
;; *input-devives*: symbols that redirect input from an input-port #;
;; *output-devives*: symbols that redirect output from a output-port (provide
) ;; [List-of Symbol]
;; symbols that redirect input from an input-port
*input-devives*
;; [List-of Symbol]
;; symbols that redirect output from a output-port
*output-devives*)
;; ---------------------------------------------------------------------------------------------------
;; reading simple X-expressions via HTML/XML files
(provide-higher-order-primitive (provide-higher-order-primitive
read-csv-file/rows (_ row-processor) ;; String ([Listof Any] -> X) -> [Listof X] ;; String ([Listof Any] -> X) -> [Listof X]
;; -- f must be formated as a a file with comma-separated values (Any) ;; -- f must be formated as a a file with comma-separated values (Any)
;; read the specified file as a file of comma-separated values, apply the second ;; read the specified file as a file of comma-separated values, apply the second
;; argument to each row, i.e., list of CSV on one line ;; argument to each row, i.e., list of CSV on one line
) read-csv-file/rows (_ row-processor))
;; ----------------------------------------------------------------------------- (provide
; Xexpr is one of:
; -- Symbol
; -- String
; -- (cons Symbol (cons [List-of Attribute] [List-of Xexpr]))
; -- (cons Symbol [List-of Xexpr])
;
; Attribute is:
; (list Symbol String)
; (list 'a "some text") is called an a-Attribute and "some text" is a's value.
;; Any -> Boolean
;; is the given value an Xexpr?
;; effect: display bad piece if x is not an xexpr
(rename-out (my-xexpr? xexpr?))
;; String -> Xexpr
;; given a file name, read an Xexpr from a file that contains at least one XML element
;; the XML element does not contain any strings as elements other than whitespace,
;; and all whitespace between embedded elements is eliminated
read-plain-xexpr
;; String -> Xexpr
;; given a file name, read an Xexpr from a file that contains at least one XML element
read-xexpr
;; String -> Boolean
;; false, if this url returns a '404'; true otherwise
url-exists?
;; String -> [Maybe Xexpr]
;; given a URL, find web site and retrieve the first XML element, false if the web page isn't found
;; read HTML as XML (if possible)
;; effect: signals an error in case of network problems
read-xexpr/web
;; String -> [Maybe Xexpr]
;; given a URL, find web site and retrieve the first XML element, false if the web page isn't found
;; read HTML as XML (if possible)
;; effect: signals an error in case of network problems
;; the XML element does not contain any strings as elements other than whitespace,
;; and all whitespace between embedded elements is eliminated
read-plain-xexpr/web
;; String -> [Listof String]
;; produce the list of (fully resolve) .html references in a elements from url
url-html-neighbors
;; Xexpr -> String
;; turn the X-expression into a string
xexpr-as-string)
;; ---------------------------------------------------------------------------------------------------
;; exported functions
(require htdp/error
(for-syntax racket/base syntax/parse)
"private/csv/csv.rkt"
;; --- xml/html
net/url
xml/xml
html
srfi/13)
(module+ test
(require rackunit))
(define-syntax-rule (define-syntax-rule
(def-reader (name f s ...) body ...) (def-reader (name f s ...) body ...)
@ -77,8 +152,6 @@
(let () (let ()
body ...))) body ...)))
;; --- exported functions --
(def-reader (read-file f) (def-reader (read-file f)
(list->string (read-chunks f read-char drop-last-newline))) (list->string (read-chunks f read-char drop-last-newline)))
@ -92,22 +165,13 @@
(read-words/line/internal f append)) (read-words/line/internal f append))
(def-reader (read-words/line f) (def-reader (read-words/line f)
;; String -> [Listof [Listof String]]
;; read the specified file as a list of lines, each line as a list of words
(read-words/line/internal f cons)) (read-words/line/internal f cons))
(def-reader (read-words-and-numbers/line f) (def-reader (read-words-and-numbers/line f)
;; String -> [Listof [Listof (U String Number)]] ;; String [Listof [Listof (U String Number)]] -> [Listof [Listof (U String Number)]]
;; read the specified file as a list of lines, each line as a list of words and numbers (define (tease-out-numbers line1 r)
(read-words/line/internal f (lambda (line1 r) (cons (for/list ((t (in-list line1))) (or (string->number t) t)) r))
(cons (for/list ((t (in-list line1))) (or (string->number t) t)) r)))) (read-words/line/internal f tease-out-numbers))
(define (read-words/line/internal f combine)
(define lines (read-chunks f *read-line (lambda (x) x)))
(foldl (lambda (f r)
(define fst (filter (compose not (curry string=? "")) (split f)))
(combine fst r))
'() lines))
(def-reader (read-csv-file f) (def-reader (read-csv-file f)
(read-csv-file/func f)) (read-csv-file/func f))
@ -119,33 +183,37 @@
(define (*read-line) (define (*read-line)
(read-line (current-input-port) 'any)) (read-line (current-input-port) 'any))
;; ----------------------------------------------------------------------------- ;; ---------------------------------------------------------------------------------------------------
;; tester (define (my-xexpr? x0)
;; -- IN --
(with-handlers (((lambda (x) (and (cons? x) (eq? (car x) tag)))
(lambda (x)
(displayln `(,(cdr x) occurs in ,x0))
#f)))
(xexpr-aux? x0)))
(define-syntax (simulate-file stx) (define (url-html-neighbors u)
(syntax-case stx () (define x (read-xexpr/web u))
[(simulate-file) (if x (url-html-neighbors-aux u x) '()))
(raise-syntax-error #f "expects at least one sub-expression" stx)]
[(simulate-file reader str ...) #'(simulate-file/proc (f2h reader) str ...)]))
(define (simulate-file/proc reader . los) (define (url-exists? url:string)
(define _1 (check-proc "simulate-file" reader 1 "reader" "one argument")) (retrieve url:string (lambda (_ h) (not (404? h)))))
(define _2
(andmap (def-reader (read-plain-xexpr f)
(lambda (f) (read-xexpr-aux 'read-plain-xexpr f fix-up))
(check-arg "simulate-file" (string? f) "sequence of strings" "" f))
los)) (def-reader (read-xexpr f)
(define t (make-temporary-file "drracket-temporary-file-~a")) (read-xexpr-aux 'read-xexpr f values))
(dynamic-wind
(lambda () (define (read-xexpr/web url:string)
(with-output-to-file t (read-xexpr/web-aux 'read-xexpr/web url:string values))
(lambda () (for-each displayln los))
#:mode 'text (define (read-plain-xexpr/web url:string)
#:exists 'replace)) (read-xexpr/web-aux 'read-xexpr/web url:string fix-up))
(lambda ()
(reader (path->string t))) (define (xexpr-as-string x)
(lambda () (check-arg 'xexpr->string (and (pair? x) (xexpr? x)) 'xexpr "first" x)
(delete-file t)))) (call-with-output-string (curry display-xml/content (xexpr->xml x))))
;; ----------------------------------------------------------------------------- ;; -----------------------------------------------------------------------------
;; writer ;; writer
@ -163,7 +231,13 @@
;; ----------------------------------------------------------------------------- ;; -----------------------------------------------------------------------------
;; auxiliaries ;; auxiliaries
;; String [([Listof X] -> Y)] -> [Listof Y] ;; String [String [Listof [Listof X]] -> Y] -> Y
(define (read-words/line/internal f combine)
(define lines (read-chunks f *read-line (lambda (x) x)))
(foldl (lambda (f r)
(define fst (filter (compose not (curry string=? "")) (split f)))
(combine fst r))
'() lines))
;; String (-> X) ([Listof X] -> [Listof X]) -> [Listof X] ;; String (-> X) ([Listof X] -> [Listof X]) -> [Listof X]
;; read a file as a list of X where process-accu is applied to accu when eof ;; read a file as a list of X where process-accu is applied to accu when eof
@ -172,6 +246,10 @@
(let loop ([accu '()]) (let loop ([accu '()])
(define nxt (read-chunk)) (define nxt (read-chunk))
(if (eof-object? nxt) (process-accu accu) (loop (cons nxt accu))))) (if (eof-object? nxt) (process-accu accu) (loop (cons nxt accu)))))
(read-from-file-or-device f rd))
;; String [-> X] -> X
(define (read-from-file-or-device f rd)
(define device (assq f *input-devices*)) (define device (assq f *input-devices*))
(if device (if device
(parameterize ((current-input-port [(cadr device)])) (rd)) (parameterize ((current-input-port [(cadr device)])) (rd))
@ -220,3 +298,197 @@
;; splits a string with newlines into a list of lines ;; splits a string with newlines into a list of lines
(define (split-lines str) (define (split-lines str)
(map string-trim (split str "\r*\n"))) (map string-trim (split str "\r*\n")))
;; ---------------------------------------------------------------------------------------------------
;; is the given value a
(define tag (gensym))
;; Any -> Boolean
(define (xexpr-aux? x)
(cond
[(string? x) #t]
[(symbol? x) #t]
[(and (cons? x) (symbol? (first x)))
(define body (rest x))
(or (and (list-of-attributes? (first body)) (list-of-xexpr? (rest body)))
(list-of-xexpr? body))]
[else (raise (cons tag x))]))
;; Any -> Boolean
(define (list-of-attributes? xs)
(and (or (list? xs) (raise (cons tag xs)))
(for/and ((x xs))
(and (list? x) (= (length x) 2) (symbol? (first x)) (string? (second x))))))
;; Any -> Boolean
(define (list-of-xexpr? xs)
(and (or (list? xs) (raise (cons tag xs)))
(andmap xexpr? xs)))
;; ---------------------------------------------------------------------------------------------------
;; Symbol String [XML -> XML] -> Xexpr
;; read an Xexpr from a file that contains one XML element
(define (read-xexpr-aux tag f fix-up)
(define raw (read-from-file-or-device f read-xml/element))
(check-result 'read-plain-xexpr my-xexpr? 'xexpr (xml->xexpr (fix-up raw))))
;; Symbol String [XML -> XML] -> Xexpr
(define (read-xexpr/web-aux tag url:string fix-up)
(retrieve
url:string
(lambda (url h)
(cond
[(404? h) #f]
[else
(define e (filter element? (call/input-url url get-pure-port read-html-as-xml)))
(and (cons? e) (check-result tag my-xexpr? 'xexpr (xml->xexpr (fix-up (first e)))))]))))
(define fix-up (eliminate-whitespace '() (lambda (x) #t)))
;; ---------------------------------------------------------------------------------------------------
;; String [URL String -> X] -> X
;; retrieve the first text line from the url represented by url:string, hand url and line to consumer
(define (retrieve url:string consumer)
(define URL (string->url url:string))
(with-handlers ([exn:fail:network?
(lambda (e)
(define msg (format "working url, details:\n ~a" (exn-message e)))
(check-arg 'read-xexpr/web #f msg "" url:string))])
(consumer URL (call/input-url URL get-impure-port read-line))))
;; ---------------------------------------------------------------------------------------------------
;; String -> Boolean
;; does the string contain "404"
(define (404? s) (pair? (regexp-match "404" s)))
;; ---------------------------------------------------------------------------------------------------
;; String Xexpr -> [Listof String]
;; retrieve the domain-local neighbors of url that point to HTML files
(module+ test
(check-equal? (url-html-neighbors-aux
"http://fun.com/"
'(div
(a "hello")
(a ((href " one.html")) "world")
(a ((href "Papers/ignore.pdf")) "world")
(img ((alt "two")))
(img ((alt "three") (src "ignore.jpg")))))
'("http://fun.com/one.html")))
(define (url-html-neighbors-aux u xexpr)
(define url (string->url u))
(for/fold ((result '())) ((e (xexpr-elements xexpr 'a)))
(cond
[(and (cons? (rest e)) (loa? (second e)))
(define html-targets
(for/fold ((htmls '())) ((attributes-of-a-element (second e)))
(cond
[(symbol=? (first attributes-of-a-element) 'href)
(define value:str (string-trim-both (second attributes-of-a-element)))
(define value:url (combine-url/relative url value:str))
(if (not (url-ends-in-html? value:url))
htmls
(cons (url->string value:url) htmls))]
[else htmls])))
(append html-targets result)]
[else result])))
;; ---------------------------------------------------------------------------------------------------
;; URL -> Boolean
;; does the url end in html?
(module+ test
(check-true (url-ends-in-html? (string->url "seconds.html")))
(check-false (url-ends-in-html? (string->url "seconds.pdf"))))
(define (url-ends-in-html? u)
(define q (reverse (map path/param-path (url-path u))))
(and (cons? q) (pair? (regexp-match ".html$" (first q)))))
;; ---------------------------------------------------------------------------------------------------
;; Xexpr Symbol -> [Listof Xexpr]
;; retrieve all elements whose tag is 'tag'
(module+ test
(check-equal? (xexpr-elements '(p () (p ((align "center")) "hello") (a)) 'p)
(list '(p () (p ((align "center")) "hello") (a))
'(p ((align "center")) "hello"))))
(define (xexpr-elements x0 tag)
(define (K- fst rst) rst)
(xexpr-abs x0
'()
K-
K-
append
(lambda (e loa rst) (if (symbol=? (first e) tag) (cons e rst) rst))
(lambda (e rst) (if (symbol=? (first e) tag) (cons e rst) rst))))
;; ---------------------------------------------------------------------------------------------------
;; Xexpr
;; [Attribute -> X]
;; Y
;; [String Y -> Y]
;; [Xexpr Y -> Y]
;; [Xexpr [Listof X] Z -> W]
;; [Symbol Z -> W]
;; ->
;; W
;; traverse X-expression and combine results
(define (xexpr-abs x0 body0 attr-combine string-combine xexpr-combine loa-combine plain-combine)
;; Xexpr -> W
(define (f-xexpr x)
(cond
[(and (cons? (rest x)) (loa? (second x)))
(loa-combine x (f-loa (second x)) (f-xbody (rest (rest x))))]
[else (plain-combine x (f-xbody (rest x)))]))
;; Xbody -> Z
(define (f-xbody x)
(cond
[(empty? x) body0]
[(string? (first x)) (string-combine (first x) (f-xbody (rest x)))]
[(cons? (first x)) (xexpr-combine (f-xexpr (first x)) (f-xbody (rest x)))]
[else (f-xbody (rest x))]))
;; LOA -> [Listof X]
(define (f-loa x)
(cond
[(empty? x) '()]
[else (attr-combine (first x) (f-loa (rest x)))]))
;; -- IN --
(f-xexpr x0))
;; String or (cons Symbol Y) or empty or (cons (list Symbol String) Any) --> Boolean
;; is the given value a loa, possibly in front of some other Xexpr elements
(define (loa? x)
(or (empty? x) (and (cons? x) (cons? (first x)))))
;; ---------------------------------------------------------------------------------------------------
;; tester
(define-syntax (simulate-file stx)
(syntax-case stx ()
[(simulate-file)
(raise-syntax-error #f "expects at least one sub-expression" stx)]
[(simulate-file reader str ...) #'(simulate-file/proc (f2h reader) str ...)]))
(define (simulate-file/proc reader . los)
(define _1 (check-proc "simulate-file" reader 1 "reader" "one argument"))
(define _2
(andmap
(lambda (f)
(check-arg "simulate-file" (string? f) "sequence of strings" "" f))
los))
(define t (make-temporary-file "drracket-temporary-file-~a"))
(dynamic-wind
(lambda ()
(with-output-to-file t
(lambda () (for-each displayln los))
#:mode 'text
#:exists 'replace))
(lambda ()
(reader (path->string t)))
(lambda ()
(delete-file t))))

View File

@ -19,6 +19,7 @@ run -t batch-io.rkt
run -t batch-io2.rkt run -t batch-io2.rkt
run -t batch-io3.rkt run -t batch-io3.rkt
run -t batch-io-csv-ho.rkt run -t batch-io-csv-ho.rkt
run -t batch-io-xexpr.rkt
run clause-once.rkt run clause-once.rkt
run full-scene-visible.rkt run full-scene-visible.rkt
run image-too-large.rkt run image-too-large.rkt