batch io revised with X-expression reader, plus tests
This commit is contained in:
parent
f3444c7e51
commit
2996b5ffc4
|
@ -1,22 +1,13 @@
|
|||
#lang racket/base
|
||||
|
||||
(require racket/function
|
||||
racket/file
|
||||
racket/format
|
||||
racket/string
|
||||
racket/local
|
||||
(for-syntax racket/base
|
||||
syntax/parse)
|
||||
htdp/error
|
||||
(rename-in lang/prim (first-order->higher-order f2h))
|
||||
"private/csv/csv.rkt")
|
||||
#lang racket
|
||||
|
||||
;; todo?
|
||||
;; -- export tokenization?
|
||||
|
||||
;; I am tryiing to use these lists to automate the documentation of the
|
||||
;; functions but my scribble skills are insufficient and my time is running
|
||||
;; out.
|
||||
(require (rename-in lang/prim (first-order->higher-order f2h)))
|
||||
|
||||
;; I am trying to use these lists to automate the documentation of the functions
|
||||
;; but my scribble skills are insufficient and my time is running out.
|
||||
|
||||
(module devices racket/base
|
||||
(provide *input-devices* *output-devices*)
|
||||
(define *input-devices* `((stdin ,current-input-port) (standard-in ,current-input-port)))
|
||||
|
@ -31,44 +22,128 @@
|
|||
;; all reader functions consume the name of a file f:
|
||||
;; -- f must be a file name (string) in the same folder as the program
|
||||
|
||||
read-file ;; String -> String
|
||||
;; String -> String
|
||||
;; read the specified file as a string
|
||||
read-file
|
||||
|
||||
read-1strings ;; String -> [Listof 1String]
|
||||
;; String -> [Listof 1String]
|
||||
;; read the specified file as a list of 1strings (characters)
|
||||
read-1strings
|
||||
|
||||
read-lines ;; String -> [Listof String]
|
||||
;; String -> [Listof String]
|
||||
;; read the specified file as a list of strings, one per line
|
||||
read-lines
|
||||
|
||||
read-words ;; String -> [Listof String]
|
||||
;; String -> [Listof String]
|
||||
;; read the specified file as a list of white-space separated tokens
|
||||
read-words
|
||||
|
||||
read-words/line ;; String -> [Listof [Listof String]]
|
||||
;; String -> [Listof [Listof String]]
|
||||
;; read the specified file as a list of lines, each line as a list of words
|
||||
read-words/line
|
||||
|
||||
read-words-and-numbers/line ;; String -> [Listof [Listof (Union Number String)]]
|
||||
;; String -> [Listof [Listof (Union Number String)]]
|
||||
;; read the specified file as a list of lines, each line as a list of words and numbers
|
||||
read-words-and-numbers/line
|
||||
|
||||
read-csv-file ;; String -> [Listof [Listof (U Any)]]
|
||||
;; String -> [Listof [Listof (U Any)]]
|
||||
;; -- f must be formated as a a file with comma-separated values (Any)
|
||||
;; read the specified file as a list of lists---one per line---of values (Any)
|
||||
read-csv-file
|
||||
|
||||
write-file ;; String String -> String
|
||||
;; String String -> String
|
||||
;; (write-file filename str) writes str to filename;
|
||||
;; produces the file name as a confirmation that the write succeeded
|
||||
write-file)
|
||||
|
||||
#;
|
||||
(provide
|
||||
;; [List-of Symbol]
|
||||
;; symbols that redirect input from an input-port
|
||||
*input-devives*
|
||||
|
||||
;; *input-devives*: symbols that redirect input from an input-port
|
||||
;; *output-devives*: symbols that redirect output from a output-port
|
||||
)
|
||||
;; [List-of Symbol]
|
||||
;; symbols that redirect output from a output-port
|
||||
*output-devives*)
|
||||
|
||||
;; ---------------------------------------------------------------------------------------------------
|
||||
;; reading simple X-expressions via HTML/XML files
|
||||
|
||||
(provide-higher-order-primitive
|
||||
read-csv-file/rows (_ row-processor) ;; String ([Listof Any] -> X) -> [Listof X]
|
||||
;; -- f must be formated as a a file with comma-separated values (Any)
|
||||
;; read the specified file as a file of comma-separated values, apply the second
|
||||
;; argument to each row, i.e., list of CSV on one line
|
||||
)
|
||||
;; String ([Listof Any] -> X) -> [Listof X]
|
||||
;; -- f must be formated as a a file with comma-separated values (Any)
|
||||
;; read the specified file as a file of comma-separated values, apply the second
|
||||
;; argument to each row, i.e., list of CSV on one line
|
||||
read-csv-file/rows (_ row-processor))
|
||||
|
||||
;; -----------------------------------------------------------------------------
|
||||
(provide
|
||||
; Xexpr is one of:
|
||||
; -- Symbol
|
||||
; -- String
|
||||
; -- (cons Symbol (cons [List-of Attribute] [List-of Xexpr]))
|
||||
; -- (cons Symbol [List-of Xexpr])
|
||||
;
|
||||
; Attribute is:
|
||||
; (list Symbol String)
|
||||
; (list 'a "some text") is called an a-Attribute and "some text" is a's value.
|
||||
|
||||
;; Any -> Boolean
|
||||
;; is the given value an Xexpr?
|
||||
;; effect: display bad piece if x is not an xexpr
|
||||
(rename-out (my-xexpr? xexpr?))
|
||||
|
||||
;; String -> Xexpr
|
||||
;; given a file name, read an Xexpr from a file that contains at least one XML element
|
||||
|
||||
;; the XML element does not contain any strings as elements other than whitespace,
|
||||
;; and all whitespace between embedded elements is eliminated
|
||||
read-plain-xexpr
|
||||
|
||||
;; String -> Xexpr
|
||||
;; given a file name, read an Xexpr from a file that contains at least one XML element
|
||||
read-xexpr
|
||||
|
||||
;; String -> Boolean
|
||||
;; false, if this url returns a '404'; true otherwise
|
||||
url-exists?
|
||||
|
||||
;; String -> [Maybe Xexpr]
|
||||
;; given a URL, find web site and retrieve the first XML element, false if the web page isn't found
|
||||
;; read HTML as XML (if possible)
|
||||
;; effect: signals an error in case of network problems
|
||||
read-xexpr/web
|
||||
|
||||
;; String -> [Maybe Xexpr]
|
||||
;; given a URL, find web site and retrieve the first XML element, false if the web page isn't found
|
||||
;; read HTML as XML (if possible)
|
||||
;; effect: signals an error in case of network problems
|
||||
|
||||
;; the XML element does not contain any strings as elements other than whitespace,
|
||||
;; and all whitespace between embedded elements is eliminated
|
||||
read-plain-xexpr/web
|
||||
|
||||
;; String -> [Listof String]
|
||||
;; produce the list of (fully resolve) .html references in a elements from url
|
||||
url-html-neighbors
|
||||
|
||||
;; Xexpr -> String
|
||||
;; turn the X-expression into a string
|
||||
xexpr-as-string)
|
||||
|
||||
;; ---------------------------------------------------------------------------------------------------
|
||||
;; exported functions
|
||||
|
||||
(require htdp/error
|
||||
(for-syntax racket/base syntax/parse)
|
||||
"private/csv/csv.rkt"
|
||||
;; --- xml/html
|
||||
net/url
|
||||
xml/xml
|
||||
html
|
||||
srfi/13)
|
||||
|
||||
(module+ test
|
||||
(require rackunit))
|
||||
|
||||
(define-syntax-rule
|
||||
(def-reader (name f s ...) body ...)
|
||||
|
@ -77,8 +152,6 @@
|
|||
(let ()
|
||||
body ...)))
|
||||
|
||||
;; --- exported functions --
|
||||
|
||||
(def-reader (read-file f)
|
||||
(list->string (read-chunks f read-char drop-last-newline)))
|
||||
|
||||
|
@ -92,22 +165,13 @@
|
|||
(read-words/line/internal f append))
|
||||
|
||||
(def-reader (read-words/line f)
|
||||
;; String -> [Listof [Listof String]]
|
||||
;; read the specified file as a list of lines, each line as a list of words
|
||||
(read-words/line/internal f cons))
|
||||
|
||||
(def-reader (read-words-and-numbers/line f)
|
||||
;; String -> [Listof [Listof (U String Number)]]
|
||||
;; read the specified file as a list of lines, each line as a list of words and numbers
|
||||
(read-words/line/internal f (lambda (line1 r)
|
||||
(cons (for/list ((t (in-list line1))) (or (string->number t) t)) r))))
|
||||
|
||||
(define (read-words/line/internal f combine)
|
||||
(define lines (read-chunks f *read-line (lambda (x) x)))
|
||||
(foldl (lambda (f r)
|
||||
(define fst (filter (compose not (curry string=? "")) (split f)))
|
||||
(combine fst r))
|
||||
'() lines))
|
||||
;; String [Listof [Listof (U String Number)]] -> [Listof [Listof (U String Number)]]
|
||||
(define (tease-out-numbers line1 r)
|
||||
(cons (for/list ((t (in-list line1))) (or (string->number t) t)) r))
|
||||
(read-words/line/internal f tease-out-numbers))
|
||||
|
||||
(def-reader (read-csv-file f)
|
||||
(read-csv-file/func f))
|
||||
|
@ -119,33 +183,37 @@
|
|||
(define (*read-line)
|
||||
(read-line (current-input-port) 'any))
|
||||
|
||||
;; -----------------------------------------------------------------------------
|
||||
;; tester
|
||||
;; ---------------------------------------------------------------------------------------------------
|
||||
(define (my-xexpr? x0)
|
||||
;; -- IN --
|
||||
(with-handlers (((lambda (x) (and (cons? x) (eq? (car x) tag)))
|
||||
(lambda (x)
|
||||
(displayln `(,(cdr x) occurs in ,x0))
|
||||
#f)))
|
||||
(xexpr-aux? x0)))
|
||||
|
||||
(define-syntax (simulate-file stx)
|
||||
(syntax-case stx ()
|
||||
[(simulate-file)
|
||||
(raise-syntax-error #f "expects at least one sub-expression" stx)]
|
||||
[(simulate-file reader str ...) #'(simulate-file/proc (f2h reader) str ...)]))
|
||||
|
||||
(define (simulate-file/proc reader . los)
|
||||
(define _1 (check-proc "simulate-file" reader 1 "reader" "one argument"))
|
||||
(define _2
|
||||
(andmap
|
||||
(lambda (f)
|
||||
(check-arg "simulate-file" (string? f) "sequence of strings" "" f))
|
||||
los))
|
||||
(define t (make-temporary-file "drracket-temporary-file-~a"))
|
||||
(dynamic-wind
|
||||
(lambda ()
|
||||
(with-output-to-file t
|
||||
(lambda () (for-each displayln los))
|
||||
#:mode 'text
|
||||
#:exists 'replace))
|
||||
(lambda ()
|
||||
(reader (path->string t)))
|
||||
(lambda ()
|
||||
(delete-file t))))
|
||||
(define (url-html-neighbors u)
|
||||
(define x (read-xexpr/web u))
|
||||
(if x (url-html-neighbors-aux u x) '()))
|
||||
|
||||
(define (url-exists? url:string)
|
||||
(retrieve url:string (lambda (_ h) (not (404? h)))))
|
||||
|
||||
(def-reader (read-plain-xexpr f)
|
||||
(read-xexpr-aux 'read-plain-xexpr f fix-up))
|
||||
|
||||
(def-reader (read-xexpr f)
|
||||
(read-xexpr-aux 'read-xexpr f values))
|
||||
|
||||
(define (read-xexpr/web url:string)
|
||||
(read-xexpr/web-aux 'read-xexpr/web url:string values))
|
||||
|
||||
(define (read-plain-xexpr/web url:string)
|
||||
(read-xexpr/web-aux 'read-xexpr/web url:string fix-up))
|
||||
|
||||
(define (xexpr-as-string x)
|
||||
(check-arg 'xexpr->string (and (pair? x) (xexpr? x)) 'xexpr "first" x)
|
||||
(call-with-output-string (curry display-xml/content (xexpr->xml x))))
|
||||
|
||||
;; -----------------------------------------------------------------------------
|
||||
;; writer
|
||||
|
@ -163,15 +231,25 @@
|
|||
;; -----------------------------------------------------------------------------
|
||||
;; auxiliaries
|
||||
|
||||
;; String [([Listof X] -> Y)] -> [Listof Y]
|
||||
;; String [String [Listof [Listof X]] -> Y] -> Y
|
||||
(define (read-words/line/internal f combine)
|
||||
(define lines (read-chunks f *read-line (lambda (x) x)))
|
||||
(foldl (lambda (f r)
|
||||
(define fst (filter (compose not (curry string=? "")) (split f)))
|
||||
(combine fst r))
|
||||
'() lines))
|
||||
|
||||
;; String (-> X) ([Listof X] -> [Listof X]) -> [Listof X]
|
||||
;; read a file as a list of X where process-accu is applied to accu when eof
|
||||
(define (read-chunks f read-chunk process-accu)
|
||||
(define (rd)
|
||||
(let loop ([accu '()])
|
||||
(define nxt (read-chunk))
|
||||
(if (eof-object? nxt) (process-accu accu) (loop (cons nxt accu)))))
|
||||
(define nxt (read-chunk))
|
||||
(if (eof-object? nxt) (process-accu accu) (loop (cons nxt accu)))))
|
||||
(read-from-file-or-device f rd))
|
||||
|
||||
;; String [-> X] -> X
|
||||
(define (read-from-file-or-device f rd)
|
||||
(define device (assq f *input-devices*))
|
||||
(if device
|
||||
(parameterize ((current-input-port [(cadr device)])) (rd))
|
||||
|
@ -220,3 +298,197 @@
|
|||
;; splits a string with newlines into a list of lines
|
||||
(define (split-lines str)
|
||||
(map string-trim (split str "\r*\n")))
|
||||
|
||||
;; ---------------------------------------------------------------------------------------------------
|
||||
;; is the given value a
|
||||
|
||||
(define tag (gensym))
|
||||
|
||||
;; Any -> Boolean
|
||||
(define (xexpr-aux? x)
|
||||
(cond
|
||||
[(string? x) #t]
|
||||
[(symbol? x) #t]
|
||||
[(and (cons? x) (symbol? (first x)))
|
||||
(define body (rest x))
|
||||
(or (and (list-of-attributes? (first body)) (list-of-xexpr? (rest body)))
|
||||
(list-of-xexpr? body))]
|
||||
[else (raise (cons tag x))]))
|
||||
|
||||
;; Any -> Boolean
|
||||
(define (list-of-attributes? xs)
|
||||
(and (or (list? xs) (raise (cons tag xs)))
|
||||
(for/and ((x xs))
|
||||
(and (list? x) (= (length x) 2) (symbol? (first x)) (string? (second x))))))
|
||||
|
||||
;; Any -> Boolean
|
||||
(define (list-of-xexpr? xs)
|
||||
(and (or (list? xs) (raise (cons tag xs)))
|
||||
(andmap xexpr? xs)))
|
||||
|
||||
;; ---------------------------------------------------------------------------------------------------
|
||||
;; Symbol String [XML -> XML] -> Xexpr
|
||||
;; read an Xexpr from a file that contains one XML element
|
||||
(define (read-xexpr-aux tag f fix-up)
|
||||
(define raw (read-from-file-or-device f read-xml/element))
|
||||
(check-result 'read-plain-xexpr my-xexpr? 'xexpr (xml->xexpr (fix-up raw))))
|
||||
|
||||
;; Symbol String [XML -> XML] -> Xexpr
|
||||
(define (read-xexpr/web-aux tag url:string fix-up)
|
||||
(retrieve
|
||||
url:string
|
||||
(lambda (url h)
|
||||
(cond
|
||||
[(404? h) #f]
|
||||
[else
|
||||
(define e (filter element? (call/input-url url get-pure-port read-html-as-xml)))
|
||||
(and (cons? e) (check-result tag my-xexpr? 'xexpr (xml->xexpr (fix-up (first e)))))]))))
|
||||
|
||||
(define fix-up (eliminate-whitespace '() (lambda (x) #t)))
|
||||
|
||||
;; ---------------------------------------------------------------------------------------------------
|
||||
;; String [URL String -> X] -> X
|
||||
;; retrieve the first text line from the url represented by url:string, hand url and line to consumer
|
||||
(define (retrieve url:string consumer)
|
||||
(define URL (string->url url:string))
|
||||
(with-handlers ([exn:fail:network?
|
||||
(lambda (e)
|
||||
(define msg (format "working url, details:\n ~a" (exn-message e)))
|
||||
(check-arg 'read-xexpr/web #f msg "" url:string))])
|
||||
(consumer URL (call/input-url URL get-impure-port read-line))))
|
||||
|
||||
;; ---------------------------------------------------------------------------------------------------
|
||||
;; String -> Boolean
|
||||
;; does the string contain "404"
|
||||
(define (404? s) (pair? (regexp-match "404" s)))
|
||||
|
||||
;; ---------------------------------------------------------------------------------------------------
|
||||
;; String Xexpr -> [Listof String]
|
||||
;; retrieve the domain-local neighbors of url that point to HTML files
|
||||
|
||||
(module+ test
|
||||
(check-equal? (url-html-neighbors-aux
|
||||
"http://fun.com/"
|
||||
'(div
|
||||
(a "hello")
|
||||
(a ((href " one.html")) "world")
|
||||
(a ((href "Papers/ignore.pdf")) "world")
|
||||
(img ((alt "two")))
|
||||
(img ((alt "three") (src "ignore.jpg")))))
|
||||
'("http://fun.com/one.html")))
|
||||
|
||||
(define (url-html-neighbors-aux u xexpr)
|
||||
(define url (string->url u))
|
||||
(for/fold ((result '())) ((e (xexpr-elements xexpr 'a)))
|
||||
(cond
|
||||
[(and (cons? (rest e)) (loa? (second e)))
|
||||
(define html-targets
|
||||
(for/fold ((htmls '())) ((attributes-of-a-element (second e)))
|
||||
(cond
|
||||
[(symbol=? (first attributes-of-a-element) 'href)
|
||||
(define value:str (string-trim-both (second attributes-of-a-element)))
|
||||
(define value:url (combine-url/relative url value:str))
|
||||
(if (not (url-ends-in-html? value:url))
|
||||
htmls
|
||||
(cons (url->string value:url) htmls))]
|
||||
[else htmls])))
|
||||
(append html-targets result)]
|
||||
[else result])))
|
||||
|
||||
;; ---------------------------------------------------------------------------------------------------
|
||||
;; URL -> Boolean
|
||||
;; does the url end in html?
|
||||
|
||||
(module+ test
|
||||
(check-true (url-ends-in-html? (string->url "seconds.html")))
|
||||
(check-false (url-ends-in-html? (string->url "seconds.pdf"))))
|
||||
|
||||
(define (url-ends-in-html? u)
|
||||
(define q (reverse (map path/param-path (url-path u))))
|
||||
(and (cons? q) (pair? (regexp-match ".html$" (first q)))))
|
||||
|
||||
;; ---------------------------------------------------------------------------------------------------
|
||||
;; Xexpr Symbol -> [Listof Xexpr]
|
||||
;; retrieve all elements whose tag is 'tag'
|
||||
|
||||
(module+ test
|
||||
(check-equal? (xexpr-elements '(p () (p ((align "center")) "hello") (a)) 'p)
|
||||
(list '(p () (p ((align "center")) "hello") (a))
|
||||
'(p ((align "center")) "hello"))))
|
||||
|
||||
(define (xexpr-elements x0 tag)
|
||||
(define (K- fst rst) rst)
|
||||
(xexpr-abs x0
|
||||
'()
|
||||
K-
|
||||
K-
|
||||
append
|
||||
(lambda (e loa rst) (if (symbol=? (first e) tag) (cons e rst) rst))
|
||||
(lambda (e rst) (if (symbol=? (first e) tag) (cons e rst) rst))))
|
||||
|
||||
;; ---------------------------------------------------------------------------------------------------
|
||||
;; Xexpr
|
||||
;; [Attribute -> X]
|
||||
;; Y
|
||||
;; [String Y -> Y]
|
||||
;; [Xexpr Y -> Y]
|
||||
;; [Xexpr [Listof X] Z -> W]
|
||||
;; [Symbol Z -> W]
|
||||
;; ->
|
||||
;; W
|
||||
|
||||
;; traverse X-expression and combine results
|
||||
(define (xexpr-abs x0 body0 attr-combine string-combine xexpr-combine loa-combine plain-combine)
|
||||
;; Xexpr -> W
|
||||
(define (f-xexpr x)
|
||||
(cond
|
||||
[(and (cons? (rest x)) (loa? (second x)))
|
||||
(loa-combine x (f-loa (second x)) (f-xbody (rest (rest x))))]
|
||||
[else (plain-combine x (f-xbody (rest x)))]))
|
||||
;; Xbody -> Z
|
||||
(define (f-xbody x)
|
||||
(cond
|
||||
[(empty? x) body0]
|
||||
[(string? (first x)) (string-combine (first x) (f-xbody (rest x)))]
|
||||
[(cons? (first x)) (xexpr-combine (f-xexpr (first x)) (f-xbody (rest x)))]
|
||||
[else (f-xbody (rest x))]))
|
||||
;; LOA -> [Listof X]
|
||||
(define (f-loa x)
|
||||
(cond
|
||||
[(empty? x) '()]
|
||||
[else (attr-combine (first x) (f-loa (rest x)))]))
|
||||
;; -- IN --
|
||||
(f-xexpr x0))
|
||||
|
||||
;; String or (cons Symbol Y) or empty or (cons (list Symbol String) Any) --> Boolean
|
||||
;; is the given value a loa, possibly in front of some other Xexpr elements
|
||||
(define (loa? x)
|
||||
(or (empty? x) (and (cons? x) (cons? (first x)))))
|
||||
|
||||
;; ---------------------------------------------------------------------------------------------------
|
||||
;; tester
|
||||
|
||||
(define-syntax (simulate-file stx)
|
||||
(syntax-case stx ()
|
||||
[(simulate-file)
|
||||
(raise-syntax-error #f "expects at least one sub-expression" stx)]
|
||||
[(simulate-file reader str ...) #'(simulate-file/proc (f2h reader) str ...)]))
|
||||
|
||||
(define (simulate-file/proc reader . los)
|
||||
(define _1 (check-proc "simulate-file" reader 1 "reader" "one argument"))
|
||||
(define _2
|
||||
(andmap
|
||||
(lambda (f)
|
||||
(check-arg "simulate-file" (string? f) "sequence of strings" "" f))
|
||||
los))
|
||||
(define t (make-temporary-file "drracket-temporary-file-~a"))
|
||||
(dynamic-wind
|
||||
(lambda ()
|
||||
(with-output-to-file t
|
||||
(lambda () (for-each displayln los))
|
||||
#:mode 'text
|
||||
#:exists 'replace))
|
||||
(lambda ()
|
||||
(reader (path->string t)))
|
||||
(lambda ()
|
||||
(delete-file t))))
|
||||
|
|
|
@ -19,6 +19,7 @@ run -t batch-io.rkt
|
|||
run -t batch-io2.rkt
|
||||
run -t batch-io3.rkt
|
||||
run -t batch-io-csv-ho.rkt
|
||||
run -t batch-io-xexpr.rkt
|
||||
run clause-once.rkt
|
||||
run full-scene-visible.rkt
|
||||
run image-too-large.rkt
|
||||
|
|
Loading…
Reference in New Issue
Block a user