From 2996b5ffc435c5ce33d467f0842a99d5b68bb1ee Mon Sep 17 00:00:00 2001 From: Matthias Felleisen Date: Sun, 11 Aug 2013 15:19:08 -0400 Subject: [PATCH] batch io revised with X-expression reader, plus tests --- pkgs/htdp-pkgs/htdp-lib/2htdp/batch-io.rkt | 424 +++++++++++++++++---- pkgs/htdp-pkgs/htdp-lib/2htdp/xtest | 1 + 2 files changed, 349 insertions(+), 76 deletions(-) diff --git a/pkgs/htdp-pkgs/htdp-lib/2htdp/batch-io.rkt b/pkgs/htdp-pkgs/htdp-lib/2htdp/batch-io.rkt index f0d8e900d4..ec49b2c3ab 100644 --- a/pkgs/htdp-pkgs/htdp-lib/2htdp/batch-io.rkt +++ b/pkgs/htdp-pkgs/htdp-lib/2htdp/batch-io.rkt @@ -1,22 +1,13 @@ -#lang racket/base - -(require racket/function - racket/file - racket/format - racket/string - racket/local - (for-syntax racket/base - syntax/parse) - htdp/error - (rename-in lang/prim (first-order->higher-order f2h)) - "private/csv/csv.rkt") +#lang racket ;; todo? ;; -- export tokenization? -;; I am tryiing to use these lists to automate the documentation of the -;; functions but my scribble skills are insufficient and my time is running -;; out. +(require (rename-in lang/prim (first-order->higher-order f2h))) + +;; I am trying to use these lists to automate the documentation of the functions +;; but my scribble skills are insufficient and my time is running out. + (module devices racket/base (provide *input-devices* *output-devices*) (define *input-devices* `((stdin ,current-input-port) (standard-in ,current-input-port))) @@ -31,44 +22,128 @@ ;; all reader functions consume the name of a file f: ;; -- f must be a file name (string) in the same folder as the program - read-file ;; String -> String + ;; String -> String ;; read the specified file as a string + read-file - read-1strings ;; String -> [Listof 1String] + ;; String -> [Listof 1String] ;; read the specified file as a list of 1strings (characters) + read-1strings - read-lines ;; String -> [Listof String] + ;; String -> [Listof String] ;; read the specified file as a list of strings, one per line + read-lines - read-words ;; String -> [Listof String] + ;; String -> [Listof String] ;; read the specified file as a list of white-space separated tokens + read-words - read-words/line ;; String -> [Listof [Listof String]] + ;; String -> [Listof [Listof String]] ;; read the specified file as a list of lines, each line as a list of words + read-words/line - read-words-and-numbers/line ;; String -> [Listof [Listof (Union Number String)]] + ;; String -> [Listof [Listof (Union Number String)]] ;; read the specified file as a list of lines, each line as a list of words and numbers + read-words-and-numbers/line - read-csv-file ;; String -> [Listof [Listof (U Any)]] + ;; String -> [Listof [Listof (U Any)]] ;; -- f must be formated as a a file with comma-separated values (Any) ;; read the specified file as a list of lists---one per line---of values (Any) + read-csv-file - write-file ;; String String -> String + ;; String String -> String ;; (write-file filename str) writes str to filename; ;; produces the file name as a confirmation that the write succeeded + write-file) + +#; +(provide + ;; [List-of Symbol] + ;; symbols that redirect input from an input-port + *input-devives* - ;; *input-devives*: symbols that redirect input from an input-port - ;; *output-devives*: symbols that redirect output from a output-port - ) + ;; [List-of Symbol] + ;; symbols that redirect output from a output-port + *output-devives*) + +;; --------------------------------------------------------------------------------------------------- +;; reading simple X-expressions via HTML/XML files (provide-higher-order-primitive - read-csv-file/rows (_ row-processor) ;; String ([Listof Any] -> X) -> [Listof X] - ;; -- f must be formated as a a file with comma-separated values (Any) - ;; read the specified file as a file of comma-separated values, apply the second - ;; argument to each row, i.e., list of CSV on one line - ) + ;; String ([Listof Any] -> X) -> [Listof X] + ;; -- f must be formated as a a file with comma-separated values (Any) + ;; read the specified file as a file of comma-separated values, apply the second + ;; argument to each row, i.e., list of CSV on one line + read-csv-file/rows (_ row-processor)) -;; ----------------------------------------------------------------------------- +(provide + ; Xexpr is one of: + ; -- Symbol + ; -- String + ; -- (cons Symbol (cons [List-of Attribute] [List-of Xexpr])) + ; -- (cons Symbol [List-of Xexpr]) + ; + ; Attribute is: + ; (list Symbol String) + ; (list 'a "some text") is called an a-Attribute and "some text" is a's value. + + ;; Any -> Boolean + ;; is the given value an Xexpr? + ;; effect: display bad piece if x is not an xexpr + (rename-out (my-xexpr? xexpr?)) + + ;; String -> Xexpr + ;; given a file name, read an Xexpr from a file that contains at least one XML element + + ;; the XML element does not contain any strings as elements other than whitespace, + ;; and all whitespace between embedded elements is eliminated + read-plain-xexpr + + ;; String -> Xexpr + ;; given a file name, read an Xexpr from a file that contains at least one XML element + read-xexpr + + ;; String -> Boolean + ;; false, if this url returns a '404'; true otherwise + url-exists? + + ;; String -> [Maybe Xexpr] + ;; given a URL, find web site and retrieve the first XML element, false if the web page isn't found + ;; read HTML as XML (if possible) + ;; effect: signals an error in case of network problems + read-xexpr/web + + ;; String -> [Maybe Xexpr] + ;; given a URL, find web site and retrieve the first XML element, false if the web page isn't found + ;; read HTML as XML (if possible) + ;; effect: signals an error in case of network problems + + ;; the XML element does not contain any strings as elements other than whitespace, + ;; and all whitespace between embedded elements is eliminated + read-plain-xexpr/web + + ;; String -> [Listof String] + ;; produce the list of (fully resolve) .html references in a elements from url + url-html-neighbors + + ;; Xexpr -> String + ;; turn the X-expression into a string + xexpr-as-string) + +;; --------------------------------------------------------------------------------------------------- +;; exported functions + +(require htdp/error + (for-syntax racket/base syntax/parse) + "private/csv/csv.rkt" + ;; --- xml/html + net/url + xml/xml + html + srfi/13) + +(module+ test + (require rackunit)) (define-syntax-rule (def-reader (name f s ...) body ...) @@ -77,8 +152,6 @@ (let () body ...))) -;; --- exported functions -- - (def-reader (read-file f) (list->string (read-chunks f read-char drop-last-newline))) @@ -92,22 +165,13 @@ (read-words/line/internal f append)) (def-reader (read-words/line f) - ;; String -> [Listof [Listof String]] - ;; read the specified file as a list of lines, each line as a list of words (read-words/line/internal f cons)) (def-reader (read-words-and-numbers/line f) - ;; String -> [Listof [Listof (U String Number)]] - ;; read the specified file as a list of lines, each line as a list of words and numbers - (read-words/line/internal f (lambda (line1 r) - (cons (for/list ((t (in-list line1))) (or (string->number t) t)) r)))) - -(define (read-words/line/internal f combine) - (define lines (read-chunks f *read-line (lambda (x) x))) - (foldl (lambda (f r) - (define fst (filter (compose not (curry string=? "")) (split f))) - (combine fst r)) - '() lines)) + ;; String [Listof [Listof (U String Number)]] -> [Listof [Listof (U String Number)]] + (define (tease-out-numbers line1 r) + (cons (for/list ((t (in-list line1))) (or (string->number t) t)) r)) + (read-words/line/internal f tease-out-numbers)) (def-reader (read-csv-file f) (read-csv-file/func f)) @@ -119,33 +183,37 @@ (define (*read-line) (read-line (current-input-port) 'any)) -;; ----------------------------------------------------------------------------- -;; tester +;; --------------------------------------------------------------------------------------------------- +(define (my-xexpr? x0) + ;; -- IN -- + (with-handlers (((lambda (x) (and (cons? x) (eq? (car x) tag))) + (lambda (x) + (displayln `(,(cdr x) occurs in ,x0)) + #f))) + (xexpr-aux? x0))) -(define-syntax (simulate-file stx) - (syntax-case stx () - [(simulate-file) - (raise-syntax-error #f "expects at least one sub-expression" stx)] - [(simulate-file reader str ...) #'(simulate-file/proc (f2h reader) str ...)])) - -(define (simulate-file/proc reader . los) - (define _1 (check-proc "simulate-file" reader 1 "reader" "one argument")) - (define _2 - (andmap - (lambda (f) - (check-arg "simulate-file" (string? f) "sequence of strings" "" f)) - los)) - (define t (make-temporary-file "drracket-temporary-file-~a")) - (dynamic-wind - (lambda () - (with-output-to-file t - (lambda () (for-each displayln los)) - #:mode 'text - #:exists 'replace)) - (lambda () - (reader (path->string t))) - (lambda () - (delete-file t)))) +(define (url-html-neighbors u) + (define x (read-xexpr/web u)) + (if x (url-html-neighbors-aux u x) '())) + +(define (url-exists? url:string) + (retrieve url:string (lambda (_ h) (not (404? h))))) + +(def-reader (read-plain-xexpr f) + (read-xexpr-aux 'read-plain-xexpr f fix-up)) + +(def-reader (read-xexpr f) + (read-xexpr-aux 'read-xexpr f values)) + +(define (read-xexpr/web url:string) + (read-xexpr/web-aux 'read-xexpr/web url:string values)) + +(define (read-plain-xexpr/web url:string) + (read-xexpr/web-aux 'read-xexpr/web url:string fix-up)) + +(define (xexpr-as-string x) + (check-arg 'xexpr->string (and (pair? x) (xexpr? x)) 'xexpr "first" x) + (call-with-output-string (curry display-xml/content (xexpr->xml x)))) ;; ----------------------------------------------------------------------------- ;; writer @@ -163,15 +231,25 @@ ;; ----------------------------------------------------------------------------- ;; auxiliaries -;; String [([Listof X] -> Y)] -> [Listof Y] +;; String [String [Listof [Listof X]] -> Y] -> Y +(define (read-words/line/internal f combine) + (define lines (read-chunks f *read-line (lambda (x) x))) + (foldl (lambda (f r) + (define fst (filter (compose not (curry string=? "")) (split f))) + (combine fst r)) + '() lines)) ;; String (-> X) ([Listof X] -> [Listof X]) -> [Listof X] ;; read a file as a list of X where process-accu is applied to accu when eof (define (read-chunks f read-chunk process-accu) (define (rd) (let loop ([accu '()]) - (define nxt (read-chunk)) - (if (eof-object? nxt) (process-accu accu) (loop (cons nxt accu))))) + (define nxt (read-chunk)) + (if (eof-object? nxt) (process-accu accu) (loop (cons nxt accu))))) + (read-from-file-or-device f rd)) + +;; String [-> X] -> X +(define (read-from-file-or-device f rd) (define device (assq f *input-devices*)) (if device (parameterize ((current-input-port [(cadr device)])) (rd)) @@ -220,3 +298,197 @@ ;; splits a string with newlines into a list of lines (define (split-lines str) (map string-trim (split str "\r*\n"))) + +;; --------------------------------------------------------------------------------------------------- +;; is the given value a + +(define tag (gensym)) + +;; Any -> Boolean +(define (xexpr-aux? x) + (cond + [(string? x) #t] + [(symbol? x) #t] + [(and (cons? x) (symbol? (first x))) + (define body (rest x)) + (or (and (list-of-attributes? (first body)) (list-of-xexpr? (rest body))) + (list-of-xexpr? body))] + [else (raise (cons tag x))])) + +;; Any -> Boolean +(define (list-of-attributes? xs) + (and (or (list? xs) (raise (cons tag xs))) + (for/and ((x xs)) + (and (list? x) (= (length x) 2) (symbol? (first x)) (string? (second x)))))) + +;; Any -> Boolean +(define (list-of-xexpr? xs) + (and (or (list? xs) (raise (cons tag xs))) + (andmap xexpr? xs))) + +;; --------------------------------------------------------------------------------------------------- +;; Symbol String [XML -> XML] -> Xexpr +;; read an Xexpr from a file that contains one XML element +(define (read-xexpr-aux tag f fix-up) + (define raw (read-from-file-or-device f read-xml/element)) + (check-result 'read-plain-xexpr my-xexpr? 'xexpr (xml->xexpr (fix-up raw)))) + +;; Symbol String [XML -> XML] -> Xexpr +(define (read-xexpr/web-aux tag url:string fix-up) + (retrieve + url:string + (lambda (url h) + (cond + [(404? h) #f] + [else + (define e (filter element? (call/input-url url get-pure-port read-html-as-xml))) + (and (cons? e) (check-result tag my-xexpr? 'xexpr (xml->xexpr (fix-up (first e)))))])))) + +(define fix-up (eliminate-whitespace '() (lambda (x) #t))) + +;; --------------------------------------------------------------------------------------------------- +;; String [URL String -> X] -> X +;; retrieve the first text line from the url represented by url:string, hand url and line to consumer +(define (retrieve url:string consumer) + (define URL (string->url url:string)) + (with-handlers ([exn:fail:network? + (lambda (e) + (define msg (format "working url, details:\n ~a" (exn-message e))) + (check-arg 'read-xexpr/web #f msg "" url:string))]) + (consumer URL (call/input-url URL get-impure-port read-line)))) + +;; --------------------------------------------------------------------------------------------------- +;; String -> Boolean +;; does the string contain "404" +(define (404? s) (pair? (regexp-match "404" s))) + +;; --------------------------------------------------------------------------------------------------- +;; String Xexpr -> [Listof String] +;; retrieve the domain-local neighbors of url that point to HTML files + +(module+ test + (check-equal? (url-html-neighbors-aux + "http://fun.com/" + '(div + (a "hello") + (a ((href " one.html")) "world") + (a ((href "Papers/ignore.pdf")) "world") + (img ((alt "two"))) + (img ((alt "three") (src "ignore.jpg"))))) + '("http://fun.com/one.html"))) + +(define (url-html-neighbors-aux u xexpr) + (define url (string->url u)) + (for/fold ((result '())) ((e (xexpr-elements xexpr 'a))) + (cond + [(and (cons? (rest e)) (loa? (second e))) + (define html-targets + (for/fold ((htmls '())) ((attributes-of-a-element (second e))) + (cond + [(symbol=? (first attributes-of-a-element) 'href) + (define value:str (string-trim-both (second attributes-of-a-element))) + (define value:url (combine-url/relative url value:str)) + (if (not (url-ends-in-html? value:url)) + htmls + (cons (url->string value:url) htmls))] + [else htmls]))) + (append html-targets result)] + [else result]))) + +;; --------------------------------------------------------------------------------------------------- +;; URL -> Boolean +;; does the url end in html? + +(module+ test + (check-true (url-ends-in-html? (string->url "seconds.html"))) + (check-false (url-ends-in-html? (string->url "seconds.pdf")))) + +(define (url-ends-in-html? u) + (define q (reverse (map path/param-path (url-path u)))) + (and (cons? q) (pair? (regexp-match ".html$" (first q))))) + +;; --------------------------------------------------------------------------------------------------- +;; Xexpr Symbol -> [Listof Xexpr] +;; retrieve all elements whose tag is 'tag' + +(module+ test + (check-equal? (xexpr-elements '(p () (p ((align "center")) "hello") (a)) 'p) + (list '(p () (p ((align "center")) "hello") (a)) + '(p ((align "center")) "hello")))) + +(define (xexpr-elements x0 tag) + (define (K- fst rst) rst) + (xexpr-abs x0 + '() + K- + K- + append + (lambda (e loa rst) (if (symbol=? (first e) tag) (cons e rst) rst)) + (lambda (e rst) (if (symbol=? (first e) tag) (cons e rst) rst)))) + +;; --------------------------------------------------------------------------------------------------- +;; Xexpr +;; [Attribute -> X] +;; Y +;; [String Y -> Y] +;; [Xexpr Y -> Y] +;; [Xexpr [Listof X] Z -> W] +;; [Symbol Z -> W] +;; -> +;; W + +;; traverse X-expression and combine results +(define (xexpr-abs x0 body0 attr-combine string-combine xexpr-combine loa-combine plain-combine) + ;; Xexpr -> W + (define (f-xexpr x) + (cond + [(and (cons? (rest x)) (loa? (second x))) + (loa-combine x (f-loa (second x)) (f-xbody (rest (rest x))))] + [else (plain-combine x (f-xbody (rest x)))])) + ;; Xbody -> Z + (define (f-xbody x) + (cond + [(empty? x) body0] + [(string? (first x)) (string-combine (first x) (f-xbody (rest x)))] + [(cons? (first x)) (xexpr-combine (f-xexpr (first x)) (f-xbody (rest x)))] + [else (f-xbody (rest x))])) + ;; LOA -> [Listof X] + (define (f-loa x) + (cond + [(empty? x) '()] + [else (attr-combine (first x) (f-loa (rest x)))])) + ;; -- IN -- + (f-xexpr x0)) + +;; String or (cons Symbol Y) or empty or (cons (list Symbol String) Any) --> Boolean +;; is the given value a loa, possibly in front of some other Xexpr elements +(define (loa? x) + (or (empty? x) (and (cons? x) (cons? (first x))))) + +;; --------------------------------------------------------------------------------------------------- +;; tester + +(define-syntax (simulate-file stx) + (syntax-case stx () + [(simulate-file) + (raise-syntax-error #f "expects at least one sub-expression" stx)] + [(simulate-file reader str ...) #'(simulate-file/proc (f2h reader) str ...)])) + +(define (simulate-file/proc reader . los) + (define _1 (check-proc "simulate-file" reader 1 "reader" "one argument")) + (define _2 + (andmap + (lambda (f) + (check-arg "simulate-file" (string? f) "sequence of strings" "" f)) + los)) + (define t (make-temporary-file "drracket-temporary-file-~a")) + (dynamic-wind + (lambda () + (with-output-to-file t + (lambda () (for-each displayln los)) + #:mode 'text + #:exists 'replace)) + (lambda () + (reader (path->string t))) + (lambda () + (delete-file t)))) diff --git a/pkgs/htdp-pkgs/htdp-lib/2htdp/xtest b/pkgs/htdp-pkgs/htdp-lib/2htdp/xtest index 93050d9a2f..30ab0dd673 100755 --- a/pkgs/htdp-pkgs/htdp-lib/2htdp/xtest +++ b/pkgs/htdp-pkgs/htdp-lib/2htdp/xtest @@ -19,6 +19,7 @@ run -t batch-io.rkt run -t batch-io2.rkt run -t batch-io3.rkt run -t batch-io-csv-ho.rkt +run -t batch-io-xexpr.rkt run clause-once.rkt run full-scene-visible.rkt run image-too-large.rkt