...
original commit: 3db955225aa26a439963cc8c961389244d19ab9d
This commit is contained in:
parent
92ac4f3219
commit
5e699a714b
18
collects/net/url-sig.ss
Normal file
18
collects/net/url-sig.ss
Normal file
|
@ -0,0 +1,18 @@
|
|||
(module url-sig mzscheme
|
||||
(require (lib "unitsig.ss"))
|
||||
(provide net:url^)
|
||||
|
||||
(define-signature net:url^
|
||||
((struct url (scheme host port path params query fragment))
|
||||
(struct mime-header (name value))
|
||||
get-pure-port ;; url [x list (str)] -> in-port
|
||||
get-impure-port ;; url [x list (str)] -> in-port
|
||||
display-pure-port ;; in-port -> ()
|
||||
purify-port ;; in-port -> list (mime-header)
|
||||
netscape/string->url ;; (string -> url)
|
||||
string->url ;; str -> url
|
||||
url->string
|
||||
call/input-url ;; url x (url -> in-port) x
|
||||
;; (in-port -> T)
|
||||
;; [x list (str)] -> T
|
||||
combine-url/relative))) ;; url x str -> url
|
530
collects/net/url-unit.ss
Normal file
530
collects/net/url-unit.ss
Normal file
|
@ -0,0 +1,530 @@
|
|||
;; To do:
|
||||
;; Handle HTTP/file errors.
|
||||
;; Not throw away MIME headers.
|
||||
;; Determine file type.
|
||||
|
||||
;; ----------------------------------------------------------------------
|
||||
|
||||
;; Input ports have two statuses:
|
||||
;; "impure" = they have text waiting
|
||||
;; "pure" = the MIME headers have been read
|
||||
|
||||
(module url-unit mzscheme
|
||||
(require (lib "file.ss")
|
||||
(lib "unitsig.ss")
|
||||
"url-sig.ss")
|
||||
(provide url@)
|
||||
|
||||
(define url@
|
||||
(unit/sig net:url^
|
||||
(import)
|
||||
|
||||
(define-struct (url-exception struct:exn) ())
|
||||
|
||||
;; This is commented out; it's here for debugging.
|
||||
;; It used to be outside the unit.
|
||||
|
||||
(quote
|
||||
(begin
|
||||
(invoke-open-unit/sig mzlib:url@ #f)
|
||||
(define url:cs (string->url "http://www.cs.rice.edu/"))
|
||||
(define url:me (string->url "http://www.cs.rice.edu/~shriram/"))
|
||||
(define (test url)
|
||||
(call/input-url url
|
||||
get-pure-port
|
||||
display-pure-port))))
|
||||
|
||||
(define url-error
|
||||
(lambda (fmt . args)
|
||||
(let ((s (apply format fmt (map (lambda (arg)
|
||||
(if (url? arg)
|
||||
(url->string arg)
|
||||
arg))
|
||||
args))))
|
||||
(raise (make-url-exception s (current-continuation-marks))))))
|
||||
|
||||
;; scheme : str + #f
|
||||
;; host : str + #f
|
||||
;; port : num + #f
|
||||
;; path : str
|
||||
;; params : str + #f
|
||||
;; query : str + #f
|
||||
;; fragment : str + #f
|
||||
(define-struct url (scheme host port path params query fragment))
|
||||
|
||||
;; name : str (all lowercase; not including the colon)
|
||||
;; value : str (doesn't have the eol delimiter)
|
||||
(define-struct mime-header (name value))
|
||||
|
||||
(define url->string
|
||||
(lambda (url)
|
||||
(let ((scheme (url-scheme url))
|
||||
(host (url-host url))
|
||||
(port (url-port url))
|
||||
(path (url-path url))
|
||||
(params (url-params url))
|
||||
(query (url-query url))
|
||||
(fragment (url-fragment url)))
|
||||
(cond
|
||||
((and scheme (string=? scheme "file"))
|
||||
(string-append "file:" path))
|
||||
(else
|
||||
(let ((sa string-append))
|
||||
(sa (if scheme (sa scheme "://") "")
|
||||
(if host host "")
|
||||
(if port (sa ":" (number->string port)) "")
|
||||
; There used to be a "/" here, but that causes an
|
||||
; extra leading slash -- wonder why it ever worked!
|
||||
path
|
||||
(if params (sa ";" params) "")
|
||||
(if query (sa "?" query) "")
|
||||
(if fragment (sa "#" fragment) ""))))))))
|
||||
|
||||
;; url->default-port : url -> num
|
||||
(define url->default-port
|
||||
(lambda (url)
|
||||
(let ((scheme (url-scheme url)))
|
||||
(cond
|
||||
((not scheme) 80)
|
||||
((string=? scheme "http") 80)
|
||||
(else
|
||||
(url-error "Scheme ~a not supported" (url-scheme url)))))))
|
||||
|
||||
;; make-ports : url -> in-port x out-port
|
||||
(define make-ports
|
||||
(lambda (url)
|
||||
(let ((port-number (or (url-port url)
|
||||
(url->default-port url))))
|
||||
(tcp-connect (url-host url) port-number))))
|
||||
|
||||
;; http://get-impure-port : url [x list (str)] -> in-port
|
||||
(define http://get-impure-port
|
||||
(case-lambda
|
||||
[(url) (http://get-impure-port url '())]
|
||||
[(url strings)
|
||||
(let-values (((server->client client->server)
|
||||
(make-ports url)))
|
||||
(let ((access-string
|
||||
(url->string
|
||||
(make-url #f #f #f
|
||||
(url-path url) (url-params url)
|
||||
(url-query url) (url-fragment url)))))
|
||||
(for-each (lambda (s)
|
||||
(display s client->server)
|
||||
(newline client->server))
|
||||
(cons (format "GET ~a HTTP/1.0" access-string)
|
||||
(cons (format "Host: ~a" (url-host url))
|
||||
strings))))
|
||||
(newline client->server)
|
||||
(close-output-port client->server)
|
||||
server->client)]))
|
||||
|
||||
;; file://get-pure-port : url -> in-port
|
||||
(define file://get-pure-port
|
||||
(lambda (url)
|
||||
(when (url-host url)
|
||||
(url-error "Don't know how to get files from remote hosts"))
|
||||
(open-input-file (url-path url))))
|
||||
|
||||
;; get-impure-port : url [x list (str)] -> in-port
|
||||
(define get-impure-port
|
||||
(case-lambda
|
||||
[(url) (get-impure-port url '())]
|
||||
[(url strings)
|
||||
(let ((scheme (url-scheme url)))
|
||||
(cond
|
||||
((not scheme)
|
||||
(url-error "Scheme unspecified in ~a" url))
|
||||
((string=? scheme "http")
|
||||
(http://get-impure-port url strings))
|
||||
((string=? scheme "file")
|
||||
(url-error "There are no impure file: ports"))
|
||||
(else
|
||||
(url-error "Scheme ~a unsupported" scheme))))]))
|
||||
|
||||
;; get-pure-port : url [x list (str)] -> in-port
|
||||
(define get-pure-port
|
||||
(case-lambda
|
||||
[(url) (get-pure-port url '())]
|
||||
[(url strings)
|
||||
(let ((scheme (url-scheme url)))
|
||||
(cond
|
||||
((not scheme)
|
||||
(url-error "Scheme unspecified in ~a" url))
|
||||
((string=? scheme "http")
|
||||
(let ((port (http://get-impure-port url strings)))
|
||||
(purify-port port)
|
||||
port))
|
||||
((string=? scheme "file")
|
||||
(file://get-pure-port url))
|
||||
(else
|
||||
(url-error "Scheme ~a unsupported" scheme))))]))
|
||||
|
||||
;; display-pure-port : in-port -> ()
|
||||
(define display-pure-port
|
||||
(lambda (server->client)
|
||||
(let loop ()
|
||||
(let ((c (read-char server->client)))
|
||||
(unless (eof-object? c)
|
||||
(display c)
|
||||
(loop))))
|
||||
(close-input-port server->client)))
|
||||
|
||||
(define empty-url?
|
||||
(lambda (url)
|
||||
(and (not (url-scheme url)) (not (url-params url))
|
||||
(not (url-query url)) (not (url-fragment url))
|
||||
(andmap (lambda (c) (char=? c #\space))
|
||||
(string->list (url-path url))))))
|
||||
|
||||
;; file://combine-url/relative : fs-path x s/t/r -> fs-path
|
||||
|
||||
(define file://combine-url/relative
|
||||
(let ((path-segment-regexp (regexp "([^/]*)/(.*)"))
|
||||
(translate-dir
|
||||
(lambda (s)
|
||||
(cond
|
||||
[(string=? s "") 'same] ;; handle double slashes
|
||||
[(string=? s "..") 'up]
|
||||
[(string=? s ".") 'same]
|
||||
[else s]))))
|
||||
(lambda (index offset)
|
||||
(let*-values ([(simple-index) (simplify-path index)]
|
||||
[(base name dir?)
|
||||
(split-path simple-index)])
|
||||
(if (string=? "" offset)
|
||||
(build-path base name)
|
||||
(build-path
|
||||
(if (or dir?
|
||||
(directory-exists? simple-index))
|
||||
simple-index
|
||||
(if (eq? base 'relative)
|
||||
'same
|
||||
base))
|
||||
(let loop ((str offset))
|
||||
(let ((m (regexp-match path-segment-regexp str)))
|
||||
(cond
|
||||
[(not m) str]
|
||||
[else
|
||||
(if (string=? "" (caddr m))
|
||||
(translate-dir (cadr m))
|
||||
(build-path (translate-dir (cadr m))
|
||||
(loop (caddr m))))])))))))))
|
||||
|
||||
;; combine-url/relative : url x str -> url
|
||||
(define combine-url/relative
|
||||
(lambda (base string)
|
||||
(let ((relative (string->url string)))
|
||||
(cond
|
||||
((empty-url? base) ; Step 1
|
||||
relative)
|
||||
((empty-url? relative) ; Step 2a
|
||||
base)
|
||||
((url-scheme relative) ; Step 2b
|
||||
relative)
|
||||
(else ; Step 2c
|
||||
(set-url-scheme! relative (url-scheme base))
|
||||
(cond
|
||||
;; This case is here because the above tests
|
||||
;; ensure the relative extension is not really
|
||||
;; an absolute path itself, so we need not
|
||||
;; examine its contents further.
|
||||
((and (url-scheme base) ; Interloper step
|
||||
(string=? (url-scheme base) "file"))
|
||||
(set-url-path! relative
|
||||
(file://combine-url/relative
|
||||
(url-path base)
|
||||
(url-path relative)))
|
||||
relative)
|
||||
((url-host relative) ; Step 3
|
||||
relative)
|
||||
(else
|
||||
(set-url-host! relative (url-host base))
|
||||
(set-url-port! relative (url-port base)) ; Unspecified!
|
||||
(let ((rel-path (url-path relative)))
|
||||
(cond
|
||||
((and rel-path ; Step 4
|
||||
(not (string=? "" rel-path))
|
||||
(char=? #\/ (string-ref rel-path 0)))
|
||||
relative)
|
||||
((or (not rel-path) ; Step 5
|
||||
(string=? rel-path ""))
|
||||
(set-url-path! relative (url-path base))
|
||||
(or (url-params relative)
|
||||
(set-url-params! relative (url-params base)))
|
||||
(or (url-query relative)
|
||||
(set-url-query! relative (url-query base)))
|
||||
relative)
|
||||
(else ; Step 6
|
||||
(merge-and-normalize
|
||||
(url-path base) relative)))))))))))
|
||||
|
||||
(define merge-and-normalize
|
||||
(lambda (base-path relative-url)
|
||||
(let ((rel-path (url-path relative-url)))
|
||||
(let ((base-list (string->list base-path))
|
||||
(rel-list (string->list rel-path)))
|
||||
(let*
|
||||
((joined-list
|
||||
(let loop ((base (reverse base-list)))
|
||||
(if (null? base)
|
||||
rel-list
|
||||
(if (char=? #\/ (car base))
|
||||
(append (reverse base) rel-list)
|
||||
(loop (cdr base))))))
|
||||
(grouped
|
||||
(let loop ((joined joined-list) (current '()))
|
||||
(if (null? joined)
|
||||
(list (list->string (reverse current)))
|
||||
(if (char=? #\/ (car joined))
|
||||
(cons (list->string
|
||||
(reverse (cons #\/ current)))
|
||||
(loop (cdr joined) '()))
|
||||
(loop (cdr joined)
|
||||
(cons (car joined) current))))))
|
||||
(grouped
|
||||
(let loop ((grouped grouped))
|
||||
(if (null? grouped) '()
|
||||
(if (string=? "./" (car grouped))
|
||||
(loop (cdr grouped))
|
||||
(cons (car grouped) (loop (cdr grouped)))))))
|
||||
(grouped
|
||||
(let loop ((grouped grouped))
|
||||
(if (null? grouped) '()
|
||||
(if (null? (cdr grouped))
|
||||
(if (string=? "." (car grouped)) '()
|
||||
grouped)
|
||||
(cons (car grouped) (loop (cdr grouped)))))))
|
||||
(grouped
|
||||
(let remove-loop ((grouped grouped))
|
||||
(let walk-loop ((r-pre '()) (post grouped))
|
||||
(if (null? post)
|
||||
(reverse r-pre)
|
||||
(let ((first (car post))
|
||||
(rest (cdr post)))
|
||||
(if (null? rest)
|
||||
(walk-loop (cons first r-pre) rest)
|
||||
(let ((second (car rest)))
|
||||
(if (and (not (string=? first "../"))
|
||||
(string=? second "../"))
|
||||
(remove-loop
|
||||
(append (reverse r-pre) (cddr post)))
|
||||
(walk-loop (cons first r-pre) rest)))))))))
|
||||
(grouped
|
||||
(let loop ((grouped grouped))
|
||||
(if (null? grouped) '()
|
||||
(if (null? (cdr grouped)) grouped
|
||||
(if (and (null? (cddr grouped))
|
||||
(not (string=? (car grouped) "../"))
|
||||
(string=? (cadr grouped) ".."))
|
||||
'()
|
||||
(cons (car grouped) (loop (cdr grouped)))))))))
|
||||
(set-url-path! relative-url
|
||||
(apply string-append grouped))
|
||||
relative-url)))))
|
||||
|
||||
;; call/input-url : url x (url -> in-port) x (in-port -> T)
|
||||
;; [x list (str)] -> T
|
||||
(define call/input-url
|
||||
(let ((handle-port (lambda (server->client handler)
|
||||
(dynamic-wind (lambda () 'do-nothing)
|
||||
(lambda () (handler server->client))
|
||||
(lambda () (close-input-port server->client))))))
|
||||
(case-lambda
|
||||
((url getter handler)
|
||||
(handle-port (getter url) handler))
|
||||
((url getter handler params)
|
||||
(handle-port (getter url params) handler)))))
|
||||
|
||||
(define empty-line?
|
||||
(lambda (chars)
|
||||
(or (null? chars)
|
||||
(and (memv (car chars) '(#\return #\linefeed #\tab #\space))
|
||||
(empty-line? (cdr chars))))))
|
||||
|
||||
(define extract-mime-headers-as-char-lists
|
||||
(lambda (port)
|
||||
(let headers-loop ((headers '()))
|
||||
(let char-loop ((header '()))
|
||||
(let ((c (read-char port)))
|
||||
(if (eof-object? c)
|
||||
(reverse headers) ; CHECK: INCOMPLETE MIME: SERVER BUG
|
||||
(if (char=? c #\newline)
|
||||
(if (empty-line? header)
|
||||
(reverse headers)
|
||||
(begin
|
||||
(headers-loop (cons (reverse header) headers))))
|
||||
(char-loop (cons c header)))))))))
|
||||
|
||||
;; purify-port : in-port -> list (mime-header)
|
||||
(define purify-port
|
||||
(lambda (port)
|
||||
(let ((headers-as-chars (extract-mime-headers-as-char-lists port)))
|
||||
(let header-loop ((headers headers-as-chars))
|
||||
(if (null? headers)
|
||||
'()
|
||||
(let ((header (car headers)))
|
||||
(let char-loop ((pre '()) (post header))
|
||||
(if (null? post)
|
||||
(header-loop (cdr headers))
|
||||
(if (char=? #\: (car post))
|
||||
(cons (make-mime-header
|
||||
(list->string (reverse pre))
|
||||
(list->string post))
|
||||
(header-loop (cdr headers)))
|
||||
(char-loop (cons (char-downcase (car post)) pre)
|
||||
(cdr post)))))))))))
|
||||
|
||||
(define character-set-size 256)
|
||||
|
||||
(define marker-list
|
||||
'(#\: #\; #\? #\#))
|
||||
|
||||
(define ascii-marker-list
|
||||
(map char->integer marker-list))
|
||||
|
||||
(define marker-locations
|
||||
(make-vector character-set-size))
|
||||
|
||||
(define first-position-of-marker
|
||||
(lambda (c)
|
||||
(vector-ref marker-locations (char->integer c))))
|
||||
|
||||
;; netscape/string->url : str -> url
|
||||
(define netscape/string->url
|
||||
(lambda (string)
|
||||
(let ((url (string->url string)))
|
||||
(if (url-scheme url)
|
||||
url
|
||||
(if (string=? string "")
|
||||
(url-error "Can't resolve empty string as URL")
|
||||
(begin
|
||||
(set-url-scheme! url
|
||||
(if (char=? (string-ref string 0) #\/)
|
||||
"file"
|
||||
"http"))
|
||||
url))))))
|
||||
|
||||
;; string->url : str -> url
|
||||
(define string->url
|
||||
(lambda (string)
|
||||
(let loop ((markers ascii-marker-list))
|
||||
(unless (null? markers)
|
||||
(vector-set! marker-locations (car markers) #f)
|
||||
(loop (cdr markers))))
|
||||
(let loop ((chars (string->list string)) (index 0))
|
||||
(unless (null? chars)
|
||||
(let ((first (car chars)))
|
||||
(when (memq first marker-list)
|
||||
(let ((posn (char->integer first)))
|
||||
(unless (vector-ref marker-locations posn)
|
||||
(vector-set! marker-locations posn index)))))
|
||||
(loop (cdr chars) (add1 index))))
|
||||
(let
|
||||
((first-colon (first-position-of-marker #\:))
|
||||
(first-semicolon (first-position-of-marker #\;))
|
||||
(first-question (first-position-of-marker #\?))
|
||||
(first-hash (first-position-of-marker #\#)))
|
||||
(let
|
||||
((scheme-start (and first-colon 0))
|
||||
(path-start (if first-colon (add1 first-colon) 0))
|
||||
(params-start (and first-semicolon (add1 first-semicolon)))
|
||||
(query-start (and first-question (add1 first-question)))
|
||||
(fragment-start (and first-hash (add1 first-hash))))
|
||||
(let ((total-length (string-length string)))
|
||||
(let*
|
||||
((scheme-finish (and scheme-start first-colon))
|
||||
(path-finish (if first-semicolon first-semicolon
|
||||
(if first-question first-question
|
||||
(if first-hash first-hash
|
||||
total-length))))
|
||||
(fragment-finish (and fragment-start total-length))
|
||||
(query-finish (and query-start
|
||||
(if first-hash first-hash
|
||||
total-length)))
|
||||
(params-finish (and params-start
|
||||
(if first-question first-question
|
||||
(if first-hash first-hash
|
||||
total-length)))))
|
||||
(let ((scheme (and scheme-start
|
||||
(substring string
|
||||
scheme-start scheme-finish))))
|
||||
(if (and scheme
|
||||
(string=? scheme "file"))
|
||||
(let ((path (substring string path-start total-length)))
|
||||
(if (or (relative-path? path)
|
||||
(absolute-path? path))
|
||||
(make-url
|
||||
scheme
|
||||
#f ; host
|
||||
#f ; port
|
||||
path
|
||||
#f ; params
|
||||
#f ; query
|
||||
#f) ; fragment
|
||||
(url-error "scheme 'file' path ~s neither relative nor absolute" path)))
|
||||
(let-values (((host port path)
|
||||
(parse-host/port/path
|
||||
string path-start path-finish)))
|
||||
(make-url
|
||||
scheme
|
||||
host
|
||||
port
|
||||
path
|
||||
(and params-start
|
||||
(substring string params-start params-finish))
|
||||
(and query-start
|
||||
(substring string query-start query-finish))
|
||||
(and fragment-start
|
||||
(substring string fragment-start
|
||||
fragment-finish))))))))))))
|
||||
|
||||
;; parse-host/port/path : str x num x num -> (str + #f) + (num + #f) + str
|
||||
(define parse-host/port/path
|
||||
(lambda (path begin-point end-point)
|
||||
(when (> begin-point end-point)
|
||||
(url-error "Path ~s contains illegal characters" path))
|
||||
(let ((has-host? (and (>= (- end-point begin-point) 2)
|
||||
(char=? (string-ref path begin-point) #\/)
|
||||
(char=? (string-ref path (add1 begin-point))
|
||||
#\/))))
|
||||
(let ((begin-point (if has-host?
|
||||
(+ begin-point 2)
|
||||
begin-point)))
|
||||
(let loop ((index begin-point)
|
||||
(first-colon #f)
|
||||
(first-slash #f))
|
||||
(cond
|
||||
((>= index end-point)
|
||||
;; We come here only if the string has not had a /
|
||||
;; yet. This can happen in two cases:
|
||||
;; 1. The input is a relative URL, and the hostname
|
||||
;; will not be specified. In such cases, has-host?
|
||||
;; will be false.
|
||||
;; 2. The input is an absolute URL with a hostname,
|
||||
;; and the intended path is "/", but the URL is missing
|
||||
;; a "/" at the end. has-host? must be true.
|
||||
(let ((host/path (substring path begin-point end-point)))
|
||||
(if has-host?
|
||||
(values host/path #f "/")
|
||||
(values #f #f host/path))))
|
||||
((char=? #\: (string-ref path index))
|
||||
(loop (add1 index) (or first-colon index) first-slash))
|
||||
((char=? #\/ (string-ref path index))
|
||||
(if first-colon
|
||||
(values
|
||||
(substring path begin-point first-colon)
|
||||
(string->number (substring path (add1 first-colon)
|
||||
index))
|
||||
(substring path index end-point))
|
||||
(if has-host?
|
||||
(values
|
||||
(substring path begin-point index)
|
||||
#f
|
||||
(substring path index end-point))
|
||||
(values
|
||||
#f
|
||||
#f
|
||||
(substring path begin-point end-point)))))
|
||||
(else
|
||||
(loop (add1 index) first-colon first-slash)))))))))))
|
Loading…
Reference in New Issue
Block a user