URL parsing is closer to the RFC now
svn: r5965
This commit is contained in:
parent
cc8e660737
commit
e885e79295
|
@ -13,8 +13,8 @@
|
|||
(require (lib "file.ss")
|
||||
(lib "unit.ss")
|
||||
(lib "port.ss")
|
||||
(lib "string.ss")
|
||||
(lib "list.ss")
|
||||
(lib "string.ss")
|
||||
"url-structs.ss"
|
||||
"uri-codec.ss"
|
||||
"url-sig.ss"
|
||||
|
@ -351,26 +351,37 @@
|
|||
(if (char=? (string-ref string 0) #\/) "file" "http"))
|
||||
url)))))
|
||||
|
||||
;; string->url : str -> url
|
||||
;; New implementation, mostly provided by Neil Van Dyke
|
||||
;; URL parsing regexp
|
||||
;; this is following the regexp in Appendix B of rfc 3986, except for using
|
||||
;; `*' instead of `+' for the scheme part (it is checked later anyway, and
|
||||
;; we don't want to parse it as a path element), and the user@host:port is
|
||||
;; parsed here.
|
||||
(define url-rx
|
||||
(regexp (string-append
|
||||
"^"
|
||||
"[ \t\f\r\n]*"
|
||||
"(?:" ; <A front-opt
|
||||
"(?:([^:&;?/]*):)?" ; =1 scheme-colon-opt (see below)
|
||||
"(?:" ; <B slashslash-opt
|
||||
"//"
|
||||
"(?:([^:/@;?#]*)@)?" ; =2 user-at-opt
|
||||
"([^:/@;?#]*)?" ; =3 host-opt
|
||||
"(?::([0-9]*))?" ; =4 colon-port-opt
|
||||
")?" ; >B slashslash-opt
|
||||
")?" ; >A front-opt
|
||||
"([^?#]*)" ; =5 path
|
||||
"(?:\\?([^#]*))?" ; =6 question-query-opt
|
||||
"(?:#(.*))?" ; =7 hash-fragment-opt
|
||||
"[ \t\f\r\n]*"
|
||||
"(?:" ; / scheme-colon-opt
|
||||
"([^:/?#]*)" ; | #1 = scheme-opt
|
||||
":)?" ; \
|
||||
"(?://" ; / slash-slash-authority-opt
|
||||
"(?:" ; | / user-at-opt
|
||||
"([^/?#@]*)" ; | | #2 = user-opt
|
||||
"@)?" ; | \
|
||||
"([^/?#:]*)?" ; | #3 = host-opt
|
||||
"(?::" ; | / colon-port-opt
|
||||
"([0-9]*)" ; | | #4 = port-opt
|
||||
")?" ; | \
|
||||
")?" ; \
|
||||
"([^?#]*)" ; #5 = path
|
||||
"(?:\\?" ; / question-query-opt
|
||||
"([^#]*)" ; | #6 = query-opt
|
||||
")?" ; \
|
||||
"(?:#" ; / hash-fragment-opt
|
||||
"(.*)" ; | #7 = fragment-opt
|
||||
")?" ; \
|
||||
"$")))
|
||||
|
||||
;; string->url : str -> url
|
||||
;; Original version by Neil Van Dyke
|
||||
(define (string->url str)
|
||||
(apply
|
||||
(lambda (scheme user host port path query fragment)
|
||||
|
@ -382,21 +393,14 @@
|
|||
(eq? 'windows url:os-type))
|
||||
(set! path (string-append host ":" path))
|
||||
(set! host #f))
|
||||
(let* ([user (uri-decode/maybe user)]
|
||||
[port (and port (string->number port))]
|
||||
[abs? (and (not (= 0 (string-length path)))
|
||||
(char=? #\/ (string-ref path 0)))]
|
||||
[path (separate-path-strings
|
||||
;; If path is "" and the input is an absolute URL
|
||||
;; with a hostname, then the intended path is "/",
|
||||
;; but the URL is missing a "/" at the end.
|
||||
path
|
||||
#;
|
||||
(if (and (string=? path "") host) "/" path))]
|
||||
[query (if query (form-urlencoded->alist query) '())]
|
||||
(let* ([scheme (and scheme (string-downcase scheme))]
|
||||
[host (and host (string-downcase host))]
|
||||
[user (uri-decode/maybe user)]
|
||||
[port (and port (string->number port))]
|
||||
[abs? (regexp-match? #rx"^/" path)]
|
||||
[path (separate-path-strings path)]
|
||||
[query (if query (form-urlencoded->alist query) '())]
|
||||
[fragment (uri-decode/maybe fragment)])
|
||||
(when (string? scheme) (string-lowercase! scheme))
|
||||
(when (string? host) (string-lowercase! host))
|
||||
(make-url scheme user host port abs? path query fragment)))
|
||||
(cdr (or (regexp-match url-rx str)
|
||||
(url-error "Invalid URL string: ~e" str)))))
|
||||
|
@ -405,7 +409,7 @@
|
|||
;; If #f, and leave unmolested any % that is followed by hex digit
|
||||
;; if a % is not followed by a hex digit, replace it with %25
|
||||
;; in an attempt to be "friendly"
|
||||
(and f (uri-decode (regexp-replace* "%([^0-9a-fA-F])" f "%25\\1"))))
|
||||
(and f (uri-decode (regexp-replace* #rx"%([^0-9a-fA-F])" f "%25\\1"))))
|
||||
|
||||
;; separate-path-strings : string[starting with /] -> (listof path/param)
|
||||
(define (separate-path-strings str)
|
||||
|
@ -545,5 +549,4 @@
|
|||
(tcp-abandon-port client->server)
|
||||
server->client))
|
||||
|
||||
|
||||
))
|
||||
|
|
|
@ -252,12 +252,16 @@
|
|||
(test-s->u #("a+b-c456.d" #f "www.foo.com" #f #t (#("")) () #f)
|
||||
"a+b-c456.d://www.foo.com/")
|
||||
|
||||
;; a colon can appear in absolute paths
|
||||
(test-s->u #(#f #f #f #f #t (#("x:y") #("z")) () #f)
|
||||
"/x:y/z")
|
||||
;; a colon and other junk (`sub-delims') can appear in usernames
|
||||
(test #("http" "x:!$&'()*+,;=y" "www.drscheme.org" #f #t (#("a")) () #f)
|
||||
string->url/vec
|
||||
"http://x:!$&'()*+,;=y@www.drscheme.org/a")
|
||||
;; a colon and atsign can appear in absolute paths
|
||||
(test-s->u #(#f #f #f #f #t (#("x:@y") #("z")) () #f)
|
||||
"/x:@y/z")
|
||||
;; and in relative paths as long as it's not in the first element
|
||||
(test-s->u #(#f #f #f #f #f (#("x") #("y:z")) () #f)
|
||||
"x/y:z")
|
||||
(test-s->u #(#f #f #f #f #f (#("x") #("y:@z")) () #f)
|
||||
"x/y:@z")
|
||||
|
||||
;; test bad schemes
|
||||
(err/rt-test (string->url "://www.foo.com/") url-exception?)
|
||||
|
|
Loading…
Reference in New Issue
Block a user