parse url schemes properly (some test reformatting)

svn: r5241
This commit is contained in:
Eli Barzilay 2007-01-07 09:09:27 +00:00
parent 5f1c8da8f5
commit 9984b48d88
2 changed files with 153 additions and 140 deletions

View File

@ -359,7 +359,7 @@
"^"
"[ \t\f\r\n]*"
"(?:" ; <A front-opt
"(?:([a-zA-Z]*):)?" ; =1 scheme-colon-opt
"(?:([^:]*):)?" ; =1 scheme-colon-opt (see below)
"(?:" ; <B slashslash-opt
"//"
"(?:([^:/@;?#]*)@)?" ; =2 user-at-opt
@ -375,6 +375,9 @@
(define (string->url str)
(apply
(lambda (scheme user host port path query fragment)
(when (and scheme (not (regexp-match? #rx"^[a-zA-Z][a-zA-Z0-9+-.]*$"
scheme)))
(url-error "Invalid URL string (bad scheme): ~e" str))
;; Windows => "file://xxx:/...." specifies a "xxx:/..." path
(when (and (equal? "" port) (equal? "file" scheme)
(eq? 'windows url:os-type))

View File

@ -75,7 +75,6 @@
(unless (= i to)
(loop (+ i 1)))))]))
(test "hello" uri-encode "hello")
(test "hello%20there" uri-encode "hello there")
@ -95,7 +94,6 @@
uri-encode
(string (integer->char code))))))
(test "" alist->form-urlencoded '())
(test "key=hello+there" alist->form-urlencoded '((key . "hello there")))
(test "key1=hi;key2=hello" alist->form-urlencoded '((key1 . "hi") (key2 . "hello")))
@ -153,11 +151,11 @@
(define (test-c-u/r expected base relative)
(define (combine-url/relative-vec x y)
(url->vec (combine-url/relative (vec->url x) y)))
(test (url->vec expected) combine-url/relative-vec (url->vec base) relative))
(define (->vec x) (url->vec (if (string? x) (string->url x) x)))
(test (->vec expected) combine-url/relative-vec (->vec base) relative))
(define (vec->url vec)
(make-url
(vector-ref vec 0)
(make-url (vector-ref vec 0)
(vector-ref vec 1)
(vector-ref vec 2)
(vector-ref vec 3)
@ -180,175 +178,191 @@
(url-query url)
(url-fragment url)))
(test-s->u (vector #f #f #f #f #t '(#("")) '() #f)
(test-s->u #(#f #f #f #f #t (#("")) () #f)
"/")
(test-s->u (vector #f #f #f #f #f '() '() #f)
(test-s->u #(#f #f #f #f #f () () #f)
"")
(test-s->u (vector "http" #f #f #f #t '(#("")) '() #f)
(test-s->u #("http" #f #f #f #t (#("")) () #f)
"http:/")
(test-s->u (vector "http" #f "" #f #t '(#("")) '() #f)
(test-s->u #("http" #f "" #f #t (#("")) () #f)
"http:///")
(test-s->u (vector "http" #f "www.drscheme.org" #f #f '() '() #f)
(test-s->u #("http" #f "www.drscheme.org" #f #f () () #f)
"http://www.drscheme.org")
(test-s->u (vector "http" #f "www.drscheme.org" #f #t '(#("")) '() #f)
(test-s->u #("http" #f "www.drscheme.org" #f #t (#("")) () #f)
"http://www.drscheme.org/")
(test-s->u (vector "http" #f "www.drscheme.org" #f #t (list #("a") #("b") #("c")) '() #f)
(test-s->u #("http" #f "www.drscheme.org" #f #t (#("a") #("b") #("c")) () #f)
"http://www.drscheme.org/a/b/c")
(test-s->u (vector "http" "robby" "www.drscheme.org" #f #t (list #("a") #("b") #("c")) '() #f)
(test-s->u #("http" "robby" "www.drscheme.org" #f #t (#("a") #("b") #("c")) () #f)
"http://robby@www.drscheme.org/a/b/c")
(test-s->u (vector "http" #f "www.drscheme.org" 8080 #t (list #("a") #("b") #("c")) '() #f)
(test-s->u #("http" #f "www.drscheme.org" 8080 #t (#("a") #("b") #("c")) () #f)
"http://www.drscheme.org:8080/a/b/c")
(test-s->u (vector "http" #f "www.drscheme.org" #f #t (list #("a") #("b") #("c")) '() "joe")
(test-s->u #("http" #f "www.drscheme.org" #f #t (#("a") #("b") #("c")) () "joe")
"http://www.drscheme.org/a/b/c#joe")
(test-s->u (vector "http" #f "www.drscheme.org" #f #t (list #("a") #("b") #("c")) '((tim . "")) #f)
(test-s->u #("http" #f "www.drscheme.org" #f #t (#("a") #("b") #("c")) ((tim . "")) #f)
"http://www.drscheme.org/a/b/c?tim=")
(test-s->u (vector "http" #f "www.drscheme.org" #f #t (list #("a") #("b") #("c")) '((tim . "")) "joe")
(test-s->u #("http" #f "www.drscheme.org" #f #t (#("a") #("b") #("c")) ((tim . "")) "joe")
"http://www.drscheme.org/a/b/c?tim=#joe")
(test-s->u (vector "http" #f "www.drscheme.org" #f #t (list #("a") #("b") #("c")) '((tim . "tim")) "joe")
(test-s->u #("http" #f "www.drscheme.org" #f #t (#("a") #("b") #("c")) ((tim . "tim")) "joe")
"http://www.drscheme.org/a/b/c?tim=tim#joe")
(test-s->u (vector "http" #f "www.drscheme.org" #f #t (list #("a") #("b") #("c")) '((tam . "tom")) "joe")
(test-s->u #("http" #f "www.drscheme.org" #f #t (#("a") #("b") #("c")) ((tam . "tom")) "joe")
"http://www.drscheme.org/a/b/c?tam=tom#joe")
(test-s->u (vector "http" #f "www.drscheme.org" #f #t (list #("a") #("b") #("c")) '((tam . "tom") (pam . "pom")) "joe")
(test-s->u #("http" #f "www.drscheme.org" #f #t (#("a") #("b") #("c")) ((tam . "tom") (pam . "pom")) "joe")
"http://www.drscheme.org/a/b/c?tam=tom;pam=pom#joe")
(parameterize ([current-alist-separator-mode 'semi])
(test-s->u (vector "http" #f "www.drscheme.org" #f #t (list #("a") #("b") #("c")) '((tam . "tom") (pam . "pom")) "joe")
(test-s->u #("http" #f "www.drscheme.org" #f #t (#("a") #("b") #("c")) ((tam . "tom") (pam . "pom")) "joe")
"http://www.drscheme.org/a/b/c?tam=tom;pam=pom#joe"))
(parameterize ([current-alist-separator-mode 'amp])
(test-s->u (vector "http" #f "www.drscheme.org" #f #t (list #("a") #("b") #("c")) '((tam . "tom") (pam . "pom")) "joe")
(test-s->u #("http" #f "www.drscheme.org" #f #t (#("a") #("b") #("c")) ((tam . "tom") (pam . "pom")) "joe")
"http://www.drscheme.org/a/b/c?tam=tom&pam=pom#joe"))
(test-s->u (vector "http" #f "www.drscheme.org" #f #t (list #("a") #("b") #("c" "b")) '() #f)
(test-s->u #("http" #f "www.drscheme.org" #f #t (#("a") #("b") #("c" "b")) () #f)
"http://www.drscheme.org/a/b/c;b")
(test-s->u (vector "http" #f "www.drscheme.org" #f #t (list #("a" "x") #("b") #("c" "b")) '() #f)
(test-s->u #("http" #f "www.drscheme.org" #f #t (#("a" "x") #("b") #("c" "b")) () #f)
"http://www.drscheme.org/a;x/b/c;b")
;; test unquoting for %
(test-s->u (vector "http" #f "www.drscheme.org" #f #t (list #("a") #("b") #("c")) '((ti#m . "")) "jo e")
(test-s->u #("http" #f "www.drscheme.org" #f #t (#("a") #("b") #("c")) ((ti#m . "")) "jo e")
"http://www.drscheme.org/a/b/c?ti%23m=#jo%20e")
(test-s->u (vector "http" #f "www.drscheme.org" #f #t (list #("a " " a") #(" b ") #(" c ")) '() #f)
(test-s->u #("http" #f "www.drscheme.org" #f #t (#("a " " a") #(" b ") #(" c ")) () #f)
"http://www.drscheme.org/a%20;%20a/%20b%20/%20c%20")
(test-s->u (vector "http" "robb y" "www.drscheme.org" #f #t '(#("")) '() #f)
(test-s->u #("http" "robb y" "www.drscheme.org" #f #t (#("")) () #f)
"http://robb%20y@www.drscheme.org/")
(test-s->u (vector "http" #f "www.drscheme.org" #f #t (list #("%a") #("b/") #("c")) '() #f)
(test-s->u #("http" #f "www.drscheme.org" #f #t (#("%a") #("b/") #("c")) () #f)
"http://www.drscheme.org/%25a/b%2F/c")
;; test the characters that need to be encoded in paths vs those that do not need to
;; be encoded in paths
(test-s->u (vector "http" #f "www.drscheme.org" #f #t (list #("a:@!$&'()*+,=z") #("/?#[];") #("")) '() #f)
(test-s->u #("http" #f "www.drscheme.org" #f #t (#("a:@!$&'()*+,=z") #("/?#[];") #("")) () #f)
"http://www.drscheme.org/a:@!$&'()*+,=z/%2F%3F%23%5B%5D%3B/")
(test-s->u (vector "http" #f "www.drscheme.org" #f #t (list #(".") #("..") '#(same) '#(up) #("...") #("abc.def")) '() #f)
(test-s->u #("http" #f "www.drscheme.org" #f #t (#(".") #("..") #(same) #(up) #("...") #("abc.def")) () #f)
"http://www.drscheme.org/%2e/%2e%2e/./../.../abc.def")
(test-s->u (vector "http" #f "www.drscheme.org" #f #t (list #("." "") #(".." "") #(same "") #(up "") #("..." "") #("abc.def" "")) '() #f)
(test-s->u #("http" #f "www.drscheme.org" #f #t (#("." "") #(".." "") #(same "") #(up "") #("..." "") #("abc.def" "")) () #f)
"http://www.drscheme.org/%2e;/%2e%2e;/.;/..;/...;/abc.def;")
;; test other scheme identifiers
(test-s->u #("blah" #f "www.foo.com" #f #t (#("")) () #f)
"blah://www.foo.com/")
(test-s->u #("blah99" #f "www.foo.com" #f #t (#("")) () #f)
"blah99://www.foo.com/")
(test-s->u #("blah+" #f "www.foo.com" #f #t (#("")) () #f)
"blah+://www.foo.com/")
(test-s->u #("a+b-c456.d" #f "www.foo.com" #f #t (#("")) () #f)
"a+b-c456.d://www.foo.com/")
;; test bad schemes
(err/rt-test (string->url "://www.foo.com/") url-exception?)
(err/rt-test (string->url "9://www.foo.com/") url-exception?)
(err/rt-test (string->url "9a://www.foo.com/") url-exception?)
(err/rt-test (string->url "a*b://www.foo.com/") url-exception?)
(err/rt-test (string->url "a b://www.foo.com/") url-exception?)
;; test file: urls
(test-s->u (vector "file" #f #f #f #t '(#("abc") #("def.html")) '() #f)
(test-s->u #("file" #f #f #f #t (#("abc") #("def.html")) () #f)
"file:/abc/def.html")
(test-s->u (vector "file" #f "localhost" #f #t '(#("abc") #("def.html")) '() #f)
(test-s->u #("file" #f "localhost" #f #t (#("abc") #("def.html")) () #f)
"file://localhost/abc/def.html")
;; test files: urls with colons, and the different parsing on Windows
(test-s->u (vector "file" #f "localhost" 123 #t '(#("abc") #("def.html")) '() #f)
(test-s->u #("file" #f "localhost" 123 #t (#("abc") #("def.html")) () #f)
"file://localhost:123/abc/def.html")
(set-url:os-type! 'unix)
;; different parse for file://foo:/...
(test (vector "file" #f "foo" #f #t '(#("abc") #("def.html")) '() #f)
(test #("file" #f "foo" #f #t (#("abc") #("def.html")) () #f)
string->url/vec
"file://foo:/abc/def.html")
(set-url:os-type! 'windows)
(test (vector "file" #f #f #f #f '(#("foo:") #("abc") #("def.html")) '() #f)
(test #("file" #f #f #f #f (#("foo:") #("abc") #("def.html")) () #f)
string->url/vec
"file://foo:/abc/def.html")
(set-url:os-type! 'unix)
;; but no effect on http://foo:/...
(test (vector "http" #f "foo" #f #t '(#("abc") #("def.html")) '() #f)
(test #("http" #f "foo" #f #t (#("abc") #("def.html")) () #f)
string->url/vec
"http://foo:/abc/def.html")
(set-url:os-type! 'windows)
(test (vector "http" #f "foo" #f #t '(#("abc") #("def.html")) '() #f)
(test #("http" #f "foo" #f #t (#("abc") #("def.html")) () #f)
string->url/vec
"http://foo:/abc/def.html")
(set-url:os-type! 'unix)
;; test case sensitivity
(test (vector "http" "ROBBY" "www.drscheme.org" 80 #t '(#("INDEX.HTML" "XXX")) '((T . "P")) "YYY")
(test #("http" "ROBBY" "www.drscheme.org" 80 #t (#("INDEX.HTML" "XXX")) ((T . "P")) "YYY")
string->url/vec
"HTTP://ROBBY@WWW.DRSCHEME.ORG:80/INDEX.HTML;XXX?T=P#YYY")
(test-s->u (vector "mailto" #f #f #f #f '(#("robby@plt-scheme.org")) '() #f)
(test-s->u #("mailto" #f #f #f #f (#("robby@plt-scheme.org")) () #f)
"mailto:robby@plt-scheme.org")
(test (vector "http" #f "www.drscheme.org" #f #f '() '((bar . "馨慧")) #f)
(test #("http" #f "www.drscheme.org" #f #f () ((bar . "馨慧")) #f)
string->url/vec
"http://www.drscheme.org?bar=馨慧")
(test (vector "http" #f "www.drscheme.org" #f #f '() '((bár . "é")) #f)
(test #("http" #f "www.drscheme.org" #f #f () ((bár . "é")) #f)
string->url/vec
"http://www.drscheme.org?bár=é")
(let ([empty-url (make-url #f #f #f #f #f '() '() #f)])
(test-c-u/r (string->url "http://www.drscheme.org")
empty-url
"http://www.drscheme.org"))
(test-c-u/r "http://www.drscheme.org"
(make-url #f #f #f #f #f '() '() #f)
"http://www.drscheme.org")
(test-c-u/r (string->url "http://www.drscheme.org")
(string->url "http://www.drscheme.org")
(test-c-u/r "http://www.drscheme.org"
"http://www.drscheme.org"
"")
(test-c-u/r (string->url "http://www.mzscheme.org")
(string->url "http://www.drscheme.org/")
(test-c-u/r "http://www.mzscheme.org"
"http://www.drscheme.org/"
"http://www.mzscheme.org")
(test-c-u/r (string->url "http://www.drscheme.org/index.html")
(string->url "http://www.drscheme.org/")
(test-c-u/r "http://www.drscheme.org/index.html"
"http://www.drscheme.org/"
"index.html")
(test-c-u/r (string->url "http://www.drscheme.org/index.html")
(string->url "http://www.drscheme.org/")
(test-c-u/r "http://www.drscheme.org/index.html"
"http://www.drscheme.org/"
"/index.html")
(test-c-u/r (string->url "http://www.drscheme.org/index.html")
(string->url "http://www.drscheme.org/a/b/c/")
(test-c-u/r "http://www.drscheme.org/index.html"
"http://www.drscheme.org/a/b/c/"
"/index.html")
(test-c-u/r (string->url "http://www.drscheme.org/a/b/index.html")
(string->url "http://www.drscheme.org/a/b/c")
(test-c-u/r "http://www.drscheme.org/a/b/index.html"
"http://www.drscheme.org/a/b/c"
"index.html")
(test-c-u/r (string->url "http://www.drscheme.org/a/b/c/index.html")
(string->url "http://www.drscheme.org/a/b/c/")
(test-c-u/r "http://www.drscheme.org/a/b/c/index.html"
"http://www.drscheme.org/a/b/c/"
"index.html")
(test-c-u/r (string->url "http://www.drscheme.org/a/b/d/index.html")
(string->url "http://www.drscheme.org/a/b/c")
(test-c-u/r "http://www.drscheme.org/a/b/d/index.html"
"http://www.drscheme.org/a/b/c"
"d/index.html")
(test-c-u/r (string->url "http://www.drscheme.org/a/b/c/d/index.html")
(string->url "http://www.drscheme.org/a/b/c/")
(test-c-u/r "http://www.drscheme.org/a/b/c/d/index.html"
"http://www.drscheme.org/a/b/c/"
"d/index.html")
(test-c-u/r (string->url "http://www.drscheme.org/a/b/index.html")
(string->url "http://www.drscheme.org/a/b/c/")
(test-c-u/r "http://www.drscheme.org/a/b/index.html"
"http://www.drscheme.org/a/b/c/"
"../index.html")
(test-c-u/r (string->url "http://www.drscheme.org/a/b/c/index.html")
(string->url "http://www.drscheme.org/a/b/c/")
(test-c-u/r "http://www.drscheme.org/a/b/c/index.html"
"http://www.drscheme.org/a/b/c/"
"./index.html")
(test-c-u/r (string->url "http://www.drscheme.org/a/b/c/%2e%2e/index.html")
(string->url "http://www.drscheme.org/a/b/c/")
(test-c-u/r "http://www.drscheme.org/a/b/c/%2e%2e/index.html"
"http://www.drscheme.org/a/b/c/"
"%2e%2e/index.html")
(test-c-u/r (string->url "http://www.drscheme.org/a/index.html")
(string->url "http://www.drscheme.org/a/b/../c/")
(test-c-u/r "http://www.drscheme.org/a/index.html"
"http://www.drscheme.org/a/b/../c/"
"../index.html")
(test-c-u/r (string->url "http://www.drscheme.org/a/b/c/d/index.html")
(string->url "http://www.drscheme.org/a/b/c/d/index.html#ghijkl")
(test-c-u/r "http://www.drscheme.org/a/b/c/d/index.html"
"http://www.drscheme.org/a/b/c/d/index.html#ghijkl"
"index.html")
(test-c-u/r (string->url "http://www.drscheme.org/a/b/c/d/index.html#abcdef")
(string->url "http://www.drscheme.org/a/b/c/d/index.html#ghijkl")
(test-c-u/r "http://www.drscheme.org/a/b/c/d/index.html#abcdef"
"http://www.drscheme.org/a/b/c/d/index.html#ghijkl"
"#abcdef")
(test-c-u/r (string->url "file:///a/b/c/d/index.html")
(string->url "file:///a/b/c/")
(test-c-u/r "file:///a/b/c/d/index.html"
"file:///a/b/c/"
"d/index.html")
(test-c-u/r (string->url "file:///a/b/d/index.html")
(string->url "file:///a/b/c")
(test-c-u/r "file:///a/b/d/index.html"
"file:///a/b/c"
"d/index.html")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@ -357,10 +371,7 @@
;;
(for-each
(λ (line)
(test-c-u/r (string->url (caddr line))
(string->url "http://a/b/c/d;p?q")
(car line)))
(λ (line) (test-c-u/r (caddr line) "http://a/b/c/d;p?q" (car line)))
'(("g:h" = "g:h")
("g" = "http://a/b/c/g")
("./g" = "http://a/b/c/g")
@ -412,7 +423,6 @@
))
)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;