racket/collects/web-server/request-parsing.ss
Jay McCarthy d97a87bc62 union -> or/c
svn: r2143
2006-02-06 17:35:37 +00:00

235 lines
10 KiB
Scheme

(module request-parsing mzscheme
(require (lib "contract.ss")
(lib "url.ss" "net")
(lib "list.ss")
"util.ss"
"connection-manager.ss"
(lib "port.ss")
"request-structs.ss")
(provide (all-from "request-structs.ss"))
;; path-prefix: (listof string)
;; The part of the URL path that maps to the servlet
;; path-suffix: (listof string)
;; The part of the URL path that gets passed to the servlet as arguments.
(provide/contract
[read-request ((connection? number? ((input-port?) . ->* . (string? string?))) . ->* . (request? boolean?))]
[read-bindings (connection? symbol? url? (listof header?)
. -> . (or/c (listof binding?) string?))])
;; **************************************************
;; read-request: connection number (input-port -> string string) -> request boolean?
;; read the request line, and the headers, determine if the connection should
;; be closed after servicing the request and build a request structure
(define (read-request conn host-port port-addresses)
(call-with-semaphore
(connection-mutex conn)
(lambda ()
(let ([ip (connection-i-port conn)])
(let-values ([(method uri major-version minor-version)
(read-request-line ip)])
(let ([headers (read-headers ip)])
(let-values ([(host-ip client-ip) (port-addresses ip)])
(values
(make-request method uri headers '() host-ip host-port client-ip)
(close-connection?
headers major-version minor-version client-ip host-ip)))))))))
;; **************************************************
;; close-connection?
; close-connection? : (listof (cons symbol bytes)) number number string string -> boolean
; determine if this connection should be closed after serving the response
(define (close-connection? headers major minor client-ip host-ip)
(or (< major 1)
(and (= major 1) (= minor 0))
(cond
[(assq 'connection headers)
=> (lambda (x) (string-ci=? "close" (bytes->string/utf-8 (cdr x))))]
[else #f])
(msie-from-local-machine? headers client-ip host-ip)))
; : table str str -> bool
; to work around a bug in MSIE for documents < 265 bytes when connecting from the local
; machine. The server could pad the response as MSIIS does, but closing the connection works, too.
; We do not check for version numbers since IE 6 under windows is 5.2 under macosX
(define (msie-from-local-machine? headers client-ip host-ip)
(and (string=? host-ip client-ip)
(cond
[(or (assq 'HTTP_USER_AGENT headers)
(assq 'user-agent headers))
=> (lambda (client)
(and (regexp-match MSIE-regexp (cdr client))
#t))]
[else #f])))
(define MSIE-regexp (regexp "MSIE"))
;; **************************************************
;; read-request-line
; Method = (or/c 'get 'post 'head 'put 'delete 'trace)
(define METHOD:REGEXP
(byte-regexp #"^(GET|HEAD|POST|PUT|DELETE|TRACE) (.+) HTTP/([0-9]+)\\.([0-9]+)$"))
(define (match-method x)
(regexp-match METHOD:REGEXP x))
;:(define match-method (type: (str -> (or/c false (list str str str str str)))))
; read-request-line : iport -> symbol url number number
; to read in the first line of an http request, AKA the "request line"
; effect: in case of errors, complain [MF: where] and close the ports
(define (read-request-line ip)
(let ([line (read-bytes-line ip 'any)])
(if (eof-object? line)
(network-error 'read-request "http input closed abruptly")
(cond
[(match-method line)
=> (lambda (x)
(values
(lowercase-symbol! (list-ref x 1))
(string->url (bytes->string/utf-8 (list-ref x 2)))
(string->number (bytes->string/utf-8 (list-ref x 3)))
(string->number (bytes->string/utf-8 (list-ref x 4)))))]
[else (network-error 'read-request "malformed request ~a" line)]))))
;; **************************************************
;; read-headers
;(define COLON:REGEXP (regexp (format "^([^:]*):[ ~a]*(.*)" #\tab)))
(define COLON:REGEXP (byte-regexp (bytes-append #"^([^:]*):[ " (bytes 9) #"]*(.*)")))
(define (match-colon s)
(regexp-match COLON:REGEXP s))
;:(define match-colon (type: (str -> (or/c false (list str str str)))))
; read-headers : iport -> (listof (cons symbol bytes))
(define (read-headers in)
(let read-header ()
(let ([l (read-bytes-line in 'any)])
(cond
[(eof-object? l) null]
[(zero? (bytes-length l)) null]
[(match-colon l) =>
(lambda (match)
; (cadr match) exists because COLON:REGEXP contains two (.)
; (caddr match) exists because COLON:REGEXP contains two (.)
(cons (cons (lowercase-symbol! (cadr match))
(read-one-head in (caddr match)))
(read-header)))]
[else (network-error 'read-headers "malformed header")]))))
; read-one-head : iport bytes -> bytes
(define (read-one-head in rhs)
(let ([c (peek-byte in)])
(cond
[(or (= c 32) (= c 9)) ;(or (eq? c #\space) (eq? c #\tab))
; (read-bytes-line in 'any) can't return eof
; because we just checked with peek-char
; Spidey: FLOW
(read-one-head in (bytes-append rhs (read-bytes-line in 'any)))]
[else rhs])))
;; **************************************************
;; read-bindings
(define INPUT-BUFFER-SIZE 4096)
;; read-bindings: connection symboll url (listof header?) -> (or/c (listof binding?) string?)
(define (read-bindings conn meth uri headers)
(case meth
[(get) (url-query uri)]
[(post)
(call-with-semaphore
(connection-mutex conn)
(lambda ()
(let ([content-type (assq 'content-type headers)])
(cond
[(and content-type (regexp-match FILE-FORM-REGEXP (cdr content-type)))
=> (lambda (content-boundary)
(map (lambda (part)
;; more here - better checks, avoid string-append
(cons (get-field-name (cdr (assq 'content-disposition (car part))))
(apply bytes-append (cdr part))))
(read-mime-multipart (cadr content-boundary) (connection-i-port conn))))]
[else
(let ([len-str (assq 'content-length headers)]
[in (connection-i-port conn)])
(if len-str
(cond
[(string->number (bytes->string/utf-8 (cdr len-str)))
=> (lambda (len) (read-string len in))]
[else (network-error 'read-bindings "Post request contained a non-numeric content-length")])
(apply string-append
(let read-to-eof ()
(let ([s (read-string INPUT-BUFFER-SIZE in)])
(if (eof-object? s)
null
(cons s (read-to-eof))))))))]))))]
[else (network-error 'read-bindings "unsupported method" meth)]))
(define FILE-FORM-REGEXP (regexp "multipart/form-data; *boundary=(.*)"))
;; GregP: this is where I would get the filename out.
; get-field-name : str -> symbol
(define (get-field-name rhs)
(let ([x (regexp-match "name=(\"([^\"]*)\"|([^ ;]*))" rhs)])
(unless x
(network-error 'get-field-name "Couldn't extract form field name for file upload from ~a" x))
(lowercase-symbol! (or (caddr x) (cadddr x)))))
;; **************************************************
;; read-mime-multipart
; read-mime-multipart : bytes iport -> (listof part)
(define (read-mime-multipart boundary in)
(let* ([boundary-len (bytes-length boundary)]
[start-boundary (bytes-append #"--" boundary)]
[end-boundary (bytes-append start-boundary #"--")])
(let skip-preamble ()
(let ([line (read-bytes-line in 'return-linefeed)])
(cond
[(bytes=? line start-boundary)
(let read-parts ()
(let ([headers (read-headers in)])
(let read-mime-part-body ([more-k (lambda (contents)
(cons (construct-mime-part
headers contents)
(read-parts)))]
[end-k (lambda (contents)
(list (construct-mime-part
headers contents)))])
(let ([line (read-bytes-line in 'return-linefeed)])
(cond
[(bytes=? line start-boundary)
(more-k null)]
[(bytes=? line end-boundary)
(end-k null)]
[else (read-mime-part-body
(lambda (x) (more-k (cons line x)))
(lambda (x) (end-k (cons line x))))])))))]
[(bytes=? line end-boundary) null]
[else (skip-preamble)])))))
; more here - use structure, perhaps
; construct-mime-part : (listof header) (listof bytes) -> part
(define (construct-mime-part headers body)
(cons headers
(cond
[(null? body) null]
[else (cons (car body)
(foldr (lambda (byt acc)
(list* (string->bytes/utf-8 CR-NL) byt acc))
null
(cdr body)))])))
(define CR-NL (format "~a~a" #\return #\newline)))