diff --git a/collects/net/head-unit.ss b/collects/net/head-unit.ss index 7d98264..41d2e8c 100644 --- a/collects/net/head-unit.ss +++ b/collects/net/head-unit.ss @@ -174,7 +174,10 @@ (define blank (format "[~a~a~a~a~a]" #\space #\tab #\newline #\return #\vtab)) (define re:all-blank (regexp (format "^~a*$" blank))) - + (define re:quoted (regexp "\"[^\"]*\"")) + (define re:comma (regexp ",")) + (define re:comma-separated (regexp "([^,]*),(.*)")) + (define (extract-addresses s form) (unless (memq form '(name address full all)) (raise-type-error 'extract-addresses @@ -184,8 +187,8 @@ null (let loop ([prefix ""][s s]) ;; Which comes first - a quote or a comma? - (let ([mq (regexp-match-positions "\"[^\"]*\"" s)] - [mc (regexp-match-positions "," s)]) + (let ([mq (regexp-match-positions re:quoted s)] + [mc (regexp-match-positions re:comma s)]) (if (and mq mc (< (caar mq) (caar mc) (cdar mq))) ;; Quote contains a comma (loop (string-append @@ -193,7 +196,7 @@ (substring s 0 (cdar mq))) (substring s (cdar mq) (string-length s))) ;; Normal comma parsing: - (let ([m (regexp-match "([^,]*),(.*)" s)]) + (let ([m (regexp-match re:comma-separated s)]) (if m (let ([n (extract-one-name (string-append prefix (cadr m)) form)] [rest (extract-addresses (caddr m) form)]) @@ -211,22 +214,32 @@ (define (one-result form s) (select-result form s s s)) + (define re:quoted-name (regexp (format "^~a*(\"[^\"]*\")(.*)" blank))) + (define re:parened-name (regexp (format "(.*)[(]([^)]*)[)]~a*$" blank))) + (define re:simple-name (regexp (format "^~a*(.*)(<.*>)~a*$" blank blank))) + (define re:normal-name (regexp (format "~a*<([^>]*)>~a*" blank blank))) + (define re:double-less (regexp "<.*<")) + (define re:double-greater (regexp ">.*>")) + (define re:bad-chars (regexp "[,\"()<>]")) + (define re:tail-blanks (regexp (format "~a+$" blank))) + (define re:head-blanks (regexp (format "^~a+" blank))) + (define (extract-one-name s form) (cond - [(regexp-match (format "^~a*(\"[^\"]*\")(.*)" blank) s) + [(regexp-match re:quoted-name s) => (lambda (m) (let ([name (cadr m)] [addr (extract-angle-addr (caddr m))]) (select-result form name addr (format "~a <~a>" name addr))))] ;; ?!?!? Where does the "addr (name)" standard come from ?!?!? - [(regexp-match (format "(.*)[(]([^)]*)[)]~a*$" blank) s) + [(regexp-match re:parened-name s) => (lambda (m) (let ([name (caddr m)] [addr (extract-simple-addr (cadr m))]) (select-result form name addr (format "~a (~a)" addr name))))] - [(regexp-match (format "^~a*(.*)(<.*>)~a*$" blank blank) s) + [(regexp-match re:simple-name s) => (lambda (m) (let ([name (regexp-replace (format "~a*$" blank) (cadr m) "")] [addr (extract-angle-addr (caddr m))]) @@ -238,22 +251,22 @@ (one-result form (extract-simple-addr s))])) (define (extract-angle-addr s) - (if (or (regexp-match "<.*<" s) (regexp-match ">.*>" s)) + (if (or (regexp-match re:double-less s) (regexp-match re:double-greater s)) (error 'extract-address "too many angle brackets: ~a" s) - (let ([m (regexp-match (format "~a*<([^>]*)>~a*" blank blank) s)]) + (let ([m (regexp-match re:normal-name s)]) (if m (extract-simple-addr (cadr m)) (error 'extract-address "cannot parse address: ~a" s))))) (define (extract-simple-addr s) (cond - [(regexp-match "[,\"()<>]" s) + [(regexp-match re:bad-chars s) (error 'extract-address "cannot parse address: ~a" s)] [else ;; final whitespace strip (regexp-replace - (format "~a*$" blank) - (regexp-replace (format "~a*" blank) s "") + re:tail-blanks + (regexp-replace re:head-blanks s "") "")])) (define (assemble-address-field addresses) diff --git a/collects/net/mime-unit.ss b/collects/net/mime-unit.ss index b5fbf00..d3d004c 100644 --- a/collects/net/mime-unit.ss +++ b/collects/net/mime-unit.ss @@ -363,12 +363,13 @@ fields)) (values mime non-mime))))) + (define re:content (regexp (format "^~a" (regexp-quote "content-" #f)))) + (define re:mime (regexp (format "^~a:" (regexp-quote "mime-version" #f)))) + (define mime-header? (lambda (h) - (let ((content (regexp "^[Cc]ontent-")) - (mime (regexp "^MIME-Version:"))) - (or (regexp-match content h) - (regexp-match mime h))))) + (or (regexp-match re:content h) + (regexp-match re:mime h)))) ;;; Headers @@ -377,10 +378,11 @@ ;; *(";" parameter) ;; ; Matching of media type and subtype ;; ; is ALWAYS case-insensitive. + (define re:content-type (regexp (format "^~a:([^/]+)/([^/]+)$" (regexp-quote "content-type" #f)))) (define content (lambda (header entity) (let* ((params (string-tokenizer #\; header)) - (one (regexp "^[Cc]ontent-[Tt]ype:([^/]+)/([^/]+)$")) + (one re:content-type) (h (trim-all-spaces (car params))) (target (regexp-match one h)) (old-param (entity-params entity))) @@ -412,10 +414,11 @@ ;; disposition := "Content-Disposition" ":" ;; disposition-type ;; *(";" disposition-parm) + (define re:content-disposition (regexp (format "^~a:(.+)$" (regexp-quote "content-disposition" #f)))) (define dispositione (lambda (header entity) (let* ((params (string-tokenizer #\; header)) - (reg (regexp "^[Cc]ontent-[Dd]isposition:(.+)$")) + (reg re:content-disposition) (h (trim-all-spaces (car params))) (target (regexp-match reg h)) (disp-struct (entity-disposition entity))) @@ -426,9 +429,10 @@ (disp-params (cdr params) disp-struct))))) ;; version := "MIME-Version" ":" 1*DIGIT "." 1*DIGIT + (define re:mime-version (regexp (format "^~a:([0-9]+)\\.([0-9]+)$" (regexp-quote "MIME-Version" #f)))) (define version (lambda (header message) - (let* ((reg (regexp "^MIME-Version:([0-9]+)\\.([0-9]+)$")) + (let* ((reg re:mime-version) (h (trim-all-spaces header)) (target (regexp-match reg h))) (and target @@ -437,9 +441,10 @@ (string->number (regexp-replace reg h "\\1.\\2"))))))) ;; description := "Content-Description" ":" *text + (define re:content-description (regexp (format "^~a:[ \t\r\n]*(.*)$" (regexp-quote "content-description" #f)))) (define description (lambda (header entity) - (let* ((reg (regexp "^[Cc]ontent-[Dd]escription:[ ]*(.*)$")) + (let* ((reg re:content-description) (target (regexp-match reg header))) (and target (set-entity-description! @@ -447,9 +452,10 @@ (trim-spaces (regexp-replace reg header "\\1"))))))) ;; encoding := "Content-Transfer-Encoding" ":" mechanism + (define re:content-transfer-encoding (regexp (format "^~a:(.+)$" (regexp-quote "content-transfer-encoding" #f)))) (define encoding (lambda (header entity) - (let* ((reg (regexp "^[Cc]ontent-[Tt]ransfer-[Ee]ncoding:(.+)$")) + (let* ((reg re:content-transfer-encoding) (h (trim-all-spaces header)) (target (regexp-match reg h))) (and target @@ -458,9 +464,10 @@ (mechanism (regexp-replace reg h "\\1"))))))) ;; id := "Content-ID" ":" msg-id + (define re:content-id (regexp (format "^~a:(.+)$" (regexp-quote "content-id" #f)))) (define id (lambda (header entity) - (let* ((reg (regexp "^[Cc]ontent-ID:(.+)$")) + (let* ((reg re:content-id) (h (trim-all-spaces header)) (target (regexp-match reg h))) (and target diff --git a/collects/net/qp-unit.ss b/collects/net/qp-unit.ss index e3e4159..ba47774 100644 --- a/collects/net/qp-unit.ss +++ b/collects/net/qp-unit.ss @@ -149,7 +149,7 @@ (define display-qp-encoded (lambda (line out) - (let* ((blanks (regexp "[ ]+$")) + (let* ((blanks (regexp "[ \t]+$")) (pos (regexp-match-positions blanks line)) (col (caar pos)) (rest-of-line (substring line col (string-length line))))