From 188c65661d1b4bf9dc605406f7eb612873ce032d Mon Sep 17 00:00:00 2001 From: Eli Barzilay Date: Wed, 21 Dec 2011 03:13:34 -0500 Subject: [PATCH] Fixes and improvements to `net/unihead'. * Use `re:non-ascii' to look for a non-ascii character => faster. * Use either CR or LF for a newline, not just CRLF. * Use `regexp-replace*' to encode the parts between the lines. Besides making the code simpler, it fixes a bug in the previous code where multiple lines would each get encoded and the results concatenated without the newlines or any other whitespace. --- collects/net/unihead.rkt | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/collects/net/unihead.rkt b/collects/net/unihead.rkt index f7a99e9e7d..1cc821c1c1 100644 --- a/collects/net/unihead.rkt +++ b/collects/net/unihead.rkt @@ -1,16 +1,14 @@ #lang racket/base -(require net/base64 net/qp racket/string) +(require net/base64 net/qp) (provide encode-for-header decode-for-header generalize-encoding) -(define re:ascii #rx"^[\u0-\u7F]*$") +(define re:non-ascii #rx"[^\u0-\u7F]") (define (encode-for-header s) - (if (regexp-match? re:ascii s) - s - (let ([l (regexp-split #rx"\r\n" s)]) - (apply string-append - (map encode-line-for-header l))))) + (cond [(not (regexp-match? re:non-ascii s)) s] + [(not (regexp-match? #rx"[\r\n]" s)) (encode-line-for-header s)] ; speed + [else (regexp-replace* #rx"[^\r\n]+" s encode-line-for-header)])) (define (encode-line-for-header s) (define (loop s string->bytes charset encode encoding) @@ -32,14 +30,13 @@ (encode (string->bytes s)) #""))))))) (cond - [(regexp-match? re:ascii s) - ;; ASCII - do nothing - s] + ;; ASCII - do nothing + [(not (regexp-match? re:non-ascii s)) s] + ;; Not Latin-1, so use UTF-8 [(regexp-match? 
#rx"[^\u0-\uFF]" s) - ;; Not Latin-1, so use UTF-8 (loop s string->bytes/utf-8 "UTF-8" base64-encode "B")] + ;; use Latin-1 [else - ;; use Latin-1 (loop s string->bytes/latin-1 "ISO-8859-1" (lambda (s) (regexp-replace #rx#" " (qp-encode s) #"_"))