From 188c65661d1b4bf9dc605406f7eb612873ce032d Mon Sep 17 00:00:00 2001
From: Eli Barzilay <eli@barzilay.org>
Date: Wed, 21 Dec 2011 03:13:34 -0500
Subject: [PATCH] Fixes and improvements to `net/unihead'.

* Use `re:non-ascii' to look for a non-ascii character => faster.

* Use either CR or LF for a newline, not just the CRLF sequence.

* Use `regexp-replace*' to encode the parts between the lines.  Besides
  making the code simpler, it fixes a bug in the previous code where
  multiple lines would each get encoded and the results concatenated
  without the newlines or any other whitespace.
---
 collects/net/unihead.rkt | 21 +++++++++------------
 1 file changed, 9 insertions(+), 12 deletions(-)

diff --git a/collects/net/unihead.rkt b/collects/net/unihead.rkt
index f7a99e9e7d..1cc821c1c1 100644
--- a/collects/net/unihead.rkt
+++ b/collects/net/unihead.rkt
@@ -1,16 +1,14 @@
 #lang racket/base
-(require net/base64 net/qp racket/string)
+(require net/base64 net/qp)
 
 (provide encode-for-header decode-for-header generalize-encoding)
 
-(define re:ascii #rx"^[\u0-\u7F]*$")
+(define re:non-ascii #rx"[^\u0-\u7F]") ; any one char above U+007F
 
 (define (encode-for-header s)
-  (if (regexp-match? re:ascii s)
-    s
-    (let ([l (regexp-split #rx"\r\n" s)])
-      (apply string-append
-             (map encode-line-for-header l)))))
+  (cond [(not (regexp-match? re:non-ascii s)) s] ; all ASCII: unchanged
+        [(not (regexp-match? #rx"[\r\n]" s)) (encode-line-for-header s)] ; speed
+        [else (regexp-replace* #rx"[^\r\n]+" s encode-line-for-header)])) ; per line
 
 (define (encode-line-for-header s)
   (define (loop s string->bytes charset encode encoding)
@@ -32,14 +30,13 @@
                                      (encode (string->bytes s))
                                      #"")))))))
   (cond
-    [(regexp-match? re:ascii s)
-     ;; ASCII - do nothing
-     s]
+    ;; ASCII - do nothing
+    [(not (regexp-match? re:non-ascii s)) s]
+    ;; Not Latin-1, so use UTF-8
     [(regexp-match? #rx"[^\u0-\uFF]" s)
-     ;; Not Latin-1, so use UTF-8
      (loop s string->bytes/utf-8 "UTF-8" base64-encode "B")]
+    ;; use Latin-1
     [else
-     ;; use Latin-1
      (loop s string->bytes/latin-1 "ISO-8859-1"
            (lambda (s)
              (regexp-replace #rx#" " (qp-encode s) #"_"))