Fixes and improvements to `net/unihead'.
* Use `re:non-ascii' to look for a non-ascii character => faster.
* Treat either CR or LF as a line break, not just the CRLF sequence.
* Use `regexp-replace*' to encode each line in place, leaving the line
breaks between them untouched. Besides making the code simpler, this
fixes a bug in the previous code, where each line was encoded separately
and the results were concatenated without the newlines or any other
whitespace between them.
original commit: 188c65661d
This commit is contained in:
parent
42503882a8
commit
b232bb25ac
|
@ -1,16 +1,14 @@
|
||||||
#lang racket/base
|
#lang racket/base
|
||||||
(require net/base64 net/qp racket/string)
|
(require net/base64 net/qp)
|
||||||
|
|
||||||
(provide encode-for-header decode-for-header generalize-encoding)
|
(provide encode-for-header decode-for-header generalize-encoding)
|
||||||
|
|
||||||
(define re:ascii #rx"^[\u0-\u7F]*$")
|
(define re:non-ascii #rx"[^\u0-\u7F]")
|
||||||
|
|
||||||
(define (encode-for-header s)
|
(define (encode-for-header s)
|
||||||
(if (regexp-match? re:ascii s)
|
(cond [(not (regexp-match? re:non-ascii s)) s]
|
||||||
s
|
[(not (regexp-match? #rx"\r\n" s)) (encode-line-for-header s)] ; speed
|
||||||
(let ([l (regexp-split #rx"\r\n" s)])
|
[else (regexp-replace* #rx"[^\r\n]+" s encode-line-for-header)]))
|
||||||
(apply string-append
|
|
||||||
(map encode-line-for-header l)))))
|
|
||||||
|
|
||||||
(define (encode-line-for-header s)
|
(define (encode-line-for-header s)
|
||||||
(define (loop s string->bytes charset encode encoding)
|
(define (loop s string->bytes charset encode encoding)
|
||||||
|
@ -32,14 +30,13 @@
|
||||||
(encode (string->bytes s))
|
(encode (string->bytes s))
|
||||||
#"")))))))
|
#"")))))))
|
||||||
(cond
|
(cond
|
||||||
[(regexp-match? re:ascii s)
|
|
||||||
;; ASCII - do nothing
|
;; ASCII - do nothing
|
||||||
s]
|
[(not (regexp-match? re:non-ascii s)) s]
|
||||||
[(regexp-match? #rx"[^\u0-\uFF]" s)
|
|
||||||
;; Not Latin-1, so use UTF-8
|
;; Not Latin-1, so use UTF-8
|
||||||
|
[(regexp-match? #rx"[^\u0-\uFF]" s)
|
||||||
(loop s string->bytes/utf-8 "UTF-8" base64-encode "B")]
|
(loop s string->bytes/utf-8 "UTF-8" base64-encode "B")]
|
||||||
[else
|
|
||||||
;; use Latin-1
|
;; use Latin-1
|
||||||
|
[else
|
||||||
(loop s string->bytes/latin-1 "ISO-8859-1"
|
(loop s string->bytes/latin-1 "ISO-8859-1"
|
||||||
(lambda (s)
|
(lambda (s)
|
||||||
(regexp-replace #rx#" " (qp-encode s) #"_"))
|
(regexp-replace #rx#" " (qp-encode s) #"_"))
|
||||||
|
|
Loading…
Reference in New Issue
Block a user