commit
4d097b7bfc
|
@ -10,7 +10,7 @@
|
||||||
(define re:ascii #rx"^[\u0-\u7F]*$")
|
(define re:ascii #rx"^[\u0-\u7F]*$")
|
||||||
|
|
||||||
(define (encode-for-header s)
|
(define (encode-for-header s)
|
||||||
(if (regexp-match re:ascii s)
|
(if (regexp-match? re:ascii s)
|
||||||
s
|
s
|
||||||
(let ([l (regexp-split #rx"\r\n" s)])
|
(let ([l (regexp-split #rx"\r\n" s)])
|
||||||
(apply string-append
|
(apply string-append
|
||||||
|
@ -36,10 +36,10 @@
|
||||||
(encode (string->bytes s))
|
(encode (string->bytes s))
|
||||||
#"")))))))
|
#"")))))))
|
||||||
(cond
|
(cond
|
||||||
[(regexp-match re:ascii s)
|
[(regexp-match? re:ascii s)
|
||||||
;; ASCII - do nothing
|
;; ASCII - do nothing
|
||||||
s]
|
s]
|
||||||
[(regexp-match #rx"[^\u0-\uFF]" s)
|
[(regexp-match? #rx"[^\u0-\uFF]" s)
|
||||||
;; Not Latin-1, so use UTF-8
|
;; Not Latin-1, so use UTF-8
|
||||||
(loop s string->bytes/utf-8 "UTF-8" base64-encode "B")]
|
(loop s string->bytes/utf-8 "UTF-8" base64-encode "B")]
|
||||||
[else
|
[else
|
||||||
|
@ -51,36 +51,30 @@
|
||||||
|
|
||||||
;; ----------------------------------------
|
;; ----------------------------------------
|
||||||
|
|
||||||
(define re:us-ascii #rx#"^[uS][sS]-[aA][sS][cC][iI][iI]$")
|
(define re:us-ascii #rx#"^(?i:us-ascii)$")
|
||||||
(define re:iso #rx#"^[iI][sS][oO]-8859-1$")
|
(define re:iso #rx#"^(?i:iso-8859-1)$")
|
||||||
(define re:gb #rx#"^[gG][bB](2312)?$")
|
(define re:gb #rx#"^(?i:gb(?:2312)?)$")
|
||||||
(define re:ks_c #rx#"^[kK][sS]_[cC]_5601-1987$")
|
(define re:ks_c #rx#"^(?i:ks_c_5601-1987)$")
|
||||||
(define re:utf-8 #rx#"^[uU][tT][fF]-8$")
|
(define re:utf-8 #rx#"^(?i:utf-8)$")
|
||||||
|
|
||||||
(define re:encoded #rx#"^(.*?)=[?]([^?]+)[?]([qQbB])[?](.*?)[?]=(.*)$")
|
(define re:encoded #rx#"^(.*?)=[?]([^?]+)[?]([qQbB])[?](.*?)[?]=(.*)$")
|
||||||
|
|
||||||
(define (generalize-encoding encoding)
|
(define (generalize-encoding encoding)
|
||||||
;; Treat Latin-1 as Windows-1252 and also threat GB and GB2312
|
;; Treat Latin-1 as Windows-1252 and also threat GB and GB2312
|
||||||
;; as GBK, because some mailers are broken.
|
;; as GBK, because some mailers are broken.
|
||||||
(cond
|
(cond [(or (regexp-match? re:iso encoding)
|
||||||
[(or (regexp-match re:iso encoding)
|
(regexp-match? re:us-ascii encoding))
|
||||||
(regexp-match re:us-ascii encoding))
|
(if (bytes? encoding) #"WINDOWS-1252" "WINDOWS-1252")]
|
||||||
(if (bytes? encoding)
|
[(regexp-match? re:gb encoding)
|
||||||
#"WINDOWS-1252"
|
(if (bytes? encoding) #"GBK" "GBK")]
|
||||||
"WINDOWS-1252")]
|
[(regexp-match? re:ks_c encoding)
|
||||||
[(regexp-match re:gb encoding)
|
(if (bytes? encoding) #"CP949" "CP949")]
|
||||||
(if (bytes? encoding)
|
[else encoding]))
|
||||||
#"GBK"
|
|
||||||
"GBK")]
|
|
||||||
[(regexp-match re:ks_c encoding)
|
|
||||||
(if (bytes? encoding)
|
|
||||||
#"CP949"
|
|
||||||
"CP949")]
|
|
||||||
[else encoding]))
|
|
||||||
|
|
||||||
(define (decode-for-header s)
|
(define (decode-for-header s)
|
||||||
(and s
|
(and s
|
||||||
(let ([m (regexp-match re:encoded (string->bytes/latin-1 s (char->integer #\?)))])
|
(let ([m (regexp-match re:encoded
|
||||||
|
(string->bytes/latin-1 s (char->integer #\?)))])
|
||||||
(if m
|
(if m
|
||||||
(let ([s ((if (member (cadddr m) '(#"q" #"Q"))
|
(let ([s ((if (member (cadddr m) '(#"q" #"Q"))
|
||||||
;; quoted-printable, with special _ handling
|
;; quoted-printable, with special _ handling
|
||||||
|
@ -94,10 +88,13 @@
|
||||||
(decode-for-header (bytes->string/latin-1 (cadr m)))
|
(decode-for-header (bytes->string/latin-1 (cadr m)))
|
||||||
(let ([encoding (generalize-encoding encoding)])
|
(let ([encoding (generalize-encoding encoding)])
|
||||||
(cond
|
(cond
|
||||||
[(regexp-match re:utf-8 encoding) (bytes->string/utf-8 s #\?)]
|
[(regexp-match? re:utf-8 encoding)
|
||||||
[else (let ([c (bytes-open-converter (bytes->string/latin-1 encoding) "UTF-8")])
|
(bytes->string/utf-8 s #\?)]
|
||||||
|
[else (let ([c (bytes-open-converter
|
||||||
|
(bytes->string/latin-1 encoding) "UTF-8")])
|
||||||
(if c
|
(if c
|
||||||
(let-values ([(r got status) (bytes-convert c s)])
|
(let-values ([(r got status)
|
||||||
|
(bytes-convert c s)])
|
||||||
(bytes-close-converter c)
|
(bytes-close-converter c)
|
||||||
(if (eq? status 'complete)
|
(if (eq? status 'complete)
|
||||||
(bytes->string/utf-8 r #\?)
|
(bytes->string/utf-8 r #\?)
|
||||||
|
@ -105,13 +102,14 @@
|
||||||
(bytes->string/latin-1 s)))]))
|
(bytes->string/latin-1 s)))]))
|
||||||
(let ([rest (cadddr (cddr m))])
|
(let ([rest (cadddr (cddr m))])
|
||||||
(let ([rest
|
(let ([rest
|
||||||
;; A CR-LF-space-encoding sequence means that we should
|
;; A CR-LF-space-encoding sequence means that we
|
||||||
;; drop the space.
|
;; should drop the space.
|
||||||
(if (and (> (bytes-length rest) 4)
|
(if (and (> (bytes-length rest) 4)
|
||||||
(= 13 (bytes-ref rest 0))
|
(= 13 (bytes-ref rest 0))
|
||||||
(= 10 (bytes-ref rest 1))
|
(= 10 (bytes-ref rest 1))
|
||||||
(= 32 (bytes-ref rest 2))
|
(= 32 (bytes-ref rest 2))
|
||||||
(let ([m (regexp-match-positions re:encoded rest)])
|
(let ([m (regexp-match-positions
|
||||||
|
re:encoded rest)])
|
||||||
(and m (= (caaddr m) 5))))
|
(and m (= (caaddr m) 5))))
|
||||||
(subbytes rest 3)
|
(subbytes rest 3)
|
||||||
rest)])
|
rest)])
|
||||||
|
|
Loading…
Reference in New Issue
Block a user