original commit: ba5ffdfd07098e0856a3f157f003feb6e0b2b734
This commit is contained in:
Matthew Flatt 2004-08-17 22:13:16 +00:00
parent db092b517c
commit 0a0ab9fcd8

View File

@ -5,7 +5,7 @@
(provide encode-for-header (provide encode-for-header
decode-for-header decode-for-header
latin-1->windows-1252) generalize-encoding)
(define re:ascii #rx"^[\u0-\u7F]*$") (define re:ascii #rx"^[\u0-\u7F]*$")
@ -51,21 +51,23 @@
;; ---------------------------------------- ;; ----------------------------------------
(define re:iso #rx#"[iI][sS][oO]-8859-1") (define re:iso #rx#"^[iI][sS][oO]-8859-1$")
(define re:utf-8 #rx#"[uU][tT][fF]-8") (define re:gb #rx#"^[gG][bB](2312)?$")
(define re:utf-8 #rx#"^[uU][tT][fF]-8$")
(define re:encoded #rx#"^(.*?)=[?]([^?]+)[?]([qQbB])[?](.*?)[?]=(.*)$") (define re:encoded #rx#"^(.*?)=[?]([^?]+)[?]([qQbB])[?](.*?)[?]=(.*)$")
(define (latin-1->windows-1252 encoding) (define (generalize-encoding encoding)
(if (regexp-match re:iso encoding) ;; Treat Latin-1 as Windows-1252 and also threat GB and GB2312
;; Treat Latin-1 as Windows-1252, because ;; as GBK, because some mailers are broken.
;; some mailers are broken. The only difference (cond
;; is control chaarcters (which are technically [(regexp-match re:iso encoding) (if (bytes? encoding)
;; not mapped in Latin-1, anyway). #"WINDOWS-1252"
(if (bytes? encoding) "WINDOWS-1252")]
#"WINDOWS-1252" [(regexp-match re:gb encoding) (if (bytes? encoding)
"WINDOWS-1252") #"GBK"
encoding)) "GBK")]
[else encoding]))
(define (decode-for-header s) (define (decode-for-header s)
(and s (and s
@ -81,7 +83,7 @@
[encoding (caddr m)]) [encoding (caddr m)])
(string-append (string-append
(decode-for-header (bytes->string/latin-1 (cadr m))) (decode-for-header (bytes->string/latin-1 (cadr m)))
(let ([encoding (latin-1->windows-1252 encoding)]) (let ([encoding (generalize-encoding encoding)])
(cond (cond
[(regexp-match re:utf-8 encoding) (bytes->string/utf-8 s #\?)] [(regexp-match re:utf-8 encoding) (bytes->string/utf-8 s #\?)]
[else (let ([c (bytes-open-converter (bytes->string/latin-1 encoding) "UTF-8")]) [else (let ([c (bytes-open-converter (bytes->string/latin-1 encoding) "UTF-8")])