diff --git a/net-doc/net/scribblings/net.scrbl b/net-doc/net/scribblings/net.scrbl index 7f31fb982f..a289e4aa66 100644 --- a/net-doc/net/scribblings/net.scrbl +++ b/net-doc/net/scribblings/net.scrbl @@ -13,6 +13,7 @@ @include-section["smtp.scrbl"] @include-section["sendmail.scrbl"] @include-section["head.scrbl"] +@include-section["unihead.scrbl"] @include-section["imap.scrbl"] @include-section["pop3.scrbl"] @include-section["mime.scrbl"] diff --git a/net-doc/net/scribblings/unihead.scrbl b/net-doc/net/scribblings/unihead.scrbl new file mode 100644 index 0000000000..b26a1a7c3d --- /dev/null +++ b/net-doc/net/scribblings/unihead.scrbl @@ -0,0 +1,63 @@ +#lang scribble/doc +@(require "common.rkt" + scribble/eval + scribble/bnf + (for-label net/unihead)) + +@(define unihead-eval (make-base-eval)) +@interaction-eval[#:eval unihead-eval (require net/unihead)] + +@(define l1-seq @elem{@litchar{=?ISO-8859-1?Q?}...@litchar{?=}}) +@(define uni-seq @elem{@litchar{=?UTF-8?B?}...@litchar{?=}}) + +@(define enc-format @elem{@litchar{=?}@nonterm{encoding}@litchar{?}@nonterm{transport}@litchar{?}...@litchar{?=}}) + +@title[#:tag "unihead"]{Header Field Encoding} + +@defmodule[net/unihead]{The @racketmodname[net/unihead] module +provides utilities for encoding and decoding header fields using the +@|enc-format| format.} + +@defproc[(encode-for-header [s string?]) string?]{ + +Encodes @racket[s] for use in a header. + +If @racket[s] contains only ASCII characters, then the result string +will have the same content as the given string. If @racket[s] contains +only Latin-1 characters, then on each CRLF-delimited line, the +space-delimited sequence containing all non-ASCII characters in +@racket[s] is encoded with a @|l1-seq| sequence. If @racket[s] +contains non-Latin-1 characters, then on each CRLF-delimited line, a +space-delimited sequence containing all non-ASCII characters in +@racket[s] is encoded with a @|uni-seq| sequence. 
+ +@examples[ +#:eval unihead-eval +(encode-for-header "English") +(encode-for-header "français") +(encode-for-header "→" ) +(encode-for-header "→\r\nboth → and français here") +]} + +@defproc[(decode-for-header [s string?]) string?]{ + +Decodes header fields that use the @|enc-format| encoding format. The +specified @nonterm{encoding} is generalized via +@racket[generalize-encoding] before decoding content. + +@examples[ +#:eval unihead-eval +(decode-for-header "English") +(decode-for-header "=?UTF-8?B?4oaS?= =?ISO-8859-1?Q?fran=E7ais?=") +]} + + +@defproc[(generalize-encoding [s (or/c string? bytes?)]) (or/c string? bytes?)]{ + +Generalizes the encoding name @racket[s] to compensate for typical +mailer bugs: Latin-1 and ASCII encodings are generalized to +WINDOWS-1252; GB and GB2312 are generalized to GBK; and KS_C_5601-1987 +is generalized to CP949.} + + +@close-eval[unihead-eval]