From 78bce17be36d756259e40b9f0604041fde9a33dd Mon Sep 17 00:00:00 2001 From: Jon Zeppieri Date: Mon, 29 Aug 2011 21:33:56 -0400 Subject: [PATCH] Moved `net/mime' code from unit to module. original commit: 67ca846fd9440eec9c154ca5cde7d3019f12f821 --- collects/net/mime-unit.rkt | 736 +------------------------------------ 1 file changed, 5 insertions(+), 731 deletions(-) diff --git a/collects/net/mime-unit.rkt b/collects/net/mime-unit.rkt index fee2e9f..538eab9 100644 --- a/collects/net/mime-unit.rkt +++ b/collects/net/mime-unit.rkt @@ -1,734 +1,8 @@ -;;; -;;; ---- MIME support -;;; -;;; Copyright (C) 2002 by PLT. -;;; Copyright (C) 2001 by Wish Computing. -;;; -;;; This file is part of mime +#lang racket/base -;;; mime-plt is free software; you can redistribute it and/or modify -;;; it under the terms of the GNU General Public License as published by -;;; the Free Software Foundation; either version 2, or (at your option) -;;; any later version. +(require racket/unit + "mime-sig.rkt" "mime.rkt") -;;; mime-plt is distributed in the hope that it will be useful, -;;; but WITHOUT ANY WARRANTY; without even the implied warranty of -;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -;;; GNU General Public License for more details. +(define-unit-from-context mime@ mime^) -;;; You should have received a copy of the GNU General Public License -;;; along with mime-plt; see the file COPYING. If not, write to the -;;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -;;; Boston, MA 02110-1301 USA. - -;;; Author: Francisco Solsona -;; -;; -;; Commentary: MIME support for PLT Scheme: an implementation of -;; rfc2045, rfc2046, rfc2047, rfc2048, and rfc2049. - -#lang racket/unit - -(require "mime-sig.rkt" "qp-sig.rkt" "base64-sig.rkt" "head-sig.rkt" - "mime-util.rkt" - racket/port) - -(import base64^ qp^ head^) -(export mime^) - -;; Constants: -(define discrete-alist - '(("text" . text) - ("image" . image) - ("audio" . audio) - ("video" . video) - ("application" . application))) - -(define disposition-alist - '(("inline" . inline) - ("attachment" . attachment) - ("file" . attachment) ;; This is used (don't know why) by - ;; multipart/form-data - ("messagetext" . inline) - ("form-data" . form-data))) - -(define composite-alist - '(("message" . message) - ("multipart" . multipart))) - -(define mechanism-alist - '(("7bit" . 7bit) - ("8bit" . 8bit) - ("binary" . binary) - ("quoted-printable" . quoted-printable) - ("base64" . base64))) - -(define ietf-extensions '()) -(define iana-extensions - '(;; text - ("plain" . plain) - ("html" . html) - ("enriched" . enriched) ; added 5/2005 - probably not iana - ("richtext" . richtext) - ("tab-separated-values" . tab-separated-values) - ;; Multipart - ("mixed" . mixed) - ("alternative" . alternative) - ("digest" . digest) - ("parallel" . parallel) - ("appledouble" . appledouble) - ("header-set" . header-set) - ("form-data" . form-data) - ;; Message - ("rfc822" . rfc822) - ("partial" . partial) - ("external-body" . external-body) - ("news" . news) - ;; Application - ("octet-stream" . octet-stream) - ("postscript" . postscript) - ("oda" . oda) - ("atomicmail" . atomicmail) - ("andrew-inset" . andrew-inset) - ("slate" . slate) - ("wita" . wita) - ("dec-dx" . dec-dx) - ("dca-rf" . dca-rf) - ("activemessage" . activemessage) - ("rtf" . rtf) - ("applefile" . applefile) - ("mac-binhex40" . mac-binhex40) - ("news-message-id" . news-message-id) - ("news-transmissio" . news-transmissio) - ("wordperfect5.1" . wordperfect5.1) - ("pdf" . pdf) - ("zip" . zip) - ("macwritei" . macwritei) - ;; "image" - ("jpeg" . jpeg) - ("gif" . gif) - ("ief" . ief) - ("tiff" . tiff) - ;; "audio" - ("basic" . basic) - ;; "video" . - ("mpeg" . mpeg) - ("quicktime" . quicktime))) - -;; Basic structures -(define-struct message (version entity fields) - #:mutable) -(define-struct entity - (type subtype charset encoding disposition params id description other - fields parts body) - #:mutable) -(define-struct disposition - (type filename creation modification read size params) - #:mutable) - -;; Exceptions -(define-struct mime-error ()) -(define-struct (unexpected-termination mime-error) (msg)) -(define-struct (missing-multipart-boundary-parameter mime-error) ()) -(define-struct (malformed-multipart-entity mime-error) (msg)) -(define-struct (empty-mechanism mime-error) ()) -(define-struct (empty-type mime-error) ()) -(define-struct (empty-subtype mime-error) ()) -(define-struct (empty-disposition-type mime-error) ()) - -;; ************************************* -;; Practical stuff, aka MIME in action: -;; ************************************* -(define CRLF (format "~a~a" #\return #\newline)) -(define CRLF-binary "=0D=0A") ;; quoted printable representation - -;; get-headers : input-port -> string -;; returns the header part of a message/part conforming to rfc822, and -;; rfc2045. -(define (get-headers in) - (let loop ([headers ""] [ln (read-line in 'any)]) - (cond [(eof-object? ln) - ;; (raise (make-unexpected-termination "eof reached! while parsing headers")) - (warning "premature eof while parsing headers") - headers] - [(string=? ln "") headers] - [else - ;; Quoting rfc822: - ;; " Headers occur before the message body and are - ;; terminated by a null line (i.e., two contiguous - ;; CRLFs)." - ;; That is: Two empty lines. But most MUAs seem to count - ;; the CRLF ending the last field (header) as the first - ;; CRLF of the null line. - (loop (string-append headers ln CRLF) - (read-line in 'any))]))) - -(define (make-default-disposition) - (make-disposition - 'inline ;; type - "" ;; filename - #f ;; creation - #f ;; modification - #f ;; read - #f ;; size - null ;; params - )) - -(define (make-default-entity) - (make-entity - 'text ;; type - 'plain ;; subtype - 'us-ascii ;; charset - '7bit ;; encoding - (make-default-disposition) ;; disposition - null ;; params - "" ;; id - "" ;; description - null ;; other MIME fields (MIME-extension-fields) - null ;; fields - null ;; parts - null ;; body - )) - -(define (make-default-message) - (make-message 1.0 (make-default-entity) null)) - -(define (mime-decode entity input) - (set-entity-body! - entity - (case (entity-encoding entity) - [(quoted-printable) - (lambda (output) - (qp-decode-stream input output))] - [(base64) - (lambda (output) - (base64-decode-stream input output))] - [else ;; 7bit, 8bit, binary - (lambda (output) - (copy-port input output))]))) - -(define (mime-analyze input [part #f]) - (let* ([iport (if (bytes? input) - (open-input-bytes input) - input)] - [headers (get-headers iport)] - [msg (if part - (MIME-part-headers headers) - (MIME-message-headers headers))] - [entity (message-entity msg)]) - ;; OK we have in msg a MIME-message structure, lets see what we have: - (case (entity-type entity) - [(text image audio video application) - ;; decode part, and save port and thunk - (mime-decode entity iport)] - [(message multipart) - (let ([boundary (entity-boundary entity)]) - (when (not boundary) - (when (eq? 'multipart (entity-type entity)) - (raise (make-missing-multipart-boundary-parameter)))) - (set-entity-parts! entity - (map (lambda (part) - (mime-analyze part #t)) - (if boundary - (multipart-body iport boundary) - (list iport)))))] - [else - ;; Unrecognized type, you're on your own! (sorry) - (mime-decode entity iport)]) - ;; return mime structure - msg)) - -(define (entity-boundary entity) - (let* ([params (entity-params entity)] - [ans (assoc "boundary" params)]) - (and ans (cdr ans)))) - -;; ************************************************* -;; MIME Specific: rfc2045-2049, and rfc0822, rfc2183 -;; ************************************************* - -;;multipart-body := [preamble CRLF] -;; dash-boundary transport-padding CRLF -;; body-part *encapsulation -;; close-delimiter transport-padding -;; [CRLF epilogue] -;; Returns a list of input ports, each one containing the correspongind part. -(define (multipart-body input boundary) - (let* ([make-re (lambda (prefix) - (regexp (string-append prefix "--" (regexp-quote boundary) "(--)?\r\n")))] - [re (make-re "\r\n")]) - (letrec ([eat-part (lambda () - (let-values ([(pin pout) (make-pipe)]) - (let ([m (regexp-match re input 0 #f pout)]) - (cond - [(not m) - (close-output-port pout) - (values pin ;; part - #f ;; close-delimiter? - #t ;; eof reached? - )] - [(cadr m) - (close-output-port pout) - (values pin #t #f)] - [else - (close-output-port pout) - (values pin #f #f)]))))]) - ;; pre-amble is allowed to be completely empty: - (if (regexp-match-peek (make-re "^") input) - ;; No \r\f before first separator: - (read-line input) - ;; non-empty preamble: - (eat-part)) - (let loop () - (let-values ([(part close? eof?) (eat-part)]) - (cond [close? (list part)] - [eof? (list part)] - [else (cons part (loop))])))))) - -;; MIME-message-headers := entity-headers -;; fields -;; version CRLF -;; ; The ordering of the header -;; ; fields implied by this BNF -;; ; definition should be ignored. -(define (MIME-message-headers headers) - (let ([message (make-default-message)]) - (entity-headers headers message #t) - message)) - -;; MIME-part-headers := entity-headers -;; [ fields ] -;; ; Any field not beginning with -;; ; "content-" can have no defined -;; ; meaning and may be ignored. -;; ; The ordering of the header -;; ; fields implied by this BNF -;; ; definition should be ignored. -(define (MIME-part-headers headers) - (let ([message (make-default-message)]) - (entity-headers headers message #f) - message)) - -;; entity-headers := [ content CRLF ] -;; [ encoding CRLF ] -;; [ id CRLF ] -;; [ description CRLF ] -;; *( MIME-extension-field CRLF ) -(define (entity-headers headers message version?) - (let ([entity (message-entity message)]) - (let-values ([(mime non-mime) (get-fields headers)]) - (let loop ([fields mime]) - (unless (null? fields) - ;; Process MIME field - (let ([trimmed-h (trim-comments (car fields))]) - (or (and version? (version trimmed-h message)) - (content trimmed-h entity) - (encoding trimmed-h entity) - (dispositione trimmed-h entity) - (id trimmed-h entity) - (description trimmed-h entity) - (MIME-extension-field trimmed-h entity)) - ;; keep going - (loop (cdr fields))))) - ;; NON-mime headers (or semantically incorrect). In order to make - ;; this implementation of rfc2045 robuts, we will save the header in - ;; the fields field of the message struct: - (set-message-fields! message non-mime) - ;; Return message - message))) - -(define (get-fields headers) - (let ([mime null] [non-mime null]) - (letrec ([store-field - (lambda (f) - (unless (string=? f "") - (if (mime-header? f) - (set! mime (append mime (list (trim-spaces f)))) - (set! non-mime (append non-mime (list (trim-spaces f)))))))]) - (let ([fields (extract-all-fields headers)]) - (for-each (lambda (p) - (store-field (format "~a: ~a" (car p) (cdr p)))) - fields)) - (values mime non-mime)))) - -(define re:content #rx"^(?i:content-)") -(define re:mime #rx"^(?i:mime-version):") - -(define (mime-header? h) - (or (regexp-match? re:content h) - (regexp-match? re:mime h))) - -;;; Headers -;;; Content-type follows this BNF syntax: -;; content := "Content-Type" ":" type "/" subtype -;; *(";" parameter) -;; ; Matching of media type and subtype -;; ; is ALWAYS case-insensitive. -(define re:content-type #rx"^(?i:content-type):([^/]+)/([^/]+)$") -(define (content header entity) - (let* ([params (string-tokenizer #\; header)] - [one re:content-type] - [h (trim-all-spaces (car params))] - [target (regexp-match one h)] - [old-param (entity-params entity)]) - (and target - (set-entity-type! entity - (type (regexp-replace one h "\\1"))) ;; type - (set-entity-subtype! entity - (subtype (regexp-replace one h "\\2"))) ;; subtype - (set-entity-params! - entity - (append old-param - (let loop ([p (cdr params)] ;; parameters - [ans null]) - (cond [(null? p) ans] - [else - (let ([par-pair (parameter (trim-all-spaces (car p)))]) - (cond [par-pair - (when (string=? (car par-pair) "charset") - (set-entity-charset! entity (cdr par-pair))) - (loop (cdr p) (append ans (list par-pair)))] - [else - (warning "Invalid parameter for Content-Type: `~a'" (car p)) - ;; go on... - (loop (cdr p) ans)]))]))))))) - -;; From rfc2183 Content-Disposition -;; disposition := "Content-Disposition" ":" -;; disposition-type -;; *(";" disposition-parm) -(define re:content-disposition #rx"^(?i:content-disposition):(.+)$") -(define (dispositione header entity) - (let* ([params (string-tokenizer #\; header)] - [reg re:content-disposition] - [h (trim-all-spaces (car params))] - [target (regexp-match reg h)] - [disp-struct (entity-disposition entity)]) - (and target - (set-disposition-type! - disp-struct - (disp-type (regexp-replace reg h "\\1"))) - (disp-params (cdr params) disp-struct)))) - -;; version := "MIME-Version" ":" 1*DIGIT "." 1*DIGIT -(define re:mime-version #rx"^(?i:MIME-Version):([0-9]+)\\.([0-9]+)$") -(define (version header message) - (let* ([reg re:mime-version] - [h (trim-all-spaces header)] - [target (regexp-match reg h)]) - (and target - (set-message-version! - message - (string->number (regexp-replace reg h "\\1.\\2")))))) - -;; description := "Content-Description" ":" *text -(define re:content-description #rx"^(?i:content-description):[ \t\r\n]*(.*)$") -(define (description header entity) - (let* ([reg re:content-description] - [target (regexp-match reg header)]) - (and target - (set-entity-description! - entity - (trim-spaces (regexp-replace reg header "\\1")))))) - -;; encoding := "Content-Transfer-Encoding" ":" mechanism -(define re:content-transfer-encoding #rx"^(?i:content-transfer-encoding):(.+)$") -(define (encoding header entity) - (let* ([reg re:content-transfer-encoding] - [h (trim-all-spaces header)] - [target (regexp-match reg h)]) - (and target - (set-entity-encoding! - entity - (mechanism (regexp-replace reg h "\\1")))))) - -;; id := "Content-ID" ":" msg-id -(define re:content-id #rx"^(?i:content-id):(.+)$") -(define (id header entity) - (let* ([reg re:content-id] - [h (trim-all-spaces header)] - [target (regexp-match reg h)]) - (and target - (set-entity-id! - entity - (msg-id (regexp-replace reg h "\\1")))))) - -;; From rfc822: -;; msg-id = "<" addr-spec ">" ; Unique message id -;; addr-spec = local-part "@" domain ; global address -;; local-part = word *("." word) ; uninterpreted -;; ; case-preserved -;; domain = sub-domain *("." sub-domain) -;; sub-domain = domain-ref / domain-literal -;; domain-literal = "[" *(dtext / quoted-pair) "]" -;; domain-ref = atom ; symbolic reference -(define (msg-id str) - (let* ([r (regexp "^<[^@>]+@[^.]+(\\.[^.]+)*>$")] - [ans (regexp-match r str)]) - (if ans - str - (begin (warning "Invalid msg-id: ~a" str) str)))) - -;; mechanism := "7bit" / "8bit" / "binary" / -;; "quoted-printable" / "base64" / -;; ietf-token / x-token -(define (mechanism mech) - (if (not mech) - (raise (make-empty-mechanism)) - (let ([val (assoc (lowercase mech) mechanism-alist)]) - (or (and val (cdr val)) - (ietf-token mech) - (x-token mech))))) - -;; MIME-extension-field := -;; -(define (MIME-extension-field header entity) - (let* ([reg (regexp "^[Cc]ontent-(.+):[ \t]*(.+)$")] - [target (regexp-match reg header)]) - (and target - (set-entity-other! - entity - (append (entity-other entity) - (list (cons (regexp-replace reg header "\\1") - (trim-spaces (regexp-replace reg header "\\2"))))))))) - -;; type := discrete-type / composite-type -(define (type value) - (if (not value) - (raise (make-empty-type)) - (or (discrete-type value) - (composite-type value)))) - -;; disposition-type := "inline" / "attachment" / extension-token -(define (disp-type value) - (if (not value) - (raise (make-empty-disposition-type)) - (let ([val (assoc (lowercase (trim-spaces value)) disposition-alist)]) - (if val (cdr val) (extension-token value))))) - -;; discrete-type := "text" / "image" / "audio" / "video" / -;; "application" / extension-token -(define (discrete-type value) - (let ([val (assoc (lowercase (trim-spaces value)) discrete-alist)]) - (if val (cdr val) (extension-token value)))) - -;; composite-type := "message" / "multipart" / extension-token -(define (composite-type value) - (let ([val (assoc (lowercase (trim-spaces value)) composite-alist)]) - (if val (cdr val) (extension-token value)))) - -;; extension-token := ietf-token / x-token -(define (extension-token value) - (or (ietf-token value) - (x-token value))) - -;; ietf-token := -(define (ietf-token value) - (let ([ans (assoc (lowercase (trim-spaces value)) ietf-extensions)]) - (and ans (cdr ans)))) - -;; Directly from RFC 1700: -;; Type Subtype Description Reference -;; ---- ------- ----------- --------- -;; text plain [RFC1521,NSB] -;; richtext [RFC1521,NSB] -;; tab-separated-values [Paul Lindner] -;; -;; multipart mixed [RFC1521,NSB] -;; alternative [RFC1521,NSB] -;; digest [RFC1521,NSB] -;; parallel [RFC1521,NSB] -;; appledouble [MacMime,Patrik Faltstrom] -;; header-set [Dave Crocker] -;; -;; message rfc822 [RFC1521,NSB] -;; partial [RFC1521,NSB] -;; external-body [RFC1521,NSB] -;; news [RFC 1036, Henry Spencer] -;; -;; application octet-stream [RFC1521,NSB] -;; postscript [RFC1521,NSB] -;; oda [RFC1521,NSB] -;; atomicmail [atomicmail,NSB] -;; andrew-inset [andrew-inset,NSB] -;; slate [slate,terry crowley] -;; wita [Wang Info Transfer,Larry Campbell] -;; dec-dx [Digital Doc Trans, Larry Campbell] -;; dca-rft [IBM Doc Content Arch, Larry Campbell] -;; activemessage [Ehud Shapiro] -;; rtf [Paul Lindner] -;; applefile [MacMime,Patrik Faltstrom] -;; mac-binhex40 [MacMime,Patrik Faltstrom] -;; news-message-id [RFC1036, Henry Spencer] -;; news-transmission [RFC1036, Henry Spencer] -;; wordperfect5.1 [Paul Lindner] -;; pdf [Paul Lindner] -;; zip [Paul Lindner] -;; macwriteii [Paul Lindner] -;; msword [Paul Lindner] -;; remote-printing [RFC1486,MTR] -;; -;; image jpeg [RFC1521,NSB] -;; gif [RFC1521,NSB] -;; ief Image Exchange Format [RFC1314] -;; tiff Tag Image File Format [MTR] -;; -;; audio basic [RFC1521,NSB] -;; -;; video mpeg [RFC1521,NSB] -;; quicktime [Paul Lindner] - -;; x-token := -(define (x-token value) - (let* ([r #rx"^[xX]-(.*)"] - [h (trim-spaces value)] - [ans (regexp-match r h)]) - (and ans - (token (regexp-replace r h "\\1")) - h))) - -;; subtype := extension-token / iana-token -(define (subtype value) - (if (not value) - (raise (make-empty-subtype)) - (or (extension-token value) - (iana-token value)))) - -;; iana-token := -(define (iana-token value) - (let ([ans (assoc (lowercase (trim-spaces value)) iana-extensions)]) - (and ans (cdr ans)))) - -;; parameter := attribute "=" value -(define re:parameter (regexp "([^=]+)=(.+)")) -(define (parameter par) - (let* ([r re:parameter] - [att (attribute (regexp-replace r par "\\1"))] - [val (value (regexp-replace r par "\\2"))]) - (if (regexp-match r par) - (cons (if att (lowercase att) "???") val) - (cons "???" par)))) - -;; value := token / quoted-string -(define (value val) - (or (token val) - (quoted-string val) - val)) - -;; token := 1* -;; tspecials := "(" / ")" / "<" / ">" / "@" / -;; "," / ";" / ":" / "\" / <"> -;; "/" / "[" / "]" / "?" / "=" -;; ; Must be in quoted-string, -;; ; to use within parameter values -(define (token value) - (let* ([tspecials (regexp "[^][()<>@,;:\\\"/?= ]+")] - [ans (regexp-match tspecials value)]) - (and ans - (string=? value (car ans)) - (car ans)))) - -;; attribute := token -;; ; Matching of attributes -;; ; is ALWAYS case-insensitive. -(define attribute token) - -(define re:quotes (regexp "\"(.+)\"")) -(define (quoted-string str) - (let* ([quotes re:quotes] - [ans (regexp-match quotes str)]) - (and ans (regexp-replace quotes str "\\1")))) - -;; disposition-parm := filename-parm -;; / creation-date-parm -;; / modification-date-parm -;; / read-date-parm -;; / size-parm -;; / parameter -;; -;; filename-parm := "filename" "=" value -;; -;; creation-date-parm := "creation-date" "=" quoted-date-time -;; -;; modification-date-parm := "modification-date" "=" quoted-date-time -;; -;; read-date-parm := "read-date" "=" quoted-date-time -;; -;; size-parm := "size" "=" 1*DIGIT -(define (disp-params lst disp) - (let loop ([lst lst]) - (unless (null? lst) - (let* ([p (parameter (trim-all-spaces (car lst)))] - [parm (car p)] - [value (cdr p)]) - (cond [(string=? parm "filename") - (set-disposition-filename! disp value)] - [(string=? parm "creation-date") - (set-disposition-creation! - disp - (disp-quoted-data-time value))] - [(string=? parm "modification-date") - (set-disposition-modification! - disp - (disp-quoted-data-time value))] - [(string=? parm "read-date") - (set-disposition-read! - disp - (disp-quoted-data-time value))] - [(string=? parm "size") - (set-disposition-size! - disp - (string->number value))] - [else - (set-disposition-params! - disp - (append (disposition-params disp) (list p)))]) - (loop (cdr lst)))))) - -;; date-time = [ day "," ] date time ; dd mm yy -;; ; hh:mm:ss zzz -;; -;; day = "Mon" / "Tue" / "Wed" / "Thu" -;; / "Fri" / "Sat" / "Sun" -;; -;; date = 1*2DIGIT month 2DIGIT ; day month year -;; ; e.g. 20 Jun 82 -;; -;; month = "Jan" / "Feb" / "Mar" / "Apr" -;; / "May" / "Jun" / "Jul" / "Aug" -;; / "Sep" / "Oct" / "Nov" / "Dec" -;; -;; time = hour zone ; ANSI and Military -;; -;; hour = 2DIGIT ":" 2DIGIT [":" 2DIGIT] -;; ; 00:00:00 - 23:59:59 -;; -;; zone = "UT" / "GMT" ; Universal Time -;; ; North American : UT -;; / "EST" / "EDT" ; Eastern: - 5/ - 4 -;; / "CST" / "CDT" ; Central: - 6/ - 5 -;; / "MST" / "MDT" ; Mountain: - 7/ - 6 -;; / "PST" / "PDT" ; Pacific: - 8/ - 7 -;; / 1ALPHA ; Military: Z = UT; -;; ; A:-1; (J not used) -;; ; M:-12; N:+1; Y:+12 -;; / ( ("+" / "-") 4DIGIT ) ; Local differential -;; ; hours+min. (HHMM) -(define date-time - (lambda (str) - ;; Fix Me: I have to return a date structure, or time in seconds. - str)) - -;; quoted-date-time := quoted-string -;; ; contents MUST be an RFC 822 `date-time' -;; ; numeric timezones (+HHMM or -HHMM) MUST be used - -(define disp-quoted-data-time date-time) +(provide mime@)