racket/collects/file/unzip.rkt
Matthew Flatt 8a77d87a30 add file/unzip
Based on Dave Herman's "zip.plt" Planet package.
2012-11-20 10:12:27 -07:00

337 lines
13 KiB
Racket

#lang racket/base
(require racket/contract/base
racket/port
racket/file
file/gunzip
"private/strip-prefix.rkt")
(provide
(struct-out exn:fail:unzip:no-such-entry)
(contract-out
[unzip (((or/c path-string? input-port?))
((bytes? boolean? input-port? . -> . any))
. ->* . any)]
[make-filesystem-entry-reader (() (#:dest
(or/c #f path-string?)
#:strip-count
exact-nonnegative-integer?
#:exists
(or/c 'skip
'error 'replace 'truncate 'truncate/replace 'append 'update
'can-update 'must-truncate))
. ->* .
(bytes? boolean? input-port? . -> . any))]
[read-zip-directory ((or/c path-string? input-port?) . -> . zip-directory?)]
[zip-directory? (any/c . -> . boolean?)]
[zip-directory-entries (zip-directory? . -> . (listof bytes?))]
[zip-directory-contains? (zip-directory? (or/c path-string? bytes?) . -> . boolean?)]
[zip-directory-includes-directory? (zip-directory? (or/c path-string? input-port?) . -> . boolean?)]
[unzip-entry (((or/c path-string? input-port?) zip-directory? bytes?)
((bytes? boolean? input-port? . -> . any))
. ->* .
any)]
[path->zip-path ((or/c string? path?) . -> . bytes?)]))
;; ===========================================================================
;; CONSTANTS
;; ===========================================================================
(define *local-file-header* #x04034b50)
(define *archive-extra-record* #x08064b50)
(define *central-file-header* #x02014b50)
(define *digital-signature* #x05054b50)
(define *zip64-end-of-central-directory-record* #x06064b50)
(define *zip64-end-of-central-directory-locator* #x07064b50)
(define *end-of-central-directory-record* #x06054b50)
;; ===========================================================================
;; DATATYPES AND UTILITIES
;; ===========================================================================
(define-struct (exn:fail:unzip:no-such-entry exn:fail) (entry)
#:guard (lambda (msg cm entry who)
(unless (bytes? entry)
(raise-argument-error who "bytes?" entry))
(values msg cm entry)))
;; (alistof bytes zip-entry)
(define-struct zip-directory (contents))
;; nat * boolean
(define-struct zip-entry (offset dir?))
(define (raise-unzip-error message)
(error 'unzip "~a" message))
(define (raise-entry-not-found entry)
(raise
(make-exn:fail:unzip:no-such-entry
(string->immutable-string
(format "unzip: entry not found: \"~a\"" (bytes->string/latin-1 entry)))
(current-continuation-marks)
entry)))
;; zip-directory-entries : zip-directory -> (listof bytes)
(define (zip-directory-entries zipdir)
(map car (zip-directory-contents zipdir)))
;; zip-directory-lookup : bytes zip-directory -> (option zip-entry)
(define (zip-directory-lookup entry zipdir)
(let loop ([contents (zip-directory-contents zipdir)])
(cond
[(null? contents) #f]
[(or (bytes=? entry (caar contents))
(bytes=? (bytes-append entry #"/") (caar contents)))
(cdar contents)]
[else (loop (cdr contents))])))
;; zip-directory-contains? : zip-directory (union string path bytes) -> boolean
(define (zip-directory-contains? zipdir entry)
(if (bytes? entry)
(and (zip-directory-lookup entry zipdir) #t)
(zip-directory-contains? zipdir (path->zip-path entry))))
;; matches-directory? : bytes bytes -> boolean
(define (bytes-prefix? dirname entry-name)
(let ([dirname-len (bytes-length dirname)]
[entry-name-len (bytes-length entry-name)])
(and (>= entry-name-len dirname-len)
(bytes=? (subbytes entry-name 0 dirname-len) dirname))))
;; zip-directory-includes-directory? : zip-directory (union string path bytes) -> boolean
(define (zip-directory-includes-directory? zipdir dirname)
(if (bytes? dirname)
(ormap (lambda (pair)
(bytes-prefix? dirname (car pair)))
(zip-directory-contents zipdir))
(zip-directory-includes-directory? zipdir (path->zip-path dirname))))
;; path->zip-path : (union path string) -> bytes
(define (path->zip-path p)
(let ([p (simplify-path p #f)])
(if (path? p)
(bytes->zip-bytes (path->bytes p))
(bytes->zip-bytes (string->bytes/latin-1 p)))))
(define (bytes->zip-bytes b)
(case (system-path-convention-type)
[(windows) (regexp-replace* #rx#"\\\\" b #"/")]
[else b]))
;; ===========================================================================
;; UNZIPPING ENGINE
;; ===========================================================================
(define *slash-byte* (char->integer #\/))
(define (directory-entry? name)
(= (bytes-ref name (sub1 (bytes-length name))) *slash-byte*))
(define (read-integer count signed? in big-endian?)
(define bstr (read-bytes count in))
(unless (and (bytes? bstr) (= count (bytes-length bstr)))
(error 'unzip "unexpected EOF"))
(integer-bytes->integer bstr signed? big-endian?))
(define (peek-integer count signed? in big-endian?)
(define bstr (peek-bytes count 0 in))
(unless (and (bytes? bstr) (= count (bytes-length bstr)))
(error 'unzip "unexpected EOF"))
(integer-bytes->integer bstr signed? big-endian?))
(define (make-filter-input-port inflate orig-in)
(define-values (in out) (make-pipe 4096))
(values
in
(thread (lambda ()
(inflate orig-in out)
(close-output-port out)))))
(define (skip-bytes amt in)
(read-bytes amt in)
(void))
;; unzip-one-entry : input-port (bytes boolean input-port -> a) -> a
(define (unzip-one-entry in read-entry)
(let ([read-int (lambda (count) (read-integer count #f in #f))])
(let* ([signature (read-int 4)]
[version (read-bytes 2 in)]
[bits (read-int 2)]
[compression (read-int 2)]
[time (read-int 2)]
[date (read-int 2)]
[crc-32 (read-int 4)]
[compressed (read-int 4)]
[uncompressed (read-int 4)]
[filename-length (read-int 2)]
[extra-length (read-int 2)]
[filename (read-bytes filename-length in)]
[extra (read-bytes extra-length in)])
(let* ([mark (file-position in)]
[dir? (directory-entry? filename)]
;; appnote VI-J : if bit 3 is set, the fields crc-32,
;; compressed size, and uncompressed size are set to
;; zero in the local header
[in0 (if (bitwise-bit-set? bits 3)
in
(make-limited-input-port in compressed #f))])
(dynamic-wind
void
(lambda ()
(define-values (in t)
(if (zero? compression)
(values in0 #f)
(make-filter-input-port inflate in0)))
(read-entry filename dir? in)
(when t (kill-thread t)))
(lambda ()
;; appnote VI-C : if bit 3 is set, then the file data
;; is immediately followed by a data descriptor
(if (bitwise-bit-set? bits 3)
(skip-bytes 12 in)
(file-position in (+ mark compressed))))))
(void))))
;; find-central-directory : input-port nat -> nat nat nat
(define (find-central-directory in size)
(let loop ([pos (- size 18)])
(unless (positive? pos)
(raise-unzip-error "no central directory"))
(file-position in pos)
(let* ([read-int (lambda (count) (read-integer count #f in #f))]
[signature (read-int 4)])
(if (= signature *end-of-central-directory-record*)
(let ([disk-number (read-int 2)]
[directory-disk (read-int 2)]
[disk-entries (read-int 2)]
[entry-count (read-int 2)]
[directory-length (read-int 4)]
[directory-offset (read-int 4)]
[comment-length (read-int 2)])
(if (= (- size (file-position in)) comment-length)
(values directory-offset directory-length entry-count)
(loop (sub1 pos))))
(loop (sub1 pos))))))
;; read-central-directory : input-port nat -> (alistof bytes zip-entry)
(define (read-central-directory in size)
(let-values ([(offset length count) (find-central-directory in size)])
(file-position in offset)
(build-list count
(lambda (i)
(let* ([read-int (lambda (count)
(read-integer count #f in #f))]
[signature (read-int 4)])
(unless (= signature *central-file-header*)
(raise-unzip-error
(format "bad central file header signature: ~a"
signature)))
(let ([version (read-int 2)]
[required (read-int 2)]
[bits (read-int 2)]
[compression (read-int 2)]
[time (read-int 2)]
[date (read-int 2)]
[crc-32 (read-int 4)]
[compressed (read-int 4)]
[uncompressed (read-int 4)]
[filename-length (read-int 2)]
[extra-length (read-int 2)]
[comment-length (read-int 2)]
[disk-number (read-int 2)]
[internal-attributes (read-int 2)]
[external-attributes (read-int 4)]
[relative-offset (read-int 4)])
(let* ([filename (read-bytes filename-length in)]
[dir? (directory-entry? filename)])
(skip-bytes (+ extra-length comment-length) in)
(cons filename (make-zip-entry relative-offset dir?)))))))))
;; ===========================================================================
;; FRONT END
;; ===========================================================================
(define (call-with-input in proc)
((if (input-port? in)
(lambda (in f) (f in))
call-with-input-file*)
in
proc))
;; unzip : [(or/c path-string? input-port) (bytes boolean input-port -> any)] -> any
(define unzip
(lambda (in [read-entry (make-filesystem-entry-reader)])
(call-with-input
in
(lambda (in)
(when (= (peek-integer 4 #f in #f) *local-file-header*)
(unzip-one-entry in read-entry)
(unzip in read-entry))))))
(define (input-size in)
(file-position in eof)
(begin0
(file-position in)
(file-position in 0)))
;; read-zip-directory : (union string path) -> zip-directory
(define (read-zip-directory in)
(make-zip-directory
(call-with-input
in
(lambda (in)
(read-central-directory in
(input-size in))))))
;; unzip-entry : (union string path) zip-directory bytes [(bytes boolean input-port -> a)] -> a
(define unzip-entry
(lambda (in dir entry-name [read-entry (make-filesystem-entry-reader)])
(cond
[(zip-directory-lookup entry-name dir)
=> (lambda (entry)
(call-with-input
in
(lambda (in)
(file-position in (zip-entry-offset entry))
(unzip-one-entry in read-entry))))]
[else (raise-entry-not-found entry-name)])))
;; ===========================================================================
;; ENTRY PARSERS
;; ===========================================================================
;; make-filesystem-entry-reader : [output-flag] -> (bytes boolean input-port -> any)
(define make-filesystem-entry-reader
(lambda (#:dest [dest-dir #f] #:strip-count [strip-count 0] #:exists [flag 'error])
(lambda (name dir? in)
(let* ([base-path (strip-prefix (bytes->path name) strip-count)]
[path (and base-path
(if dest-dir
(build-path dest-dir base-path)
base-path))])
(when path
(if dir?
(unless (directory-exists? path)
(make-directory* path))
(let ([parent (dirname path)])
(unless (directory-exists? parent)
(make-directory* parent))
(unless (and (eq? flag 'skip)
(file-exists? path))
(with-output-to-file path
#:exists flag
(lambda ()
(copy-port in (current-output-port))))))))))))
(define (dirname p)
(define-values (base name dir?) (split-path p))
(if (path? base)
base
(current-directory)))