add file/unzip

Based on Dave Herman's "zip.plt" Planet package.
This commit is contained in:
Matthew Flatt 2012-11-20 08:28:39 -07:00
parent dcf2b0f4dc
commit 8a77d87a30
7 changed files with 616 additions and 20 deletions

View File

@ -0,0 +1,24 @@
#lang racket/base
(provide strip-prefix)
(define (strip-prefix filename strip-count)
(if (zero? strip-count)
filename
(let-values ([(name count)
(let loop ([fn filename])
(define-values (base name dir?) (split-path fn))
(cond
[(eq? 'relative base)
(values 'same strip-count)]
[else
(define-values (res count) (loop base))
(if (count . <= . 1)
(if (eq? res 'same)
(values name 0)
(values (build-path res name) 0))
(values res (sub1 count)))]))])
(if (and (zero? count)
(not (eq? name 'same)))
name
#f))))

View File

@ -9,6 +9,7 @@
@include-section["gzip.scrbl"]
@include-section["gunzip.scrbl"]
@include-section["zip.scrbl"]
@include-section["unzip.scrbl"]
@include-section["tar.scrbl"]
@include-section["untar.scrbl"]
@include-section["untgz.scrbl"]

View File

@ -0,0 +1,133 @@
#lang scribble/doc
@(require "common.rkt" (for-label file/unzip))
@title[#:tag "unzip"]{@exec{zip} File Extraction}
@author{David Herman}
@defmodule[file/unzip]{The @racketmodname[file/unzip] library provides
a function to extract items from a @exec{zip} archive.}
@defproc[(unzip [in (or/c path-string? input-port)]
[entry-reader (bytes? boolean? input-port? . -> . any)
(make-filesystem-entry-reader)])
void?]{
Unzips an entire @exec{zip} archive from @racket[in].
For each entry in the archive, the @racket[entry-reader] procedure is
called with three arguments: the byte string representing the entry
name, a boolean flag indicating whether the entry represents a
directory, and an input port containing the inflated contents of the
entry. The default @racket[entry-reader] unpacks entries to the
filesystem; call @racket[make-filesystem-entry-reader] to configure
aspects of the unpacking, such as the destination directory.}
@defproc[(make-filesystem-entry-reader
[#:dest dest-path (or/c path-string? #f) #f]
[#:strip-count strip-count exact-nonnegative-integer? 0]
[#:exists exists (or/c 'skip 'error 'replace 'truncate
'truncate/replace 'append 'update
'can-update 'must-truncate)
'error])
(bytes? boolean? input-port? . -> . any)]{
Creates a @exec{zip} entry reader that can be used with either
@racket[unzip] or @racket[unzip-entry] and whose behavior is to save
entries to the local filesystem. Intermediate directories are always
created if necessary before creating files. Directory entries are
created as directories in the filesystem, and their entry contents are
ignored.
If @racket[dest-path] is not @racket[#f], every path in the archive is
prefixed to determine the destination path of the extracted entry.
If @racket[strip-count] is positive, then @racket[strip-count] path
elements are removed from the entry path from the archive (before
prefixing the path with @racket[dest-path]); if the item's path
contains @racket[strip-count] elements, then it is not extracted.
If @racket[exists] is @racket['skip] and the file for an entry already
exists, then the entry is skipped. Otherwise, @racket[exists] is
passed on to @racket[open-output-file] for writing the entry's
inflated content.}
@defproc[(read-zip-directory [in (or/c path-string? input-port?)]) zip-directory?]{
Reads the central directory of a @exec{zip} file and generates a
@deftech{zip directory} representing the zip file's contents. If
@racket[in] is an input port, it must support position setting via
@racket[file-position].
This procedure performs limited I/O: it reads the list of entries from
the @exec{zip} file, but it does not inflate any of their
contents.}
@defproc[(zip-directory? [v any/c]) boolean?]{
Returns @racket[#t] if @racket[v] is a @tech{zip directory},
@racket[#f] otherwise.}
@defproc[(zip-directory-entries [zipdir zip-directory?]) (listof bytes?)]{
Extracts the list of entries for a @exec{zip} archive.}
@defproc[(zip-directory-contains? [zipdir zip-directory?]
[name (or/c bytes? path-string?)])
boolean?]{
Determines whether the given entry name occurs in the given @tech{zip
directory}. If @racket[name] is not a byte string, it is converted
using @racket[path->zip-path].
Directory entries match with or without trailing slashes.}
@defproc[(zip-directory-includes-directory? [zipdir zip-directory?]
[name (or/c bytes? path-string?)])
boolean?]{
Determines whether the given name is included anywhere in the given
@tech{zip directory} as a filesystem directory, either as an entry
itself or as the containing directory of other entries. If
@racket[name] is not a byte string, it is converted using
@racket[path->zip-path].}
@defproc[(unzip-entry [path (or/c path-string? input-port?)]
[zipdir zip-directory?]
[entry (or/c bytes? path-string?)]
[entry-reader (bytes? boolean? input-port? . -> . any)
(make-filesystem-entry-reader)])
void?]{
Unzips a single entry from a @exec{zip} archive based on a previously
read @tech{zip directory}, @racket[zipdir], from
@racket[read-zip-directory]. If @racket[in] is an input port, it must
support position setting via @racket[file-position].
The @racket[entry] parameter is a byte string whose name must be found
in the zip file's central directory. If @racket[entry] is not a byte
string, it is converted using @racket[path->zip-path].
The @racket[read-entry] argument is used to read the contents of the zip entry
in the same way as for @racket[unzip].
If @racket[entry] is not in @racket[zipdir], an
@racket[exn:fail:unzip:no-such-entry] exception is raised.}
@defproc[(path->zip-path [path path-string?]) bytes?]{
Converts a file name potentially containing path separators in the current
platform's format to use path separators recognized by the zip file
format: @litchar{/}.}
@defstruct[(exn:fail:unzip:no-such-entry exn:fail) ([entry bytes?])]{
Raised when a requested entry cannot be found in a @exec{zip}
archive. The @racket[entry] field is a byte string representing the
requested entry name.}

View File

@ -1,6 +1,7 @@
#lang racket/base
(require racket/file
racket/contract/base)
racket/contract/base
"private/strip-prefix.rkt")
(provide
(contract-out
@ -73,25 +74,7 @@
(nul-terminated name-bytes))))
(when (absolute-path? base-filename)
(error 'untar "won't extract a file with an absolute path: ~e" base-filename))
(define stripped-filename (if (zero? strip-count)
base-filename
(let-values ([(name count)
(let loop ([fn base-filename])
(define-values (base name dir?) (split-path fn))
(cond
[(eq? 'relative base)
(values 'same strip-count)]
[else
(define-values (res count) (loop base))
(if (count . <= . 1)
(if (eq? res 'same)
(values name 0)
(values (build-path res name) 0))
(values res (sub1 count)))]))])
(if (and (zero? count)
(not (eq? name 'same)))
name
#f))))
(define stripped-filename (strip-prefix base-filename strip-count))
(define filename (and stripped-filename
(if dest
(build-path dest stripped-filename)

336
collects/file/unzip.rkt Normal file
View File

@ -0,0 +1,336 @@
#lang racket/base
(require racket/contract/base
racket/port
racket/file
file/gunzip
"private/strip-prefix.rkt")
(provide
(struct-out exn:fail:unzip:no-such-entry)
(contract-out
[unzip (((or/c path-string? input-port?))
((bytes? boolean? input-port? . -> . any))
. ->* . any)]
[make-filesystem-entry-reader (() (#:dest
(or/c #f path-string?)
#:strip-count
exact-nonnegative-integer?
#:exists
(or/c 'skip
'error 'replace 'truncate 'truncate/replace 'append 'update
'can-update 'must-truncate))
. ->* .
(bytes? boolean? input-port? . -> . any))]
[read-zip-directory ((or/c path-string? input-port?) . -> . zip-directory?)]
[zip-directory? (any/c . -> . boolean?)]
[zip-directory-entries (zip-directory? . -> . (listof bytes?))]
[zip-directory-contains? (zip-directory? (or/c path-string? bytes?) . -> . boolean?)]
[zip-directory-includes-directory? (zip-directory? (or/c path-string? input-port?) . -> . boolean?)]
[unzip-entry (((or/c path-string? input-port?) zip-directory? bytes?)
((bytes? boolean? input-port? . -> . any))
. ->* .
any)]
[path->zip-path ((or/c string? path?) . -> . bytes?)]))
;; ===========================================================================
;; CONSTANTS
;; ===========================================================================
(define *local-file-header* #x04034b50)
(define *archive-extra-record* #x08064b50)
(define *central-file-header* #x02014b50)
(define *digital-signature* #x05054b50)
(define *zip64-end-of-central-directory-record* #x06064b50)
(define *zip64-end-of-central-directory-locator* #x07064b50)
(define *end-of-central-directory-record* #x06054b50)
;; ===========================================================================
;; DATATYPES AND UTILITIES
;; ===========================================================================
(define-struct (exn:fail:unzip:no-such-entry exn:fail) (entry)
#:guard (lambda (msg cm entry who)
(unless (bytes? entry)
(raise-argument-error who "bytes?" entry))
(values msg cm entry)))
;; (alistof bytes zip-entry)
(define-struct zip-directory (contents))
;; nat * boolean
(define-struct zip-entry (offset dir?))
(define (raise-unzip-error message)
(error 'unzip "~a" message))
(define (raise-entry-not-found entry)
(raise
(make-exn:fail:unzip:no-such-entry
(string->immutable-string
(format "unzip: entry not found: \"~a\"" (bytes->string/latin-1 entry)))
(current-continuation-marks)
entry)))
;; zip-directory-entries : zip-directory -> (listof bytes)
(define (zip-directory-entries zipdir)
(map car (zip-directory-contents zipdir)))
;; zip-directory-lookup : bytes zip-directory -> (option zip-entry)
(define (zip-directory-lookup entry zipdir)
(let loop ([contents (zip-directory-contents zipdir)])
(cond
[(null? contents) #f]
[(or (bytes=? entry (caar contents))
(bytes=? (bytes-append entry #"/") (caar contents)))
(cdar contents)]
[else (loop (cdr contents))])))
;; zip-directory-contains? : zip-directory (union string path bytes) -> boolean
(define (zip-directory-contains? zipdir entry)
(if (bytes? entry)
(and (zip-directory-lookup entry zipdir) #t)
(zip-directory-contains? zipdir (path->zip-path entry))))
;; matches-directory? : bytes bytes -> boolean
(define (bytes-prefix? dirname entry-name)
(let ([dirname-len (bytes-length dirname)]
[entry-name-len (bytes-length entry-name)])
(and (>= entry-name-len dirname-len)
(bytes=? (subbytes entry-name 0 dirname-len) dirname))))
;; zip-directory-includes-directory? : zip-directory (union string path bytes) -> boolean
(define (zip-directory-includes-directory? zipdir dirname)
(if (bytes? dirname)
(ormap (lambda (pair)
(bytes-prefix? dirname (car pair)))
(zip-directory-contents zipdir))
(zip-directory-includes-directory? zipdir (path->zip-path dirname))))
;; path->zip-path : (union path string) -> bytes
(define (path->zip-path p)
(let ([p (simplify-path p #f)])
(if (path? p)
(bytes->zip-bytes (path->bytes p))
(bytes->zip-bytes (string->bytes/latin-1 p)))))
(define (bytes->zip-bytes b)
(case (system-path-convention-type)
[(windows) (regexp-replace* #rx#"\\\\" b #"/")]
[else b]))
;; ===========================================================================
;; UNZIPPING ENGINE
;; ===========================================================================
(define *slash-byte* (char->integer #\/))
(define (directory-entry? name)
(= (bytes-ref name (sub1 (bytes-length name))) *slash-byte*))
(define (read-integer count signed? in big-endian?)
(define bstr (read-bytes count in))
(unless (and (bytes? bstr) (= count (bytes-length bstr)))
(error 'unzip "unexpected EOF"))
(integer-bytes->integer bstr signed? big-endian?))
(define (peek-integer count signed? in big-endian?)
(define bstr (peek-bytes count 0 in))
(unless (and (bytes? bstr) (= count (bytes-length bstr)))
(error 'unzip "unexpected EOF"))
(integer-bytes->integer bstr signed? big-endian?))
(define (make-filter-input-port inflate orig-in)
(define-values (in out) (make-pipe 4096))
(values
in
(thread (lambda ()
(inflate orig-in out)
(close-output-port out)))))
(define (skip-bytes amt in)
(read-bytes amt in)
(void))
;; unzip-one-entry : input-port (bytes boolean input-port -> a) -> a
(define (unzip-one-entry in read-entry)
(let ([read-int (lambda (count) (read-integer count #f in #f))])
(let* ([signature (read-int 4)]
[version (read-bytes 2 in)]
[bits (read-int 2)]
[compression (read-int 2)]
[time (read-int 2)]
[date (read-int 2)]
[crc-32 (read-int 4)]
[compressed (read-int 4)]
[uncompressed (read-int 4)]
[filename-length (read-int 2)]
[extra-length (read-int 2)]
[filename (read-bytes filename-length in)]
[extra (read-bytes extra-length in)])
(let* ([mark (file-position in)]
[dir? (directory-entry? filename)]
;; appnote VI-J : if bit 3 is set, the fields crc-32,
;; compressed size, and uncompressed size are set to
;; zero in the local header
[in0 (if (bitwise-bit-set? bits 3)
in
(make-limited-input-port in compressed #f))])
(dynamic-wind
void
(lambda ()
(define-values (in t)
(if (zero? compression)
(values in0 #f)
(make-filter-input-port inflate in0)))
(read-entry filename dir? in)
(when t (kill-thread t)))
(lambda ()
;; appnote VI-C : if bit 3 is set, then the file data
;; is immediately followed by a data descriptor
(if (bitwise-bit-set? bits 3)
(skip-bytes 12 in)
(file-position in (+ mark compressed))))))
(void))))
;; find-central-directory : input-port nat -> nat nat nat
(define (find-central-directory in size)
(let loop ([pos (- size 18)])
(unless (positive? pos)
(raise-unzip-error "no central directory"))
(file-position in pos)
(let* ([read-int (lambda (count) (read-integer count #f in #f))]
[signature (read-int 4)])
(if (= signature *end-of-central-directory-record*)
(let ([disk-number (read-int 2)]
[directory-disk (read-int 2)]
[disk-entries (read-int 2)]
[entry-count (read-int 2)]
[directory-length (read-int 4)]
[directory-offset (read-int 4)]
[comment-length (read-int 2)])
(if (= (- size (file-position in)) comment-length)
(values directory-offset directory-length entry-count)
(loop (sub1 pos))))
(loop (sub1 pos))))))
;; read-central-directory : input-port nat -> (alistof bytes zip-entry)
(define (read-central-directory in size)
(let-values ([(offset length count) (find-central-directory in size)])
(file-position in offset)
(build-list count
(lambda (i)
(let* ([read-int (lambda (count)
(read-integer count #f in #f))]
[signature (read-int 4)])
(unless (= signature *central-file-header*)
(raise-unzip-error
(format "bad central file header signature: ~a"
signature)))
(let ([version (read-int 2)]
[required (read-int 2)]
[bits (read-int 2)]
[compression (read-int 2)]
[time (read-int 2)]
[date (read-int 2)]
[crc-32 (read-int 4)]
[compressed (read-int 4)]
[uncompressed (read-int 4)]
[filename-length (read-int 2)]
[extra-length (read-int 2)]
[comment-length (read-int 2)]
[disk-number (read-int 2)]
[internal-attributes (read-int 2)]
[external-attributes (read-int 4)]
[relative-offset (read-int 4)])
(let* ([filename (read-bytes filename-length in)]
[dir? (directory-entry? filename)])
(skip-bytes (+ extra-length comment-length) in)
(cons filename (make-zip-entry relative-offset dir?)))))))))
;; ===========================================================================
;; FRONT END
;; ===========================================================================
(define (call-with-input in proc)
((if (input-port? in)
(lambda (in f) (f in))
call-with-input-file*)
in
proc))
;; unzip : [(or/c path-string? input-port) (bytes boolean input-port -> any)] -> any
(define unzip
(lambda (in [read-entry (make-filesystem-entry-reader)])
(call-with-input
in
(lambda (in)
(when (= (peek-integer 4 #f in #f) *local-file-header*)
(unzip-one-entry in read-entry)
(unzip in read-entry))))))
(define (input-size in)
(file-position in eof)
(begin0
(file-position in)
(file-position in 0)))
;; read-zip-directory : (union string path) -> zip-directory
(define (read-zip-directory in)
(make-zip-directory
(call-with-input
in
(lambda (in)
(read-central-directory in
(input-size in))))))
;; unzip-entry : (union string path) zip-directory bytes [(bytes boolean input-port -> a)] -> a
(define unzip-entry
(lambda (in dir entry-name [read-entry (make-filesystem-entry-reader)])
(cond
[(zip-directory-lookup entry-name dir)
=> (lambda (entry)
(call-with-input
in
(lambda (in)
(file-position in (zip-entry-offset entry))
(unzip-one-entry in read-entry))))]
[else (raise-entry-not-found entry-name)])))
;; ===========================================================================
;; ENTRY PARSERS
;; ===========================================================================
;; make-filesystem-entry-reader : [output-flag] -> (bytes boolean input-port -> any)
(define make-filesystem-entry-reader
(lambda (#:dest [dest-dir #f] #:strip-count [strip-count 0] #:exists [flag 'error])
(lambda (name dir? in)
(let* ([base-path (strip-prefix (bytes->path name) strip-count)]
[path (and base-path
(if dest-dir
(build-path dest-dir base-path)
base-path))])
(when path
(if dir?
(unless (directory-exists? path)
(make-directory* path))
(let ([parent (dirname path)])
(unless (directory-exists? parent)
(make-directory* parent))
(unless (and (eq? flag 'skip)
(file-exists? path))
(with-output-to-file path
#:exists flag
(lambda ()
(copy-port in (current-output-port))))))))))))
(define (dirname p)
(define-values (base name dir?) (split-path p))
(if (path? base)
base
(current-directory)))

View File

@ -0,0 +1,118 @@
#lang racket/base
(require file/unzip
racket/file
racket/system)
(define tmp (find-system-path 'temp-dir))
(define zip-exe (find-executable-path "zip"))
(define work-dir (build-path tmp (format "unzip-testing~a" (random 1000))))
(printf "Working in ~a\n" work-dir)
(when (directory-exists? work-dir)
(delete-directory/files work-dir))
(define a.zip (build-path work-dir "a.zip"))
(define sub-dir (build-path work-dir "sub"))
(define (make-file path)
(with-output-to-file path
(lambda ()
(write-bytes (make-bytes (random 100000) 42))))
(void))
(define ex1-dir (build-path work-dir "ex1"))
(define more-dir (build-path ex1-dir "more"))
(make-directory* ex1-dir)
(make-file (build-path ex1-dir "f1"))
(make-file (build-path ex1-dir "f2"))
(make-file (build-path ex1-dir "f3"))
(make-directory* more-dir)
(make-file (build-path more-dir "f4"))
(define (zip dir)
(define-values (base name dir?) (split-path dir))
(parameterize ([current-directory base])
(void (system* zip-exe "-r" a.zip name))))
(define (diff-error src dest)
(error 'diff "different: ~e ~e\n" src dest))
(define (diff src dest)
(cond
[(link-exists? src)
(unless (link-exists? dest) (diff-error src dest))
(diff (resolve-path src) (resolve-path dest))]
[(file-exists? src)
(unless (and (file-exists? dest)
(= (file-size src) (file-size dest))
(equal? (file->bytes src) (file->bytes dest)))
(diff-error src dest))]
[(directory-exists? src)
(unless (directory-exists? dest)
(diff-error src dest))
(define (sort-paths l)
(sort l bytes<? #:key path->bytes))
(define srcs (sort-paths (directory-list src)))
(define dests (sort-paths (directory-list dest)))
(unless (equal? srcs dests) (diff-error src dest))
(for ([src-item (in-list srcs)]
[dest-item (in-list dests)])
(diff (build-path src src-item) (build-path dest dest-item)))]
[else (void)]))
(zip ex1-dir)
(make-directory* sub-dir)
(parameterize ([current-directory sub-dir])
(unzip a.zip))
(diff ex1-dir (build-path sub-dir "ex1"))
(delete-directory/files sub-dir)
(parameterize ([current-directory work-dir])
(unzip a.zip (make-filesystem-entry-reader #:dest "sub")))
(diff ex1-dir (build-path sub-dir "ex1"))
(delete-directory/files sub-dir)
(parameterize ([current-directory work-dir])
(unzip a.zip (lambda (bytes dir? in) (void))))
(when (directory-exists? sub-dir)
(error "should not have been unpacked"))
(define (directory-test src)
(define zd (read-zip-directory src))
(unless (zip-directory? zd)
(error "not a zip directory"))
(define (check-there p)
(unless (zip-directory-contains? zd p)
(error 'unzip-test "not there: ~e" p)))
(check-there "ex1/f1")
(check-there #"ex1/f1")
(check-there "ex1/more/f4")
(check-there (string->path "ex1/more/f4"))
(unless (zip-directory-includes-directory? zd "ex1/more")
(error "directory missing"))
(define (check-not-there p)
(when (zip-directory-contains? zd p)
(error "there!"))
(with-handlers ([exn:fail:unzip:no-such-entry?
(lambda (exn)
(unless (equal? (exn:fail:unzip:no-such-entry-entry exn)
(if (bytes? p)
p
(path->zip-path p)))
(error "bad exn")))])
(unzip-entry src zd p)))
(check-not-there #"f1")
(for ([entry (in-list (zip-directory-entries zd))])
(parameterize ([current-directory work-dir])
(unzip-entry src zd entry
(make-filesystem-entry-reader #:dest "sub"))))
(diff ex1-dir (build-path sub-dir "ex1"))
(delete-directory/files sub-dir))
(directory-test a.zip)
(call-with-input-file a.zip
directory-test)
(delete-directory/files work-dir)

View File

@ -1,6 +1,7 @@
Version 5.3.1.8
file/untar: added
file/untgz: added
file/unzip: added
Version 5.3.1.7
compiler/zo-structs: generalize flonum? field to type