diff --git a/collects/file/private/strip-prefix.rkt b/collects/file/private/strip-prefix.rkt new file mode 100644 index 0000000000..30885bee02 --- /dev/null +++ b/collects/file/private/strip-prefix.rkt @@ -0,0 +1,24 @@ +#lang racket/base + +(provide strip-prefix) + +(define (strip-prefix filename strip-count) + (if (zero? strip-count) + filename + (let-values ([(name count) + (let loop ([fn filename]) + (define-values (base name dir?) (split-path fn)) + (cond + [(eq? 'relative base) + (values 'same strip-count)] + [else + (define-values (res count) (loop base)) + (if (count . <= . 1) + (if (eq? res 'same) + (values name 0) + (values (build-path res name) 0)) + (values res (sub1 count)))]))]) + (if (and (zero? count) + (not (eq? name 'same))) + name + #f)))) diff --git a/collects/file/scribblings/file.scrbl b/collects/file/scribblings/file.scrbl index 664871ce6e..99c7c71a6d 100644 --- a/collects/file/scribblings/file.scrbl +++ b/collects/file/scribblings/file.scrbl @@ -9,6 +9,7 @@ @include-section["gzip.scrbl"] @include-section["gunzip.scrbl"] @include-section["zip.scrbl"] +@include-section["unzip.scrbl"] @include-section["tar.scrbl"] @include-section["untar.scrbl"] @include-section["untgz.scrbl"] diff --git a/collects/file/scribblings/unzip.scrbl b/collects/file/scribblings/unzip.scrbl new file mode 100644 index 0000000000..db781229b9 --- /dev/null +++ b/collects/file/scribblings/unzip.scrbl @@ -0,0 +1,133 @@ +#lang scribble/doc +@(require "common.rkt" (for-label file/unzip)) + +@title[#:tag "unzip"]{@exec{zip} File Extraction} +@author{David Herman} + +@defmodule[file/unzip]{The @racketmodname[file/unzip] library provides +a function to extract items from a @exec{zip} archive.} + +@defproc[(unzip [in (or/c path-string? input-port)] + [entry-reader (bytes? boolean? input-port? . -> . any) + (make-filesystem-entry-reader)]) + void?]{ + +Unzips an entire @exec{zip} archive from @racket[in]. + +For each entry in the archive, the @racket[entry-reader] procedure is +called with three arguments: the byte string representing the entry +name, a boolean flag indicating whether the entry represents a +directory, and an input port containing the inflated contents of the +entry. The default @racket[entry-reader] unpacks entries to the +filesystem; call @racket[make-filesystem-entry-reader] to configure +aspects of the unpacking, such as the destination directory.} + +@defproc[(make-filesystem-entry-reader + [#:dest dest-path (or/c path-string? #f) #f] + [#:strip-count strip-count exact-nonnegative-integer? 0] + [#:exists exists (or/c 'skip 'error 'replace 'truncate + 'truncate/replace 'append 'update + 'can-update 'must-truncate) + 'error]) + (bytes? boolean? input-port? . -> . any)]{ + +Creates a @exec{zip} entry reader that can be used with either +@racket[unzip] or @racket[unzip-entry] and whose behavior is to save +entries to the local filesystem. Intermediate directories are always +created if necessary before creating files. Directory entries are +created as directories in the filesystem, and their entry contents are +ignored. + +If @racket[dest-path] is not @racket[#f], every path in the archive is +prefixed to determine the destination path of the extracted entry. + +If @racket[strip-count] is positive, then @racket[strip-count] path +elements are removed from the entry path from the archive (before +prefixing the path with @racket[dest-path]); if the item's path +contains @racket[strip-count] elements, then it is not extracted. + +If @racket[exists] is @racket['skip] and the file for an entry already +exists, then the entry is skipped. Otherwise, @racket[exists] is +passed on to @racket[open-output-file] for writing the entry's +inflated content.} + + +@defproc[(read-zip-directory [in (or/c path-string? input-port?)]) zip-directory?]{ + +Reads the central directory of a @exec{zip} file and generates a +@deftech{zip directory} representing the zip file's contents. If +@racket[in] is an input port, it must support position setting via +@racket[file-position]. + +This procedure performs limited I/O: it reads the list of entries from +the @exec{zip} file, but it does not inflate any of their +contents.} + +@defproc[(zip-directory? [v any/c]) boolean?]{ + +Returns @racket[#t] if @racket[v] is a @tech{zip directory}, +@racket[#f] otherwise.} + + +@defproc[(zip-directory-entries [zipdir zip-directory?]) (listof bytes?)]{ + +Extracts the list of entries for a @exec{zip} archive.} + + +@defproc[(zip-directory-contains? [zipdir zip-directory?] + [name (or/c bytes? path-string?)]) + boolean?]{ + +Determines whether the given entry name occurs in the given @tech{zip +directory}. If @racket[name] is not a byte string, it is converted +using @racket[path->zip-path]. + +Directory entries match with or without trailing slashes.} + + +@defproc[(zip-directory-includes-directory? [zipdir zip-directory?] + [name (or/c bytes? path-string?)]) + boolean?]{ + +Determines whether the given name is included anywhere in the given +@tech{zip directory} as a filesystem directory, either as an entry +itself or as the containing directory of other entries. If +@racket[name] is not a byte string, it is converted using +@racket[path->zip-path].} + + +@defproc[(unzip-entry [path (or/c path-string? input-port?)] + [zipdir zip-directory?] + [entry (or/c bytes? path-string?)] + [entry-reader (bytes? boolean? input-port? . -> . any) + (make-filesystem-entry-reader)]) + void?]{ + +Unzips a single entry from a @exec{zip} archive based on a previously +read @tech{zip directory}, @racket[zipdir], from +@racket[read-zip-directory]. If @racket[in] is an input port, it must +support position setting via @racket[file-position]. + +The @racket[entry] parameter is a byte string whose name must be found +in the zip file's central directory. If @racket[entry] is not a byte +string, it is converted using @racket[path->zip-path]. + +The @racket[read-entry] argument is used to read the contents of the zip entry +in the same way as for @racket[unzip]. + +If @racket[entry] is not in @racket[zipdir], an +@racket[exn:fail:unzip:no-such-entry] exception is raised.} + + +@defproc[(path->zip-path [path path-string?]) bytes?]{ + +Converts a file name potentially containing path separators in the current +platform's format to use path separators recognized by the zip file +format: @litchar{/}.} + + +@defstruct[(exn:fail:unzip:no-such-entry exn:fail) ([entry bytes?])]{ + +Raised when a requested entry cannot be found in a @exec{zip} +archive. The @racket[entry] field is a byte string representing the +requested entry name.} diff --git a/collects/file/untar.rkt b/collects/file/untar.rkt index ecca75e6cf..33fa9fb746 100644 --- a/collects/file/untar.rkt +++ b/collects/file/untar.rkt @@ -1,6 +1,7 @@ #lang racket/base (require racket/file - racket/contract/base) + racket/contract/base + "private/strip-prefix.rkt") (provide (contract-out @@ -73,25 +74,7 @@ (nul-terminated name-bytes)))) (when (absolute-path? base-filename) (error 'untar "won't extract a file with an absolute path: ~e" base-filename)) - (define stripped-filename (if (zero? strip-count) - base-filename - (let-values ([(name count) - (let loop ([fn base-filename]) - (define-values (base name dir?) (split-path fn)) - (cond - [(eq? 'relative base) - (values 'same strip-count)] - [else - (define-values (res count) (loop base)) - (if (count . <= . 1) - (if (eq? res 'same) - (values name 0) - (values (build-path res name) 0)) - (values res (sub1 count)))]))]) - (if (and (zero? count) - (not (eq? name 'same))) - name - #f)))) + (define stripped-filename (strip-prefix base-filename strip-count)) (define filename (and stripped-filename (if dest (build-path dest stripped-filename) diff --git a/collects/file/unzip.rkt b/collects/file/unzip.rkt new file mode 100644 index 0000000000..8d7c426513 --- /dev/null +++ b/collects/file/unzip.rkt @@ -0,0 +1,336 @@ +#lang racket/base +(require racket/contract/base + racket/port + racket/file + file/gunzip + "private/strip-prefix.rkt") + +(provide + (struct-out exn:fail:unzip:no-such-entry) + + (contract-out + [unzip (((or/c path-string? input-port?)) + ((bytes? boolean? input-port? . -> . any)) + . ->* . any)] + + [make-filesystem-entry-reader (() (#:dest + (or/c #f path-string?) + #:strip-count + exact-nonnegative-integer? + #:exists + (or/c 'skip + 'error 'replace 'truncate 'truncate/replace 'append 'update + 'can-update 'must-truncate)) + . ->* . + (bytes? boolean? input-port? . -> . any))] + + [read-zip-directory ((or/c path-string? input-port?) . -> . zip-directory?)] + [zip-directory? (any/c . -> . boolean?)] + [zip-directory-entries (zip-directory? . -> . (listof bytes?))] + [zip-directory-contains? (zip-directory? (or/c path-string? bytes?) . -> . boolean?)] + [zip-directory-includes-directory? (zip-directory? (or/c path-string? input-port?) . -> . boolean?)] + [unzip-entry (((or/c path-string? input-port?) zip-directory? bytes?) + ((bytes? boolean? input-port? . -> . any)) + . ->* . + any)] + + [path->zip-path ((or/c string? path?) . -> . bytes?)])) + +;; =========================================================================== +;; CONSTANTS +;; =========================================================================== + +(define *local-file-header* #x04034b50) +(define *archive-extra-record* #x08064b50) +(define *central-file-header* #x02014b50) +(define *digital-signature* #x05054b50) +(define *zip64-end-of-central-directory-record* #x06064b50) +(define *zip64-end-of-central-directory-locator* #x07064b50) +(define *end-of-central-directory-record* #x06054b50) + +;; =========================================================================== +;; DATATYPES AND UTILITIES +;; =========================================================================== + +(define-struct (exn:fail:unzip:no-such-entry exn:fail) (entry) + #:guard (lambda (msg cm entry who) + (unless (bytes? entry) + (raise-argument-error who "bytes?" entry)) + (values msg cm entry))) + +;; (alistof bytes zip-entry) +(define-struct zip-directory (contents)) + +;; nat * boolean +(define-struct zip-entry (offset dir?)) + +(define (raise-unzip-error message) + (error 'unzip "~a" message)) + +(define (raise-entry-not-found entry) + (raise + (make-exn:fail:unzip:no-such-entry + (string->immutable-string + (format "unzip: entry not found: \"~a\"" (bytes->string/latin-1 entry))) + (current-continuation-marks) + entry))) + +;; zip-directory-entries : zip-directory -> (listof bytes) +(define (zip-directory-entries zipdir) + (map car (zip-directory-contents zipdir))) + +;; zip-directory-lookup : bytes zip-directory -> (option zip-entry) +(define (zip-directory-lookup entry zipdir) + (let loop ([contents (zip-directory-contents zipdir)]) + (cond + [(null? contents) #f] + [(or (bytes=? entry (caar contents)) + (bytes=? (bytes-append entry #"/") (caar contents))) + (cdar contents)] + [else (loop (cdr contents))]))) + +;; zip-directory-contains? : zip-directory (union string path bytes) -> boolean +(define (zip-directory-contains? zipdir entry) + (if (bytes? entry) + (and (zip-directory-lookup entry zipdir) #t) + (zip-directory-contains? zipdir (path->zip-path entry)))) + +;; matches-directory? : bytes bytes -> boolean +(define (bytes-prefix? dirname entry-name) + (let ([dirname-len (bytes-length dirname)] + [entry-name-len (bytes-length entry-name)]) + (and (>= entry-name-len dirname-len) + (bytes=? (subbytes entry-name 0 dirname-len) dirname)))) + +;; zip-directory-includes-directory? : zip-directory (union string path bytes) -> boolean +(define (zip-directory-includes-directory? zipdir dirname) + (if (bytes? dirname) + (ormap (lambda (pair) + (bytes-prefix? dirname (car pair))) + (zip-directory-contents zipdir)) + (zip-directory-includes-directory? zipdir (path->zip-path dirname)))) + +;; path->zip-path : (union path string) -> bytes +(define (path->zip-path p) + (let ([p (simplify-path p #f)]) + (if (path? p) + (bytes->zip-bytes (path->bytes p)) + (bytes->zip-bytes (string->bytes/latin-1 p))))) + +(define (bytes->zip-bytes b) + (case (system-path-convention-type) + [(windows) (regexp-replace* #rx#"\\\\" b #"/")] + [else b])) + +;; =========================================================================== +;; UNZIPPING ENGINE +;; =========================================================================== + +(define *slash-byte* (char->integer #\/)) + +(define (directory-entry? name) + (= (bytes-ref name (sub1 (bytes-length name))) *slash-byte*)) + +(define (read-integer count signed? in big-endian?) + (define bstr (read-bytes count in)) + (unless (and (bytes? bstr) (= count (bytes-length bstr))) + (error 'unzip "unexpected EOF")) + (integer-bytes->integer bstr signed? big-endian?)) + +(define (peek-integer count signed? in big-endian?) + (define bstr (peek-bytes count 0 in)) + (unless (and (bytes? bstr) (= count (bytes-length bstr))) + (error 'unzip "unexpected EOF")) + (integer-bytes->integer bstr signed? big-endian?)) + +(define (make-filter-input-port inflate orig-in) + (define-values (in out) (make-pipe 4096)) + (values + in + (thread (lambda () + (inflate orig-in out) + (close-output-port out))))) + +(define (skip-bytes amt in) + (read-bytes amt in) + (void)) + +;; unzip-one-entry : input-port (bytes boolean input-port -> a) -> a +(define (unzip-one-entry in read-entry) + (let ([read-int (lambda (count) (read-integer count #f in #f))]) + (let* ([signature (read-int 4)] + [version (read-bytes 2 in)] + [bits (read-int 2)] + [compression (read-int 2)] + [time (read-int 2)] + [date (read-int 2)] + [crc-32 (read-int 4)] + [compressed (read-int 4)] + [uncompressed (read-int 4)] + [filename-length (read-int 2)] + [extra-length (read-int 2)] + [filename (read-bytes filename-length in)] + [extra (read-bytes extra-length in)]) + (let* ([mark (file-position in)] + [dir? (directory-entry? filename)] + ;; appnote VI-J : if bit 3 is set, the fields crc-32, + ;; compressed size, and uncompressed size are set to + ;; zero in the local header + [in0 (if (bitwise-bit-set? bits 3) + in + (make-limited-input-port in compressed #f))]) + (dynamic-wind + void + (lambda () + (define-values (in t) + (if (zero? compression) + (values in0 #f) + (make-filter-input-port inflate in0))) + + (read-entry filename dir? in) + + (when t (kill-thread t))) + (lambda () + ;; appnote VI-C : if bit 3 is set, then the file data + ;; is immediately followed by a data descriptor + (if (bitwise-bit-set? bits 3) + (skip-bytes 12 in) + (file-position in (+ mark compressed)))))) + (void)))) + +;; find-central-directory : input-port nat -> nat nat nat +(define (find-central-directory in size) + (let loop ([pos (- size 18)]) + (unless (positive? pos) + (raise-unzip-error "no central directory")) + (file-position in pos) + (let* ([read-int (lambda (count) (read-integer count #f in #f))] + [signature (read-int 4)]) + (if (= signature *end-of-central-directory-record*) + (let ([disk-number (read-int 2)] + [directory-disk (read-int 2)] + [disk-entries (read-int 2)] + [entry-count (read-int 2)] + [directory-length (read-int 4)] + [directory-offset (read-int 4)] + [comment-length (read-int 2)]) + (if (= (- size (file-position in)) comment-length) + (values directory-offset directory-length entry-count) + (loop (sub1 pos)))) + (loop (sub1 pos)))))) + +;; read-central-directory : input-port nat -> (alistof bytes zip-entry) +(define (read-central-directory in size) + (let-values ([(offset length count) (find-central-directory in size)]) + (file-position in offset) + (build-list count + (lambda (i) + (let* ([read-int (lambda (count) + (read-integer count #f in #f))] + [signature (read-int 4)]) + (unless (= signature *central-file-header*) + (raise-unzip-error + (format "bad central file header signature: ~a" + signature))) + (let ([version (read-int 2)] + [required (read-int 2)] + [bits (read-int 2)] + [compression (read-int 2)] + [time (read-int 2)] + [date (read-int 2)] + [crc-32 (read-int 4)] + [compressed (read-int 4)] + [uncompressed (read-int 4)] + [filename-length (read-int 2)] + [extra-length (read-int 2)] + [comment-length (read-int 2)] + [disk-number (read-int 2)] + [internal-attributes (read-int 2)] + [external-attributes (read-int 4)] + [relative-offset (read-int 4)]) + (let* ([filename (read-bytes filename-length in)] + [dir? (directory-entry? filename)]) + (skip-bytes (+ extra-length comment-length) in) + (cons filename (make-zip-entry relative-offset dir?))))))))) + +;; =========================================================================== +;; FRONT END +;; =========================================================================== + +(define (call-with-input in proc) + ((if (input-port? in) + (lambda (in f) (f in)) + call-with-input-file*) + in + proc)) + +;; unzip : [(or/c path-string? input-port) (bytes boolean input-port -> any)] -> any +(define unzip + (lambda (in [read-entry (make-filesystem-entry-reader)]) + (call-with-input + in + (lambda (in) + (when (= (peek-integer 4 #f in #f) *local-file-header*) + (unzip-one-entry in read-entry) + (unzip in read-entry)))))) + +(define (input-size in) + (file-position in eof) + (begin0 + (file-position in) + (file-position in 0))) + +;; read-zip-directory : (union string path) -> zip-directory +(define (read-zip-directory in) + (make-zip-directory + (call-with-input + in + (lambda (in) + (read-central-directory in + (input-size in)))))) + +;; unzip-entry : (union string path) zip-directory bytes [(bytes boolean input-port -> a)] -> a +(define unzip-entry + (lambda (in dir entry-name [read-entry (make-filesystem-entry-reader)]) + (cond + [(zip-directory-lookup entry-name dir) + => (lambda (entry) + (call-with-input + in + (lambda (in) + (file-position in (zip-entry-offset entry)) + (unzip-one-entry in read-entry))))] + [else (raise-entry-not-found entry-name)]))) + +;; =========================================================================== +;; ENTRY PARSERS +;; =========================================================================== + +;; make-filesystem-entry-reader : [output-flag] -> (bytes boolean input-port -> any) +(define make-filesystem-entry-reader + (lambda (#:dest [dest-dir #f] #:strip-count [strip-count 0] #:exists [flag 'error]) + (lambda (name dir? in) + (let* ([base-path (strip-prefix (bytes->path name) strip-count)] + [path (and base-path + (if dest-dir + (build-path dest-dir base-path) + base-path))]) + (when path + (if dir? + (unless (directory-exists? path) + (make-directory* path)) + (let ([parent (dirname path)]) + (unless (directory-exists? parent) + (make-directory* parent)) + (unless (and (eq? flag 'skip) + (file-exists? path)) + (with-output-to-file path + #:exists flag + (lambda () + (copy-port in (current-output-port)))))))))))) + +(define (dirname p) + (define-values (base name dir?) (split-path p)) + (if (path? base) + base + (current-directory))) diff --git a/collects/tests/file/unzip.rkt b/collects/tests/file/unzip.rkt new file mode 100644 index 0000000000..b814667188 --- /dev/null +++ b/collects/tests/file/unzip.rkt @@ -0,0 +1,118 @@ +#lang racket/base +(require file/unzip + racket/file + racket/system) + +(define tmp (find-system-path 'temp-dir)) +(define zip-exe (find-executable-path "zip")) + +(define work-dir (build-path tmp (format "unzip-testing~a" (random 1000)))) +(printf "Working in ~a\n" work-dir) +(when (directory-exists? work-dir) + (delete-directory/files work-dir)) +(define a.zip (build-path work-dir "a.zip")) + +(define sub-dir (build-path work-dir "sub")) + +(define (make-file path) + (with-output-to-file path + (lambda () + (write-bytes (make-bytes (random 100000) 42)))) + (void)) + +(define ex1-dir (build-path work-dir "ex1")) +(define more-dir (build-path ex1-dir "more")) + +(make-directory* ex1-dir) +(make-file (build-path ex1-dir "f1")) +(make-file (build-path ex1-dir "f2")) +(make-file (build-path ex1-dir "f3")) +(make-directory* more-dir) +(make-file (build-path more-dir "f4")) + +(define (zip dir) + (define-values (base name dir?) (split-path dir)) + (parameterize ([current-directory base]) + (void (system* zip-exe "-r" a.zip name)))) + +(define (diff-error src dest) + (error 'diff "different: ~e ~e\n" src dest)) + +(define (diff src dest) + (cond + [(link-exists? src) + (unless (link-exists? dest) (diff-error src dest)) + (diff (resolve-path src) (resolve-path dest))] + [(file-exists? src) + (unless (and (file-exists? dest) + (= (file-size src) (file-size dest)) + (equal? (file->bytes src) (file->bytes dest))) + (diff-error src dest))] + [(directory-exists? src) + (unless (directory-exists? dest) + (diff-error src dest)) + (define (sort-paths l) + (sort l bytesbytes)) + (define srcs (sort-paths (directory-list src))) + (define dests (sort-paths (directory-list dest))) + (unless (equal? srcs dests) (diff-error src dest)) + (for ([src-item (in-list srcs)] + [dest-item (in-list dests)]) + (diff (build-path src src-item) (build-path dest dest-item)))] + [else (void)])) + +(zip ex1-dir) + +(make-directory* sub-dir) +(parameterize ([current-directory sub-dir]) + (unzip a.zip)) +(diff ex1-dir (build-path sub-dir "ex1")) +(delete-directory/files sub-dir) + +(parameterize ([current-directory work-dir]) + (unzip a.zip (make-filesystem-entry-reader #:dest "sub"))) +(diff ex1-dir (build-path sub-dir "ex1")) +(delete-directory/files sub-dir) + +(parameterize ([current-directory work-dir]) + (unzip a.zip (lambda (bytes dir? in) (void)))) +(when (directory-exists? sub-dir) + (error "should not have been unpacked")) + +(define (directory-test src) + (define zd (read-zip-directory src)) + (unless (zip-directory? zd) + (error "not a zip directory")) + (define (check-there p) + (unless (zip-directory-contains? zd p) + (error 'unzip-test "not there: ~e" p))) + (check-there "ex1/f1") + (check-there #"ex1/f1") + (check-there "ex1/more/f4") + (check-there (string->path "ex1/more/f4")) + (unless (zip-directory-includes-directory? zd "ex1/more") + (error "directory missing")) + (define (check-not-there p) + (when (zip-directory-contains? zd p) + (error "there!")) + (with-handlers ([exn:fail:unzip:no-such-entry? + (lambda (exn) + (unless (equal? (exn:fail:unzip:no-such-entry-entry exn) + (if (bytes? p) + p + (path->zip-path p))) + (error "bad exn")))]) + (unzip-entry src zd p))) + (check-not-there #"f1") + + (for ([entry (in-list (zip-directory-entries zd))]) + (parameterize ([current-directory work-dir]) + (unzip-entry src zd entry + (make-filesystem-entry-reader #:dest "sub")))) + (diff ex1-dir (build-path sub-dir "ex1")) + (delete-directory/files sub-dir)) +(directory-test a.zip) +(call-with-input-file a.zip + directory-test) + +(delete-directory/files work-dir) diff --git a/doc/release-notes/racket/HISTORY.txt b/doc/release-notes/racket/HISTORY.txt index d53858f183..352405c07c 100644 --- a/doc/release-notes/racket/HISTORY.txt +++ b/doc/release-notes/racket/HISTORY.txt @@ -1,6 +1,7 @@ Version 5.3.1.8 file/untar: added file/untgz: added +file/unzip: added Version 5.3.1.7 compiler/zo-structs: generalize flonum? field to type