From e5dd7183b587f7192dc5fd1da0e30253e8b87646 Mon Sep 17 00:00:00 2001 From: Matthew Flatt Date: Tue, 10 Jun 2014 16:13:23 +0100 Subject: [PATCH] file/[un]zip: add UTC and time-rounding option A `zip` archive normally holds timestamps in local time; a new option records/interprets a UTC time instead. (Use the option both to zip and unzip.) Also, add an option to round the 2-second resolution of time to the past instead of the future (to avoid files with timestamps in the future) when zipping. --- .../racket-doc/file/scribblings/unzip.scrbl | 16 +++++-- .../racket-doc/file/scribblings/zip.scrbl | 26 +++++++++--- .../racket-test/tests/file/packers.rkt | 42 +++++++++++++++---- racket/collects/file/unzip.rkt | 29 ++++++++----- racket/collects/file/zip.rkt | 22 ++++++---- 5 files changed, 98 insertions(+), 37 deletions(-) diff --git a/pkgs/racket-pkgs/racket-doc/file/scribblings/unzip.scrbl b/pkgs/racket-pkgs/racket-doc/file/scribblings/unzip.scrbl index d0f054f5e8..9db4d7e0f0 100644 --- a/pkgs/racket-pkgs/racket-doc/file/scribblings/unzip.scrbl +++ b/pkgs/racket-pkgs/racket-doc/file/scribblings/unzip.scrbl @@ -13,7 +13,8 @@ a function to extract items from a @exec{zip} archive.} . -> . any) (bytes? boolean? input-port? . -> . any)) (make-filesystem-entry-reader)] - [#:preserve-timestamps? preserve-timestamps? any/c #f]) + [#:preserve-timestamps? preserve-timestamps? any/c #f] + [#:utc-timestamps? utc-timestamps? any/c #f]) void?]{ Unzips an entire @exec{zip} archive from @racket[in]. @@ -27,7 +28,12 @@ for a file. The default @racket[entry-reader] unpacks entries to the filesystem; call @racket[make-filesystem-entry-reader] to configure aspects of the unpacking, such as the destination directory. -@history[#:changed "6.0.0.3" @elem{Added the @racket[#:preserve-timestamps?] argument.}]} +Normally, @exec{zip} archives record modification dates in local time, +but if @racket[utc-timestamps?] is true, then the time in the archive +is interpreted as UTC. + +@history[#:changed "6.0.0.3" @elem{Added the @racket[#:preserve-timestamps?] argument.} + #:changed "6.0.1.12" @elem{Added the @racket[#:utc-timestamps?] argument.}]} @defproc[(call-with-unzip [in (or/c path-string? input-port?)] @@ -127,7 +133,8 @@ itself or as the containing directory of other entries. If . -> . any) (bytes? boolean? input-port? . -> . any)) (make-filesystem-entry-reader)] - [#:preserve-timestamps? preserve-timestamps? any/c #f]) + [#:preserve-timestamps? preserve-timestamps? any/c #f] + [#:utc-timestamps? utc-timestamps? any/c #f]) void?]{ Unzips a single entry from a @exec{zip} archive based on a previously @@ -145,7 +152,8 @@ in the same way as for @racket[unzip]. If @racket[entry] is not in @racket[zipdir], an @racket[exn:fail:unzip:no-such-entry] exception is raised. -@history[#:changed "6.0.0.3" @elem{Added the @racket[#:preserve-timestamps?] argument.}]} +@history[#:changed "6.0.0.3" @elem{Added the @racket[#:preserve-timestamps?] argument.} + #:changed "6.0.1.12" @elem{Added the @racket[#:utc-timestamps?] argument.}]} @defproc[(call-with-unzip-entry [in path-string? input-port] diff --git a/pkgs/racket-pkgs/racket-doc/file/scribblings/zip.scrbl b/pkgs/racket-pkgs/racket-doc/file/scribblings/zip.scrbl index c9d10b29a6..21001b3886 100644 --- a/pkgs/racket-pkgs/racket-doc/file/scribblings/zip.scrbl +++ b/pkgs/racket-pkgs/racket-doc/file/scribblings/zip.scrbl @@ -15,6 +15,8 @@ compression is implemented by @racket[deflate].} (if timestamp (lambda (p) timestamp) file-or-directory-modify-seconds)] + [#:utc-timestamps? utc-timestamps? any/c #f] + [#:round-timestamps-down? round-timestamps-down? any/c #f] [#:path-prefix path-prefix (or/c #f path-string?) #f] [#:system-type sys-type symbol? (system-type)]) void?]{ @@ -26,7 +28,7 @@ The given @racket[path]s are all expected to be relative path names of existing directories and files (i.e., relative to the current directory). If a nested path is provided as a @racket[path], its ancestor directories are also added to the -resulting zip file, up to the current directory (using +resulting @exec{zip} file, up to the current directory (using @racket[pathlist-closure]). Files are packaged as usual for @@ -37,8 +39,16 @@ distinction between owner/group/other permissions. Also, symbolic links are always followed. The @racket[get-timestamp] function is used to obtain the modification -date to record in the archive for a file or directory, while -@racket[sys-type] determines the system type recorded in the archive. +date to record in the archive for a file or directory. Normally, +@exec{zip} archives record modification dates in local time, but if +@racket[utc-timestamps?] is true, then the UTC time is recorded. +Timestamps in @exec{zip} archives are precise only to two seconds; by +default, the time is rounded toward the future (like WinZip or PKZIP), +but time is rounded toward the past (like Java) if +@racket[round-timestamps-down?] is true. + +The @racket[sys-type] argument determines the system type recorded in +the archive. If @racket[path-prefix] is not @racket[#f], then it prefixes the name of each path as it is written in the @exec{zip} file, and directory @@ -47,7 +57,8 @@ entries are added for each element of @racket[path-prefix]. @history[#:changed "6.0.0.3" @elem{Added the @racket[#:get-timestamp] and @racket[#:system-type] arguments.} #:changed "6.0.1.12" - @elem{Added the @racket[#:path-prefix] argument.}]} + @elem{Added the @racket[#:path-prefix], @racket[#:utc-timestamps?], and + @racket[#:utc-timestamps-down?] arguments.}]} @@ -59,11 +70,13 @@ entries are added for each element of @racket[path-prefix]. (if timestamp (lambda (p) timestamp) file-or-directory-modify-seconds)] + [#:utc-timestamps? utc-timestamps? any/c #f] + [#:round-timestamps-down? round-timestamps-down? any/c #f] [#:path-prefix path-prefix (or/c #f path-string?) #f] [#:system-type sys-type symbol? (system-type)]) void?]{ -Zips each of the given @racket[paths], and packages it as a zip +Zips each of the given @racket[paths], and packages it as a @exec{zip} ``file'' that is written directly to @racket[out]. Unlike @racket[zip], the specified @racket[paths] are included without closing over directories: if a @@ -73,7 +86,8 @@ nested directories are added without parent directories. @history[#:changed "6.0.0.3" @elem{Added the @racket[#:get-timestamp] and @racket[#:system-type] arguments.} #:changed "6.0.1.12" - @elem{Added the @racket[#:path-prefix] argument.}]} + @elem{Added the @racket[#:path-prefix], @racket[#:utc-timestamps?], and + @racket[#:utc-timestamps-down?] arguments.}]} @defboolparam[zip-verbose on?]{ diff --git a/pkgs/racket-pkgs/racket-test/tests/file/packers.rkt b/pkgs/racket-pkgs/racket-test/tests/file/packers.rkt index 2255970721..bad01eb910 100644 --- a/pkgs/racket-pkgs/racket-test/tests/file/packers.rkt +++ b/pkgs/racket-pkgs/racket-test/tests/file/packers.rkt @@ -19,10 +19,23 @@ (define (diff src dest check-attributes?) (define (compare-attributes p1 p2) (or (not check-attributes?) - (and (= (file-or-directory-modify-seconds p1) - (file-or-directory-modify-seconds p2)) + (and (or (and (eq? check-attributes? 'file) + (directory-exists? p1)) + (= (round-date (file-or-directory-modify-seconds p1)) + (round-date (file-or-directory-modify-seconds p2))) + (begin + (printf "~s ~s ~s\n" + p1 + (file-or-directory-modify-seconds p1) + (file-or-directory-modify-seconds p2)) + #f)) (equal? (file-or-directory-permissions p1) (file-or-directory-permissions p2))))) + (define (round-date s) + (if (eq? check-attributes? 'file) + ;; granularity of ".zip" file dates is 2 seconds(!) + (if (even? s) s (add1 s)) ; round to future is the default + s)) (cond [(link-exists? src) (and (link-exists? dest) @@ -48,7 +61,7 @@ (begin (file-or-directory-permissions* dest "rwx") #t))))] [else #t])) -(define (zip-tests zip unzip) +(define (zip-tests zip unzip timestamps?) (make-directory* "ex1") (make-file (build-path "ex1" "f1")) (make-file (build-path "ex1" "f2")) @@ -58,13 +71,14 @@ (make-file (build-path more-dir "f4")) (zip "a.zip" "ex1") + (when timestamps? (sleep 3)) ; at least 2 seconds, plus 1 to likely change parity (make-directory* "sub") (parameterize ([current-directory "sub"]) (unzip "../a.zip")) - (unless (diff "ex1" (build-path "sub" "ex1") #t) - (eprintf "changed! ~s" zip)) + (unless (diff "ex1" (build-path "sub" "ex1") timestamps?) + (eprintf "changed! ~s\n" zip)) (delete-directory/files "sub") (delete-file "a.zip") @@ -74,8 +88,8 @@ (parameterize ([current-directory "sub"]) (unzip "../a.zip")) - (unless (diff "ex1" (build-path "sub" "inside" "ex1") #t) - (eprintf "changed! ~s" zip)) + (unless (diff "ex1" (build-path "sub" "inside" "ex1") timestamps?) + (eprintf "changed! ~s\n" zip)) (delete-file "a.zip") (delete-directory/files "sub") @@ -83,9 +97,19 @@ (define work-dir (make-temporary-file "packer~a" 'directory)) +(define (make-zip utc?) + (lambda (#:path-prefix [prefix #f] . args) + (apply zip #:path-prefix prefix args #:utc-timestamps? utc?))) + +(define (make-unzip utc?) + (lambda args + (apply unzip #:preserve-timestamps? #t #:utc-timestamps? utc? args))) + (parameterize ([current-directory work-dir]) - (zip-tests zip unzip) - (zip-tests tar untar)) + (zip-tests zip unzip #f) + (zip-tests (make-zip #f) (make-unzip #f) 'file) + (zip-tests (make-zip #t) (make-unzip #t) 'file) + (zip-tests tar untar #t)) (delete-directory/files work-dir) diff --git a/racket/collects/file/unzip.rkt b/racket/collects/file/unzip.rkt index 8545923284..09f7ce1bf4 100644 --- a/racket/collects/file/unzip.rkt +++ b/racket/collects/file/unzip.rkt @@ -16,7 +16,8 @@ #; (or/c (bytes? boolean? input-port? (or/c #f exact-integer?) . -> . any) (bytes? boolean? input-port? . -> . any)) - #:preserve-timestamps? any/c) + #:preserve-timestamps? any/c + #:utc-timestamps? any/c) . ->* . any)] [make-filesystem-entry-reader (() (#:dest @@ -39,7 +40,8 @@ [zip-directory-includes-directory? (zip-directory? (or/c path-string? input-port?) . -> . boolean?)] [unzip-entry (((or/c path-string? input-port?) zip-directory? bytes?) ((or/c (procedure-arity-includes/c 2) (procedure-arity-includes/c 3)) - #:preserve-timestamps? any/c) + #:preserve-timestamps? any/c + #:utc-timestamps? any/c) . ->* . any)] @@ -173,7 +175,7 @@ (void)) ;; unzip-one-entry : input-port (bytes boolean input-port [exact-integer?] -> a) -> a -(define (unzip-one-entry in read-entry preserve-timestamps?) +(define (unzip-one-entry in read-entry preserve-timestamps? utc?) (let ([read-int (lambda (count) (read-integer count #f in #f))]) (let* ([signature (read-int 4)] [version (read-bytes 2 in)] @@ -206,7 +208,7 @@ (if preserve-timestamps? (read-entry filename dir? in (and (not dir?) - (msdos-date+time->seconds date time))) + (msdos-date+time->seconds date time utc?))) (read-entry filename dir? in)) (when t (kill-thread t))) @@ -273,14 +275,15 @@ (skip-bytes (+ extra-length comment-length) in) (cons filename (make-zip-entry relative-offset dir?))))))))) -(define (msdos-date+time->seconds date time) +(define (msdos-date+time->seconds date time utc?) (with-handlers ([exn:fail? (lambda (exn) #f)]) (find-seconds (* 2 (bitwise-and #x1F time)) (bitwise-and #x3F (arithmetic-shift time -5)) (bitwise-and #x1F (arithmetic-shift time -11)) (bitwise-and #x1F date) (bitwise-and #xF (arithmetic-shift date -5)) - (+ (bitwise-and #x7F (arithmetic-shift date -9)) 1980)))) + (+ (bitwise-and #x7F (arithmetic-shift date -9)) 1980) + (not utc?)))) ;; =========================================================================== ;; FRONT END @@ -296,13 +299,16 @@ ;; unzip : [(or/c path-string? input-port) (bytes boolean input-port -> any)] -> any (define unzip (lambda (in [read-entry (make-filesystem-entry-reader)] - #:preserve-timestamps? [preserve-timestamps? #f]) + #:preserve-timestamps? [preserve-timestamps? #f] + #:utc-timestamps? [utc? #f]) (call-with-input in (lambda (in) (when (= (peek-integer 4 #f in #f) *local-file-header*) - (unzip-one-entry in read-entry preserve-timestamps?) - (unzip in read-entry #:preserve-timestamps? preserve-timestamps?)))))) + (unzip-one-entry in read-entry preserve-timestamps? utc?) + (unzip in read-entry + #:preserve-timestamps? preserve-timestamps? + #:utc-timestamps? utc?)))))) (define (input-size in) (file-position in eof) @@ -322,7 +328,8 @@ ;; unzip-entry : (union string path) zip-directory bytes [(bytes boolean input-port -> a)] -> a (define unzip-entry (lambda (in dir entry-name [read-entry (make-filesystem-entry-reader)] - #:preserve-timestamps? [preserve-timestamps? #f]) + #:preserve-timestamps? [preserve-timestamps? #f] + #:utc-timestamps? [utc? #f]) (cond [(zip-directory-lookup entry-name dir) => (lambda (entry) @@ -330,7 +337,7 @@ in (lambda (in) (file-position in (zip-entry-offset entry)) - (unzip-one-entry in read-entry preserve-timestamps?))))] + (unzip-one-entry in read-entry preserve-timestamps? utc?))))] [else (raise-entry-not-found entry-name)]))) ;; =========================================================================== diff --git a/racket/collects/file/zip.rkt b/racket/collects/file/zip.rkt index 2102493c4c..e3ac7de0e8 100644 --- a/racket/collects/file/zip.rkt +++ b/racket/collects/file/zip.rkt @@ -59,8 +59,8 @@ ;; =========================================================================== ;; date->msdos-time : date -> msdos-time - (define (date->msdos-time date) - (bitwise-ior (ceiling (/ (date-second date) 2)) + (define (date->msdos-time date round-down?) + (bitwise-ior (arithmetic-shift (+ (if round-down? 0 1) (date-second date)) -1) (arithmetic-shift (date-minute date) 5) (arithmetic-shift (date-hour date) 11))) @@ -230,9 +230,9 @@ ;; build-metadata : relative-path (relative-path . -> . exact-integer?) ;; boolean (or/c #f integer?) -> metadata - (define (build-metadata path-prefix path get-timestamp + (define (build-metadata path-prefix path get-timestamp utc? round-down? force-dir? permissions) - (let* ([mod (seconds->date (get-timestamp path))] + (let* ([mod (seconds->date (get-timestamp path) (not utc?))] [dir? (or force-dir? (directory-exists? path))] [attr (path-attributes path dir? permissions)] [path (cond [(path? path) path] @@ -243,7 +243,7 @@ path)] [name (with-slash-separator (path->bytes name-path))] [name (if dir? (with-trailing-slash name) name)] - [time (date->msdos-time mod)] + [time (date->msdos-time mod round-down?)] [date (date->msdos-date mod)] [comp (if dir? 0 *compression-level*)]) (make-metadata path name dir? time date comp attr))) @@ -260,6 +260,8 @@ #:get-timestamp [get-timestamp (if timestamp (lambda (p) timestamp) file-or-directory-modify-seconds)] + #:utc-timestamps? [utc? #f] + #:round-timestamps-down? [round-down? #f] #:path-prefix [path-prefix #f] #:system-type [sys-type (system-type)]) (parameterize ([current-output-port out]) @@ -275,12 +277,14 @@ (define-values (base name dir?) (split-path path-prefix)) (define r (loop (and (path? base) base))) (cons - (zip-one-entry (build-metadata #f path-prefix (lambda (x) (current-seconds)) #t #o755) + (zip-one-entry (build-metadata #f path-prefix (lambda (x) (current-seconds)) + utc? round-down? #t #o755) seekable?) r)]))) ;; add normal files: (map (lambda (file) - (zip-one-entry (build-metadata path-prefix file get-timestamp #f #f) + (zip-one-entry (build-metadata path-prefix file get-timestamp + utc? round-down? #f #f) seekable?)) files))]) (when (zip-verbose) @@ -296,6 +300,8 @@ #:get-timestamp [get-timestamp (if timestamp (lambda (p) timestamp) file-or-directory-modify-seconds)] + #:utc-timestamps? [utc? #f] + #:round-timestamps-down? [round-down? #f] #:path-prefix [path-prefix #f] #:system-type [sys-type (system-type)] . paths) @@ -303,6 +309,8 @@ (with-output-to-file zip-file (lambda () (zip->output (pathlist-closure paths) #:get-timestamp get-timestamp + #:utc-timestamps? utc? + #:round-timestamps-down? round-down? #:path-prefix path-prefix #:system-type sys-type))))