From 3a76581a36df3af8f563110d9f63e4447e7a43b4 Mon Sep 17 00:00:00 2001 From: Matthew Flatt Date: Mon, 19 Nov 2012 16:40:47 -0700 Subject: [PATCH] add file/untar --- collects/file/scribblings/file.scrbl | 1 + collects/file/scribblings/untar.scrbl | 63 +++++++++ collects/file/untar.rkt | 194 ++++++++++++++++++++++++++ collects/tests/file/untar.rkt | 109 +++++++++++++++ doc/release-notes/racket/HISTORY.txt | 3 + 5 files changed, 370 insertions(+) create mode 100644 collects/file/scribblings/untar.scrbl create mode 100644 collects/file/untar.rkt create mode 100644 collects/tests/file/untar.rkt diff --git a/collects/file/scribblings/file.scrbl b/collects/file/scribblings/file.scrbl index d2046036dd..4657dfa69f 100644 --- a/collects/file/scribblings/file.scrbl +++ b/collects/file/scribblings/file.scrbl @@ -10,6 +10,7 @@ @include-section["gunzip.scrbl"] @include-section["zip.scrbl"] @include-section["tar.scrbl"] +@include-section["untar.scrbl"] @include-section["md5.scrbl"] @include-section["sha1.scrbl"] @include-section["gif.scrbl"] diff --git a/collects/file/scribblings/untar.scrbl b/collects/file/scribblings/untar.scrbl new file mode 100644 index 0000000000..b055ddd77b --- /dev/null +++ b/collects/file/scribblings/untar.scrbl @@ -0,0 +1,63 @@ +#lang scribble/doc +@(require "common.rkt" (for-label file/untar)) + +@title[#:tag "untar"]{@exec{tar} File Extraction} + +@defmodule[file/untar]{The @racketmodname[file/untar] library provides +a function to extract items from a TAR/USTAR archive.} + +@defproc[(untar [in (or/c path-string? input-port?)] + [#:dest dest-path (or/c path-string? #f) #f] + [#:strip-count strip-count exact-nonnegative-integer? 0] + [#:filter filter-proc + (path? (or/c path? #f) + symbol? exact-integer? (or/c path? #f) + exact-nonnegative-integer? + exact-nonnegative-integer? + . -> . any/c) + (lambda args #t)]) + void?]{ + +Extracts TAR/USTAR content from @racket[in]. + +If @racket[dest-path] is not @racket[#f], every path in the archive is +prefixed to determine the destination path of the extracted item. + +If @racket[strip-count] is positive, then @racket[strip-count] path +elements are removed from the item path from the archive (before +prefixing the path with @racket[dest-path]); if the item's path +contains @racket[strip-count] elements, then it is not extracted. + +For each item in the archive, @racket[filter-proc] is applied to + +@itemlist[ + + @item{the item's path as it appears in the archive;} + + @item{a destination path that is based on the path in the archive, + @racket[strip-count], and @racket[dest-path]--which can be + @racket[#f] if the item's path does not have + @racket[strip-count] or more elements;} + + @item{a symbol representing the item's type---@racket['file], + @racket['dir], @racket['link], @racket['hard-link], + @racket['character-special], @racket['block-special], + @racket['fifo], @racket['contiguous-file], + @racket['extended-header], @racket['extended-header-for-next], + or @racket['unknown]---where only @racket['file], + @racket['dir], or @racket['link] can be unpacked by + @racket[untar];} + + @item{an exact integer representing the item's size;} + + @item{a target path for a @racket['link] type or @racket[#f] + for any other type;} + + @item{an integer representing the item's modification date; and} + + @item{an integer representing the item's permissions} + +] + +If the result of @racket[filter-proc] is @racket[#f], then the item is +not unpacked.} diff --git a/collects/file/untar.rkt b/collects/file/untar.rkt new file mode 100644 index 0000000000..a87c1bff19 --- /dev/null +++ b/collects/file/untar.rkt @@ -0,0 +1,194 @@ +#lang racket/base +(require racket/file + racket/contract/base) + +(provide + (contract-out + [untar (->* ((or/c path-string? input-port?)) + (#:dest + (or/c #f path-string?) + #:strip-count exact-nonnegative-integer? + #:filter (path? (or/c path? #f) + symbol? exact-integer? (or/c path? #f) + exact-nonnegative-integer? exact-nonnegative-integer? + . -> . any/c)) + void?)])) + +(define (untar in + #:dest [dest #f] + #:strip-count [strip-count 0] + #:filter [filter void]) + ((if (input-port? in) + (lambda (in f) (f in)) + call-with-input-file*) + in + (lambda (in) + (let loop ([delays null]) + (define bstr (peek-bytes 512 0 in)) + (if (for/and ([b (in-bytes bstr)]) (zero? b)) + (for ([delay (in-list (reverse delays))]) + (delay)) + (loop (untar-one-from-port in delays + dest strip-count filter))))))) + +(define (read-bytes* n in) + (define s (read-bytes n in)) + (unless (and (bytes? s) + (= n (bytes-length s))) + (error 'untar "unexpected EOF")) + s) + +(define (untar-one-from-port in delays + dest strip-count filter) + (define name-bytes (read-bytes* 100 in)) + (define mode (tar-bytes->number (read-bytes* 8 in) in)) + (define owner (tar-bytes->number (read-bytes* 8 in) in)) + (define group (tar-bytes->number (read-bytes* 8 in) in)) + (define size (tar-bytes->number (read-bytes* 12 in) in)) + (define mod-time (tar-bytes->number (read-bytes* 12 in) in)) + (define checksum-bytes (read-bytes* 8 in)) + (define type (case (integer->char (read-byte in)) + [(#\0) 'file] + [(#\1) 'hard-link] + [(#\2) 'link] + [(#\3) 'character-special] + [(#\4) 'block-special] + [(#\5) 'dir] + [(#\6) 'fifo] + [(#\7) 'contiguous-file] + [(#\g) 'extended-header] + [(#\x) 'extended-header-for-next] + [else 'unknown])) + (define link-target-bytes (read-bytes* 100 in)) + (define ustar? (bytes=? #"ustar\00000" (read-bytes* 8 in))) + (define owner-bytes (read-bytes* 32 in)) + (define group-bytes (read-bytes* 32 in)) + (define device-major-bytes (read-bytes* 8 in)) + (define device-minor-bytes (read-bytes* 8 in)) + (define filename-prefix-bytes (read-bytes* 155 in)) + (define base-filename (bytes->path + (if ustar? + (bytes-append (nul-terminated filename-prefix-bytes) + (nul-terminated name-bytes)) + (nul-terminated name-bytes)))) + (when (absolute-path? base-filename) + (error 'untar "won't extract a file with an absolute path: ~e" base-filename)) + (define stripped-filename (if (zero? strip-count) + base-filename + (let-values ([(name count) + (let loop ([fn base-filename]) + (define-values (base name dir?) (split-path fn)) + (cond + [(eq? 'relative base) + (values 'same strip-count)] + [else + (define-values (res count) (loop base)) + (if (count . <= . 1) + (if (eq? res 'same) + (values name 0) + (values (build-path res name) 0)) + (values res (sub1 count)))]))]) + (if (and (zero? count) + (not (eq? name 'same))) + name + #f)))) + (define filename (and stripped-filename + (if dest + (build-path dest stripped-filename) + stripped-filename))) + (define link-target (and (eq? type 'link) + (bytes->path (nul-terminated link-target-bytes)))) + (read-bytes* 12 in) ; padding + (define create? + (filter base-filename filename type size link-target mod-time mode)) + (define total-len (* (ceiling (/ size 512)) 512)) + (cond + [(and filename create?) + (case type + [(dir) + (make-directory* filename) + (cons + ;; delay directory meta-data updates until after any contained + ;; files are written + (lambda () + (try-file-op + (lambda () + (file-or-directory-permissions filename mode))) + (try-file-op + (lambda () + (file-or-directory-modify-seconds filename mod-time)))) + delays)] + [(file) + (define-values (base name dir?) (split-path filename)) + (make-directory* base) + (call-with-output-file* + filename + #:exists 'truncate + (lambda (out) + (copy-bytes size in out))) + (try-file-op + (lambda () + (file-or-directory-permissions filename mode))) + (try-file-op + (lambda () + (file-or-directory-modify-seconds filename mod-time))) + (copy-bytes (- total-len size) in #f) + delays] + [(link) + (define-values (base name dir?) (split-path filename)) + (make-directory* base) + (when (file-exists? filename) (delete-file filename)) + (make-file-or-directory-link link-target filename) + delays] + [else + (error 'untar "cannot handle block type: ~a" type)])] + [else + (copy-bytes total-len in #f) + delays])) + +(define (copy-bytes amt in out) + (let ([bstr (make-bytes (min amt 4096))]) + (let loop ([amt amt]) + (unless (zero? amt) + (define size (min amt 4096)) + (unless (= (read-bytes! bstr in 0 size) size) + (error 'untar "unexpected EOF")) + (when out + (write-bytes bstr out 0 size)) + (loop (- amt size)))))) + +(define (tar-bytes->number bstr in) + (define len (bytes-length bstr)) + (cond + [(bitwise-bit-set? (bytes-ref bstr 0) 7) + ;; base-256: + (for/fold ([v 0]) ([i (in-range 1 len)]) + (+ (* v 256) v))] + [else + ;; traditional: + (define skip-tail + (- len + (for/or ([i (in-range len 0 -1)]) + (case (integer->char (bytes-ref bstr (sub1 i))) + [(#\space #\nul) #f] + [else i])))) + (for/fold ([v 0]) ([i (in-range (- len skip-tail))]) + (define b (bytes-ref bstr i)) + (if (<= (char->integer #\0) b (char->integer #\7)) + (+ (* v 8) (- b (char->integer #\0))) + (error 'untar "bad number ~e at ~a" bstr (file-position in))))])) + +(define (nul-terminated bstr) + (subbytes bstr + 0 + (or (for/or ([i (in-range (bytes-length bstr))]) + (and (zero? (bytes-ref bstr i)) + i)) + (bytes-length bstr)))) + +(define (try-file-op thunk) + (with-handlers ([exn:fail:filesystem? + (lambda (exn) + (log-error "untar: ~a" (exn-message exn)) + (void))]) + (thunk))) diff --git a/collects/tests/file/untar.rkt b/collects/tests/file/untar.rkt new file mode 100644 index 0000000000..114239d3ae --- /dev/null +++ b/collects/tests/file/untar.rkt @@ -0,0 +1,109 @@ +#lang racket/base +(require file/untar + racket/file + racket/system) + +(define tmp (find-system-path 'temp-dir)) +(define tar-exe (find-executable-path "tar")) + +(define work-dir (build-path tmp (format "untar-testing~a" (random 1000)))) +(printf "Working in ~a\n" work-dir) +(when (directory-exists? work-dir) + (delete-directory/files work-dir)) +(define a.tar (build-path work-dir "a.tar")) + +(define sub-dir (build-path work-dir "sub")) + +(define (make-file path mod-time [permissions '(read write)]) + (with-output-to-file path + (lambda () + (write-bytes (make-bytes (random 100000))))) + (file-or-directory-modify-seconds path mod-time) + (file-or-directory-permissions* path permissions)) + +(define (file-or-directory-permissions* path permissions) + (file-or-directory-permissions path + (bitwise-ior + (if (memq 'read permissions) + #o444 + 0) + (if (memq 'write permissions) + #o222 + 0) + (if (memq 'execute permissions) + #o111 + 0)))) + +(define ex1-dir (build-path work-dir "ex1")) + +(make-directory* ex1-dir) +(make-file (build-path ex1-dir "f1") (- (current-seconds) 12)) +(make-file (build-path ex1-dir "f2") (+ (current-seconds) 12) '(read write execute)) +(make-file (build-path ex1-dir "f3") (- (current-seconds) 7) '(read)) +(make-file-or-directory-link "fnone" (build-path ex1-dir "f4")) +(define more-dir (build-path ex1-dir "more")) +(make-directory* more-dir) +(make-file (build-path more-dir "f4") (current-seconds)) +(file-or-directory-permissions* more-dir '(read execute)) ; not 'write + +(define (tar dir) + (define-values (base name dir?) (split-path dir)) + (parameterize ([current-directory base]) + (void (system* tar-exe "-c" "-f" a.tar name)))) + +(define (diff-error src dest) + (error 'diff "different: ~e ~e\n" src dest)) + +(define (diff src dest) + (cond + [(link-exists? src) + (unless (link-exists? dest) (diff-error src dest)) + (diff (resolve-path src) (resolve-path dest))] + [(file-exists? src) + (unless (and (file-exists? dest) + (= (file-size src) (file-size dest)) + (= (file-or-directory-modify-seconds src) + (file-or-directory-modify-seconds dest)) + (equal? (file-or-directory-permissions src) + (file-or-directory-permissions dest)) + (equal? (file->bytes src) (file->bytes dest))) + (diff-error src dest))] + [(directory-exists? src) + (unless (and (directory-exists? dest) + (= (file-or-directory-modify-seconds src) + (file-or-directory-modify-seconds dest)) + (equal? (file-or-directory-permissions src) + (file-or-directory-permissions dest))) + (diff-error src dest)) + (define (sort-paths l) + (sort l bytesbytes)) + (define srcs (sort-paths (directory-list src))) + (define dests (sort-paths (directory-list dest))) + (unless (equal? srcs dests) (diff-error src dest)) + (for ([src-item (in-list srcs)] + [dest-item (in-list dests)]) + (diff (build-path src src-item) (build-path dest dest-item))) + ;; make dest writable to simplify clean-up: + (file-or-directory-permissions* dest '(read execute write))] + [else (void)])) + +(tar ex1-dir) + +(make-directory* sub-dir) +(parameterize ([current-directory sub-dir]) + (untar a.tar)) +(diff ex1-dir (build-path sub-dir "ex1")) +(delete-directory/files sub-dir) + +(parameterize ([current-directory work-dir]) + (untar a.tar #:dest "sub")) +(diff ex1-dir (build-path sub-dir "ex1")) +(delete-directory/files sub-dir) + +(parameterize ([current-directory work-dir]) + (untar a.tar #:dest "sub" #:filter (lambda args #f))) +(when (directory-exists? sub-dir) + (error "should not have been unpacked")) + +(file-or-directory-permissions* more-dir '(read execute write)) +(delete-directory/files work-dir) diff --git a/doc/release-notes/racket/HISTORY.txt b/doc/release-notes/racket/HISTORY.txt index 031c4ce23d..a26b6afe4e 100644 --- a/doc/release-notes/racket/HISTORY.txt +++ b/doc/release-notes/racket/HISTORY.txt @@ -1,3 +1,6 @@ +Version 5.3.1.8 +file/untar: added + Version 5.3.1.7 compiler/zo-structs: generalize flonum? field to type field in localref, let-one, and fun