file/[un]tar: support for long paths
Implement POSIX.1-2001/pax and GNU extensions for long paths and links in `untar` and `tar`. Add a `#:format` argument to `tar` to select among POSIX.1-2001/pax, GNU, or error encoding for long paths.
This commit is contained in:
parent
9eb7d6b84e
commit
00171a3c2c
|
@ -5,20 +5,20 @@
|
|||
|
||||
@defmodule[file/tar]{The @racketmodname[file/tar] library provides
|
||||
utilities to create archive files in USTAR format, like the archive
|
||||
that the Unix utility @exec{pax} generates. The USTAR format imposes
|
||||
limits on path lengths. The resulting archives contain only
|
||||
directories, files, and symbolic links, and owner
|
||||
information is not preserved; the owner that is stored in the archive
|
||||
is always ``root.''
|
||||
that the Unix utility @exec{pax} generates. Long paths are supported
|
||||
using either the POSIX.1-2001/pax or GNU format for long paths. The
|
||||
resulting archives contain only directories, files, and symbolic
|
||||
links, and owner information is not preserved; the owner that is
|
||||
stored in the archive is always ``root.''
|
||||
|
||||
Symbolic links (on Unix and Mac OS X) are not followed by default, and the path
|
||||
in a link must be less than 100 bytes.}
|
||||
Symbolic links (on Unix and Mac OS X) are not followed by default.}
|
||||
|
||||
|
||||
@defproc[(tar [tar-file path-string?]
|
||||
[path path-string?] ...
|
||||
[#:follow-links? follow-links? any/c #f]
|
||||
[#:exists-ok? exists-ok? any/c #f]
|
||||
[#:format format (or/c 'pax 'gnu 'ustar) 'pax]
|
||||
[#:path-prefix path-prefix (or/c #f path-string?) #f]
|
||||
[#:path-filter path-filter (or/c #f (path? . -> . any/c)) #f]
|
||||
[#:get-timestamp get-timestamp
|
||||
|
@ -41,6 +41,13 @@ If @racket[exists-ok?] is @racket[#f], then an exception is raised if
|
|||
@racket[tar-file] exists already. If @racket[exists-ok?] is true, then
|
||||
@racket[tar-file] is truncated or replaced if it exists already.
|
||||
|
||||
The @racket[format] argument determines the handling of long paths and
|
||||
long symbolic-link targets. If @racket[format] is @racket['pax], then
|
||||
POSIX.1-2001/pax extensions are used. If @racket[format] is
|
||||
@racket['gnu], then GNU extensions are used. If @racket[format] is
|
||||
@racket['ustar], then @racket[tar] raises an error for too-long paths
|
||||
or symbolic-link targets.
|
||||
|
||||
If @racket[path-prefix] is not @racket[#f], then it is prefixed to
|
||||
each path in the archive.
|
||||
|
||||
|
@ -50,12 +57,16 @@ date to record in the archive for each file or directory.
|
|||
@history[#:changed "6.0.0.3" @elem{Added the @racket[#:get-timestamp] argument.}
|
||||
#:changed "6.1.1.1" @elem{Added the @racket[#:exists-ok?] argument.}
|
||||
#:changed "6.3.0.3" @elem{Added the @racket[#:follow-links?] argument.}
|
||||
#:changed "6.3.0.11" @elem{Added the @racket[#:path-filter] argument.}]}
|
||||
#:changed "6.3.0.11" @elem{Added the @racket[#:path-filter] argument.}
|
||||
#:changed "6.7.0.4" @elem{Added the @racket[#:format] argument and
|
||||
effectively changed its default from @racket['ustar]
|
||||
to @racket['pax].}]}
|
||||
|
||||
|
||||
@defproc[(tar->output [paths (listof path?)]
|
||||
[out output-port? (current-output-port)]
|
||||
[#:follow-links? follow-links? any/c #f]
|
||||
[#:format format (or/c 'pax 'gnu 'ustar) 'pax]
|
||||
[#:path-prefix path-prefix (or/c #f path-string?) #f]
|
||||
[#:path-filter path-filter (or/c #f (path? . -> . any/c)) #f]
|
||||
[#:get-timestamp get-timestamp
|
||||
|
@ -65,7 +76,7 @@ date to record in the archive for each file or directory.
|
|||
file-or-directory-modify-seconds)])
|
||||
exact-nonnegative-integer?]{
|
||||
|
||||
Packages each of the given @racket[paths] in a @exec{tar} format
|
||||
Like @racket[tar], but packages each of the given @racket[paths] in a @exec{tar} format
|
||||
archive that is written directly to the @racket[out]. The specified
|
||||
@racket[paths] are included as-is (except for adding @racket[path-prefix], if any); if a directory is specified, its
|
||||
content is not automatically added, and nested directories are added
|
||||
|
@ -73,13 +84,17 @@ without parent directories.
|
|||
|
||||
@history[#:changed "6.0.0.3" @elem{Added the @racket[#:get-timestamp] argument.}
|
||||
#:changed "6.3.0.3" @elem{Added the @racket[#:follow-links?] argument.}
|
||||
#:changed "6.3.0.11" @elem{Added the @racket[#:path-filter] argument.}]}
|
||||
#:changed "6.3.0.11" @elem{Added the @racket[#:path-filter] argument.}
|
||||
#:changed "6.7.0.4" @elem{Added the @racket[#:format] argument and
|
||||
effectively changed its default from @racket['ustar]
|
||||
to @racket['pax].}]}
|
||||
|
||||
|
||||
@defproc[(tar-gzip [tar-file path-string?]
|
||||
[paths path-string?] ...
|
||||
[#:follow-links? follow-links? any/c #f]
|
||||
[#:exists-ok? exists-ok? any/c #f]
|
||||
[#:format format (or/c 'pax 'gnu 'ustar) 'pax]
|
||||
[#:path-prefix path-prefix (or/c #f path-string?) #f]
|
||||
[#:get-timestamp get-timestamp
|
||||
(path? . -> . exact-integer?)
|
||||
|
@ -92,4 +107,7 @@ Like @racket[tar], but compresses the resulting file with @racket[gzip].
|
|||
|
||||
@history[#:changed "6.0.0.3" @elem{Added the @racket[#:get-timestamp] argument.}
|
||||
#:changed "6.1.1.1" @elem{Added the @racket[#:exists-ok?] argument.}
|
||||
#:changed "6.3.0.3" @elem{Added the @racket[#:follow-links?] argument.}]}
|
||||
#:changed "6.3.0.3" @elem{Added the @racket[#:follow-links?] argument.}
|
||||
#:changed "6.7.0.4" @elem{Added the @racket[#:format] argument and
|
||||
effectively changed its default from @racket['ustar]
|
||||
to @racket['pax].}]}
|
||||
|
|
|
@ -4,7 +4,8 @@
|
|||
@title[#:tag "untar"]{@exec{tar} File Extraction}
|
||||
|
||||
@defmodule[file/untar]{The @racketmodname[file/untar] library provides
|
||||
a function to extract items from a TAR/USTAR archive.}
|
||||
a function to extract items from a TAR/USTAR archive using GNU and/or
|
||||
pax extensions to support long pathnames.}
|
||||
|
||||
@defproc[(untar [in (or/c path-string? input-port?)]
|
||||
[#:dest dest-path (or/c path-string? #f) #f]
|
||||
|
@ -19,7 +20,9 @@ a function to extract items from a TAR/USTAR archive.}
|
|||
(lambda args #t)])
|
||||
void?]{
|
||||
|
||||
Extracts TAR/USTAR content from @racket[in].
|
||||
Extracts TAR/USTAR content from @racket[in], recognizing
|
||||
POSIX.1-2001/pax and GNU extensions for long paths and long
|
||||
symbolic-link targets.
|
||||
|
||||
If @racket[dest-path] is not @racket[#f], every path in the archive is
|
||||
prefixed to determine the destination path of the extracted item.
|
||||
|
@ -69,4 +72,7 @@ For each item in the archive, @racket[filter-proc] is applied to
|
|||
If the result of @racket[filter-proc] is @racket[#f], then the item is
|
||||
not unpacked.
|
||||
|
||||
@history[#:changed "6.3" @elem{Added the @racket[#:permissive?] argument.}]}
|
||||
@history[#:changed "6.3" @elem{Added the @racket[#:permissive?] argument.}
|
||||
#:changed "6.7.0.4" @elem{Support long paths and long symbolic-link
|
||||
targets using POSIX.1-2001/pax and GNU
|
||||
extensions.}]}
|
||||
|
|
110
pkgs/racket-test/tests/file/tar-long-paths.rkt
Normal file
110
pkgs/racket-test/tests/file/tar-long-paths.rkt
Normal file
|
@ -0,0 +1,110 @@
|
|||
#lang racket/base
|
||||
(require racket/file
|
||||
file/tar
|
||||
file/untar
|
||||
racket/system)
|
||||
|
||||
;; Paths and link targets longer than 100 to 255 characters are
|
||||
;; trouble for tar. Check the extensions that handle those kinds
|
||||
;; of paths.
|
||||
|
||||
(define tmp (make-temporary-file "tar~a" 'directory))
|
||||
|
||||
(define src-dir (build-path tmp "src"))
|
||||
(define dest-dir (build-path tmp "dest"))
|
||||
|
||||
(define tar-bin (find-executable-path "tar"))
|
||||
|
||||
(define (check what . paths)
|
||||
(for ([format '(pax gnu exe)]
|
||||
#:when (or (not (eq? format 'exe)) tar-bin))
|
||||
(printf "Trying ~a ~a\n" what format)
|
||||
|
||||
(delete-directory/files src-dir #:must-exist? #f)
|
||||
(delete-directory/files dest-dir #:must-exist? #f)
|
||||
|
||||
(make-directory src-dir)
|
||||
(make-directory dest-dir)
|
||||
|
||||
(for ([p (in-list paths)])
|
||||
(define link?
|
||||
(and (pair? p)
|
||||
(eq? 'link (car p))))
|
||||
(define-values (base name dir?)
|
||||
(split-path (if link? (cadr p) p)))
|
||||
(parameterize ([current-directory src-dir])
|
||||
(when (path? base) (make-directory* base))
|
||||
(if link?
|
||||
(make-file-or-directory-link (caddr p) (cadr p))
|
||||
(call-with-output-file
|
||||
p
|
||||
(lambda (o)
|
||||
(display (random) o))))))
|
||||
|
||||
(parameterize ([current-directory src-dir])
|
||||
(case format
|
||||
[(exe)
|
||||
;; `tar` may complain about weird paths, so redirect those
|
||||
;; complaints to stdout to avoid a test failure:
|
||||
(parameterize ([current-error-port (current-output-port)])
|
||||
(apply system*
|
||||
tar-bin
|
||||
"cf"
|
||||
"content.tar"
|
||||
(for/list ([p (in-list paths)])
|
||||
(if (pair? p) (cadr p) p))))]
|
||||
[else
|
||||
(apply tar
|
||||
"content.tar"
|
||||
#:format format
|
||||
(for/list ([p (in-list paths)])
|
||||
(if (pair? p) (cadr p) p)))]))
|
||||
|
||||
(parameterize ([current-directory dest-dir])
|
||||
(untar (build-path src-dir "content.tar")))
|
||||
|
||||
(for/list ([p (in-list paths)])
|
||||
(define n (if (pair? p) (cadr p) p))
|
||||
(check-same (build-path src-dir n)
|
||||
(build-path dest-dir n)))))
|
||||
|
||||
(define (check-same p1 p2)
|
||||
(cond
|
||||
[(link-exists? p1)
|
||||
(unless (link-exists? p2) (error 'tar-long-paths "not a link: ~s" p2))
|
||||
(unless (equal? (resolve-path p1) (resolve-path p2))
|
||||
(error 'tar-long-paths "links differ: ~s and ~s" p1 p2))]
|
||||
[else
|
||||
(unless (file-exists? p2) (error 'tar-long-paths "not unpacked: ~s" p2))
|
||||
(when (link-exists? p2) (error 'tar-long-paths "unpacked as link: ~s" p2))
|
||||
(unless (equal? (file->bytes p1) (file->bytes p2))
|
||||
(error 'tar-long-paths "files differ: ~s and ~s" p1 p2))]))
|
||||
|
||||
(check "one long"
|
||||
"one"
|
||||
"two"
|
||||
(string-append "three-" (make-string 100 #\x))
|
||||
"four")
|
||||
|
||||
(check "two long"
|
||||
"one"
|
||||
(string-append "sub/two-" (make-string 93 #\x))
|
||||
(string-append "sub/three-" (make-string 100 #\x))
|
||||
"four")
|
||||
|
||||
(unless (eq? 'windows (system-type))
|
||||
(check "encoding"
|
||||
(bytes->path #"one\xF0")
|
||||
"two\u3BB"
|
||||
(bytes->path (bytes-append #"sub/three\xF1-" (make-bytes 93 (char->integer #\x))))
|
||||
(string-append "sub/four\u3BB-" (make-string 93 #\x)))
|
||||
|
||||
(check "long link"
|
||||
(string-append "one-" (make-string 150 #\x))
|
||||
`[link ,"two" ,(string-append "one-" (make-string 150 #\x))])
|
||||
|
||||
(check "long link as long"
|
||||
(string-append "one-" (make-string 150 #\x))
|
||||
`[link ,(string-append "two-" (make-string 100 #\x)) ,(string-append "one-" (make-string 150 #\x))]))
|
||||
|
||||
(delete-directory/files tmp)
|
|
@ -4,6 +4,7 @@
|
|||
(define tar-block-size 512)
|
||||
(define tar-name-length 100)
|
||||
(define tar-prefix-length 155)
|
||||
(define tar-link-name-length 100)
|
||||
|
||||
(define 0-block (make-bytes tar-block-size 0)) ; used for fast block zeroing
|
||||
|
||||
|
@ -21,15 +22,17 @@
|
|||
(let* ([bts (path->name-bytes path)]
|
||||
[len (bytes-length bts)])
|
||||
(if (< len tar-name-length)
|
||||
(values bts #f)
|
||||
(let loop ([n 1]) ; search for a split point
|
||||
(cond [(<= (sub1 len) n)
|
||||
(error 'tar "path too long for USTAR: ~a" path)]
|
||||
[(and (eq? sep-char (bytes-ref bts n))
|
||||
(< n tar-prefix-length)
|
||||
(< (- len (+ n 1)) tar-name-length))
|
||||
(values (subbytes bts (add1 n)) (subbytes bts 0 n))]
|
||||
[else (loop (add1 n))])))))
|
||||
(values bts #f #t)
|
||||
(let loop ([n 1]) ; search for a split point
|
||||
(cond [(<= (sub1 len) n)
|
||||
;; Doesn't fit, so we'll use an extension record:
|
||||
(values (subbytes bts 0 (sub1 tar-name-length)) #f #f)]
|
||||
[(and (eq? sep-char (bytes-ref bts n))
|
||||
(< n tar-prefix-length)
|
||||
(< (- len (+ n 1)) tar-name-length))
|
||||
;; Fits after splitting:
|
||||
(values (subbytes bts (add1 n)) (subbytes bts 0 n) #t)]
|
||||
[else (loop (add1 n))])))))
|
||||
|
||||
;; see also the same function name in "zip.rkt"
|
||||
(define (path-attributes path dir?)
|
||||
|
@ -43,21 +46,24 @@
|
|||
|
||||
(define 0-byte (char->integer #\0))
|
||||
|
||||
(define ((tar-one-entry buf prefix get-timestamp follow-links?) path)
|
||||
(define ((tar-one-entry buf prefix get-timestamp follow-links? format) path)
|
||||
(let* ([link? (and (not follow-links?) (link-exists? path))]
|
||||
[dir? (and (not link?) (directory-exists? path))]
|
||||
[size (if (or dir? link?) 0 (file-size path))]
|
||||
[p 0] ; write pointer
|
||||
[cksum 0]
|
||||
[cksum-p #f])
|
||||
(define-values (file-name file-prefix) (split-tar-name (if prefix
|
||||
(build-path prefix path)
|
||||
path)))
|
||||
(define full-file-name (if prefix
|
||||
(build-path prefix path)
|
||||
path))
|
||||
(define-values (file-name file-prefix file-name-fits?)
|
||||
(split-tar-name full-file-name))
|
||||
(define-syntax advance (syntax-rules () [(_ l) (set! p (+ p l))]))
|
||||
(define (write-block* len bts) ; no padding required
|
||||
(when bts
|
||||
(bytes-copy! buf p bts)
|
||||
(for ([i (in-range (bytes-length bts))])
|
||||
(define wlen (min (bytes-length bts) len))
|
||||
(bytes-copy! buf p bts 0 wlen)
|
||||
(for ([i (in-range wlen)])
|
||||
(set! cksum (+ cksum (bytes-ref bts i)))))
|
||||
(advance len))
|
||||
(define (write-block len bts) ; len includes one nul padding
|
||||
|
@ -76,6 +82,11 @@
|
|||
(set! cksum (+ cksum d))
|
||||
(loop (sub1 q) (quotient n 8)))))
|
||||
(advance len))
|
||||
(define (write-bytes/fill-block bstr) ; assumes that buf is zero'd
|
||||
(write-bytes bstr)
|
||||
(define len (bytes-length bstr))
|
||||
(unless (zero? (modulo len tar-block-size))
|
||||
(write-bytes buf (current-output-port) (modulo len tar-block-size))))
|
||||
(define attrib-path
|
||||
(if link?
|
||||
;; For a link, use attributes of the containing directory:
|
||||
|
@ -84,35 +95,83 @@
|
|||
base)
|
||||
(current-directory)))
|
||||
path))
|
||||
(define link-path-bytes (and link?
|
||||
(path->bytes (resolve-path path))))
|
||||
;; see http://www.mkssoftware.com/docs/man4/tar.4.asp for format spec
|
||||
(write-block tar-name-length file-name)
|
||||
(write-octal 8 (path-attributes attrib-path dir?))
|
||||
(write-octal 8 0) ; always root (uid)
|
||||
(write-octal 8 0) ; always root (gid)
|
||||
(write-octal 12 size)
|
||||
(write-octal 12 (get-timestamp attrib-path))
|
||||
;; set checksum later, consider it "all blanks" for cksum
|
||||
(set! cksum-p p) (set! cksum (+ cksum (* 8 32))) (advance 8)
|
||||
(write-block* 1 (if link? #"2" (if dir? #"5" #"0"))) ; type-flag: dir/file (no symlinks)
|
||||
(if link?
|
||||
(let* ([p (path->bytes (resolve-path path))]
|
||||
[len (bytes-length p)])
|
||||
(if (len . < . 100)
|
||||
(begin
|
||||
(write-block* len p)
|
||||
(advance (- 100 len)))
|
||||
(error 'tar "soft-link target too long")))
|
||||
(advance 100)) ; no link-name
|
||||
(write-block 6 #"ustar") ; magic
|
||||
(write-block* 2 #"00") ; version
|
||||
(write-block 32 #"root") ; always root (user-name)
|
||||
(write-block 32 #"root") ; always root (group-name)
|
||||
(write-octal 8 0) ; device-major
|
||||
(write-octal 8 0) ; device-minor
|
||||
(write-block tar-prefix-length file-prefix)
|
||||
(set! p cksum-p)
|
||||
(write-octal 8 cksum) ; patch checksum
|
||||
(write-bytes buf)
|
||||
(define (write-a-block file-name-bytes file-prefix size type link-path-bytes)
|
||||
(set! p 0)
|
||||
(set! cksum 0)
|
||||
(set! cksum-p #f)
|
||||
(write-block tar-name-length file-name-bytes)
|
||||
(write-octal 8 (path-attributes attrib-path dir?))
|
||||
(write-octal 8 0) ; always root (uid)
|
||||
(write-octal 8 0) ; always root (gid)
|
||||
(write-octal 12 size)
|
||||
(write-octal 12 (get-timestamp attrib-path))
|
||||
;; set checksum later, consider it "all blanks" for cksum
|
||||
(set! cksum-p p) (set! cksum (+ cksum (* 8 32))) (advance 8)
|
||||
(write-block* 1 type) ; type-flag
|
||||
(if link-path-bytes
|
||||
(let ([len (min (sub1 tar-link-name-length) (bytes-length link-path-bytes))])
|
||||
(write-block* len link-path-bytes) ; link-name (possibly truncated)
|
||||
(advance (- tar-link-name-length len)))
|
||||
(advance tar-link-name-length)) ; no link-name
|
||||
(write-block 6 #"ustar") ; magic
|
||||
(write-block* 2 #"00") ; version
|
||||
(write-block 32 #"root") ; always root (user-name)
|
||||
(write-block 32 #"root") ; always root (group-name)
|
||||
(write-octal 8 0) ; device-major
|
||||
(write-octal 8 0) ; device-minor
|
||||
(write-block tar-prefix-length file-prefix)
|
||||
(set! p cksum-p)
|
||||
(write-octal 8 cksum) ; patch checksum
|
||||
(write-bytes buf))
|
||||
;; If the file name is too long, then we need to write an
|
||||
;; extension block, first (GNU format):
|
||||
(when (and (not file-name-fits?)
|
||||
(eq? format 'gnu))
|
||||
(define full-file-bytes (path->bytes full-file-name))
|
||||
(write-a-block #"././@LongLink" #"" (bytes-length full-file-bytes) #"L" #f)
|
||||
(zero-block! buf)
|
||||
(write-bytes/fill-block full-file-bytes))
|
||||
;; Ditto for a long link target (GNU format):
|
||||
(define long-target-link? (and link-path-bytes
|
||||
((bytes-length link-path-bytes) . >= . tar-link-name-length)))
|
||||
(when (and long-target-link?
|
||||
(eq? format 'gnu))
|
||||
(write-a-block #"././@LongLink" #"" (bytes-length link-path-bytes) #"K" #f)
|
||||
(zero-block! buf)
|
||||
(write-bytes/fill-block link-path-bytes))
|
||||
;; Or this way (pax format):
|
||||
(when (and (or (not file-name-fits?)
|
||||
long-target-link?)
|
||||
(eq? format 'pax))
|
||||
(define full-file-bytes (path->bytes full-file-name))
|
||||
(define pax-bytes
|
||||
(let* ([ht #hash()]
|
||||
[ht (if (not file-name-fits?) (hash-set ht 'path full-file-bytes) ht)]
|
||||
[ht (if long-target-link? (hash-set ht 'linkpath link-path-bytes) ht)])
|
||||
(pax->bytes ht)))
|
||||
(define pax-header-path-bytes (let ([bstr (bytes-append #"PaxHeader/" full-file-bytes)])
|
||||
(if ((bytes-length bstr) . >= . tar-name-length)
|
||||
(subbytes bstr 0 (sub1 tar-name-length))
|
||||
bstr)))
|
||||
(write-a-block pax-header-path-bytes #"" (bytes-length pax-bytes) #"x" #f)
|
||||
(zero-block! buf)
|
||||
(write-bytes/fill-block pax-bytes))
|
||||
;; If plain 'ustar, report an error for long paths
|
||||
(when (eq? format 'ustar)
|
||||
(when (not file-name-fits?)
|
||||
(error 'tar "path too long for ustar, must fit in ~a bytes: ~e"
|
||||
(sub1 tar-name-length)
|
||||
full-file-name))
|
||||
(when long-target-link?
|
||||
(error 'tar "symbolic-link target too long for ustar, must fit in ~a bytes: ~e"
|
||||
(sub1 tar-link-name-length)
|
||||
link-path-bytes)))
|
||||
;; Write the data block
|
||||
(write-a-block file-name file-prefix size (if link? #"2" (if dir? #"5" #"0")) link-path-bytes)
|
||||
;; Write the file data (if any)
|
||||
(if (or dir? link?)
|
||||
(zero-block! buf) ; must clean buffer for re-use
|
||||
;; write the file
|
||||
|
@ -140,9 +199,11 @@
|
|||
(define (tar->output files [out (current-output-port)]
|
||||
#:get-timestamp [get-timestamp file-or-directory-modify-seconds]
|
||||
#:path-prefix [prefix #f]
|
||||
#:follow-links? [follow-links? #f])
|
||||
#:follow-links? [follow-links? #f]
|
||||
#:format [format 'pax])
|
||||
(check-format 'tar->output format)
|
||||
(parameterize ([current-output-port out])
|
||||
(let* ([buf (new-block)] [entry (tar-one-entry buf prefix get-timestamp follow-links?)])
|
||||
(let* ([buf (new-block)] [entry (tar-one-entry buf prefix get-timestamp follow-links? format)])
|
||||
(for-each entry files)
|
||||
;; two null blocks end-marker
|
||||
(write-bytes buf) (write-bytes buf))))
|
||||
|
@ -155,7 +216,9 @@
|
|||
#:path-filter [path-filter #f]
|
||||
#:follow-links? [follow-links? #f]
|
||||
#:get-timestamp [get-timestamp file-or-directory-modify-seconds]
|
||||
#:format [format 'pax]
|
||||
. paths)
|
||||
(check-format 'tar format)
|
||||
(when (null? paths) (error 'tar "no paths specified"))
|
||||
(with-output-to-file tar-file
|
||||
#:exists (if exists-ok? 'truncate/replace 'error)
|
||||
|
@ -164,7 +227,8 @@
|
|||
#:path-filter path-filter)
|
||||
#:get-timestamp get-timestamp
|
||||
#:path-prefix prefix
|
||||
#:follow-links? follow-links?))))
|
||||
#:follow-links? follow-links?
|
||||
#:format format))))
|
||||
|
||||
;; tar-gzip : output-file paths ->
|
||||
(provide tar-gzip)
|
||||
|
@ -174,7 +238,9 @@
|
|||
#:path-filter [path-filter #f]
|
||||
#:follow-links? [follow-links? #f]
|
||||
#:get-timestamp [get-timestamp file-or-directory-modify-seconds]
|
||||
#:format [format 'pax]
|
||||
. paths)
|
||||
(check-format 'tar-gzip format)
|
||||
(when (null? paths) (error 'tar-gzip "no paths specified"))
|
||||
(with-output-to-file tgz-file
|
||||
#:exists (if exists-ok? 'truncate/replace 'error)
|
||||
|
@ -189,7 +255,8 @@
|
|||
o
|
||||
#:path-prefix prefix
|
||||
#:follow-links? follow-links?
|
||||
#:get-timestamp get-timestamp))
|
||||
#:get-timestamp get-timestamp
|
||||
#:format format))
|
||||
(close-output-port o)))
|
||||
(gzip-through-ports
|
||||
i (current-output-port)
|
||||
|
@ -200,3 +267,36 @@
|
|||
[else #f])
|
||||
(current-seconds))
|
||||
(when tar-exn (raise tar-exn))))))
|
||||
|
||||
;; ----------------------------------------
|
||||
|
||||
(define (check-format who format)
|
||||
(case format
|
||||
[(ustar gnu pax) (void)]
|
||||
[else (raise-argument-error who "(or/c 'pax 'gnu 'ustar)" format)]))
|
||||
|
||||
(define (pax->bytes ht)
|
||||
(define (number->bytes n) (string->bytes/utf-8 (format "~a" n)))
|
||||
(apply
|
||||
bytes-append
|
||||
(for/list ([(k v) (in-hash ht)])
|
||||
(define k-bytes (string->bytes/utf-8 (symbol->string k)))
|
||||
(define len
|
||||
;; Weird: we have to figure out the line length *including* the bytes
|
||||
;; that describe the line length.
|
||||
(let loop ([guess-len (+ 1 (bytes-length k-bytes) 1 (bytes-length v) 1)])
|
||||
(define n-bytes (number->bytes guess-len))
|
||||
(define encoding-size (+ (bytes-length n-bytes) 1 (bytes-length k-bytes) 1 (bytes-length v) 1))
|
||||
(cond
|
||||
[(= guess-len encoding-size)
|
||||
guess-len]
|
||||
[(guess-len . < . encoding-size)
|
||||
(loop (add1 guess-len))]
|
||||
[else
|
||||
(error 'pax->bytes "internal error: cannot figure out encoding line length!")])))
|
||||
(bytes-append (number->bytes len)
|
||||
#" "
|
||||
k-bytes
|
||||
#"="
|
||||
v
|
||||
#"\n"))))
|
||||
|
|
|
@ -37,7 +37,9 @@
|
|||
(delay))
|
||||
(loop (untar-one-from-port in delays
|
||||
dest strip-count filter
|
||||
permissive?)))))))
|
||||
permissive?
|
||||
#f
|
||||
#f)))))))
|
||||
|
||||
(define (read-bytes* n in)
|
||||
(define s (read-bytes n in))
|
||||
|
@ -48,7 +50,9 @@
|
|||
|
||||
(define (untar-one-from-port in delays
|
||||
dest strip-count filter
|
||||
permissive?)
|
||||
permissive?
|
||||
path-from-extended-attributes
|
||||
link-target-from-extended-attributes)
|
||||
(define name-bytes (read-bytes* 100 in))
|
||||
(define mode (tar-bytes->number (read-bytes* 8 in) in))
|
||||
(define owner (tar-bytes->number (read-bytes* 8 in) in))
|
||||
|
@ -56,7 +60,8 @@
|
|||
(define size (tar-bytes->number (read-bytes* 12 in) in))
|
||||
(define mod-time (tar-bytes->number (read-bytes* 12 in) in))
|
||||
(define checksum-bytes (read-bytes* 8 in))
|
||||
(define type (case (integer->char (read-byte in))
|
||||
(define type-byte (integer->char (read-byte in)))
|
||||
(define type (case type-byte
|
||||
[(#\0) 'file]
|
||||
[(#\1) 'hard-link]
|
||||
[(#\2) 'link]
|
||||
|
@ -67,6 +72,8 @@
|
|||
[(#\7) 'contiguous-file]
|
||||
[(#\g) 'extended-header]
|
||||
[(#\x) 'extended-header-for-next]
|
||||
[(#\L) 'gnu-long-name]
|
||||
[(#\K) 'gnu-long-link]
|
||||
[else 'unknown]))
|
||||
(define link-target-bytes (read-bytes* 100 in))
|
||||
(define ustar? (bytes=? #"ustar\00000" (read-bytes* 8 in)))
|
||||
|
@ -75,14 +82,15 @@
|
|||
(define device-major-bytes (read-bytes* 8 in))
|
||||
(define device-minor-bytes (read-bytes* 8 in))
|
||||
(define filename-prefix-bytes (read-bytes* 155 in))
|
||||
(define base-filename (bytes->path
|
||||
(let ([name (nul-terminated name-bytes)])
|
||||
(if ustar?
|
||||
(let ([prefix (nul-terminated filename-prefix-bytes)])
|
||||
(if (zero? (bytes-length prefix))
|
||||
name
|
||||
(bytes-append prefix #"/" name)))
|
||||
name))))
|
||||
(define base-filename (or path-from-extended-attributes
|
||||
(bytes->path
|
||||
(let ([name (nul-terminated name-bytes)])
|
||||
(if ustar?
|
||||
(let ([prefix (nul-terminated filename-prefix-bytes)])
|
||||
(if (zero? (bytes-length prefix))
|
||||
name
|
||||
(bytes-append prefix #"/" name)))
|
||||
name)))))
|
||||
(check-unpack-path 'untar base-filename #:allow-up? permissive?)
|
||||
(define stripped-filename (strip-prefix base-filename strip-count))
|
||||
(define filename (and stripped-filename
|
||||
|
@ -90,7 +98,8 @@
|
|||
(build-path dest stripped-filename)
|
||||
stripped-filename)))
|
||||
(define link-target (and (eq? type 'link)
|
||||
(bytes->path (nul-terminated link-target-bytes))))
|
||||
(or link-target-from-extended-attributes
|
||||
(bytes->path (nul-terminated link-target-bytes)))))
|
||||
(when (and link-target (not permissive?))
|
||||
(check-unpack-path 'untar link-target))
|
||||
(read-bytes* 12 in) ; padding
|
||||
|
@ -138,8 +147,41 @@
|
|||
(when (file-exists? filename) (delete-file filename))
|
||||
(make-file-or-directory-link link-target filename)
|
||||
delays]
|
||||
[(extended-header-for-next)
|
||||
;; pax record to support long namesand other attributes
|
||||
(define extended-header (read-pax in total-len))
|
||||
;; Recur to use given paths, if any:
|
||||
(untar-one-from-port in delays
|
||||
dest strip-count filter
|
||||
permissive?
|
||||
(or (let ([v (hash-ref extended-header 'path #f)])
|
||||
(and v (bytes->path v)))
|
||||
path-from-extended-attributes)
|
||||
(or (let ([v (hash-ref extended-header 'linkpath #f)])
|
||||
(and v (bytes->path v)))
|
||||
link-target-from-extended-attributes))]
|
||||
[(gnu-long-name)
|
||||
;; GNU record to support long names
|
||||
(define o (open-output-bytes))
|
||||
(copy-bytes total-len in o)
|
||||
;; Recur to use given path:
|
||||
(untar-one-from-port in delays
|
||||
dest strip-count filter
|
||||
permissive?
|
||||
(bytes->path (nul-terminated (get-output-bytes o)))
|
||||
link-target-from-extended-attributes)]
|
||||
[(gnu-long-link)
|
||||
;; GNU record to support long link targets
|
||||
(define o (open-output-bytes))
|
||||
(copy-bytes total-len in o)
|
||||
;; Recur to use given link target:
|
||||
(untar-one-from-port in delays
|
||||
dest strip-count filter
|
||||
permissive?
|
||||
path-from-extended-attributes
|
||||
(bytes->path (nul-terminated (get-output-bytes o))))]
|
||||
[else
|
||||
(log-untar-info "ignored ~a: ~a" type filename)
|
||||
(log-untar-info "ignored ~a[~a]: ~a" type type-byte filename)
|
||||
(copy-bytes total-len in #f)
|
||||
delays])]
|
||||
[else
|
||||
|
@ -209,3 +251,44 @@
|
|||
(arithmetic-shift user-perms -6))]
|
||||
[else perms])))
|
||||
|
||||
(define (read-pax in len)
|
||||
;; Format of pax entries is sequence of "<num><space><key>=<value>\n"
|
||||
;; where <num> is the length of that whole line, and the key and value
|
||||
;; are UTF-8 encoded
|
||||
(define (finish len accum)
|
||||
(when (positive? len)
|
||||
(copy-bytes (sub1 len) in #f))
|
||||
accum)
|
||||
(let loop ([len len] [num-base 0] [digits 0] [accum #hash()])
|
||||
(define c (if (positive? len)
|
||||
(read-byte in)
|
||||
0))
|
||||
(cond
|
||||
[(eof-object? c) (finish len accum)]
|
||||
[(zero? c) (finish len accum)]
|
||||
[(char-numeric? (integer->char c))
|
||||
(loop (sub1 len) (+ (- c (char->integer #\0)) (* num-base 10)) (add1 digits) accum)]
|
||||
[(= c (char->integer #\space))
|
||||
(cond
|
||||
[((- num-base digits 1) . > . (sub1 len))
|
||||
;; Can't read that far, so something has gone wrong
|
||||
accum]
|
||||
[else
|
||||
(define s (read-bytes (- num-base digits 1) in))
|
||||
(define m (regexp-match #rx#"^([^=]*)=(.*)\n$" s))
|
||||
(loop (- len (- num-base digits))
|
||||
0
|
||||
0
|
||||
(cond
|
||||
[(not m) accum]
|
||||
[else
|
||||
(hash-set accum
|
||||
(string->symbol (bytes->string/utf-8 (cadr m) #\?))
|
||||
;; pax values are supposed to be UTF-8, but we'll
|
||||
;; convert raw bytes to a path; that arguably doesn't do
|
||||
;; the right thing if the source and destination
|
||||
;; systems use different path encodings, but it makes
|
||||
;; things work on systems where a path doesn't have to
|
||||
;; have a string encoding.
|
||||
(caddr m))]))])]
|
||||
[else (finish len accum)])))
|
||||
|
|
Loading…
Reference in New Issue
Block a user