From 00171a3c2ce2ff4a65d1abbbdf294b67f019bc90 Mon Sep 17 00:00:00 2001 From: Matthew Flatt Date: Wed, 21 Dec 2016 14:11:48 -0700 Subject: [PATCH] file/[un]tar: support for long paths Implement POSIX.1-2001/pax and GNU extensions for long paths and links in `untar` and `tar`. Add a `#:format` argument to `tar` to select among POSIX.1-2001/pax, GNU, or error encoding for long paths. --- pkgs/racket-doc/file/scribblings/tar.scrbl | 40 +++- pkgs/racket-doc/file/scribblings/untar.scrbl | 12 +- .../racket-test/tests/file/tar-long-paths.rkt | 110 ++++++++++ racket/collects/file/tar.rkt | 194 +++++++++++++----- racket/collects/file/untar.rkt | 109 ++++++++-- 5 files changed, 391 insertions(+), 74 deletions(-) create mode 100644 pkgs/racket-test/tests/file/tar-long-paths.rkt diff --git a/pkgs/racket-doc/file/scribblings/tar.scrbl b/pkgs/racket-doc/file/scribblings/tar.scrbl index 618a1d4d3b..936f2fba72 100644 --- a/pkgs/racket-doc/file/scribblings/tar.scrbl +++ b/pkgs/racket-doc/file/scribblings/tar.scrbl @@ -5,20 +5,20 @@ @defmodule[file/tar]{The @racketmodname[file/tar] library provides utilities to create archive files in USTAR format, like the archive -that the Unix utility @exec{pax} generates. The USTAR format imposes -limits on path lengths. The resulting archives contain only -directories, files, and symbolic links, and owner -information is not preserved; the owner that is stored in the archive -is always ``root.'' +that the Unix utility @exec{pax} generates. Long paths are supported +using either the POSIX.1-2001/pax or GNU format for long paths. The +resulting archives contain only directories, files, and symbolic +links, and owner information is not preserved; the owner that is +stored in the archive is always ``root.'' -Symbolic links (on Unix and Mac OS X) are not followed by default, and the path -in a link must be less than 100 bytes.} +Symbolic links (on Unix and Mac OS X) are not followed by default.} @defproc[(tar [tar-file path-string?] [path path-string?] ... [#:follow-links? follow-links? any/c #f] [#:exists-ok? exists-ok? any/c #f] + [#:format format (or/c 'pax 'gnu 'ustar) 'pax] [#:path-prefix path-prefix (or/c #f path-string?) #f] [#:path-filter path-filter (or/c #f (path? . -> . any/c)) #f] [#:get-timestamp get-timestamp @@ -41,6 +41,13 @@ If @racket[exists-ok?] is @racket[#f], then an exception is raised if @racket[tar-file] exists already. If @racket[exists-ok?] is true, then @racket[tar-file] is truncated or replaced if it exists already. +The @racket[format] argument determines the handling of long paths and +long symbolic-link targets. If @racket[format] is @racket['pax], then +POSIX.1-2001/pax extensions are used. If @racket[format] is +@racket['gnu], then GNU extensions are used. If @racket[format] is +@racket['ustar], then @racket[tar] raises an error for too-long paths +or symbolic-link targets. + If @racket[path-prefix] is not @racket[#f], then it is prefixed to each path in the archive. @@ -50,12 +57,16 @@ date to record in the archive for each file or directory. @history[#:changed "6.0.0.3" @elem{Added the @racket[#:get-timestamp] argument.} #:changed "6.1.1.1" @elem{Added the @racket[#:exists-ok?] argument.} #:changed "6.3.0.3" @elem{Added the @racket[#:follow-links?] argument.} - #:changed "6.3.0.11" @elem{Added the @racket[#:path-filter] argument.}]} + #:changed "6.3.0.11" @elem{Added the @racket[#:path-filter] argument.} + #:changed "6.7.0.4" @elem{Added the @racket[#:format] argument and + effectively changed its default from @racket['ustar] + to @racket['pax].}]} @defproc[(tar->output [paths (listof path?)] [out output-port? (current-output-port)] [#:follow-links? follow-links? any/c #f] + [#:format format (or/c 'pax 'gnu 'ustar) 'pax] [#:path-prefix path-prefix (or/c #f path-string?) #f] [#:path-filter path-filter (or/c #f (path? . -> . any/c)) #f] [#:get-timestamp get-timestamp @@ -65,7 +76,7 @@ date to record in the archive for each file or directory. file-or-directory-modify-seconds)]) exact-nonnegative-integer?]{ -Packages each of the given @racket[paths] in a @exec{tar} format +Like @racket[tar], but packages each of the given @racket[paths] in a @exec{tar} format archive that is written directly to the @racket[out]. The specified @racket[paths] are included as-is (except for adding @racket[path-prefix], if any); if a directory is specified, its content is not automatically added, and nested directories are added @@ -73,13 +84,17 @@ without parent directories. @history[#:changed "6.0.0.3" @elem{Added the @racket[#:get-timestamp] argument.} #:changed "6.3.0.3" @elem{Added the @racket[#:follow-links?] argument.} - #:changed "6.3.0.11" @elem{Added the @racket[#:path-filter] argument.}]} + #:changed "6.3.0.11" @elem{Added the @racket[#:path-filter] argument.} + #:changed "6.7.0.4" @elem{Added the @racket[#:format] argument and + effectively changed its default from @racket['ustar] + to @racket['pax].}]} @defproc[(tar-gzip [tar-file path-string?] [paths path-string?] ... [#:follow-links? follow-links? any/c #f] [#:exists-ok? exists-ok? any/c #f] + [#:format format (or/c 'pax 'gnu 'ustar) 'pax] [#:path-prefix path-prefix (or/c #f path-string?) #f] [#:get-timestamp get-timestamp (path? . -> . exact-integer?) @@ -92,4 +107,7 @@ Like @racket[tar], but compresses the resulting file with @racket[gzip]. @history[#:changed "6.0.0.3" @elem{Added the @racket[#:get-timestamp] argument.} #:changed "6.1.1.1" @elem{Added the @racket[#:exists-ok?] argument.} - #:changed "6.3.0.3" @elem{Added the @racket[#:follow-links?] argument.}]} + #:changed "6.3.0.3" @elem{Added the @racket[#:follow-links?] argument.} + #:changed "6.7.0.4" @elem{Added the @racket[#:format] argument and + effectively changed its default from @racket['ustar] + to @racket['pax].}]} diff --git a/pkgs/racket-doc/file/scribblings/untar.scrbl b/pkgs/racket-doc/file/scribblings/untar.scrbl index e8d6671a62..b2c19a4439 100644 --- a/pkgs/racket-doc/file/scribblings/untar.scrbl +++ b/pkgs/racket-doc/file/scribblings/untar.scrbl @@ -4,7 +4,8 @@ @title[#:tag "untar"]{@exec{tar} File Extraction} @defmodule[file/untar]{The @racketmodname[file/untar] library provides -a function to extract items from a TAR/USTAR archive.} +a function to extract items from a TAR/USTAR archive using GNU and/or +pax extensions to support long pathnames.} @defproc[(untar [in (or/c path-string? input-port?)] [#:dest dest-path (or/c path-string? #f) #f] @@ -19,7 +20,9 @@ a function to extract items from a TAR/USTAR archive.} (lambda args #t)]) void?]{ -Extracts TAR/USTAR content from @racket[in]. +Extracts TAR/USTAR content from @racket[in], recognizing +POSIX.1-2001/pax and GNU extensions for long paths and long +symbolic-link targets. If @racket[dest-path] is not @racket[#f], every path in the archive is prefixed to determine the destination path of the extracted item. @@ -69,4 +72,7 @@ For each item in the archive, @racket[filter-proc] is applied to If the result of @racket[filter-proc] is @racket[#f], then the item is not unpacked. -@history[#:changed "6.3" @elem{Added the @racket[#:permissive?] argument.}]} +@history[#:changed "6.3" @elem{Added the @racket[#:permissive?] argument.} + #:changed "6.7.0.4" @elem{Support long paths and long symbolic-link + targets using POSIX.1-2001/pax and GNU + extensions.}]} diff --git a/pkgs/racket-test/tests/file/tar-long-paths.rkt b/pkgs/racket-test/tests/file/tar-long-paths.rkt new file mode 100644 index 0000000000..596389c87e --- /dev/null +++ b/pkgs/racket-test/tests/file/tar-long-paths.rkt @@ -0,0 +1,110 @@ +#lang racket/base +(require racket/file + file/tar + file/untar + racket/system) + +;; Paths and link targets longer than 100 to 255 characters are +;; trouble for tar. Check the extensions that handle those kinds +;; of paths. + +(define tmp (make-temporary-file "tar~a" 'directory)) + +(define src-dir (build-path tmp "src")) +(define dest-dir (build-path tmp "dest")) + +(define tar-bin (find-executable-path "tar")) + +(define (check what . paths) + (for ([format '(pax gnu exe)] + #:when (or (not (eq? format 'exe)) tar-bin)) + (printf "Trying ~a ~a\n" what format) + + (delete-directory/files src-dir #:must-exist? #f) + (delete-directory/files dest-dir #:must-exist? #f) + + (make-directory src-dir) + (make-directory dest-dir) + + (for ([p (in-list paths)]) + (define link? + (and (pair? p) + (eq? 'link (car p)))) + (define-values (base name dir?) + (split-path (if link? (cadr p) p))) + (parameterize ([current-directory src-dir]) + (when (path? base) (make-directory* base)) + (if link? + (make-file-or-directory-link (caddr p) (cadr p)) + (call-with-output-file + p + (lambda (o) + (display (random) o)))))) + + (parameterize ([current-directory src-dir]) + (case format + [(exe) + ;; `tar` may complain about weird paths, so redirect those + ;; complaints to stdout to avoid a test failure: + (parameterize ([current-error-port (current-output-port)]) + (apply system* + tar-bin + "cf" + "content.tar" + (for/list ([p (in-list paths)]) + (if (pair? p) (cadr p) p))))] + [else + (apply tar + "content.tar" + #:format format + (for/list ([p (in-list paths)]) + (if (pair? p) (cadr p) p)))])) + + (parameterize ([current-directory dest-dir]) + (untar (build-path src-dir "content.tar"))) + + (for/list ([p (in-list paths)]) + (define n (if (pair? p) (cadr p) p)) + (check-same (build-path src-dir n) + (build-path dest-dir n))))) + +(define (check-same p1 p2) + (cond + [(link-exists? p1) + (unless (link-exists? p2) (error 'tar-long-paths "not a link: ~s" p2)) + (unless (equal? (resolve-path p1) (resolve-path p2)) + (error 'tar-long-paths "links differ: ~s and ~s" p1 p2))] + [else + (unless (file-exists? p2) (error 'tar-long-paths "not unpacked: ~s" p2)) + (when (link-exists? p2) (error 'tar-long-paths "unpacked as link: ~s" p2)) + (unless (equal? (file->bytes p1) (file->bytes p2)) + (error 'tar-long-paths "files differ: ~s and ~s" p1 p2))])) + +(check "one long" + "one" + "two" + (string-append "three-" (make-string 100 #\x)) + "four") + +(check "two long" + "one" + (string-append "sub/two-" (make-string 93 #\x)) + (string-append "sub/three-" (make-string 100 #\x)) + "four") + +(unless (eq? 'windows (system-type)) + (check "encoding" + (bytes->path #"one\xF0") + "two\u3BB" + (bytes->path (bytes-append #"sub/three\xF1-" (make-bytes 93 (char->integer #\x)))) + (string-append "sub/four\u3BB-" (make-string 93 #\x))) + + (check "long link" + (string-append "one-" (make-string 150 #\x)) + `[link ,"two" ,(string-append "one-" (make-string 150 #\x))]) + + (check "long link as long" + (string-append "one-" (make-string 150 #\x)) + `[link ,(string-append "two-" (make-string 100 #\x)) ,(string-append "one-" (make-string 150 #\x))])) + +(delete-directory/files tmp) diff --git a/racket/collects/file/tar.rkt b/racket/collects/file/tar.rkt index a5809d300f..d0273f00bf 100644 --- a/racket/collects/file/tar.rkt +++ b/racket/collects/file/tar.rkt @@ -4,6 +4,7 @@ (define tar-block-size 512) (define tar-name-length 100) (define tar-prefix-length 155) +(define tar-link-name-length 100) (define 0-block (make-bytes tar-block-size 0)) ; used for fast block zeroing @@ -21,15 +22,17 @@ (let* ([bts (path->name-bytes path)] [len (bytes-length bts)]) (if (< len tar-name-length) - (values bts #f) - (let loop ([n 1]) ; search for a split point - (cond [(<= (sub1 len) n) - (error 'tar "path too long for USTAR: ~a" path)] - [(and (eq? sep-char (bytes-ref bts n)) - (< n tar-prefix-length) - (< (- len (+ n 1)) tar-name-length)) - (values (subbytes bts (add1 n)) (subbytes bts 0 n))] - [else (loop (add1 n))]))))) + (values bts #f #t) + (let loop ([n 1]) ; search for a split point + (cond [(<= (sub1 len) n) + ;; Doesn't fit, so we'll use an extension record: + (values (subbytes bts 0 (sub1 tar-name-length)) #f #f)] + [(and (eq? sep-char (bytes-ref bts n)) + (< n tar-prefix-length) + (< (- len (+ n 1)) tar-name-length)) + ;; Fits after splitting: + (values (subbytes bts (add1 n)) (subbytes bts 0 n) #t)] + [else (loop (add1 n))]))))) ;; see also the same function name in "zip.rkt" (define (path-attributes path dir?) @@ -43,21 +46,24 @@ (define 0-byte (char->integer #\0)) -(define ((tar-one-entry buf prefix get-timestamp follow-links?) path) +(define ((tar-one-entry buf prefix get-timestamp follow-links? format) path) (let* ([link? (and (not follow-links?) (link-exists? path))] [dir? (and (not link?) (directory-exists? path))] [size (if (or dir? link?) 0 (file-size path))] [p 0] ; write pointer [cksum 0] [cksum-p #f]) - (define-values (file-name file-prefix) (split-tar-name (if prefix - (build-path prefix path) - path))) + (define full-file-name (if prefix + (build-path prefix path) + path)) + (define-values (file-name file-prefix file-name-fits?) + (split-tar-name full-file-name)) (define-syntax advance (syntax-rules () [(_ l) (set! p (+ p l))])) (define (write-block* len bts) ; no padding required (when bts - (bytes-copy! buf p bts) - (for ([i (in-range (bytes-length bts))]) + (define wlen (min (bytes-length bts) len)) + (bytes-copy! buf p bts 0 wlen) + (for ([i (in-range wlen)]) (set! cksum (+ cksum (bytes-ref bts i))))) (advance len)) (define (write-block len bts) ; len includes one nul padding @@ -76,6 +82,11 @@ (set! cksum (+ cksum d)) (loop (sub1 q) (quotient n 8))))) (advance len)) + (define (write-bytes/fill-block bstr) ; assumes that buf is zero'd + (write-bytes bstr) + (define len (bytes-length bstr)) + (unless (zero? (modulo len tar-block-size)) + (write-bytes buf (current-output-port) (modulo len tar-block-size)))) (define attrib-path (if link? ;; For a link, use attributes of the containing directory: @@ -84,35 +95,83 @@ base) (current-directory))) path)) + (define link-path-bytes (and link? + (path->bytes (resolve-path path)))) ;; see http://www.mkssoftware.com/docs/man4/tar.4.asp for format spec - (write-block tar-name-length file-name) - (write-octal 8 (path-attributes attrib-path dir?)) - (write-octal 8 0) ; always root (uid) - (write-octal 8 0) ; always root (gid) - (write-octal 12 size) - (write-octal 12 (get-timestamp attrib-path)) - ;; set checksum later, consider it "all blanks" for cksum - (set! cksum-p p) (set! cksum (+ cksum (* 8 32))) (advance 8) - (write-block* 1 (if link? #"2" (if dir? #"5" #"0"))) ; type-flag: dir/file (no symlinks) - (if link? - (let* ([p (path->bytes (resolve-path path))] - [len (bytes-length p)]) - (if (len . < . 100) - (begin - (write-block* len p) - (advance (- 100 len))) - (error 'tar "soft-link target too long"))) - (advance 100)) ; no link-name - (write-block 6 #"ustar") ; magic - (write-block* 2 #"00") ; version - (write-block 32 #"root") ; always root (user-name) - (write-block 32 #"root") ; always root (group-name) - (write-octal 8 0) ; device-major - (write-octal 8 0) ; device-minor - (write-block tar-prefix-length file-prefix) - (set! p cksum-p) - (write-octal 8 cksum) ; patch checksum - (write-bytes buf) + (define (write-a-block file-name-bytes file-prefix size type link-path-bytes) + (set! p 0) + (set! cksum 0) + (set! cksum-p #f) + (write-block tar-name-length file-name-bytes) + (write-octal 8 (path-attributes attrib-path dir?)) + (write-octal 8 0) ; always root (uid) + (write-octal 8 0) ; always root (gid) + (write-octal 12 size) + (write-octal 12 (get-timestamp attrib-path)) + ;; set checksum later, consider it "all blanks" for cksum + (set! cksum-p p) (set! cksum (+ cksum (* 8 32))) (advance 8) + (write-block* 1 type) ; type-flag + (if link-path-bytes + (let ([len (min (sub1 tar-link-name-length) (bytes-length link-path-bytes))]) + (write-block* len link-path-bytes) ; link-name (possibly truncated) + (advance (- tar-link-name-length len))) + (advance tar-link-name-length)) ; no link-name + (write-block 6 #"ustar") ; magic + (write-block* 2 #"00") ; version + (write-block 32 #"root") ; always root (user-name) + (write-block 32 #"root") ; always root (group-name) + (write-octal 8 0) ; device-major + (write-octal 8 0) ; device-minor + (write-block tar-prefix-length file-prefix) + (set! p cksum-p) + (write-octal 8 cksum) ; patch checksum + (write-bytes buf)) + ;; If the file name is too long, then we need to write an + ;; extension block, first (GNU format): + (when (and (not file-name-fits?) + (eq? format 'gnu)) + (define full-file-bytes (path->bytes full-file-name)) + (write-a-block #"././@LongLink" #"" (bytes-length full-file-bytes) #"L" #f) + (zero-block! buf) + (write-bytes/fill-block full-file-bytes)) + ;; Ditto for a long link target (GNU format): + (define long-target-link? (and link-path-bytes + ((bytes-length link-path-bytes) . >= . tar-link-name-length))) + (when (and long-target-link? + (eq? format 'gnu)) + (write-a-block #"././@LongLink" #"" (bytes-length link-path-bytes) #"K" #f) + (zero-block! buf) + (write-bytes/fill-block link-path-bytes)) + ;; Or this way (pax format): + (when (and (or (not file-name-fits?) + long-target-link?) + (eq? format 'pax)) + (define full-file-bytes (path->bytes full-file-name)) + (define pax-bytes + (let* ([ht #hash()] + [ht (if (not file-name-fits?) (hash-set ht 'path full-file-bytes) ht)] + [ht (if long-target-link? (hash-set ht 'linkpath link-path-bytes) ht)]) + (pax->bytes ht))) + (define pax-header-path-bytes (let ([bstr (bytes-append #"PaxHeader/" full-file-bytes)]) + (if ((bytes-length bstr) . >= . tar-name-length) + (subbytes bstr 0 (sub1 tar-name-length)) + bstr))) + (write-a-block pax-header-path-bytes #"" (bytes-length pax-bytes) #"x" #f) + (zero-block! buf) + (write-bytes/fill-block pax-bytes)) + ;; If plain 'ustar, report an error for long paths + (when (eq? format 'ustar) + (when (not file-name-fits?) + (error 'tar "path too long for ustar, must fit in ~a bytes: ~e" + (sub1 tar-name-length) + full-file-name)) + (when long-target-link? + (error 'tar "symbolic-link target too long for ustar, must fit in ~a bytes: ~e" + (sub1 tar-link-name-length) + link-path-bytes))) + ;; Write the data block + (write-a-block file-name file-prefix size (if link? #"2" (if dir? #"5" #"0")) link-path-bytes) + ;; Write the file data (if any) (if (or dir? link?) (zero-block! buf) ; must clean buffer for re-use ;; write the file @@ -140,9 +199,11 @@ (define (tar->output files [out (current-output-port)] #:get-timestamp [get-timestamp file-or-directory-modify-seconds] #:path-prefix [prefix #f] - #:follow-links? [follow-links? #f]) + #:follow-links? [follow-links? #f] + #:format [format 'pax]) + (check-format 'tar->output format) (parameterize ([current-output-port out]) - (let* ([buf (new-block)] [entry (tar-one-entry buf prefix get-timestamp follow-links?)]) + (let* ([buf (new-block)] [entry (tar-one-entry buf prefix get-timestamp follow-links? format)]) (for-each entry files) ;; two null blocks end-marker (write-bytes buf) (write-bytes buf)))) @@ -155,7 +216,9 @@ #:path-filter [path-filter #f] #:follow-links? [follow-links? #f] #:get-timestamp [get-timestamp file-or-directory-modify-seconds] + #:format [format 'pax] . paths) + (check-format 'tar format) (when (null? paths) (error 'tar "no paths specified")) (with-output-to-file tar-file #:exists (if exists-ok? 'truncate/replace 'error) @@ -164,7 +227,8 @@ #:path-filter path-filter) #:get-timestamp get-timestamp #:path-prefix prefix - #:follow-links? follow-links?)))) + #:follow-links? follow-links? + #:format format)))) ;; tar-gzip : output-file paths -> (provide tar-gzip) @@ -174,7 +238,9 @@ #:path-filter [path-filter #f] #:follow-links? [follow-links? #f] #:get-timestamp [get-timestamp file-or-directory-modify-seconds] + #:format [format 'pax] . paths) + (check-format 'tar-gzip format) (when (null? paths) (error 'tar-gzip "no paths specified")) (with-output-to-file tgz-file #:exists (if exists-ok? 'truncate/replace 'error) @@ -189,7 +255,8 @@ o #:path-prefix prefix #:follow-links? follow-links? - #:get-timestamp get-timestamp)) + #:get-timestamp get-timestamp + #:format format)) (close-output-port o))) (gzip-through-ports i (current-output-port) @@ -200,3 +267,36 @@ [else #f]) (current-seconds)) (when tar-exn (raise tar-exn)))))) + +;; ---------------------------------------- + +(define (check-format who format) + (case format + [(ustar gnu pax) (void)] + [else (raise-argument-error who "(or/c 'pax 'gnu 'ustar)" format)])) + +(define (pax->bytes ht) + (define (number->bytes n) (string->bytes/utf-8 (format "~a" n))) + (apply + bytes-append + (for/list ([(k v) (in-hash ht)]) + (define k-bytes (string->bytes/utf-8 (symbol->string k))) + (define len + ;; Weird: we have to figure out the line length *including* the bytes + ;; that describe the line length. + (let loop ([guess-len (+ 1 (bytes-length k-bytes) 1 (bytes-length v) 1)]) + (define n-bytes (number->bytes guess-len)) + (define encoding-size (+ (bytes-length n-bytes) 1 (bytes-length k-bytes) 1 (bytes-length v) 1)) + (cond + [(= guess-len encoding-size) + guess-len] + [(guess-len . < . encoding-size) + (loop (add1 guess-len))] + [else + (error 'pax->bytes "internal error: cannot figure out encoding line length!")]))) + (bytes-append (number->bytes len) + #" " + k-bytes + #"=" + v + #"\n")))) diff --git a/racket/collects/file/untar.rkt b/racket/collects/file/untar.rkt index c0fd1c7e1f..09f3b6611c 100644 --- a/racket/collects/file/untar.rkt +++ b/racket/collects/file/untar.rkt @@ -37,7 +37,9 @@ (delay)) (loop (untar-one-from-port in delays dest strip-count filter - permissive?))))))) + permissive? + #f + #f))))))) (define (read-bytes* n in) (define s (read-bytes n in)) @@ -48,7 +50,9 @@ (define (untar-one-from-port in delays dest strip-count filter - permissive?) + permissive? + path-from-extended-attributes + link-target-from-extended-attributes) (define name-bytes (read-bytes* 100 in)) (define mode (tar-bytes->number (read-bytes* 8 in) in)) (define owner (tar-bytes->number (read-bytes* 8 in) in)) @@ -56,7 +60,8 @@ (define size (tar-bytes->number (read-bytes* 12 in) in)) (define mod-time (tar-bytes->number (read-bytes* 12 in) in)) (define checksum-bytes (read-bytes* 8 in)) - (define type (case (integer->char (read-byte in)) + (define type-byte (integer->char (read-byte in))) + (define type (case type-byte [(#\0) 'file] [(#\1) 'hard-link] [(#\2) 'link] @@ -67,6 +72,8 @@ [(#\7) 'contiguous-file] [(#\g) 'extended-header] [(#\x) 'extended-header-for-next] + [(#\L) 'gnu-long-name] + [(#\K) 'gnu-long-link] [else 'unknown])) (define link-target-bytes (read-bytes* 100 in)) (define ustar? (bytes=? #"ustar\00000" (read-bytes* 8 in))) @@ -75,14 +82,15 @@ (define device-major-bytes (read-bytes* 8 in)) (define device-minor-bytes (read-bytes* 8 in)) (define filename-prefix-bytes (read-bytes* 155 in)) - (define base-filename (bytes->path - (let ([name (nul-terminated name-bytes)]) - (if ustar? - (let ([prefix (nul-terminated filename-prefix-bytes)]) - (if (zero? (bytes-length prefix)) - name - (bytes-append prefix #"/" name))) - name)))) + (define base-filename (or path-from-extended-attributes + (bytes->path + (let ([name (nul-terminated name-bytes)]) + (if ustar? + (let ([prefix (nul-terminated filename-prefix-bytes)]) + (if (zero? (bytes-length prefix)) + name + (bytes-append prefix #"/" name))) + name))))) (check-unpack-path 'untar base-filename #:allow-up? permissive?) (define stripped-filename (strip-prefix base-filename strip-count)) (define filename (and stripped-filename @@ -90,7 +98,8 @@ (build-path dest stripped-filename) stripped-filename))) (define link-target (and (eq? type 'link) - (bytes->path (nul-terminated link-target-bytes)))) + (or link-target-from-extended-attributes + (bytes->path (nul-terminated link-target-bytes))))) (when (and link-target (not permissive?)) (check-unpack-path 'untar link-target)) (read-bytes* 12 in) ; padding @@ -138,8 +147,41 @@ (when (file-exists? filename) (delete-file filename)) (make-file-or-directory-link link-target filename) delays] + [(extended-header-for-next) + ;; pax record to support long namesand other attributes + (define extended-header (read-pax in total-len)) + ;; Recur to use given paths, if any: + (untar-one-from-port in delays + dest strip-count filter + permissive? + (or (let ([v (hash-ref extended-header 'path #f)]) + (and v (bytes->path v))) + path-from-extended-attributes) + (or (let ([v (hash-ref extended-header 'linkpath #f)]) + (and v (bytes->path v))) + link-target-from-extended-attributes))] + [(gnu-long-name) + ;; GNU record to support long names + (define o (open-output-bytes)) + (copy-bytes total-len in o) + ;; Recur to use given path: + (untar-one-from-port in delays + dest strip-count filter + permissive? + (bytes->path (nul-terminated (get-output-bytes o))) + link-target-from-extended-attributes)] + [(gnu-long-link) + ;; GNU record to support long link targets + (define o (open-output-bytes)) + (copy-bytes total-len in o) + ;; Recur to use given link target: + (untar-one-from-port in delays + dest strip-count filter + permissive? + path-from-extended-attributes + (bytes->path (nul-terminated (get-output-bytes o))))] [else - (log-untar-info "ignored ~a: ~a" type filename) + (log-untar-info "ignored ~a[~a]: ~a" type type-byte filename) (copy-bytes total-len in #f) delays])] [else @@ -209,3 +251,44 @@ (arithmetic-shift user-perms -6))] [else perms]))) +(define (read-pax in len) + ;; Format of pax entries is sequence of "=\n" + ;; where is the length of that whole line, and the key and value + ;; are UTF-8 encoded + (define (finish len accum) + (when (positive? len) + (copy-bytes (sub1 len) in #f)) + accum) + (let loop ([len len] [num-base 0] [digits 0] [accum #hash()]) + (define c (if (positive? len) + (read-byte in) + 0)) + (cond + [(eof-object? c) (finish len accum)] + [(zero? c) (finish len accum)] + [(char-numeric? (integer->char c)) + (loop (sub1 len) (+ (- c (char->integer #\0)) (* num-base 10)) (add1 digits) accum)] + [(= c (char->integer #\space)) + (cond + [((- num-base digits 1) . > . (sub1 len)) + ;; Can't read that far, so something has gone wrong + accum] + [else + (define s (read-bytes (- num-base digits 1) in)) + (define m (regexp-match #rx#"^([^=]*)=(.*)\n$" s)) + (loop (- len (- num-base digits)) + 0 + 0 + (cond + [(not m) accum] + [else + (hash-set accum + (string->symbol (bytes->string/utf-8 (cadr m) #\?)) + ;; pax values are supposed to be UTF-8, but we'll + ;; convert raw bytes to a path; that arguably doesn't do + ;; the right thing if the source and destination + ;; systems use different path encodings, but it makes + ;; things work on systems where a path doesn't have to + ;; have a string encoding. + (caddr m))]))])] + [else (finish len accum)])))