From 5cc686526bb672d60f35c6c6d1197cc012630aa7 Mon Sep 17 00:00:00 2001 From: Matthew Flatt Date: Sun, 12 Jul 2020 19:55:45 -0600 Subject: [PATCH] cs: sync with revised fasl compression for code Sync with changes from cisco/ChezScheme. The specific code fragments that are compressed and the chunks that are used for compression remain essentially the same as before for Racket CS, but a different organization at the Chez Scheme level takes over some of the work that was on the Racket CS linklet layer, and load times may improve slightly. --- pkgs/base/info.rkt | 2 +- racket/src/cs/c/adjust-compress.rkt | 184 ++++++++++++++++++++++------ racket/src/cs/c/convert-to-boot.ss | 4 +- racket/src/cs/c/embed-boot.rkt | 7 +- racket/src/cs/c/to-vfasl.ss | 4 +- racket/src/cs/compile-file.ss | 6 +- racket/src/cs/linklet.sls | 90 ++++++-------- racket/src/cs/linklet/write.ss | 17 +-- racket/src/racket/src/schvers.h | 2 +- 9 files changed, 194 insertions(+), 122 deletions(-) diff --git a/pkgs/base/info.rkt b/pkgs/base/info.rkt index eb588aa684..c73b838b74 100644 --- a/pkgs/base/info.rkt +++ b/pkgs/base/info.rkt @@ -12,7 +12,7 @@ (define collection 'multi) -(define version "7.8.0.2") +(define version "7.8.0.3") (define deps `("racket-lib" ["racket" #:version ,version])) diff --git a/racket/src/cs/c/adjust-compress.rkt b/racket/src/cs/c/adjust-compress.rkt index 901c48d395..575490782d 100644 --- a/racket/src/cs/c/adjust-compress.rkt +++ b/racket/src/cs/c/adjust-compress.rkt @@ -2,7 +2,8 @@ (require racket/file racket/system file/gzip - file/gunzip) + file/gunzip + ffi/unsafe) (provide enable-compress! compress-enabled? @@ -36,52 +37,155 @@ (loop))) (get-output-bytes o))) -(define (lz4 flag bstr) - (define o (open-output-bytes)) - (unless (parameterize ([current-input-port (open-input-bytes bstr)] - [current-output-port o]) - (define lz4 (or (find-executable-path "lz4") - (error 'adjust-compress "could not find `lz4` executable, which is needed to adjust bootfile compression"))) - (system* lz4 flag)) - (error "lz4 failed")) - (get-output-bytes o)) +(define (lz4-d result-size bstr) + (define lz4 (ffi-lib "liblz4")) + (define LZ4_decompress_safe (get-ffi-obj 'LZ4_decompress_safe lz4 (_fun _pointer _pointer _int _int -> _int))) + (define r-bstr (make-bytes result-size)) + (when (negative? (LZ4_decompress_safe bstr r-bstr (bytes-length bstr) (bytes-length r-bstr))) + (error 'lz4 "decompression failed")) + r-bstr) + +(define (lz4-z bstr) + (define lz4 (ffi-lib "liblz4")) + (define LZ4_compressBound (get-ffi-obj 'LZ4_compressBound lz4 (_fun _int -> _int))) + (define LZ4_compress_default (get-ffi-obj 'LZ4_compress_default lz4 (_fun _pointer _pointer _int _int -> _int))) + (define max-len (LZ4_compressBound (bytes-length bstr))) + (define r-bstr (make-bytes max-len)) + (define len (LZ4_compress_default bstr r-bstr (bytes-length bstr) (bytes-length r-bstr))) + (subbytes r-bstr 0 len)) + +(define (adler32 bstr) + (define BASE 65521) + (define-values (s1 s2) + (for/fold ([prev-s1 1] [s2 0]) ([b (in-bytes bstr)]) + (define s1 (modulo (+ prev-s1 b) BASE)) + (values s1 (modulo (+ s2 s1) BASE)))) + (integer->integer-bytes (+ s1 (arithmetic-shift s2 16)) 4 #f #t)) + +(define fasl-type-uncompressed 43) +(define fasl-type-gzip 44) +(define fasl-type-lz4 45) + +(define (read-byte/not-eof in) + (define b (read-byte in)) + (when (eof-object? b) + (error "unexpected eof")) + b) + +(define (read-uptr in) + (let ([k (read-byte/not-eof in)]) + (let loop ([k k] [n 0]) + (let ([n (bitwise-ior n (bitwise-and k #x7F))]) + (if (zero? (bitwise-and k #x80)) + n + (loop (read-byte/not-eof in) (arithmetic-shift n 7))))))) + +(define (write-uptr n out) + (let loop ([n n] [cbit 0]) + (unless (n . <= . #x7F) + (loop (arithmetic-shift n -7) #x80)) + (write-byte (bitwise-ior (bitwise-and n #x7F) cbit) out))) + +(define (uptr-bytes n) + (if (n . <= . #x7F) + 1 + (add1 (uptr-bytes (arithmetic-shift n -7))))) (define (adjust-compress bstr) - (cond - [(bytes=? #"\0\0\0\0chez" (subbytes bstr 0 8)) - ;; source is not compressed - (cond - [(and compress? (eq? compress-format 'gzip)) - (reencode bstr (lambda (i o) (gzip-through-ports i o #f 0)))] - [(and compress? (eq? compress-format 'lz4)) - (lz4 "-z" bstr)] - [else bstr])] - [(bytes=? #"\4\"M\30" (subbytes bstr 0 4)) - ;; source is LZ4 - (cond - [(and compress? (eq? compress-format 'gzip)) - (reencode (lz4 "-d" bstr) (lambda (i o) (gzip-through-ports i o #f 0)))] - [(and compress? (eq? compress-format 'lz4)) - bstr] - [else (lz4 "-d" bstr)])] - [(bytes=? #"\37\213\b" (subbytes bstr 0 3)) - ;; source is gzip - (cond - [(and compress? (eq? compress-format 'gzip)) - bstr] - [(and compress? (eq? compress-format 'lz4)) - (lz4 "-z" (reencode bstr gunzip-through-ports))] - [else - (reencode bstr gunzip-through-ports)])] - [else (error 'adjust-compress "unrecognized format ~s" (subbytes bstr 0 3))])) + (define in (open-input-bytes bstr)) + (define out (open-output-bytes bstr)) + (let loop ([saw-header? #f]) + (cond + [(regexp-try-match #"^\0\0\0\0chez" in) + ;; copy header + (define vers (read-uptr in)) + (define mach (read-uptr in)) + (define s (regexp-try-match #rx"^[(][^)]*[)]" in)) + (unless s + (error 'adjust-compress "did not find (...)" (file-position in))) + (write-bytes #"\0\0\0\0chez" out) + (write-uptr vers out) + (write-uptr mach out) + (write-bytes (car s) out) + (loop #t)] + [(not saw-header?) + (error 'adjust-compress "did not find leading header")] + [else + (let ([situation (read-byte in)]) + (unless (eof-object? situation) + (let ([size (- (read-uptr in) 2)] ; size in fasl includes compression + kind + [compression (read-byte/not-eof in)] + [kind (read-byte/not-eof in)]) + (define-values (bstr compressed dest-size) + (cond + [(eqv? compression fasl-type-uncompressed) + ;; source is not compressed + (values (read-bytes size in) #f #f)] + [else + (define dest-size (read-uptr in)) + (define c-bstr (read-bytes (- size (uptr-bytes dest-size)) in)) + (cond + [(eqv? compression fasl-type-gzip) + (if (and compress? (eq? compress-format 'gzip)) + (values c-bstr 'gzip dest-size) + (let ([c-bstr (subbytes c-bstr 2 (- (bytes-length c-bstr 4)))]) + (values (reencode c-bstr inflate) #f #f)))] + [(eqv? compression fasl-type-lz4) + (if (and compress? (eq? compress-format 'lz4)) + (values c-bstr 'lz4 dest-size) + (values (lz4-d dest-size c-bstr) #f #f))] + [else + (error 'adjust-compress "unrecognized compression ~s" compression)])])) + (cond + [(or (not compress?) + (and (not compressed) + ((bytes-length bstr) . < . 100))) + (write-byte situation out) + (write-uptr (+ (bytes-length bstr) 2) out) + (write-byte fasl-type-uncompressed out) + (write-byte kind out) + (write-bytes bstr out)] + [else + (define d-size (or dest-size + (bytes-length bstr))) + (define c-bstr + (if compressed + bstr + (cond + [(eq? compress-format 'gzip) + (bytes-append #"\x78\x5e" + (reencode bstr (lambda (i o) (deflate i o))) + (adler32 bstr))] + [(eq? compress-format 'lz4) + (lz4-z bstr)] + [else + (error 'adjust-compress "unsupported ~s" compress-format)]))) + (write-byte situation out) + (write-uptr (+ (bytes-length c-bstr) (uptr-bytes d-size) 2) out) + (write-byte (cond + [(eq? compress-format 'gzip) fasl-type-gzip] + [(eq? compress-format 'lz4) fasl-type-lz4] + [else (error 'adjust-compress "unsupported ~s" compress-format)]) + out) + (write-byte kind out) + (write-uptr d-size out) + (write-bytes c-bstr out)])) + (loop #t)))])) + (get-output-bytes out)) (module+ main (require racket/cmdline) (command-line - #:once-each - [("--compress") "Leave compiled code files as compressed" - (enable-compress!)] + #:once-any + [("--uncompressed") "Uncompress compiled code" + (void)] + [("--gzip") "Compress using gzip" + (enable-compress!) + (set-compress-format! 'gzip)] + [("--lz4") "Compress using lz4" + (enable-compress!) + (set-compress-format! 'lz4)] #:args path (for ([path (in-list path)]) (define bstr (file->bytes path)) diff --git a/racket/src/cs/c/convert-to-boot.ss b/racket/src/cs/c/convert-to-boot.ss index fe110e2b93..17e8f968e9 100644 --- a/racket/src/cs/c/convert-to-boot.ss +++ b/racket/src/cs/c/convert-to-boot.ss @@ -1,11 +1,11 @@ -(compile-compressed #f) +(fasl-compressed #f) (define-values (src dest machine) (let loop ([args (command-line-arguments)]) (cond [(and (pair? args) (equal? (car args) "--compress")) - (compile-compressed #t) + (fasl-compressed #t) (loop (cdr args))] [(and (pair? args) (equal? (car args) "--xpatch") diff --git a/racket/src/cs/c/embed-boot.rkt b/racket/src/cs/c/embed-boot.rkt index 81c570654f..62b8612091 100644 --- a/racket/src/cs/c/embed-boot.rkt +++ b/racket/src/cs/c/embed-boot.rkt @@ -45,11 +45,8 @@ (delete-file dest-file)) (raise x))]) (define terminator - (if (compress-enabled?) - ;; zero byte stops a gzip-read sequence - #"\0" - ;; A 127 byte teriminates a fasl-read sequence - #"\177")) + ;; A 127 byte teriminates a fasl-read sequence + #"\177") (define data (bytes-append bstr1 terminator bstr2 terminator diff --git a/racket/src/cs/c/to-vfasl.ss b/racket/src/cs/c/to-vfasl.ss index 947bde8d8e..d24965e381 100644 --- a/racket/src/cs/c/to-vfasl.ss +++ b/racket/src/cs/c/to-vfasl.ss @@ -1,4 +1,4 @@ -(compile-compressed #f) +(fasl-compressed #f) (define compile-cross? #f) (define-values (src dest deps) @@ -6,7 +6,7 @@ (cond [(and (pair? args) (equal? (car args) "--compress")) - (compile-compressed #t) + (fasl-compressed #t) (loop (cdr args))] [(and (pair? args) (equal? (car args) "--cross")) diff --git a/racket/src/cs/compile-file.ss b/racket/src/cs/compile-file.ss index 9da25abc5f..bafee5a063 100644 --- a/racket/src/cs/compile-file.ss +++ b/racket/src/cs/compile-file.ss @@ -2,7 +2,7 @@ ;; Check to make we're using a build of Chez Scheme ;; that has all the features we need. (define-values (need-maj need-min need-sub need-dev) - (values 9 5 3 32)) + (values 9 5 3 33)) (unless (guard (x [else #f]) (eval 'scheme-fork-version-number)) (error 'compile-file @@ -42,7 +42,7 @@ (define whole-program? #f) (generate-inspector-information #f) (generate-procedure-source-information #f) -(compile-compressed #f) +(fasl-compressed #f) (enable-arithmetic-left-associative #t) (define build-dir "") (define xpatch-path #f) @@ -64,7 +64,7 @@ (loop args))] [(get-opt args "--compress" 0) => (lambda (args) - (compile-compressed #t) + (fasl-compressed #t) (putenv "PLT_CS_MAKE_COMPRESSED" "y") ; for "linklet.sls" (loop args))] [(get-opt args "--whole-program" 0) diff --git a/racket/src/cs/linklet.sls b/racket/src/cs/linklet.sls index 91a1ca94d2..4f75f2e489 100644 --- a/racket/src/cs/linklet.sls +++ b/racket/src/cs/linklet.sls @@ -235,8 +235,14 @@ (call-with-system-wind (lambda () (interpret e)))) (define (fasl-write* s o) (call-with-system-wind (lambda () (fasl-write s o)))) + (define (fasl-write-code* s o) + (call-with-system-wind (lambda () + (parameterize ([fasl-compressed compress-code?]) + (fasl-write s o))))) (define (compile-to-port* s o) - (call-with-system-wind (lambda () (compile-to-port s o)))) + (call-with-system-wind (lambda () + (parameterize ([fasl-compressed compress-code?]) + (compile-to-port s o))))) (define (eval/foreign e mode) (performance-region @@ -275,49 +281,36 @@ (get))) (define (compile-to-bytevector s paths format) - (let ([bv (cond - [(eq? format 'interpret) - (let-values ([(o get) (open-bytevector-output-port)]) - (fasl-write* s o) - (get))] - [else (compile*-to-bytevector s)])]) - (if compress-code? - (bytevector-compress bv) - bv))) + (cond + [(eq? format 'interpret) + (let-values ([(o get) (open-bytevector-output-port)]) + (fasl-write-code* s o) + (get))] + [else (compile*-to-bytevector s)])) (define (make-cross-compile-to-bytevector machine) (lambda (s paths format) - (let ([bv (cond - [(eq? format 'interpret) (cross-fasl-to-string machine s)] - [else (cross-compile machine s)])]) - (if compress-code? - (bytevector-compress bv) - bv)))) - - (define (eval-from-bytevector c-bv paths format) - (let ([bv (if (bytevector-uncompressed-fasl? c-bv) - c-bv - (begin - (add-performance-memory! 'uncompress (bytevector-length c-bv)) - (performance-region - 'uncompress - (bytevector-uncompress c-bv))))]) - (add-performance-memory! 'faslin-code (bytevector-length bv)) (cond - [(eq? format 'interpret) - (let ([r (performance-region - 'faslin-code - (fasl-read (open-bytevector-input-port bv)))]) - (performance-region - 'outer - (run-interpret r paths)))] - [else - (let ([proc (performance-region - 'faslin-code - (code-from-bytevector bv))]) - (if (null? paths) - proc - (#%apply proc paths)))]))) + [(eq? format 'interpret) (cross-fasl-to-string machine s)] + [else (cross-compile machine s)]))) + + (define (eval-from-bytevector bv paths format) + (add-performance-memory! 'faslin-code (bytevector-length bv)) + (cond + [(eq? format 'interpret) + (let ([r (performance-region + 'faslin-code + (fasl-read (open-bytevector-input-port bv)))]) + (performance-region + 'outer + (run-interpret r paths)))] + [else + (let ([proc (performance-region + 'faslin-code + (code-from-bytevector bv))]) + (if (null? paths) + proc + (#%apply proc paths)))])) (define (code-from-bytevector bv) (let ([i (open-bytevector-input-port bv)]) @@ -326,21 +319,6 @@ 'outer (r))))) - (define (bytevector-uncompressed-fasl? bv) - ;; There's not actually a way to distinguish a fasl header from a - ;; compression header, but the fasl header as a compression header - ;; would mean a > 1GB uncompressed bytevector, so we can safely - ;; assume that it's a fasl stream in that case. - (and (> (bytevector-length bv) 8) - (fx= 0 (bytevector-u8-ref bv 0)) - (fx= 0 (bytevector-u8-ref bv 1)) - (fx= 0 (bytevector-u8-ref bv 2)) - (fx= 0 (bytevector-u8-ref bv 3)) - (fx= (char->integer #\c) (bytevector-u8-ref bv 4)) - (fx= (char->integer #\h) (bytevector-u8-ref bv 5)) - (fx= (char->integer #\e) (bytevector-u8-ref bv 6)) - (fx= (char->integer #\z) (bytevector-u8-ref bv 7)))) - (define-record-type wrapped-code (fields (mutable content) ; bytevector for 'lambda mode; annotation or (vector hash annotation) for 'jit mode arity-mask @@ -1231,6 +1209,8 @@ (enable-arithmetic-left-associative #t) (expand-omit-library-invocations #t) (enable-error-source-expression #f) + (fasl-compressed #f) + (compile-omit-concatenate-support #t) ;; Avoid gensyms for generated record-tyope UIDs. Otherwise, ;; printing one of those gensyms --- perhaps when producing a trace diff --git a/racket/src/cs/linklet/write.ss b/racket/src/cs/linklet/write.ss index d101c7089c..659d4559b1 100644 --- a/racket/src/cs/linklet/write.ss +++ b/racket/src/cs/linklet/write.ss @@ -46,19 +46,10 @@ ;; Before fasl conversion, change 'cross or 'faslable-unsafe to 'faslable (define (adjust-cross-perparation l) - (adjust-linklet-compress - (let ([p (linklet-preparation l)]) - (if (or (pair? p) (eq? p 'faslable-unsafe)) - (set-linklet-preparation l 'faslable) - l)))) - -(define (adjust-linklet-compress l) - (if (or compress-code? - (bytevector-uncompressed-fasl? (linklet-code l))) - l - (set-linklet-code l - (bytevector-uncompress (linklet-code l)) - (linklet-preparation l)))) + (let ([p (linklet-preparation l)]) + (if (or (pair? p) (eq? p 'faslable-unsafe)) + (set-linklet-preparation l 'faslable) + l))) (define (check-fasl-preparation l) (case (linklet-preparation l) diff --git a/racket/src/racket/src/schvers.h b/racket/src/racket/src/schvers.h index 28ab2a0ca2..04ccfb5cf3 100644 --- a/racket/src/racket/src/schvers.h +++ b/racket/src/racket/src/schvers.h @@ -16,7 +16,7 @@ #define MZSCHEME_VERSION_X 7 #define MZSCHEME_VERSION_Y 8 #define MZSCHEME_VERSION_Z 0 -#define MZSCHEME_VERSION_W 2 +#define MZSCHEME_VERSION_W 3 /* A level of indirection makes `#` work as needed: */ #define AS_a_STR_HELPER(x) #x