diff --git a/LOG b/LOG index b9f706b8b8..1e08bd1a35 100644 --- a/LOG +++ b/LOG @@ -523,3 +523,7 @@ c/Mf-* - Suppress warnings from implicit fallthrough in case labels. Mf-{a6,arm32,i3,ppc,ta6,ti3,tpp32}le +- added bytevector-compress and bytevector-uncompress + bytevector.ss, primdata.ss, new-io.c, prim5.c, externs.h, + objects.stex, release_notes.stex, + bytevector.ms, root-experr* diff --git a/c/externs.h b/c/externs.h index c554ec1e5f..8014632f5b 100644 --- a/c/externs.h +++ b/c/externs.h @@ -197,6 +197,12 @@ extern ptr S_get_fd_length PROTO((ptr file, IBOOL gzflag)); extern ptr S_set_fd_length PROTO((ptr file, ptr length, IBOOL gzflag)); extern void S_new_io_init PROTO((void)); +extern uptr S_bytevector_compress_size PROTO((iptr s_count)); +extern ptr S_bytevector_compress PROTO((ptr dest_bv, iptr d_start, iptr d_count, + ptr src_bv, iptr s_start, iptr s_count)); +extern ptr S_bytevector_uncompress PROTO((ptr dest_bv, iptr d_start, iptr d_count, + ptr src_bv, iptr s_start, iptr s_count)); + /* thread.c */ extern void S_thread_init PROTO((void)); extern ptr S_create_thread_object PROTO((const char *who, ptr p_tc)); diff --git a/c/new-io.c b/c/new-io.c index 6f1ce810d1..a3d3c48222 100644 --- a/c/new-io.c +++ b/c/new-io.c @@ -54,6 +54,7 @@ static ptr new_open_output_fd_helper PROTO((const char *filename, INT mode, INT append, INT lock, INT replace, INT compressed)); static INT lockfile PROTO((INT fd)); static ptr make_gzxfile PROTO((int fd, gzFile file)); +static int is_valid_zlib_length(iptr count); /* not_ok_is_fatal: !ok definitely implies error, so ignore gzerror @@ -783,3 +784,65 @@ void S_new_io_init() { _setmode(_fileno(stderr), O_BINARY); #endif /* WIN32 */ } + +static int is_valid_zlib_length(iptr count) { + /* A zlib `uLong` may be the same as `unsigned long`, + which is not as big as `iptr` on 64-bit Windows. */ + return count == (iptr)(uLong)count; +} + +/* Accept `iptr` because we expect it to represent a bytevector size, + which always fits in `iptr`. Return `uptr`, because the result might + not fit in `iptr`. */ +uptr S_bytevector_compress_size(iptr s_count) { + if (is_valid_zlib_length(s_count)) + return compressBound(s_count); + else { + /* Compression will report "source too long" */ + return 0; + } +} + +ptr S_bytevector_compress(ptr dest_bv, iptr d_start, iptr d_count, + ptr src_bv, iptr s_start, iptr s_count) { + /* On error, an message-template string with ~s for the bytevector */ + int r; + uLong destLen; + + if (!is_valid_zlib_length(s_count)) + return Sstring("source bytevector ~s is too large"); + + destLen = d_count; + + r = compress(&BVIT(dest_bv, d_start), &destLen, &BVIT(src_bv, s_start), s_count); + + if (r == Z_OK) + return FIX(destLen); + else if (r == Z_BUF_ERROR) + return Sstring("destination bytevector is too small for compressed form of ~s"); + else + return Sstring("internal error compressing ~s"); +} + +ptr S_bytevector_uncompress(ptr dest_bv, iptr d_start, iptr d_count, + ptr src_bv, iptr s_start, iptr s_count) { + /* On error, an message-template string with ~s for the bytevector */ + int r; + uLongf destLen; + + if (!is_valid_zlib_length(d_count)) + return Sstring("expected result size of uncompressed source ~s is too large"); + + destLen = d_count; + + r = uncompress(&BVIT(dest_bv, d_start), &destLen, &BVIT(src_bv, s_start), s_count); + + if (r == Z_OK) + return FIX(destLen); + else if (r == Z_BUF_ERROR) + return Sstring("uncompressed ~s is larger than expected size"); + else if (r == Z_DATA_ERROR) + return Sstring("invalid data in source bytevector ~s"); + else + return Sstring("internal error uncompressing ~s"); +} diff --git a/c/prim5.c b/c/prim5.c index 0cffbc2189..5bee635c00 100644 --- a/c/prim5.c +++ b/c/prim5.c @@ -1554,6 +1554,10 @@ void S_prim5_init() { Sforeign_symbol("(cs)get_fd_length", (void*)S_get_fd_length); Sforeign_symbol("(cs)set_fd_length", (void*)S_set_fd_length); + Sforeign_symbol("(cs)bytevector_compress_size", (void*)S_bytevector_compress_size); + Sforeign_symbol("(cs)bytevector_compress", (void*)S_bytevector_compress); + Sforeign_symbol("(cs)bytevector_uncompress", (void*)S_bytevector_uncompress); + Sforeign_symbol("(cs)logand", (void *)S_logand); Sforeign_symbol("(cs)logbitp", (void *)S_logbitp); Sforeign_symbol("(cs)logbit0", (void *)S_logbit0); diff --git a/csug/objects.stex b/csug/objects.stex index 0d4e5569e4..d243e7107a 100644 --- a/csug/objects.stex +++ b/csug/objects.stex @@ -1129,7 +1129,7 @@ Negative values are stored as their two's complement equivalent. %---------------------------------------------------------------------------- \entryheader \formdef{bytevector->immutable-bytevector}{\categoryprocedure}{(bytevector->immutable-bytevector \var{bytevector})} -\returns an immutable \var{bytevector} equal to \var{bytevector} +\returns an immutable bytevector equal to \var{bytevector} \listlibraries \endentryheader @@ -1144,6 +1144,33 @@ is immutable; otherwise, the result is an immutable bytevector with the same con \endschemedisplay +%---------------------------------------------------------------------------- +\entryheader +\formdef{bytevector-compress}{\categoryprocedure}{(bytevector-compress \var{bytevector})} +\returns a new bytevector containing compressed content of \var{bytevector} +\listlibraries +\endentryheader + +\noindent +The result is the raw compressed data with a minimal header to record +the uncompressed size and the compression mode. The result does not include +the header that is written by port-based compression using the +\scheme{compressed} option. + + +%---------------------------------------------------------------------------- +\entryheader +\formdef{bytevector-uncompress}{\categoryprocedure}{(bytevector-uncompress \var{bytevector})} +\returns a bytevector containing uncompressed content of \var{bytevector} +\listlibraries +\endentryheader + +\noindent +Uncompresses a \var{bytevector} produced by +\scheme{bytevector-compress} to a new bytevector with the same content +as the original given to \scheme{bytevector-compress}. + + \section{Boxes\label{SECTBOXES}} \index{boxes}Boxes are single-cell objects that are primarily useful for providing diff --git a/mats/bytevector.ms b/mats/bytevector.ms index e1492a5c35..61709e76d6 100644 --- a/mats/bytevector.ms +++ b/mats/bytevector.ms @@ -11011,3 +11011,45 @@ (number? (bytevector-ieee-single-native-ref immutable-100-bytevector 0)) (number? (bytevector-ieee-double-native-ref immutable-100-bytevector 0)) ) + + +(mat bytevector-compress + (error? (bytevector-compress 7)) + (error? (bytevector-compress "hello")) + (error? (bytevector-uncompress 7)) + (error? (bytevector-uncompress "hello")) + (begin + (define (round-trip-bytevector-compress bv) + (equal? (bytevector-uncompress (bytevector-compress bv)) + bv)) + (round-trip-bytevector-compress (string->utf8 "hello"))) + (round-trip-bytevector-compress '#vu8()) + (round-trip-bytevector-compress (apply bytevector + (let loop ([i 0]) + (if (= i 4096) + '() + (cons (bitwise-and i 255) + (loop (+ i 1))))))) + (error? + ;; Need at least 8 bytes for result size + (bytevector-uncompress '#vu8())) + (error? + ;; Need at least 8 bytes for result size + (bytevector-uncompress '#vu8(0 0 0 0 0 0 255))) + (error? + ;; Fail if the uncompressed result is too big + (bytevector-uncompress (let ([bv (bytevector-compress (string->utf8 "hello"))]) + (bytevector-u64-set! bv 0 (sub1 (bytevector-u64-ref bv 0 (endianness big))) (endianness big)) + bv))) + (error? + ;; Fail if the uncompressed result is too small + (bytevector-uncompress (let ([bv (bytevector-compress (string->utf8 "hello"))]) + (bytevector-u64-set! bv 0 (add1 (bytevector-u64-ref bv 0 (endianness big))) (endianness big)) + bv))) + (error? + ;; Compressed data always starts with 0x78, so this one isn't valid: + (bytevector-uncompress '#vu8(0 0 0 0 0 0 0 255 1 2 3))) + (error? + ;; Claming a too-large size in the header should fail with a suitable message: + (bytevector-uncompress '#vu8(255 255 255 255 255 255 255 255 1 2 3))) +) diff --git a/mats/root-experr-compile-0-f-f-f b/mats/root-experr-compile-0-f-f-f index 71767263fb..503bbb6127 100644 --- a/mats/root-experr-compile-0-f-f-f +++ b/mats/root-experr-compile-0-f-f-f @@ -3573,6 +3573,16 @@ bytevector.mo:Expected error in mat bytevector->immutable-bytevector: "bytevecto bytevector.mo:Expected error in mat bytevector->immutable-bytevector: "bytevector-fill!: #vu8(42 42 42 42 42 42 ...) is not a mutable bytevector". bytevector.mo:Expected error in mat bytevector->immutable-bytevector: "bytevector-copy!: #vu8(42 42 42 42 42 42 ...) is not a mutable bytevector". bytevector.mo:Expected error in mat bytevector->immutable-bytevector: "bytevector-truncate!: #vu8(42 42 42 42 42 42 ...) is not a mutable bytevector". +bytevector.mo:Expected error in mat bytevector-compress: "bytevector-compress: 7 is not a bytevector". +bytevector.mo:Expected error in mat bytevector-compress: "bytevector-compress: "hello" is not a bytevector". +bytevector.mo:Expected error in mat bytevector-compress: "bytevector-uncompress: 7 is not a bytevector". +bytevector.mo:Expected error in mat bytevector-compress: "bytevector-uncompress: "hello" is not a bytevector". +bytevector.mo:Expected error in mat bytevector-compress: "bytevector-uncompress: invalid data in source bytevector #vu8()". +bytevector.mo:Expected error in mat bytevector-compress: "bytevector-uncompress: invalid data in source bytevector #vu8(0 0 0 0 0 0 ...)". +bytevector.mo:Expected error in mat bytevector-compress: "bytevector-uncompress: uncompressed #vu8(0 0 0 0 0 0 ...) is larger than expected size". +bytevector.mo:Expected error in mat bytevector-compress: "bytevector-uncompress: uncompressed size 5 for #vu8(0 0 0 0 0 0 ...) is smaller than expected size 6". +bytevector.mo:Expected error in mat bytevector-compress: "bytevector-uncompress: invalid data in source bytevector #vu8(0 0 0 0 0 0 ...)". +bytevector.mo:Expected error in mat bytevector-compress: "bytevector-uncompress: bytevector #vu8(255 255 255 255 255 255 ...) claims invalid uncompressed size ". misc.mo:Expected error in mat compiler1: "variable i-am-not-bound is not bound". misc.mo:Expected error in mat compiler1: "attempt to apply non-procedure oops". misc.mo:Expected error in mat compiler1: "incorrect argument count in call (g (list))". diff --git a/mats/root-experr-compile-2-f-f-f b/mats/root-experr-compile-2-f-f-f index 71767263fb..503bbb6127 100644 --- a/mats/root-experr-compile-2-f-f-f +++ b/mats/root-experr-compile-2-f-f-f @@ -3573,6 +3573,16 @@ bytevector.mo:Expected error in mat bytevector->immutable-bytevector: "bytevecto bytevector.mo:Expected error in mat bytevector->immutable-bytevector: "bytevector-fill!: #vu8(42 42 42 42 42 42 ...) is not a mutable bytevector". bytevector.mo:Expected error in mat bytevector->immutable-bytevector: "bytevector-copy!: #vu8(42 42 42 42 42 42 ...) is not a mutable bytevector". bytevector.mo:Expected error in mat bytevector->immutable-bytevector: "bytevector-truncate!: #vu8(42 42 42 42 42 42 ...) is not a mutable bytevector". +bytevector.mo:Expected error in mat bytevector-compress: "bytevector-compress: 7 is not a bytevector". +bytevector.mo:Expected error in mat bytevector-compress: "bytevector-compress: "hello" is not a bytevector". +bytevector.mo:Expected error in mat bytevector-compress: "bytevector-uncompress: 7 is not a bytevector". +bytevector.mo:Expected error in mat bytevector-compress: "bytevector-uncompress: "hello" is not a bytevector". +bytevector.mo:Expected error in mat bytevector-compress: "bytevector-uncompress: invalid data in source bytevector #vu8()". +bytevector.mo:Expected error in mat bytevector-compress: "bytevector-uncompress: invalid data in source bytevector #vu8(0 0 0 0 0 0 ...)". +bytevector.mo:Expected error in mat bytevector-compress: "bytevector-uncompress: uncompressed #vu8(0 0 0 0 0 0 ...) is larger than expected size". +bytevector.mo:Expected error in mat bytevector-compress: "bytevector-uncompress: uncompressed size 5 for #vu8(0 0 0 0 0 0 ...) is smaller than expected size 6". +bytevector.mo:Expected error in mat bytevector-compress: "bytevector-uncompress: invalid data in source bytevector #vu8(0 0 0 0 0 0 ...)". +bytevector.mo:Expected error in mat bytevector-compress: "bytevector-uncompress: bytevector #vu8(255 255 255 255 255 255 ...) claims invalid uncompressed size ". misc.mo:Expected error in mat compiler1: "variable i-am-not-bound is not bound". misc.mo:Expected error in mat compiler1: "attempt to apply non-procedure oops". misc.mo:Expected error in mat compiler1: "incorrect argument count in call (g (list))". diff --git a/release_notes/release_notes.stex b/release_notes/release_notes.stex index d2a50b6fd9..b484eab0a2 100644 --- a/release_notes/release_notes.stex +++ b/release_notes/release_notes.stex @@ -105,6 +105,13 @@ For example, the arity mask for a two-argument procedure such as while the arity mask for a procedure that accepts one or more arguments, such as \var{list*}, is $-2$ (all but bit 0 set). +\subsection{Bytevector compression (9.4.1)} + +The new primitive procedures \scheme{bytevector-compress} and +\scheme{bytevector-decompress} exposes for bytevectors the kind of +compression functionality that is used for files with the +\scheme{compressed} option. + \subsection{High-precision clock time in Windows 8 and up (9.4.1)} When running on Windows 8 and up, Chez Scheme uses the high-precision diff --git a/s/bytevector.ss b/s/bytevector.ss index 44bccfaeb4..c45c46e013 100644 --- a/s/bytevector.ss +++ b/s/bytevector.ss @@ -1452,4 +1452,63 @@ (list->little ls size) (unrecognized-endianness who eness))])))) ) + + (let () + ;; Store uncompressed size as u64: + (define uncompressed-length-length (ftype-sizeof integer-64)) + ;; Always big-endian, so that compressed data is portable. + ;; It might be useful somehow that valid compressed data always starts + ;; with a 0 byte; otherwise, the expected size would be unrealistically big. + (define uncompressed-length-endianness (endianness big)) + + (define $bytevector-compress-size + (foreign-procedure "(cs)bytevector_compress_size" (iptr) uptr)) + (define $bytevector-compress + (foreign-procedure "(cs)bytevector_compress" (scheme-object iptr iptr scheme-object iptr iptr) scheme-object)) + (define $bytevector-uncompress + (foreign-procedure "(cs)bytevector_uncompress" (scheme-object iptr iptr scheme-object iptr iptr) scheme-object)) + + (set-who! bytevector-compress + (lambda (bv) + (unless (bytevector? bv) (not-a-bytevector who bv)) + (let* ([dest-max-len ($bytevector-compress-size (bytevector-length bv))] + [dest-alloc-len (min (+ dest-max-len uncompressed-length-length) + ;; In the unlikely event of a non-fixnum requested size... + (constant maximum-bytevector-length))] + [dest-bv (make-bytevector dest-alloc-len)]) + (let ([r ($bytevector-compress dest-bv + uncompressed-length-length + (fx- dest-alloc-len uncompressed-length-length) + bv + 0 + (bytevector-length bv))]) + (cond + [(string? r) + ($oops who r bv)] + [else + ($bytevector-u64-set! dest-bv 0 (bytevector-length bv) uncompressed-length-endianness who) + (bytevector-truncate! dest-bv (fx+ r uncompressed-length-length))]))))) + + (set-who! bytevector-uncompress + (lambda (bv) + (unless (bytevector? bv) (not-a-bytevector who bv)) + (unless (>= (bytevector-length bv) uncompressed-length-length) + ($oops who "invalid data in source bytevector ~s" bv)) + (let ([dest-length ($bytevector-u64-ref bv 0 uncompressed-length-endianness who)]) + (unless (and (fixnum? dest-length) + ($fxu< dest-length (constant maximum-bytevector-length))) + ($oops who "bytevector ~s claims invalid uncompressed size ~s" bv dest-length)) + (let* ([dest-bv (make-bytevector dest-length)] + [r ($bytevector-uncompress dest-bv + 0 + dest-length + bv + uncompressed-length-length + (fx- (bytevector-length bv) uncompressed-length-length))]) + (cond + [(string? r) ($oops who r bv)] + [(fx= r dest-length) dest-bv] + [else + ($oops who "uncompressed size ~s for ~s is smaller than expected size ~a" r bv dest-length)])))))) + ) diff --git a/s/primdata.ss b/s/primdata.ss index e331c003b7..e4e18e0d85 100644 --- a/s/primdata.ss +++ b/s/primdata.ss @@ -1160,6 +1160,8 @@ (bytevector-u48-set! [sig [(bytevector sub-index symbol u48) -> (void)]] [flags true]) (bytevector-u56-ref [sig [(bytevector sub-index symbol) -> (u56)]] [flags true mifoldable discard]) (bytevector-u56-set! [sig [(bytevector sub-index symbol u56) -> (void)]] [flags true]) + (bytevector-compress [sig [(ptr) -> (ptr)]] [flags]) + (bytevector-uncompress [sig [(ptr) -> (ptr)]] [flags]) (call/1cc [sig [(procedure) -> (ptr ...)]] [flags]) (call-with-input-file [sig [(pathname procedure) (pathname procedure sub-ptr) -> (ptr ...)]] [flags ieee r5rs]) ; has options argument (call-with-output-file [sig [(pathname procedure) (pathname procedure sub-ptr) -> (ptr ...)]] [flags ieee r5rs]) ; has options argument