Merge pull request #176 from mflatt/compress

add bytevector-compress and bytevector-decompress
original commit: 938b24f138c44d7ceed6fd996889ca81e3b908de
This commit is contained in:
R. Kent Dybvig 2017-06-22 21:42:29 -04:00 committed by GitHub
commit 19c9dd8ac0
11 changed files with 235 additions and 1 deletions

4
LOG
View File

@ -523,3 +523,7 @@
c/Mf-* c/Mf-*
- Suppress warnings from implicit fallthrough in case labels. - Suppress warnings from implicit fallthrough in case labels.
Mf-{a6,arm32,i3,ppc,ta6,ti3,tpp32}le Mf-{a6,arm32,i3,ppc,ta6,ti3,tpp32}le
- added bytevector-compress and bytevector-uncompress
bytevector.ss, primdata.ss, new-io.c, prim5.c, externs.h,
objects.stex, release_notes.stex,
bytevector.ms, root-experr*

View File

@ -197,6 +197,12 @@ extern ptr S_get_fd_length PROTO((ptr file, IBOOL gzflag));
extern ptr S_set_fd_length PROTO((ptr file, ptr length, IBOOL gzflag)); extern ptr S_set_fd_length PROTO((ptr file, ptr length, IBOOL gzflag));
extern void S_new_io_init PROTO((void)); extern void S_new_io_init PROTO((void));
extern uptr S_bytevector_compress_size PROTO((iptr s_count));
extern ptr S_bytevector_compress PROTO((ptr dest_bv, iptr d_start, iptr d_count,
ptr src_bv, iptr s_start, iptr s_count));
extern ptr S_bytevector_uncompress PROTO((ptr dest_bv, iptr d_start, iptr d_count,
ptr src_bv, iptr s_start, iptr s_count));
/* thread.c */ /* thread.c */
extern void S_thread_init PROTO((void)); extern void S_thread_init PROTO((void));
extern ptr S_create_thread_object PROTO((const char *who, ptr p_tc)); extern ptr S_create_thread_object PROTO((const char *who, ptr p_tc));

View File

@ -54,6 +54,7 @@ static ptr new_open_output_fd_helper PROTO((const char *filename, INT mode,
INT append, INT lock, INT replace, INT compressed)); INT append, INT lock, INT replace, INT compressed));
static INT lockfile PROTO((INT fd)); static INT lockfile PROTO((INT fd));
static ptr make_gzxfile PROTO((int fd, gzFile file)); static ptr make_gzxfile PROTO((int fd, gzFile file));
static int is_valid_zlib_length(iptr count);
/* /*
not_ok_is_fatal: !ok definitely implies error, so ignore gzerror not_ok_is_fatal: !ok definitely implies error, so ignore gzerror
@ -783,3 +784,65 @@ void S_new_io_init() {
_setmode(_fileno(stderr), O_BINARY); _setmode(_fileno(stderr), O_BINARY);
#endif /* WIN32 */ #endif /* WIN32 */
} }
static int is_valid_zlib_length(iptr count) {
/* A zlib `uLong` may be the same as `unsigned long`,
which is not as big as `iptr` on 64-bit Windows. */
return count == (iptr)(uLong)count;
}
/* Accept `iptr` because we expect it to represent a bytevector size,
which always fits in `iptr`. Return `uptr`, because the result might
not fit in `iptr`. */
uptr S_bytevector_compress_size(iptr s_count) {
if (is_valid_zlib_length(s_count))
return compressBound(s_count);
else {
/* Compression will report "source too long" */
return 0;
}
}
ptr S_bytevector_compress(ptr dest_bv, iptr d_start, iptr d_count,
ptr src_bv, iptr s_start, iptr s_count) {
/* On error, an message-template string with ~s for the bytevector */
int r;
uLong destLen;
if (!is_valid_zlib_length(s_count))
return Sstring("source bytevector ~s is too large");
destLen = d_count;
r = compress(&BVIT(dest_bv, d_start), &destLen, &BVIT(src_bv, s_start), s_count);
if (r == Z_OK)
return FIX(destLen);
else if (r == Z_BUF_ERROR)
return Sstring("destination bytevector is too small for compressed form of ~s");
else
return Sstring("internal error compressing ~s");
}
ptr S_bytevector_uncompress(ptr dest_bv, iptr d_start, iptr d_count,
ptr src_bv, iptr s_start, iptr s_count) {
/* On error, an message-template string with ~s for the bytevector */
int r;
uLongf destLen;
if (!is_valid_zlib_length(d_count))
return Sstring("expected result size of uncompressed source ~s is too large");
destLen = d_count;
r = uncompress(&BVIT(dest_bv, d_start), &destLen, &BVIT(src_bv, s_start), s_count);
if (r == Z_OK)
return FIX(destLen);
else if (r == Z_BUF_ERROR)
return Sstring("uncompressed ~s is larger than expected size");
else if (r == Z_DATA_ERROR)
return Sstring("invalid data in source bytevector ~s");
else
return Sstring("internal error uncompressing ~s");
}

View File

@ -1554,6 +1554,10 @@ void S_prim5_init() {
Sforeign_symbol("(cs)get_fd_length", (void*)S_get_fd_length); Sforeign_symbol("(cs)get_fd_length", (void*)S_get_fd_length);
Sforeign_symbol("(cs)set_fd_length", (void*)S_set_fd_length); Sforeign_symbol("(cs)set_fd_length", (void*)S_set_fd_length);
Sforeign_symbol("(cs)bytevector_compress_size", (void*)S_bytevector_compress_size);
Sforeign_symbol("(cs)bytevector_compress", (void*)S_bytevector_compress);
Sforeign_symbol("(cs)bytevector_uncompress", (void*)S_bytevector_uncompress);
Sforeign_symbol("(cs)logand", (void *)S_logand); Sforeign_symbol("(cs)logand", (void *)S_logand);
Sforeign_symbol("(cs)logbitp", (void *)S_logbitp); Sforeign_symbol("(cs)logbitp", (void *)S_logbitp);
Sforeign_symbol("(cs)logbit0", (void *)S_logbit0); Sforeign_symbol("(cs)logbit0", (void *)S_logbit0);

View File

@ -1129,7 +1129,7 @@ Negative values are stored as their two's complement equivalent.
%---------------------------------------------------------------------------- %----------------------------------------------------------------------------
\entryheader \entryheader
\formdef{bytevector->immutable-bytevector}{\categoryprocedure}{(bytevector->immutable-bytevector \var{bytevector})} \formdef{bytevector->immutable-bytevector}{\categoryprocedure}{(bytevector->immutable-bytevector \var{bytevector})}
\returns an immutable \var{bytevector} equal to \var{bytevector} \returns an immutable bytevector equal to \var{bytevector}
\listlibraries \listlibraries
\endentryheader \endentryheader
@ -1144,6 +1144,33 @@ is immutable; otherwise, the result is an immutable bytevector with the same con
\endschemedisplay \endschemedisplay
%----------------------------------------------------------------------------
\entryheader
\formdef{bytevector-compress}{\categoryprocedure}{(bytevector-compress \var{bytevector})}
\returns a new bytevector containing compressed content of \var{bytevector}
\listlibraries
\endentryheader
\noindent
The result is the raw compressed data with a minimal header to record
the uncompressed size and the compression mode. The result does not include
the header that is written by port-based compression using the
\scheme{compressed} option.
%----------------------------------------------------------------------------
\entryheader
\formdef{bytevector-uncompress}{\categoryprocedure}{(bytevector-uncompress \var{bytevector})}
\returns a bytevector containing uncompressed content of \var{bytevector}
\listlibraries
\endentryheader
\noindent
Uncompresses a \var{bytevector} produced by
\scheme{bytevector-compress} to a new bytevector with the same content
as the original given to \scheme{bytevector-compress}.
\section{Boxes\label{SECTBOXES}} \section{Boxes\label{SECTBOXES}}
\index{boxes}Boxes are single-cell objects that are primarily useful for providing \index{boxes}Boxes are single-cell objects that are primarily useful for providing

View File

@ -11011,3 +11011,45 @@
(number? (bytevector-ieee-single-native-ref immutable-100-bytevector 0)) (number? (bytevector-ieee-single-native-ref immutable-100-bytevector 0))
(number? (bytevector-ieee-double-native-ref immutable-100-bytevector 0)) (number? (bytevector-ieee-double-native-ref immutable-100-bytevector 0))
) )
(mat bytevector-compress
(error? (bytevector-compress 7))
(error? (bytevector-compress "hello"))
(error? (bytevector-uncompress 7))
(error? (bytevector-uncompress "hello"))
(begin
(define (round-trip-bytevector-compress bv)
(equal? (bytevector-uncompress (bytevector-compress bv))
bv))
(round-trip-bytevector-compress (string->utf8 "hello")))
(round-trip-bytevector-compress '#vu8())
(round-trip-bytevector-compress (apply bytevector
(let loop ([i 0])
(if (= i 4096)
'()
(cons (bitwise-and i 255)
(loop (+ i 1)))))))
(error?
;; Need at least 8 bytes for result size
(bytevector-uncompress '#vu8()))
(error?
;; Need at least 8 bytes for result size
(bytevector-uncompress '#vu8(0 0 0 0 0 0 255)))
(error?
;; Fail if the uncompressed result is too big
(bytevector-uncompress (let ([bv (bytevector-compress (string->utf8 "hello"))])
(bytevector-u64-set! bv 0 (sub1 (bytevector-u64-ref bv 0 (endianness big))) (endianness big))
bv)))
(error?
;; Fail if the uncompressed result is too small
(bytevector-uncompress (let ([bv (bytevector-compress (string->utf8 "hello"))])
(bytevector-u64-set! bv 0 (add1 (bytevector-u64-ref bv 0 (endianness big))) (endianness big))
bv)))
(error?
;; Compressed data always starts with 0x78, so this one isn't valid:
(bytevector-uncompress '#vu8(0 0 0 0 0 0 0 255 1 2 3)))
(error?
;; Claming a too-large size in the header should fail with a suitable message:
(bytevector-uncompress '#vu8(255 255 255 255 255 255 255 255 1 2 3)))
)

View File

@ -3573,6 +3573,16 @@ bytevector.mo:Expected error in mat bytevector->immutable-bytevector: "bytevecto
bytevector.mo:Expected error in mat bytevector->immutable-bytevector: "bytevector-fill!: #vu8(42 42 42 42 42 42 ...) is not a mutable bytevector". bytevector.mo:Expected error in mat bytevector->immutable-bytevector: "bytevector-fill!: #vu8(42 42 42 42 42 42 ...) is not a mutable bytevector".
bytevector.mo:Expected error in mat bytevector->immutable-bytevector: "bytevector-copy!: #vu8(42 42 42 42 42 42 ...) is not a mutable bytevector". bytevector.mo:Expected error in mat bytevector->immutable-bytevector: "bytevector-copy!: #vu8(42 42 42 42 42 42 ...) is not a mutable bytevector".
bytevector.mo:Expected error in mat bytevector->immutable-bytevector: "bytevector-truncate!: #vu8(42 42 42 42 42 42 ...) is not a mutable bytevector". bytevector.mo:Expected error in mat bytevector->immutable-bytevector: "bytevector-truncate!: #vu8(42 42 42 42 42 42 ...) is not a mutable bytevector".
bytevector.mo:Expected error in mat bytevector-compress: "bytevector-compress: 7 is not a bytevector".
bytevector.mo:Expected error in mat bytevector-compress: "bytevector-compress: "hello" is not a bytevector".
bytevector.mo:Expected error in mat bytevector-compress: "bytevector-uncompress: 7 is not a bytevector".
bytevector.mo:Expected error in mat bytevector-compress: "bytevector-uncompress: "hello" is not a bytevector".
bytevector.mo:Expected error in mat bytevector-compress: "bytevector-uncompress: invalid data in source bytevector #vu8()".
bytevector.mo:Expected error in mat bytevector-compress: "bytevector-uncompress: invalid data in source bytevector #vu8(0 0 0 0 0 0 ...)".
bytevector.mo:Expected error in mat bytevector-compress: "bytevector-uncompress: uncompressed #vu8(0 0 0 0 0 0 ...) is larger than expected size".
bytevector.mo:Expected error in mat bytevector-compress: "bytevector-uncompress: uncompressed size 5 for #vu8(0 0 0 0 0 0 ...) is smaller than expected size 6".
bytevector.mo:Expected error in mat bytevector-compress: "bytevector-uncompress: invalid data in source bytevector #vu8(0 0 0 0 0 0 ...)".
bytevector.mo:Expected error in mat bytevector-compress: "bytevector-uncompress: bytevector #vu8(255 255 255 255 255 255 ...) claims invalid uncompressed size <int>".
misc.mo:Expected error in mat compiler1: "variable i-am-not-bound is not bound". misc.mo:Expected error in mat compiler1: "variable i-am-not-bound is not bound".
misc.mo:Expected error in mat compiler1: "attempt to apply non-procedure oops". misc.mo:Expected error in mat compiler1: "attempt to apply non-procedure oops".
misc.mo:Expected error in mat compiler1: "incorrect argument count in call (g (list))". misc.mo:Expected error in mat compiler1: "incorrect argument count in call (g (list))".

View File

@ -3573,6 +3573,16 @@ bytevector.mo:Expected error in mat bytevector->immutable-bytevector: "bytevecto
bytevector.mo:Expected error in mat bytevector->immutable-bytevector: "bytevector-fill!: #vu8(42 42 42 42 42 42 ...) is not a mutable bytevector". bytevector.mo:Expected error in mat bytevector->immutable-bytevector: "bytevector-fill!: #vu8(42 42 42 42 42 42 ...) is not a mutable bytevector".
bytevector.mo:Expected error in mat bytevector->immutable-bytevector: "bytevector-copy!: #vu8(42 42 42 42 42 42 ...) is not a mutable bytevector". bytevector.mo:Expected error in mat bytevector->immutable-bytevector: "bytevector-copy!: #vu8(42 42 42 42 42 42 ...) is not a mutable bytevector".
bytevector.mo:Expected error in mat bytevector->immutable-bytevector: "bytevector-truncate!: #vu8(42 42 42 42 42 42 ...) is not a mutable bytevector". bytevector.mo:Expected error in mat bytevector->immutable-bytevector: "bytevector-truncate!: #vu8(42 42 42 42 42 42 ...) is not a mutable bytevector".
bytevector.mo:Expected error in mat bytevector-compress: "bytevector-compress: 7 is not a bytevector".
bytevector.mo:Expected error in mat bytevector-compress: "bytevector-compress: "hello" is not a bytevector".
bytevector.mo:Expected error in mat bytevector-compress: "bytevector-uncompress: 7 is not a bytevector".
bytevector.mo:Expected error in mat bytevector-compress: "bytevector-uncompress: "hello" is not a bytevector".
bytevector.mo:Expected error in mat bytevector-compress: "bytevector-uncompress: invalid data in source bytevector #vu8()".
bytevector.mo:Expected error in mat bytevector-compress: "bytevector-uncompress: invalid data in source bytevector #vu8(0 0 0 0 0 0 ...)".
bytevector.mo:Expected error in mat bytevector-compress: "bytevector-uncompress: uncompressed #vu8(0 0 0 0 0 0 ...) is larger than expected size".
bytevector.mo:Expected error in mat bytevector-compress: "bytevector-uncompress: uncompressed size 5 for #vu8(0 0 0 0 0 0 ...) is smaller than expected size 6".
bytevector.mo:Expected error in mat bytevector-compress: "bytevector-uncompress: invalid data in source bytevector #vu8(0 0 0 0 0 0 ...)".
bytevector.mo:Expected error in mat bytevector-compress: "bytevector-uncompress: bytevector #vu8(255 255 255 255 255 255 ...) claims invalid uncompressed size <int>".
misc.mo:Expected error in mat compiler1: "variable i-am-not-bound is not bound". misc.mo:Expected error in mat compiler1: "variable i-am-not-bound is not bound".
misc.mo:Expected error in mat compiler1: "attempt to apply non-procedure oops". misc.mo:Expected error in mat compiler1: "attempt to apply non-procedure oops".
misc.mo:Expected error in mat compiler1: "incorrect argument count in call (g (list))". misc.mo:Expected error in mat compiler1: "incorrect argument count in call (g (list))".

View File

@ -105,6 +105,13 @@ For example, the arity mask for a two-argument procedure such as
while the arity mask for a procedure that accepts one or more arguments, while the arity mask for a procedure that accepts one or more arguments,
such as \var{list*}, is $-2$ (all but bit 0 set). such as \var{list*}, is $-2$ (all but bit 0 set).
\subsection{Bytevector compression (9.4.1)}
The new primitive procedures \scheme{bytevector-compress} and
\scheme{bytevector-decompress} exposes for bytevectors the kind of
compression functionality that is used for files with the
\scheme{compressed} option.
\subsection{High-precision clock time in Windows 8 and up (9.4.1)} \subsection{High-precision clock time in Windows 8 and up (9.4.1)}
When running on Windows 8 and up, Chez Scheme uses the high-precision When running on Windows 8 and up, Chez Scheme uses the high-precision

View File

@ -1452,4 +1452,63 @@
(list->little ls size) (list->little ls size)
(unrecognized-endianness who eness))])))) (unrecognized-endianness who eness))]))))
) )
(let ()
;; Store uncompressed size as u64:
(define uncompressed-length-length (ftype-sizeof integer-64))
;; Always big-endian, so that compressed data is portable.
;; It might be useful somehow that valid compressed data always starts
;; with a 0 byte; otherwise, the expected size would be unrealistically big.
(define uncompressed-length-endianness (endianness big))
(define $bytevector-compress-size
(foreign-procedure "(cs)bytevector_compress_size" (iptr) uptr))
(define $bytevector-compress
(foreign-procedure "(cs)bytevector_compress" (scheme-object iptr iptr scheme-object iptr iptr) scheme-object))
(define $bytevector-uncompress
(foreign-procedure "(cs)bytevector_uncompress" (scheme-object iptr iptr scheme-object iptr iptr) scheme-object))
(set-who! bytevector-compress
(lambda (bv)
(unless (bytevector? bv) (not-a-bytevector who bv))
(let* ([dest-max-len ($bytevector-compress-size (bytevector-length bv))]
[dest-alloc-len (min (+ dest-max-len uncompressed-length-length)
;; In the unlikely event of a non-fixnum requested size...
(constant maximum-bytevector-length))]
[dest-bv (make-bytevector dest-alloc-len)])
(let ([r ($bytevector-compress dest-bv
uncompressed-length-length
(fx- dest-alloc-len uncompressed-length-length)
bv
0
(bytevector-length bv))])
(cond
[(string? r)
($oops who r bv)]
[else
($bytevector-u64-set! dest-bv 0 (bytevector-length bv) uncompressed-length-endianness who)
(bytevector-truncate! dest-bv (fx+ r uncompressed-length-length))])))))
(set-who! bytevector-uncompress
(lambda (bv)
(unless (bytevector? bv) (not-a-bytevector who bv))
(unless (>= (bytevector-length bv) uncompressed-length-length)
($oops who "invalid data in source bytevector ~s" bv))
(let ([dest-length ($bytevector-u64-ref bv 0 uncompressed-length-endianness who)])
(unless (and (fixnum? dest-length)
($fxu< dest-length (constant maximum-bytevector-length)))
($oops who "bytevector ~s claims invalid uncompressed size ~s" bv dest-length))
(let* ([dest-bv (make-bytevector dest-length)]
[r ($bytevector-uncompress dest-bv
0
dest-length
bv
uncompressed-length-length
(fx- (bytevector-length bv) uncompressed-length-length))])
(cond
[(string? r) ($oops who r bv)]
[(fx= r dest-length) dest-bv]
[else
($oops who "uncompressed size ~s for ~s is smaller than expected size ~a" r bv dest-length)]))))))
) )

View File

@ -1160,6 +1160,8 @@
(bytevector-u48-set! [sig [(bytevector sub-index symbol u48) -> (void)]] [flags true]) (bytevector-u48-set! [sig [(bytevector sub-index symbol u48) -> (void)]] [flags true])
(bytevector-u56-ref [sig [(bytevector sub-index symbol) -> (u56)]] [flags true mifoldable discard]) (bytevector-u56-ref [sig [(bytevector sub-index symbol) -> (u56)]] [flags true mifoldable discard])
(bytevector-u56-set! [sig [(bytevector sub-index symbol u56) -> (void)]] [flags true]) (bytevector-u56-set! [sig [(bytevector sub-index symbol u56) -> (void)]] [flags true])
(bytevector-compress [sig [(ptr) -> (ptr)]] [flags])
(bytevector-uncompress [sig [(ptr) -> (ptr)]] [flags])
(call/1cc [sig [(procedure) -> (ptr ...)]] [flags]) (call/1cc [sig [(procedure) -> (ptr ...)]] [flags])
(call-with-input-file [sig [(pathname procedure) (pathname procedure sub-ptr) -> (ptr ...)]] [flags ieee r5rs]) ; has options argument (call-with-input-file [sig [(pathname procedure) (pathname procedure sub-ptr) -> (ptr ...)]] [flags ieee r5rs]) ; has options argument
(call-with-output-file [sig [(pathname procedure) (pathname procedure sub-ptr) -> (ptr ...)]] [flags ieee r5rs]) ; has options argument (call-with-output-file [sig [(pathname procedure) (pathname procedure sub-ptr) -> (ptr ...)]] [flags ieee r5rs]) ; has options argument