From 82b2cda639c3f7479326b0e85b91d09aa6773488 Mon Sep 17 00:00:00 2001 From: dyb Date: Mon, 8 Apr 2019 15:37:21 -0700 Subject: [PATCH] compress-level parameter, improvement in lz4 compression, and various other related improvements - added compress-level parameter to select a compression level for file writing and changed the default for lz4 compression to do a better job compressing. finished splitting glz input routines apart from glz output routines and did a bit of other restructuring. removed gzxfile struct-as-bytevector wrapper and moved its fd into glzFile. moved DEACTIVATE to before glzdopen_input calls in S_new_open_input_fd and S_compress_input_fd, since glzdopen_input reads from the file and could block. the compress format and now level are now recorded directly the thread context. replaced as-gz? flag bit in compressed bytevector header word with a small number of bits recording the compression format at the bottom of the header word. flushed a couple of bytevector compression mats that depended on the old representation. (these last few changes should make adding new compression formats easier.) added s-directory build options to choose whether to compress and, if so, the format and level. compress-io.h, compress-io.c, new-io.c, equates.h, system.h, scheme.c, gc.c, io.ss, cmacros.ss, back.ss, bytevector.ss, primdata.ss, s/Mf-base, io.ms, mat.ss, bytevector.ms, root-experr*, release_notes.stex, io.stex, system.stex, objects.stex - improved the effectiveness of LZ4 boot-file compression to within 15% of gzip by increasing the lz4 output-port in_buffer size to 1<<18. With the previous size (1<<14) LZ4-compressed boot files were about 50% larger. set the lz4 input-port in_buffer and out_buffer sizes to 1<<12 and 1<<14. there's no clear win at present for larger input-port buffer sizes. compress-io.c - To reduce the memory hit for the increased output-port in_buffer size and the corresponding increase in computed out_buffer size, one output-side out_buffer is now allocated (lazily) per thread and stored in the thread context. The other buffers are now directly a part of the lz4File_out and lz4File_in structures rather than allocated separately. compress-io.c, scheme.c, gc.c, cmacros.ss - split out the buffer emit code from glzwrite_lz4 into a separate glzemit_lz4 helper that is now also used by gzclose so we can avoid dealing with a NULL buffer in glzwrite_lz4. glzwrite_lz4 also uses it to writing large buffers directly and avoid the memcpy. compress-io.c - replaced lz4File_out and lz4File_in mode enumeration with the compress format and inputp boolean. using switch to check and raising exceptions for unexpected values to further simplify adding new compression formats in the future. compress-io.c - replaced the never-defined struct lz4File pointer in glzFile union with the more specific struct lz4File_in_r and Lz4File_out_r pointers. compress-io.h, compress-io.c - added free of lz4 structures to gzclose. also changed file-close logic generally so that (1) port is marked closed before anything is freed to avoid dangling pointers in the case of an interrupt or error, and (2) structures are freed even in the case of a write or close error, before the error is reported. also now mallocing glz and lz4 structures after possibility of errors have passed where possible and freeing them when not. compress-io.c, io.ss - added return-value checks to malloc calls and to a couple of other C-library calls. compress-io.c - corrected EINTR checks to look at errno rather than return codes. compress-io.c - added S_ prefixes to the glz* exports externs.h, compress-io.c, new-io.c, scheme.c, fasl.c - added entries for mutex-name and mutex-thread threads.stex original commit: 722ffabef4c938bc92c0fe07f789a9ba350dc6c6 --- LOG | 69 +++ c/compress-io.c | 962 ++++++++++++++++++------------- c/compress-io.h | 35 +- c/externs.h | 31 +- c/fasl.c | 7 +- c/gc.c | 3 + c/new-io.c | 271 +++++---- c/scheme.c | 106 ++-- c/system.h | 3 +- c/thread.c | 6 +- checkin | 2 +- csug/io.stex | 48 +- csug/objects.stex | 7 +- csug/system.stex | 5 + csug/threads.stex | 20 + mats/bytevector.ms | 15 +- mats/io.ms | 59 +- mats/mat.ss | 14 +- mats/root-experr-compile-0-f-f-f | 22 +- mats/root-experr-compile-2-f-f-f | 26 +- release_notes/release_notes.stex | 27 +- s/Mf-base | 47 +- s/back.ss | 38 +- s/bytevector.ss | 43 +- s/cmacros.ss | 12 + s/io.ss | 22 +- s/primdata.ss | 1 + 27 files changed, 1185 insertions(+), 716 deletions(-) diff --git a/LOG b/LOG index 127737423e..4e7091df90 100644 --- a/LOG +++ b/LOG @@ -1268,3 +1268,72 @@ externs.h, system.h, expeditor.c, configure, Mf-*, Makefile.*nt, workarea, mat.ss, io.ms, io.stex, objects.stex, release_notes.stex, root-experr*, patch* +- added compress-level parameter to select a compression level for + file writing and changed the default for lz4 compression to do a + better job compressing. finished splitting glz input routines + apart from glz output routines and did a bit of other restructuring. + removed gzxfile struct-as-bytevector wrapper and moved its fd + into glzFile. moved DEACTIVATE to before glzdopen_input calls + in S_new_open_input_fd and S_compress_input_fd, since glzdopen_input + reads from the file and could block. the compress format and now + level are now recorded directly the thread context. replaced + as-gz? flag bit in compressed bytevector header word with a small + number of bits recording the compression format at the bottom of + the header word. flushed a couple of bytevector compression mats + that depended on the old representation. (these last few changes + should make adding new compression formats easier.) added + s-directory build options to choose whether to compress and, if + so, the format and level. + compress-io.h, compress-io.c, new-io.c, equates.h, system.h, + scheme.c, gc.c, + io.ss, cmacros.ss, back.ss, bytevector.ss, primdata.ss, s/Mf-base, + io.ms, mat.ss, bytevector.ms, root-experr*, + release_notes.stex, io.stex, system.stex, objects.stex +- improved the effectiveness of LZ4 boot-file compression to within + 15% of gzip by increasing the lz4 output-port in_buffer size to + 1<<18. With the previous size (1<<14) LZ4-compressed boot files + were about 50% larger. set the lz4 input-port in_buffer and + out_buffer sizes to 1<<12 and 1<<14. there's no clear win at + present for larger input-port buffer sizes. + compress-io.c +- To reduce the memory hit for the increased output-port in_buffer + size and the corresponding increase in computed out_buffer size, + one output-side out_buffer is now allocated (lazily) per thread + and stored in the thread context. The other buffers are now + directly a part of the lz4File_out and lz4File_in structures + rather than allocated separately. + compress-io.c, scheme.c, gc.c, + cmacros.ss +- split out the buffer emit code from glzwrite_lz4 into a + separate glzemit_lz4 helper that is now also used by gzclose + so we can avoid dealing with a NULL buffer in glzwrite_lz4. + glzwrite_lz4 also uses it to writing large buffers directly and + avoid the memcpy. + compress-io.c +- replaced lz4File_out and lz4File_in mode enumeration with the + compress format and inputp boolean. using switch to check and + raising exceptions for unexpected values to further simplify + adding new compression formats in the future. + compress-io.c +- replaced the never-defined struct lz4File pointer in glzFile + union with the more specific struct lz4File_in_r and Lz4File_out_r + pointers. + compress-io.h, compress-io.c +- added free of lz4 structures to gzclose. also changed file-close + logic generally so that (1) port is marked closed before anything is + freed to avoid dangling pointers in the case of an interrupt or + error, and (2) structures are freed even in the case of a write + or close error, before the error is reported. also now mallocing + glz and lz4 structures after possibility of errors have passed where + possible and freeing them when not. + compress-io.c, + io.ss +- added return-value checks to malloc calls and to a couple of other + C-library calls. + compress-io.c +- corrected EINTR checks to look at errno rather than return codes. + compress-io.c +- added S_ prefixes to the glz* exports + externs.h, compress-io.c, new-io.c, scheme.c, fasl.c +- added entries for mutex-name and mutex-thread + threads.stex diff --git a/c/compress-io.c b/c/compress-io.c index edf9cb2fb3..376cd03876 100644 --- a/c/compress-io.c +++ b/c/compress-io.c @@ -1,5 +1,5 @@ /* compress-io.c - * Copyright 1984-2017 Cisco Systems, Inc. + * Copyright 1984-2019 Cisco Systems, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,8 +17,10 @@ /* Dispatch to zlib or LZ4 */ #include "system.h" +#include "zlib.h" #include "lz4.h" #include "lz4frame.h" +#include "lz4hc.h" #include #include @@ -31,468 +33,640 @@ # define GLZ_O_BINARY 0 #endif -enum { - is_gz, - is_lz4_write, - is_lz4_read -}; +/* the value of LZ4_OUTPUT_PORT_IN_BUFFER_SIZE was determined + through experimentation on an intel linux server and an intel + osx laptop. smaller sizes result in significantly worse compression + of object files, and larger sizes don't have much beneficial effect. + don't increase the output-port in-buffer size unless you're sure + it reduces object-file size or reduces compression time + significantly. don't decrease it unless you're sure it doesn't + increase object-file size significnatly. one buffer of size + LZ4_OUTPUT_PORT_IN_BUFFER_SIZE is allocated per lz4-compressed + output port. another buffer of a closely related size is allocated + per thread. */ +#define LZ4_OUTPUT_PORT_IN_BUFFER_SIZE (1 << 18) -typedef struct lz4File_out { - int fd; - void *in_buffer, *out_buffer; - int in_pos, out_len, out_pos; - int err; +/* the values we choose for LZ4_INPUT_PORT_IN_BUFFER_SIZE and + LZ4_INPUT_PORT_OUT_BUFFER_SIZE don't seem to make much difference + in decompression speed, so we keep them fairly small. one buffer + of size LZ4_INPUT_PORT_IN_BUFFER_SIZE and one buffer of size + LZ4_INPUT_PORT_OUT_BUFFER_SIZE are allocated per lz4-compressed + input port. */ +#define LZ4_INPUT_PORT_IN_BUFFER_SIZE (1 << 12) +#define LZ4_INPUT_PORT_OUT_BUFFER_SIZE (1 << 14) + +typedef struct lz4File_out_r { + LZ4F_preferences_t preferences; + INT fd; + INT out_buffer_size; + INT in_pos; + INT err; size_t stream_pos; + char in_buffer[LZ4_OUTPUT_PORT_IN_BUFFER_SIZE]; } lz4File_out; -typedef struct lz4File_in { - int fd; +typedef struct lz4File_in_r { + INT fd; LZ4F_dctx *dctx; - void *in_buffer, *out_buffer; - int in_pos, in_len, out_pos, out_len; - int frame_ended; - int err; + INT in_pos, in_len, out_pos, out_len; + INT frame_ended; + INT err; size_t stream_pos; off_t init_pos; + char in_buffer[LZ4_INPUT_PORT_IN_BUFFER_SIZE]; + char out_buffer[LZ4_INPUT_PORT_OUT_BUFFER_SIZE]; } lz4File_in; -#define USE_LZ4_BUFFER_SIZE (1 << 14) +typedef struct sized_buffer_r { + INT size; + char buffer[0]; +} sized_buffer; -static glzFile glzdopen_lz4_pos(int fd, const char *mode, off_t init_pos); +/* local functions */ +static glzFile glzdopen_output_gz(INT fd, INT compress_level); +static glzFile glzdopen_output_lz4(INT fd, INT compress_level); +static glzFile glzdopen_input_gz(INT fd); +static glzFile glzdopen_input_lz4(INT fd, off_t init_pos); +static INT glzread_lz4(lz4File_in *lz4, void *buffer, UINT count); +static INT glzemit_lz4(lz4File_out *lz4, void *buffer, UINT count); +static INT glzwrite_lz4(lz4File_out *lz4, void *buffer, UINT count); -glzFile glzdopen_gz(int fd, const char *mode) { +static glzFile glzdopen_output_gz(INT fd, INT compress_level) { gzFile gz; + glzFile glz; + INT as_append; + INT level; - if ((gz = gzdopen(fd, mode)) == Z_NULL) { +#ifdef WIN32 + as_append = 0; +#else + as_append = fcntl(fd, F_GETFL) & O_APPEND; +#endif + + if ((gz = gzdopen(fd, as_append ? "ab" : "wb")) == Z_NULL) return Z_NULL; + + switch (compress_level) { + case COMPRESS_LOW: + level = Z_BEST_SPEED; + break; + case COMPRESS_MEDIUM: + level = (Z_BEST_SPEED + Z_BEST_COMPRESSION) / 2; + break; + case COMPRESS_HIGH: + level = (Z_BEST_SPEED + (3 * Z_BEST_COMPRESSION)) / 4; + break; + case COMPRESS_MAX: + level = Z_BEST_COMPRESSION; + break; + default: + S_error1("glzdopen_output_gz", "unexpected compress level ~s", Sinteger(compress_level)); + level = 0; + break; + } + + gzsetparams(gz, level, Z_DEFAULT_STRATEGY); + + if ((glz = malloc(sizeof(struct glzFile_r))) == NULL) { + (void)gzclose(gz); return Z_NULL; - } else { - glzFile glz = malloc(sizeof(struct glzFile_r)); - glz->mode = is_gz; - glz->gz = gz; - return glz; } -} - -glzFile glzdopen_lz4(int fd, const char *mode) { - return glzdopen_lz4_pos(fd, mode, 0); -} - -static glzFile glzdopen_lz4_pos(int fd, const char *mode, off_t init_pos) { - glzFile glz = malloc(sizeof(struct glzFile_r)); - - if (mode[0] == 'r') { - LZ4F_dctx *dctx; - LZ4F_errorCode_t r; - lz4File_in *lz4; - - r = LZ4F_createDecompressionContext(&dctx, LZ4F_VERSION); - if (LZ4F_isError(r)) - return Z_NULL; - - lz4 = malloc(sizeof(lz4File_in)); - lz4->fd = fd; - lz4->dctx = dctx; - lz4->in_buffer = malloc(USE_LZ4_BUFFER_SIZE); - lz4->out_buffer = malloc(USE_LZ4_BUFFER_SIZE); - lz4->in_pos = 0; - lz4->in_len = 0; - lz4->out_len = 0; - lz4->out_pos = 0; - lz4->frame_ended = 0; - lz4->err = 0; - lz4->stream_pos = 0; - lz4->init_pos = init_pos; - - glz->mode = is_lz4_read; - glz->lz4 = (struct lz4File *)lz4; - } else { - lz4File_out *lz4 = malloc(sizeof(lz4File_out)); - - lz4->fd = fd; - lz4->in_buffer = malloc(USE_LZ4_BUFFER_SIZE); - lz4->out_buffer = malloc(LZ4F_compressFrameBound(USE_LZ4_BUFFER_SIZE, NULL)); - lz4->in_pos = 0; - lz4->out_len = 0; - lz4->out_pos = 0; - lz4->err = 0; - lz4->stream_pos = 0; - - glz->mode = is_lz4_write; - glz->lz4 = (struct lz4File *)lz4; - } - + glz->fd = fd; + glz->inputp = 0; + glz->format = COMPRESS_GZIP; + glz->gz = gz; return glz; } -glzFile glzdopen(int fd, const char *mode) { - if (mode[0] == 'r') { - int r, pos = 0; - unsigned char buffer[4]; - off_t init_pos; +static glzFile glzdopen_output_lz4(INT fd, INT compress_level) { + glzFile glz; + lz4File_out *lz4; + INT level; - /* check for LZ4 magic number, otherwise defer to gzdopen */ + switch (compress_level) { + case COMPRESS_LOW: + level = 1; + break; + case COMPRESS_MEDIUM: + level = LZ4HC_CLEVEL_MIN; + break; + case COMPRESS_HIGH: + level = (LZ4HC_CLEVEL_MIN + LZ4HC_CLEVEL_MAX) / 2; + break; + case COMPRESS_MAX: + level = LZ4HC_CLEVEL_MAX; + break; + default: + S_error1("glzdopen_output_lz4", "unexpected compress level ~s", Sinteger(compress_level)); + level = 0; + break; + } - init_pos = WIN32_IZE(lseek)(fd, 0, SEEK_CUR); + if ((lz4 = malloc(sizeof(lz4File_out))) == NULL) return Z_NULL; + memset(&lz4->preferences, 0, sizeof(LZ4F_preferences_t)); + lz4->preferences.compressionLevel = level; + lz4->fd = fd; + lz4->out_buffer_size = (INT)LZ4F_compressFrameBound(LZ4_OUTPUT_PORT_IN_BUFFER_SIZE, &lz4->preferences); + lz4->in_pos = 0; + lz4->err = 0; + lz4->stream_pos = 0; - while (pos < 4) { - r = WIN32_IZE(read)(fd, (char*)buffer + pos, 4 - pos); - if (r == 0) - break; - else if (r > 0) - pos += r; -#ifdef EINTR - else if (r == EINTR) - r = 0; -#endif - else - break; /* error reading */ - } - - if (pos > 0) - WIN32_IZE(lseek)(fd, init_pos, SEEK_SET); - - if ((pos == 4) - && (buffer[0] == 0x04) - && (buffer[1] == 0x22) - && (buffer[2] == 0x4d) - && (buffer[3] == 0x18)) - return glzdopen_lz4_pos(fd, mode, init_pos); - - return glzdopen_gz(fd, mode); - } else - return glzdopen_gz(fd, mode); + if ((glz = malloc(sizeof(struct glzFile_r))) == NULL) { + free(lz4); + return Z_NULL; + } + glz->fd = fd; + glz->inputp = 0; + glz->format = COMPRESS_LZ4; + glz->lz4_out = lz4; + return glz; } -/* currently assumes read mode: */ -glzFile glzopen(const char *path, const char *mode) { - int fd; +glzFile S_glzdopen_output(INT fd, INT compress_format, INT compress_level) { + switch (compress_format) { + case COMPRESS_GZIP: + return glzdopen_output_gz(fd, compress_level); + case COMPRESS_LZ4: + return glzdopen_output_lz4(fd, compress_level); + default: + S_error1("glzdopen_output", "unexpected compress format ~s", Sinteger(compress_format)); + return Z_NULL; + } +} + +static glzFile glzdopen_input_gz(INT fd) { + gzFile gz; + glzFile glz; + + if ((gz = gzdopen(fd, "rb")) == Z_NULL) return Z_NULL; + + if ((glz = malloc(sizeof(struct glzFile_r))) == NULL) { + (void)gzclose(gz); + return Z_NULL; + } + glz->fd = fd; + glz->inputp = 1; + glz->format = COMPRESS_GZIP; + glz->gz = gz; + return glz; +} + +static glzFile glzdopen_input_lz4(INT fd, off_t init_pos) { + glzFile glz; + LZ4F_dctx *dctx; + LZ4F_errorCode_t r; + lz4File_in *lz4; + + r = LZ4F_createDecompressionContext(&dctx, LZ4F_VERSION); + if (LZ4F_isError(r)) + return Z_NULL; + + if ((lz4 = malloc(sizeof(lz4File_in))) == NULL) { + (void)LZ4F_freeDecompressionContext(dctx); + return Z_NULL; + } + lz4->fd = fd; + lz4->dctx = dctx; + lz4->in_pos = 0; + lz4->in_len = 0; + lz4->out_len = 0; + lz4->out_pos = 0; + lz4->frame_ended = 0; + lz4->err = 0; + lz4->stream_pos = 0; + lz4->init_pos = init_pos; + + if ((glz = malloc(sizeof(struct glzFile_r))) == NULL) { + (void)LZ4F_freeDecompressionContext(lz4->dctx); + free(lz4); + return Z_NULL; + } + glz->fd = fd; + glz->inputp = 1; + glz->format = COMPRESS_LZ4; + glz->lz4_in = lz4; + return glz; +} + +glzFile S_glzdopen_input(INT fd) { + INT r, pos = 0; + unsigned char buffer[4]; + off_t init_pos; + + /* check for LZ4 magic number, otherwise defer to gzdopen */ + + if ((init_pos = WIN32_IZE(lseek)(fd, 0, SEEK_CUR)) == -1) return Z_NULL; + + while (pos < 4) { + r = WIN32_IZE(read)(fd, (char*)buffer + pos, 4 - pos); + if (r == 0) + break; + else if (r > 0) + pos += r; +#ifdef EINTR + else if (errno == EINTR) + continue; +#endif + else + break; /* error reading */ + } + + if (pos > 0) { + if (WIN32_IZE(lseek)(fd, init_pos, SEEK_SET) == -1) return Z_NULL; + } + + if ((pos == 4) + && (buffer[0] == 0x04) + && (buffer[1] == 0x22) + && (buffer[2] == 0x4d) + && (buffer[3] == 0x18)) + return glzdopen_input_lz4(fd, init_pos); + + return glzdopen_input_gz(fd); +} + +glzFile S_glzopen_input(const char *path) { + INT fd; fd = WIN32_IZE(open)(path, O_RDONLY | GLZ_O_BINARY); if (fd == -1) return Z_NULL; else - return glzdopen(fd, mode); + return S_glzdopen_input(fd); } #ifdef WIN32 -/* currently assumes read mode: */ -glzFile glzopen_w(wchar_t *path, const char *mode) { - int fd; +glzFile S_glzopen_input_w(const wchar_t *path) { + INT fd; fd = _wopen(path, O_RDONLY | GLZ_O_BINARY); if (fd == -1) return Z_NULL; else - return glzdopen(fd, mode); + return S_glzdopen_input(fd); } #endif -int glzdirect(glzFile file) { - if (file->mode == is_gz) - return gzdirect(file->gz); +IBOOL S_glzdirect(glzFile glz) { + if (glz->format == COMPRESS_GZIP) + return gzdirect(glz->gz); else return 0; } -int glzclose(glzFile file) { - if (file->mode == is_gz) { - int r; - r = gzclose(file->gz); - if (r != Z_OK) - return r; - } else if (file->mode == is_lz4_write) { - lz4File_out *lz4 = (lz4File_out *)file->lz4; - glzwrite(file, NULL /* => flush */, 0); - while (1) { - int r = WIN32_IZE(close)(lz4->fd); - if (r == 0) - break; -#ifdef EINTR - else if (r == EINTR) - r = 0; -#endif - else - return r; - } - free(lz4->in_buffer); - free(lz4->out_buffer); - } else { - lz4File_in *lz4 = (lz4File_in *)file->lz4; - while (1) { - int r = WIN32_IZE(close)(lz4->fd); - if (r == 0) - break; -#ifdef EINTR - else if (r == EINTR) - r = 0; -#endif - else - return r; - } - (void)LZ4F_freeDecompressionContext(lz4->dctx); - free(lz4->in_buffer); - free(lz4->out_buffer); - } - free(file); - return Z_OK; -} - -int glzread(glzFile file, void *buffer, unsigned int count) { - if (file->mode == is_gz) { - return gzread(file->gz, buffer, count); - } else { - lz4File_in *lz4 = (lz4File_in *)file->lz4; - - while (lz4->out_pos == lz4->out_len) { - int in_avail; - - in_avail = lz4->in_len - lz4->in_pos; - if (!in_avail) { +INT S_glzclose(glzFile glz) { + INT r = Z_OK, saved_errno = 0; + switch (glz->format) { + case COMPRESS_GZIP: + r = gzclose(glz->gz); + break; + case COMPRESS_LZ4: + if (glz->inputp) { + lz4File_in *lz4 = glz->lz4_in; while (1) { - in_avail = WIN32_IZE(read)(lz4->fd, (char*)lz4->in_buffer, USE_LZ4_BUFFER_SIZE); - if (in_avail >= 0) { - lz4->in_len = in_avail; - lz4->in_pos = 0; - break; + INT r = WIN32_IZE(close)(lz4->fd); #ifdef EINTR - } else if (in_avail == EINTR) { - /* try again */ + if (r < 0 && errno == EINTR) continue; #endif - } else { - lz4->err = errno; - return -1; - } - } - } - - if (in_avail > 0) { - size_t amt, out_len = USE_LZ4_BUFFER_SIZE, in_len = in_avail; - - /* For a large enough result buffer, try to decompress directly - to that buffer: */ - if (count >= (out_len >> 1)) { - size_t direct_out_len = count; - - if (lz4->frame_ended && ((char *)lz4->in_buffer)[lz4->in_pos] == 0) - return 0; /* count 0 after frame as stream terminator */ - - amt = LZ4F_decompress(lz4->dctx, - buffer, &direct_out_len, - (char *)lz4->in_buffer + lz4->in_pos, &in_len, - NULL); - lz4->frame_ended = (amt == 0); - - if (LZ4F_isError(amt)) { - lz4->err = (int)amt; - return -1; - } - - lz4->in_pos += (int)in_len; - - if (direct_out_len) { - lz4->stream_pos += direct_out_len; - return (int)direct_out_len; - } - - in_len = in_avail - in_len; - } - - if (in_len > 0) { - if (lz4->frame_ended && ((char *)lz4->in_buffer)[lz4->in_pos] == 0) - return 0; /* count 0 after frame as stream terminator */ - - amt = LZ4F_decompress(lz4->dctx, - lz4->out_buffer, &out_len, - (char *)lz4->in_buffer + lz4->in_pos, &in_len, - NULL); - lz4->frame_ended = (amt == 0); - - if (LZ4F_isError(amt)) { - lz4->err = (int)amt; - return -1; - } - - lz4->in_pos += (int)in_len; - lz4->out_len = (int)out_len; - lz4->out_pos = 0; + if (r == 0) { r = Z_ERRNO; saved_errno = errno; } + break; } + (void)LZ4F_freeDecompressionContext(lz4->dctx); + free(lz4); } else { - /* EOF on read */ - break; - } - } - - if (lz4->out_pos < lz4->out_len) { - unsigned int amt = lz4->out_len - lz4->out_pos; - if (amt > count) amt = count; - memcpy(buffer, (char *)lz4->out_buffer + lz4->out_pos, amt); - lz4->out_pos += amt; - lz4->stream_pos += amt; - return amt; - } - - return 0; - } -} - -int glzwrite(glzFile file, void *buffer, unsigned int count) { - if (file->mode == is_gz) - return gzwrite(file->gz, buffer, count); - else { - lz4File_out *lz4 = (lz4File_out *)file->lz4; - - if ((lz4->in_pos == USE_LZ4_BUFFER_SIZE) - || ((lz4->in_pos > 0) && !buffer)) { - size_t out_len; - - out_len = LZ4F_compressFrame(lz4->out_buffer, LZ4F_compressFrameBound(USE_LZ4_BUFFER_SIZE, NULL), - lz4->in_buffer, lz4->in_pos, - NULL); - if (LZ4F_isError(out_len)) { - lz4->err = (int)out_len; - return -1; - } - - lz4->in_pos = 0; - lz4->out_len = (int)out_len; - lz4->out_pos = 0; - } - - while (lz4->out_pos < lz4->out_len) { - int r = WIN32_IZE(write)(lz4->fd, (char*)lz4->out_buffer + lz4->out_pos, lz4->out_len - lz4->out_pos); - if (r >= 0) - lz4->out_pos += r; + lz4File_out *lz4 = glz->lz4_out; + if (lz4->in_pos != 0) { + r = glzemit_lz4(lz4, lz4->in_buffer, lz4->in_pos); + if (r >= 0) r = Z_OK; else { r = Z_ERRNO; saved_errno = errno; } + } + while (1) { + int r1 = WIN32_IZE(close)(lz4->fd); #ifdef EINTR - else if (r == EINTR) - lz4->out_pos += 0; /* try again */ + if (r1 < 0 && errno == EINTR) continue; #endif - else { - lz4->err = errno; - return r; + if (r == Z_OK && r1 < 0) { r = Z_ERRNO; saved_errno = errno; } + break; + } + free(lz4); + } + break; + default: + S_error1("S_glzclose", "unexpected compress format ~s", Sinteger(glz->format)); + } + free(glz); + if (saved_errno) errno = saved_errno; + return r; +} + +static INT glzread_lz4(lz4File_in *lz4, void *buffer, UINT count) { + while (lz4->out_pos == lz4->out_len) { + INT in_avail; + + in_avail = lz4->in_len - lz4->in_pos; + if (!in_avail) { + while (1) { + in_avail = WIN32_IZE(read)(lz4->fd, lz4->in_buffer, LZ4_INPUT_PORT_IN_BUFFER_SIZE); + if (in_avail >= 0) { + lz4->in_len = in_avail; + lz4->in_pos = 0; + break; +#ifdef EINTR + } else if (errno == EINTR) { + /* try again */ +#endif + } else { + lz4->err = Z_ERRNO; + return -1; + } } } - { - unsigned int amt = (USE_LZ4_BUFFER_SIZE - lz4->in_pos); + if (in_avail > 0) { + size_t amt, out_len = LZ4_INPUT_PORT_OUT_BUFFER_SIZE, in_len = in_avail; - if (count < amt) - amt = count; + /* For a large enough result buffer, try to decompress directly + to that buffer: */ + if (count >= (out_len >> 1)) { + size_t direct_out_len = count; - memcpy((char *)lz4->in_buffer + lz4->in_pos, buffer, amt); - lz4->in_pos += amt; - lz4->stream_pos += amt; + if (lz4->frame_ended && lz4->in_buffer[lz4->in_pos] == 0) + return 0; /* count 0 after frame as stream terminator */ - return amt; + amt = LZ4F_decompress(lz4->dctx, + buffer, &direct_out_len, + lz4->in_buffer + lz4->in_pos, &in_len, + NULL); + lz4->frame_ended = (amt == 0); + + if (LZ4F_isError(amt)) { + lz4->err = Z_STREAM_ERROR; + return -1; + } + + lz4->in_pos += (INT)in_len; + + if (direct_out_len) { + lz4->stream_pos += direct_out_len; + return (INT)direct_out_len; + } + + in_len = in_avail - in_len; + } + + if (in_len > 0) { + if (lz4->frame_ended && lz4->in_buffer[lz4->in_pos] == 0) + return 0; /* count 0 after frame as stream terminator */ + + amt = LZ4F_decompress(lz4->dctx, + lz4->out_buffer, &out_len, + lz4->in_buffer + lz4->in_pos, &in_len, + NULL); + lz4->frame_ended = (amt == 0); + + if (LZ4F_isError(amt)) { + lz4->err = Z_STREAM_ERROR; + return -1; + } + + lz4->in_pos += (INT)in_len; + lz4->out_len = (INT)out_len; + lz4->out_pos = 0; + } + } else { + /* EOF on read */ + break; } } + + if (lz4->out_pos < lz4->out_len) { + UINT amt = lz4->out_len - lz4->out_pos; + if (amt > count) amt = count; + memcpy(buffer, lz4->out_buffer + lz4->out_pos, amt); + lz4->out_pos += amt; + lz4->stream_pos += amt; + return amt; + } + + return 0; +} + +INT S_glzread(glzFile glz, void *buffer, UINT count) { + switch (glz->format) { + case COMPRESS_GZIP: + return gzread(glz->gz, buffer, count); + case COMPRESS_LZ4: + return glzread_lz4(glz->lz4_in, buffer, count); + default: + S_error1("S_glzread", "unexpected compress format ~s", Sinteger(glz->format)); + return -1; + } } -long glzseek(glzFile file, long offset, int whence) { - if (file->mode == is_gz) - return gzseek(file->gz, offset, whence); - else if (file->mode == is_lz4_write) { - lz4File_out *lz4 = (lz4File_out *)file->lz4; - if (whence == SEEK_CUR) - offset += (long)lz4->stream_pos; - if (offset >= 0) { - while ((size_t)offset > lz4->stream_pos) { - size_t amt = (size_t)offset - lz4->stream_pos; - if (amt > 8) amt = 8; - if (glzwrite(file, "\0\0\0\0\0\0\0\0", (unsigned int)amt) < 0) +static INT glzemit_lz4(lz4File_out *lz4, void *buffer, UINT count) { + ptr tc = get_thread_context(); + sized_buffer *cached_out_buffer; + char *out_buffer; + INT out_len, out_pos; + INT r = 0; + + /* allocate one out_buffer (per thread) since we don't need one for each file. + the buffer is freed by destroy_thread. */ + if ((cached_out_buffer = LZ4OUTBUFFER(tc)) == NULL || cached_out_buffer->size < lz4->out_buffer_size) { + if (cached_out_buffer != NULL) free(cached_out_buffer); + if ((LZ4OUTBUFFER(tc) = cached_out_buffer = malloc(sizeof(sized_buffer) + lz4->out_buffer_size)) == NULL) return -1; + cached_out_buffer->size = lz4->out_buffer_size; + } + out_buffer = cached_out_buffer->buffer; + + out_len = (INT)LZ4F_compressFrame(out_buffer, lz4->out_buffer_size, + buffer, count, + &lz4->preferences); + if (LZ4F_isError(out_len)) { + lz4->err = Z_STREAM_ERROR; + return -1; + } + + out_pos = 0; + while (out_pos < out_len) { + r = WIN32_IZE(write)(lz4->fd, out_buffer + out_pos, out_len - out_pos); + if (r >= 0) + out_pos += r; +#ifdef EINTR + else if (errno == EINTR) + continue; +#endif + else + break; + } + + return r; +} + +static INT glzwrite_lz4(lz4File_out *lz4, void *buffer, UINT count) { + UINT amt; INT r; + + if ((amt = LZ4_OUTPUT_PORT_IN_BUFFER_SIZE - lz4->in_pos) > count) amt = count; + + if (amt == LZ4_OUTPUT_PORT_IN_BUFFER_SIZE) { + /* full buffer coming from input...skip the memcpy */ + if ((r = glzemit_lz4(lz4, buffer, LZ4_OUTPUT_PORT_IN_BUFFER_SIZE)) < 0) return 0; + } else { + memcpy(lz4->in_buffer + lz4->in_pos, buffer, amt); + if ((lz4->in_pos += amt) == LZ4_OUTPUT_PORT_IN_BUFFER_SIZE) { + lz4->in_pos = 0; + if ((r = glzemit_lz4(lz4, lz4->in_buffer, LZ4_OUTPUT_PORT_IN_BUFFER_SIZE)) < 0) return 0; + } + } + + lz4->stream_pos += amt; + return amt; +} + +INT S_glzwrite(glzFile glz, void *buffer, UINT count) { + switch (glz->format) { + case COMPRESS_GZIP: + return gzwrite(glz->gz, buffer, count); + case COMPRESS_LZ4: + return glzwrite_lz4(glz->lz4_out, buffer, count); + default: + S_error1("S_glzwrite", "unexpected compress format ~s", Sinteger(glz->format)); + return -1; + } +} + +long S_glzseek(glzFile glz, long offset, INT whence) { + switch (glz->format) { + case COMPRESS_GZIP: + return gzseek(glz->gz, offset, whence); + case COMPRESS_LZ4: + if (glz->inputp) { + lz4File_in *lz4 = glz->lz4_in; + if (whence == SEEK_CUR) + offset += (long)lz4->stream_pos; + if (offset < 0) + offset = 0; + if ((size_t)offset < lz4->stream_pos) { + /* rewind and read from start */ + if (WIN32_IZE(lseek)(lz4->fd, lz4->init_pos, SEEK_SET) < 0) { + lz4->err = Z_ERRNO; + return -1; + } + LZ4F_resetDecompressionContext(lz4->dctx); + lz4->in_pos = 0; + lz4->in_len = 0; + lz4->out_len = 0; + lz4->out_pos = 0; + lz4->err = 0; + lz4->stream_pos = 0; + } + while ((size_t)offset > lz4->stream_pos) { + char buffer[32]; + size_t amt = (size_t)offset - lz4->stream_pos; + if (amt > sizeof(buffer)) amt = sizeof(buffer); + if (glzread_lz4(lz4, buffer, (UINT)amt) < 0) + return -1; + } + return (long)lz4->stream_pos; + } else { + lz4File_out *lz4 = glz->lz4_out; + if (whence == SEEK_CUR) + offset += (long)lz4->stream_pos; + if (offset >= 0) { + while ((size_t)offset > lz4->stream_pos) { + size_t amt = (size_t)offset - lz4->stream_pos; + if (amt > 8) amt = 8; + if (glzwrite_lz4(lz4, "\0\0\0\0\0\0\0\0", (UINT)amt) < 0) + return -1; + } + } + return (long)lz4->stream_pos; + } + default: + S_error1("S_glzseek", "unexpected compress format ~s", Sinteger(glz->format)); + return -1; + } +} + +INT S_glzgetc(glzFile glz) { + switch (glz->format) { + case COMPRESS_GZIP: + return gzgetc(glz->gz); + case COMPRESS_LZ4: + { + unsigned char buffer[1]; + INT r; + r = S_glzread(glz, buffer, 1); + if (r == 1) + return buffer[0]; + else return -1; } - } - return (long)lz4->stream_pos; - } else if (file->mode == is_lz4_read) { - lz4File_in *lz4 = (lz4File_in *)file->lz4; - if (whence == SEEK_CUR) - offset += (long)lz4->stream_pos; - if (offset < 0) - offset = 0; - if ((size_t)offset < lz4->stream_pos) { - /* rewind and read from start */ - if (WIN32_IZE(lseek)(lz4->fd, lz4->init_pos, SEEK_SET) < 0) { - lz4->err = errno; - return -1; - } - LZ4F_resetDecompressionContext(lz4->dctx); - lz4->in_pos = 0; - lz4->in_len = 0; - lz4->out_len = 0; - lz4->out_pos = 0; - lz4->err = 0; - lz4->stream_pos = 0; - } - while ((size_t)offset > lz4->stream_pos) { - char buffer[32]; - size_t amt = (size_t)offset - lz4->stream_pos; - if (amt > sizeof(buffer)) amt = sizeof(buffer); - if (glzread(file, buffer, (unsigned int)amt) < 0) - return -1; - } - return (long)lz4->stream_pos; - } else - return 0; -} - -int glzgetc(glzFile file) { - if (file->mode == is_gz) - return gzgetc(file->gz); - else { - unsigned char buffer[1]; - int r; - r = glzread(file, buffer, 1); - if (r == 1) - return buffer[0]; - return -1; + default: + S_error1("S_glzgetc", "unexpected compress format ~s", Sinteger(glz->format)); + return -1; } } - -int glzungetc(int c, glzFile file) { - if (file->mode == is_gz) - return gzungetc(c, file->gz); - else if (file->mode == is_lz4_read) { - lz4File_in *lz4 = (lz4File_in *)file->lz4; - if (lz4->out_len == 0) - lz4->out_len = lz4->out_pos = 1; - if (lz4->out_pos) { - lz4->out_pos--; - ((unsigned char *)lz4->out_buffer)[lz4->out_pos] = c; - lz4->stream_pos--; - return c; - } else { - /* support ungetc only just after a getc, in which case there - should have been room */ + +INT S_glzungetc(INT c, glzFile glz) { + switch (glz->format) { + case COMPRESS_GZIP: + return gzungetc(c, glz->gz); + case COMPRESS_LZ4: + { + lz4File_in *lz4 = glz->lz4_in; + if (lz4->out_len == 0) + lz4->out_len = lz4->out_pos = 1; + if (lz4->out_pos) { + lz4->out_pos--; + lz4->out_buffer[lz4->out_pos] = c; + lz4->stream_pos--; + return c; + } else { + /* support ungetc only just after a getc, in which case there + should have been room */ + return -1; + } + } + default: + S_error1("S_glzungetc", "unexpected compress format ~s", Sinteger(glz->format)); return -1; - } - } else - return -1; -} - -int glzrewind(glzFile file) { - return glzseek(file, 0, SEEK_SET); + } } -void glzerror(glzFile file, int *errnum) -{ - if (file->mode == is_gz) - (void)gzerror(file->gz, errnum); - else if (file->mode == is_lz4_write) - *errnum = ((lz4File_out *)file->lz4)->err; - else if (file->mode == is_lz4_read) - *errnum = ((lz4File_in *)file->lz4)->err; - else - *errnum = 0; +INT S_glzrewind(glzFile glz) { + return S_glzseek(glz, 0, SEEK_SET); } -void glzclearerr(glzFile file) -{ - if (file->mode == is_gz) - gzclearerr(file->gz); - else if (file->mode == is_lz4_write) - ((lz4File_out *)file->lz4)->err = 0; - else if (file->mode == is_lz4_read) - ((lz4File_in *)file->lz4)->err = 0; +void S_glzerror(glzFile glz, INT *errnum) { + switch (glz->format) { + case COMPRESS_GZIP: + (void)gzerror(glz->gz, errnum); + break; + case COMPRESS_LZ4: + if (glz->inputp) + *errnum = glz->lz4_in->err; + else + *errnum = glz->lz4_out->err; + break; + default: + S_error1("S_glzerror", "unexpected compress format ~s", Sinteger(glz->format)); + *errnum = 0; + } +} + +void S_glzclearerr(glzFile glz) { + switch (glz->format) { + case COMPRESS_GZIP: + gzclearerr(glz->gz); + break; + case COMPRESS_LZ4: + if (glz->inputp) + glz->lz4_in->err = 0; + else + glz->lz4_out->err = 0; + break; + default: + S_error1("S_glzerror", "unexpected compress format ~s", Sinteger(glz->format)); + } } diff --git a/c/compress-io.h b/c/compress-io.h index d0cb121a6f..a5f988b709 100644 --- a/c/compress-io.h +++ b/c/compress-io.h @@ -1,5 +1,5 @@ /* compress-io.h - * Copyright 1984-2017 Cisco Systems, Inc. + * Copyright 1984-2019 Cisco Systems, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,34 +14,13 @@ * limitations under the License. */ -#include "zlib.h" - -struct lz4File; - typedef struct glzFile_r { - int mode; + INT fd; + IBOOL inputp; + INT format; union { - gzFile gz; - struct lz4File *lz4; + struct gzFile_s *gz; + struct lz4File_in_r *lz4_in; + struct lz4File_out_r *lz4_out; }; } *glzFile; - -glzFile glzdopen_gz(int fd, const char *mode); -glzFile glzdopen_lz4(int fd, const char *mode); -glzFile glzdopen(int fd, const char *mode); -glzFile glzopen(const char *path, const char *mode); -#ifdef WIN32 -glzFile glzopen_w(wchar_t *path, const char *mode); -#endif -int glzdirect(glzFile file); -int glzclose(glzFile file); - -int glzread(glzFile file, void *buffer, unsigned int count); -int glzwrite(glzFile file, void *buffer, unsigned int count); -long glzseek(glzFile file, long offset, int whence); -int glzgetc(glzFile file); -int glzungetc(int c, glzFile file); -int glzrewind(glzFile file); - -void glzerror(glzFile file, int *errnum); -void glzclearerr(glzFile fdfile); diff --git a/c/externs.h b/c/externs.h index e92e7f4ce4..10c0442cef 100644 --- a/c/externs.h +++ b/c/externs.h @@ -169,6 +169,27 @@ extern wchar_t *S_malloc_wide_pathname PROTO((const char *inpath)); #endif extern IBOOL S_fixedpathp PROTO((const char *inpath)); +/* compress-io.c */ +extern glzFile S_glzdopen_output PROTO((INT fd, INT compress_format, INT compress_level)); +extern glzFile S_glzdopen_input PROTO((INT fd)); +extern glzFile S_glzopen_input PROTO((const char *path)); +#ifdef WIN32 +extern glzFile S_glzopen_input_w PROTO((const wchar_t *path)); +#endif +extern IBOOL S_glzdirect PROTO((glzFile file)); +extern INT S_glzclose PROTO((glzFile file)); + +extern INT S_glzread PROTO((glzFile file, void *buffer, UINT count)); +extern INT S_glzwrite PROTO((glzFile file, void *buffer, UINT count)); +extern long S_glzseek PROTO((glzFile file, long offset, INT whence)); +extern INT S_glzgetc PROTO((glzFile file)); +extern INT S_glzungetc PROTO((INT c, glzFile file)); +extern INT S_glzrewind PROTO((glzFile file)); + +extern void S_glzerror PROTO((glzFile file, INT *errnum)); +extern void S_glzclearerr PROTO((glzFile fdfile)); + + /* new-io.c */ extern INT S_gzxfile_fd PROTO((ptr x)); extern glzFile S_gzxfile_gzfile PROTO((ptr x)); @@ -176,14 +197,14 @@ extern ptr S_new_open_input_fd PROTO((const char *filename, IBOOL compressed)); extern ptr S_new_open_output_fd PROTO(( const char *filename, INT mode, IBOOL no_create, IBOOL no_fail, IBOOL no_truncate, - IBOOL append, IBOOL lock, IBOOL replace, IBOOL compressed, IBOOL as_gz)); + IBOOL append, IBOOL lock, IBOOL replace, IBOOL compressed)); extern ptr S_new_open_input_output_fd PROTO(( const char *filename, INT mode, IBOOL no_create, IBOOL no_fail, IBOOL no_truncate, IBOOL append, IBOOL lock, IBOOL replace, IBOOL compressed)); extern ptr S_close_fd PROTO((ptr file, IBOOL gzflag)); extern ptr S_compress_input_fd PROTO((INT fd, I64 fp)); -extern ptr S_compress_output_fd PROTO((INT fd, IBOOL as_gz)); +extern ptr S_compress_output_fd PROTO((INT fd)); extern ptr S_bytevector_read PROTO((ptr file, ptr buffer, iptr start, iptr count, IBOOL gzflag)); extern ptr S_bytevector_read_nb PROTO((ptr file, ptr buffer, iptr start, iptr count, IBOOL gzflag)); @@ -198,13 +219,13 @@ extern ptr S_get_fd_length PROTO((ptr file, IBOOL gzflag)); extern ptr S_set_fd_length PROTO((ptr file, ptr length, IBOOL gzflag)); extern void S_new_io_init PROTO((void)); -extern uptr S_bytevector_compress_size PROTO((iptr s_count, IBOOL as_gz)); +extern uptr S_bytevector_compress_size PROTO((iptr s_count, INT compress_format)); extern ptr S_bytevector_compress PROTO((ptr dest_bv, iptr d_start, iptr d_count, ptr src_bv, iptr s_start, iptr s_count, - IBOOL as_gz)); + INT compress_format)); extern ptr S_bytevector_uncompress PROTO((ptr dest_bv, iptr d_start, iptr d_count, ptr src_bv, iptr s_start, iptr s_count, - IBOOL as_gz)); + INT compress_format)); /* thread.c */ extern void S_thread_init PROTO((void)); diff --git a/c/fasl.c b/c/fasl.c index 413b370765..b08ffe33b9 100644 --- a/c/fasl.c +++ b/c/fasl.c @@ -177,6 +177,7 @@ */ #include "system.h" +#include "zlib.h" #ifdef WIN32 #include @@ -346,14 +347,14 @@ static INT uf_read(unbufFaslFile uf, octet *s, iptr n) { switch (uf->type) { case UFFO_TYPE_GZ: - k = glzread(uf->file, s, (GZ_IO_SIZE_T)nx); + k = S_glzread(uf->file, s, (GZ_IO_SIZE_T)nx); if (k > 0) n -= k; else if (k == 0) return -1; else { - glzerror(uf->file, &errnum); - glzclearerr(uf->file); + S_glzerror(uf->file, &errnum); + S_glzclearerr(uf->file); if (errnum != Z_ERRNO || errno != EINTR) S_error1("", "error reading from ~a", uf->path); } diff --git a/c/gc.c b/c/gc.c index f14b110355..82fb705eda 100644 --- a/c/gc.c +++ b/c/gc.c @@ -1515,6 +1515,9 @@ static void sweep_thread(p) ptr p; { /* immediate SUPPRESSPRIMITIVEINLINING */ relocate(&DEFAULTRECORDEQUALPROCEDURE(tc)) relocate(&DEFAULTRECORDHASHPROCEDURE(tc)) + relocate(&COMPRESSFORMAT(tc)) + relocate(&COMPRESSLEVEL(tc)) + /* void* LZ4OUTBUFFER(tc) */ /* U64 INSTRCOUNTER(tc) */ /* U64 ALLOCCOUNTER(tc) */ relocate(&PARAMETERS(tc)) diff --git a/c/new-io.c b/c/new-io.c index f8243bf505..2a896c19eb 100644 --- a/c/new-io.c +++ b/c/new-io.c @@ -52,9 +52,8 @@ /* locally defined functions */ static ptr new_open_output_fd_helper PROTO((const char *filename, INT mode, INT flags, INT no_create, INT no_fail, INT no_truncate, - INT append, INT lock, INT replace, INT compressed, INT as_gz)); + INT append, INT lock, INT replace, INT compressed)); static INT lockfile PROTO((INT fd)); -static ptr make_gzxfile PROTO((int fd, glzFile file)); static int is_valid_zlib_length(iptr count); static int is_valid_lz4_length(iptr count); @@ -78,8 +77,8 @@ static int is_valid_lz4_length(iptr count); if (ok) { flag = 0; } \ else { \ INT errnum; \ - glzerror((fd),&errnum); \ - glzclearerr((fd)); \ + S_glzerror((fd),&errnum); \ + S_glzclearerr((fd)); \ if (errnum == Z_ERRNO) { \ flag = errno != EINTR; \ } else { \ @@ -99,8 +98,8 @@ static int is_valid_lz4_length(iptr count); if (ok) { flag = 0; break; } \ else { \ INT errnum; \ - glzerror((fd),&errnum); \ - glzclearerr((fd)); \ + S_glzerror((fd),&errnum); \ + S_glzclearerr((fd)); \ if (errnum == Z_ERRNO) { \ if (errno != EINTR) { flag = 1; break; } \ } else { \ @@ -117,8 +116,8 @@ static int is_valid_lz4_length(iptr count); if (ok) { flag = 0; } \ else { \ INT errnum; \ - glzerror((fd),&errnum); \ - glzclearerr((fd)); \ + S_glzerror((fd),&errnum); \ + S_glzclearerr((fd)); \ if (errnum == Z_ERRNO) { flag = 1; } \ else { \ flag = not_ok_is_fatal || errnum != Z_OK; \ @@ -145,26 +144,15 @@ static INT lockfile(INT fd) { return FLOCK(fd, LOCK_EX); } static INT lockfile(INT fd) { return lockf(fd, F_LOCK, (off_t)0); } #endif -/* work around missing zlib API operation to extract a glzFile's fd */ -typedef struct { - int fd; - glzFile file; -} gzxfile; -#define gzxfile_fd(x) (((gzxfile *)&BVIT(x,0))->fd) -#define gzxfile_gzfile(x) (((gzxfile *)&BVIT(x,0))->file) -static ptr make_gzxfile(int fd, glzFile file) { - ptr bv; +#define MAKE_GZXFILE(x) Sinteger((iptr)x) +#define GZXFILE_GZFILE(x) ((glzFile)Sinteger_value(x)) - bv = S_bytevector(sizeof(gzxfile)); - gzxfile_fd(bv) = fd; - gzxfile_gzfile(bv) = file; - return bv; -} INT S_gzxfile_fd(ptr x) { - return gzxfile_fd(x); + return GZXFILE_GZFILE(x)->fd; } + glzFile S_gzxfile_gzfile(ptr x) { - return gzxfile_gzfile(x); + return GZXFILE_GZFILE(x); } ptr S_new_open_input_fd(const char *infilename, IBOOL compressed) { @@ -209,25 +197,26 @@ ptr S_new_open_input_fd(const char *infilename, IBOOL compressed) { return Scons(FIX(OPEN_ERROR_OTHER), str); } - if ((file = glzdopen(dupfd, "rb")) == Z_NULL) { + DEACTIVATE(tc) /* glzdopen_input reads the magic word from the file */ + if ((file = S_glzdopen_input(dupfd)) == Z_NULL) { + REACTIVATE(tc) FD_GUARD(result == 0, error, result = CLOSE(fd)); FD_GUARD(result == 0, error, result = CLOSE(dupfd)); return Scons(FIX(OPEN_ERROR_OTHER), Sstring("unable to allocate compression state (too many open files?)")); } - DEACTIVATE(tc) - compressed = !glzdirect(file); + compressed = !S_glzdirect(file); REACTIVATE(tc) if (compressed) { FD_GUARD(result == 0, error, result = CLOSE(fd)); - /* box indicates gzip'd */ - return Sbox(make_gzxfile(dupfd, file)); + /* box indicates compressed */ + return Sbox(MAKE_GZXFILE(file)); } - GZ_GUARD(1, ok == 0 || ok == Z_BUF_ERROR, flag, file, ok = glzclose(file)); + GZ_GUARD(1, ok == 0 || ok == Z_BUF_ERROR, flag, file, ok = S_glzclose(file)); if (flag) {} /* make the compiler happy */ - if (LSEEK(fd, 0, SEEK_SET) != 0) { /* glzdirect does not leave fd at position 0 */ + if (LSEEK(fd, 0, SEEK_SET) != 0) { /* glzdopen and glzdirect might not leave fd at position 0 */ FD_GUARD(result == 0, error, result = CLOSE(fd)); return Scons(FIX(OPEN_ERROR_OTHER),Sstring("unable to reset after reading header bytes")); } @@ -245,22 +234,24 @@ ptr S_compress_input_fd(INT fd, I64 pos) { return S_strerror(errno); } - if ((file = glzdopen(dupfd, "rb")) == Z_NULL) { + DEACTIVATE(tc) + if ((file = S_glzdopen_input(dupfd)) == Z_NULL) { + REACTIVATE(tc) FD_GUARD(result == 0, error, result = CLOSE(dupfd)); return Sstring("unable to allocate compression state (too many open files?)"); } - DEACTIVATE(tc) - compressed = !glzdirect(file); + compressed = !S_glzdirect(file); REACTIVATE(tc) if (compressed) { FD_GUARD(result == 0, error, result = CLOSE(fd)); if (error) {} /* make the compiler happy */ - return Sbox(make_gzxfile(dupfd, file)); + /* box indicates compressed */ + return Sbox(MAKE_GZXFILE(file)); } - GZ_GUARD(1, ok == 0 || ok == Z_BUF_ERROR, flag, file, ok = glzclose(file)); + GZ_GUARD(1, ok == 0 || ok == Z_BUF_ERROR, flag, file, ok = S_glzclose(file)); if (flag) {} /* make the compiler happy */ if (LSEEK(fd, pos, SEEK_SET) != pos) { /* glzdirect does not leave fd at same position */ return Sstring("unable to reset after reading header bytes"); @@ -268,38 +259,28 @@ ptr S_compress_input_fd(INT fd, I64 pos) { return MAKE_FD(fd); } -ptr S_compress_output_fd(INT fd, IBOOL as_gz) { +ptr S_compress_output_fd(INT fd) { glzFile file; - int as_append; + ptr tc = get_thread_context(); -#ifdef WIN32 - as_append = 0; -#else - as_append = fcntl(fd, F_GETFL) & O_APPEND; -#endif - - if (as_gz) - file = glzdopen_gz(fd, as_append ? "ab" : "wb"); - else - file = glzdopen_lz4(fd, as_append ? "ab" : "wb"); + file = S_glzdopen_output(fd, (INT)UNFIX(COMPRESSFORMAT(tc)), (INT)UNFIX(COMPRESSLEVEL(tc))); if (file == Z_NULL) return Sstring("unable to allocate compression state (too many open files?)"); - return Sbox(make_gzxfile(fd, file)); + /* box indicates compressed */ + return Sbox(MAKE_GZXFILE(file)); } static ptr new_open_output_fd_helper( const char *infilename, INT mode, INT flags, IBOOL no_create, IBOOL no_fail, IBOOL no_truncate, - IBOOL append, IBOOL lock, IBOOL replace, IBOOL compressed, IBOOL as_gz) { + IBOOL append, IBOOL lock, IBOOL replace, IBOOL compressed) { char *filename; INT saved_errno = 0; iptr error; INT fd, result; -#ifdef PTHREADS ptr tc = get_thread_context(); -#endif flags |= (no_create ? 0 : O_CREAT) | @@ -358,26 +339,23 @@ static ptr new_open_output_fd_helper( } glzFile file; - if (as_gz) - file = glzdopen_gz(fd, append ? "ab" : "wb"); - else - file = glzdopen_lz4(fd, append ? "ab" : "wb"); + file = S_glzdopen_output(fd, (INT)UNFIX(COMPRESSFORMAT(tc)), (INT)UNFIX(COMPRESSLEVEL(tc))); if (file == Z_NULL) { FD_GUARD(result == 0, error, result = CLOSE(fd)); return Scons(FIX(OPEN_ERROR_OTHER), Sstring("unable to allocate compression state")); } - return make_gzxfile(fd, file); + return MAKE_GZXFILE(file); } ptr S_new_open_output_fd( const char *filename, INT mode, IBOOL no_create, IBOOL no_fail, IBOOL no_truncate, - IBOOL append, IBOOL lock, IBOOL replace, IBOOL compressed, IBOOL as_gz) { + IBOOL append, IBOOL lock, IBOOL replace, IBOOL compressed) { return new_open_output_fd_helper( filename, mode, O_BINARY | O_WRONLY, no_create, no_fail, no_truncate, - append, lock, replace, compressed, as_gz); + append, lock, replace, compressed); } ptr S_new_open_input_output_fd( @@ -390,14 +368,14 @@ ptr S_new_open_input_output_fd( return new_open_output_fd_helper( filename, mode, O_BINARY | O_RDWR, no_create, no_fail, no_truncate, - append, lock, replace, compressed, 0); + append, lock, replace, 0); } ptr S_close_fd(ptr file, IBOOL gzflag) { INT saved_errno = 0; INT ok, flag; INT fd = gzflag ? 0 : GET_FD(file); - glzFile gzfile = gzflag ? gzxfile_gzfile(file) : NULL; + glzFile gzfile = gzflag ? GZXFILE_GZFILE(file) : NULL; #ifdef PTHREADS ptr tc = get_thread_context(); #endif @@ -414,7 +392,7 @@ ptr S_close_fd(ptr file, IBOOL gzflag) { FD_GUARD(ok == 0, flag, ok = CLOSE(fd)); } else { /* zlib 1.2.1 returns Z_BUF_ERROR when closing an empty file opened for reading */ - GZ_GUARD(1, ok == 0 || ok == Z_BUF_ERROR, flag, gzfile, ok = glzclose(gzfile)); + GZ_GUARD(1, ok == 0 || ok == Z_BUF_ERROR, flag, gzfile, ok = S_glzclose(gzfile)); } saved_errno = errno; REACTIVATE(tc) @@ -444,7 +422,7 @@ ptr S_bytevector_read(ptr file, ptr bv, iptr start, iptr count, IBOOL gzflag) { ptr tc = get_thread_context(); iptr m, flag = 0; INT fd = gzflag ? 0 : GET_FD(file); - glzFile gzfile = gzflag ? gzxfile_gzfile(file) : NULL; + glzFile gzfile = gzflag ? GZXFILE_GZFILE(file) : NULL; /* file is not locked; do not reference after deactivating thread! */ file = (ptr)-1; @@ -477,7 +455,7 @@ ptr S_bytevector_read(ptr file, ptr bv, iptr start, iptr count, IBOOL gzflag) { GZ_EINTR_GUARD( 1, m >= 0 || Sboolean_value(KEYBOARDINTERRUPTPENDING(tc)), flag, gzfile, - m = glzread(gzfile, &BVIT(bv,start), (GZ_IO_SIZE_T)count)); + m = S_glzread(gzfile, &BVIT(bv,start), (GZ_IO_SIZE_T)count)); } } saved_errno = errno; @@ -561,7 +539,7 @@ ptr S_bytevector_write(ptr file, ptr bv, iptr start, iptr count, IBOOL gzflag) { ptr tc = get_thread_context(); INT flag = 0, saved_errno = 0; INT fd = gzflag ? 0 : GET_FD(file); - glzFile gzfile = gzflag ? gzxfile_gzfile(file) : NULL; + glzFile gzfile = gzflag ? GZXFILE_GZFILE(file) : NULL; for (s = start, c = count; c > 0; s += i, c -= i) { iptr cx = c; @@ -579,7 +557,7 @@ ptr S_bytevector_write(ptr file, ptr bv, iptr start, iptr count, IBOOL gzflag) { GZ_EINTR_GUARD( i < 0, i > 0 || Sboolean_value(KEYBOARDINTERRUPTPENDING(tc)), flag, gzfile, - i = glzwrite(gzfile, &BVIT(bv,s), (GZ_IO_SIZE_T)cx)); + i = S_glzwrite(gzfile, &BVIT(bv,s), (GZ_IO_SIZE_T)cx)); } else { FD_EINTR_GUARD(i >= 0 || Sboolean_value(KEYBOARDINTERRUPTPENDING(tc)), flag, i = WRITE(fd, &BVIT(bv,s), (IO_SIZE_T)cx)); @@ -623,7 +601,7 @@ ptr S_put_byte(ptr file, INT byte, IBOOL gzflag) { ptr tc = get_thread_context(); INT flag = 0, saved_errno = 0; INT fd = gzflag ? 0 : GET_FD(file); - glzFile gzfile = gzflag ? gzxfile_gzfile(file) : NULL; + glzFile gzfile = gzflag ? GZXFILE_GZFILE(file) : NULL; octet buf[1]; buf[0] = (octet)byte; @@ -634,7 +612,7 @@ ptr S_put_byte(ptr file, INT byte, IBOOL gzflag) { GZ_EINTR_GUARD( i < 0, i > 0 || Sboolean_value(KEYBOARDINTERRUPTPENDING(tc)), flag, gzfile, - i = glzwrite(gzfile, buf, 1)); + i = S_glzwrite(gzfile, buf, 1)); } else { FD_EINTR_GUARD(i >= 0 || Sboolean_value(KEYBOARDINTERRUPTPENDING(tc)), flag, i = WRITE(fd, buf, 1)); @@ -664,7 +642,7 @@ ptr S_put_byte(ptr file, INT byte, IBOOL gzflag) { ptr S_get_fd_pos(ptr file, IBOOL gzflag) { errno = 0; if (gzflag) { - z_off_t offset = glzseek(gzxfile_gzfile(file), 0, SEEK_CUR); + z_off_t offset = S_glzseek(GZXFILE_GZFILE(file), 0, SEEK_CUR); if (offset != -1) return Sinteger64(offset); } else { OFF_T offset = LSEEK(GET_FD(file), 0, SEEK_CUR); @@ -683,7 +661,7 @@ ptr S_set_fd_pos(ptr file, ptr pos, IBOOL gzflag) { if (sizeof(z_off_t) != sizeof(I64)) if (offset != offset64) return Sstring("invalid position"); errno = 0; - if (glzseek(gzxfile_gzfile(file),offset,SEEK_SET) == offset) return Strue; + if (S_glzseek(GZXFILE_GZFILE(file),offset,SEEK_SET) == offset) return Strue; if (errno == 0) return Sstring("compression failed"); return S_strerror(errno); } else { @@ -811,96 +789,115 @@ static int is_valid_lz4_length(iptr len) { /* Accept `iptr` because we expect it to represent a bytevector size, which always fits in `iptr`. Return `uptr`, because the result might not fit in `iptr`. */ -uptr S_bytevector_compress_size(iptr s_count, IBOOL as_gz) { - if (as_gz) { - if (is_valid_zlib_length(s_count)) - return compressBound((uLong)s_count); - else { - /* Compression will report "source too long" */ +uptr S_bytevector_compress_size(iptr s_count, INT compress_format) { + switch (compress_format) { + case COMPRESS_GZIP: + if (is_valid_zlib_length(s_count)) + return compressBound((uLong)s_count); + else { + /* Compression will report "source too long" */ + return 0; + } + case COMPRESS_LZ4: + if (is_valid_lz4_length(s_count)) + return LZ4_compressBound((uLong)s_count); + else { + /* Compression will report "source too long" */ + return 0; + } + default: + S_error1("S_bytevector_compress_size", "unexpected compress format ~s", FIX(compress_format)); return 0; - } - } else { - if (is_valid_lz4_length(s_count)) - return LZ4_compressBound((uLong)s_count); - else { - /* Compression will report "source too long" */ - return 0; - } } } ptr S_bytevector_compress(ptr dest_bv, iptr d_start, iptr d_count, ptr src_bv, iptr s_start, iptr s_count, - IBOOL as_gz) { + INT compress_format) { /* On error, an message-template string with ~s for the bytevector */ - if (as_gz) { - int r; - uLong destLen; - - if (!is_valid_zlib_length(s_count)) - return Sstring("source bytevector ~s is too large"); - - destLen = (uLong)d_count; - - r = compress(&BVIT(dest_bv, d_start), &destLen, &BVIT(src_bv, s_start), (uLong)s_count); - - if (r == Z_OK) - return FIX(destLen); - else if (r == Z_BUF_ERROR) - return Sstring("destination bytevector is too small for compressed form of ~s"); - else - return Sstring("internal error compressing ~s"); - } else { - int destLen; + switch (compress_format) { + case COMPRESS_GZIP: + { + int r; + uLong destLen; - if (!is_valid_lz4_length(s_count)) - return Sstring("source bytevector ~s is too large"); + if (!is_valid_zlib_length(s_count)) + return Sstring("source bytevector ~s is too large"); - destLen = (int)d_count; + destLen = (uLong)d_count; - destLen = LZ4_compress_default((char *)&BVIT(src_bv, s_start), (char *)&BVIT(dest_bv, d_start), (int)s_count, (int)d_count); + r = compress(&BVIT(dest_bv, d_start), &destLen, &BVIT(src_bv, s_start), (uLong)s_count); - if (destLen > 0) - return Sfixnum(destLen); - else - return Sstring("compression failed for ~s"); + if (r == Z_OK) + return FIX(destLen); + else if (r == Z_BUF_ERROR) + return Sstring("destination bytevector is too small for compressed form of ~s"); + else + return Sstring("internal error compressing ~s"); + } + case COMPRESS_LZ4: + { + int destLen; + + if (!is_valid_lz4_length(s_count)) + return Sstring("source bytevector ~s is too large"); + + destLen = (int)d_count; + + destLen = LZ4_compress_default((char *)&BVIT(src_bv, s_start), (char *)&BVIT(dest_bv, d_start), (int)s_count, (int)d_count); + + if (destLen > 0) + return Sfixnum(destLen); + else + return Sstring("compression failed for ~s"); + } + default: + S_error1("S_bytevector_compress", "unexpected compress format ~s", FIX(compress_format)); + return Sfalse; } } ptr S_bytevector_uncompress(ptr dest_bv, iptr d_start, iptr d_count, ptr src_bv, iptr s_start, iptr s_count, - IBOOL as_gz) { + INT compress_format) { /* On error, an message-template string with ~s for the bytevector */ - if (as_gz) { - int r; - uLongf destLen; + switch (compress_format) { + case COMPRESS_GZIP: + { + int r; + uLongf destLen; - if (!is_valid_zlib_length(d_count)) - return Sstring("expected result size of uncompressed source ~s is too large"); + if (!is_valid_zlib_length(d_count)) + return Sstring("expected result size of uncompressed source ~s is too large"); - destLen = (uLongf)d_count; + destLen = (uLongf)d_count; - r = uncompress(&BVIT(dest_bv, d_start), &destLen, &BVIT(src_bv, s_start), (uLong)s_count); + r = uncompress(&BVIT(dest_bv, d_start), &destLen, &BVIT(src_bv, s_start), (uLong)s_count); - if (r == Z_OK) - return FIX(destLen); - else if (r == Z_BUF_ERROR) - return Sstring("uncompressed ~s is larger than expected size"); - else if (r == Z_DATA_ERROR) - return Sstring("invalid data in source bytevector ~s"); - else - return Sstring("internal error uncompressing ~s"); - } else { - int r; + if (r == Z_OK) + return FIX(destLen); + else if (r == Z_BUF_ERROR) + return Sstring("uncompressed ~s is larger than expected size"); + else if (r == Z_DATA_ERROR) + return Sstring("invalid data in source bytevector ~s"); + else + return Sstring("internal error uncompressing ~s"); + } + case COMPRESS_LZ4: + { + int r; - if (!is_valid_lz4_length(d_count)) - return Sstring("expected result size of uncompressed source ~s is too large"); + if (!is_valid_lz4_length(d_count)) + return Sstring("expected result size of uncompressed source ~s is too large"); - r = LZ4_decompress_safe((char *)&BVIT(src_bv, s_start), (char *)&BVIT(dest_bv, d_start), (int)s_count, (int)d_count); + r = LZ4_decompress_safe((char *)&BVIT(src_bv, s_start), (char *)&BVIT(dest_bv, d_start), (int)s_count, (int)d_count); - if (r >= 0) - return Sfixnum(r); - else - return Sstring("internal error uncompressing ~s"); + if (r >= 0) + return Sfixnum(r); + else + return Sstring("internal error uncompressing ~s"); + } + default: + return Sstring("unepxected compress format ~s"); } } diff --git a/c/scheme.c b/c/scheme.c index 1dd9c86b66..067e498d2d 100644 --- a/c/scheme.c +++ b/c/scheme.c @@ -585,17 +585,17 @@ static IBOOL find_boot(name, ext, fd, errorp) const char *name, *ext; int fd; IB path = name; if (fd != -1) { - file = glzdopen(fd, "rb"); + file = S_glzdopen_input(fd); } else { #ifdef WIN32 expandedpath = S_malloc_wide_pathname(path); - file = glzopen_w(expandedpath, "rb"); + file = S_glzopen_input_w(expandedpath); #else expandedpath = S_malloc_pathname(path); - file = glzopen(expandedpath, "rb"); + file = S_glzopen_input(expandedpath); #endif /* assumption (seemingly true based on a glance at the source code): - glzopen doesn't squirrel away a pointer to expandedpath. */ + S_glzopen_input doesn't squirrel away a pointer to expandedpath. */ free(expandedpath); } @@ -611,14 +611,14 @@ static IBOOL find_boot(name, ext, fd, errorp) const char *name, *ext; int fd; IB if (verbose) fprintf(stderr, "trying %s...opened\n", path); /* check for magic number */ - if (glzgetc(file) != fasl_type_header || - glzgetc(file) != 0 || - glzgetc(file) != 0 || - glzgetc(file) != 0 || - glzgetc(file) != 'c' || - glzgetc(file) != 'h' || - glzgetc(file) != 'e' || - glzgetc(file) != 'z') { + if (S_glzgetc(file) != fasl_type_header || + S_glzgetc(file) != 0 || + S_glzgetc(file) != 0 || + S_glzgetc(file) != 0 || + S_glzgetc(file) != 'c' || + S_glzgetc(file) != 'h' || + S_glzgetc(file) != 'e' || + S_glzgetc(file) != 'z') { fprintf(stderr, "malformed fasl-object header in %s\n", path); S_abnormal_exit(); } @@ -626,7 +626,7 @@ static IBOOL find_boot(name, ext, fd, errorp) const char *name, *ext; int fd; IB /* check version */ if (zget_uptr(file, &n) != 0) { fprintf(stderr, "unexpected end of file on %s\n", path); - glzclose(file); + S_glzclose(file); S_abnormal_exit(); } @@ -634,21 +634,21 @@ static IBOOL find_boot(name, ext, fd, errorp) const char *name, *ext; int fd; IB fprintf(stderr, "%s is for Version %s; ", path, S_format_scheme_version(n)); /* use separate fprintf since S_format_scheme_version returns static string */ fprintf(stderr, "need Version %s\n", S_format_scheme_version(scheme_version)); - glzclose(file); + S_glzclose(file); S_abnormal_exit(); } /* check machine type */ if (zget_uptr(file, &n) != 0) { fprintf(stderr, "unexpected end of file on %s\n", path); - glzclose(file); + S_glzclose(file); S_abnormal_exit(); } if (n != machine_type) { fprintf(stderr, "%s is for machine-type %s; need machine-type %s\n", path, S_lookup_machine_type(n), S_lookup_machine_type(machine_type)); - glzclose(file); + S_glzclose(file); S_abnormal_exit(); } } else { @@ -671,13 +671,13 @@ static IBOOL find_boot(name, ext, fd, errorp) const char *name, *ext; int fd; IB #ifdef WIN32 expandedpath = S_malloc_wide_pathname(path); - file = glzopen_w(expandedpath, "rb"); + file = S_glzopen_input_w(expandedpath); #else expandedpath = S_malloc_pathname(path); - file = glzopen(expandedpath, "rb"); + file = S_glzopen_input(expandedpath); #endif /* assumption (seemingly true based on a glance at the source code): - glzopen doesn't squirrel away a pointer to expandedpath. */ + S_glzopen_input doesn't squirrel away a pointer to expandedpath. */ free(expandedpath); if (!file) { if (verbose) fprintf(stderr, "trying %s...cannot open\n", path); @@ -687,23 +687,23 @@ static IBOOL find_boot(name, ext, fd, errorp) const char *name, *ext; int fd; IB if (verbose) fprintf(stderr, "trying %s...opened\n", path); /* check for magic number */ - if (glzgetc(file) != fasl_type_header || - glzgetc(file) != 0 || - glzgetc(file) != 0 || - glzgetc(file) != 0 || - glzgetc(file) != 'c' || - glzgetc(file) != 'h' || - glzgetc(file) != 'e' || - glzgetc(file) != 'z') { + if (S_glzgetc(file) != fasl_type_header || + S_glzgetc(file) != 0 || + S_glzgetc(file) != 0 || + S_glzgetc(file) != 0 || + S_glzgetc(file) != 'c' || + S_glzgetc(file) != 'h' || + S_glzgetc(file) != 'e' || + S_glzgetc(file) != 'z') { if (verbose) fprintf(stderr, "malformed fasl-object header in %s\n", path); - glzclose(file); + S_glzclose(file); continue; } /* check version */ if (zget_uptr(file, &n) != 0) { if (verbose) fprintf(stderr, "unexpected end of file on %s\n", path); - glzclose(file); + S_glzclose(file); continue; } @@ -713,14 +713,14 @@ static IBOOL find_boot(name, ext, fd, errorp) const char *name, *ext; int fd; IB /* use separate fprintf since S_format_scheme_version returns static string */ fprintf(stderr, "need Version %s\n", S_format_scheme_version(scheme_version)); } - glzclose(file); + S_glzclose(file); continue; } /* check machine type */ if (zget_uptr(file, &n) != 0) { if (verbose) fprintf(stderr, "unexpected end of file on %s\n", path); - glzclose(file); + S_glzclose(file); continue; } @@ -728,7 +728,7 @@ static IBOOL find_boot(name, ext, fd, errorp) const char *name, *ext; int fd; IB if (verbose) fprintf(stderr, "%s is for machine-type %s; need machine-type %s\n", path, S_lookup_machine_type(n), S_lookup_machine_type(machine_type)); - glzclose(file); + S_glzclose(file); continue; } @@ -738,56 +738,56 @@ static IBOOL find_boot(name, ext, fd, errorp) const char *name, *ext; int fd; IB if (verbose) fprintf(stderr, "version and machine type check\n"); - if (glzgetc(file) != '(') { /* ) */ + if (S_glzgetc(file) != '(') { /* ) */ fprintf(stderr, "malformed boot file %s\n", path); - glzclose(file); + S_glzclose(file); S_abnormal_exit(); } /* ( */ - if ((c = glzgetc(file)) == ')') { + if ((c = S_glzgetc(file)) == ')') { if (boot_count != 0) { fprintf(stderr, "base boot file %s must come before other boot files\n", path); - glzclose(file); + S_glzclose(file); S_abnormal_exit(); } } else { if (boot_count == 0) { for (;;) { - glzungetc(c, file); + S_glzungetc(c, file); /* try to load heap or boot file this boot file requires */ if (zgetstr(file, buf, PATH_MAX) != 0) { fprintf(stderr, "unexpected end of file on %s\n", path); - glzclose(file); + S_glzclose(file); S_abnormal_exit(); } if (find_boot(buf, ".boot", -1, 0)) break; - if ((c = glzgetc(file)) == ')') { + if ((c = S_glzgetc(file)) == ')') { char *sep; char *wastebuf[8]; fprintf(stderr, "cannot find subordinate boot file "); - glzrewind(file); - (void) glzread(file, wastebuf, 8); /* magic number */ + S_glzrewind(file); + (void) S_glzread(file, wastebuf, 8); /* magic number */ (void) zget_uptr(file, &n); /* version */ (void) zget_uptr(file, &n); /* machine type */ - (void) glzgetc(file); /* open paren */ + (void) S_glzgetc(file); /* open paren */ for (sep = ""; ; sep = "or ") { - if ((c = glzgetc(file)) == ')') break; - glzungetc(c, file); + if ((c = S_glzgetc(file)) == ')') break; + S_glzungetc(c, file); (void) zgetstr(file, buf, PATH_MAX); fprintf(stderr, "%s%s.boot ", sep, buf); } fprintf(stderr, "required by %s\n", path); - glzclose(file); + S_glzclose(file); S_abnormal_exit(); } } } /* skip to end of header */ - while ((c = glzgetc(file)) != ')') { + while ((c = S_glzgetc(file)) != ')') { if (c < 0) { fprintf(stderr, "malformed boot file %s\n", path); - glzclose(file); + S_glzclose(file); S_abnormal_exit(); } } @@ -808,11 +808,11 @@ static IBOOL find_boot(name, ext, fd, errorp) const char *name, *ext; int fd; IB static uptr zget_uptr(glzFile file, uptr *pn) { uptr n, m; int c; octet k; - if ((c = glzgetc(file)) < 0) return -1; + if ((c = S_glzgetc(file)) < 0) return -1; k = (octet)c; n = k >> 1; while (k & 1) { - if ((c = glzgetc(file)) < 0) return -1; + if ((c = S_glzgetc(file)) < 0) return -1; k = (octet)c; m = n << 7; if (m >> 7 != n) return -1; @@ -826,9 +826,9 @@ static INT zgetstr(file, s, max) glzFile file; char *s; iptr max; { ICHAR c; while (max-- > 0) { - if ((c = glzgetc(file)) < 0) return -1; + if ((c = S_glzgetc(file)) < 0) return -1; if (c == ' ' || c == ')') { - if (c == ')') glzungetc(c, file); + if (c == ')') S_glzungetc(c, file); *s = 0; return 0; } @@ -923,7 +923,7 @@ static void load(tc, n, base) ptr tc; iptr n; IBOOL base; { } S_G.load_binary = Sfalse; - glzclose(bd[n].file); + S_glzclose(bd[n].file); } /***************************************************************************/ @@ -1137,6 +1137,8 @@ extern void Sbuild_heap(kernel, custom_init) const char *kernel; void (*custom_i /* #scheme-init enables interrupts */ TRAP(tc) = (ptr)most_positive_fixnum; DISABLECOUNT(tc) = Sfixnum(1); + COMPRESSFORMAT(tc) = FIX(COMPRESS_LZ4); + COMPRESSLEVEL(tc) = FIX(COMPRESS_MEDIUM); load(tc, i++, 1); S_boot_time = 0; diff --git a/c/system.h b/c/system.h index b078417a6a..cdf6f09fcc 100644 --- a/c/system.h +++ b/c/system.h @@ -24,13 +24,14 @@ #include "version.h" #include -#include "compress-io.h" #include #include "thread.h" #include "types.h" +#include "compress-io.h" + #ifndef EXTERN #define EXTERN extern #endif diff --git a/c/thread.c b/c/thread.c index f7491e347b..5b60d675d2 100644 --- a/c/thread.c +++ b/c/thread.c @@ -121,6 +121,8 @@ ptr S_create_thread_object(who, p_tc) const char *who; ptr p_tc; { GUARDIANENTRIES(tc) = Snil; + LZ4OUTBUFFER(tc) = NULL; + tc_mutex_release() return thread; @@ -224,7 +226,9 @@ static IBOOL destroy_thread(tc) ptr tc; { } } - free((void *)THREADTC(thread)); + if (LZ4OUTBUFFER(tc) != NULL) free(LZ4OUTBUFFER(tc)); + + free((void *)tc); THREADTC(thread) = 0; /* mark it dead */ status = 1; break; diff --git a/checkin b/checkin index 6db76bcfce..ef77340928 100755 --- a/checkin +++ b/checkin @@ -294,7 +294,7 @@ endif delete: -set tmpfiles = `(cd $W; find . -name zlib -prune -o -type f -print)` +set tmpfiles = `(cd $W; find . -name zlib -prune -o -name lz4 -prune -o -type f -print)` set files = () foreach x ($tmpfiles) set files = ($x $files) diff --git a/csug/io.stex b/csug/io.stex index a8bf76e4fd..736df4432c 100644 --- a/csug/io.stex +++ b/csug/io.stex @@ -208,8 +208,10 @@ Section~\ref{TSPL:SECTOPENINGFILES} of {\TSPLFOUR}. \item[\var{compressed}:] An output file should be compressed when written; and a compressed input file should be decompressed when read. The compression format for output -is determined by the \scheme{compress-format} parameter, while the compression -format on input is inferred. +is determined by the \index{\scheme{compress-format}}\scheme{compress-format} +parameter, while the compression format on input is inferred. +The compression level, which is relevant only for output, is determined +by the \index{\scheme{compress-level}}\scheme{compress-level} parameter. \item[\var{replace}:] For output files only, replace (remove and recreate) the existing file if @@ -975,8 +977,10 @@ will be compressed. If the port is an input port, subsequent input will be decompressed if and only if the port is currently pointing at compressed data. The compression format for output -is determined by the \scheme{compress-format} parameter, while the compression -format on input is inferred. +is determined by the \index{\scheme{compress-format}}\scheme{compress-format} +parameter, while the compression format on input is inferred. +The compression level, which is relevant only for output, is determined +by the \index{\scheme{compress-level}}\scheme{compress-level} parameter. This procedure has no effect if the port is already set for compression. %---------------------------------------------------------------------------- @@ -986,23 +990,41 @@ This procedure has no effect if the port is already set for compression. \endnoskipentryheader \noindent -\scheme{compress-format} is a parameter that determines the -compression algorithm and format that is used for output. Currently, -the possible values of the parameter are \scheme{'lz4} (the default) -and \scheme{'gzip}. +\scheme{compress-format} determines the +compression algorithm and format used for output. Currently, +the possible values of the parameter are the symbols \scheme{lz4} (the default) +and \scheme{gzip}. -The \scheme{'lz4} format uses the LZ4 compression library developed by +The \scheme{lz4} format uses the LZ4 compression library developed by Yann Collet. It is therefore compatible with the \scheme{lz4} program, which means that \scheme{lz4} may be used to uncompress files produced by {\ChezScheme} and visa versa. -The \scheme{'gzip} format uses the zlib compression library developed by +The \scheme{gzip} format uses the zlib compression library developed by Jean-loup Gailly and Mark Adler. It is therefore compatible with the \scheme{gzip} program, which means that \scheme{gzip} may be used to uncompress files produced by {\ChezScheme} and visa versa. +Reading \scheme{lz4}-compressed data tends to be much faster than reading +\scheme{gzip}-compressed data, while \scheme{gzip}-compressed data tends to +be significantly smaller. + +%---------------------------------------------------------------------------- +\entryheader +\formdef{compress-level}{\categorythreadparameter}{compress-level} +\listlibraries +\endnoskipentryheader + +\noindent +\scheme{compress-level} determines the amount of effort spent on +compression and is thus relevant only for output. +It can be set to one of the symbols \scheme{low}, +\scheme{medium}, \scheme{high}, or \scheme{maximum}, which are +listed in order from shortest to longest expected compression time +and least to greatest expected effectiveness. +Its default value is \scheme{medium}. \section{String Ports\label{SECTIOSTRINGPORTS}} @@ -1858,7 +1880,11 @@ The default behavior is to raise an exception. The mutually exclusive \scheme{compressed} and \scheme{uncompressed} options determine whether the output file is to be compressed. -The compression format is determined by the \scheme{compress-format} parameter. +The compression format and level are determined by the +\index{\scheme{compress-format}}\scheme{compress-format} +and +\index{\scheme{compress-level}}\scheme{compress-level} +parameters. Files are uncompressed by default, so the \scheme{uncompressed} option is useful only as documentation. diff --git a/csug/objects.stex b/csug/objects.stex index f94ddd3efd..b8e3262acb 100644 --- a/csug/objects.stex +++ b/csug/objects.stex @@ -1179,7 +1179,12 @@ The result is the raw compressed data with a minimal header to record the uncompressed size and the compression mode. The result does not include the header that is written by port-based compression using the \scheme{compressed} option. The compression format is determined by the -\scheme{compress-format} parameter. +\index{\scheme{compress-format}}\scheme{compress-format} +parameter. +The compression level is fixed to some default determined by the +format; it is not affected by the +\index{\scheme{compress-level}}\scheme{compress-level} +parameter. %---------------------------------------------------------------------------- diff --git a/csug/system.stex b/csug/system.stex index 4749833c4b..c3ca365755 100644 --- a/csug/system.stex +++ b/csug/system.stex @@ -2471,6 +2471,11 @@ When this parameter is \scheme{#t}, the default, \scheme{compile-file}, \scheme{compile-program}, \scheme{compile-to-file}, \scheme{compile-whole-program}, and \scheme{strip-fasl-file} compress the object files they create. +The compression format and level are determined by the +\index{\scheme{compress-format}}\scheme{compress-format} +and +\index{\scheme{compress-level}}\scheme{compress-level} +parameters. %---------------------------------------------------------------------------- diff --git a/csug/threads.stex b/csug/threads.stex index 8947efee64..64c394f0bb 100644 --- a/csug/threads.stex +++ b/csug/threads.stex @@ -208,6 +208,16 @@ continuation invocation, the mutex is reacquired. Using \scheme{with-mutex} is generally more convenient and safer than using \scheme{mutex-acquire} and \scheme{mutex-release} directly. +%---------------------------------------------------------------------------- +\entryheader +\formdef{mutex-name}{\categoryprocedure}{(mutex-name \var{mutex})} +\returns the name associated with \var{mutex}, if any; otherwise \scheme{#f} +\listlibraries +\endentryheader + +\noindent +\var{mutex} must be a mutex. + \section{Conditions} %---------------------------------------------------------------------------- @@ -284,6 +294,16 @@ condition identified by \var{cond}. \scheme{condition-broadcast} releases all of the threads waiting for the condition identified by \var{cond}. +%---------------------------------------------------------------------------- +\entryheader +\formdef{condition-name}{\categoryprocedure}{(condition-name \var{condition})} +\returns the name associated with \var{condition}, if any; otherwise \scheme{#f} +\listlibraries +\endentryheader + +\noindent +\var{condition} must be a condition. + \section{Locks\label{SECTTHREADLOCKS}} \index{locks}% diff --git a/mats/bytevector.ms b/mats/bytevector.ms index 0ef426f5d0..e018ef2592 100644 --- a/mats/bytevector.ms +++ b/mats/bytevector.ms @@ -11277,7 +11277,7 @@ (mat bytevector-compress - (parameters [compress-format 'gzip] [compress-format 'lz4]) + (parameters [compress-format 'gzip 'lz4]) (error? (bytevector-compress 7)) (error? (bytevector-compress "hello")) (error? (bytevector-uncompress 7)) @@ -11300,19 +11300,6 @@ (error? ;; Need at least 8 bytes for result size (bytevector-uncompress '#vu8(0 0 0 0 0 0 255))) - (error? - ;; Fail if the uncompressed result is too big - (bytevector-uncompress (let ([bv (bytevector-compress (string->utf8 "hello"))]) - (bytevector-u64-set! bv 0 (sub1 (bytevector-u64-ref bv 0 (endianness big))) (endianness big)) - bv))) - (error? - ;; Fail if the uncompressed result is too small - (bytevector-uncompress (let ([bv (bytevector-compress (string->utf8 "hello"))]) - (bytevector-u64-set! bv 0 (add1 (bytevector-u64-ref bv 0 (endianness big))) (endianness big)) - bv))) - (error? - ;; Compressed data always starts with 0x78, so this one isn't valid: - (bytevector-uncompress '#vu8(0 0 0 0 0 0 0 255 1 2 3))) (error? ;; Claming a too-large size in the header should fail with a suitable message: (bytevector-uncompress '#vu8(255 255 255 255 255 255 255 255 1 2 3))) diff --git a/mats/io.ms b/mats/io.ms index 43897ba8d8..5108e2e8c1 100644 --- a/mats/io.ms +++ b/mats/io.ms @@ -2126,8 +2126,63 @@ (= q (custom-port-buffer-size))))) ) +(mat compress-parameters + (error? ; unsupported format + (compress-format 'foo)) + (error? ; unsupported format + (compress-format "gzip")) + (eq? (compress-format) 'lz4) + (eq? (parameterize ([compress-format 'gzip]) (compress-format)) 'gzip) + (eq? (parameterize ([compress-format 'lz4]) (compress-format)) 'lz4) + (error? ; unsupported level + (compress-level 'foo)) + (error? ; unsupported level + (compress-level 1)) + (eq? (compress-level) 'medium) + (eq? (parameterize ([compress-level 'low]) (compress-level)) 'low) + (eq? (parameterize ([compress-level 'medium]) (compress-level)) 'medium) + (eq? (parameterize ([compress-level 'high]) (compress-level)) 'high) + (eq? (parameterize ([compress-level 'maximum]) (compress-level)) 'maximum) + (begin + (define (fnlength ifn) (call-with-port (open-file-input-port ifn) port-length)) + (define (compress-file ifn ofn fmt lvl) + (call-with-port (open-file-input-port ifn) + (lambda (ip) + (call-with-port (parameterize ([compress-format fmt] [compress-level lvl]) + (open-file-output-port ofn (file-options compressed replace))) + (lambda (op) (put-bytevector op (get-bytevector-all ip)))))) + (fnlength ofn)) + (define (compress-file-test fmt) + (let ([orig (fnlength "prettytest.ss")] + [low (compress-file "prettytest.ss" "testfile.ss" fmt 'low)] + [medium (compress-file "prettytest.ss" "testfile.ss" fmt 'medium)] + [high (compress-file "prettytest.ss" "testfile.ss" fmt 'high)] + [maximum (compress-file "prettytest.ss" "testfile.ss" fmt 'maximum)]) + (define-syntax test1 + (syntax-rules () + [(_ level) + (unless (< level orig) (errorf #f "~s ~s did not compress" fmt 'level))])) + (define-syntax test2 + (syntax-rules () + [(_ level1 level2) + (unless (<= level2 level1) (errorf #f "~s ~s did worse than ~s" fmt 'level2 'level1))])) + (test1 low) + (test1 medium) + (test1 high) + (test1 maximum) + (test2 low medium) + (test2 medium high) + (test2 high maximum) + (unless (< maximum low) (errorf #f "~s maximum didn't do better than low" fmt)))) + (compress-file-test 'lz4) + (compress-file-test 'gzip) + #t) +) + (mat compression - (parameters [compress-format 'gzip] [compress-format 'lz4]) + (parameters [compress-format 'gzip 'lz4] [compress-level 'low 'medium 'high 'maximum]) + (and (memq (compress-format) '(gzip lz4)) #t) + (and (memq (compress-level) '(low medium high maximum)) #t) (let () (define cp (lambda (src dst) @@ -3072,7 +3127,7 @@ ) (mat compression-textual - (parameters [compress-format 'gzip] [compress-format 'lz4]) + (parameters [compress-format 'gzip 'lz4] [compress-level 'low 'medium 'high 'maximum]) (let () (define cp (lambda (src dst) diff --git a/mats/mat.ss b/mats/mat.ss index cc04036f81..30b2e55d42 100644 --- a/mats/mat.ss +++ b/mats/mat.ss @@ -20,12 +20,16 @@ (define-syntax mat (lambda (x) (syntax-case x (parameters) - [(_ x (parameters [param val] ...) e ...) - #'(for-each (lambda (p v) + [(_ x (parameters [param val ...] ...) e ...) + #'(let f ([p* (list param ...)] [v** (list (list val ...) ...)]) + (if (null? p*) + (mat x e ...) + (let ([p (car p*)]) + (for-each + (lambda (v) (parameterize ([p v]) - (mat x e ...))) - (list param ...) - (list val ...))] + (f (cdr p*) (cdr v**)))) + (car v**)))))] [(_ x e ...) (with-syntax ([(source ...) (map (lambda (clause) diff --git a/mats/root-experr-compile-0-f-f-f b/mats/root-experr-compile-0-f-f-f index cef04463be..49e044b24f 100644 --- a/mats/root-experr-compile-0-f-f-f +++ b/mats/root-experr-compile-0-f-f-f @@ -3626,9 +3626,6 @@ bytevector.mo:Expected error in mat bytevector-compress: "bytevector-uncompress: bytevector.mo:Expected error in mat bytevector-compress: "bytevector-uncompress: "hello" is not a bytevector". bytevector.mo:Expected error in mat bytevector-compress: "bytevector-uncompress: invalid data in source bytevector #vu8()". bytevector.mo:Expected error in mat bytevector-compress: "bytevector-uncompress: invalid data in source bytevector #vu8(0 0 0 0 0 0 ...)". -bytevector.mo:Expected error in mat bytevector-compress: "bytevector-uncompress: uncompressed #vu8(0 0 0 0 0 0 ...) is larger than expected size". -bytevector.mo:Expected error in mat bytevector-compress: "bytevector-uncompress: uncompressed size 5 for #vu8(0 0 0 0 0 0 ...) is smaller than expected size 6". -bytevector.mo:Expected error in mat bytevector-compress: "bytevector-uncompress: invalid data in source bytevector #vu8(0 0 0 0 0 0 ...)". bytevector.mo:Expected error in mat bytevector-compress: "bytevector-uncompress: bytevector #vu8(255 255 255 255 255 255 ...) claims invalid uncompressed size ". bytevector.mo:Expected error in mat bytevector-compress: "bytevector-compress: 7 is not a bytevector". bytevector.mo:Expected error in mat bytevector-compress: "bytevector-compress: "hello" is not a bytevector". @@ -3636,9 +3633,6 @@ bytevector.mo:Expected error in mat bytevector-compress: "bytevector-uncompress: bytevector.mo:Expected error in mat bytevector-compress: "bytevector-uncompress: "hello" is not a bytevector". bytevector.mo:Expected error in mat bytevector-compress: "bytevector-uncompress: invalid data in source bytevector #vu8()". bytevector.mo:Expected error in mat bytevector-compress: "bytevector-uncompress: invalid data in source bytevector #vu8(0 0 0 0 0 0 ...)". -bytevector.mo:Expected error in mat bytevector-compress: "bytevector-uncompress: internal error uncompressing #vu8(128 0 0 0 0 0 ...)". -bytevector.mo:Expected error in mat bytevector-compress: "bytevector-uncompress: uncompressed size 5 for #vu8(128 0 0 0 0 0 ...) is smaller than expected size 6". -bytevector.mo:Expected error in mat bytevector-compress: "bytevector-uncompress: invalid data in source bytevector #vu8(0 0 0 0 0 0 ...)". bytevector.mo:Expected error in mat bytevector-compress: "bytevector-uncompress: bytevector #vu8(255 255 255 255 255 255 ...) claims invalid uncompressed size ". misc.mo:Expected error in mat compiler1: "variable i-am-not-bound is not bound". misc.mo:Expected error in mat compiler1: "attempt to apply non-procedure oops". @@ -6798,6 +6792,22 @@ io.mo:Expected error in mat custom-port-buffer-size: "custom-port-buffer-size: 0 io.mo:Expected error in mat custom-port-buffer-size: "custom-port-buffer-size: -15 is not a positive fixnum". io.mo:Expected error in mat custom-port-buffer-size: "custom-port-buffer-size: is not a positive fixnum". io.mo:Expected error in mat custom-port-buffer-size: "custom-port-buffer-size: 1024.0 is not a positive fixnum". +io.mo:Expected error in mat compress-parameters: "compress-format: foo is not a supported format". +io.mo:Expected error in mat compress-parameters: "compress-format: "gzip" is not a supported format". +io.mo:Expected error in mat compress-parameters: "compress-level: foo is not a supported level". +io.mo:Expected error in mat compress-parameters: "compress-level: 1 is not a supported level". +io.mo:Expected error in mat compression: "port-file-compressed!: # is not a file port". +io.mo:Expected error in mat compression: "port-file-compressed!: cannot compress input/output port #". +io.mo:Expected error in mat compression: "port-file-compressed!: # is not a file port". +io.mo:Expected error in mat compression: "port-file-compressed!: cannot compress input/output port #". +io.mo:Expected error in mat compression: "port-file-compressed!: # is not a file port". +io.mo:Expected error in mat compression: "port-file-compressed!: cannot compress input/output port #". +io.mo:Expected error in mat compression: "port-file-compressed!: # is not a file port". +io.mo:Expected error in mat compression: "port-file-compressed!: cannot compress input/output port #". +io.mo:Expected error in mat compression: "port-file-compressed!: # is not a file port". +io.mo:Expected error in mat compression: "port-file-compressed!: cannot compress input/output port #". +io.mo:Expected error in mat compression: "port-file-compressed!: # is not a file port". +io.mo:Expected error in mat compression: "port-file-compressed!: cannot compress input/output port #". io.mo:Expected error in mat compression: "port-file-compressed!: # is not a file port". io.mo:Expected error in mat compression: "port-file-compressed!: cannot compress input/output port #". io.mo:Expected error in mat compression: "port-file-compressed!: # is not a file port". diff --git a/mats/root-experr-compile-2-f-f-f b/mats/root-experr-compile-2-f-f-f index 1bd9fc7391..49e044b24f 100644 --- a/mats/root-experr-compile-2-f-f-f +++ b/mats/root-experr-compile-2-f-f-f @@ -3626,8 +3626,12 @@ bytevector.mo:Expected error in mat bytevector-compress: "bytevector-uncompress: bytevector.mo:Expected error in mat bytevector-compress: "bytevector-uncompress: "hello" is not a bytevector". bytevector.mo:Expected error in mat bytevector-compress: "bytevector-uncompress: invalid data in source bytevector #vu8()". bytevector.mo:Expected error in mat bytevector-compress: "bytevector-uncompress: invalid data in source bytevector #vu8(0 0 0 0 0 0 ...)". -bytevector.mo:Expected error in mat bytevector-compress: "bytevector-uncompress: uncompressed #vu8(0 0 0 0 0 0 ...) is larger than expected size". -bytevector.mo:Expected error in mat bytevector-compress: "bytevector-uncompress: uncompressed size 5 for #vu8(0 0 0 0 0 0 ...) is smaller than expected size 6". +bytevector.mo:Expected error in mat bytevector-compress: "bytevector-uncompress: bytevector #vu8(255 255 255 255 255 255 ...) claims invalid uncompressed size ". +bytevector.mo:Expected error in mat bytevector-compress: "bytevector-compress: 7 is not a bytevector". +bytevector.mo:Expected error in mat bytevector-compress: "bytevector-compress: "hello" is not a bytevector". +bytevector.mo:Expected error in mat bytevector-compress: "bytevector-uncompress: 7 is not a bytevector". +bytevector.mo:Expected error in mat bytevector-compress: "bytevector-uncompress: "hello" is not a bytevector". +bytevector.mo:Expected error in mat bytevector-compress: "bytevector-uncompress: invalid data in source bytevector #vu8()". bytevector.mo:Expected error in mat bytevector-compress: "bytevector-uncompress: invalid data in source bytevector #vu8(0 0 0 0 0 0 ...)". bytevector.mo:Expected error in mat bytevector-compress: "bytevector-uncompress: bytevector #vu8(255 255 255 255 255 255 ...) claims invalid uncompressed size ". misc.mo:Expected error in mat compiler1: "variable i-am-not-bound is not bound". @@ -6788,6 +6792,24 @@ io.mo:Expected error in mat custom-port-buffer-size: "custom-port-buffer-size: 0 io.mo:Expected error in mat custom-port-buffer-size: "custom-port-buffer-size: -15 is not a positive fixnum". io.mo:Expected error in mat custom-port-buffer-size: "custom-port-buffer-size: is not a positive fixnum". io.mo:Expected error in mat custom-port-buffer-size: "custom-port-buffer-size: 1024.0 is not a positive fixnum". +io.mo:Expected error in mat compress-parameters: "compress-format: foo is not a supported format". +io.mo:Expected error in mat compress-parameters: "compress-format: "gzip" is not a supported format". +io.mo:Expected error in mat compress-parameters: "compress-level: foo is not a supported level". +io.mo:Expected error in mat compress-parameters: "compress-level: 1 is not a supported level". +io.mo:Expected error in mat compression: "port-file-compressed!: # is not a file port". +io.mo:Expected error in mat compression: "port-file-compressed!: cannot compress input/output port #". +io.mo:Expected error in mat compression: "port-file-compressed!: # is not a file port". +io.mo:Expected error in mat compression: "port-file-compressed!: cannot compress input/output port #". +io.mo:Expected error in mat compression: "port-file-compressed!: # is not a file port". +io.mo:Expected error in mat compression: "port-file-compressed!: cannot compress input/output port #". +io.mo:Expected error in mat compression: "port-file-compressed!: # is not a file port". +io.mo:Expected error in mat compression: "port-file-compressed!: cannot compress input/output port #". +io.mo:Expected error in mat compression: "port-file-compressed!: # is not a file port". +io.mo:Expected error in mat compression: "port-file-compressed!: cannot compress input/output port #". +io.mo:Expected error in mat compression: "port-file-compressed!: # is not a file port". +io.mo:Expected error in mat compression: "port-file-compressed!: cannot compress input/output port #". +io.mo:Expected error in mat compression: "port-file-compressed!: # is not a file port". +io.mo:Expected error in mat compression: "port-file-compressed!: cannot compress input/output port #". io.mo:Expected error in mat compression: "port-file-compressed!: # is not a file port". io.mo:Expected error in mat compression: "port-file-compressed!: cannot compress input/output port #". io.mo:Expected error in mat bytevector-input-port: "incorrect argument count in call (open-bytevector-input-port)". diff --git a/release_notes/release_notes.stex b/release_notes/release_notes.stex index 8715c00633..3b8a751742 100644 --- a/release_notes/release_notes.stex +++ b/release_notes/release_notes.stex @@ -58,16 +58,29 @@ Online versions of both books can be found at %----------------------------------------------------------------------------- \section{Functionality Changes}\label{section:functionality} -\subsection{Compression format (9.5.3)} +\subsection{Compression format and level (9.5.3)} -The default format for compressed-file writing is now LZ4, while {\tt +Support for LZ4 compression has been added. +LZ4 is now the default format when compressing files (including +object files produced by the compiler) and bytevectors, while {\tt gzip} is still supported and can be enabled by setting -\scheme{compress-format} to \scheme{'gzip}. Reading in compressed mode +the new \scheme{compress-format} parameter to the symbol \scheme{gzip} instead of the +default \scheme{lz4}. Reading in compressed mode infers the format, so reading {\tt gzip}-compressed files will still -work without changing \scheme{compress-format}. Reading LZ4-format -files tends to be much faster than reading {\tt gzip}-format files, in -most cases nearly eliminating the load-time cost of compressing -compiled files. +work without changing \scheme{compress-format}. Reading LZ4-format +files tends to be much faster than reading {\tt gzip}-format files, +while {\tt gzip}-compressed files tend to be smaller. +In particular, object files created by the compiler now tend to be +larger but load more quickly. + +The new \scheme{compress-level} parameter can be used to control +the amount of time spent on file compression (but not +bytevector compression). +It can be set to one of the symbols \scheme{low}, +\scheme{medium}, \scheme{high}, and \scheme{maximum}, which are +listed in order from shortest to longest compression time and least +to greatest effectiveness. +The default value is \scheme{medium}. \subsection{Mutexes and condition variables can have names (9.5.3)} diff --git a/s/Mf-base b/s/Mf-base index 4c6a101fb4..e8d447284a 100644 --- a/s/Mf-base +++ b/s/Mf-base @@ -24,15 +24,23 @@ o = 3 # d is the debug level at which the system should be built d = 0 -# cl (xcl) determines the commonization level +# cl determines the commonization level cl = (commonization-level) # i determines whether inspector-information is generated: f for false, t for true i = f -# cp0 (xcp0) determines the number of cp0 (source optimizer) iterations run +# cp0 determines the number of cp0 (source optimizer) iterations run cp0 = 2 -xcp0 = 2 + +# cc determines whether compiled files are compressed +cc = t + +# xf determines the compression foramt +xf = (compress-format) + +# xl determine the compression level +xl = (compress-level) # p (xp) determines whether source profiling is enabled: f for false, t for true. p = f @@ -214,6 +222,9 @@ clean: profileclean '(optimize-level $o)'\ '(debug-level $d)'\ '(commonization-level $(cl))'\ + '(compile-compressed #$(cc))'\ + '(compress-format $(xf))'\ + '(compress-level $(xl))'\ '(when #$p (compile-profile (quote source)))'\ '(when #$(bp) (compile-profile (quote block)))'\ '(when #$(loadspd) (profile-load-data "${ProfileDumpSource}"))'\ @@ -238,6 +249,9 @@ clean: profileclean '(optimize-level $o)'\ '(debug-level $d)'\ '(commonization-level $(cl))'\ + '(compile-compressed #$(cc))'\ + '(compress-format $(xf))'\ + '(compress-level $(xl))'\ '(when #$p (compile-profile (quote source)))'\ '(when #$(bp) (compile-profile (quote block)))'\ '(when #$(loadspd) (profile-load-data "${ProfileDumpSource}"))'\ @@ -265,6 +279,9 @@ clean: profileclean '(optimize-level $o)'\ '(debug-level $d)'\ '(commonization-level $(cl))'\ + '(compile-compressed #$(cc))'\ + '(compress-format $(xf))'\ + '(compress-level $(xl))'\ '(generate-inspector-information #$i)'\ '(subset-mode (quote system))'\ '(compile-file "$*.ss" "$*.so")'\ @@ -275,6 +292,9 @@ clean: profileclean '(optimize-level $o)'\ '(debug-level $d)'\ '(commonization-level $(cl))'\ + '(compile-compressed #$(cc))'\ + '(compress-format $(xf))'\ + '(compress-level $(xl))'\ '(when #$(xp) (compile-profile (quote source)))'\ '(when #$(xbp) (compile-profile (quote block)))'\ '(generate-inspector-information #$i)'\ @@ -344,6 +364,9 @@ cmacros.so: cmacros.ss machine.def layout.ss '(optimize-level $o)'\ '(debug-level $d)'\ '(commonization-level $(cl))'\ + '(compile-compressed #$(cc))'\ + '(compress-format $(xf))'\ + '(compress-level $(xl))'\ '(generate-inspector-information #$i)'\ '(subset-mode (quote system))'\ '(compile-file "$*.ss" "$*.so")'\ @@ -356,6 +379,9 @@ priminfo.so: priminfo.ss primdata.ss cmacros.so '(optimize-level $o)'\ '(debug-level $d)'\ '(commonization-level $(cl))'\ + '(compile-compressed #$(cc))'\ + '(compress-format $(xf))'\ + '(compress-level $(xl))'\ '(generate-inspector-information #$i)'\ '(subset-mode (quote system))'\ '(compile-file "$*.ss" "$*.so")'\ @@ -369,6 +395,9 @@ mkheader.so: mkheader.ss cmacros.so primvars.so env.so '(optimize-level $o)'\ '(debug-level $d)'\ '(commonization-level $(cl))'\ + '(compile-compressed #$(cc))'\ + '(compress-format $(xf))'\ + '(compress-level $(xl))'\ '(generate-inspector-information #$i)'\ '(subset-mode (quote system))'\ '(compile-file "$*.ss" "$*.so")'\ @@ -381,6 +410,9 @@ nanopass.so: $(shell echo ../nanopass/nanopass/*) ../nanopass/nanopass.ss '(optimize-level $o)'\ '(debug-level $d)'\ '(commonization-level $(cl))'\ + '(compile-compressed #$(cc))'\ + '(compress-format $(xf))'\ + '(compress-level $(xl))'\ '(generate-inspector-information #$i)'\ '(collect-trip-bytes (expt 2 24))'\ '(collect-request-handler (lambda () (collect 0 1)))'\ @@ -404,6 +436,9 @@ script.all makescript: '(optimize-level $o)'\ '(debug-level $d)'\ '(commonization-level $(cl))'\ + '(compile-compressed #$(cc))'\ + '(compress-format $(xf))'\ + '(compress-level $(xl))'\ '(when #$p (compile-profile (quote source)))'\ '(when #$(bp) (compile-profile (quote block)))'\ '(when #$(loadspd) (profile-load-data "${ProfileDumpSource}"))'\ @@ -440,6 +475,9 @@ script-static.all: '(optimize-level $o)'\ '(debug-level $d)'\ '(commonization-level $(cl))'\ + '(compile-compressed #$(cc))'\ + '(compress-format $(xf))'\ + '(compress-level $(xl))'\ '(when #$p (compile-profile (quote source)))'\ '(when #$(bp) (compile-profile (quote block)))'\ '(generate-inspector-information #$i)'\ @@ -462,6 +500,9 @@ script-dynamic.all: '(optimize-level $o)'\ '(debug-level $d)'\ '(commonization-level $(cl))'\ + '(compile-compressed #$(cc))'\ + '(compress-format $(xf))'\ + '(compress-level $(xl))'\ '(when #$p (compile-profile (quote source)))'\ '(when #$(bp) (compile-profile (quote block)))'\ '(generate-inspector-information #$i)'\ diff --git a/s/back.ss b/s/back.ss index 3c02c6f63b..d0f21c249f 100644 --- a/s/back.ss +++ b/s/back.ss @@ -156,12 +156,38 @@ x))) (define-who compress-format - ($make-thread-parameter - 'lz4 - (lambda (x) - (unless (or (eq? x 'lz4) (eq? x 'gzip)) - ($oops who "~s is not a supported format" x)) - x))) + (case-lambda + [() + (let ([x ($tc-field 'compress-format ($tc))]) + (cond + [(eqv? x (constant COMPRESS-GZIP)) 'gzip] + [(eqv? x (constant COMPRESS-LZ4)) 'lz4] + [else ($oops who "unexpected $compress-format value ~s" x)]))] + [(x) + ($tc-field 'compress-format ($tc) + (case x + [(gzip) (constant COMPRESS-GZIP)] + [(lz4) (constant COMPRESS-LZ4)] + [else ($oops who "~s is not a supported format" x)]))])) + +(define-who compress-level + (case-lambda + [() + (let ([x ($tc-field 'compress-level ($tc))]) + (cond + [(eqv? x (constant COMPRESS-LOW)) 'low] + [(eqv? x (constant COMPRESS-MEDIUM)) 'medium] + [(eqv? x (constant COMPRESS-HIGH)) 'high] + [(eqv? x (constant COMPRESS-MAX)) 'maximum] + [else ($oops who "unexpected $compress-level value ~s" x)]))] + [(x) + ($tc-field 'compress-level ($tc) + (case x + [(low) (constant COMPRESS-LOW)] + [(medium) (constant COMPRESS-MEDIUM)] + [(high) (constant COMPRESS-HIGH)] + [(maximum) (constant COMPRESS-MAX)] + [else ($oops who "~s is not a supported level" x)]))])) (define-who debug-level ($make-thread-parameter diff --git a/s/bytevector.ss b/s/bytevector.ss index 724e7e7e2a..ce265f055b 100644 --- a/s/bytevector.ss +++ b/s/bytevector.ss @@ -1454,25 +1454,23 @@ ) (let () - ;; Store uncompressed size as u64, using high bit to indicate LZ4: + ;; Store uncompressed size as u64, using low bits to indicate compression format: (define uncompressed-length-length (ftype-sizeof integer-64)) ;; Always big-endian, so that compressed data is portable. - ;; It might be useful somehow that valid compressed data always starts - ;; with a 0 or 128 byte; otherwise, the expected size would be unrealistically big. (define uncompressed-length-endianness (endianness big)) (define $bytevector-compress-size - (foreign-procedure "(cs)bytevector_compress_size" (iptr boolean) uptr)) + (foreign-procedure "(cs)bytevector_compress_size" (iptr int) uptr)) (define $bytevector-compress - (foreign-procedure "(cs)bytevector_compress" (scheme-object iptr iptr scheme-object iptr iptr boolean) scheme-object)) + (foreign-procedure "(cs)bytevector_compress" (scheme-object iptr iptr scheme-object iptr iptr int) scheme-object)) (define $bytevector-uncompress - (foreign-procedure "(cs)bytevector_uncompress" (scheme-object iptr iptr scheme-object iptr iptr boolean) scheme-object)) + (foreign-procedure "(cs)bytevector_uncompress" (scheme-object iptr iptr scheme-object iptr iptr int) scheme-object)) (set-who! bytevector-compress (lambda (bv) (unless (bytevector? bv) (not-a-bytevector who bv)) - (let* ([as-gz? (eq? 'gzip (compress-format))] - [dest-max-len ($bytevector-compress-size (bytevector-length bv) as-gz?)] + (let* ([fmt ($tc-field 'compress-format ($tc))] + [dest-max-len ($bytevector-compress-size (bytevector-length bv) fmt)] [dest-alloc-len (min (+ dest-max-len uncompressed-length-length) ;; In the unlikely event of a non-fixnum requested size... (constant maximum-bytevector-length))] @@ -1483,34 +1481,25 @@ bv 0 (bytevector-length bv) - as-gz?)]) + fmt)]) (cond [(string? r) ($oops who r bv)] [else - ($bytevector-u64-set! dest-bv 0 (bytevector-length bv) uncompressed-length-endianness who) - (unless as-gz? (bytevector-u8-set! dest-bv 0 128)) ; set high bit for LZ4 - (bytevector-truncate! dest-bv (fx+ r uncompressed-length-length))]))))) + (let ([tag (bitwise-ior + (bitwise-arithmetic-shift-left (bytevector-length bv) (constant COMPRESS-FORMAT-BITS)) + fmt)]) + ($bytevector-u64-set! dest-bv 0 tag uncompressed-length-endianness who) + (bytevector-truncate! dest-bv (fx+ r uncompressed-length-length)))]))))) (set-who! bytevector-uncompress (lambda (bv) (unless (bytevector? bv) (not-a-bytevector who bv)) (unless (>= (bytevector-length bv) uncompressed-length-length) ($oops who "invalid data in source bytevector ~s" bv)) - (let* ([as-gz? (not (fx= 128 (bytevector-u8-ref bv 0)))] - [dest-length (cond - [as-gz? - ($bytevector-u64-ref bv 0 uncompressed-length-endianness who)] - ;; Need to skip high bit; likely can skip first 4 bytes - [(and (fx= 0 (bytevector-u8-ref bv 1)) - (fx= 0 (bytevector-u8-ref bv 2)) - (fx= 0 (bytevector-u8-ref bv 3))) - ($bytevector-u32-ref bv 4 uncompressed-length-endianness who)] - [else - ;; Clear high bit the hard way - (+ ($bytevector-u32-ref bv 4 uncompressed-length-endianness who) - (let ([v ($bytevector-u32-ref bv 0 uncompressed-length-endianness who)]) - ((bitwise-arithmetic-shift-left (- v #x80000000) 32))))])]) + (let* ([tag ($bytevector-u64-ref bv 0 uncompressed-length-endianness who)] + [fmt (logand tag (fx- (fxsll 1 (constant COMPRESS-FORMAT-BITS)) 1))] + [dest-length (bitwise-arithmetic-shift-right tag (constant COMPRESS-FORMAT-BITS))]) (unless (and (fixnum? dest-length) ($fxu< dest-length (constant maximum-bytevector-length))) ($oops who "bytevector ~s claims invalid uncompressed size ~s" bv dest-length)) @@ -1521,7 +1510,7 @@ bv uncompressed-length-length (fx- (bytevector-length bv) uncompressed-length-length) - as-gz?)]) + fmt)]) (cond [(string? r) ($oops who r bv)] [(fx= r dest-length) dest-bv] diff --git a/s/cmacros.ss b/s/cmacros.ss index 20be35ac5a..6a9127eec8 100644 --- a/s/cmacros.ss +++ b/s/cmacros.ss @@ -531,6 +531,15 @@ (define-constant SEOF -1) +(define-constant COMPRESS-GZIP 0) +(define-constant COMPRESS-LZ4 1) +(define-constant COMPRESS-FORMAT-BITS 3) + +(define-constant COMPRESS-LOW 0) +(define-constant COMPRESS-MEDIUM 1) +(define-constant COMPRESS-HIGH 2) +(define-constant COMPRESS-MAX 3) + (define-constant SICONV-DUNNO 0) (define-constant SICONV-INVALID 1) (define-constant SICONV-INCOMPLETE 2) @@ -1371,6 +1380,9 @@ [ptr suppress-primitive-inlining] [ptr default-record-equal-procedure] [ptr default-record-hash-procedure] + [ptr compress-format] + [ptr compress-level] + [void* lz4-out-buffer] [U64 instr-counter] [U64 alloc-counter] [ptr parameters])) diff --git a/s/io.ss b/s/io.ss index 045db4f5d4..191794864a 100644 --- a/s/io.ss +++ b/s/io.ss @@ -264,7 +264,7 @@ implementation notes: (foreign-procedure "(cs)new_open_output_fd" (string int boolean boolean boolean - boolean boolean boolean boolean boolean) + boolean boolean boolean boolean) scheme-object)) (define $open-input/output-fd (foreign-procedure "(cs)new_open_input_output_fd" @@ -310,7 +310,7 @@ implementation notes: (define $compress-input-fd (foreign-procedure "(cs)compress_input_fd" (int integer-64) scheme-object)) (define $compress-output-fd - (foreign-procedure "(cs)compress_output_fd" (int boolean) scheme-object)) + (foreign-procedure "(cs)compress_output_fd" (int) scheme-object)) (module (clear-open-files register-open-file registered-open-file? unregister-open-file) (define open-files #f) (define file-guardian) @@ -645,14 +645,17 @@ implementation notes: (define binary-file-port-close-port (lambda (who p) - (unregister-open-file p) - (let ([msg ($close-fd ($port-info p) (port-gz-mode p))]) - (unless (eq? #t msg) (port-oops who p msg))) - (mark-port-closed! p) (when (input-port? p) (set-port-eof! p #f) (set-binary-port-input-size! p 0)) - (when (output-port? p) (set-binary-port-output-size! p 0)))) + (when (output-port? p) (set-binary-port-output-size! p 0)) + (unregister-open-file p) + ; mark port closed before closing fd. if an interrupt occurs, we'd prefer + ; that the fd's resources never be freed than to have an open port floating + ; around with fd resources that have already been freed. + (mark-port-closed! p) + (let ([msg ($close-fd ($port-info p) (port-gz-mode p))]) + (unless (eq? #t msg) (port-oops who p msg))))) (define-syntax binary-file-port-port-position (syntax-rules () @@ -3185,7 +3188,7 @@ implementation notes: ; reposition to 'unread' any compressed data in the input buffer (set-port-position! p fp) ($compress-input-fd fd fp)) - ($compress-output-fd fd (eq? (compress-format) 'gzip)))]) + ($compress-output-fd fd))]) (when (string? gzfd) ($oops who "failed for ~s: ~(~a~)" p gzfd)) (unless (eqv? gzfd fd) ; uncompressed input port (assert (box? gzfd)) @@ -4091,8 +4094,7 @@ implementation notes: (let ([fd (critical-section ($open-output-fd filename perms no-create no-fail no-truncate - append lock replace compressed - (and compressed (eq? (compress-format) 'gzip))))]) + append lock replace compressed))]) (when (pair? fd) (open-oops who filename options fd)) (open-binary-fd-output-port who filename fd #t b-mode lock compressed))))) diff --git a/s/primdata.ss b/s/primdata.ss index 1e3e57b004..2b1423e53b 100644 --- a/s/primdata.ss +++ b/s/primdata.ss @@ -926,6 +926,7 @@ (compile-profile [sig [() -> (ptr)] [(ptr) -> (void)]] [flags unrestricted]) (compile-program-handler [sig [() -> (procedure)] [(procedure) -> (void)]] [flags]) (compress-format [sig [() -> (symbol)] [(sub-symbol) -> (void)]] [flags]) + (compress-level [sig [() -> (symbol)] [(sub-symbol) -> (void)]] [flags]) (console-error-port [sig [() -> (textual-output-port)] [(textual-output-port) -> (void)]] [flags]) (console-input-port [sig [() -> (textual-input-port)] [(textual-input-port) -> (void)]] [flags]) (console-output-port [sig [() -> (textual-output-port)] [(textual-output-port) -> (void)]] [flags])