file/gzip: misc clean-up

Separate state and functions, and convert a key loop to functional
style. As it turns out, this has no significant effect on performance,
but it looks a lot better to me.
This commit is contained in:
Matthew Flatt 2013-08-13 11:48:13 -06:00
parent 1acdcf2fb9
commit de322740a6

View File

@ -1,3 +1,5 @@
#lang racket/base
#| #|
/* deflate.c -- compress data using the deflation algorithm /* deflate.c -- compress data using the deflation algorithm
* Copyright (C) 1992-1993 Jean-loup Gailly * Copyright (C) 1992-1993 Jean-loup Gailly
@ -12,19 +14,10 @@
;; LGPL-compatible license. I (Eli Barzilay) have tried to contact the ;; LGPL-compatible license. I (Eli Barzilay) have tried to contact the
;; author, but no reply yet. ;; author, but no reply yet.
(module deflate racket/base
(provide deflate gzip-through-ports gzip) (provide deflate gzip-through-ports gzip)
(require (for-syntax racket/base)) (require (for-syntax racket/base))
(define (vector-ref* v i)
(let ([r (vector-ref v i)])
(if (<= 0 r 255) r (error 'vector-ref "BOOM: ~s" r))))
(define (vector-set!* v i n)
(if (<= 0 n 255) (vector-set! v i n) (error 'vector-ref "BOOM!: ~s" n)))
(define-syntax INSERT_STRING (define-syntax INSERT_STRING
(syntax-rules () (syntax-rules ()
[(_ s match_head UPDATE_HASH window-vec head-vec prev-vec ins_h) [(_ s match_head UPDATE_HASH window-vec head-vec prev-vec ins_h)
@ -207,17 +200,6 @@
;; ush dad; ;; /* father node in Huffman tree */ ;; ush dad; ;; /* father node in Huffman tree */
;; ush len; ;; /* length of bit string */ ;; ush len; ;; /* length of bit string */
;; } dl; ;; } dl;
#|
(define ct_data-freq ct_data-freq/code)
(define ct_data-code ct_data-freq/code)
(define ct_data-dad ct_data-dad/len)
(define ct_data-len ct_data-dad/len)
(define set-ct_data-freq! set-ct_data-freq/code!)
(define set-ct_data-code! set-ct_data-freq/code!)
(define set-ct_data-dad! set-ct_data-dad/len!)
(define set-ct_data-len! set-ct_data-dad/len!)
(define (_make-ct_data f c d l) (make-ct_data (or f c) (or d l)))
|#
(define _make-ct_data make-ct_data) (define _make-ct_data make-ct_data)
(define-struct tree_desc (define-struct tree_desc
@ -230,7 +212,171 @@
max_code); ;; /* largest code with non zero frequency */ max_code); ;; /* largest code with non zero frequency */
#:mutable) #:mutable)
;; /* Values for max_lazy_match, good_match and max_chain_length, depending on
;; * the desired pack level (0..9). The values given below have been tuned to
;; * exclude worst case performance for pathological files. Better values may be
;; * found for specific files.
;; */
(define-struct config
(good_length ;; /* reduce lazy search above this match length */
max_lazy ;; /* do not perform lazy search above this match length */
nice_length ;; /* quit search above this match length */
max_chain))
(define configuration_table
(vector
;; /* good lazy nice chain */
(make-config 0 0 0 0) ;; /* 0 - store only */
(make-config 4 4 8 4) ;; /* 1 - maximum speed, no lazy matches */
(make-config 4 5 16 8) ;; /* 2 */
(make-config 4 6 32 32) ;; /* 3 */
(make-config 4 4 16 16) ;; /* 4 - lazy matches */
(make-config 8 16 32 32) ;; /* 5 */
(make-config 8 16 128 128) ;; /* 6 */
(make-config 8 32 128 256) ;; /* 7 */
(make-config 32 128 258 1024) ;; /* 8 */
(make-config 32 258 258 4096))) ;; /* 9 - maximum compression */
;; /* ===========================================================================
;; * Constants
;; */
(define MAX_BITS 15)
;; /* All codes must not exceed MAX_BITS bits */
(define MAX_BL_BITS 7)
;; /* Bit length codes must not exceed MAX_BL_BITS bits */
(define LENGTH_CODES 29)
;; /* number of length codes, not counting the special END_BLOCK code */
(define LITERALS 256)
;; /* number of literal bytes 0..255 */
(define END_BLOCK 256)
;; /* end of block literal code */
(define L_CODES (+ LITERALS 1 LENGTH_CODES))
;; /* number of Literal or Length codes, including the END_BLOCK code */
(define D_CODES 30)
;; /* number of distance codes */
(define BL_CODES 19)
;; /* number of codes used to transfer the bit lengths */
(define extra_lbits ;; /* extra bits for each length code */
'#(0 0 0 0 0 0 0 0 1 1 1 1 2 2 2 2 3 3 3 3 4 4 4 4 5 5 5 5 0))
(define extra_dbits ;; /* extra bits for each distance code */
'#(0 0 0 0 1 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9 9 10 10 11 11 12 12 13 13))
(define extra_blbits ;; /* extra bits for each bit length code */
'#(0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 3 7))
(define STORED_BLOCK 0)
(define STATIC_TREES 1)
(define DYN_TREES 2)
;; /* The three kinds of block type */
(define LIT_BUFSIZE #x8000)
(define DIST_BUFSIZE #x8000)
;; /* Sizes of match buffers for literals/lengths and distances. There are
;; * 4 reasons for limiting LIT_BUFSIZE to 64K:
;; * - frequencies can be kept in 16 bit counters
;; * - if compression is not successful for the first block, all input data is
;; * still in the window so we can still emit a stored block even when input
;; * comes from standard input. (This can also be done for all blocks if
;; * LIT_BUFSIZE is not greater than 32K.)
;; * - if compression is not successful for a file smaller than 64K, we can
;; * even emit a stored file instead of a stored block (saving 5 bytes).
;; * - creating new Huffman trees less frequently may not provide fast
;; * adaptation to changes in the input data statistics. (Take for
;; * example a binary file with poorly compressible code followed by
;; * a highly compressible string table.) Smaller buffer sizes give
;; * fast adaptation but have of course the overhead of transmitting trees
;; * more frequently.
;; * - I can't count above 4
;; * The current code is general and allows DIST_BUFSIZE < LIT_BUFSIZE (to save
;; * memory at the expense of compression). Some optimizations would be possible
;; * if we rely on DIST_BUFSIZE == LIT_BUFSIZE.
;; */
(when (> LIT_BUFSIZE INBUFSIZ)
(error "cannot overlay l_buf and inbuf"))
(define REP_3_6 16)
;; /* repeat previous bit length 3-6 times (2 bits of repeat count) */
(define REPZ_3_10 17)
;; /* repeat a zero length 3-10 times (3 bits of repeat count) */
(define REPZ_11_138 18)
;; /* repeat a zero length 11-138 times (7 bits of repeat count) */
(define SMALLEST 1)
;; /* Index within the heap array of least frequent node in the Huffman tree */
(define crc_32_tab
#(#x00000000
#x77073096 #xee0e612c #x990951ba #x076dc419
#x706af48f #xe963a535 #x9e6495a3 #x0edb8832 #x79dcb8a4
#xe0d5e91e #x97d2d988 #x09b64c2b #x7eb17cbd #xe7b82d07
#x90bf1d91 #x1db71064 #x6ab020f2 #xf3b97148 #x84be41de
#x1adad47d #x6ddde4eb #xf4d4b551 #x83d385c7 #x136c9856
#x646ba8c0 #xfd62f97a #x8a65c9ec #x14015c4f #x63066cd9
#xfa0f3d63 #x8d080df5 #x3b6e20c8 #x4c69105e #xd56041e4
#xa2677172 #x3c03e4d1 #x4b04d447 #xd20d85fd #xa50ab56b
#x35b5a8fa #x42b2986c #xdbbbc9d6 #xacbcf940 #x32d86ce3
#x45df5c75 #xdcd60dcf #xabd13d59 #x26d930ac #x51de003a
#xc8d75180 #xbfd06116 #x21b4f4b5 #x56b3c423 #xcfba9599
#xb8bda50f #x2802b89e #x5f058808 #xc60cd9b2 #xb10be924
#x2f6f7c87 #x58684c11 #xc1611dab #xb6662d3d #x76dc4190
#x01db7106 #x98d220bc #xefd5102a #x71b18589 #x06b6b51f
#x9fbfe4a5 #xe8b8d433 #x7807c9a2 #x0f00f934 #x9609a88e
#xe10e9818 #x7f6a0dbb #x086d3d2d #x91646c97 #xe6635c01
#x6b6b51f4 #x1c6c6162 #x856530d8 #xf262004e #x6c0695ed
#x1b01a57b #x8208f4c1 #xf50fc457 #x65b0d9c6 #x12b7e950
#x8bbeb8ea #xfcb9887c #x62dd1ddf #x15da2d49 #x8cd37cf3
#xfbd44c65 #x4db26158 #x3ab551ce #xa3bc0074 #xd4bb30e2
#x4adfa541 #x3dd895d7 #xa4d1c46d #xd3d6f4fb #x4369e96a
#x346ed9fc #xad678846 #xda60b8d0 #x44042d73 #x33031de5
#xaa0a4c5f #xdd0d7cc9 #x5005713c #x270241aa #xbe0b1010
#xc90c2086 #x5768b525 #x206f85b3 #xb966d409 #xce61e49f
#x5edef90e #x29d9c998 #xb0d09822 #xc7d7a8b4 #x59b33d17
#x2eb40d81 #xb7bd5c3b #xc0ba6cad #xedb88320 #x9abfb3b6
#x03b6e20c #x74b1d29a #xead54739 #x9dd277af #x04db2615
#x73dc1683 #xe3630b12 #x94643b84 #x0d6d6a3e #x7a6a5aa8
#xe40ecf0b #x9309ff9d #x0a00ae27 #x7d079eb1 #xf00f9344
#x8708a3d2 #x1e01f268 #x6906c2fe #xf762575d #x806567cb
#x196c3671 #x6e6b06e7 #xfed41b76 #x89d32be0 #x10da7a5a
#x67dd4acc #xf9b9df6f #x8ebeeff9 #x17b7be43 #x60b08ed5
#xd6d6a3e8 #xa1d1937e #x38d8c2c4 #x4fdff252 #xd1bb67f1
#xa6bc5767 #x3fb506dd #x48b2364b #xd80d2bda #xaf0a1b4c
#x36034af6 #x41047a60 #xdf60efc3 #xa867df55 #x316e8eef
#x4669be79 #xcb61b38c #xbc66831a #x256fd2a0 #x5268e236
#xcc0c7795 #xbb0b4703 #x220216b9 #x5505262f #xc5ba3bbe
#xb2bd0b28 #x2bb45a92 #x5cb36a04 #xc2d7ffa7 #xb5d0cf31
#x2cd99e8b #x5bdeae1d #x9b64c2b0 #xec63f226 #x756aa39c
#x026d930a #x9c0906a9 #xeb0e363f #x72076785 #x05005713
#x95bf4a82 #xe2b87a14 #x7bb12bae #x0cb61b38 #x92d28e9b
#xe5d5be0d #x7cdcefb7 #x0bdbdf21 #x86d3d2d4 #xf1d4e242
#x68ddb3f8 #x1fda836e #x81be16cd #xf6b9265b #x6fb077e1
#x18b74777 #x88085ae6 #xff0f6a70 #x66063bca #x11010b5c
#x8f659eff #xf862ae69 #x616bffd3 #x166ccf45 #xa00ae278
#xd70dd2ee #x4e048354 #x3903b3c2 #xa7672661 #xd06016f7
#x4969474d #x3e6e77db #xaed16a4a #xd9d65adc #x40df0b66
#x37d83bf0 #xa9bcae53 #xdebb9ec5 #x47b2cf7f #x30b5ffe9
#xbdbdf21c #xcabac28a #x53b39330 #x24b4a3a6 #xbad03605
#xcdd70693 #x54de5729 #x23d967bf #xb3667a2e #xc4614ab8
#x5d681b02 #x2a6f2b94 #xb40bbe37 #xc30c8ea1 #x5a05df1b
#x2d02ef8d))
(define (code) (define (code)
;; The original code uses many `static' mutable variables, and that
;; strategy is largely intact in this port, so we group all of the
;; here with local variables to instantiate with the functions.
;; /* =========================================================================== ;; /* ===========================================================================
;; * Local data used by the "longest match" routines. ;; * Local data used by the "longest match" routines.
@ -241,6 +387,17 @@
(define prev-vec real-table) (define prev-vec real-table)
(define head-vec real-table) (define head-vec real-table)
(define cur_match 0)
(define chain_length 0)
(define scanpos 0)
(define matchpos 0)
(define len 0)
(define best_len 0)
(define limit NIL)
(define strendpos 0)
(define scan_end1 0)
(define scan_end 0)
;; /* DECLARE(uch, window, 2L*WSIZE); */ ;; /* DECLARE(uch, window, 2L*WSIZE); */
;; /* Sliding window. Input bytes are read into the second half of the window, ;; /* Sliding window. Input bytes are read into the second half of the window,
;; * and move to the first half later to keep a dictionary of at least WSIZE ;; * and move to the first half later to keep a dictionary of at least WSIZE
@ -311,42 +468,152 @@
(define good_match 0) (define good_match 0)
;; /* Use a faster search when the previous match is longer than this */ ;; /* Use a faster search when the previous match is longer than this */
;; /* Values for max_lazy_match, good_match and max_chain_length, depending on
;; * the desired pack level (0..9). The values given below have been tuned to
;; * exclude worst case performance for pathological files. Better values may be
;; * found for specific files.
;; */
(define-struct config
(good_length ;; /* reduce lazy search above this match length */
max_lazy ;; /* do not perform lazy search above this match length */
nice_length ;; /* quit search above this match length */
max_chain))
(define nice_match MAX_MATCH) (define nice_match MAX_MATCH)
;; /* Stop searching when current match exceeds this */ ;; /* Stop searching when current match exceeds this */
(define configuration_table
(vector
;; /* good lazy nice chain */
(make-config 0 0 0 0) ;; /* 0 - store only */
(make-config 4 4 8 4) ;; /* 1 - maximum speed, no lazy matches */
(make-config 4 5 16 8) ;; /* 2 */
(make-config 4 6 32 32) ;; /* 3 */
(make-config 4 4 16 16) ;; /* 4 - lazy matches */
(make-config 8 16 32 32) ;; /* 5 */
(make-config 8 16 128 128) ;; /* 6 */
(make-config 8 32 128 256) ;; /* 7 */
(make-config 32 128 258 1024) ;; /* 8 */
(make-config 32 258 258 4096))) ;; /* 9 - maximum compression */
;; /* Note: the deflate() code requires max_lazy >= MIN_MATCH and max_chain >= 4 ;; /* Note: the deflate() code requires max_lazy >= MIN_MATCH and max_chain >= 4
;; * For deflate_fast() (levels <= 3) good is ignored and lazy has a different ;; * For deflate_fast() (levels <= 3) good is ignored and lazy has a different
;; * meaning. ;; * meaning.
;; */ ;; */
;; /* ===========================================================================
;; * Local data
;; */
(define HEAP_SIZE (+ (* 2 L_CODES) 1))
;; /* maximum heap size */
(define dyn_ltree (make-vector HEAP_SIZE 'uninit-dl)) ;; /* literal and length tree */
(define dyn_dtree (make-vector (+ (* 2 D_CODES) 1) 'uninit-dd)) ;; /* distance tree */
(define static_ltree (make-vector (+ L_CODES 2) 'uninit-sl))
;; /* The static literal tree. Since the bit lengths are imposed, there is no
;; * need for the L_CODES extra codes used during heap construction. However
;; * The codes 286 and 287 are needed to build a canonical tree (see ct_init
;; * below).
;; */
(define static_dtree (make-vector D_CODES 'uninit-sd))
;; /* The static distance tree. (Actually a trivial tree since all codes use
;; * 5 bits.)
;; */
(define bl_tree (make-vector (+ (* 2 BL_CODES) 1) 'uninit-dl))
;; /* Huffman tree for the bit lengths */
(define l_desc (make-tree_desc
dyn_ltree static_ltree extra_lbits
(+ LITERALS 1) L_CODES MAX_BITS 0))
(define d_desc (make-tree_desc
dyn_dtree static_dtree extra_dbits
0 D_CODES MAX_BITS 0))
(define bl_desc (make-tree_desc
bl_tree #f extra_blbits
0 BL_CODES MAX_BL_BITS 0))
(define bl_count (make-vector (+ MAX_BITS 1) 0))
;; /* number of codes at each bit length for an optimal tree */
(define bl_order
'#(16 17 18 0 8 7 9 6 10 5 11 4 12 3 13 2 14 1 15))
;; /* The lengths of the bit length codes are sent in order of decreasing
;; * probability, to avoid transmitting the lengths for unused bit length codes.
;; */
(define heap (make-vector (+ (* 2 L_CODES) 1) 0)) ;; /* heap used to build the Huffman trees */
(define heap_len 0) ;; /* number of elements in the heap */
(define heap_max 0) ;; /* element of largest frequency */
;; /* The sons of heap[n] are heap[2*n] and heap[2*n+1]. heap[0] is not used.
;; * The same heap array is used to build all trees.
;; */
(define depth (make-vector (+ (* 2 L_CODES) 1) 0))
;; /* Depth of each subtree used as tie breaker for trees of equal frequency */
(define length_code (make-vector (- MAX_MATCH MIN_MATCH -1) 0))
;; /* length code for each normalized match length (0 == MIN_MATCH) */
(define dist_code (make-vector 512 0))
;; /* distance codes. The first 256 values correspond to the distances
;; * 3 .. 258, the last 256 values correspond to the top 8 bits of
;; * the 15 bit distances.
;; */
(define base_length (make-vector LENGTH_CODES 0))
;; /* First normalized length for each code (0 = MIN_MATCH) */
(define base_dist (make-vector D_CODES 0))
;; /* First normalized distance for each code (0 = distance of 1) */
(define inbuf (make-bytes (+ INBUFSIZ INBUF_EXTRA) 0))
(define l_buf inbuf)
;; /* DECLARE(uch, l_buf, LIT_BUFSIZE); buffer for literals or lengths */
(define d_buf (make-vector DIST_BUFSIZE 0))
;; /* DECLARE(ush, d_buf, DIST_BUFSIZE); buffer for distances */
(define flag_buf (make-vector (/ LIT_BUFSIZE 8) 0))
;; /* flag_buf is a bit array distinguishing literals from lengths in
;; * l_buf, thus indicating the presence or absence of a distance.
;; */
(define last_lit 0) ;; /* running index in l_buf */
(define last_dist 0) ;; /* running index in d_buf */
(define last_flags 0) ;; /* running index in flag_buf */
(define flags 0) ;; /* current flags not yet saved in flag_buf */
(define flag_bit 0) ;; /* current bit used in flags */
;; /* bits are filled in flags starting at bit 0 (least significant).
;; * Note: these flags are overkill in the current code since we don't
;; * take advantage of DIST_BUFSIZE == LIT_BUFSIZE.
;; */
(define opt_len 0); ;; /* bit length of current block with optimal trees */
(define static_len 0); ;; /* bit length of current block with static trees */
(define compressed_len 0); ;; /* total bit length of compressed file */
(define input_len 0); ;; /* total byte length of input file */
;; /* input_len is for debugging only since we can get it by other means. */
;; (define block_start 0); ;; /* window offset of current block */
;; (define strstart 0); ;; /* window offset of current string */
(define inited-once? #f)
(define bytes_in 0)
(define bi_buf 0)
;; /* Output buffer. bits are inserted starting at the bottom (least significant
;; * bits).
;; */
(define Buf_size (* 8 2))
;; /* Number of bits used within bi_buf. (bi_buf might be implemented on
;; * more than 16 bits on some systems.)
;; */
(define bi_valid 0)
;; /* Number of valid bits in bi_buf. All bits above the last valid bit
;; * are always zero.
;; */
(define crc #xffffffff)
(define outcnt 0)
(define bytes_out 0)
(define outbuf (make-bytes OUTBUFSIZ))
(define ifd #f)
(define ofd #f)
;; Functions below (and `let' ensures that we don't accidentally
;; reference any from above, which could make the compiler less
;; happy).
(let ()
;; /* =========================================================================== ;; /* ===========================================================================
;; * Update a hash value with the given input byte ;; * Update a hash value with the given input byte
;; * IN assertion: all calls to to UPDATE_HASH are made with consecutive ;; * IN assertion: all calls to to UPDATE_HASH are made with consecutive
@ -437,18 +704,6 @@
;; make this C-derived code have more C-like allocation by lifting out its local ;; make this C-derived code have more C-like allocation by lifting out its local
;; variables. ;; variables.
(define longest_match
(let ((cur_match 0)
(chain_length 0)
(scanpos 0)
(matchpos 0)
(len 0)
(best_len 0)
(limit NIL)
(strendpos 0)
(scan_end1 0)
(scan_end 0))
(define (longest_match _cur_match) (define (longest_match _cur_match)
;; IPos cur_match; /* current match */ ;; IPos cur_match; /* current match */
@ -564,7 +819,6 @@
#t))) #t)))
#t)) #t))
(continue))))) (continue)))))
longest_match))
;; /* =========================================================================== ;; /* ===========================================================================
;; * Check that the match at match_start is indeed a match. ;; * Check that the match at match_start is indeed a match.
@ -590,7 +844,7 @@
;; */ ;; */
(when (>= strstart (+ WSIZE MAX_DIST)) (when (>= strstart (+ WSIZE MAX_DIST))
(let ([bs (gzbytes-bytes window)] [ofs (gzbytes-offset window)]) (let ([bs (gzbytes-bytes window)] [ofs (gzbytes-offset window)])
(bytes-copy! bs ofs bs (+ ofs WSIZE) (+ ofs WSIZE WSIZE))) (bytes-copy! bs ofs bs (+ ofs WSIZE) (+ ofs (+ WSIZE WSIZE))))
(set! match_start (- match_start WSIZE)) (set! match_start (- match_start WSIZE))
(set! strstart (- strstart WSIZE)) ;; /* we now have strstart >= MAX_DIST: */ (set! strstart (- strstart WSIZE)) ;; /* we now have strstart >= MAX_DIST: */
@ -828,187 +1082,6 @@
|# |#
;; /* ===========================================================================
;; * Constants
;; */
(define MAX_BITS 15)
;; /* All codes must not exceed MAX_BITS bits */
(define MAX_BL_BITS 7)
;; /* Bit length codes must not exceed MAX_BL_BITS bits */
(define LENGTH_CODES 29)
;; /* number of length codes, not counting the special END_BLOCK code */
(define LITERALS 256)
;; /* number of literal bytes 0..255 */
(define END_BLOCK 256)
;; /* end of block literal code */
(define L_CODES (+ LITERALS 1 LENGTH_CODES))
;; /* number of Literal or Length codes, including the END_BLOCK code */
(define D_CODES 30)
;; /* number of distance codes */
(define BL_CODES 19)
;; /* number of codes used to transfer the bit lengths */
(define extra_lbits ;; /* extra bits for each length code */
(vector 0 0 0 0 0 0 0 0 1 1 1 1 2 2 2 2 3 3 3 3 4 4 4 4 5 5 5 5 0))
(define extra_dbits ;; /* extra bits for each distance code */
(vector 0 0 0 0 1 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9 9 10 10 11 11 12 12 13 13))
(define extra_blbits ;; /* extra bits for each bit length code */
(vector 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 3 7))
(define STORED_BLOCK 0)
(define STATIC_TREES 1)
(define DYN_TREES 2)
;; /* The three kinds of block type */
(define LIT_BUFSIZE #x8000)
(define DIST_BUFSIZE #x8000)
;; /* Sizes of match buffers for literals/lengths and distances. There are
;; * 4 reasons for limiting LIT_BUFSIZE to 64K:
;; * - frequencies can be kept in 16 bit counters
;; * - if compression is not successful for the first block, all input data is
;; * still in the window so we can still emit a stored block even when input
;; * comes from standard input. (This can also be done for all blocks if
;; * LIT_BUFSIZE is not greater than 32K.)
;; * - if compression is not successful for a file smaller than 64K, we can
;; * even emit a stored file instead of a stored block (saving 5 bytes).
;; * - creating new Huffman trees less frequently may not provide fast
;; * adaptation to changes in the input data statistics. (Take for
;; * example a binary file with poorly compressible code followed by
;; * a highly compressible string table.) Smaller buffer sizes give
;; * fast adaptation but have of course the overhead of transmitting trees
;; * more frequently.
;; * - I can't count above 4
;; * The current code is general and allows DIST_BUFSIZE < LIT_BUFSIZE (to save
;; * memory at the expense of compression). Some optimizations would be possible
;; * if we rely on DIST_BUFSIZE == LIT_BUFSIZE.
;; */
(when (> LIT_BUFSIZE INBUFSIZ)
(error "cannot overlay l_buf and inbuf"))
(define REP_3_6 16)
;; /* repeat previous bit length 3-6 times (2 bits of repeat count) */
(define REPZ_3_10 17)
;; /* repeat a zero length 3-10 times (3 bits of repeat count) */
(define REPZ_11_138 18)
;; /* repeat a zero length 11-138 times (7 bits of repeat count) */
;; /* ===========================================================================
;; * Local data
;; */
(define HEAP_SIZE (+ (* 2 L_CODES) 1))
;; /* maximum heap size */
(define dyn_ltree (make-vector HEAP_SIZE 'uninit-dl)) ;; /* literal and length tree */
(define dyn_dtree (make-vector (+ (* 2 D_CODES) 1) 'uninit-dd)) ;; /* distance tree */
(define static_ltree (make-vector (+ L_CODES 2) 'uninit-sl))
;; /* The static literal tree. Since the bit lengths are imposed, there is no
;; * need for the L_CODES extra codes used during heap construction. However
;; * The codes 286 and 287 are needed to build a canonical tree (see ct_init
;; * below).
;; */
(define static_dtree (make-vector D_CODES 'uninit-sd))
;; /* The static distance tree. (Actually a trivial tree since all codes use
;; * 5 bits.)
;; */
(define bl_tree (make-vector (+ (* 2 BL_CODES) 1) 'uninit-dl))
;; /* Huffman tree for the bit lengths */
(define l_desc (make-tree_desc
dyn_ltree static_ltree extra_lbits
(+ LITERALS 1) L_CODES MAX_BITS 0))
(define d_desc (make-tree_desc
dyn_dtree static_dtree extra_dbits
0 D_CODES MAX_BITS 0))
(define bl_desc (make-tree_desc
bl_tree #f extra_blbits
0 BL_CODES MAX_BL_BITS 0))
(define bl_count (make-vector (+ MAX_BITS 1) 0))
;; /* number of codes at each bit length for an optimal tree */
(define bl_order
(vector 16 17 18 0 8 7 9 6 10 5 11 4 12 3 13 2 14 1 15))
;; /* The lengths of the bit length codes are sent in order of decreasing
;; * probability, to avoid transmitting the lengths for unused bit length codes.
;; */
(define heap (make-vector (+ (* 2 L_CODES) 1) 0)) ;; /* heap used to build the Huffman trees */
(define heap_len 0) ;; /* number of elements in the heap */
(define heap_max 0) ;; /* element of largest frequency */
;; /* The sons of heap[n] are heap[2*n] and heap[2*n+1]. heap[0] is not used.
;; * The same heap array is used to build all trees.
;; */
(define depth (make-vector (+ (* 2 L_CODES) 1) 0))
;; /* Depth of each subtree used as tie breaker for trees of equal frequency */
(define length_code (make-vector (- MAX_MATCH MIN_MATCH -1) 0))
;; /* length code for each normalized match length (0 == MIN_MATCH) */
(define dist_code (make-vector 512 0))
;; /* distance codes. The first 256 values correspond to the distances
;; * 3 .. 258, the last 256 values correspond to the top 8 bits of
;; * the 15 bit distances.
;; */
(define base_length (make-vector LENGTH_CODES 0))
;; /* First normalized length for each code (0 = MIN_MATCH) */
(define base_dist (make-vector D_CODES 0))
;; /* First normalized distance for each code (0 = distance of 1) */
(define inbuf (make-bytes (+ INBUFSIZ INBUF_EXTRA) 0))
(define l_buf inbuf)
;; /* DECLARE(uch, l_buf, LIT_BUFSIZE); buffer for literals or lengths */
(define d_buf (make-vector DIST_BUFSIZE 0))
;; /* DECLARE(ush, d_buf, DIST_BUFSIZE); buffer for distances */
(define flag_buf (make-vector (/ LIT_BUFSIZE 8) 0))
;; /* flag_buf is a bit array distinguishing literals from lengths in
;; * l_buf, thus indicating the presence or absence of a distance.
;; */
(define last_lit 0) ;; /* running index in l_buf */
(define last_dist 0) ;; /* running index in d_buf */
(define last_flags 0) ;; /* running index in flag_buf */
(define flags 0) ;; /* current flags not yet saved in flag_buf */
(define flag_bit 0) ;; /* current bit used in flags */
;; /* bits are filled in flags starting at bit 0 (least significant).
;; * Note: these flags are overkill in the current code since we don't
;; * take advantage of DIST_BUFSIZE == LIT_BUFSIZE.
;; */
(define opt_len 0); ;; /* bit length of current block with optimal trees */
(define static_len 0); ;; /* bit length of current block with static trees */
(define compressed_len 0); ;; /* total bit length of compressed file */
(define input_len 0); ;; /* total byte length of input file */
;; /* input_len is for debugging only since we can get it by other means. */
;; (define block_start 0); ;; /* window offset of current block */
;; (define strstart 0); ;; /* window offset of current string */
(define (send_code c tree) (define (send_code c tree)
(send_bits (ct_data-code (vector-ref tree c)) (send_bits (ct_data-code (vector-ref tree c))
(ct_data-len (vector-ref tree c)))) (ct_data-len (vector-ref tree c))))
@ -1107,7 +1180,6 @@
;; /* =========================================================================== ;; /* ===========================================================================
;; * Initialize a new block. ;; * Initialize a new block.
;; */ ;; */
(define inited-once? #f)
(define (init_block) (define (init_block)
(for n := 0 < (if inited-once? L_CODES HEAP_SIZE) do (for n := 0 < (if inited-once? L_CODES HEAP_SIZE) do
(vector-set! dyn_ltree n (_make-ct_data 0 #f 0 #f))) (vector-set! dyn_ltree n (_make-ct_data 0 #f 0 #f)))
@ -1127,10 +1199,6 @@
(set! flags 0) (set! flags 0)
(set! flag_bit 1)) (set! flag_bit 1))
(define SMALLEST 1)
;; /* Index within the heap array of least frequent node in the Huffman tree */
;; /* =========================================================================== ;; /* ===========================================================================
;; * Remove the smallest element from the heap and recreate the heap with ;; * Remove the smallest element from the heap and recreate the heap with
;; * one less element. Updates heap and heap_len. ;; * one less element. Updates heap and heap_len.
@ -1716,13 +1784,10 @@
;; * the current block must be flushed. ;; * the current block must be flushed.
;; */ ;; */
(define ct_tally (define ct_tally
(let ([dist 0]) (lambda (dist lc)
(lambda (_dist lc)
;; int dist; ;; /* distance of matched string */ ;; int dist; ;; /* distance of matched string */
;; int lc; ;; /* match length-MIN_MATCH or unmatched char (if dist==0) */ ;; int lc; ;; /* match length-MIN_MATCH or unmatched char (if dist==0) */
(set! dist _dist)
(bytes-set! l_buf last_lit lc) (bytes-set! l_buf last_lit lc)
(set! last_lit (add1 last_lit)) (set! last_lit (add1 last_lit))
(if (= dist 0) (if (= dist 0)
@ -1783,7 +1848,7 @@
;; * on 16 bit machines and because stored blocks are restricted to ;; * on 16 bit machines and because stored blocks are restricted to
;; * 64K-1 bytes. ;; * 64K-1 bytes.
;; */ ;; */
)))) )))
;; /* =========================================================================== ;; /* ===========================================================================
;; * Send the block data compressed using the given Huffman trees ;; * Send the block data compressed using the given Huffman trees
@ -1792,54 +1857,50 @@
;; ct_data near *ltree; ;; /* literal tree */ ;; ct_data near *ltree; ;; /* literal tree */
;; ct_data near *dtree; ;; /* distance tree */ ;; ct_data near *dtree; ;; /* distance tree */
(define dist 0) ;; /* distance of matched string */
(define lc 0) ;; /* match length or unmatched char (if dist == 0) */
(define lx 0) ;; /* running index in l_buf */
(define dx 0) ;; /* running index in d_buf */
(define fx 0) ;; /* running index in flag_buf */
(define flag 0) ;; /* current flags */
(define code 0) ;; /* the code to send */
(define extra 0) ;; /* number of extra bits to send */
(when (not (= last_lit 0)) (when (not (= last_lit 0))
(let loop () (let loop ([lx 0] ;; /* running index in l_buf */
(when (= (bitwise-and lx 7) 0) [dx 0] ;; /* running index in d_buf */
(set! flag (vector-ref flag_buf fx)) [fx 0] ;; /* running index in flag_buf */
(set! fx (add1 fx))) [flag 0]) ;; /* current flags */
(set! lc (bytes-ref l_buf lx)) (define next? (= (bitwise-and lx 7) 0))
(set! lx (add1 lx)) (define new-flag (if next? (vector-ref flag_buf fx) flag))
(define new-fx (if next? (add1 fx) fx))
(define lc (bytes-ref l_buf lx)) ;; /* match length or unmatched char (if dist == 0) */
(define new-dx
(cond (cond
[(= (bitwise-and flag 1) 0) [(= (bitwise-and new-flag 1) 0)
(send_code lc ltree) ;; /* send a literal byte */ (send_code lc ltree) ;; /* send a literal byte */
(DEBUG '(Tracecv (isgraph lc) stderr " '~c' " (integer->char lc)))] (DEBUG '(Tracecv (isgraph lc) stderr " '~c' " (integer->char lc)))
dx]
[else [else
;; /* Here, lc is the match length - MIN_MATCH */ ;; /* Here, lc is the match length - MIN_MATCH */
(set! code (vector-ref length_code lc)) (define code (vector-ref length_code lc)) ;; /* the code to send */
(send_code (+ code LITERALS 1) ltree) ;; /* send the length code */ (send_code (+ code LITERALS 1) ltree) ;; /* send the length code */
(set! extra (vector-ref extra_lbits code)) (let ([extra (vector-ref extra_lbits code)]) ;; /* number of extra bits to send */
(when (not (= extra 0)) (when (not (= extra 0))
(set! lc (- lc (vector-ref base_length code))) (let ([lc (- lc (vector-ref base_length code))])
(send_bits lc extra)) ;; /* send the extra length bits */ (send_bits lc extra)))) ;; /* send the extra length bits */
(set! dist (vector-ref d_buf dx)) (define dist (vector-ref d_buf dx)) ;; /* distance of matched string */
(set! dx (add1 dx))
;; /* Here, dist is the match distance - 1 */ ;; /* Here, dist is the match distance - 1 */
(set! code (d_code dist)) (define code2 (d_code dist))
(Assert (Assert
(unless (< code D_CODES) (unless (< code2 D_CODES)
(error "bad d_code"))) (error "bad d_code")))
(send_code code dtree) ;; /* send the distance code */ (send_code code2 dtree) ;; /* send the distance code */
(set! extra (vector-ref extra_dbits code)) (let* ([extra (vector-ref extra_dbits code2)])
(when (not (= extra 0)) (when (not (= extra 0))
(set! dist (- dist (vector-ref base_dist code))) (let ([dist (- dist (vector-ref base_dist code2))])
(send_bits dist extra))]) ;; /* send the extra distance bits */ (send_bits dist extra)))) ;; /* send the extra distance bits */
(add1 dx)]))
;; /* literal or match pair ? */ ;; /* literal or match pair ? */
(set! flag (>> flag 1)) (define new-lx (add1 lx))
(when (< lx last_lit) (when (< new-lx last_lit)
(loop)))) (loop new-lx new-dx new-fx (>> new-flag 1)))))
(send_code END_BLOCK ltree)) (send_code END_BLOCK ltree))
@ -1898,23 +1959,6 @@
*/ */
|# |#
(define bytes_in 0)
(define bi_buf 0)
;; /* Output buffer. bits are inserted starting at the bottom (least significant
;; * bits).
;; */
(define Buf_size (* 8 2))
;; /* Number of bits used within bi_buf. (bi_buf might be implemented on
;; * more than 16 bits on some systems.)
;; */
(define bi_valid 0)
;; /* Number of valid bits in bi_buf. All bits above the last valid bit
;; * are always zero.
;; */
;; /* =========================================================================== ;; /* ===========================================================================
;; * Initialize the bit string routines. ;; * Initialize the bit string routines.
;; */ ;; */
@ -1979,7 +2023,6 @@
;; * Run a set of bytes through the crc shift register. If s is a NULL ;; * Run a set of bytes through the crc shift register. If s is a NULL
;; * pointer, then initialize the crc shift register contents instead. ;; * pointer, then initialize the crc shift register contents instead.
;; */ ;; */
(define crc #xffffffff)
(define (updcrc s n) (define (updcrc s n)
;; uch *s; /* pointer to bytes to pump through */ ;; uch *s; /* pointer to bytes to pump through */
;; unsigned n; /* number of bytes in s[] */ ;; unsigned n; /* number of bytes in s[] */
@ -1996,61 +2039,6 @@
(add1 p)))) (add1 p))))
(set! crc #xffffffff))) (set! crc #xffffffff)))
(define crc_32_tab
#(#x00000000
#x77073096 #xee0e612c #x990951ba #x076dc419
#x706af48f #xe963a535 #x9e6495a3 #x0edb8832 #x79dcb8a4
#xe0d5e91e #x97d2d988 #x09b64c2b #x7eb17cbd #xe7b82d07
#x90bf1d91 #x1db71064 #x6ab020f2 #xf3b97148 #x84be41de
#x1adad47d #x6ddde4eb #xf4d4b551 #x83d385c7 #x136c9856
#x646ba8c0 #xfd62f97a #x8a65c9ec #x14015c4f #x63066cd9
#xfa0f3d63 #x8d080df5 #x3b6e20c8 #x4c69105e #xd56041e4
#xa2677172 #x3c03e4d1 #x4b04d447 #xd20d85fd #xa50ab56b
#x35b5a8fa #x42b2986c #xdbbbc9d6 #xacbcf940 #x32d86ce3
#x45df5c75 #xdcd60dcf #xabd13d59 #x26d930ac #x51de003a
#xc8d75180 #xbfd06116 #x21b4f4b5 #x56b3c423 #xcfba9599
#xb8bda50f #x2802b89e #x5f058808 #xc60cd9b2 #xb10be924
#x2f6f7c87 #x58684c11 #xc1611dab #xb6662d3d #x76dc4190
#x01db7106 #x98d220bc #xefd5102a #x71b18589 #x06b6b51f
#x9fbfe4a5 #xe8b8d433 #x7807c9a2 #x0f00f934 #x9609a88e
#xe10e9818 #x7f6a0dbb #x086d3d2d #x91646c97 #xe6635c01
#x6b6b51f4 #x1c6c6162 #x856530d8 #xf262004e #x6c0695ed
#x1b01a57b #x8208f4c1 #xf50fc457 #x65b0d9c6 #x12b7e950
#x8bbeb8ea #xfcb9887c #x62dd1ddf #x15da2d49 #x8cd37cf3
#xfbd44c65 #x4db26158 #x3ab551ce #xa3bc0074 #xd4bb30e2
#x4adfa541 #x3dd895d7 #xa4d1c46d #xd3d6f4fb #x4369e96a
#x346ed9fc #xad678846 #xda60b8d0 #x44042d73 #x33031de5
#xaa0a4c5f #xdd0d7cc9 #x5005713c #x270241aa #xbe0b1010
#xc90c2086 #x5768b525 #x206f85b3 #xb966d409 #xce61e49f
#x5edef90e #x29d9c998 #xb0d09822 #xc7d7a8b4 #x59b33d17
#x2eb40d81 #xb7bd5c3b #xc0ba6cad #xedb88320 #x9abfb3b6
#x03b6e20c #x74b1d29a #xead54739 #x9dd277af #x04db2615
#x73dc1683 #xe3630b12 #x94643b84 #x0d6d6a3e #x7a6a5aa8
#xe40ecf0b #x9309ff9d #x0a00ae27 #x7d079eb1 #xf00f9344
#x8708a3d2 #x1e01f268 #x6906c2fe #xf762575d #x806567cb
#x196c3671 #x6e6b06e7 #xfed41b76 #x89d32be0 #x10da7a5a
#x67dd4acc #xf9b9df6f #x8ebeeff9 #x17b7be43 #x60b08ed5
#xd6d6a3e8 #xa1d1937e #x38d8c2c4 #x4fdff252 #xd1bb67f1
#xa6bc5767 #x3fb506dd #x48b2364b #xd80d2bda #xaf0a1b4c
#x36034af6 #x41047a60 #xdf60efc3 #xa867df55 #x316e8eef
#x4669be79 #xcb61b38c #xbc66831a #x256fd2a0 #x5268e236
#xcc0c7795 #xbb0b4703 #x220216b9 #x5505262f #xc5ba3bbe
#xb2bd0b28 #x2bb45a92 #x5cb36a04 #xc2d7ffa7 #xb5d0cf31
#x2cd99e8b #x5bdeae1d #x9b64c2b0 #xec63f226 #x756aa39c
#x026d930a #x9c0906a9 #xeb0e363f #x72076785 #x05005713
#x95bf4a82 #xe2b87a14 #x7bb12bae #x0cb61b38 #x92d28e9b
#xe5d5be0d #x7cdcefb7 #x0bdbdf21 #x86d3d2d4 #xf1d4e242
#x68ddb3f8 #x1fda836e #x81be16cd #xf6b9265b #x6fb077e1
#x18b74777 #x88085ae6 #xff0f6a70 #x66063bca #x11010b5c
#x8f659eff #xf862ae69 #x616bffd3 #x166ccf45 #xa00ae278
#xd70dd2ee #x4e048354 #x3903b3c2 #xa7672661 #xd06016f7
#x4969474d #x3e6e77db #xaed16a4a #xd9d65adc #x40df0b66
#x37d83bf0 #xa9bcae53 #xdebb9ec5 #x47b2cf7f #x30b5ffe9
#xbdbdf21c #xcabac28a #x53b39330 #x24b4a3a6 #xbad03605
#xcdd70693 #x54de5729 #x23d967bf #xb3667a2e #xc4614ab8
#x5d681b02 #x2a6f2b94 #xb40bbe37 #xc30c8ea1 #x5a05df1b
#x2d02ef8d))
;; /* =========================================================================== ;; /* ===========================================================================
;; * Copy a stored block to the zip file, storing first the length and its ;; * Copy a stored block to the zip file, storing first the length and its
;; * one's complement if requested. ;; * one's complement if requested.
@ -2092,12 +2080,10 @@
len)) len))
;; Assumes being called with c in 0..FF ;; Assumes being called with c in 0..FF
(define-syntax put_byte (define (put_byte c)
(syntax-rules () (bytes-set! outbuf outcnt c)
[(_ c)
(begin (bytes-set! outbuf outcnt c)
(set! outcnt (add1 outcnt)) (set! outcnt (add1 outcnt))
(when (= outcnt OUTBUFSIZ) (flush_outbuf)))])) (when (= outcnt OUTBUFSIZ) (flush_outbuf)))
;; /* Output a 16 bit value, lsb first */ ;; /* Output a 16 bit value, lsb first */
;; Assumes being called with c in 0..FFFF ;; Assumes being called with c in 0..FFFF
@ -2116,10 +2102,6 @@
(put_short (bitwise-and #xFFFF n)) (put_short (bitwise-and #xFFFF n))
(put_short (bitwise-and #xFFFF (>> n 16)))) (put_short (bitwise-and #xFFFF (>> n 16))))
(define outcnt 0)
(define bytes_out 0)
(define outbuf (make-bytes OUTBUFSIZ))
;; /* =========================================================================== ;; /* ===========================================================================
;; * Write the output buffer outbuf[0..outcnt-1] and update bytes_out. ;; * Write the output buffer outbuf[0..outcnt-1] and update bytes_out.
;; * (used for the compressed data only) ;; * (used for the compressed data only)
@ -2136,9 +2118,6 @@
;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(define ifd #f)
(define ofd #f)
(define (deflate-inner in out) (define (deflate-inner in out)
(do-deflate)) (do-deflate))
@ -2223,7 +2202,7 @@
(lambda () (close-output-port o))))) (lambda () (close-output-port o)))))
(lambda () (close-input-port i))))) (lambda () (close-input-port i)))))
(list gzip gzip-through-ports deflate)) (list gzip gzip-through-ports deflate)))
(define gzip (define gzip
(case-lambda (case-lambda
@ -2235,5 +2214,3 @@
(define (deflate in out) (define (deflate in out)
((caddr (code)) in out)) ((caddr (code)) in out))
)