net/base64: speed up encoding and decoding, especially on byte strings
Base64 encoding and decoding are common and simple enough to specialize for byte strings, where they can be much faster.
This commit is contained in:
parent
d20292b80c
commit
ef43f26f62
|
@ -8,13 +8,17 @@ utilities for Base 64 (MIME-standard) encoding and decoding.}
|
|||
|
||||
@section[#:tag "base64-procs"]{Functions}
|
||||
|
||||
@defproc[(base64-encode [bstr bytes?] [newline-bstr bytes? #"\r\n"]) bytes?]{
|
||||
@defproc[(base64-encode [bstr bytes?] [newline any/c #"\r\n"]) bytes?]{
|
||||
|
||||
Consumes a byte string and returns its Base 64 encoding as a new byte
|
||||
string. The returned string is broken into 72-byte lines separated by
|
||||
@racket[newline-bstr], which defaults to a CRLF combination, and the
|
||||
@racket[newline], which defaults to a CRLF combination, and the
|
||||
result always ends with a @racket[newline-bstr] unless the
|
||||
input is empty.}
|
||||
input is empty.
|
||||
|
||||
Although @racket[newline] is intended to be a byte string, it can be
|
||||
any value (possibly with a performance penalty), and it is converted
|
||||
to a byte string using @racket[display].}
|
||||
|
||||
|
||||
@defproc[(base64-decode [bstr bytes?]) bytes?]{
|
||||
|
@ -25,15 +29,18 @@ string.}
|
|||
|
||||
@defproc[(base64-encode-stream [in input-port?]
|
||||
[out output-port?]
|
||||
[newline-bstr bytes? #"\n"])
|
||||
[newline any/c #"\n"])
|
||||
void?]{
|
||||
|
||||
Reads bytes from @racket[in] and writes the encoded result to
|
||||
@racket[out], breaking the output into 72-character lines separated by
|
||||
@racket[newline-bstr], and ending with @racket[newline-bstr] unless
|
||||
the input stream is empty. Note that the default @racket[newline-bstr]
|
||||
@racket[newline], and ending with @racket[newline] unless
|
||||
the input stream is empty. Note that the default @racket[newline]
|
||||
is just @racket[#"\n"], not @racket[#"\r\n"]. The procedure returns when
|
||||
it encounters an end-of-file from @racket[in].}
|
||||
it encounters an end-of-file from @racket[in].
|
||||
|
||||
Although @racket[newline] is intended to be a byte string, it can be
|
||||
any value, and it is written using @racket[display].}
|
||||
|
||||
@defproc[(base64-decode-stream [in input-port?]
|
||||
[out output-port?])
|
||||
|
|
|
@ -2,12 +2,40 @@
|
|||
(require net/base64)
|
||||
|
||||
(define (test expect f . args)
|
||||
(unless (equal? expect (apply f args))
|
||||
(error "fail")))
|
||||
(define got (apply f args))
|
||||
(unless (equal? expect got)
|
||||
(error "fail" expect got)))
|
||||
|
||||
(test #"" base64-encode #"")
|
||||
(test #"" base64-encode #"" #"<>")
|
||||
(test #"V2h5IGRvIGF4ZSBtdXJkZXJlcnMgb25seSBhdHRhY2sKV2hlbiB5b3UncmUgcGFydGlhbGx5\r\nIG51ZGUKT3IgeW91J3JlIHRha2luZyBhIGJhdGg=\r\n"
|
||||
(for ([base64-encode (list base64-encode
|
||||
(lambda (bstr [linesep #"\r\n"])
|
||||
(define out (open-output-bytes))
|
||||
(base64-encode-stream (open-input-bytes bstr) out linesep)
|
||||
(get-output-bytes out)))]
|
||||
[base64-decode (list base64-decode
|
||||
(lambda (bstr)
|
||||
(define out (open-output-bytes))
|
||||
(base64-decode-stream (open-input-bytes bstr) out)
|
||||
(get-output-bytes out)))])
|
||||
(test #"" base64-encode #"")
|
||||
(test #"" base64-encode #"" #"<>")
|
||||
(test #"" base64-decode #"")
|
||||
(test #"" base64-decode #"a")
|
||||
(test #"i" base64-decode #"ab")
|
||||
(test #"i\267" base64-decode #"abc")
|
||||
(test #"i\267" base64-decode #"abc=d")
|
||||
(test #"eA==\n" base64-encode #"x" #"\n")
|
||||
(test #"eHk=\n" base64-encode #"xy" #"\n")
|
||||
(test #"eHl6\n" base64-encode #"xyz" #"\n")
|
||||
(test #"eHh4\n" base64-encode #"xxx" #"\n")
|
||||
(test #"eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4\n"
|
||||
base64-encode #"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" #"\n")
|
||||
(test #"eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4\neA==\n"
|
||||
base64-encode #"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" #"\n")
|
||||
(test #"V2h5IGRvIGF4ZSBtdXJkZXJlcnMgb25seSBhdHRhY2sKV2hlbiB5b3UncmUgcGFydGlhbGx5\r\nIG51ZGUKT3IgeW91J3JlIHRha2luZyBhIGJhdGg=\r\n"
|
||||
base64-encode #"Why do axe murderers only attack\nWhen you're partially nude\nOr you're taking a bath")
|
||||
(test #"V2h5IGRvIGF4ZSBtdXJkZXJlcnMgb25seSBhdHRhY2sKV2hlbiB5b3UncmUgcGFydGlhbGx5<>IG51ZGUKT3IgeW91J3JlIHRha2luZyBhIGJhdGg=<>"
|
||||
(test #"V2h5IGRvIGF4ZSBtdXJkZXJlcnMgb25seSBhdHRhY2sKV2hlbiB5b3UncmUgcGFydGlhbGx5<>IG51ZGUKT3IgeW91J3JlIHRha2luZyBhIGJhdGg=<>"
|
||||
base64-encode #"Why do axe murderers only attack\nWhen you're partially nude\nOr you're taking a bath" #"<>")
|
||||
(test #"V2h5IGRvIGF4ZSBtdXJkZXJlcnMgb25seSBhdHRhY2sKV2hlbiB5b3UncmUgcGFydGlhbGx5_X_IG51ZGUKT3IgeW91J3JlIHRha2luZyBhIGJhdGg=_X_"
|
||||
base64-encode #"Why do axe murderers only attack\nWhen you're partially nude\nOr you're taking a bath" '_X_)
|
||||
(test #"Why do axe murderers only attack\nWhen you're partially nude\nOr you're taking a bath"
|
||||
base64-decode #"V2h5IGRvIGF4ZSBtdXJkZXJlcnMgb25seSBhdHRhY2sKV2hlbiB5b3UncmUgcGFydGlhbGx5\r\nIG51ZGUKT3IgeW91J3JlIHRha2luZyBhIGJhdGg=\r\n"))
|
||||
|
|
|
@ -1,4 +1,9 @@
|
|||
#lang racket/base
|
||||
(require racket/fixnum)
|
||||
|
||||
;; Unsafe mode can be worth a factor of 2 to 4
|
||||
;; on byte-string encoding/decoding:
|
||||
(#%declare #:unsafe)
|
||||
|
||||
(provide base64-encode-stream
|
||||
base64-decode-stream
|
||||
|
@ -16,66 +21,179 @@
|
|||
(vector-set! db n i))
|
||||
(values (vector->immutable-vector bd) (vector->immutable-vector db))))
|
||||
|
||||
(define =byte (bytes-ref #"=" 0))
|
||||
(define ones
|
||||
(vector->immutable-vector
|
||||
(list->vector (for/list ([i (in-range 9)]) (sub1 (arithmetic-shift 1 i))))))
|
||||
|
||||
(define (base64-decode-stream in out)
|
||||
(unless (input-port? in) (raise-argument-error 'base64-decode-stream "input-port?" in))
|
||||
(unless (output-port? out) (raise-argument-error 'base64-decode-stream "output-port?" out))
|
||||
(let loop ([data 0] [bits 0])
|
||||
(if (>= bits 8)
|
||||
(let ([bits (- bits 8)])
|
||||
(write-byte (arithmetic-shift data (- bits)) out)
|
||||
(loop (bitwise-and data (vector*-ref ones bits)) bits))
|
||||
(let ([c (read-byte in)])
|
||||
(unless (or (eof-object? c) (eq? c =byte))
|
||||
(cond
|
||||
[(bits . fx>= . 8)
|
||||
(let ([bits (fx- bits 8)])
|
||||
(write-byte (fxrshift data bits) out)
|
||||
(loop (fxand data (fx- (fxlshift 1 bits) 1)) bits))]
|
||||
[else
|
||||
(define c (read-byte in))
|
||||
(unless (or (eof-object? c) (fx= c (char->integer #\=)))
|
||||
(let ([v (vector*-ref base64-digit c)])
|
||||
(if v
|
||||
(loop (+ (arithmetic-shift data 6) v) (+ bits 6))
|
||||
(loop data bits))))))))
|
||||
(loop (fx+ (fxlshift data 6) v) (fx+ bits 6))
|
||||
(loop data bits))))])))
|
||||
|
||||
(define (base64-encode-stream in out [linesep #"\n"])
|
||||
;; each set of three input bytes turns into four output bytes
|
||||
(unless (input-port? in) (raise-argument-error 'base64-encode-stream "input-port?" in))
|
||||
(unless (output-port? out) (raise-argument-error 'base64-encode-stream "output-port?" out))
|
||||
;; Each set of three input bytes turns into four output bytes.
|
||||
;; It might be nice to actually write the bytes as a 4-byte string,
|
||||
;; but this way preserves fine-grained streaming.
|
||||
(define (o! c) (write-byte (vector*-ref digit-base64 c) out))
|
||||
(define (fill!) (write-byte (char->integer #\=) out))
|
||||
(define (line!) (display linesep out))
|
||||
(let loop ([width 0])
|
||||
(define b1 (read-byte in))
|
||||
(unless (eof-object? b1)
|
||||
(let ([width (if (eqv? width 72)
|
||||
(begin
|
||||
(display linesep out)
|
||||
0)
|
||||
width)])
|
||||
(o! (arithmetic-shift b1 -2))
|
||||
(cond
|
||||
[(eof-object? b1)
|
||||
(unless (eqv? width 0)
|
||||
(line!))]
|
||||
[else
|
||||
(o! (fxrshift b1 2))
|
||||
(define b2 (read-byte in))
|
||||
(cond
|
||||
[(eof-object? b2)
|
||||
(o! (arithmetic-shift (bitwise-and b1 #b11) 4))
|
||||
(o! (fxlshift (fxand b1 #b11) 4))
|
||||
(fill!)
|
||||
(fill!)
|
||||
(line!)]
|
||||
[else
|
||||
(o! (bitwise-ior (arithmetic-shift (bitwise-and b1 #b11) 4)
|
||||
(arithmetic-shift b2 -4)))
|
||||
(o! (fxior (fxlshift (fxand b1 #b11) 4)
|
||||
(fxrshift b2 4)))
|
||||
(define b3 (read-byte in))
|
||||
(cond
|
||||
[(eof-object? b3)
|
||||
(o! (arithmetic-shift (bitwise-and b2 #b1111) 2))
|
||||
(o! (fxlshift (fxand b2 #b1111) 2))
|
||||
(fill!)
|
||||
(line!)]
|
||||
[else
|
||||
(o! (bitwise-ior (arithmetic-shift (bitwise-and b2 #b1111) 2)
|
||||
(arithmetic-shift b3 -6)))
|
||||
(o! (bitwise-and b3 #b111111))
|
||||
(loop (+ width 4))])])))))
|
||||
(o! (fxior (fxlshift (fxand b2 #b1111) 2)
|
||||
(fxrshift b3 6)))
|
||||
(o! (fxand b3 #b111111))
|
||||
(let ([width (if (eqv? width 68)
|
||||
(begin
|
||||
(display linesep out)
|
||||
0)
|
||||
(fx+ width 4))])
|
||||
(loop width))])])])))
|
||||
|
||||
;; ----------------------------------------
|
||||
|
||||
(define (base64-decode src)
|
||||
(let ([s (open-output-bytes)])
|
||||
(base64-decode-stream (open-input-bytes src) s)
|
||||
(get-output-bytes s)))
|
||||
(unless (bytes? src) (raise-argument-error 'base64-decode "bytes?" src))
|
||||
;; Loop through bytes to handle non-encoding characters and stop at `=`
|
||||
(define-values (src-len in-len)
|
||||
(let loop ([i 0] [len 0])
|
||||
(cond
|
||||
[(fx= i (bytes-length src)) (values i len)]
|
||||
[else
|
||||
(define c (bytes-ref src i))
|
||||
(cond
|
||||
[(fx= c (char->integer #\=)) (values i len)]
|
||||
[(vector*-ref base64-digit c) (loop (fx+ i 1) (fx+ len 1))]
|
||||
[else (loop (fx+ i 1) len)])])))
|
||||
(define out-len (fx+ (fx* (fxrshift in-len 2) 3)
|
||||
(fxmax 0 (fx- (fxand in-len 3) 1))))
|
||||
(define out (make-bytes out-len))
|
||||
(let loop1 ([i 0] [j 0])
|
||||
(unless (fx= i src-len)
|
||||
(define c1 (bytes-ref src i))
|
||||
(let ([v1 (vector*-ref base64-digit c1)]
|
||||
[i (fx+ i 1)])
|
||||
(cond
|
||||
[(not v1) (loop1 i j)]
|
||||
[else
|
||||
(let loop2 ([i i] [j j])
|
||||
(unless (fx= i src-len)
|
||||
(define c2 (bytes-ref src i))
|
||||
(let ([v2 (vector*-ref base64-digit c2)]
|
||||
[i (fx+ i 1)])
|
||||
(cond
|
||||
[(not v2) (loop2 i j)]
|
||||
[else
|
||||
(bytes-set! out j (fxior (fxlshift v1 2)
|
||||
(fxrshift v2 4)))
|
||||
(let loop3 ([i i] [j (fx+ j 1)])
|
||||
(unless (fx= i src-len)
|
||||
(define c3 (bytes-ref src i))
|
||||
(let ([v3 (vector*-ref base64-digit c3)]
|
||||
[i (fx+ i 1)])
|
||||
(cond
|
||||
[(not v3) (loop3 i j)]
|
||||
[else
|
||||
(bytes-set! out j (fxior (fxlshift (fxand v2 #b1111) 4)
|
||||
(fxrshift v3 2)))
|
||||
(let loop4 ([i i] [j (fx+ j 1)])
|
||||
(unless (fx= i src-len)
|
||||
(define c4 (bytes-ref src i))
|
||||
(let ([v4 (vector*-ref base64-digit c4)]
|
||||
[i (fx+ i 1)])
|
||||
(cond
|
||||
[(not v4) (loop4 i j)]
|
||||
[else
|
||||
(bytes-set! out j (fxior (fxlshift (fxand v3 #b11) 6)
|
||||
v4))
|
||||
(loop1 i (fx+ j 1))]))))]))))]))))]))))
|
||||
out)
|
||||
|
||||
(define (base64-encode src [linesep #"\r\n"])
|
||||
(unless (bytes? src) (raise-argument-error 'base64-encode "bytes?" src))
|
||||
(cond
|
||||
[(and (bytes? src) (bytes? linesep))
|
||||
(define in-len (bytes-length src))
|
||||
(cond
|
||||
[(eqv? 0 in-len) #""]
|
||||
[else
|
||||
(define out-payload-len (fx* (fxquotient (fx+ in-len 2) 3) 4))
|
||||
(define out-len (fx+ out-payload-len
|
||||
(fx* (fxquotient (fx+ out-payload-len 71) 72)
|
||||
(bytes-length linesep))))
|
||||
(define out (make-bytes out-len (char->integer #\=)))
|
||||
(define (out! j c) (bytes-set! out j (vector*-ref digit-base64 c)))
|
||||
(let loop ([i 0] [j 0] [width 0])
|
||||
(cond
|
||||
[((fx+ i 3) . fx<= . in-len)
|
||||
(define b1 (bytes-ref src i))
|
||||
(define b2 (bytes-ref src (fx+ i 1)))
|
||||
(define b3 (bytes-ref src (fx+ i 2)))
|
||||
(out! j (fxrshift b1 2))
|
||||
(out! (fx+ j 1) (fxior (fxlshift (fxand b1 #b11) 4)
|
||||
(fxrshift b2 4)))
|
||||
(out! (fx+ j 2) (fxior (fxlshift (fxand b2 #b1111) 2)
|
||||
(fxrshift b3 6)))
|
||||
(out! (fx+ j 3) (fxand b3 #b111111))
|
||||
(let ([width (fx+ width 4)]
|
||||
[i (fx+ i 3)]
|
||||
[j (fx+ j 4)])
|
||||
(cond
|
||||
[(and (eqv? width 72)
|
||||
(i . fx< . in-len))
|
||||
(bytes-copy! out j linesep)
|
||||
(loop i (fx+ j (bytes-length linesep)) 0)]
|
||||
[else
|
||||
(loop i j width)]))]
|
||||
[((fx+ i 2) . fx<= . in-len)
|
||||
(define b1 (bytes-ref src i))
|
||||
(define b2 (bytes-ref src (fx+ i 1)))
|
||||
(out! j (fxrshift b1 2))
|
||||
(out! (fx+ j 1) (fxior (fxlshift (fxand b1 #b11) 4)
|
||||
(fxrshift b2 4)))
|
||||
(out! (fx+ j 2) (fxlshift (fxand b2 #b1111) 2))
|
||||
(bytes-copy! out (fx+ j 4) linesep)]
|
||||
[((fx+ i 1) . fx<= . in-len)
|
||||
(define b1 (bytes-ref src i))
|
||||
(out! j (fxrshift b1 2))
|
||||
(out! (fx+ j 1) (fxlshift (fxand b1 #b11) 4))
|
||||
(bytes-copy! out (fx+ j 4) linesep)]
|
||||
[else
|
||||
(bytes-copy! out j linesep)]))
|
||||
out])]
|
||||
[else
|
||||
(let ([s (open-output-bytes)])
|
||||
(base64-encode-stream (open-input-bytes src) s linesep)
|
||||
(get-output-bytes s)))
|
||||
(get-output-bytes s))]))
|
||||
|
|
Loading…
Reference in New Issue
Block a user