racket/collects/rnrs/bytevectors-6.rkt
2010-04-27 16:50:15 -06:00

509 lines
18 KiB
Racket

#lang scheme/base
(require rnrs/enums-6
scheme/mpair)
(provide endianness
native-endianness
(rename-out [bytes? bytevector?]
[bytes-length bytevector-length]
[bytes=? bytevector=?]
[bytes-copy bytevector-copy]
[bytes-ref bytevector-u8-ref]
[bytes-set! bytevector-u8-set!])
bytevector-copy!
bytevector->u8-list
u8-list->bytevector
make-bytevector
bytevector-fill!
bytevector-s8-ref
bytevector-s8-set!
bytevector-u16-ref
bytevector-s16-ref
bytevector-u16-native-ref
bytevector-s16-native-ref
bytevector-u16-set!
bytevector-s16-set!
bytevector-u16-native-set!
bytevector-s16-native-set!
bytevector-u32-ref
bytevector-s32-ref
bytevector-u32-native-ref
bytevector-s32-native-ref
bytevector-u32-set!
bytevector-s32-set!
bytevector-u32-native-set!
bytevector-s32-native-set!
bytevector-u64-ref
bytevector-s64-ref
bytevector-u64-native-ref
bytevector-s64-native-ref
bytevector-u64-set!
bytevector-s64-set!
bytevector-u64-native-set!
bytevector-s64-native-set!
bytevector-uint-ref
bytevector-sint-ref
bytevector-uint-set!
bytevector-sint-set!
bytevector-ieee-single-ref
bytevector-ieee-single-native-ref
bytevector-ieee-single-set!
bytevector-ieee-single-native-set!
bytevector-ieee-double-ref
bytevector-ieee-double-native-ref
bytevector-ieee-double-set!
bytevector-ieee-double-native-set!
bytevector->uint-list
bytevector->sint-list
uint-list->bytevector
sint-list->bytevector
string->utf8
string->utf16
string->utf32
utf8->string
utf16->string
utf32->string)
(define-enumeration endianness (big little) endianness-set)
(define (native-endianness)
(if (system-big-endian?)
(endianness big)
(endianness little)))
(define (bytevector-copy! src src-start target target-start k)
(bytes-copy! target target-start src src-start (+ src-start k)))
(define (bytevector->u8-list bv)
(list->mlist (bytes->list bv)))
(define (u8-list->bytevector l)
(list->bytes (mlist->list l)))
(define (make-bytevector k [fill 0])
(make-bytes k (convert-fill 'make-bytevector fill)))
(define (convert-fill who fill)
(cond
[(byte? fill) fill]
[(and (exact-integer? fill)
(<= -128 fill -1))
(+ fill 256)]
[else (raise-type-error who
"exact integer in [128, 255]"
fill)]))
(define (bytevector-fill! bytes [fill 0])
(bytes-fill! bytes (convert-fill 'bytevector-fill! fill)))
;; ----------------------------------------
(define (bytevector-s8-ref bytes k)
(let ([v (bytes-ref bytes k)])
(if (v . > . 127)
(- v 256)
v)))
(define (bytevector-s8-set! bytes k v)
(bytes-set! bytes k (convert-fill 'bytevector-s8-set! v)))
(define (check-endian endianness)
(unless (or (eq? endianness 'little)
(eq? endianness 'big))
(raise-type-error 'bytevector-operation "'big or 'little" endianness)))
(define (make-integer-ops size)
(values
;; uXX-ref
(lambda (bytes k endianness)
(check-endian endianness)
(integer-bytes->integer bytes #f (eq? endianness 'big) k (+ k size)))
;; sXX-ref
(lambda (bytes k endianness)
(check-endian endianness)
(integer-bytes->integer bytes #t (eq? endianness 'big) k (+ k size)))
;; uXX-native-ref
(lambda (bytes k)
(integer-bytes->integer bytes #f (system-big-endian?) k (+ k size)))
;; sXX-native-ref
(lambda (bytes k)
(integer-bytes->integer bytes #t (system-big-endian?) k (+ k size)))
;; uXX-set!
(lambda (bytes k n endianness)
(check-endian endianness)
(integer->integer-bytes n size #f (eq? endianness 'big) bytes k)
(void))
;; sXX-set!
(lambda (bytes k n endianness)
(check-endian endianness)
(integer->integer-bytes n size #t (eq? endianness 'big) bytes k)
(void))
;; uXX-native-set!
(lambda (bytes k n)
(integer->integer-bytes n size #f (system-big-endian?) bytes k)
(void))
;; sXX-native-set!
(lambda (bytes k n)
(integer->integer-bytes n size #t (system-big-endian?) bytes k)
(void))))
(define-values (bytevector-u16-ref
bytevector-s16-ref
bytevector-u16-native-ref
bytevector-s16-native-ref
bytevector-u16-set!
bytevector-s16-set!
bytevector-u16-native-set!
bytevector-s16-native-set!)
(make-integer-ops 2))
(define-values (bytevector-u32-ref
bytevector-s32-ref
bytevector-u32-native-ref
bytevector-s32-native-ref
bytevector-u32-set!
bytevector-s32-set!
bytevector-u32-native-set!
bytevector-s32-native-set!)
(make-integer-ops 4))
(define-values (bytevector-u64-ref
bytevector-s64-ref
bytevector-u64-native-ref
bytevector-s64-native-ref
bytevector-u64-set!
bytevector-s64-set!
bytevector-u64-native-set!
bytevector-s64-native-set!)
(make-integer-ops 8))
(define (check-offset size k)
(unless (and (exact-nonnegative-integer? k)
(zero? (bitwise-and (sub1 size) k)))
(raise-type-error 'bytevector-operation
(format "exact nonnegative integer multiple of ~a" size)
k)))
(define (make-ieee-ops size)
(values
;; -ref
(lambda (bytes k endianness)
(check-endian endianness)
(floating-point-bytes->real bytes (eq? endianness 'big) k (+ k size)))
;; -native-ref
(lambda (bytes k)
(check-offset size k)
(floating-point-bytes->real bytes (system-big-endian?) k (+ k size)))
;; -set!
(lambda (bytes k n endianness)
(check-endian endianness)
(real->floating-point-bytes n size (eq? endianness 'big) bytes k))
;; -native-set!
(lambda (bytes k n)
(check-offset size k)
(real->floating-point-bytes n size (system-big-endian?) bytes k)
(void))))
(define-values (bytevector-ieee-single-ref
bytevector-ieee-single-native-ref
bytevector-ieee-single-set!
bytevector-ieee-single-native-set!)
(make-ieee-ops 4))
(define-values (bytevector-ieee-double-ref
bytevector-ieee-double-native-ref
bytevector-ieee-double-set!
bytevector-ieee-double-native-set!)
(make-ieee-ops 8))
;; ----------------------------------------
(define (bytevector-int-ref who bstr k endianness size)
(unless (bytes? bstr)
(raise-type-error who "bytevector" bstr))
(unless (exact-nonnegative-integer? k)
(raise-type-error who "exact nonnegative integer" k))
(check-endian endianness)
(unless (exact-positive-integer? size)
(raise-type-error who "exact positive integer" size))
(unless (<= (+ k size) (bytes-length bstr))
(error who "specified range [~a, ~a) beyond string range [0, ~a)"
k (+ k size) (bytes-length bstr)))
(for/fold ([r 0])
([i (in-range size)])
(+ (arithmetic-shift r 8)
(bytes-ref bstr (+ (if (eq? endianness 'big)
i
(- size i 1))
k)))))
(define (bytevector-uint-ref bstr k endianness size)
(bytevector-int-ref 'bytevector-uint-ref bstr k endianness size))
(define (bytevector-sint-ref bstr k endianness size)
(let ([v (bytevector-int-ref 'bytevector-sint-ref bstr k endianness size)]
[max (sub1 (arithmetic-shift 1 (sub1 (* size 8))))])
(if (v . > . max)
(- v (* 2 (add1 max)))
v)))
(define (bytevector-int-set! who bstr k n orig-n endianness size bit-size)
(unless (bytes? bstr)
(raise-type-error who "bytevector" bstr))
(unless (exact-nonnegative-integer? k)
(raise-type-error who "exact nonnegative integer" k))
(check-endian endianness)
(unless (exact-positive-integer? size)
(raise-type-error who "exact positive integer" size))
(unless (<= (+ k size) (bytes-length bstr))
(error who "specified target range [~a, ~a) beyond string range [0, ~a)"
k (+ k size) (bytes-length bstr)))
(unless ((integer-length orig-n) . <= . bit-size)
(error who "integer does not fit into ~a bytes: ~e"
size orig-n))
(for/fold ([n n])
([i (in-range size)])
(bytes-set! bstr (+ (if (eq? endianness 'little)
i
(- size i 1))
k)
(bitwise-and n 255))
(arithmetic-shift n -8))
(void))
(define (bytevector-uint-set! bstr k n endianness size)
(unless (exact-nonnegative-integer? n)
(raise-type-error 'bytevector-uint-set! "exact nonnegative integer" n))
(bytevector-int-set! 'bytevector-uint-set! bstr k n n endianness size (* size 8)))
(define (bytevector-sint-set! bstr k n endianness size)
(let ([pos-n (if (negative? n)
(+ n (arithmetic-shift 1 (* 8 size)))
n)])
(bytevector-int-set! 'bytevector-sint-set! bstr k pos-n n endianness size (sub1 (* size 8)))))
(define (bytevector->int-list who ref bv endianness size)
(unless (bytes? bv)
(raise-type-error who "bytevector" bv))
(check-endian endianness)
(unless (exact-positive-integer? size)
(raise-type-error who "exact positive integer" size))
(unless (zero? (modulo (bytes-length bv) size))
(raise-mismatch-error who "bytevector length is not a mulitple of given size: " size))
(list->mlist
(for/list ([k (in-range 0 (bytes-length bv) size)])
(ref bv k endianness size))))
(define (bytevector->uint-list bv endianness size)
(bytevector->int-list 'bytevector->uint-list bytevector-uint-ref bv endianness size))
(define (bytevector->sint-list bv endianness size)
(bytevector->int-list 'bytevector->sint-list bytevector-sint-ref bv endianness size))
(define (int-list->bytevector who signed? set l endianness size)
(unless (mlist? l)
(raise-type-error who "list" l))
(check-endian endianness)
(unless (exact-positive-integer? size)
(raise-type-error who "exact positive integer" size))
(let* ([l (mlist->list l)]
[len (length l)]
[bv (make-bytes (* size len))])
(for ([v (in-list l)]
[k (in-naturals)])
(set bv (* k size) v endianness size))
bv))
(define (uint-list->bytevector l endianness size)
(int-list->bytevector 'uint-list->bytevector #f bytevector-uint-set! l endianness size))
(define (sint-list->bytevector l endianness size)
(int-list->bytevector 'sint-list->bytevector #f bytevector-sint-set! l endianness size))
;; ----------------------------------------
(define (string->utf8 str)
(string->bytes/utf-8 str))
(define (string->utf16 str [endianness 'big])
(check-endian endianness)
(let ([big? (eq? endianness 'big)])
(let loop ([pos (string-length str)]
[accum null])
(if (zero? pos)
(list->bytes accum)
(let* ([pos (sub1 pos)]
[c (string-ref str pos)]
[v (char->integer c)])
(if (v . >= . #x10000)
(let ([v2 (- v #x10000)])
(let-values ([(a b c d)
(values (bitwise-ior #xD8
(arithmetic-shift v2 -18))
(bitwise-and #xFF
(arithmetic-shift v2 -10))
(bitwise-ior #xDC
(bitwise-and #x3
(arithmetic-shift v2 -8)))
(bitwise-and v2 #xFF))])
(if big?
(loop pos (list* a b c d accum))
(loop pos (list* b a d c accum)))))
(let-values ([(hi lo)
(values (arithmetic-shift v -8)
(bitwise-and v 255))])
(if big?
(loop pos (list* hi lo accum))
(loop pos (list* lo hi accum))))))))))
(define (string->utf32 str [endianness 'big])
(check-endian endianness)
(let ([bstr (make-bytes (* 4 (string-length str)))]
[big? (eq? endianness 'big)])
(for ([c (in-string str)]
[i (in-naturals)])
(let* ([v (char->integer c)]
[a (arithmetic-shift v -24)]
[b (bitwise-and #xFF (arithmetic-shift v -16))]
[c (bitwise-and #xFF (arithmetic-shift v -8))]
[d (bitwise-and #xFF v)]
[pos (* i 4)])
(if big?
(begin
(bytes-set! bstr pos a)
(bytes-set! bstr (+ 1 pos) b)
(bytes-set! bstr (+ 2 pos) c)
(bytes-set! bstr (+ 3 pos) d))
(begin
(bytes-set! bstr pos d)
(bytes-set! bstr (+ 1 pos) c)
(bytes-set! bstr (+ 2 pos) b)
(bytes-set! bstr (+ 3 pos) a)))))
bstr))
(define (utf8->string bstr)
(bytes->string/utf-8 bstr #\uFFFD))
(define (utf16->string bstr endianness [skip-bom? #f])
;; This version skips a two bytes for decoding errors,
;; except those that correspond to a trailing single byte.
(check-endian endianness)
(let ([len (bytes-length bstr)])
(let-values ([(big? offset)
(cond
[skip-bom?
(values (eq? endianness 'big) 0)]
[(len . >= . 2)
(cond
[(and (eq? #xFE (bytes-ref bstr 0))
(eq? #xFF (bytes-ref bstr 1)))
(values #t 2)]
[(and (eq? #xFF (bytes-ref bstr 0))
(eq? #xFE (bytes-ref bstr 1)))
(values #f 2)]
[else (values (eq? endianness 'big) 0)])]
[else (values (eq? endianness 'big) 0)])])
(list->string
(let loop ([pos offset])
(cond
[(= pos len) null]
[(= (add1 pos) len)
;; decoding error
'(#\uFFFD)]
[else
(let ([a (bytes-ref bstr pos)]
[b (bytes-ref bstr (add1 pos))])
(let ([a (if big? a b)]
[b (if big? b a)])
(cond
[(= (bitwise-and a #xFC) #xD8)
(if (len . < . (+ pos 4))
;; decoding error
(cons #\uFFFD (loop (+ pos 2)))
;; Surrogate...
(let ([c (bytes-ref bstr (+ pos 2))]
[d (bytes-ref bstr (+ pos 3))])
(let ([c (if big? c d)]
[d (if big? d c)])
(cond
[(= (bitwise-and c #xFC) #xDC)
;; A valid surrogate
(let ([v (+ #x10000
(bitwise-ior
(arithmetic-shift (bitwise-and #x3 a) 18)
(arithmetic-shift b 10)
(arithmetic-shift (bitwise-and #x3 c) 8)
d))])
(cons (integer->char v) (loop (+ pos 4))))]
[else
;; Invalid surrogate.
(cons #\uFFFD (loop (+ pos 2)))]))))]
[(= (bitwise-and a #xFC) #xDC)
;; invalid surrogate code
(cons #\uFFFD (loop (+ pos 2)))]
[else
(let ([v (bitwise-ior (arithmetic-shift a 8)
b)])
(cons (integer->char v)
(loop (+ pos 2))))])))]))))))
(define (utf32->string bstr endianness [skip-bom? #f])
;; Skips 4 bytes for each dcoding error (except too-few-bytes-at-end
;; errors, obviously).
(check-endian endianness)
(let ([len (bytes-length bstr)])
(let-values ([(big? offset)
(cond
[skip-bom?
(values (eq? endianness 'big) 0)]
[(len . >= . 4)
(cond
[(and (eq? #x00 (bytes-ref bstr 0))
(eq? #x00 (bytes-ref bstr 1))
(eq? #xFE (bytes-ref bstr 2))
(eq? #xFF (bytes-ref bstr 3)))
(values #t 4)]
[(and (eq? #xFF (bytes-ref bstr 0))
(eq? #xFE (bytes-ref bstr 1))
(eq? #x00 (bytes-ref bstr 2))
(eq? #x00 (bytes-ref bstr 3)))
(values #f 4)]
[else (values (eq? endianness 'big) 0)])]
[else (values (eq? endianness 'big) 0)])])
(list->string
(let loop ([pos offset])
(cond
[(= pos len) null]
[((+ pos 4) . > . len)
;; decoding error
'(#\uFFFD)]
[else
(let ([a (bytes-ref bstr pos)]
[b (bytes-ref bstr (+ pos 1))]
[c (bytes-ref bstr (+ pos 2))]
[d (bytes-ref bstr (+ pos 3))])
(let ([a (if big? a d)]
[b (if big? b c)]
[c (if big? c b)]
[d (if big? d a)])
(let ([v (bitwise-ior
(arithmetic-shift a 24)
(arithmetic-shift b 16)
(arithmetic-shift c 8)
d)])
(if (or (and (v . >= . #xD800)
(v . <= . #xDFFF))
(v . > . #x10FFFF))
(cons #\uFFFD (loop (+ pos 4)))
(cons (integer->char v) (loop (+ pos 4)))))))]))))))