add ffi/unsafe/string-list

Passing an array of strings or byte strings to a foreign function can
be especially tedious on Racket CS, due to the prohibition against
passing an array of GCable pointers to a foreign function.

Closes #3825
This commit is contained in:
Matthew Flatt 2021-05-08 09:04:35 -06:00
parent c42d64cdbc
commit 36195c71f6
5 changed files with 224 additions and 1 deletions

View File

@ -8,6 +8,7 @@
@include-section["vector.scrbl"]
@include-section["cvector.scrbl"]
@include-section["cpointer.scrbl"]
@include-section["string-list.scrbl"]
@include-section["serialize-cstruct.scrbl"]
@include-section["define.scrbl"]
@include-section["alloc.scrbl"]

View File

@ -0,0 +1,59 @@
#lang scribble/doc
@(require "utils.rkt"
(only-in scribble/decode make-splice)
scribble/racket
(for-label ffi/unsafe/string-list))
@title[#:tag "string-list"]{String List Types}
@defmodule[ffi/unsafe/string-list]{The
@racketmodname[ffi/unsafe/string-list] module provides types for
converting between string and bytes string lists and foreign arrays of
strings. This library is particularly useful with @CS[], since
types like @racket[(_list i _string)] cannot work for passing an
array of strings to a foreign call.}
@history[#:added "8.1.0.5"]
@deftogether[(
@defthing[_string-list ctype?]
@defthing[_string-list/utf-8 ctype?]
@defthing[_string-list/locale ctype?]
@defthing[_string-list/latin-1 ctype?]
@defthing[_string-list/utf-16 ctype?]
@defthing[_string-list/ucs-4 ctype?]
@defthing[_bytes-list ctype?]
@defthing[_bytes-list/nul-terminated ctype?]
)]{
Types that are similar to @racket[(_list i _string)],
@racket[(_list i _string/utf-8)],
@racket[(_list i _string/locale)],
@racket[(_list i _string/latin-1)],
@racket[(_list i _string/latin-1)],
@racket[(_list i _string/utf-16)],
@racket[(_list i _string/ucs-4)],
@racket[(_list i _bytes)], and
@racket[(_list i _bytes/nul-terminated)], but they work as foreign-call
arguments for @CS[] (as well as @BC[]).
These types convert a lists of strings or bytes to a single block of
@racket['atomic-interior] memory (see @racket[malloc]) that starts
with a NULL-terminated array of pointers into itself. Each string or
bytes string is word-aligned within the atomic block and terminated by
a suitable sequence of NUL bytes (except for @racket[_bytes-list], which
does not include a terminator).
The @racket[_string-list] type works only when the
@racket[default-_string-type] parameter has one of the following
values:
@racket[_string/utf-8], @racket[_string*/utf-8],
@racket[_string/latin-1], @racket[_string*/latin-1],
@racket[_string/locale], @racket[_string*/locale],
@racket[_string/utf-16], or
@racket[_string/ucs-4].
When used as a foreign-call result type and similar positions, these
types expect the foreign representation to include a NULL terminator
for the array of pointers as well as a NUL terminator for each string
or byte string.}

View File

@ -1203,7 +1203,8 @@ return two values, the vector and the boolean.
Note that in the @CS[] implementation of Racket, a @racket[(_list i
__ctype)] argument will trigger an error if @racket[__ctype] indicates
values that are managed by the garbage collector, since pointers to
non-atomic memory cannot be passed to foreign functions.
non-atomic memory cannot be passed to foreign functions. See also
@racketmodname[ffi/unsafe/string-list].
@history[#:changed "7.7.0.2" @elem{Added @racket[maybe-mode].}]
#:changed "7.7.0.6" @elem{The modes @racket[i], @racket[o],

View File

@ -11,6 +11,7 @@
ffi/unsafe/global
ffi/unsafe/atomic
ffi/unsafe/os-async-channel
ffi/unsafe/string-list
ffi/vector
racket/extflonum
racket/place
@ -1418,6 +1419,37 @@
(test #t cpointer-has-tag? p 'extra))
(test #t cpointer-predicate-procedure? foo?)
;; ----------------------------------------
(define (try-strings _t-list
#:latin-1? [latin-1? (not (equal? (locale-string-encoding) "UTF-8"))]
#:bytes? [as-bytes? #f]
#:add-nul? [add-nul? #f])
(define l (map
(if as-bytes? string->bytes/utf-8 values)
(if latin-1?
'("apple" "banana" "\xFF")
'("apple" "banana" "\u3BB x . x" "(\U1F600)"))))
(define l2 (cast (cast (if add-nul?
(map (lambda (bstr) (bytes-append bstr #"\0")) l)
l)
_t-list
_pointer)
_pointer
_t-list))
(unless (equal? l l2)
(error "failed ~s ~s ~s" _t-list l l2)))
(try-strings _string-list)
(try-strings _string-list/utf-8)
(try-strings _string-list/latin-1 #:latin-1? #t)
(try-strings _string-list/locale)
(try-strings _string-list/utf-16)
(try-strings _string-list/ucs-4)
(try-strings _bytes-list #:bytes? #t #:add-nul? #t)
(try-strings _bytes-list/nul-terminated #:bytes? #t)
;; ----------------------------------------
;; Test JIT inlining

View File

@ -0,0 +1,130 @@
#lang racket/base
(require ffi/unsafe)
(provide (protect-out
_bytes-list
_bytes-list/nul-terminated
_string-list
_string-list/utf-8
_string-list/locale
_string-list/latin-1
_string-list/utf-16
_string-list/ucs-4))
(define (check who l elem elem?)
(unless (and (list? l)
(andmap elem? l))
(raise-argument-error who (format "(listof ~a)" elem) l)))
(define (allocate-bytes-block l terminator-len)
(define ptr-bytes (ctype-sizeof _pointer))
(define roundup (sub1 ptr-bytes))
(define mask (bitwise-not roundup))
(define (align n) (bitwise-and (+ n roundup) mask))
(define size (for/fold ([size ptr-bytes]) ([bstr (in-list l)])
(+ size ptr-bytes (align (+ (bytes-length bstr) terminator-len)))))
(define m (malloc size 'atomic-interior))
(memset m 0 size)
(for/fold ([offset (* (add1 (length l)) ptr-bytes)]) ([bstr (in-list l)]
[i (in-naturals)])
(ptr-set! m _pointer i (ptr-add m offset))
(memcpy m offset bstr (bytes-length bstr))
(+ offset (align (+ (bytes-length bstr) terminator-len))))
m)
(define (extract-list p _t)
(define len (let loop ([i 0])
(if (ptr-ref p _pointer i)
(loop (add1 i))
i)))
(cast p _pointer (_list o _t len)))
(define _bytes-list
(make-ctype _pointer
(lambda (l)
(check '_bytes-list l 'bytes bytes?)
(allocate-bytes-block l 0))
(lambda (p)
(extract-list p _bytes))))
(define _bytes-list/nul-terminated
(make-ctype _pointer
(lambda (l)
(check '_bytes-list/nul-terminated l 'bytes bytes?)
(allocate-bytes-block l 1))
(lambda (p)
(extract-list p _bytes))))
(define (make-_string-list who convert _t terminator-len string?)
(make-ctype _pointer
(lambda (l)
(check who l 'string string?)
(allocate-bytes-block (map convert l) terminator-len))
(lambda (p)
(extract-list p (or _t _string)))))
(define (_string? str)
(define _t _string)
(cond
[(or (eq? _t _string*/utf-8)
(eq? _t _string*/latin-1)
(eq? _t _string*/locale))
(or (path? str) (string? str))]
[else (string? str)]))
(define (string->bytes str)
(define _t _string)
(cond
[(eq? _t _string/utf-8) (string->bytes/utf-8 str)]
[(eq? _t _string*/utf-8) (if (path? str)
(path->bytes str)
(string->bytes/utf-8 str))]
[(eq? _t _string/latin-1) (bytes->string/latin-1 str)]
[(eq? _t _string*/latin-1) (if (path? str)
(path->bytes str)
(string->bytes/latin-1 str))]
[(eq? _t _string/locale) (string->bytes/locale str)]
[(eq? _t _string*/locale) (if (path? str)
(path->bytes str)
(string->bytes/locale str))]
[(eq? _t _string/utf-16) (string->bytes/utf-16 str)]
[(eq? _t _string/ucs-4) (string->bytes/ucs-4 str)]
[else (error '_string-list
"unrecognized current _string conversion")]))
(define (string->bytes/utf-16 str)
(define len
(for/fold ([len 0]) ([c (in-string str)])
(+ len
(if ((char->integer c) . > . #xFFFF)
2
1))))
(define bstr (make-bytes (* 2 len)))
(let loop ([i 0] [j 0])
(unless (= i (string-length str))
(define v (char->integer (string-ref str i)))
(cond
[(v . <= . #xFFFF)
(ptr-set! bstr _uint16 j v)
(loop (add1 i) (+ j 1))]
[else
(define av (- v #x10000))
(define hi (bitwise-ior #xD800 (bitwise-and (arithmetic-shift av -10) #x3FF)))
(define lo (bitwise-ior #xDC00 (bitwise-and av #x3FF)))
(ptr-set! bstr _uint16 j hi)
(ptr-set! bstr _uint16 (+ j 1) lo)
(loop (add1 i) (+ j 2))])))
bstr)
(define (string->bytes/ucs-4 str)
(define bstr (make-bytes (* 4 (string-length str))))
(for ([i (in-range (string-length str))])
(ptr-set! bstr _uint32 i (char->integer (string-ref str i))))
bstr)
(define _string-list (make-_string-list '_string-list string->bytes #f 1 _string?))
(define _string-list/utf-8 (make-_string-list '_string-list/utf-8 string->bytes/utf-8 _string/utf-8 1 string?))
(define _string-list/locale (make-_string-list '_string-list/locale string->bytes/locale _string/locale 1 string?))
(define _string-list/latin-1 (make-_string-list '_string-list/latin-1 string->bytes/latin-1 _string/latin-1 1 string?))
(define _string-list/utf-16 (make-_string-list '_string-list/utf-16 string->bytes/utf-16 _string/utf-16 2 string?))
(define _string-list/ucs-4 (make-_string-list '_string-list/ucs-4 string->bytes/ucs-4 _string/ucs-4 4 string?))