From 36195c71f6941fcf9e86dcf6529eaa3130b20871 Mon Sep 17 00:00:00 2001 From: Matthew Flatt Date: Sat, 8 May 2021 09:04:35 -0600 Subject: [PATCH] add ffi/unsafe/string-list Passing an array of strings or byte strings to a foreign function can be especially tedious on Racket CS, due to the prohibition against passing an array of GCable pointers to a foreign function. Closes #3825 --- .../scribblings/foreign/derived.scrbl | 1 + .../scribblings/foreign/string-list.scrbl | 59 ++++++++ .../scribblings/foreign/types.scrbl | 3 +- .../tests/racket/foreign-test.rktl | 32 +++++ racket/collects/ffi/unsafe/string-list.rkt | 130 ++++++++++++++++++ 5 files changed, 224 insertions(+), 1 deletion(-) create mode 100644 pkgs/racket-doc/scribblings/foreign/string-list.scrbl create mode 100644 racket/collects/ffi/unsafe/string-list.rkt diff --git a/pkgs/racket-doc/scribblings/foreign/derived.scrbl b/pkgs/racket-doc/scribblings/foreign/derived.scrbl index 1bf21dcc18..cbeb4ddd83 100644 --- a/pkgs/racket-doc/scribblings/foreign/derived.scrbl +++ b/pkgs/racket-doc/scribblings/foreign/derived.scrbl @@ -8,6 +8,7 @@ @include-section["vector.scrbl"] @include-section["cvector.scrbl"] @include-section["cpointer.scrbl"] +@include-section["string-list.scrbl"] @include-section["serialize-cstruct.scrbl"] @include-section["define.scrbl"] @include-section["alloc.scrbl"] diff --git a/pkgs/racket-doc/scribblings/foreign/string-list.scrbl b/pkgs/racket-doc/scribblings/foreign/string-list.scrbl new file mode 100644 index 0000000000..dfee48dff4 --- /dev/null +++ b/pkgs/racket-doc/scribblings/foreign/string-list.scrbl @@ -0,0 +1,59 @@ +#lang scribble/doc +@(require "utils.rkt" + (only-in scribble/decode make-splice) + scribble/racket + (for-label ffi/unsafe/string-list)) + +@title[#:tag "string-list"]{String List Types} + +@defmodule[ffi/unsafe/string-list]{The +@racketmodname[ffi/unsafe/string-list] module provides types for +converting between string and bytes string lists and foreign arrays of +strings. This library is particularly useful with @CS[], since +types like @racket[(_list i _string)] cannot work for passing an +array of strings to a foreign call.} + +@history[#:added "8.1.0.5"] + +@deftogether[( +@defthing[_string-list ctype?] +@defthing[_string-list/utf-8 ctype?] +@defthing[_string-list/locale ctype?] +@defthing[_string-list/latin-1 ctype?] +@defthing[_string-list/utf-16 ctype?] +@defthing[_string-list/ucs-4 ctype?] +@defthing[_bytes-list ctype?] +@defthing[_bytes-list/nul-terminated ctype?] +)]{ + +Types that are similar to @racket[(_list i _string)], +@racket[(_list i _string/utf-8)], +@racket[(_list i _string/locale)], +@racket[(_list i _string/latin-1)], +@racket[(_list i _string/latin-1)], +@racket[(_list i _string/utf-16)], +@racket[(_list i _string/ucs-4)], +@racket[(_list i _bytes)], and +@racket[(_list i _bytes/nul-terminated)], but they work as foreign-call +arguments for @CS[] (as well as @BC[]). + +These types convert a lists of strings or bytes to a single block of +@racket['atomic-interior] memory (see @racket[malloc]) that starts +with a NULL-terminated array of pointers into itself. Each string or +bytes string is word-aligned within the atomic block and terminated by +a suitable sequence of NUL bytes (except for @racket[_bytes-list], which +does not include a terminator). + +The @racket[_string-list] type works only when the +@racket[default-_string-type] parameter has one of the following +values: +@racket[_string/utf-8], @racket[_string*/utf-8], +@racket[_string/latin-1], @racket[_string*/latin-1], +@racket[_string/locale], @racket[_string*/locale], +@racket[_string/utf-16], or +@racket[_string/ucs-4]. + +When used as a foreign-call result type and similar positions, these +types expect the foreign representation to include a NULL terminator +for the array of pointers as well as a NUL terminator for each string +or byte string.} diff --git a/pkgs/racket-doc/scribblings/foreign/types.scrbl b/pkgs/racket-doc/scribblings/foreign/types.scrbl index 424bc20d3a..918a1bba63 100644 --- a/pkgs/racket-doc/scribblings/foreign/types.scrbl +++ b/pkgs/racket-doc/scribblings/foreign/types.scrbl @@ -1203,7 +1203,8 @@ return two values, the vector and the boolean. Note that in the @CS[] implementation of Racket, a @racket[(_list i __ctype)] argument will trigger an error if @racket[__ctype] indicates values that are managed by the garbage collector, since pointers to -non-atomic memory cannot be passed to foreign functions. +non-atomic memory cannot be passed to foreign functions. See also +@racketmodname[ffi/unsafe/string-list]. @history[#:changed "7.7.0.2" @elem{Added @racket[maybe-mode].}] #:changed "7.7.0.6" @elem{The modes @racket[i], @racket[o], diff --git a/pkgs/racket-test-core/tests/racket/foreign-test.rktl b/pkgs/racket-test-core/tests/racket/foreign-test.rktl index 2ea119bc14..4b8e4f743f 100644 --- a/pkgs/racket-test-core/tests/racket/foreign-test.rktl +++ b/pkgs/racket-test-core/tests/racket/foreign-test.rktl @@ -11,6 +11,7 @@ ffi/unsafe/global ffi/unsafe/atomic ffi/unsafe/os-async-channel + ffi/unsafe/string-list ffi/vector racket/extflonum racket/place @@ -1418,6 +1419,37 @@ (test #t cpointer-has-tag? p 'extra)) (test #t cpointer-predicate-procedure? foo?) +;; ---------------------------------------- + +(define (try-strings _t-list + #:latin-1? [latin-1? (not (equal? (locale-string-encoding) "UTF-8"))] + #:bytes? [as-bytes? #f] + #:add-nul? [add-nul? #f]) + (define l (map + (if as-bytes? string->bytes/utf-8 values) + (if latin-1? + '("apple" "banana" "\xFF") + '("apple" "banana" "\u3BB x . x" "(\U1F600)")))) + (define l2 (cast (cast (if add-nul? + (map (lambda (bstr) (bytes-append bstr #"\0")) l) + l) + _t-list + _pointer) + _pointer + _t-list)) + (unless (equal? l l2) + (error "failed ~s ~s ~s" _t-list l l2))) + +(try-strings _string-list) +(try-strings _string-list/utf-8) +(try-strings _string-list/latin-1 #:latin-1? #t) +(try-strings _string-list/locale) +(try-strings _string-list/utf-16) +(try-strings _string-list/ucs-4) + +(try-strings _bytes-list #:bytes? #t #:add-nul? #t) +(try-strings _bytes-list/nul-terminated #:bytes? #t) + ;; ---------------------------------------- ;; Test JIT inlining diff --git a/racket/collects/ffi/unsafe/string-list.rkt b/racket/collects/ffi/unsafe/string-list.rkt new file mode 100644 index 0000000000..cbeb22a7cc --- /dev/null +++ b/racket/collects/ffi/unsafe/string-list.rkt @@ -0,0 +1,130 @@ +#lang racket/base +(require ffi/unsafe) + +(provide (protect-out + _bytes-list + _bytes-list/nul-terminated + _string-list + _string-list/utf-8 + _string-list/locale + _string-list/latin-1 + _string-list/utf-16 + _string-list/ucs-4)) + +(define (check who l elem elem?) + (unless (and (list? l) + (andmap elem? l)) + (raise-argument-error who (format "(listof ~a)" elem) l))) + +(define (allocate-bytes-block l terminator-len) + (define ptr-bytes (ctype-sizeof _pointer)) + (define roundup (sub1 ptr-bytes)) + (define mask (bitwise-not roundup)) + (define (align n) (bitwise-and (+ n roundup) mask)) + (define size (for/fold ([size ptr-bytes]) ([bstr (in-list l)]) + (+ size ptr-bytes (align (+ (bytes-length bstr) terminator-len))))) + (define m (malloc size 'atomic-interior)) + (memset m 0 size) + (for/fold ([offset (* (add1 (length l)) ptr-bytes)]) ([bstr (in-list l)] + [i (in-naturals)]) + (ptr-set! m _pointer i (ptr-add m offset)) + (memcpy m offset bstr (bytes-length bstr)) + (+ offset (align (+ (bytes-length bstr) terminator-len)))) + m) + +(define (extract-list p _t) + (define len (let loop ([i 0]) + (if (ptr-ref p _pointer i) + (loop (add1 i)) + i))) + (cast p _pointer (_list o _t len))) + +(define _bytes-list + (make-ctype _pointer + (lambda (l) + (check '_bytes-list l 'bytes bytes?) + (allocate-bytes-block l 0)) + (lambda (p) + (extract-list p _bytes)))) + +(define _bytes-list/nul-terminated + (make-ctype _pointer + (lambda (l) + (check '_bytes-list/nul-terminated l 'bytes bytes?) + (allocate-bytes-block l 1)) + (lambda (p) + (extract-list p _bytes)))) + +(define (make-_string-list who convert _t terminator-len string?) + (make-ctype _pointer + (lambda (l) + (check who l 'string string?) + (allocate-bytes-block (map convert l) terminator-len)) + (lambda (p) + (extract-list p (or _t _string))))) + +(define (_string? str) + (define _t _string) + (cond + [(or (eq? _t _string*/utf-8) + (eq? _t _string*/latin-1) + (eq? _t _string*/locale)) + (or (path? str) (string? str))] + [else (string? str)])) + +(define (string->bytes str) + (define _t _string) + (cond + [(eq? _t _string/utf-8) (string->bytes/utf-8 str)] + [(eq? _t _string*/utf-8) (if (path? str) + (path->bytes str) + (string->bytes/utf-8 str))] + [(eq? _t _string/latin-1) (bytes->string/latin-1 str)] + [(eq? _t _string*/latin-1) (if (path? str) + (path->bytes str) + (string->bytes/latin-1 str))] + [(eq? _t _string/locale) (string->bytes/locale str)] + [(eq? _t _string*/locale) (if (path? str) + (path->bytes str) + (string->bytes/locale str))] + [(eq? _t _string/utf-16) (string->bytes/utf-16 str)] + [(eq? _t _string/ucs-4) (string->bytes/ucs-4 str)] + [else (error '_string-list + "unrecognized current _string conversion")])) + +(define (string->bytes/utf-16 str) + (define len + (for/fold ([len 0]) ([c (in-string str)]) + (+ len + (if ((char->integer c) . > . #xFFFF) + 2 + 1)))) + (define bstr (make-bytes (* 2 len))) + (let loop ([i 0] [j 0]) + (unless (= i (string-length str)) + (define v (char->integer (string-ref str i))) + (cond + [(v . <= . #xFFFF) + (ptr-set! bstr _uint16 j v) + (loop (add1 i) (+ j 1))] + [else + (define av (- v #x10000)) + (define hi (bitwise-ior #xD800 (bitwise-and (arithmetic-shift av -10) #x3FF))) + (define lo (bitwise-ior #xDC00 (bitwise-and av #x3FF))) + (ptr-set! bstr _uint16 j hi) + (ptr-set! bstr _uint16 (+ j 1) lo) + (loop (add1 i) (+ j 2))]))) + bstr) + +(define (string->bytes/ucs-4 str) + (define bstr (make-bytes (* 4 (string-length str)))) + (for ([i (in-range (string-length str))]) + (ptr-set! bstr _uint32 i (char->integer (string-ref str i)))) + bstr) + +(define _string-list (make-_string-list '_string-list string->bytes #f 1 _string?)) +(define _string-list/utf-8 (make-_string-list '_string-list/utf-8 string->bytes/utf-8 _string/utf-8 1 string?)) +(define _string-list/locale (make-_string-list '_string-list/locale string->bytes/locale _string/locale 1 string?)) +(define _string-list/latin-1 (make-_string-list '_string-list/latin-1 string->bytes/latin-1 _string/latin-1 1 string?)) +(define _string-list/utf-16 (make-_string-list '_string-list/utf-16 string->bytes/utf-16 _string/utf-16 2 string?)) +(define _string-list/ucs-4 (make-_string-list '_string-list/ucs-4 string->bytes/ucs-4 _string/ucs-4 4 string?))