diff --git a/LOG b/LOG index 959f75e6b5..90f12982e5 100644 --- a/LOG +++ b/LOG @@ -759,3 +759,9 @@ schlib.c, prim.c, externs.h mats/foreign4.c, mats/foreign.ms mats/Mf-* foreign.stex, release_notes.stex +- add a __thread convention for foreign procedures and callables + to automate thread [de]activation + syntax.ss, ftype.ss, x86.ss, x86_64.ss, ppc32.ss, + cmacros.ss, base-lang.ss, np-languages.ss, cprep.ss + thread.c, prim.c, externs.h, foreign.stex, release_notes.stex, + mats/Mf-t*, foreign.ms, foreign4.c diff --git a/c/externs.h b/c/externs.h index 98fa0f80b3..8663a4de74 100644 --- a/c/externs.h +++ b/c/externs.h @@ -216,6 +216,8 @@ extern void S_mutex_release PROTO((scheme_mutex_t *m)); extern s_thread_cond_t *S_make_condition PROTO((void)); extern void S_condition_free PROTO((s_thread_cond_t *c)); extern IBOOL S_condition_wait PROTO((s_thread_cond_t *c, scheme_mutex_t *m, ptr t)); +extern INT S_activate_thread PROTO((void)); +extern void S_unactivate_thread PROTO((int mode)); #endif /* scheme.c */ diff --git a/c/prim.c b/c/prim.c index 0041012a81..56709b2fcc 100644 --- a/c/prim.c +++ b/c/prim.c @@ -124,6 +124,9 @@ static void create_c_entry_vector() { #ifdef PTHREADS install_c_entry(CENTRY_raw_collect_cond, (ptr)&S_collect_cond); install_c_entry(CENTRY_raw_tc_mutex, (ptr)&S_tc_mutex); + install_c_entry(CENTRY_activate_thread, proc2ptr(S_activate_thread)); + install_c_entry(CENTRY_deactivate_thread, proc2ptr(Sdeactivate_thread)); + install_c_entry(CENTRY_unactivate_thread, proc2ptr(S_unactivate_thread)); #endif /* PTHREADS */ install_c_entry(CENTRY_handle_values_error, proc2ptr(S_handle_values_error)); install_c_entry(CENTRY_handle_mvlet_error, proc2ptr(S_handle_mvlet_error)); @@ -139,7 +142,10 @@ static void create_c_entry_vector() { for (i = 0; i < c_entry_vector_size; i++) { #ifndef PTHREADS - if (i == CENTRY_raw_collect_cond || i == CENTRY_raw_tc_mutex) continue; + if (i == CENTRY_raw_collect_cond || i == CENTRY_raw_tc_mutex + || i == CENTRY_activate_thread || i == CENTRY_deactivate_thread + || i == CENTRY_unactivate_thread) + continue; #endif /* NOT PTHREADS */ if (Svector_ref(S_G.c_entry_vector, i) == Sfalse) { fprintf(stderr, "c_entry_vector entry %d is uninitialized\n", i); diff --git a/c/thread.c b/c/thread.c index 51b135c920..f7491e347b 100644 --- a/c/thread.c +++ b/c/thread.c @@ -143,6 +143,33 @@ IBOOL Sactivate_thread() { /* create or reactivate current thread */ } } +int S_activate_thread() { /* Like Sactivate_thread(), but returns a mode to revert the effect */ + ptr tc = get_thread_context(); + + if (tc == (ptr)0) { + Sactivate_thread(); + return unactivate_mode_destroy; + } else if (!ACTIVE(tc)) { + reactivate_thread(tc); + return unactivate_mode_deactivate; + } else + return unactivate_mode_noop; +} + +void S_unactivate_thread(int mode) { /* Reverts a previous S_activate_thread() effect */ + switch (mode) { + case unactivate_mode_deactivate: + Sdeactivate_thread(); + break; + case unactivate_mode_destroy: + Sdestroy_thread(); + break; + case unactivate_mode_noop: + default: + break; + } +} + void Sdeactivate_thread() { /* deactivate current thread */ ptr tc = get_thread_context(); if (tc != (ptr)0) deactivate_thread(tc) diff --git a/csug/foreign.stex b/csug/foreign.stex index f0dc71dd2b..8453a55777 100644 --- a/csug/foreign.stex +++ b/csug/foreign.stex @@ -192,8 +192,7 @@ Scheme-callable wrappers for foreign procedures can also be created via %---------------------------------------------------------------------------- \entryheader -\formdef{foreign-procedure}{\categorysyntax}{(foreign-procedure \var{entry-exp} (\var{param-type} \dots) \var{res-type})} -\formdef{foreign-procedure}{\categorysyntax}{(foreign-procedure \var{conv} \var{entry-exp} (\var{param-type} \dots) \var{res-type})} +\formdef{foreign-procedure}{\categorysyntax}{(foreign-procedure \var{conv} \dots \var{entry-exp} (\var{param-type} \dots) \var{res-type})} \returns a procedure \listlibraries \endentryheader @@ -213,13 +212,14 @@ by the \var{res-type}. Multiple procedures may be created for the same \index{foreign entry}foreign entry. \label{page:conv-description}% -If \var{conv} is present, it specifies the calling convention to be used. -The default is \scheme{#f}, which specifies the default calling convention -on the target machine. -Three other conventions are currently supported, all only under -Windows: \scheme{__stdcall}, \scheme{__cdecl}, and \scheme{__com}. +Each \var{conv} adjusts specifies the calling convention to be used. +A \scheme{#f} is allowed as \var{conv} to inicated the default calling convention +on the target machine (so the \scheme{#f} has no effect). +Three other conventions are currently supported under +Windows: \scheme{__stdcall}, \scheme{__cdecl}, and \scheme{__com} (32-bit only). Since \scheme{__cdecl} is the default, specifying \scheme{__cdecl} is equivalent to specifying \scheme{#f} or no convention. +Finally, \var{conv} can be \scheme{__thread} to control thread deactivation. Use \scheme{__stdcall} to access most Windows API procedures. Use \scheme{__cdecl} for Windows API varargs procedures, @@ -250,6 +250,31 @@ encapsulated within the COM instance passed as the first argument, with the second argument being a double float and the return value being an integer. +Use \scheme{__thread} to make the current thread deactivated (see +\scheme{fork-thread}) while a foreign procedure is called. The +thread is activated again when the foreign procedure returns. Deactivation +of the thread allows garbage collection to proceed in other threads, +so do not pass collectable memory to the foreign procedure, or use +\scheme{lock-object} to lock the memory in place; see also +\scheme{Sdeactivate_thread}. The \scheme{__thread} +declaration has no effect on a non-threaded version of the system. + +For example, calling the C \scheme{sleep} function with the default +convention will block other Scheme threads from performing a garbage +collection, but adding the \scheme{__thread} declaration avoids that +problem: + +\schemedisplay +(define c-sleep (foreign-procedure __thread "sleep" (unsigned) unsigned)) +(c-sleep 10) \var{; sleeps for 10 seconds without blocking other threads} +\endschemedisplay + +\noindent +If a foreign procedure that is called with \scheme{__thread} can +invoke callables, then each callable should also be declared with +\scheme{__thread} so that the callable reactivates the thread. + + Complete type checking and conversion is performed on the parameters. The types \index{\scheme{scheme-object}}\scheme{scheme-object}, @@ -976,8 +1001,7 @@ function ftype (Section~\ref{SECTFOREIGNDATA}). %---------------------------------------------------------------------------- \entryheader -\formdef{foreign-callable}{\categorysyntax}{(foreign-callable \var{proc-exp} (\var{param-type} \dots) \var{res-type})} -\formdef{foreign-procedure}{\categorysyntax}{(foreign-callable \var{conv} \var{proc-exp} (\var{param-type} \dots) \var{res-type})} +\formdef{foreign-callable}{\categorysyntax}{(foreign-callable \var{conv} \dots \var{proc-exp} (\var{param-type} \dots) \var{res-type})} \returns a code object \listlibraries \endentryheader @@ -1002,9 +1026,16 @@ since the parameter values are provided by the foreign code and must be assumed to be correct. -If \var{conv} is present, it specifies the calling convention to be used. +Each \var{conv} adjusts the calling convention to be used. \scheme{foreign-callable} supports the same conventions as \scheme{foreign-procedure} with the exception of \scheme{__com}. +The \scheme{__thread} convention for a callable activates a +calling thread if the thread is not already activated, and +the thread's activation state is reverted when the callable +returns. If a calling thread is not currently registered with +the Scheme system, then reverting the thread's activation state implies +destroying the thread's registration (see \scheme{Sdestroy_thread}). + The value produced by \scheme{foreign-callable} is a Scheme code object, which contains some header information as well as code that performs @@ -1067,8 +1098,8 @@ void cb_init(void) { callbacks[i] = (CB)0; } -void register_callback(char c, int cb) { - callbacks[c] = (CB)cb; +void register_callback(char c, CB cb) { + callbacks[c] = cb; } void event_loop(void) { @@ -1090,9 +1121,9 @@ Interfaces to these functions may be defined in Scheme as follows. (define cb-init (foreign-procedure "cb_init" () void)) (define register-callback - (foreign-procedure "register_callback" (char int) void)) + (foreign-procedure "register_callback" (char void*) void)) (define event-loop - (foreign-procedure "event_loop" () void)) + (foreign-procedure __thread "event_loop" () void)) \endschemedisplay \noindent @@ -1101,7 +1132,7 @@ A callback for selected characters can then be defined. \schemedisplay (define callback (lambda (p) - (let ([code (foreign-callable p (char) void)]) + (let ([code (foreign-callable __thread p (char) void)]) (lock-object code) (foreign-callable-entry-point code)))) (define ouch @@ -1135,7 +1166,10 @@ Ouch! Hit by 'e' \endschemedisplay \noindent -A more well-behaved version of this example would save each code object +The \scheme{__thread} declarations in this example ensure that +other threads can continue working while \scheme{event-loop} +blocks waiting for input. +A more well-behaved version of the example would save each code object returned by \scheme{foreign-callable} and unlock it when it is no longer registered as a callback. @@ -1440,8 +1474,7 @@ An \var{ftype} must take one of the following forms: (array \var{length} \var{ftype}) (* \var{ftype}) (bits (\var{field-name} \var{signedness} \var{bits}) \dots) -(function (\var{ftype} \dots) \var{ftype}) -(function \var{conv} (\var{ftype} \dots) \var{ftype}) +(function \var{conv} \dots (\var{ftype} \dots) \var{ftype}) (packed \var{ftype}) (unpacked \var{ftype}) (endian \var{endianness} \var{ftype}) @@ -3431,15 +3464,17 @@ in the active state and need not be activated. Any thread that has been deactivated, and any thread created by some mechanism other than \scheme{fork-thread} must, however, be activated before before it can access Scheme data or execute -Scheme code. -\scheme{Sactivate_thread} is used for this purpose. +Scheme code. A foreign callable that is declared with \scheme{__thread} +can activate a calling thread. +Otherwise, \scheme{Sactivate_thread} must be used to activate a thread. It returns 1 the first time the thread is activated and 0 on each -subsequent call. +subsequent call until the activation is destroyed with \scheme{Sdestroy_thread}. Since active threads operating in C code prevent the storage management system from garbage collecting, -a thread should be deactivated via \scheme{Sdeactivate_thread} whenever -it may spend a significant amount of time in C code. +a thread should be deactivated via \scheme{Sdeactivate_thread} or +through a \scheme{foreign-procedure} \scheme{__thread} declaration whenever +the thread may spend a significant amount of time in C code. This is especially important whenever the thread calls a C library function, like \scheme{read}, that may block indefinitely. Once deactivated, the thread must not touch any Scheme data or diff --git a/mats/Mf-ta6fb b/mats/Mf-ta6fb index fe3a659010..921d6098b4 100644 --- a/mats/Mf-ta6fb +++ b/mats/Mf-ta6fb @@ -21,7 +21,7 @@ fobj = foreign1.so include Mf-base foreign1.so: ${fsrc} ../boot/$m/scheme.h - cc -fPIC -shared -I${Include} -o foreign1.so ${fsrc} + cc -pthread -fPIC -shared -I${Include} -o foreign1.so ${fsrc} cat_flush: cat_flush.c cc -o cat_flush cat_flush.c diff --git a/mats/Mf-ta6le b/mats/Mf-ta6le index dc214ea4cb..cd014ec658 100644 --- a/mats/Mf-ta6le +++ b/mats/Mf-ta6le @@ -21,7 +21,7 @@ fobj = foreign1.so include Mf-base foreign1.so: ${fsrc} ../boot/$m/scheme.h - cc -m64 -fPIC -shared -I${Include} -o foreign1.so ${fsrc} + cc -m64 -pthread -fPIC -shared -I${Include} -o foreign1.so ${fsrc} cat_flush: cat_flush.c cc -o cat_flush cat_flush.c diff --git a/mats/Mf-ta6nb b/mats/Mf-ta6nb index 49ca02b48b..6b1929d81c 100644 --- a/mats/Mf-ta6nb +++ b/mats/Mf-ta6nb @@ -21,7 +21,7 @@ fobj = foreign1.so include Mf-base foreign1.so: ${fsrc} ../boot/$m/scheme.h - cc -fPIC -shared -I${Include} -o foreign1.so ${fsrc} + cc -pthread -fPIC -shared -I${Include} -o foreign1.so ${fsrc} cat_flush: cat_flush.c cc -o cat_flush cat_flush.c diff --git a/mats/Mf-ta6ob b/mats/Mf-ta6ob index f6381ebeef..a7aee9122f 100644 --- a/mats/Mf-ta6ob +++ b/mats/Mf-ta6ob @@ -21,7 +21,7 @@ fobj = foreign1.so include Mf-base foreign1.so: ${fsrc} ../boot/$m/scheme.h - cc -fPIC -shared -I${Include} -o foreign1.so ${fsrc} + cc -pthread -fPIC -shared -I${Include} -o foreign1.so ${fsrc} cat_flush: cat_flush.c cc -o cat_flush cat_flush.c diff --git a/mats/Mf-ta6osx b/mats/Mf-ta6osx index fe6e8c7ce5..42da5d7c5d 100644 --- a/mats/Mf-ta6osx +++ b/mats/Mf-ta6osx @@ -21,7 +21,7 @@ fobj = foreign1.so include Mf-base foreign1.so: ${fsrc} ../boot/$m/scheme.h - cc -m64 -dynamiclib -undefined dynamic_lookup -I${Include} -o foreign1.so ${fsrc} + cc -m64 -pthread -dynamiclib -undefined dynamic_lookup -I${Include} -o foreign1.so ${fsrc} cat_flush: cat_flush.c cc -o cat_flush cat_flush.c diff --git a/mats/Mf-ta6s2 b/mats/Mf-ta6s2 index 08233c261e..c5f0b0e145 100644 --- a/mats/Mf-ta6s2 +++ b/mats/Mf-ta6s2 @@ -21,7 +21,7 @@ fobj = foreign1.so include Mf-base foreign1.so: ${fsrc} ../boot/$m/scheme.h - gcc -m64 -fPIC -shared -I${Include} -o foreign1.so ${fsrc} + gcc -m64 -D_REENTRANT -fPIC -shared -I${Include} -o foreign1.so ${fsrc} cat_flush: cat_flush.c gcc -o cat_flush cat_flush.c diff --git a/mats/Mf-ti3fb b/mats/Mf-ti3fb index 4e77f7590e..c8911455ec 100644 --- a/mats/Mf-ti3fb +++ b/mats/Mf-ti3fb @@ -21,7 +21,7 @@ fobj = foreign1.so include Mf-base foreign1.so: ${fsrc} ../boot/$m/scheme.h - cc -fPIC -shared -I${Include} -o foreign1.so ${fsrc} + cc -pthread -fPIC -shared -I${Include} -o foreign1.so ${fsrc} cat_flush: cat_flush.c cc -o cat_flush cat_flush.c diff --git a/mats/Mf-ti3le b/mats/Mf-ti3le index 1f2d31aec6..12e77b8358 100644 --- a/mats/Mf-ti3le +++ b/mats/Mf-ti3le @@ -21,7 +21,7 @@ fobj = foreign1.so include Mf-base foreign1.so: ${fsrc} ../boot/$m/scheme.h - cc -m32 -fPIC -shared -I${Include} -o foreign1.so ${fsrc} + cc -m32 -pthread -fPIC -shared -I${Include} -o foreign1.so ${fsrc} cat_flush: cat_flush.c cc -o cat_flush cat_flush.c diff --git a/mats/Mf-ti3nb b/mats/Mf-ti3nb index 94ccf0102c..028c652722 100644 --- a/mats/Mf-ti3nb +++ b/mats/Mf-ti3nb @@ -21,7 +21,7 @@ fobj = foreign1.so include Mf-base foreign1.so: ${fsrc} ../boot/$m/scheme.h - cc -fPIC -shared -I${Include} -o foreign1.so ${fsrc} + cc -pthread -fPIC -shared -I${Include} -o foreign1.so ${fsrc} cat_flush: cat_flush.c cc -o cat_flush cat_flush.c diff --git a/mats/Mf-ti3ob b/mats/Mf-ti3ob index fca1378175..8a4741c022 100644 --- a/mats/Mf-ti3ob +++ b/mats/Mf-ti3ob @@ -21,7 +21,7 @@ fobj = foreign1.so include Mf-base foreign1.so: ${fsrc} ../boot/$m/scheme.h - cc -fPIC -shared -I${Include} -o foreign1.so ${fsrc} + cc -pthread -fPIC -shared -I${Include} -o foreign1.so ${fsrc} cat_flush: cat_flush.c cc -o cat_flush cat_flush.c diff --git a/mats/Mf-ti3osx b/mats/Mf-ti3osx index 9bb964e5d8..6913c3423d 100644 --- a/mats/Mf-ti3osx +++ b/mats/Mf-ti3osx @@ -21,7 +21,7 @@ fobj = foreign1.so include Mf-base foreign1.so: ${fsrc} ../boot/$m/scheme.h - cc -m32 -dynamiclib -undefined dynamic_lookup -I${Include} -o foreign1.so ${fsrc} + cc -m32 -pthread -dynamiclib -undefined dynamic_lookup -I${Include} -o foreign1.so ${fsrc} cat_flush: cat_flush.c cc -o cat_flush cat_flush.c diff --git a/mats/Mf-ti3s2 b/mats/Mf-ti3s2 index 2514adf351..bb3b3605ec 100644 --- a/mats/Mf-ti3s2 +++ b/mats/Mf-ti3s2 @@ -21,7 +21,7 @@ fobj = foreign1.so include Mf-base foreign1.so: ${fsrc} ../boot/$m/scheme.h - gcc -m32 -fPIC -shared -I${Include} -o foreign1.so ${fsrc} + gcc -m32 -D_REENTRANT -fPIC -shared -I${Include} -o foreign1.so ${fsrc} cat_flush: cat_flush.c gcc -o cat_flush cat_flush.c diff --git a/mats/Mf-tppc32le b/mats/Mf-tppc32le index 6c8945ca64..a12b515dee 100644 --- a/mats/Mf-tppc32le +++ b/mats/Mf-tppc32le @@ -21,7 +21,7 @@ fobj = foreign1.so include Mf-base foreign1.so: ${fsrc} ../boot/$m/scheme.h - cc -m32 -fPIC -shared -I${Include} -o foreign1.so ${fsrc} + cc -m32 -pthread -fPIC -shared -I${Include} -o foreign1.so ${fsrc} cat_flush: cat_flush.c cc -o cat_flush cat_flush.c diff --git a/mats/foreign.ms b/mats/foreign.ms index dbdd8ba5bb..4f81ea2fe9 100644 --- a/mats/foreign.ms +++ b/mats/foreign.ms @@ -2652,49 +2652,49 @@ (define-ftype i64 integer-64) (define-syntax check* (syntax-rules () - [(_ T s [vi ...] [T-ref ...] [T-set! ...]) + [(_ (conv ...) T s [vi ...] [T-ref ...] [T-set! ...]) (let () - (define-ftype callback (function ((& T)) double)) - (define-ftype callback-two (function ((& T) (& T)) double)) - (define-ftype pre-int-callback (function (int (& T)) double)) - (define-ftype pre-double-callback (function (double (& T)) double)) - (define-ftype callback-r (function () (& T))) - (define get (foreign-procedure (format "f4_get~a" s) + (define-ftype callback (function conv ... ((& T)) double)) + (define-ftype callback-two (function conv ... ((& T) (& T)) double)) + (define-ftype pre-int-callback (function conv ... (int (& T)) double)) + (define-ftype pre-double-callback (function conv ... (double (& T)) double)) + (define-ftype callback-r (function conv ... () (& T))) + (define get (foreign-procedure conv ... (format "f4_get~a" s) () (& T))) - (define sum (foreign-procedure (format "f4_sum~a" s) + (define sum (foreign-procedure conv ... (format "f4_sum~a" s) ((& T)) double)) - (define sum_two (foreign-procedure (format "f4_sum_two~a" s) + (define sum_two (foreign-procedure conv ... (format "f4_sum_two~a" s) ((& T) (& T)) double)) - (define sum_pre_int (foreign-procedure (format "f4_sum_pre_int~a" s) + (define sum_pre_int (foreign-procedure conv ... (format "f4_sum_pre_int~a" s) (int (& T)) double)) - (define sum_pre_int_int (foreign-procedure (format "f4_sum_pre_int_int~a" s) + (define sum_pre_int_int (foreign-procedure conv ... (format "f4_sum_pre_int_int~a" s) (int int (& T)) double)) - (define sum_pre_int_int_int_int (foreign-procedure (format "f4_sum_pre_int_int_int_int~a" s) + (define sum_pre_int_int_int_int (foreign-procedure conv ... (format "f4_sum_pre_int_int_int_int~a" s) (int int int int (& T)) double)) - (define sum_pre_int_int_int_int_int_int (foreign-procedure (format "f4_sum_pre_int_int_int_int_int_int~a" s) + (define sum_pre_int_int_int_int_int_int (foreign-procedure conv ... (format "f4_sum_pre_int_int_int_int_int_int~a" s) (int int int int int int (& T)) double)) - (define sum_post_int (foreign-procedure (format "f4_sum~a_post_int" s) + (define sum_post_int (foreign-procedure conv ... (format "f4_sum~a_post_int" s) ((& T) int) double)) - (define sum_pre_double (foreign-procedure (format "f4_sum_pre_double~a" s) + (define sum_pre_double (foreign-procedure conv ... (format "f4_sum_pre_double~a" s) (double (& T)) double)) - (define sum_pre_double_double (foreign-procedure (format "f4_sum_pre_double_double~a" s) + (define sum_pre_double_double (foreign-procedure conv ... (format "f4_sum_pre_double_double~a" s) (double double (& T)) double)) - (define sum_pre_double_double_double_double (foreign-procedure (format "f4_sum_pre_double_double_double_double~a" s) + (define sum_pre_double_double_double_double (foreign-procedure conv ... (format "f4_sum_pre_double_double_double_double~a" s) (double double double double (& T)) double)) (define sum_pre_double_double_double_double_double_double_double_double - (foreign-procedure (format "f4_sum_pre_double_double_double_double_double_double_double_double~a" s) + (foreign-procedure conv ... (format "f4_sum_pre_double_double_double_double_double_double_double_double~a" s) (double double double double double double double double (& T)) double)) - (define sum_post_double (foreign-procedure (format "f4_sum~a_post_double" s) + (define sum_post_double (foreign-procedure conv ... (format "f4_sum~a_post_double" s) ((& T) double) double)) - (define cb_send (foreign-procedure (format "f4_cb_send~a" s) + (define cb_send (foreign-procedure conv ... (format "f4_cb_send~a" s) ((* callback)) double)) - (define cb_send_two (foreign-procedure (format "f4_cb_send_two~a" s) + (define cb_send_two (foreign-procedure conv ... (format "f4_cb_send_two~a" s) ((* callback-two)) double)) - (define cb_send_pre_int (foreign-procedure (format "f4_cb_send_pre_int~a" s) + (define cb_send_pre_int (foreign-procedure conv ... (format "f4_cb_send_pre_int~a" s) ((* pre-int-callback)) double)) - (define cb_send_pre_double (foreign-procedure (format "f4_cb_send_pre_double~a" s) + (define cb_send_pre_double (foreign-procedure conv ... (format "f4_cb_send_pre_double~a" s) ((* pre-double-callback)) double)) - (define sum_cb (foreign-procedure (format "f4_sum_cb~a" s) + (define sum_cb (foreign-procedure conv ... (format "f4_sum_cb~a" s) ((* callback-r)) double)) (define-syntax with-callback (syntax-rules () @@ -2758,6 +2758,11 @@ (begin (free_at_boundary (ftype-pointer-address a)) #t)))))])) + (define-syntax check*t + (syntax-rules () + [(_ arg ...) + (and (check* () arg ...) + (check* (__thread) arg ...))])) (define-syntax check-n (syntax-rules () [(_ [ni ti vi] ...) @@ -2770,17 +2775,17 @@ [(null? l) '()] [else (cons (format "_~a" (car l)) (loop (cdr l)))])))) - (check* T s - [vi ...] - [(lambda (a) (ftype-ref T (ni) a)) ...] - [(lambda (a) (ftype-set! T (ni) a vi)) ...]))])) + (check*t T s + [vi ...] + [(lambda (a) (ftype-ref T (ni) a)) ...] + [(lambda (a) (ftype-set! T (ni) a vi)) ...]))])) (define-syntax check (syntax-rules () [(_ t1 v1) - (check* t1 (format "_~a" 't1) - [v1] - [(lambda (a) (ftype-ref t1 () a))] - [(lambda (a) (ftype-set! t1 () a v1))])])) + (check*t t1 (format "_~a" 't1) + [v1] + [(lambda (a) (ftype-ref t1 () a))] + [(lambda (a) (ftype-set! t1 () a v1))])])) (define-syntax check-union (syntax-rules () [(_ [n0 t0 v0] [ni ti vi] ...) @@ -2793,10 +2798,10 @@ [(null? l) '()] [else (cons (format "_~a" (car l)) (loop (cdr l)))])))) - (check* T s - [v0] - [(lambda (a) (ftype-ref T (n0) a))] - [(lambda (a) (ftype-set! T (n0) a v0))]))])) + (check*t T s + [v0] + [(lambda (a) (ftype-ref T (n0) a))] + [(lambda (a) (ftype-set! T (n0) a v0))]))])) (define-syntax check-1 (syntax-rules () [(_ t1 v1) @@ -2887,4 +2892,113 @@ (check-union [x int 48] [y int 0]) (check-union [x i64 43] [y int 0]) (check-union [x float 58.0] [y int 0]) - (check-union [x double 68.0] [y int 0])) + (check-union [x double 68.0] [y int 0]) + ) + +(mat thread + (begin + (define-ftype thread-callback-T (function __thread (double) double)) + (define (call-with-thread-callback cb-proc proc) + (let ([callback (make-ftype-pointer thread-callback-T cb-proc)]) + (let ([r (proc callback)]) + (unlock-object + (foreign-callable-code-object + (ftype-pointer-address callback))) + r))) + (define (call-in-unknown-thread-1 proc arg n-times) + ;; Baseline implementation that uses the current thread + (let loop ([i 0] [arg arg]) + (cond + [(= i n-times) arg] + [else (loop (fx+ i 1) (proc arg))]))) + (define call-in-unknown-thread-2 + ;; Call in the current thread, but through the foreign procedure + (if (and (threaded?) + (foreign-entry? "call_in_unknown_thread")) + (let ([call (foreign-procedure "call_in_unknown_thread" + ((* thread-callback-T) double int boolean boolean) + double)]) + (lambda (proc arg n-times) + (call-with-thread-callback + proc + (lambda (callback) (call callback arg n-times #f #t))))) + call-in-unknown-thread-1)) + (define call-in-unknown-thread-3 + ;; Call in a truly unknown thread: + (if (and (threaded?) + (foreign-entry? "call_in_unknown_thread")) + (let ([call (foreign-procedure "call_in_unknown_thread" + ((* thread-callback-T) double int boolean boolean) + double)]) + (lambda (proc arg n-times) + (call-with-thread-callback + proc + (lambda (callback) (call callback arg n-times #t #t))))) + call-in-unknown-thread-1)) + (define call-in-unknown-thread-4 + ;; In an truly unknown thread, but also using `__thread` to + ;; deactivate the current thread instead of using `Sdeactivate_thread` + ;; within the foreign function: + (if (and (threaded?) + (foreign-entry? "call_in_unknown_thread")) + (let ([call (foreign-procedure __thread "call_in_unknown_thread" + ((* thread-callback-T) double int boolean boolean) + double)]) + (lambda (proc arg n-times) + (call-with-thread-callback + proc + (lambda (callback) (call callback arg n-times #t #f))))) + call-in-unknown-thread-1)) + #t) + ;; These tests will pass only if `collect` can run, where `collect` + ;; can run only if a single thread is active + (equal? (call-in-unknown-thread-1 (lambda (n) (collect 0) (+ n 1.0)) 3.5 1) + 4.5) + (equal? (call-in-unknown-thread-2 (lambda (n) (collect 0) (+ n 1.0)) 3.5 2) + 5.5) + (equal? (call-in-unknown-thread-3 (lambda (n) (collect 0) (+ n 1.0)) 3.5 3) + 6.5) + (equal? (call-in-unknown-thread-4 (lambda (n) (collect 0) (+ n 1.0)) 3.5 4) + 7.5) + (equal? (let loop ([n 10.0]) + (call-in-unknown-thread-4 + (lambda (n) + (cond + [(zero? n) (collect) 0.5] + [else (+ 1.0 (loop (- n 1.0)))])) + n + 1)) + 10.5) + ;; Try to crash a `__thread` foreign-procedure call by moving the + ;; return address out from under the foreign procedure. This attempt + ;; should fail, because deactivating a thread first locks the + ;; current code object. + (or (not (threaded?)) + (let ([m (make-mutex)] + [done? #f] + [ok? #t]) + (define object->addr + (foreign-procedure "(cs)fxmul" + (scheme-object uptr) + uptr)) + (fork-thread (lambda () + (let loop ([i 10]) + (unless (zero? i) + (let ([spin (eval '(foreign-procedure __thread "spin_a_while" (int unsigned unsigned) unsigned))]) + (spin 1000000 0 1)) + (loop (sub1 i)))) + (mutex-acquire m) + (set! done? #t) + (mutex-release m))) + (let loop () + (mutex-acquire m) + (let ([done? done?]) + (mutex-release m) + (unless done? + (let loop ([i 10]) + (unless (zero? i) + (eval '(foreign-procedure "spin_a_while" () void)) + (loop (sub1 i)))) + (loop)))) + ok?)) + ) diff --git a/mats/foreign4.c b/mats/foreign4.c index b2bfb62f0b..dd17f38103 100644 --- a/mats/foreign4.c +++ b/mats/foreign4.c @@ -17,6 +17,18 @@ #include #include +#if defined(_REENTRANT) || defined(_WIN32) +# ifdef _WIN32 +# include +# define SCHEME_IMPORT +# include "scheme.h" +# else +# include +# include "scheme.h" +# endif +# undef EXPORT +#endif + typedef signed char i8; typedef unsigned char u8; typedef unsigned short u16; @@ -63,6 +75,78 @@ EXPORT void free_at_boundary(void *p) } #endif +#if defined(_REENTRANT) || defined(_WIN32) + +typedef struct in_thread_args_t { + double (*proc)(double arg); + double arg; + int n_times; +} in_thread_args_t; + +void *in_thread(void *_proc_and_arg) +{ + in_thread_args_t *proc_and_arg = _proc_and_arg; + int i; + + for (i = 0; i < proc_and_arg->n_times; i++) { + proc_and_arg->arg = proc_and_arg->proc(proc_and_arg->arg); + } + + return NULL; +} + +#if defined(_WIN32) +# define os_thread_t unsigned +# define os_thread_create(addr, proc, arg) (((*(addr)) = _beginthread(proc, 0, arg)) == -1) +# define os_thread_join(t) WaitForSingleObject((HANDLE)(intptr_t)(t), INFINITE) +#else +# define os_thread_t pthread_t +# define os_thread_create(addr, proc, arg) pthread_create(addr, NULL, in_thread, proc_and_arg) +# define os_thread_join(t) pthread_join(t, NULL) +#endif + +EXPORT double call_in_unknown_thread(double (*proc)(double arg), double arg, int n_times, + int do_fork, int do_deactivate) { + os_thread_t t; + in_thread_args_t *proc_and_arg = malloc(sizeof(in_thread_args_t)); + + proc_and_arg->proc = proc; + proc_and_arg->arg = arg; + proc_and_arg->n_times = n_times; + + if (do_fork) { + if (!os_thread_create(&t, in_thread, proc_and_arg)) { + if (do_deactivate) + Sdeactivate_thread(); + os_thread_join(t); + if (do_deactivate) + Sactivate_thread(); + } + } else { + in_thread(proc_and_arg); + } + + arg = proc_and_arg->arg; + free(proc_and_arg); + + return arg; +} + +#endif + +EXPORT unsigned spin_a_while(int amt, unsigned a, unsigned b) +{ + int i; + + /* A loop that the compiler is unlikely to optimize away */ + for (i = 0; i < amt; i++) { + a = a + 1; + b = b + a; + } + + return a; +} + #define GEN(ts, init, sum) \ EXPORT ts f4_get_ ## ts () { \ ts r = init; \ diff --git a/release_notes/release_notes.stex b/release_notes/release_notes.stex index 0ecff20cfb..c5389f1d6d 100644 --- a/release_notes/release_notes.stex +++ b/release_notes/release_notes.stex @@ -70,6 +70,17 @@ address. When \scheme{(& \var{ftype})} is used as a result type, an extra \scheme{(* \var{ftype})} argument must be provided to receive the copied result, and the directly returned result is unspecified. +\subsection{Foreign-procedure thread activation (9.5.1)} + +A new \scheme{__thread} foreign-procedure convention, which can be +combined with other conventions, causes a foreign-procedure call +to deactive the current thread during the call. Similarly, the +\scheme{__thread} convention modifier for callables causes the +current thread to be activated on entry to the callable, and the +activation state is reverted on exit from the callable; this +activation makes callables work from threads that are otherwise +unknown to the Scheme system. + \subsection{Record equality and hashing (9.5)} The new procedures \scheme{record-type-equal-procedure} and diff --git a/s/base-lang.ss b/s/base-lang.ss index 8a18331ca6..5a6ca29807 100644 --- a/s/base-lang.ss +++ b/s/base-lang.ss @@ -155,7 +155,7 @@ (define convention? (lambda (x) - (or (eq? x #f) (symbol? x)))) + (and (list? x) (andmap symbol? x)))) (define-record-type preinfo (nongenerative #{preinfo e23pkvo5btgapnzomqgegm-2}) diff --git a/s/cmacros.ss b/s/cmacros.ss index a215006125..7c48732a94 100644 --- a/s/cmacros.ss +++ b/s/cmacros.ss @@ -1385,6 +1385,10 @@ (cons (string->symbol (substring str 3 (- n 5))) params) params)))))) +(define-constant unactivate-mode-noop 0) +(define-constant unactivate-mode-deactivate 1) +(define-constant unactivate-mode-destroy 2) + (define-primitive-structure-disps rtd-counts type-typed-object ([iptr type] [U64 timestamp] @@ -2622,6 +2626,9 @@ split-and-resize raw-collect-cond raw-tc-mutex + activate-thread + deactivate-thread + unactivate-thread handle-values-error handle-mvlet-error handle-arg-error diff --git a/s/cprep.ss b/s/cprep.ss index dca079e1af..a58b01f2e5 100644 --- a/s/cprep.ss +++ b/s/cprep.ss @@ -85,11 +85,14 @@ (uncprep-sequence e2 ls))] [else (cons (uncprep x) ls)]))) (define uncprep-fp-conv - (lambda (x) - (case x - [(i3nt-stdcall) '__stdcall] - [(i3nt-com) '__com] - [else #f]))) + (lambda (x*) + (map (lambda (x) + (case x + [(i3nt-stdcall) '__stdcall] + [(i3nt-com) '__com] + [(adjust-active) '__thread] + [else #f])) + x*))) (define-who uncprep-fp-specifier (lambda (x) (nanopass-case (Ltype Type) x diff --git a/s/ftype.ss b/s/ftype.ss index b7d84424b1..9e57e065f3 100644 --- a/s/ftype.ss +++ b/s/ftype.ss @@ -527,7 +527,7 @@ ftype operators: [(function-kwd (arg-type ...) result-type) (eq? (datum function-kwd) 'function) (f #'(function-kwd #f (arg-type ...) result-type) #f stype funok?)] - [(function-kwd conv (arg-type ...) result-type) + [(function-kwd conv ... (arg-type ...) result-type) (eq? (datum function-kwd) 'function) (let () (define filter-type @@ -539,7 +539,7 @@ ftype operators: (make-ftd-function rtd/fptr (and defid (symbol->string (syntax->datum defid))) stype #f #f - ($filter-conv 'function-ftype #'conv) + ($filter-conv 'function-ftype #'(conv ...)) (map (lambda (x) (filter-type r x #f)) #'(arg-type ...)) (filter-type r #'result-type #t)))] [(packed-kwd ftype) diff --git a/s/np-languages.ss b/s/np-languages.ss index a5c00e0a8b..5fa4621ccc 100644 --- a/s/np-languages.ss +++ b/s/np-languages.ss @@ -78,9 +78,9 @@ (import (nanopass)) (include "base-lang.ss") - ; convention is a symbol or #f (we're assuming the front end already verified - ; the convention is a valid one for this machine-type - (define convention? (lambda (x) (or (symbol? x) (eq? #f x)))) + ; convention is a list of symbols (we're assuming the front end already verified + ; the convention is a valid one for this machine-type) + (define convention? (lambda (x) (and (list? x) (andmap symbol? x)))) ; r6rs says a quote subform should be a datum, not must be a datum ; chez scheme allows a quote subform to be any value @@ -489,6 +489,7 @@ (declare-primitive c-call effect #f) (declare-primitive c-simple-call effect #f) (declare-primitive c-simple-return effect #f) + (declare-primitive deactivate-thread effect #f) ; threaded version only (declare-primitive fl* effect #f) (declare-primitive fl+ effect #f) (declare-primitive fl- effect #f) @@ -521,6 +522,7 @@ (declare-primitive store-single effect #f) (declare-primitive store-single->double effect #f) (declare-primitive store-with-update effect #f) ; ppc + (declare-primitive unactivate-thread effect #f) ; threaded version only (declare-primitive vpush-multiple effect #f) ; arm (declare-primitive < pred #t) @@ -550,6 +552,7 @@ (declare-primitive fstps value #f) ; x86 only (declare-primitive get-double value #t) ; x86_64 (declare-primitive get-tc value #f) ; threaded version only + (declare-primitive activate-thread value #f) ; threaded version only (declare-primitive lea1 value #t) (declare-primitive lea2 value #t) (declare-primitive load value #t) diff --git a/s/ppc32.ss b/s/ppc32.ss index 815fae889b..9c868b9549 100644 --- a/s/ppc32.ss +++ b/s/ppc32.ss @@ -57,7 +57,7 @@ [%xp %r20 #t 20] [%ts %r14 #t 14] [%td %r15 #t 15] - [%ac1 %r12 #f 12] + [%ac1 %r12 %deact #f 12] [%ret %r17 #t 17] [%cp %r24 #t 24] [%yp %r27 #t 27] @@ -668,6 +668,30 @@ `(set! ,(make-live-info) ,u (asm ,null-info ,asm-kill)) `(set! ,(make-live-info) ,z (asm ,info ,asm-get-tc ,u))))]) + ;; like get-tc + (define-instruction value (activate-thread) + [(op (z ur)) + (safe-assert (eq? z %Cretval)) + (let ([u (make-tmp 'u)]) + (seq + `(set! ,(make-live-info) ,u (asm ,null-info ,asm-kill)) + `(set! ,(make-live-info) ,z (asm ,info ,asm-activate-thread ,u))))]) + + (define-instruction effect (deactivate-thread) + [(op) + (let ([u (make-tmp 'u)]) + (seq + `(set! ,(make-live-info) ,u (asm ,null-info ,asm-kill)) + `(asm ,info ,asm-deactivate-thread ,u)))]) + + (define-instruction effect (unactivate-thread) + [(op (z ur)) + (safe-assert (eq? z %Carg1)) + (let ([u (make-tmp 'u)]) + (seq + `(set! ,(make-live-info) ,u (asm ,null-info ,asm-kill)) + `(asm ,info ,asm-unactivate-thread ,u)))]) + (define-instruction value (asmlibcall) [(op (z ur)) (let ([u (make-tmp 'u)]) @@ -823,7 +847,7 @@ shift-count? asm-isync ; threaded version specific - asm-get-tc + asm-get-tc asm-activate-thread asm-deactivate-thread asm-unactivate-thread ; machine dependent exports asm-kill) @@ -1906,6 +1930,21 @@ (lambda (code* dest tmp . ignore) ; dest is ignored, since it is always Cretval (asm-helper-call code* target #f tmp)))) + (define asm-activate-thread + (let ([target `(ppc32-call 0 (entry ,(lookup-c-entry activate-thread)))]) + (lambda (code* dest tmp . ignore) ; dest is ignored, since it is always Cretval + (asm-helper-call code* target #f tmp)))) + + (define asm-deactivate-thread + (let ([target `(ppc32-call 0 (entry ,(lookup-c-entry deactivate-thread)))]) + (lambda (code* tmp . ignore) + (asm-helper-call code* target #f tmp)))) + + (define asm-unactivate-thread + (let ([target `(ppc32-call 0 (entry ,(lookup-c-entry unactivate-thread)))]) + (lambda (code* tmp . ignore) + (asm-helper-call code* target #f tmp)))) + (define-who asm-return-address (lambda (dest l incr-offset next-addr) (make-rachunk dest l incr-offset next-addr @@ -2133,6 +2172,7 @@ (define align (lambda (b x) (let ([k (- b 1)]) (fxlogand (fx+ x k) (fxlognot k))))) (define gp-parameter-regs (lambda () (list %Carg1 %Carg2 %Carg3 %Carg4 %Carg5 %Carg6 %Carg7 %Carg8))) (define fp-parameter-regs (lambda () (list %Cfparg1 %Cfparg2 %Cfparg3 %Cfparg4 %Cfparg5 %Cfparg6 %Cfparg7 %Cfparg8))) + (define fp-result-regs (lambda () (list %Cfpretval))) (define (indirect-result-that-fits-in-registers? result-type) (nanopass-case (Ltype Type) result-type [(fp-ftd& ,ftd) (not ($ftd-compound? ftd))] @@ -2141,6 +2181,32 @@ (nanopass-case (Ltype Type) result-type [(fp-ftd& ,ftd) ($ftd-compound? ftd)] [else #f])) + + (module (push-registers pop-registers) + ;; stack offset must be 8-byte aligned if fp-reg-count is non-zero + (define (move-registers regs fp-reg-count fp-regs load? offset e) + (with-output-language (L13 Effect) + (cond + [(fx> fp-reg-count 0) + ;; Push floating-point first to get correct alignment + (let ([offset (align 8 offset)]) + (move-registers regs (fx- fp-reg-count 1) (cdr fp-regs) load? (fx+ offset 8) + (cond + [load? `(seq ,e (inline ,(make-info-loadfl (car fp-regs)) ,%load-double ,%sp ,%zero (immediate ,offset)))] + [else `(seq (inline ,(make-info-loadfl (car fp-regs)) ,%store-double ,%sp ,%zero (immediate ,offset)) ,e)])))] + [(pair? regs) + (move-registers (cdr regs) 0 '() load? (fx+ offset 4) + (cond + [load? `(seq ,e (set! ,(car regs) ,(%mref ,%sp ,offset)))] + [else `(seq (set! ,(%mref ,%sp ,offset) ,(car regs)) ,e)]))] + [else e]))) + ;; Add "pushes" before e + (define (push-registers regs fp-reg-count fp-regs offset e) + (move-registers regs fp-reg-count fp-regs #f offset e)) + ;; Add "pops" after e + (define (pop-registers regs fp-reg-count fp-regs offset e) + (move-registers regs fp-reg-count fp-regs #t offset e))) + (define-who asm-foreign-call (with-output-language (L13 Effect) (define load-double-stack @@ -2233,10 +2299,12 @@ (lambda (types) ;; NB: start stack pointer at 8 to put arguments above the linkage area (let loop ([types types] [locs '()] [live* '()] [int* (gp-parameter-regs)] [flt* (fp-parameter-regs)] [isp 8] + ;; needed when adjusting active: + [fp-live-count 0] ;; configured for `ftd-fp&` unpacking of floats: [fp-disp (constant flonum-data-disp)] [single? #f]) (if (null? types) - (values isp locs live*) + (values isp locs live* fp-live-count) (nanopass-case (Ltype Type) (car types) [(fp-double-float) (if (constant software-floating-point) @@ -2245,21 +2313,21 @@ (let ([isp (align 8 isp)]) (loop (cdr types) (cons (load-double-stack isp fp-disp) locs) - live* '() flt* (fx+ isp 8) + live* '() flt* (fx+ isp 8) fp-live-count (constant flonum-data-disp) #f)) (loop (cdr types) (cons (load-soft-double-reg (cadr int*) (car int*) fp-disp) locs) - (cons* (car int*) (cadr int*) live*) (cddr int*) flt* isp + (cons* (car int*) (cadr int*) live*) (cddr int*) flt* isp fp-live-count (constant flonum-data-disp) #f))) (if (null? flt*) (let ([isp (align 8 isp)]) (loop (cdr types) (cons (load-double-stack isp fp-disp) locs) - live* int* '() (fx+ isp 8) + live* int* '() (fx+ isp 8) fp-live-count (constant flonum-data-disp) #f)) (loop (cdr types) (cons (load-double-reg (car flt*) fp-disp) locs) - live* int* (cdr flt*) isp + live* int* (cdr flt*) isp (fx+ fp-live-count 1) (constant flonum-data-disp) #f)))] [(fp-single-float) (if (constant software-floating-point) @@ -2267,29 +2335,29 @@ ; NB: ABI says singles are passed as doubles on the stack, but gcc/linux doesn't (loop (cdr types) (cons (load-single-stack isp fp-disp single?) locs) - live* '() flt* (fx+ isp 4) + live* '() flt* (fx+ isp 4) fp-live-count (constant flonum-data-disp) #f) (loop (cdr types) (cons (load-soft-single-reg (car int*) fp-disp single?) locs) - (cons (car int*) live*) (cdr int*) flt* isp + (cons (car int*) live*) (cdr int*) flt* isp fp-live-count (constant flonum-data-disp) #f)) (if (null? flt*) ; NB: ABI says singles are passed as doubles on the stack, but gcc/linux doesn't (let ([isp (align 4 isp)]) (loop (cdr types) (cons (load-single-stack isp fp-disp single?) locs) - live* int* '() (fx+ isp 4) + live* int* '() (fx+ isp 4) fp-live-count (constant flonum-data-disp) #f)) (loop (cdr types) (cons (load-single-reg (car flt*) fp-disp single?) locs) - live* int* (cdr flt*) isp + live* int* (cdr flt*) isp (fx+ fp-live-count 1) (constant flonum-data-disp) #f)))] [(fp-ftd& ,ftd) (cond [($ftd-compound? ftd) ;; pass as pointer (let ([pointer-type (with-output-language (Ltype Type) `(fp-integer 32))]) - (loop (cons pointer-type (cdr types)) locs live* int* flt* isp + (loop (cons pointer-type (cdr types)) locs live* int* flt* isp fp-live-count (constant flonum-data-disp) #f))] [else ;; extract content and pass that content @@ -2301,7 +2369,7 @@ (case ($ftd-size ftd) [(4) `(fp-single-float)] [else `(fp-double-float)]))]) - (loop (cons unpacked-type (cdr types)) locs live* int* flt* isp + (loop (cons unpacked-type (cdr types)) locs live* int* flt* isp fp-live-count ;; no floating displacement within pointer: 0 ;; in case of float, load as single-float: @@ -2313,21 +2381,21 @@ (let ([isp (align 8 isp)]) (loop (cdr types) (cons (load-indirect-int64-stack isp) locs) - live* '() flt* (fx+ isp 8) + live* '() flt* (fx+ isp 8) fp-live-count (constant flonum-data-disp) #f)) (loop (cdr types) (cons (load-indirect-int64-reg (cadr int*) (car int*)) locs) - (cons* (car int*) (cadr int*) live*) (cddr int*) flt* isp + (cons* (car int*) (cadr int*) live*) (cddr int*) flt* isp fp-live-count (constant flonum-data-disp) #f)))] [else (if (null? int*) (loop (cdr types) (cons (load-indirect-int-stack isp ($ftd-size ftd)) locs) - live* '() flt* (fx+ isp 4) + live* '() flt* (fx+ isp 4) fp-live-count (constant flonum-data-disp) #f) (loop (cdr types) (cons (load-indirect-int-reg (car int*) ($ftd-size ftd) category) locs) - (cons (car int*) live*) (cdr int*) flt* isp + (cons (car int*) live*) (cdr int*) flt* isp fp-live-count (constant flonum-data-disp) #f))]))])] [else (if (nanopass-case (Ltype Type) (car types) @@ -2339,20 +2407,20 @@ (let ([isp (align 8 isp)]) (loop (cdr types) (cons (load-int64-stack isp) locs) - live* '() flt* (fx+ isp 8) + live* '() flt* (fx+ isp 8) fp-live-count (constant flonum-data-disp) #f)) (loop (cdr types) (cons (load-int64-reg (cadr int*) (car int*)) locs) - (cons* (car int*) (cadr int*) live*) (cddr int*) flt* isp + (cons* (car int*) (cadr int*) live*) (cddr int*) flt* isp fp-live-count (constant flonum-data-disp) #f))) (if (null? int*) (loop (cdr types) (cons (load-int-stack isp) locs) - live* '() flt* (fx+ isp 4) + live* '() flt* (fx+ isp 4) fp-live-count (constant flonum-data-disp) #f) (loop (cdr types) (cons (load-int-reg (car int*)) locs) - (cons (car int*) live*) (cdr int*) flt* isp + (cons (car int*) live*) (cdr int*) flt* isp fp-live-count (constant flonum-data-disp) #f)))]))))) (define do-indirect-result-from-registers (lambda (ftd offset) @@ -2374,38 +2442,69 @@ (inline ,(make-info-load 'integer-32 #f) ,%store ,tmp ,%zero (immediate 0) ,%Cretval-high) (inline ,(make-info-load 'integer-32 #f) ,%store ,tmp ,%zero (immediate 4) ,%Cretval-low))] [else (sorry! who "unexpected result size")])]))))) + (define (add-deactivate t0 offset live* fp-live-count result-live* result-fp-live-count e) + (let ([save-and-restore + (lambda (regs fp-count fp-regs e) + (cond + [(and (null? regs) (fx= 0 fp-count)) e] + [else + (pop-registers regs fp-count fp-regs offset + (push-registers regs fp-count fp-regs offset + e))]))]) + (%seq + (set! ,%deact ,t0) + ,(save-and-restore (cons %deact live*) fp-live-count (fp-parameter-regs) (%inline deactivate-thread)) + ,e + ,(save-and-restore result-live* result-fp-live-count (fp-result-regs) `(set! ,%Cretval ,(%inline activate-thread)))))) (lambda (info) (safe-assert (reg-callee-save? %tc)) ; no need to save-restore (let* ([arg-type* (info-foreign-arg-type* info)] [result-type (info-foreign-result-type info)] - [fill-result-here? (indirect-result-that-fits-in-registers? result-type)]) + [fill-result-here? (indirect-result-that-fits-in-registers? result-type)] + [adjust-active? (memq 'adjust-active (info-foreign-conv info))]) (with-values (do-args (if fill-result-here? (cdr arg-type*) arg-type*)) - (lambda (frame-size locs live*) + (lambda (orig-frame-size locs live* fp-live-count) ;; NB: add 4 to frame size for CR save word - (let ([frame-size (align 16 (fx+ frame-size 4 (if fill-result-here? 4 0)))]) + (let* ([fill-stash-offset orig-frame-size] + [base-frame-size (fx+ orig-frame-size (if fill-result-here? 4 0))] + [deactivate-save-offset (if (and adjust-active? (fx> fp-live-count 0)) + (align 8 base-frame-size) ; for `double` save + base-frame-size)] + [frame-size (align 16 (fx+ 4 ; for CR save + (if adjust-active? + (fx+ deactivate-save-offset + (fx* fp-live-count 8) + (fx* (length live*) 4)) + deactivate-save-offset)))]) (values (lambda () (%inline store-with-update ,%Csp ,%Csp (immediate ,(fx- frame-size)))) (let ([locs (reverse locs)]) (cond [fill-result-here? ;; stash extra argument on the stack to be retrieved after call and filled with the result: - (cons (load-int-stack frame-size) locs)] + (cons (load-int-stack fill-stash-offset) locs)] [else locs])) (lambda (t0) - (if (constant software-floating-point) + (define (make-call result-live* result-fp-live-count) + (cond + [adjust-active? + (add-deactivate t0 deactivate-save-offset live* fp-live-count result-live* result-fp-live-count + `(inline ,(make-info-kill*-live* result-live* live*) ,%c-call ,%deact))] + [else `(inline ,(make-info-kill*-live* result-live* live*) ,%c-call ,t0)])) + (if (constant software-floating-point) (let () (define handle-64-bit (lambda () - `(inline ,(make-info-kill*-live* (reg-list %Cretval-high %Cretval-low) live*) ,%c-call ,t0))) + (make-call (reg-list %Cretval-high %Cretval-low) 0))) (define handle-32-bit (lambda () - `(inline ,(make-info-kill*-live* (reg-list %Cretval) live*) ,%c-call ,t0))) + (make-call (reg-list %Cretval) 0))) (define handle-integer-cases (lambda (bits) (case bits [(8 16 32) (handle-32-bit)] [(64) (handle-64-bit)] - [else (sorry! who "unexpected asm-foriegn-procedures fp-integer size ~s" bits)]))) + [else (sorry! who "unexpected asm-foreign-procedures fp-integer size ~s" bits)]))) (define (handle-ftd&-case ftd) (cond [fill-result-here? @@ -2413,21 +2512,21 @@ ,(if (> ($ftd-size ftd) 4) (handle-64-bit) (handle-32-bit)) - ,(do-indirect-result-from-registers ftd frame-size))] - [else `(inline ,(make-info-kill*-live* (reg-list) live*) ,%c-call ,t0)])) + ,(do-indirect-result-from-registers ftd fill-stash-offset))] + [else (make-call (reg-list) 0)])) (nanopass-case (Ltype Type) result-type [(fp-double-float) (handle-64-bit)] [(fp-single-float) (handle-32-bit)] [(fp-integer ,bits) (handle-integer-cases bits)] [(fp-integer ,bits) (handle-integer-cases bits)] [(fp-ftd& ,ftd) (handle-ftd&-case ftd)] - [else `(inline ,(make-info-kill*-live* (reg-list %Cretval) live*) ,%c-call ,t0)])) + [else (make-call (reg-list %Cretval) 0)])) (let () (define handle-integer-cases (lambda (bits) (case bits - [(8 16 32) `(inline ,(make-info-kill*-live* (reg-list %Cretval) live*) ,%c-call ,t0)] - [(64) `(inline ,(make-info-kill*-live* (reg-list %Cretval-high %Cretval-low) live*) ,%c-call ,t0)] + [(8 16 32) (make-call (reg-list %Cretval) 0)] + [(64) (make-call (reg-list %Cretval-high %Cretval-low) 0)] [else (sorry! who "unexpected asm-foreign-procedures fp-integer size ~s" bits)]))) (define (handle-ftd&-case ftd) (cond @@ -2435,16 +2534,16 @@ (%seq ,(if (not (eq? 'float ($ftd-atomic-category ftd))) (handle-integer-cases (* 8 ($ftd-size ftd))) - `(inline ,(make-info-kill*-live* (reg-list) live*) ,%c-call ,t0)) - ,(do-indirect-result-from-registers ftd frame-size))] + (make-call (reg-list) 1)) + ,(do-indirect-result-from-registers ftd fill-stash-offset))] [else `(inline ,(make-info-kill*-live* (reg-list) live*) ,%c-call ,t0)])) (nanopass-case (Ltype Type) result-type - [(fp-double-float) `(inline ,(make-info-kill*-live* (reg-list) live*) ,%c-call ,t0)] - [(fp-single-float) `(inline ,(make-info-kill*-live* (reg-list) live*) ,%c-call ,t0)] + [(fp-double-float) (make-call (reg-list) 1)] + [(fp-single-float) (make-call (reg-list) 1)] [(fp-integer ,bits) (handle-integer-cases bits)] [(fp-unsigned ,bits) (handle-integer-cases bits)] [(fp-ftd& ,ftd) (handle-ftd&-case ftd)] - [else `(inline ,(make-info-kill*-live* (reg-list %Cretval) live*) ,%c-call ,t0)])))) + [else (make-call (reg-list %Cretval) 0)])))) (nanopass-case (Ltype Type) result-type [(fp-double-float) (lambda (lvalue) @@ -2551,12 +2650,14 @@ | | | back chain | 1 word sp+X: | | - +---------------------------+ +---------------------------+ <- 16-byte aligned + +---------------------------+ | | | &-return space | 2 words, if needed | | +---------------------------+ <- 8-byte aligned + | unactivate mode | 1 word, if needed + +---------------------------+ | | | callee-save regs | | | @@ -2566,9 +2667,9 @@ | | +---------------------------+ <- 8-byte aligned | | - | integer argument regs | + | integer argument regs | Also used to stash results during unactivate | | - sp+8: +---------------------------+ <-- 8-byte aligned + sp+8: +---------------------------+ <- 8-byte aligned | | | lr | 1 word (place for get-thread-context to store lr) | | @@ -2836,20 +2937,23 @@ (case ($ftd-size ftd) [(4) `(inline ,(make-info-loadfl %Cfpretval) ,%load-single ,%sp ,%zero (immediate ,return-space-offset))] [else `(inline ,(make-info-loadfl %Cfpretval) ,%load-double ,%sp ,%zero (immediate ,return-space-offset))])) - '())] + '() + 1)] [else (cond [($ftd-compound? ftd) ;; return pointer (values (lambda () `(set! ,%Cretval ,(%mref ,%sp ,int-reg-offset))) - (list %Cretval))] + (list %Cretval) + 0)] [(fx= 8 ($ftd-size ftd)) (values (lambda () (%seq (set! ,%Cretval-high ,(%mref ,%sp ,return-space-offset)) (set! ,%Cretval-low ,(%mref ,%sp ,(fx+ return-space-offset 4))))) - (list %Cretval-high %Cretval-low))] + (list %Cretval-high %Cretval-low) + 0)] [else (values (lambda () @@ -2857,18 +2961,22 @@ [(1) `(set! ,%Cretval (inline ,(make-info-load 'integer-8 #f) ,%load ,%sp ,%zero (immediate ,return-space-offset)))] [(2) `(set! ,%Cretval (inline ,(make-info-load 'integer-16 #f) ,%load ,%sp ,%zero (immediate ,return-space-offset)))] [else `(set! ,%Cretval ,(%mref ,%sp ,return-space-offset))])) - (list %Cretval))])])] + (list %Cretval) + 0)])])] [(fp-double-float) (values (lambda (x) `(inline ,(make-info-loadfl %Cfpretval) ,%load-double ,x ,%zero ,(%constant flonum-data-disp))) - '())] + '() + 1)] [(fp-single-float) (values (lambda (x) `(inline ,(make-info-loadfl %Cfpretval) ,%load-double->single ,x ,%zero ,(%constant flonum-data-disp))) - '())] + '() + 1)] [(fp-void) (values (lambda () `(nop)) - '())] + '() + 0)] [else (cond [(nanopass-case (Ltype Type) result-type @@ -2879,11 +2987,20 @@ (%seq (set! ,%Cretval-low ,lo-rhs) (set! ,%Cretval-high ,hi-rhs))) - (list %Cretval-high %Cretval-low))] + (list %Cretval-high %Cretval-low) + 0)] [else (values (lambda (rhs) `(set! ,%Cretval ,rhs)) - (list %Cretval))])]))) + (list %Cretval) + 0)])]))) + (define (unactivate unactivate-mode-offset result-regs result-num-fp-regs stash-offset) + (let ([e (%seq + (set! ,%Carg1 ,(%mref ,%sp ,unactivate-mode-offset)) + ,(%inline unactivate-thread ,%Carg1))]) + (pop-registers result-regs result-num-fp-regs (fp-result-regs) stash-offset + (push-registers result-regs result-num-fp-regs (fp-result-regs) stash-offset + e)))) (lambda (info) (define callee-save-regs (list %r14 %r15 %r16 %r17 %r18 %r19 %r20 %r21 %r22 %r23 %r24 %r25 %r26 %r27 %r28 %r29 %r30 %r31)) (define isaved (length callee-save-regs)) @@ -2898,12 +3015,12 @@ float-reg-offset (fx+ (fx* fp-reg-count 8) float-reg-offset))] [synthesize-first-argument? (indirect-result-that-fits-in-registers? result-type)] - [return-space-offset (align 8 (fx+ (fx* isaved 4) callee-save-offset))] - [stack-size (align 16 (if synthesize-first-argument? - (fx+ return-space-offset 8) - return-space-offset))] + [adjust-active? (memq 'adjust-active (info-foreign-conv info))] + [unactivate-mode-offset (fx+ (fx* isaved 4) callee-save-offset)] + [return-space-offset (align 8 (fx+ unactivate-mode-offset (if adjust-active? 4 0)))] + [stack-size (align 16 (fx+ return-space-offset (if synthesize-first-argument? 8 0)))] [stack-arg-offset (fx+ stack-size 8)]) - (let-values ([(get-result result-regs) (do-result result-type return-space-offset int-reg-offset)]) + (let-values ([(get-result result-regs result-num-fp-regs) (do-result result-type return-space-offset int-reg-offset)]) (values (lambda () (%seq @@ -2915,9 +3032,16 @@ ; not bothering with cr, because we don't update nonvolatile fields ,(save-regs callee-save-regs callee-save-offset) ,(if-feature pthreads - (%seq - (set! ,%Cretval ,(%inline get-tc)) - (set! ,%tc ,%Cretval)) + ((lambda (e) + (if adjust-active? + (%seq + (set! ,%Cretval ,(%inline activate-thread)) + (set! ,(%mref ,%sp ,unactivate-mode-offset) ,%Cretval) + ,e) + e)) + (%seq + (set! ,%Cretval ,(%inline get-tc)) + (set! ,%tc ,%Cretval))) `(set! ,%tc (literal ,(make-info-literal #f 'entry (lookup-c-entry thread-context) 0)))))) ; list of procedures that marshal arguments from their C stack locations ; to the Scheme argument locations @@ -2926,6 +3050,12 @@ get-result (lambda () (in-context Tail + ((lambda (e) + (if adjust-active? + (%seq + ,(unactivate unactivate-mode-offset result-regs result-num-fp-regs int-reg-offset) + ,e) + e)) (%seq ; restore the lr (inline ,null-info ,%restore-lr (immediate ,(fx+ stack-size 4))) @@ -2934,5 +3064,5 @@ ; deallocate space for pad & arg reg values (set! ,%Csp ,(%inline + ,%Csp (immediate ,stack-size))) ; done - (asm-c-return ,null-info ,callee-save-regs ... ,result-regs ...)))))))))))))) + (asm-c-return ,null-info ,callee-save-regs ... ,result-regs ...))))))))))))))) ) diff --git a/s/syntax.ss b/s/syntax.ss index ae0b68ea08..59ca276d54 100644 --- a/s/syntax.ss +++ b/s/syntax.ss @@ -8544,21 +8544,39 @@ (define squawk (lambda (x) (syntax-error x (format "invalid ~s convention" who)))) - (let ([c (syntax->datum conv)]) - (if (not c) - #f - (case ($target-machine) - [(i3nt ti3nt) - (case c - [(__stdcall) #'i3nt-stdcall] - [(__cdecl) #f] - [(__com) #'i3nt-com] - [else (squawk conv)])] - [(ppcnt) - (case c - [(__stdcall __cdecl) #f] - [else (squawk conv)])] - [else (squawk conv)]))))) + (let loop ([conv conv] [accum '()] [keep-accum '()]) + (cond + [(null? conv) (datum->syntax #'filter-conv keep-accum)] + [else + (let* ([orig-c (car conv)] + [c (syntax->datum orig-c)] + [c (cond + [(not c) #f] + [(eq? c '__thread) 'adjust-active] + [else + (case ($target-machine) + [(i3nt ti3nt) + (case c + [(__stdcall) 'i3nt-stdcall] + [(__cdecl) #f] + [(__com) 'i3nt-com] + [else (squawk orig-c)])] + [(ppcnt) + (case c + [(__stdcall __cdecl) #f] + [else (squawk orig-c)])] + [else (squawk orig-c)])])]) + (when (member c accum) + (syntax-error orig-c (format "redundant ~s convention" who))) + (unless (or (null? accum) + (eq? c 'adjust-active) + (and (eq? 'adjust-active (car accum)) + (null? (cdr accum)))) + (syntax-error orig-c (format "conflicting ~s convention" who))) + (loop (cdr conv) (cons c accum) + (if (and c (if-feature pthreads #t (not (eq? c 'adjust-active)))) + (cons c keep-accum) + keep-accum)))])))) (define $make-foreign-procedure (lambda (conv foreign-name ?foreign-addr type* result-type) @@ -8730,12 +8748,10 @@ (or ($fp-filter-type ($expand-fp-ftype 'foreign-procedure what r x) result?) (syntax-error x (format "invalid foreign-procedure ~s type specifier" what)))))) (syntax-case x () - [(_ ?name (arg ...) result) - #'(foreign-procedure #f ?name (arg ...) result)] - [(_ conv ?name (arg ...) result) + [(_ c ... ?name (arg ...) result) (lambda (r) ($make-foreign-procedure - ($filter-conv 'foreign-procedure #'conv) + ($filter-conv 'foreign-procedure #'(c ...)) (let ([x (datum ?name)]) (and (string? x) x)) #'($foreign-entry ?name) (map (lambda (x) (filter-type r x #f)) #'(arg ...)) @@ -8743,7 +8759,10 @@ (define $make-foreign-callable (lambda (who conv ?proc type* result-type) - (when (eq? conv 'i3nt-com) ($oops who "unsupported convention ~s" conv)) + (for-each (lambda (c) + (when (eq? (syntax->datum c) 'i3nt-com) + ($oops who "unsupported convention ~s" c))) + (syntax->list conv)) (let ([unsafe? (= (optimize-level) 3)]) (with-syntax ([conv conv] [?proc ?proc]) (with-syntax ([((actual (t ...) (arg ...)) ...) @@ -8978,12 +8997,10 @@ (or ($fp-filter-type ($expand-fp-ftype 'foreign-callable what r x) result?) (syntax-error x (format "invalid foreign-callable ~s type specifier" what)))))) (syntax-case x () - [(_ proc (arg ...) result) - #'(foreign-callable #f proc (arg ...) result)] - [(_ conv ?proc (arg ...) result) + [(_ c ... ?proc (arg ...) result) (lambda (r) ($make-foreign-callable 'foreign-callable - ($filter-conv 'foreign-callable #'conv) + ($filter-conv 'foreign-callable #'(c ...)) #'?proc (map (lambda (x) (filter-type r x #f)) #'(arg ...)) (filter-type r #'result #t)))]))) diff --git a/s/x86.ss b/s/x86.ss index 07b83be780..153d86f928 100644 --- a/s/x86.ss +++ b/s/x86.ss @@ -778,6 +778,19 @@ (safe-assert (eq? z %eax)) `(set! ,(make-live-info) ,z (asm ,info ,asm-get-tc))]) + (define-instruction value activate-thread + [(op (z ur)) + (safe-assert (eq? z %eax)) ; see get-tc + `(set! ,(make-live-info) ,z (asm ,info ,asm-activate-thread))]) + + (define-instruction effect deactivate-thread + [(op) + `(asm ,info ,asm-deactivate-thread)]) + + (define-instruction effect unactivate-thread + [(op) + `(asm ,info ,asm-unactivate-thread)]) + ; TODO: should we insist that asm-library-call preserve %ts and %td? ; TODO: risc architectures will have to take info-asmlib-save-ra? into account (define-instruction value asmlibcall @@ -925,7 +938,7 @@ asm-inc-profile-counter asm-inc-cc-counter asm-read-time-stamp-counter asm-read-performance-monitoring-counter ; threaded version specific - asm-get-tc + asm-get-tc asm-activate-thread asm-deactivate-thread asm-unactivate-thread ; machine dependent exports asm-sext-eax->edx) @@ -2104,7 +2117,22 @@ (define asm-get-tc (let ([target `(literal 0 (entry ,(lookup-c-entry get-thread-context)))]) - (lambda (code* dest) ; dest is ignored, since it is always the first C argument (eax in this case) + (lambda (code* dest) ; dest is ignored, since it is always the first C result (eax in this case) + (emit bsr target code*)))) + + (define asm-activate-thread + (let ([target `(literal 0 (entry ,(lookup-c-entry activate-thread)))]) + (lambda (code* dest) ; dest is ignored, as in asm-get-tc + (emit bsr target code*)))) + + (define asm-deactivate-thread + (let ([target `(literal 0 (entry ,(lookup-c-entry deactivate-thread)))]) + (lambda (code*) + (emit bsr target code*)))) + + (define asm-unactivate-thread + (let ([target `(literal 0 (entry ,(lookup-c-entry unactivate-thread)))]) + (lambda (code*) (emit bsr target code*)))) (define asm-indirect-call @@ -2281,6 +2309,43 @@ [(fp-ftd& ,ftd) (not (callee-expects-result-pointer? result-type))] [else #f]))) + (module (push-registers pop-registers push-registers-size) + (define (move-registers regs fp-reg-count load? offset e) + (with-output-language (L13 Effect) + (cond + [(fx> fp-reg-count 0) + (let ([offset (fx- offset 8)]) + (move-registers regs (fx- fp-reg-count 1) load? offset + (cond + [load? `(seq ,(%inline fldl ,(%mref ,%sp ,offset)) ,e)] + [else `(seq ,e ,(%inline fstpl ,(%mref ,%sp ,offset)))])))] + [(pair? regs) + (let ([offset (fx- offset 4)]) + (move-registers (cdr regs) 0 load? offset + (cond + [load? `(seq (set! ,(car regs) ,(%mref ,%sp ,offset)) ,e)] + [else `(seq ,e (set! ,(%mref ,%sp ,offset) ,(car regs)))])))] + [else e]))) + (define (push-registers-size regs fp-reg-count arg-count) + ;; Align with the expectation that `arg-count` arguments + ;; will be pushed later, before a function call + (let ([offset (fx+ (fx* 4 (length regs)) (fx* 8 fp-reg-count))]) + (constant-case machine-type-name + [(i3osx ti3osx) + (fx- (fxlogand (fx+ offset (fx* 4 arg-count) 15) -16) + (fx* 4 arg-count))] + [else offset]))) + (define (push-registers regs fp-reg-count arg-count) + (let ([offset (push-registers-size regs fp-reg-count arg-count)]) + (move-registers regs fp-reg-count #f offset + (with-output-language (L13 Effect) + `(set! ,%sp ,(%inline - ,%sp (immediate ,offset))))))) + (define (pop-registers regs fp-reg-count arg-count) + (let ([offset (push-registers-size regs fp-reg-count arg-count)]) + (move-registers regs fp-reg-count #t offset + (with-output-language (L13 Effect) + `(set! ,%sp ,(%inline + ,%sp (immediate ,offset)))))))) + (define asm-foreign-call (with-output-language (L13 Effect) (letrec ([load-double-stack @@ -2386,6 +2451,58 @@ (cons (load-stack n) locs) (fx+ n 4) #f))])))]) + (define (get-result-registers fill-result-here? result-type) + (cond + [fill-result-here? + (let* ([ftd (nanopass-case (Ltype Type) result-type + [(fp-ftd& ,ftd) ftd])] + [size ($ftd-size ftd)]) + (case size + [(4) + (cond + [(and (if-feature windows (not ($ftd-compound? ftd)) #t) + (equal? '((float 4 0)) ($ftd->members ftd))) + (values '() 1)] + [else (values (reg-list %eax) 0)])] + [(8) + (cond + [(and (if-feature windows (not ($ftd-compound? ftd)) #t) + (equal? '((float 8 0)) ($ftd->members ftd))) + (values '() 1)] + [else (values (reg-list %eax %edx) 0)])] + [else (values (reg-list %eax) 0)]))] + [else + (nanopass-case (Ltype Type) result-type + [(fp-double-float) (values '() 1)] + [(fp-single-float) (values '() 1)] + [(fp-integer ,bits) + (case bits + [(64) (values (reg-list %eax %edx) 0)] + [else (values (reg-list %eax) 0)])] + [(fp-unsigned ,bits) + (case bits + [(64) (values (reg-list %eax %edx) 0)] + [else (values (reg-list %eax) 0)])] + [(fp-void) (values '() 0)] + [else (values (reg-list %eax) 0)])])) + (define (add-deactivate adjust-active? fill-result-here? t0 result-type e) + (cond + [adjust-active? + (let-values ([(result-regs result-fp-count) (get-result-registers fill-result-here? result-type)]) + (let ([save-and-restore + (lambda (regs fp-count e) + (cond + [(and (null? regs) (fx= 0 fp-count)) e] + [else (%seq + ,(push-registers regs fp-count 0) + ,e + ,(pop-registers regs fp-count 0))]))]) + (%seq + (set! ,%eax ,t0) + ,(save-and-restore (list %eax) 0 (%inline deactivate-thread)) + ,e + ,(save-and-restore result-regs result-fp-count `(set! ,%eax ,(%inline activate-thread))))))] + [else e])) (define returnem (lambda (conv orig-frame-size locs result-type ccall r-loc) (let ([frame-size (constant-case machine-type-name @@ -2402,7 +2519,7 @@ r-loc ; Windows __stdcall convention requires callee to clean up (lambda () - (if (or (fx= frame-size 0) (memq conv '(i3nt-stdcall i3nt-com))) + (if (or (fx= frame-size 0) (memq 'i3nt-stdcall conv) (memq 'i3nt-com conv)) `(nop) (let ([frame-size (if (callee-pops-result-pointer? result-type) (fx- frame-size (constant ptr-bytes)) @@ -2417,21 +2534,25 @@ (lambda (frame-size locs) (returnem conv frame-size locs result-type (lambda (t0) - (let ([call - (case conv - [(i3nt-com) - (when (null? arg-type*) - ($oops 'foreign-procedure - "__com convention requires instance argument")) - ; jump indirect - (%seq - (set! ,%eax ,(%mref ,%sp 0)) - (set! ,%eax ,(%mref ,%eax 0)) - (set! ,%eax ,(%inline + ,%eax ,t0)) - (inline ,(make-info-kill*-live* (reg-list %eax %edx) '()) ,%c-call ,(%mref ,%eax 0)))] - [else `(inline ,(make-info-kill*-live* (reg-list %eax %edx) '()) ,%c-call ,t0)])]) + (let* ([fill-result-here? (fill-result-pointer-from-registers? result-type)] + [adjust-active? (memq 'adjust-active conv)] + [t (if adjust-active? %eax t0)] ; need a register if `adjust-active?` + [call + (add-deactivate adjust-active? fill-result-here? t0 result-type + (cond + [(memq 'i3nt-com conv) + (when (null? arg-type*) + ($oops 'foreign-procedure + "__com convention requires instance argument")) + ; jump indirect + (%seq + (set! ,%eax ,(%mref ,%sp 0)) + (set! ,%eax ,(%mref ,%eax 0)) + (set! ,%eax ,(%inline + ,%eax ,t)) + (inline ,(make-info-kill*-live* (reg-list %eax %edx) '()) ,%c-call ,(%mref ,%eax 0)))] + [else `(inline ,(make-info-kill*-live* (reg-list %eax %edx) '()) ,%c-call ,t)]))]) (cond - [(fill-result-pointer-from-registers? result-type) + [fill-result-here? (let* ([ftd (nanopass-case (Ltype Type) result-type [(fp-ftd& ,ftd) ftd])] [size ($ftd-size ftd)]) @@ -2509,13 +2630,15 @@ +---------------------------+ | | | incoming stack args | - sp+X+Y: | | + sp+X+Y+Z: | | +---------------------------+ <- i3osx: 16-byte boundary | incoming return address | one word +---------------------------+ | | | callee-save registers | EBP, ESI, EDI, EBX (4 words) - sp+X: | | + sp+X+Y: | | + +---------------------------+ + sp+X: | unactivate mode | 0 words or 1 word +---------------------------+ | indirect result space | i3osx: 3 words | (for & results via regs) | other: 2 words @@ -2610,38 +2733,46 @@ (equal? '((float 4 0)) ($ftd->members ftd))) (values (lambda () (%inline flds ,(%mref ,%sp 0))) - '())] + '() + 1)] [(and (if-feature windows (not ($ftd-compound? ftd)) #t) (equal? '((float 8 0)) ($ftd->members ftd))) (values (lambda () (%inline fldl ,(%mref ,%sp 0))) - '())] + '() + 1)] [(fx= ($ftd-size ftd) 8) (values (lambda () `(seq (set! ,%eax ,(%mref ,%sp 0)) (set! ,%edx ,(%mref ,%sp 4)))) - (list %eax %edx))] + (list %eax %edx) + 0)] [else (values (lambda () `(set! ,%eax ,(%mref ,%sp 0))) - (list %eax))])] + (list %eax) + 0)])] [else (values (lambda () ;; Return pointer that was filled; destination was the first argument `(set! ,%eax ,(%mref ,%sp ,init-stack-offset))) - (list %eax))])] + (list %eax) + 0)])] [(fp-double-float) (values (lambda (x) (%inline fldl ,(%mref ,x ,(constant flonum-data-disp)))) - '())] + '() + 1)] [(fp-single-float) (values (lambda (x) (%inline fldl ,(%mref ,x ,(constant flonum-data-disp)))) - '())] + '() + 1)] [(fp-void) (values (lambda () `(nop)) - '())] + '() + 0)] [else (cond [(nanopass-case (Ltype Type) result-type @@ -2652,25 +2783,42 @@ (%seq (set! ,%eax ,lorhs) (set! ,%edx ,hirhs))) - (list %eax %edx))] + (list %eax %edx) + 0)] [else (values (lambda (x) `(set! ,%eax ,x)) - (list %eax))])])) + (list %eax) + 0)])])) + (define (unactivate result-regs result-num-fp-regs) + (let ([e (%seq + (set! ,%eax ,(%mref ,%sp ,(+ 8 (push-registers-size result-regs result-num-fp-regs 1)))) + ,(%inline push ,%eax) + ,(%inline unactivate-thread) + ,(%inline pop ,%eax))]) + (if (and (null? result-regs) (fx= 0 result-num-fp-regs)) + e + (%seq + ,(push-registers result-regs result-num-fp-regs 1) + ,e + ,(pop-registers result-regs result-num-fp-regs 1))))) (lambda (info) - (let ([conv (info-foreign-conv info)] - [arg-type* (info-foreign-arg-type* info)] - [result-type (info-foreign-result-type info)] - [init-stack-offset (constant-case machine-type-name [(i3osx ti3osx) 32] [else 28])] - [indirect-result-space (constant-case machine-type-name - [(i3osx ti3osx) - ;; maintain 16-bit alignment for i3osx, taking into account - ;; 16 bytes pushed above + 4 for RA pushed by asmCcall; - ;; 8 of these bytes are used for &-return space, if needed - 12] - [else 8])]) - (let ([indirect-result-to-registers? (fill-result-pointer-from-registers? result-type)]) - (let-values ([(get-result result-regs) (do-result result-type init-stack-offset indirect-result-to-registers?)]) + (let* ([conv (info-foreign-conv info)] + [adjust-active? (memq 'adjust-active conv)] + [arg-type* (info-foreign-arg-type* info)] + [result-type (info-foreign-result-type info)] + [indirect-result-space (constant-case machine-type-name + [(i3osx ti3osx) + ;; maintain 16-bit alignment for i3osx, taking into account + ;; 16 bytes pushed above + 4 for RA pushed by asmCcall; + ;; 8 of these bytes are used for &-return space, if needed; + ;; the extra 4 bytes may be used for the unactivate mode + 12] + [else (if adjust-active? 12 8)])] + [init-stack-offset (fx+ 20 indirect-result-space)] + [indirect-result-to-registers? (fill-result-pointer-from-registers? result-type)]) + (let-values ([(get-result result-regs result-num-fp-regs) + (do-result result-type init-stack-offset indirect-result-to-registers?)]) (with-values (do-stack (if indirect-result-to-registers? (cdr arg-type*) arg-type*) @@ -2686,9 +2834,16 @@ ,(%inline push ,%ebx) (set! ,%sp ,(%inline - ,%sp (immediate ,indirect-result-space))) ,(if-feature pthreads - `(seq - (set! ,%eax ,(%inline get-tc)) - (set! ,%tc ,%eax)) + ((lambda (e) + (if adjust-active? + (%seq + (set! ,%eax ,(%inline activate-thread)) + (set! ,(%mref ,%sp ,8) ,%eax) + ,e) + e)) + `(seq + (set! ,%eax ,(%inline get-tc)) + (set! ,%tc ,%eax))) `(set! ,%tc (literal ,(make-info-literal #f 'entry (lookup-c-entry thread-context) 0)))))) (let ([locs (reverse locs)]) (if indirect-result-to-registers? @@ -2698,6 +2853,12 @@ get-result (lambda () (in-context Tail + ((lambda (e) + (if adjust-active? + (%seq + ,(unactivate result-regs result-num-fp-regs) + ,e) + e)) (%seq (set! ,%sp ,(%inline + ,%sp (immediate ,indirect-result-space))) (set! ,%ebx ,(%inline pop)) @@ -2706,8 +2867,8 @@ (set! ,%ebp ,(%inline pop)) ; Windows __stdcall convention requires callee to clean up ,((lambda (e) - (if (memq conv '(i3nt-stdcall i3nt-com)) - (let ([arg-size (fx- frame-size 20)]) + (if (or (memq 'i3nt-stdcall conv) (memq 'i3nt-com conv)) + (let ([arg-size (fx- frame-size init-stack-offset)]) (if (fx> arg-size 0) (%seq (set! diff --git a/s/x86_64.ss b/s/x86_64.ss index b066f09f2b..dbe664a664 100644 --- a/s/x86_64.ss +++ b/s/x86_64.ss @@ -28,7 +28,7 @@ [%xp %r12 #t 12] [%ts %rax %Cretval #f 0] [%td %rbx #t 3] - [%ac1 %r10 #f 10] + [%ac1 %r10 %deact #f 10] [%yp %r11 #f 11] [%cp %r15 #t 15] [#;%ret %rsi #t 6] @@ -57,7 +57,7 @@ [%xp %r12 #t 12] [%ts %rax %Cretval #f 0] [%td %rbx #t 3] - [%ac1 %r10 #f 10] + [%ac1 %r10 %deact #f 10] [%yp %r11 #f 11] [%cp %r15 #t 15] [#;%ret %r8 %Carg5 #f 8] @@ -824,6 +824,20 @@ (safe-assert (eq? z %rax)) `(set! ,(make-live-info) ,z (asm ,info ,asm-get-tc))]) + (define-instruction value activate-thread + [(op (z ur)) + (safe-assert (eq? z %rax)) ; see get-tc + `(set! ,(make-live-info) ,z (asm ,info ,asm-activate-thread))]) + + (define-instruction effect deactivate-thread + [(op) + `(asm ,info ,asm-deactivate-thread)]) + + (define-instruction effect unactivate-thread + [(op (x ur)) + (safe-assert (eq? x %Carg1)) + `(asm ,info ,asm-unactivate-thread ,x)]) + ; TODO: risc architectures will have to take info-asmlib-save-ra? into account (define-instruction value asmlibcall [(op (z ur)) @@ -982,7 +996,7 @@ asm-inc-profile-counter asm-inc-cc-counter asm-read-time-stamp-counter asm-read-performance-monitoring-counter ; threaded version specific - asm-get-tc + asm-get-tc asm-activate-thread asm-deactivate-thread asm-unactivate-thread ; machine dependent exports asm-sext-rax->rdx asm-store-single->double asm-kill asm-get-double) @@ -2219,6 +2233,21 @@ (lambda (code* jmp-reg) ; dest is ignored, since it is always the first C argument (rax in this case) (asm-helper-call code* target jmp-reg)))) + (define asm-activate-thread + (let ([target `(x86_64-call 0 (entry ,(lookup-c-entry activate-thread)))]) + (lambda (code* jmp-reg) + (asm-helper-call code* target jmp-reg)))) + + (define asm-deactivate-thread + (let ([target `(x86_64-call 0 (entry ,(lookup-c-entry deactivate-thread)))]) + (lambda (code*) + (asm-helper-call code* target %rax)))) + + (define asm-unactivate-thread + (let ([target `(x86_64-call 0 (entry ,(lookup-c-entry unactivate-thread)))]) + (lambda (code* arg-reg) + (asm-helper-call code* target %rax)))) + (define asm-indirect-call (lambda (code* t . ignore) ; NB: c-call is already required to be a register or memory operand, so @@ -2494,6 +2523,47 @@ (fx> (fx+ iint ints) 6) (fx> (fx+ ifp fps) 8))) + (module (push-registers pop-registers push-registers-size) + (define (move-registers regs load?) + (define vfp (make-vfp)) + (define (fp-reg? reg) + (let loop ([i (fx- (vector-length vfp) 1)]) + (or (eq? reg (vector-ref vfp i)) + (and (fx> i 0) (loop (fx- i 1)))))) + (with-output-language (L13 Effect) + (let loop ([regs regs] [offset 0]) + (let* ([reg (car regs)] + [e (cond + [(fp-reg? reg) + `(inline ,(make-info-loadfl reg) ,(if load? %load-double %store-double) ,%sp ,%zero (immediate ,offset))] + [load? `(set! ,reg ,(%mref ,%sp ,offset))] + [else `(set! ,(%mref ,%sp ,offset) ,reg)])] + [regs (cdr regs)]) + (if (null? regs) + e + `(seq ,e ,(loop regs (fx+ offset 8)))))))) + (define (push-registers-size regs) + (align (fx* 8 (length regs)) 16)) + (define (push-registers regs) + (with-output-language (L13 Effect) + (%seq + (set! ,%sp ,(%inline - ,%sp (immediate ,(push-registers-size regs)))) + ,(move-registers regs #f)))) + (define (pop-registers regs) + (with-output-language (L13 Effect) + (%seq + ,(move-registers regs #t) + (set! ,%sp ,(%inline + ,%sp (immediate ,(push-registers-size regs)))))))) + + (define (as-c-call e) + (if-feature windows + (with-output-language (L13 Effect) + (%seq + (set! ,%sp ,(%inline - ,%sp (immediate 32))) + ,e + (set! ,%sp ,(%inline + ,%sp (immediate 32))))) + e)) + (define asm-foreign-call (with-output-language (L13 Effect) (letrec ([load-double-stack @@ -2737,6 +2807,20 @@ (loop (cdr types) (cons (load-int-stack isp) locs) regs iint ifp (fx+ isp 8)))])))))]) + (define (add-deactivate adjust-active? t0 live* result-live* e) + (cond + [adjust-active? + (let ([save-and-restore + (lambda (regs e) + (cond + [(null? regs) e] + [else (%seq ,(push-registers regs) ,e ,(pop-registers regs))]))]) + (%seq + (set! ,%deact ,t0) + ,(save-and-restore (cons %deact live*) (as-c-call (%inline deactivate-thread))) + ,e + ,(save-and-restore result-live* (as-c-call `(set! ,%rax ,(%inline activate-thread))))))] + [else e])) (define (add-save-fill-target fill-result-here? frame-size locs) (cond [fill-result-here? @@ -2766,6 +2850,20 @@ `(seq ,(loop (cdr classes) (fx+ offset 8) (cdr iregs) fpregs) (set! ,(%mref ,%rcx ,offset) ,(car iregs)))]))) + (define (get-result-regs fill-result-here? result-type result-classes) + (if fill-result-here? + (let loop ([classes result-classes] [iregs (reg-list %rax %rdx)] [fpregs (reg-list %Cfparg1 %Cfparg2)]) + (cond + [(null? classes) '()] + [(eq? 'sse (car classes)) + (cons (car fpregs) (loop (cdr classes) iregs (cdr fpregs)))] + [else + (cons (car iregs) (loop (cdr classes) (cdr iregs) fpregs))])) + (nanopass-case (Ltype Type) result-type + [(fp-double-float) (list %Cfpretval)] + [(fp-single-float) (list %Cfpretval)] + [(fp-void) '()] + [else (list %rax)]))) (define returnem (lambda (frame-size locs ccall r-loc) ; need to maintain 16-byte alignment, ignoring the return address @@ -2789,24 +2887,28 @@ [arg-type* (info-foreign-arg-type* info)] [result-type (info-foreign-result-type info)] [result-classes (classify-type result-type)] - [fill-result-here? (result-fits-in-registers? result-classes)]) + [fill-result-here? (result-fits-in-registers? result-classes)] + [adjust-active? (memq 'adjust-active conv)]) (with-values (do-args (if fill-result-here? (cdr arg-type*) arg-type*) (make-vint) (make-vfp)) (lambda (frame-size nfp locs live*) (with-values (add-save-fill-target fill-result-here? frame-size locs) (lambda (frame-size locs) (returnem frame-size locs (lambda (t0) - (let ([c-call - (if-feature windows - (%seq - (set! ,%sp ,(%inline - ,%sp (immediate 32))) - (inline ,(make-info-kill*-live* (reg-list %rax) live*) ,%c-call ,t0) - (set! ,%sp ,(%inline + ,%sp (immediate 32)))) - (%seq - ;; System V ABI varargs functions require count of fp regs used in %al register. - ;; since we don't know if the callee is a varargs function, we always set it. - (set! ,%rax (immediate ,nfp)) - (inline ,(make-info-kill*-live* (reg-list %rax) (cons %rax live*)) ,%c-call ,t0)))]) + (let* ([t (if adjust-active? %deact t0)] ; need a register if `adjust-active?` + [c-call + (add-deactivate adjust-active? t0 live* + (get-result-regs fill-result-here? result-type result-classes) + (if-feature windows + (%seq + (set! ,%sp ,(%inline - ,%sp (immediate 32))) + (inline ,(make-info-kill*-live* (reg-list %rax %rdx) live*) ,%c-call ,t) + (set! ,%sp ,(%inline + ,%sp (immediate 32)))) + (%seq + ;; System V ABI varargs functions require count of fp regs used in %al register. + ;; since we don't know if the callee is a varargs function, we always set it. + (set! ,%rax (immediate ,nfp)) + (inline ,(make-info-kill*-live* (reg-list %rax %rdx) (cons %rax live*)) ,%c-call ,t))))]) (cond [fill-result-here? (add-fill-result c-call (fx- frame-size (constant ptr-bytes)) result-classes)] @@ -2851,10 +2953,12 @@ +---------------------------+ <- 16-byte boundary | | | space for register args | four quads - sp+80: | | + sp+80/96: | | +---------------------------+ <- 16-byte boundary | incoming return address | one quad incoming sp: +---------------------------+ + sp+72: | active state | zero or two quads + +---------------------------+ | | | callee-save registers | RBX, RBP, RDI, RSI, R12, R13, R14, R15 (8 quads) | | @@ -2872,10 +2976,10 @@ +---------------------------+ <- 16-byte boundary | incoming return address | one quad +---------------------------+ - | pad word | one quad + sp+176: | pad word / active state | one quad +---------------------------+ | indirect result space | two quads - sp+160 | (for & results via regs) | + sp+160: | (for & results via regs) | +---------------------------+<- 16-byte boundary | | | saved register args | space for Carg*, Cfparg* (14 quads) @@ -3038,11 +3142,11 @@ ,(f (cdr types) (fx+ iint 1) ifp (fx+ isp 8))) (f (cdr types) iint ifp isp))])))))) (define do-stack - (lambda (types) + (lambda (types adjust-active?) ; risp is where incoming register args are stored ; sisp is where incoming stack args are stored (if-feature windows - (let f ([types types] [locs '()] [isp 80]) + (let f ([types types] [locs '()] [isp (if adjust-active? 96 80)]) (if (null? types) locs (f (cdr types) @@ -3111,7 +3215,7 @@ (f (cdr types) (cons (load-int-stack (car types) risp) locs) (fx+ iint 1) ifp (fx+ risp 8) sisp))])))))) - (define (do-result result-type result-classes) + (define (do-result result-type result-classes adjust-active?) (nanopass-case (Ltype Type) result-type [(fp-ftd& ,ftd) (cond @@ -3148,7 +3252,7 @@ [else (values (lambda () ;; Return pointer that was filled; destination was the first argument - `(set! ,%Cretval ,(%mref ,%sp ,(if-feature windows 80 48)))) + `(set! ,%Cretval ,(%mref ,%sp ,(if-feature windows (if adjust-active? 96 80) 48)))) (list %Cretval))])] [(fp-double-float) (values @@ -3167,21 +3271,37 @@ (values(lambda (x) `(set! ,%Cretval ,x)) (list %Cretval))])) + (define (unactivate result-regs) + (let ([e `(seq + (set! ,%Carg1 ,(%mref ,%sp ,(+ (push-registers-size result-regs) (if-feature windows 72 176)))) + ,(as-c-call (%inline unactivate-thread ,%Carg1)))]) + (if (null? result-regs) + e + (%seq + ,(push-registers result-regs) + ,e + ,(pop-registers result-regs))))) (lambda (info) (let ([conv (info-foreign-conv info)] [arg-type* (info-foreign-arg-type* info)] [result-type (info-foreign-result-type info)]) (let* ([result-classes (classify-type result-type)] + [adjust-active? (memq 'adjust-active conv)] [synthesize-first? (and result-classes (result-fits-in-registers? result-classes))] - [locs (do-stack (if synthesize-first? (cdr arg-type*) arg-type*))]) - (let-values ([(get-result result-regs) (do-result result-type result-classes)]) + [locs (do-stack (if synthesize-first? (cdr arg-type*) arg-type*) adjust-active?)]) + (let-values ([(get-result result-regs) (do-result result-type result-classes adjust-active?)]) (values (lambda () (%seq ,(if-feature windows (%seq - ,(save-arg-regs arg-type*) + ,(let ([e (save-arg-regs arg-type*)]) + (if adjust-active? + (%seq + ,e + (set! ,%sp ,(%inline - ,%sp (immediate 16)))) + e)) ,(%inline push ,%rbx) ,(%inline push ,%rbp) ,(%inline push ,%rdi) @@ -3201,9 +3321,16 @@ ,(%inline push ,%r15) ,(save-arg-regs arg-type*))) ,(if-feature pthreads - (%seq + ((lambda (e) + (if adjust-active? + (%seq + ,(as-c-call `(set! ,%rax ,(%inline activate-thread))) + (set! ,(%mref ,%sp ,(if-feature windows 72 176)) ,%rax) + ,e) + e)) + (%seq (set! ,%rax ,(%inline get-tc)) - (set! ,%tc ,%rax)) + (set! ,%tc ,%rax))) `(set! ,%tc (literal ,(make-info-literal #f 'entry (lookup-c-entry thread-context) 0)))))) (let ([locs (reverse locs)]) (if synthesize-first? @@ -3213,9 +3340,19 @@ get-result (lambda () (in-context Tail - (%seq + ((lambda (e) + (if adjust-active? + (%seq + ,(unactivate result-regs) + ,e) + e)) + (%seq ,(if-feature windows - (%seq + ((lambda (e) + (if adjust-active? + (%seq ,e (set! ,%sp ,(%inline + ,%sp (immediate 16)))) + e)) + (%seq (set! ,%sp ,(%inline + ,%sp (immediate 8))) (set! ,%r15 ,(%inline pop)) (set! ,%r14 ,(%inline pop)) @@ -3224,7 +3361,7 @@ (set! ,%rsi ,(%inline pop)) (set! ,%rdi ,(%inline pop)) (set! ,%rbp ,(%inline pop)) - (set! ,%rbx ,(%inline pop))) + (set! ,%rbx ,(%inline pop)))) (%seq (set! ,%r15 ,(%inline pop)) (set! ,%r14 ,(%inline pop)) @@ -3233,5 +3370,5 @@ (set! ,%rbp ,(%inline pop)) (set! ,%rbx ,(%inline pop)) (set! ,%sp ,(%inline + ,%sp (immediate 136))))) - (asm-c-return ,null-info ,result-regs ...))))))))))))) + (asm-c-return ,null-info ,result-regs ...)))))))))))))) )