diff --git a/LOG b/LOG index 2e14f1caac..fbdd67bcf5 100644 --- a/LOG +++ b/LOG @@ -756,3 +756,10 @@ prims.ss, primdata.ss, cp0.ss, cpnanopass.ss, cmacros.ss, mkheader.ss, gc.c, segment.c, types.h, 4.ms, smgmt.stex, release_notes.stex +- add (& ftype) argument/result for foreign-procedure, which supports + struct arguments and results for foreign calls + syntax.ss, ftype.ss, cpnanopass.ss, x86.ss, x86_64.ss, + base-lang.ss, np-languages.ss, cprep.ss, primdata.ss, + schlib.c, prim.c, externs.h + mats/foreign4.c, mats/foreign.ms mats/Mf-* + foreign.stex, release_notes.stex diff --git a/c/externs.h b/c/externs.h index 45b8b02ad9..98fa0f80b3 100644 --- a/c/externs.h +++ b/c/externs.h @@ -339,17 +339,8 @@ extern void S_initframe PROTO((ptr tc, iptr n)); extern void S_put_arg PROTO((ptr tc, iptr i, ptr x)); extern void S_return PROTO((void)); extern void S_call_help PROTO((ptr tc, IBOOL singlep)); -extern void S_call_void PROTO((void)); -extern ptr S_call_ptr PROTO((void)); -extern iptr S_call_fixnum PROTO((void)); -extern I32 S_call_int32 PROTO((void)); -extern U32 S_call_uns32 PROTO((void)); -extern double S_call_double PROTO((void)); -extern float S_call_single PROTO((void)); -extern U8 *S_call_bytevector PROTO((void)); -extern I64 S_call_int64 PROTO((void)); -extern U64 S_call_uns64 PROTO((void)); -extern uptr S_call_fptr PROTO((void)); +extern void S_call_one_result PROTO((void)); +extern void S_call_any_results PROTO((void)); #ifdef WIN32 /* windows.c */ diff --git a/c/prim.c b/c/prim.c index 3b22818e71..0041012a81 100644 --- a/c/prim.c +++ b/c/prim.c @@ -134,17 +134,8 @@ static void create_c_entry_vector() { install_c_entry(CENTRY_scan_remembered_set, proc2ptr(S_scan_remembered_set)); install_c_entry(CENTRY_instantiate_code_object, proc2ptr(s_instantiate_code_object)); install_c_entry(CENTRY_Sreturn, proc2ptr(S_return)); - install_c_entry(CENTRY_Scall_ptr, proc2ptr(S_call_ptr)); - install_c_entry(CENTRY_Scall_fptr, proc2ptr(S_call_fptr)); - install_c_entry(CENTRY_Scall_bytevector, proc2ptr(S_call_bytevector)); - install_c_entry(CENTRY_Scall_fixnum, proc2ptr(S_call_fixnum)); - install_c_entry(CENTRY_Scall_int32, proc2ptr(S_call_int32)); - install_c_entry(CENTRY_Scall_uns32, proc2ptr(S_call_uns32)); - install_c_entry(CENTRY_Scall_double, proc2ptr(S_call_double)); - install_c_entry(CENTRY_Scall_single, proc2ptr(S_call_single)); - install_c_entry(CENTRY_Scall_int64, proc2ptr(S_call_int64)); - install_c_entry(CENTRY_Scall_uns64, proc2ptr(S_call_uns64)); - install_c_entry(CENTRY_Scall_void, proc2ptr(S_call_void)); + install_c_entry(CENTRY_Scall_one_result, proc2ptr(S_call_one_result)); + install_c_entry(CENTRY_Scall_any_results, proc2ptr(S_call_any_results)); for (i = 0; i < c_entry_vector_size; i++) { #ifndef PTHREADS diff --git a/c/schlib.c b/c/schlib.c index ad0de89052..7ee50b3b61 100644 --- a/c/schlib.c +++ b/c/schlib.c @@ -252,71 +252,16 @@ void S_call_help(tc, singlep) ptr tc; IBOOL singlep; { CP(tc) = code; } -void S_call_void() { +void S_call_one_result() { + ptr tc = get_thread_context(); + S_call_help(tc, 1); +} + +void S_call_any_results() { ptr tc = get_thread_context(); S_call_help(tc, 0); } -ptr S_call_ptr() { - ptr tc = get_thread_context(); - S_call_help(tc, 1); - return AC0(tc); -} - -iptr S_call_fixnum() { - ptr tc = get_thread_context(); - S_call_help(tc, 1); - return Sfixnum_value(AC0(tc)); -} - -I32 S_call_int32() { - ptr tc = get_thread_context(); - S_call_help(tc, 1); - return (I32)Sinteger_value(AC0(tc)); -} - -U32 S_call_uns32() { - ptr tc = get_thread_context(); - S_call_help(tc, 1); - return (U32)Sinteger_value(AC0(tc)); -} - -I64 S_call_int64() { - ptr tc = get_thread_context(); - S_call_help(tc, 1); - return S_int64_value("foreign-callable", AC0(tc)); -} - -U64 S_call_uns64() { - ptr tc = get_thread_context(); - S_call_help(tc, 1); - return S_int64_value("foreign-callable", AC0(tc)); -} - -double S_call_double() { - ptr tc = get_thread_context(); - S_call_help(tc, 1); - return Sflonum_value(AC0(tc)); -} - -float S_call_single() { - ptr tc = get_thread_context(); - S_call_help(tc, 1); - return (float)Sflonum_value(AC0(tc)); -} - -U8 *S_call_bytevector() { - ptr tc = get_thread_context(); - S_call_help(tc, 1); - return (U8 *)&BVIT(AC0(tc),0); -} - -uptr S_call_fptr() { - ptr tc = get_thread_context(); - S_call_help(tc, 1); - return (uptr)RECORDINSTIT(AC0(tc),0); -} - /* cchain = ((jb . co) ...) */ void S_return() { ptr tc = get_thread_context(); diff --git a/csug/foreign.stex b/csug/foreign.stex index ecac576d7a..f0dc71dd2b 100644 --- a/csug/foreign.stex +++ b/csug/foreign.stex @@ -550,12 +550,24 @@ under Windows running on Intel hardware. \foreigntype{\scheme{(* \var{ftype})}} \index{ftype}This type allows a pointer to a foreign type (ftype) to be passed. -The argument must be an ftype pointer of with type \var{ftype}, +The argument must be an ftype pointer of type \var{ftype}, and the actual argument is the address encapsulated in the ftype pointer. See Section~\ref{SECTFOREIGNDATA} for a description of foreign types. +\foreigntype{\scheme{(& \var{ftype})}} +\index{ftype}This type allows a foreign +type (ftype) to be passed as a value, but represented +on the Scheme side as a pointer to the foreign-type data. +That is, a \scheme{(& \var{ftype})} argument is represented on +the Scheme side the same as a \scheme{(* \var{ftype})} argument, +but a \scheme{(& \var{ftype})} argument is passed to the foreign procedure as the +content at the foreign pointer's address instead of as the +address. For example, if \var{ftype} is a \scheme{struct} type, +then \scheme{(& \var{ftype})} passes a struct argument instead of +a struct-pointer argument. The \var{ftype} cannot refer to an array type. + \medskip\noindent The result types are similar to the parameter types with the addition of a \index{\scheme{void}}\scheme{void} type. @@ -814,6 +826,16 @@ ftype pointer encapsulating the address is returned. See Section~\ref{SECTFOREIGNDATA} for a description of foreign types. +\foreigntype{\scheme{(& \var{ftype})}} +\index{ftype}The result is interpreted as a foreign object +whose structure is described by \var{ftype}, where the foreign +procedure returns a \var{ftype} result, but the caller +must provide an extra \scheme{(* \var{ftype})} argument before +all other arguments to receive the result. An unspecified Scheme object +is returned when the foreign procedure is called, since the result +is instead written into storage referenced by the extra argument. + The \var{ftype} cannot refer to an array type. + \medskip\noindent Consider a C identity procedure: \schemedisplay @@ -969,6 +991,12 @@ except that the requirements and conversions are effectively reversed, e.g., the conversions described for \scheme{foreign-procedure} arguments are performed for \scheme{foreign-callable} return values. +A \scheme{(& \var{ftype})} argument to the callable refers to an address +that is valid only during the dynamic extent of the callback invocation. +A \scheme{(& \var{ftype})} result type for a callable causes the Scheme +procedure to receive an extra \scheme{(& \var{ftype})} argument before +all others; the Scheme procedure should write a result into the extra +argument, and the direct result of the Scheme procedure is ignored. Type checking is performed for result values but not argument values, since the parameter values are provided by the foreign code and must be assumed to be diff --git a/mats/Mf-a6fb b/mats/Mf-a6fb index eb39b33701..b16d1b60da 100644 --- a/mats/Mf-a6fb +++ b/mats/Mf-a6fb @@ -15,13 +15,13 @@ m = a6fb -fsrc = foreign1.c foreign2.c foreign3.c +fsrc = foreign1.c foreign2.c foreign3.c foreign4.c fobj = foreign1.so include Mf-base foreign1.so: ${fsrc} ../boot/$m/scheme.h - cc -fPIC -shared -I${Include} -o foreign1.so foreign1.c foreign2.c foreign3.c + cc -fPIC -shared -I${Include} -o foreign1.so ${fsrc} cat_flush: cat_flush.c cc -o cat_flush cat_flush.c diff --git a/mats/Mf-a6le b/mats/Mf-a6le index c209d33f7b..d6fee09cd6 100644 --- a/mats/Mf-a6le +++ b/mats/Mf-a6le @@ -15,13 +15,13 @@ m = a6le -fsrc = foreign1.c foreign2.c foreign3.c +fsrc = foreign1.c foreign2.c foreign3.c foreign4.c fobj = foreign1.so include Mf-base foreign1.so: ${fsrc} ../boot/$m/scheme.h - cc -m64 -fPIC -shared -I${Include} -o foreign1.so foreign1.c foreign2.c foreign3.c + cc -m64 -fPIC -shared -I${Include} -o foreign1.so ${fsrc} cat_flush: cat_flush.c cc -o cat_flush cat_flush.c diff --git a/mats/Mf-a6nb b/mats/Mf-a6nb index 75dc3bdf60..48187ef9b2 100644 --- a/mats/Mf-a6nb +++ b/mats/Mf-a6nb @@ -15,13 +15,13 @@ m = a6nb -fsrc = foreign1.c foreign2.c foreign3.c +fsrc = foreign1.c foreign2.c foreign3.c foreign4.c fobj = foreign1.so include Mf-base foreign1.so: ${fsrc} ../boot/$m/scheme.h - cc -fPIC -shared -I${Include} -o foreign1.so foreign1.c foreign2.c foreign3.c + cc -fPIC -shared -I${Include} -o foreign1.so ${fsrc} cat_flush: cat_flush.c cc -o cat_flush cat_flush.c diff --git a/mats/Mf-a6nt b/mats/Mf-a6nt index 093d0c3071..51957fad69 100644 --- a/mats/Mf-a6nt +++ b/mats/Mf-a6nt @@ -15,9 +15,9 @@ m = a6nt -fsrc = foreign1.c foreign2.c foreign3.c +fsrc = foreign1.c foreign2.c foreign3.c foreign4.c fobj = foreign1.so -mdclean = cat_flush.exe cat_flush.obj foreign1.exp foreign1.lib foreign1.obj foreign2.obj foreign3.obj +mdclean = cat_flush.exe cat_flush.obj foreign1.exp foreign1.lib foreign1.obj foreign2.obj foreign3.obj foreign4.obj include Mf-base diff --git a/mats/Mf-a6ob b/mats/Mf-a6ob index 8d19133639..12758f303d 100644 --- a/mats/Mf-a6ob +++ b/mats/Mf-a6ob @@ -15,13 +15,13 @@ m = a6ob -fsrc = foreign1.c foreign2.c foreign3.c +fsrc = foreign1.c foreign2.c foreign3.c foreign4.c fobj = foreign1.so include Mf-base foreign1.so: ${fsrc} ../boot/$m/scheme.h - cc -fPIC -shared -I${Include} -o foreign1.so foreign1.c foreign2.c foreign3.c + cc -fPIC -shared -I${Include} -o foreign1.so ${fsrc} cat_flush: cat_flush.c cc -o cat_flush cat_flush.c diff --git a/mats/Mf-a6osx b/mats/Mf-a6osx index 0a607e43ee..f1dbf85dc4 100644 --- a/mats/Mf-a6osx +++ b/mats/Mf-a6osx @@ -15,7 +15,7 @@ m = a6osx -fsrc = foreign1.c foreign2.c foreign3.c +fsrc = foreign1.c foreign2.c foreign3.c foreign4.c fobj = foreign1.so include Mf-base diff --git a/mats/Mf-a6s2 b/mats/Mf-a6s2 index 22ae90038a..eccb7d86f0 100644 --- a/mats/Mf-a6s2 +++ b/mats/Mf-a6s2 @@ -15,13 +15,13 @@ m = a6s2 -fsrc = foreign1.c foreign2.c foreign3.c +fsrc = foreign1.c foreign2.c foreign3.c foreign4.c fobj = foreign1.so include Mf-base foreign1.so: ${fsrc} ../boot/$m/scheme.h - gcc -m64 -fPIC -shared -I${Include} -o foreign1.so foreign1.c foreign2.c foreign3.c + gcc -m64 -fPIC -shared -I${Include} -o foreign1.so ${fsrc} cat_flush: cat_flush.c gcc -o cat_flush cat_flush.c diff --git a/mats/Mf-arm32le b/mats/Mf-arm32le index 652435163a..ce547827ee 100644 --- a/mats/Mf-arm32le +++ b/mats/Mf-arm32le @@ -15,13 +15,13 @@ m = arm32le -fsrc = foreign1.c foreign2.c foreign3.c +fsrc = foreign1.c foreign2.c foreign3.c foreign4.c fobj = foreign1.so include Mf-base foreign1.so: ${fsrc} ../boot/$m/scheme.h - cc -fPIC -shared -I${Include} -o foreign1.so foreign1.c foreign2.c foreign3.c + cc -fPIC -shared -I${Include} -o foreign1.so ${fsrc} cat_flush: cat_flush.c cc -o cat_flush cat_flush.c diff --git a/mats/Mf-i3fb b/mats/Mf-i3fb index 1bc79ff08c..150cedbf44 100644 --- a/mats/Mf-i3fb +++ b/mats/Mf-i3fb @@ -15,13 +15,13 @@ m = i3fb -fsrc = foreign1.c foreign2.c foreign3.c +fsrc = foreign1.c foreign2.c foreign3.c foreign4.c fobj = foreign1.so include Mf-base foreign1.so: ${fsrc} ../boot/$m/scheme.h - cc -fPIC -shared -I${Include} -o foreign1.so foreign1.c foreign2.c foreign3.c + cc -fPIC -shared -I${Include} -o foreign1.so ${fsrc} cat_flush: cat_flush.c cc -o cat_flush cat_flush.c diff --git a/mats/Mf-i3le b/mats/Mf-i3le index 7a0aa01b76..8f521c8fd9 100644 --- a/mats/Mf-i3le +++ b/mats/Mf-i3le @@ -15,13 +15,13 @@ m = i3le -fsrc = foreign1.c foreign2.c foreign3.c +fsrc = foreign1.c foreign2.c foreign3.c foreign4.c fobj = foreign1.so include Mf-base foreign1.so: ${fsrc} ../boot/$m/scheme.h - cc -m32 -fPIC -shared -I${Include} -o foreign1.so foreign1.c foreign2.c foreign3.c + cc -m32 -fPIC -shared -I${Include} -o foreign1.so ${fsrc} cat_flush: cat_flush.c cc -o cat_flush cat_flush.c diff --git a/mats/Mf-i3nb b/mats/Mf-i3nb index ecd2301de1..e81f6ff862 100644 --- a/mats/Mf-i3nb +++ b/mats/Mf-i3nb @@ -15,13 +15,13 @@ m = i3nb -fsrc = foreign1.c foreign2.c foreign3.c +fsrc = foreign1.c foreign2.c foreign3.c foreign4.c fobj = foreign1.so include Mf-base foreign1.so: ${fsrc} ../boot/$m/scheme.h - cc -fPIC -shared -I${Include} -o foreign1.so foreign1.c foreign2.c foreign3.c + cc -fPIC -shared -I${Include} -o foreign1.so ${fsrc} cat_flush: cat_flush.c cc -o cat_flush cat_flush.c diff --git a/mats/Mf-i3nt b/mats/Mf-i3nt index 572f66aeb6..52e9d3e093 100644 --- a/mats/Mf-i3nt +++ b/mats/Mf-i3nt @@ -15,7 +15,7 @@ m = i3nt -fsrc = foreign1.c foreign2.c foreign3.c +fsrc = foreign1.c foreign2.c foreign3.c foreign4.c fobj = foreign1.so mdclean = cat_flush.exe cat_flush.obj foreign1.exp foreign1.lib foreign1.obj foreign2.obj foreign3.obj diff --git a/mats/Mf-i3ob b/mats/Mf-i3ob index c660dd4c3b..4e3ee1b32d 100644 --- a/mats/Mf-i3ob +++ b/mats/Mf-i3ob @@ -15,13 +15,13 @@ m = i3ob -fsrc = foreign1.c foreign2.c foreign3.c +fsrc = foreign1.c foreign2.c foreign3.c foreign4.c fobj = foreign1.so include Mf-base foreign1.so: ${fsrc} ../boot/$m/scheme.h - cc -fPIC -shared -I${Include} -o foreign1.so foreign1.c foreign2.c foreign3.c + cc -fPIC -shared -I${Include} -o foreign1.so ${fsrc} cat_flush: cat_flush.c cc -o cat_flush cat_flush.c diff --git a/mats/Mf-i3osx b/mats/Mf-i3osx index 95b41f33e7..53c7d4ab31 100644 --- a/mats/Mf-i3osx +++ b/mats/Mf-i3osx @@ -15,7 +15,7 @@ m = i3osx -fsrc = foreign1.c foreign2.c foreign3.c +fsrc = foreign1.c foreign2.c foreign3.c foreign4.c fobj = foreign1.so include Mf-base diff --git a/mats/Mf-i3qnx b/mats/Mf-i3qnx index d476e51dc6..724f2dbb84 100644 --- a/mats/Mf-i3qnx +++ b/mats/Mf-i3qnx @@ -15,13 +15,13 @@ m = i3qnx -fsrc = foreign1.c foreign2.c foreign3.c +fsrc = foreign1.c foreign2.c foreign3.c foreign4.c fobj = foreign1.so include Mf-base foreign1.so: ${fsrc} ../boot/$m/scheme.h - cc -m32 -fPIC -shared -I${Include} -o foreign1.so foreign1.c foreign2.c foreign3.c + cc -m32 -fPIC -shared -I${Include} -o foreign1.so ${fsrc} cat_flush: cat_flush.c cc -o cat_flush cat_flush.c diff --git a/mats/Mf-i3s2 b/mats/Mf-i3s2 index 76148d108a..c39fffec98 100644 --- a/mats/Mf-i3s2 +++ b/mats/Mf-i3s2 @@ -15,13 +15,13 @@ m = i3s2 -fsrc = foreign1.c foreign2.c foreign3.c +fsrc = foreign1.c foreign2.c foreign3.c foreign4.c fobj = foreign1.so include Mf-base foreign1.so: ${fsrc} ../boot/$m/scheme.h - gcc -m32 -fPIC -shared -I${Include} -o foreign1.so foreign1.c foreign2.c foreign3.c + gcc -m32 -fPIC -shared -I${Include} -o foreign1.so ${fsrc} cat_flush: cat_flush.c gcc -o cat_flush cat_flush.c diff --git a/mats/Mf-ppc32le b/mats/Mf-ppc32le index 7a6b4e3b53..28151a8376 100644 --- a/mats/Mf-ppc32le +++ b/mats/Mf-ppc32le @@ -15,13 +15,13 @@ m = ppc32le -fsrc = foreign1.c foreign2.c foreign3.c +fsrc = foreign1.c foreign2.c foreign3.c foreign4.c fobj = foreign1.so include Mf-base foreign1.so: ${fsrc} ../boot/$m/scheme.h - cc -m32 -fPIC -shared -I${Include} -o foreign1.so foreign1.c foreign2.c foreign3.c + cc -m32 -fPIC -shared -I${Include} -o foreign1.so ${fsrc} cat_flush: cat_flush.c cc -o cat_flush cat_flush.c diff --git a/mats/Mf-ta6fb b/mats/Mf-ta6fb index 6e9f75f9fc..fe3a659010 100644 --- a/mats/Mf-ta6fb +++ b/mats/Mf-ta6fb @@ -15,13 +15,13 @@ m = ta6fb -fsrc = foreign1.c foreign2.c foreign3.c +fsrc = foreign1.c foreign2.c foreign3.c foreign4.c fobj = foreign1.so include Mf-base foreign1.so: ${fsrc} ../boot/$m/scheme.h - cc -fPIC -shared -I${Include} -o foreign1.so foreign1.c foreign2.c foreign3.c + cc -fPIC -shared -I${Include} -o foreign1.so ${fsrc} cat_flush: cat_flush.c cc -o cat_flush cat_flush.c diff --git a/mats/Mf-ta6le b/mats/Mf-ta6le index cb29256f36..dc214ea4cb 100644 --- a/mats/Mf-ta6le +++ b/mats/Mf-ta6le @@ -15,13 +15,13 @@ m = ta6le -fsrc = foreign1.c foreign2.c foreign3.c +fsrc = foreign1.c foreign2.c foreign3.c foreign4.c fobj = foreign1.so include Mf-base foreign1.so: ${fsrc} ../boot/$m/scheme.h - cc -m64 -fPIC -shared -I${Include} -o foreign1.so foreign1.c foreign2.c foreign3.c + cc -m64 -fPIC -shared -I${Include} -o foreign1.so ${fsrc} cat_flush: cat_flush.c cc -o cat_flush cat_flush.c diff --git a/mats/Mf-ta6nb b/mats/Mf-ta6nb index e43e832e4b..49ca02b48b 100644 --- a/mats/Mf-ta6nb +++ b/mats/Mf-ta6nb @@ -15,13 +15,13 @@ m = ta6nb -fsrc = foreign1.c foreign2.c foreign3.c +fsrc = foreign1.c foreign2.c foreign3.c foreign4.c fobj = foreign1.so include Mf-base foreign1.so: ${fsrc} ../boot/$m/scheme.h - cc -fPIC -shared -I${Include} -o foreign1.so foreign1.c foreign2.c foreign3.c + cc -fPIC -shared -I${Include} -o foreign1.so ${fsrc} cat_flush: cat_flush.c cc -o cat_flush cat_flush.c diff --git a/mats/Mf-ta6nt b/mats/Mf-ta6nt index 16733d74c1..177a78aed0 100644 --- a/mats/Mf-ta6nt +++ b/mats/Mf-ta6nt @@ -15,7 +15,7 @@ m = ta6nt -fsrc = foreign1.c foreign2.c foreign3.c +fsrc = foreign1.c foreign2.c foreign3.c foreign4.c fobj = foreign1.so mdclean = cat_flush.exe cat_flush.obj foreign1.exp foreign1.lib foreign1.obj foreign2.obj foreign3.obj diff --git a/mats/Mf-ta6ob b/mats/Mf-ta6ob index 54efe04478..f6381ebeef 100644 --- a/mats/Mf-ta6ob +++ b/mats/Mf-ta6ob @@ -15,13 +15,13 @@ m = ta6ob -fsrc = foreign1.c foreign2.c foreign3.c +fsrc = foreign1.c foreign2.c foreign3.c foreign4.c fobj = foreign1.so include Mf-base foreign1.so: ${fsrc} ../boot/$m/scheme.h - cc -fPIC -shared -I${Include} -o foreign1.so foreign1.c foreign2.c foreign3.c + cc -fPIC -shared -I${Include} -o foreign1.so ${fsrc} cat_flush: cat_flush.c cc -o cat_flush cat_flush.c diff --git a/mats/Mf-ta6osx b/mats/Mf-ta6osx index 8f9cba4ed8..fe6e8c7ce5 100644 --- a/mats/Mf-ta6osx +++ b/mats/Mf-ta6osx @@ -15,7 +15,7 @@ m = ta6osx -fsrc = foreign1.c foreign2.c foreign3.c +fsrc = foreign1.c foreign2.c foreign3.c foreign4.c fobj = foreign1.so include Mf-base diff --git a/mats/Mf-ta6s2 b/mats/Mf-ta6s2 index 35212dd35d..08233c261e 100644 --- a/mats/Mf-ta6s2 +++ b/mats/Mf-ta6s2 @@ -15,13 +15,13 @@ m = ta6s2 -fsrc = foreign1.c foreign2.c foreign3.c +fsrc = foreign1.c foreign2.c foreign3.c foreign4.c fobj = foreign1.so include Mf-base foreign1.so: ${fsrc} ../boot/$m/scheme.h - gcc -m64 -fPIC -shared -I${Include} -o foreign1.so foreign1.c foreign2.c foreign3.c + gcc -m64 -fPIC -shared -I${Include} -o foreign1.so ${fsrc} cat_flush: cat_flush.c gcc -o cat_flush cat_flush.c diff --git a/mats/Mf-ti3fb b/mats/Mf-ti3fb index 2f01d79ca6..4e77f7590e 100644 --- a/mats/Mf-ti3fb +++ b/mats/Mf-ti3fb @@ -15,13 +15,13 @@ m = ti3fb -fsrc = foreign1.c foreign2.c foreign3.c +fsrc = foreign1.c foreign2.c foreign3.c foreign4.c fobj = foreign1.so include Mf-base foreign1.so: ${fsrc} ../boot/$m/scheme.h - cc -fPIC -shared -I${Include} -o foreign1.so foreign1.c foreign2.c foreign3.c + cc -fPIC -shared -I${Include} -o foreign1.so ${fsrc} cat_flush: cat_flush.c cc -o cat_flush cat_flush.c diff --git a/mats/Mf-ti3le b/mats/Mf-ti3le index 8d62aa997a..1f2d31aec6 100644 --- a/mats/Mf-ti3le +++ b/mats/Mf-ti3le @@ -15,13 +15,13 @@ m = ti3le -fsrc = foreign1.c foreign2.c foreign3.c +fsrc = foreign1.c foreign2.c foreign3.c foreign4.c fobj = foreign1.so include Mf-base foreign1.so: ${fsrc} ../boot/$m/scheme.h - cc -m32 -fPIC -shared -I${Include} -o foreign1.so foreign1.c foreign2.c foreign3.c + cc -m32 -fPIC -shared -I${Include} -o foreign1.so ${fsrc} cat_flush: cat_flush.c cc -o cat_flush cat_flush.c diff --git a/mats/Mf-ti3nb b/mats/Mf-ti3nb index a56b37bdb4..94ccf0102c 100644 --- a/mats/Mf-ti3nb +++ b/mats/Mf-ti3nb @@ -15,13 +15,13 @@ m = ti3nb -fsrc = foreign1.c foreign2.c foreign3.c +fsrc = foreign1.c foreign2.c foreign3.c foreign4.c fobj = foreign1.so include Mf-base foreign1.so: ${fsrc} ../boot/$m/scheme.h - cc -fPIC -shared -I${Include} -o foreign1.so foreign1.c foreign2.c foreign3.c + cc -fPIC -shared -I${Include} -o foreign1.so ${fsrc} cat_flush: cat_flush.c cc -o cat_flush cat_flush.c diff --git a/mats/Mf-ti3nt b/mats/Mf-ti3nt index 5059169016..ab61f72a34 100644 --- a/mats/Mf-ti3nt +++ b/mats/Mf-ti3nt @@ -15,7 +15,7 @@ m = ti3nt -fsrc = foreign1.c foreign2.c foreign3.c +fsrc = foreign1.c foreign2.c foreign3.c foreign4.c fobj = foreign1.so mdclean = cat_flush.exe cat_flush.obj foreign1.exp foreign1.lib foreign1.obj foreign2.obj foreign3.obj diff --git a/mats/Mf-ti3ob b/mats/Mf-ti3ob index e023a17dbe..fca1378175 100644 --- a/mats/Mf-ti3ob +++ b/mats/Mf-ti3ob @@ -15,13 +15,13 @@ m = ti3ob -fsrc = foreign1.c foreign2.c foreign3.c +fsrc = foreign1.c foreign2.c foreign3.c foreign4.c fobj = foreign1.so include Mf-base foreign1.so: ${fsrc} ../boot/$m/scheme.h - cc -fPIC -shared -I${Include} -o foreign1.so foreign1.c foreign2.c foreign3.c + cc -fPIC -shared -I${Include} -o foreign1.so ${fsrc} cat_flush: cat_flush.c cc -o cat_flush cat_flush.c diff --git a/mats/Mf-ti3osx b/mats/Mf-ti3osx index baeaaa40ba..9bb964e5d8 100644 --- a/mats/Mf-ti3osx +++ b/mats/Mf-ti3osx @@ -15,7 +15,7 @@ m = ti3osx -fsrc = foreign1.c foreign2.c foreign3.c +fsrc = foreign1.c foreign2.c foreign3.c foreign4.c fobj = foreign1.so include Mf-base diff --git a/mats/Mf-ti3s2 b/mats/Mf-ti3s2 index 4bf6059990..2514adf351 100644 --- a/mats/Mf-ti3s2 +++ b/mats/Mf-ti3s2 @@ -15,13 +15,13 @@ m = ti3s2 -fsrc = foreign1.c foreign2.c foreign3.c +fsrc = foreign1.c foreign2.c foreign3.c foreign4.c fobj = foreign1.so include Mf-base foreign1.so: ${fsrc} ../boot/$m/scheme.h - gcc -m32 -fPIC -shared -I${Include} -o foreign1.so foreign1.c foreign2.c foreign3.c + gcc -m32 -fPIC -shared -I${Include} -o foreign1.so ${fsrc} cat_flush: cat_flush.c gcc -o cat_flush cat_flush.c diff --git a/mats/Mf-tppc32le b/mats/Mf-tppc32le index fdbca7a5eb..6c8945ca64 100644 --- a/mats/Mf-tppc32le +++ b/mats/Mf-tppc32le @@ -15,13 +15,13 @@ m = tppc32le -fsrc = foreign1.c foreign2.c foreign3.c +fsrc = foreign1.c foreign2.c foreign3.c foreign4.c fobj = foreign1.so include Mf-base foreign1.so: ${fsrc} ../boot/$m/scheme.h - cc -m32 -fPIC -shared -I${Include} -o foreign1.so foreign1.c foreign2.c foreign3.c + cc -m32 -fPIC -shared -I${Include} -o foreign1.so ${fsrc} cat_flush: cat_flush.c cc -o cat_flush cat_flush.c diff --git a/mats/foreign.ms b/mats/foreign.ms index c4f93fb672..dbdd8ba5bb 100644 --- a/mats/foreign.ms +++ b/mats/foreign.ms @@ -2643,3 +2643,248 @@ read) '(3.5 2.5 -1.5 6.75 8.25 -9.5 1e32 -4.5 7.25 -0.5)) ) + +(mat structs + (begin + (define-ftype i8 integer-8) + (define-ftype u8 unsigned-8) + (define-ftype u16 unsigned-16) + (define-ftype i64 integer-64) + (define-syntax check* + (syntax-rules () + [(_ T s [vi ...] [T-ref ...] [T-set! ...]) + (let () + (define-ftype callback (function ((& T)) double)) + (define-ftype callback-two (function ((& T) (& T)) double)) + (define-ftype pre-int-callback (function (int (& T)) double)) + (define-ftype pre-double-callback (function (double (& T)) double)) + (define-ftype callback-r (function () (& T))) + (define get (foreign-procedure (format "f4_get~a" s) + () (& T))) + (define sum (foreign-procedure (format "f4_sum~a" s) + ((& T)) double)) + (define sum_two (foreign-procedure (format "f4_sum_two~a" s) + ((& T) (& T)) double)) + (define sum_pre_int (foreign-procedure (format "f4_sum_pre_int~a" s) + (int (& T)) double)) + (define sum_pre_int_int (foreign-procedure (format "f4_sum_pre_int_int~a" s) + (int int (& T)) double)) + (define sum_pre_int_int_int_int (foreign-procedure (format "f4_sum_pre_int_int_int_int~a" s) + (int int int int (& T)) double)) + (define sum_pre_int_int_int_int_int_int (foreign-procedure (format "f4_sum_pre_int_int_int_int_int_int~a" s) + (int int int int int int (& T)) double)) + (define sum_post_int (foreign-procedure (format "f4_sum~a_post_int" s) + ((& T) int) double)) + (define sum_pre_double (foreign-procedure (format "f4_sum_pre_double~a" s) + (double (& T)) double)) + (define sum_pre_double_double (foreign-procedure (format "f4_sum_pre_double_double~a" s) + (double double (& T)) double)) + (define sum_pre_double_double_double_double (foreign-procedure (format "f4_sum_pre_double_double_double_double~a" s) + (double double double double (& T)) double)) + (define sum_pre_double_double_double_double_double_double_double_double + (foreign-procedure (format "f4_sum_pre_double_double_double_double_double_double_double_double~a" s) + (double double double double double double double double (& T)) double)) + (define sum_post_double (foreign-procedure (format "f4_sum~a_post_double" s) + ((& T) double) double)) + (define cb_send (foreign-procedure (format "f4_cb_send~a" s) + ((* callback)) double)) + (define cb_send_two (foreign-procedure (format "f4_cb_send_two~a" s) + ((* callback-two)) double)) + (define cb_send_pre_int (foreign-procedure (format "f4_cb_send_pre_int~a" s) + ((* pre-int-callback)) double)) + (define cb_send_pre_double (foreign-procedure (format "f4_cb_send_pre_double~a" s) + ((* pre-double-callback)) double)) + (define sum_cb (foreign-procedure (format "f4_sum_cb~a" s) + ((* callback-r)) double)) + (define-syntax with-callback + (syntax-rules () + [(_ ([id rhs]) + body) + (let ([id rhs]) + (let ([v body]) + (unlock-object + (foreign-callable-code-object + (ftype-pointer-address id))) + v))])) + (and (let ([v (make-ftype-pointer T (foreign-alloc (ftype-sizeof T)))]) + (get v) + (and (= (T-ref v) vi) + ... + (begin + (foreign-free (ftype-pointer-address v)) + #t))) + (let ([a (make-ftype-pointer T (malloc_at_boundary (ftype-sizeof T)))]) + (T-set! a) ... + (and (= (+ vi ...) (sum a)) + (= (+ vi ... vi ...) (sum_two a a)) + (= (+ 8 vi ...) (sum_pre_int 8 a)) + (= (+ 8 9 vi ...) (sum_pre_int_int 8 9 a)) + (= (+ 8 9 10 11 vi ...) (sum_pre_int_int_int_int 8 9 10 11 a)) + (= (+ 8 9 10 11 12 13 vi ...) (sum_pre_int_int_int_int_int_int 8 9 10 11 12 13 a)) + (= (+ 8 vi ...) (sum_post_int a 8)) + (= (+ 8.25 vi ...) (sum_pre_double 8.25 a)) + (= (+ 8.25 9.25 vi ...) (sum_pre_double_double 8.25 9.25 a)) + (= (+ 8.25 9.25 10.25 11.25 vi ...) (sum_pre_double_double_double_double 8.25 9.25 10.25 11.25 a)) + (= (+ 8.25 9.25 10.25 11.25 12.25 13.25 14.25 15.25 vi ...) + (sum_pre_double_double_double_double_double_double_double_double + 8.25 9.25 10.25 11.25 12.25 13.25 14.25 15.25 a)) + (= (+ 8.25 vi ...) (sum_post_double a 8.25)) + (= (+ 1.0 vi ...) (with-callback ([cb (make-ftype-pointer + callback + (lambda (r) + (exact->inexact (+ (T-ref r) ...))))]) + (cb_send cb))) + (= (+ 1.0 vi ... vi ...) (with-callback ([cb (make-ftype-pointer + callback-two + (lambda (r1 r2) + (exact->inexact (+ (T-ref r1) ... + (T-ref r2) ...))))]) + (cb_send_two cb))) + (= (+ 1.0 8 vi ...) (with-callback ([cb (make-ftype-pointer + pre-int-callback + (lambda (v r) + (exact->inexact (+ v (T-ref r) ...))))]) + (cb_send_pre_int cb))) + (= (+ 1.0 8.25 vi ...) (with-callback ([cb (make-ftype-pointer + pre-double-callback + (lambda (v r) + (exact->inexact (+ v (T-ref r) ...))))]) + (cb_send_pre_double cb))) + (= (+ vi ...) (with-callback ([cb (make-ftype-pointer + callback-r + (lambda (r) + (T-set! r) ...))]) + (sum_cb cb))) + (begin + (free_at_boundary (ftype-pointer-address a)) + #t)))))])) + (define-syntax check-n + (syntax-rules () + [(_ [ni ti vi] ...) + (let () + (define-ftype T (struct [ni ti] ...)) + (define s (apply string-append + "_struct" + (let loop ([l '(ti ...)]) + (cond + [(null? l) '()] + [else (cons (format "_~a" (car l)) + (loop (cdr l)))])))) + (check* T s + [vi ...] + [(lambda (a) (ftype-ref T (ni) a)) ...] + [(lambda (a) (ftype-set! T (ni) a vi)) ...]))])) + (define-syntax check + (syntax-rules () + [(_ t1 v1) + (check* t1 (format "_~a" 't1) + [v1] + [(lambda (a) (ftype-ref t1 () a))] + [(lambda (a) (ftype-set! t1 () a v1))])])) + (define-syntax check-union + (syntax-rules () + [(_ [n0 t0 v0] [ni ti vi] ...) + (let () + (define-ftype T (union [n0 t0] [ni ti] ...)) + (define s (apply string-append + "_union" + (let loop ([l '(t0 ti ...)]) + (cond + [(null? l) '()] + [else (cons (format "_~a" (car l)) + (loop (cdr l)))])))) + (check* T s + [v0] + [(lambda (a) (ftype-ref T (n0) a))] + [(lambda (a) (ftype-set! T (n0) a v0))]))])) + (define-syntax check-1 + (syntax-rules () + [(_ t1 v1) + (check-n [x t1 v1])])) + (define-syntax check-2 + (syntax-rules () + [(_ t1 t2 v1 v2) + (check-n [x t1 v1] [y t2 v2])])) + (define-syntax check-2-set + (syntax-rules () + [(_ t x) + (and + (check-2 t i8 (+ 1 x) 10) + (check-2 t short (+ 2 x) 20) + (check-2 t long (+ 3 x) 30) + (check-2 t i64 (+ 5 x) 50) + (check-2 short t 6 (+ 60 x)) + (check-2 long t 7 (+ 70 x)) + (check-2 i64 t 9 (+ 90 x)) + (check-2 i8 t 10 (+ 100 x)))])) + (define-syntax check-3 + (syntax-rules () + [(_ t1 t2 t3 v1 v2 v3) + (check-n [x t1 v1] [y t2 v2] [z t3 v3])])) + (define-syntax check-3-set + (syntax-rules () + [(_ t x) + (and + (check-3 t i8 int (+ 1 x) 10 100) + (check-3 t short int (+ 2 x) 20 200) + (check-3 t long int (+ 3 x) 30 300) + (check-3 t i64 int (+ 5 x) 50 500) + (check-3 short t int 6 (+ 60 x) 600) + (check-3 long t int 7 (+ 70 x) 700) + (check-3 i64 t int 9 (+ 90 x) 900) + (check-3 i8 t int 10 (+ 100 x) 1000))])) + (define malloc_at_boundary (foreign-procedure "malloc_at_boundary" + (int) uptr)) + (define free_at_boundary (foreign-procedure "free_at_boundary" + (uptr) void)) + #t) + (check i8 -11) + (check u8 129) + (check short -22) + (check u16 33022) + (check long 33) + (check int 44) + (check i64 49) + (check float 55.0) + (check double 66.0) + (check-1 i8 -12) + (check-1 u8 212) + (check-1 short -23) + (check-1 u16 33023) + (check-1 long 34) + (check-1 int 45) + (check-1 i64 48) + (check-1 float 56.0) + (check-1 double 67.0) + (check-2-set int 0) + (check-2-set float 0.5) + (check-2-set double 0.25) + (check-2 int int 4 40) + (check-2 float float 4.5 40.5) + (check-2 double double 4.25 40.25) + (check-3-set int 0) + (check-3-set float 0.5) + (check-3-set double 0.25) + (check-3 i8 i8 i8 4 38 127) + (check-3 short short short 4 39 399) + (check-3 int int int 4 40 400) + (check-3 float float float 4.5 40.5 400.5) + (check-3 double double double 4.25 40.25 400.25) + (check-n [x i8 1] [y i8 2] [z i8 3] [w i8 4] [q i8 5]) + (check-n [x i8 1] [y i8 2] [z i8 3] [w i8 4] [q i8 5] [r i8 6] [s i8 7]) + (check-union [x i8 -17]) + (check-union [x u8 217]) + (check-union [x short -27]) + (check-union [x u16 33027]) + (check-union [x long 37]) + (check-union [x int 47]) + (check-union [x i64 49]) + (check-union [x float 57.0]) + (check-union [x double 77.0]) + (check-union [x i8 18] [y int 0]) + (check-union [x short 28] [y int 0]) + (check-union [x long 38] [y int 0]) + (check-union [x int 48] [y int 0]) + (check-union [x i64 43] [y int 0]) + (check-union [x float 58.0] [y int 0]) + (check-union [x double 68.0] [y int 0])) diff --git a/mats/foreign4.c b/mats/foreign4.c new file mode 100644 index 0000000000..b2bfb62f0b --- /dev/null +++ b/mats/foreign4.c @@ -0,0 +1,288 @@ +/* foreign4.c + * Copyright 1984-2017 Cisco Systems, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +typedef signed char i8; +typedef unsigned char u8; +typedef unsigned short u16; +#ifdef _WIN32 +typedef __int64 i64; +# define EXPORT extern __declspec (dllexport) +#else +typedef long long i64; +# define EXPORT +#endif + +/* To help make sure that argument and result handling doens't + read or write too far, try to provide functions that allocate + a structure at the end of a memory page (where the next page is + likely to be unmapped) */ +#if defined(__linux__) || (defined(__APPLE__) && defined(__MACH__)) +# include +# include +# include +# include + +EXPORT void *malloc_at_boundary(int sz) +{ + intptr_t alloc_size = getpagesize(); + char *p; + p = mmap(NULL, alloc_size, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0); + return p + alloc_size - sz; +} + +EXPORT void free_at_boundary(void *p) +{ + intptr_t alloc_size = getpagesize(); + munmap((void *)(((intptr_t)p) & ~(alloc_size-1)), alloc_size); +} +#else +EXPORT void *malloc_at_boundary(int sz) +{ + return malloc(sz); +} + +EXPORT void free_at_boundary(void *p) +{ + free(p); +} +#endif + +#define GEN(ts, init, sum) \ + EXPORT ts f4_get_ ## ts () { \ + ts r = init; \ + return r; \ + } \ + EXPORT double f4_sum_ ## ts (ts v) { \ + return sum(v); \ + } \ + EXPORT double f4_sum_two_ ## ts (ts v1, ts v2) { \ + return sum(v1) + sum(v2); \ + } \ + EXPORT double f4_sum_pre_double_ ## ts (double v0, ts v) { \ + return v0 + sum(v); \ + } \ + EXPORT double f4_sum_pre_double_double_ ## ts (double v0, double v1, ts v) { \ + return v0 + v1 + sum(v); \ + } \ + EXPORT double f4_sum_pre_double_double_double_double_ ## ts (double v0, double v1, double v2, double v3, ts v) { \ + return v0 + v1 + v2 + v3 + sum(v); \ + } \ + EXPORT double f4_sum_pre_double_double_double_double_double_double_double_double_ ## ts \ + (double v0, double v1, double v2, double v3, double v4, double v5, double v6, double v7, ts v) { \ + return v0 + v1 + v2 + v3 + v4 + v5 + v6 + v7 + sum(v); \ + } \ + EXPORT double f4_sum_ ## ts ## _post_double (ts v, double v0) { \ + return v0 + sum(v); \ + } \ + EXPORT double f4_sum_pre_int_ ## ts (int v0, ts v) { \ + return (double)v0 + sum(v); \ + } \ + EXPORT double f4_sum_pre_int_int_ ## ts (int v0, int v1, ts v) { \ + return (double)v0 + (double)v1 + sum(v); \ + } \ + EXPORT double f4_sum_pre_int_int_int_int_ ## ts (int v0, int v1, int v2, int v3, ts v) { \ + return (double)v0 + (double)v1 + (double)v2 + (double)v3 + sum(v); \ + } \ + EXPORT double f4_sum_pre_int_int_int_int_int_int_ ## ts (int v0, int v1, int v2, int v3, int v4, int v5, ts v) { \ + return (double)v0 + (double)v1 + (double)v2 + (double)v3 + (double)v4 + (double)v5 + sum(v); \ + } \ + EXPORT double f4_sum_ ## ts ## _post_int (ts v, int v0) { \ + return (double)v0 + sum(v); \ + } \ + EXPORT double f4_cb_send_ ## ts (double (*cb)(ts)) { \ + ts r = init; \ + return cb(r) + 1.0; \ + } \ + EXPORT double f4_cb_send_two_ ## ts (double (*cb)(ts, ts)) { \ + ts r1 = init; \ + ts r2 = init; \ + return cb(r1, r2) + 1.0; \ + } \ + EXPORT double f4_cb_send_pre_int_ ## ts (double (*cb)(int, ts)) { \ + ts r = init; \ + return cb(8, r) + 1.0; \ + } \ + EXPORT double f4_cb_send_pre_int_int_ ## ts (double (*cb)(int, int, ts)) { \ + ts r = init; \ + return cb(8, 9, r) + 1.0; \ + } \ + EXPORT double f4_cb_send_pre_int_int_int_int_ ## ts (double (*cb)(int, int, int, int, ts)) { \ + ts r = init; \ + return cb(8, 9, 10, 11, r) + 1.0; \ + } \ + EXPORT double f4_cb_send_pre_int_int_int_int_int_int_ ## ts (double (*cb)(int, int, int, int, int, int, ts)) { \ + ts r = init; \ + return cb(8, 9, 10, 11, 12, 13, r) + 1.0; \ + } \ + EXPORT double f4_cb_send_pre_double_ ## ts (double (*cb)(double, ts)) { \ + ts r = init; \ + return cb(8.25, r) + 1.0; \ + } \ + EXPORT double f4_cb_send_pre_double_double_ ## ts (double (*cb)(double, double, ts)) { \ + ts r = init; \ + return cb(8.25, 9.25, r) + 1.0; \ + } \ + EXPORT double f4_cb_send_pre_double_double_double_double_ ## ts (double (*cb)(double, double, double, double, ts)) { \ + ts r = init; \ + return cb(8.25, 9.25, 10.25, 11.25, r) + 1.0; \ + } \ + EXPORT double f4_cb_send_pre_double_double_double_double_double_double_double_double_ ## ts \ + (double (*cb)(double, double, double, double, double, double, double, double, ts)) { \ + ts r = init; \ + return cb(8.25, 9.25, 10.25, 11.25, 12.25, 13.25, 14.25, 15.25, r) + 1.0; \ + } \ + EXPORT double f4_sum_cb_ ## ts (ts (*cb)()) { \ + ts v = cb(); \ + return sum(v); \ + } + +#define TO_DOUBLE(x) ((double)(x)) +GEN(i8, -11, TO_DOUBLE) +GEN(u8, 129, TO_DOUBLE) +GEN(short, -22, TO_DOUBLE) +GEN(u16, 33022, TO_DOUBLE) +GEN(long, 33, TO_DOUBLE) +GEN(int, 44, TO_DOUBLE) +GEN(i64, 49, TO_DOUBLE) +GEN(float, 55.0, TO_DOUBLE) +GEN(double, 66.0, TO_DOUBLE) + +/* Some ABIs treat a struct containing a single field different that + just the field */ +#define GEN_1(t1, v1) \ + typedef struct struct_ ## t1 { t1 x; } struct_ ## t1; \ + static double _f4_sum_struct_ ## t1 (struct_ ## t1 v) { \ + return (double)v.x; \ + } \ + static struct_ ## t1 init_struct_ ## t1 = { v1 }; \ + GEN(struct_ ## t1, init_struct_ ## t1, _f4_sum_struct_ ## t1) + +GEN_1(i8, -12) +GEN_1(u8, 212) +GEN_1(short, -23) +GEN_1(u16, 33023) +GEN_1(long, 34) +GEN_1(int, 45) +GEN_1(i64, 48) +GEN_1(float, 56.0) +GEN_1(double, 67.0) + +#define GEN_2(t1, t2, v1, v2) \ + typedef struct struct_ ## t1 ## _ ## t2 { t1 x; t2 y; } struct_ ## t1 ## _ ## t2; \ + static double _f4_sum_struct_ ## t1 ## _ ## t2 (struct_ ## t1 ## _ ## t2 v) { \ + return (double)v.x + (double)v.y; \ + } \ + static struct_ ## t1 ## _ ## t2 init_struct_ ## t1 ## _ ## t2 = { v1, v2 }; \ + GEN(struct_ ## t1 ## _ ## t2, init_struct_ ## t1 ## _ ## t2, _f4_sum_struct_ ## t1 ## _ ## t2) + +#define GEN_2_SET(t, x) \ + GEN_2(t, i8, 1+x, 10) \ + GEN_2(t, short, 2+x, 20) \ + GEN_2(t, long, 3+x, 30) \ + GEN_2(t, i64, 5+x, 50) \ + GEN_2(short, t, 6, 60+x) \ + GEN_2(long, t, 7, 70+x) \ + GEN_2(i64, t, 9, 90+x) \ + GEN_2(i8, t, 10, 100+x) + +GEN_2_SET(int, 0) +GEN_2_SET(float, 0.5) +GEN_2_SET(double, 0.25) + +GEN_2(int, int, 4, 40) +GEN_2(float, float, 4.5, 40.5) +GEN_2(double, double, 4.25, 40.25) + +#define GEN_3(t1, t2, t3, v1, v2, v3) \ + typedef struct struct_ ## t1 ## _ ## t2 ## _ ## t3 { t1 x; t2 y; t3 z; } struct_ ## t1 ## _ ## t2 ## _ ## t3; \ + static double _f4_sum_struct_ ## t1 ## _ ## t2 ## _ ## t3 (struct_ ## t1 ## _ ## t2 ## _ ## t3 v) { \ + return (double)v.x + (double)v.y + (double)v.z; \ + } \ + static struct_ ## t1 ## _ ## t2 ## _ ## t3 init_struct_ ## t1 ## _ ## t2 ## _ ## t3 = { v1, v2, v3 }; \ + GEN(struct_ ## t1 ## _ ## t2 ## _ ## t3, init_struct_ ## t1 ## _ ## t2 ## _ ## t3, _f4_sum_struct_ ## t1 ## _ ## t2 ## _ ## t3) + +#define GEN_3_SET(t, x) \ + GEN_3(t, i8, int, 1+x, 10, 100) \ + GEN_3(t, short, int, 2+x, 20, 200) \ + GEN_3(t, long, int, 3+x, 30, 300) \ + GEN_3(t, i64, int, 5+x, 50, 500) \ + GEN_3(short, t, int, 6, 60+x, 600) \ + GEN_3(long, t, int, 7, 70+x, 700) \ + GEN_3(i64, t, int, 9, 90+x, 900) \ + GEN_3(i8, t, int, 10, 100+x, 1000) + +GEN_3_SET(int, 0) +GEN_3_SET(float, 0.5) +GEN_3_SET(double, 0.25) + +GEN_3(i8, i8, i8, 4, 38, 127) +GEN_3(short, short, short, 4, 39, 399) +GEN_3(int, int, int, 4, 40, 400) +GEN_3(float, float, float, 4.5, 40.5, 400.5) +GEN_3(double, double, double, 4.25, 40.25, 400.25) + +typedef struct struct_i8_i8_i8_i8_i8 { i8 x, y, z, w, q; } struct_i8_i8_i8_i8_i8; +static double _f4_sum_struct_i8_i8_i8_i8_i8 (struct_i8_i8_i8_i8_i8 v) { + return (double)v.x + (double)v.y + (double)v.z + (double)v.w + (double)v.q; +} +static struct struct_i8_i8_i8_i8_i8 init_struct_i8_i8_i8_i8_i8 = { 1, 2, 3, 4, 5 }; +GEN(struct_i8_i8_i8_i8_i8, init_struct_i8_i8_i8_i8_i8, _f4_sum_struct_i8_i8_i8_i8_i8) + +typedef struct struct_i8_i8_i8_i8_i8_i8_i8 { i8 x, y, z, w, q, r, s; } struct_i8_i8_i8_i8_i8_i8_i8; +static double _f4_sum_struct_i8_i8_i8_i8_i8_i8_i8 (struct struct_i8_i8_i8_i8_i8_i8_i8 v) { + return (double)v.x + (double)v.y + (double)v.z + (double)v.w + (double)v.q + (double)v.r + (double)v.s; +} +static struct struct_i8_i8_i8_i8_i8_i8_i8 init_struct_i8_i8_i8_i8_i8_i8_i8 = { 1, 2, 3, 4, 5, 6, 7 }; +GEN(struct_i8_i8_i8_i8_i8_i8_i8, init_struct_i8_i8_i8_i8_i8_i8_i8, _f4_sum_struct_i8_i8_i8_i8_i8_i8_i8) + +/* Some ABIs treat a union containing a single field different that + just the field */ +#define GEN_U1(t1, v1) \ + typedef union union_ ## t1 { t1 x; } union_ ## t1; \ + static double _f4_sum_union_ ## t1 (union_ ## t1 v) { \ + return (double)v.x; \ + } \ + static union_ ## t1 init_union_ ## t1 = { v1 }; \ + GEN(union_ ## t1, init_union_ ## t1, _f4_sum_union_ ## t1) + +GEN_U1(i8, -17) +GEN_U1(u8, 217) +GEN_U1(short, -27) +GEN_U1(u16, 33027) +GEN_U1(long, 37) +GEN_U1(int, 47) +GEN_U1(i64, 49) +GEN_U1(float, 57.0) +GEN_U1(double, 77.0) + +#define GEN_U2(t1, t2, v1) \ + typedef union union_ ## t1 ## _ ## t2 { t1 x; t2 y; } union_ ## t1 ## _ ## t2; \ + static double _f4_sum_union_ ## t1 ## _ ## t2 (union_ ## t1 ## _ ## t2 v) { \ + return (double)v.x; \ + } \ + static union_ ## t1 ## _ ## t2 init_union_ ## t1 ## _ ## t2 = { v1 }; \ + GEN(union_ ## t1 ## _ ## t2, init_union_ ## t1 ## _ ## t2, _f4_sum_union_ ## t1 ## _ ## t2) + +GEN_U2(i8, int, 18) +GEN_U2(short, int, 28) +GEN_U2(long, int, 38) +GEN_U2(int, int, 48) +GEN_U2(i64, int, 43) +GEN_U2(float, int, 58.0) +GEN_U2(double, int, 68.0) diff --git a/release_notes/release_notes.stex b/release_notes/release_notes.stex index bc9cfc3fe6..b4df634cd3 100644 --- a/release_notes/release_notes.stex +++ b/release_notes/release_notes.stex @@ -58,6 +58,18 @@ Online versions of both books can be found at %----------------------------------------------------------------------------- \section{Functionality Changes}\label{section:functionality} +\subsection{Foreign-procedure struct arguments and results (9.5.1)} + +A new \scheme{(& \var{ftype})} form allows a struct or union to be +passed between Scheme and a foreign procedure. The Scheme-side +representation of a \scheme{(& \var{ftype})} argument is the +same as a \scheme{(* \var{ftype})} argument, but where +\scheme{(& \var{ftype})} passes an address between the Scheme and C +worlds, \scheme{(& \var{ftype})} passes a copy of the data at the +address. When \scheme{(& \var{ftype})} is used as a result type, +an extra \scheme{(* \var{ftype})} argument must be provided to receive +the copied result, and the directly returned result is unspecified. + \subsection{Record equality and hashing (9.5)} The new procedures \scheme{record-type-equal-procedure} and diff --git a/s/arm32.ss b/s/arm32.ss index 9710b6becd..711261939a 100644 --- a/s/arm32.ss +++ b/s/arm32.ss @@ -890,7 +890,7 @@ asm-fl-load/cvt asm-fl-store/cvt asm-flt asm-trunc asm-lock asm-lock+/- asm-flop-2 asm-flsqrt asm-c-simple-call - asm-save-flrv asm-restore-flrv asm-return asm-size + asm-save-flrv asm-restore-flrv asm-return asm-c-return asm-size asm-enter asm-foreign-call asm-foreign-callable asm-read-counter asm-inc-cc-counter @@ -2051,7 +2051,7 @@ (rec asm-c-simple-call-internal (lambda (code* jmp-tmp . ignore) (asm-helper-call code* target save-ra? jmp-tmp)))))) - + (define-who asm-indirect-call (lambda (code* dest lr . ignore) (safe-assert (eq? lr %lr)) @@ -2277,6 +2277,8 @@ ; NB: reads from %lr...should be okay if declare-intrinsics sets up return-live* properly (define asm-return (lambda () (emit bx (cons 'reg %lr) '()))) + (define asm-c-return (lambda (info) (emit bx (cons 'reg %lr) '()))) + (define-who asm-shiftop (lambda (op) (lambda (code* dest src0 src1) @@ -2313,10 +2315,28 @@ (module (asm-foreign-call asm-foreign-callable) (define align (lambda (b x) (let ([k (- b 1)]) (fxlogand (fx+ x k) (fxlognot k))))) + (define (double-member? m) (and (eq? (car m) 'float) + (fx= (cadr m) 8))) + (define (float-member? m) (and (eq? (car m) 'float) + (fx= (cadr m) 4))) + (define (indirect-result-that-fits-in-registers? result-type) + (nanopass-case (Ltype Type) result-type + [(fp-ftd& ,ftd) + (let* ([members ($ftd->members ftd)] + [num-members (length members)]) + (or (fx<= ($ftd-size ftd) 4) + (and (fx= num-members 1) + ;; a struct containing only int64 is not returned in a register + (or (not ($ftd-compound? ftd)))) + (and (fx<= num-members 4) + (or (andmap double-member? members) + (andmap float-member? members)))))] + [else #f])) + (define sgl-regs (lambda () (list %Cfparg1 %Cfparg1b %Cfparg2 %Cfparg2b %Cfparg3 %Cfparg3b %Cfparg4 %Cfparg4b + %Cfparg5 %Cfparg5b %Cfparg6 %Cfparg6b %Cfparg7 %Cfparg7b %Cfparg8 %Cfparg8b))) (define-who asm-foreign-call (with-output-language (L13 Effect) (define int-regs (lambda () (list %Carg1 %Carg2 %Carg3 %Carg4))) - (define sgl-regs (lambda () (list %Cfparg1 %Cfparg1b %Cfparg2 %Cfparg2b %Cfparg3 %Cfparg3b %Cfparg4 %Cfparg4b %Cfparg5 %Cfparg5b %Cfparg6 %Cfparg6b %Cfparg7 %Cfparg7b %Cfparg8 %Cfparg8b))) (letrec ([load-double-stack (lambda (offset) (lambda (x) ; requires var @@ -2327,7 +2347,7 @@ (lambda (offset) (lambda (x) ; requires var (%seq - (inline ,(make-info-loadfl %flreg1) ,%load-double->single ,x ,%zero ,(%constant flonum-data-disp)) + (inline ,(make-info-loadfl %flreg1) ,%load-double->single ,x ,%zero ,(%constant flonum-data-disp)) (inline ,(make-info-loadfl %flreg1) ,%store-single ,%sp ,%zero (immediate ,offset)))))] [load-int-stack (lambda (offset) @@ -2339,14 +2359,33 @@ (%seq (set! ,(%mref ,%sp ,offset) ,lorhs) (set! ,(%mref ,%sp ,(fx+ offset 4)) ,hirhs))))] + [load-int-indirect-stack + (lambda (offset from-offset size) + (lambda (x) ; requires var + (case size + [(3) + (%seq + (set! ,(%mref ,%sp ,offset) (inline ,(make-info-load 'integer-16 #f) ,%load ,x ,%zero (immediate ,from-offset))) + (set! ,(%mref ,%sp ,(fx+ offset 2)) (inline ,(make-info-load 'integer-8 #f) ,%load ,x ,%zero (immediate ,(fx+ from-offset 2)))))] + [else + `(set! ,(%mref ,%sp ,offset) ,(case size + [(1) `(inline ,(make-info-load 'integer-8 #f) ,%load ,x ,%zero (immediate ,from-offset))] + [(2) `(inline ,(make-info-load 'integer-16 #f) ,%load ,x ,%zero (immediate ,from-offset))] + [(4) (%mref ,x ,from-offset)]))])))] + [load-int64-indirect-stack + (lambda (offset from-offset) + (lambda (x) ; requires var + (%seq + (set! ,(%mref ,%sp ,offset) ,(%mref ,x ,from-offset)) + (set! ,(%mref ,%sp ,(fx+ offset 4)) ,(%mref ,x ,(fx+ from-offset 4))))))] [load-double-reg - (lambda (fpreg) + (lambda (fpreg fp-disp) (lambda (x) ; requires var - `(inline ,(make-info-loadfl fpreg) ,%load-double ,x ,%zero ,(%constant flonum-data-disp))))] + `(inline ,(make-info-loadfl fpreg) ,%load-double ,x ,%zero (immediate ,fp-disp))))] [load-single-reg - (lambda (fpreg) + (lambda (fpreg fp-disp single?) (lambda (x) ; requires var - `(inline ,(make-info-loadfl fpreg) ,%load-double->single ,x ,%zero ,(%constant flonum-data-disp))))] + `(inline ,(make-info-loadfl fpreg) ,(if single? %load-single %load-double->single) ,x ,%zero (immediate ,fp-disp))))] [load-int-reg (lambda (ireg) (lambda (x) @@ -2357,6 +2396,28 @@ (%seq (set! ,loreg ,lo) (set! ,hireg ,hi))))] + [load-int-indirect-reg + (lambda (ireg from-offset size) + (lambda (x) + (case size + [(3) + (let ([tmp %lr]) ; ok to use %lr here? + (%seq + (set! ,ireg (inline ,(make-info-load 'integer-16 #f) ,%load ,x ,%zero (immediate ,from-offset))) + (set! ,tmp (inline ,(make-info-load 'integer-8 #f) ,%load ,x ,%zero (immediate ,(fx+ from-offset 2)))) + (set! ,tmp ,(%inline sll ,tmp (immediate 16))) + (set! ,ireg ,(%inline + ,ireg ,tmp))))] + [else + `(set! ,ireg ,(case size + [(1) `(inline ,(make-info-load 'integer-8 #f) ,%load ,x ,%zero (immediate ,from-offset))] + [(2) `(inline ,(make-info-load 'integer-16 #f) ,%load ,x ,%zero (immediate ,from-offset))] + [(4) (%mref ,x ,from-offset)]))])))] + [load-int64-indirect-reg + (lambda (loreg hireg from-offset) + (lambda (x) + (%seq + (set! ,loreg ,(%mref ,x ,from-offset)) + (set! ,hireg ,(%mref ,x ,(fx+ from-offset 4))))))] [do-args (lambda (types) ; sgl* is always of even-length, i.e., has a sgl/dbl reg first @@ -2372,21 +2433,97 @@ (cons (load-double-stack isp) locs) live* int* '() #f (fx+ isp 8))) (loop (cdr types) - (cons (load-double-reg (car sgl*)) locs) + (cons (load-double-reg (car sgl*) (constant flonum-data-disp)) locs) live* int* (cddr sgl*) bsgl isp))] [(fp-single-float) (if bsgl (loop (cdr types) - (cons (load-single-reg bsgl) locs) + (cons (load-single-reg bsgl (constant flonum-data-disp) #f) locs) live* int* sgl* #f isp) (if (null? sgl*) (loop (cdr types) (cons (load-single-stack isp) locs) live* int* '() #f (fx+ isp 4)) (loop (cdr types) - (cons (load-single-reg (car sgl*)) locs) + (cons (load-single-reg (car sgl*) (constant flonum-data-disp) #f) locs) live* int* (cddr sgl*) (cadr sgl*) isp)))] - [else + [(fp-ftd& ,ftd) + (let ([size ($ftd-size ftd)] + [members ($ftd->members ftd)] + [combine-loc (lambda (loc f) + (if loc + (lambda (x) (%seq ,(loc x) ,(f x))) + f))]) + (case ($ftd-alignment ftd) + [(8) + (let* ([int* (if (even? (length int*)) int* (cdr int*))] + [num-members (length members)] + [doubles? (and (fx<= num-members 4) + (andmap double-member? members))]) + ;; Sequence of up to 4 doubles that fits in registers? + (cond + [(and doubles? + (fx>= (length sgl*) (fx* 2 num-members))) + ;; Allocate each double to a register + (let dbl-loop ([size size] [offset 0] [sgl* sgl*] [loc #f]) + (cond + [(fx= size 0) + (loop (cdr types) (cons loc locs) live* int* sgl* #f isp)] + [else + (dbl-loop (fx- size 8) (fx+ offset 8) (cddr sgl*) + (combine-loc loc (load-double-reg (car sgl*) offset)))]))] + [else + ;; General case; for non-doubles, use integer registers while available, + ;; possibly splitting between registers and stack + (let obj-loop ([size size] [offset 0] [loc #f] + [live* live*] [int* int*] [isp isp]) + (cond + [(fx= size 0) + (loop (cdr types) (cons loc locs) live* int* sgl* bsgl isp)] + [else + (if (or (null? int*) doubles?) + (let ([isp (align 8 isp)]) + (obj-loop (fx- size 8) (fx+ offset 8) + (combine-loc loc (load-int64-indirect-stack isp offset)) + live* int* (fx+ isp 8))) + (obj-loop (fx- size 8) (fx+ offset 8) + (combine-loc loc (load-int64-indirect-reg (car int*) (cadr int*) offset)) + (cons* (car int*) (cadr int*) live*) (cddr int*) isp))]))]))] + [else + (let* ([num-members (length members)] + [floats? (and (fx<= num-members 4) + (andmap float-member? members))]) + ;; Sequence of up to 4 floats that fits in registers? + (cond + [(and floats? + (fx>= (fx+ (length sgl*) (if bsgl 1 0)) num-members)) + ;; Allocate each float to register + (let flt-loop ([size size] [offset 0] [sgl* sgl*] [bsgl bsgl] [loc #f]) + (cond + [(fx= size 0) + (loop (cdr types) (cons loc locs) live* int* sgl* bsgl isp)] + [else + (flt-loop (fx- size 4) (fx+ offset 4) + (if bsgl sgl* (cddr sgl*)) + (if bsgl #f (cadr sgl*)) + (combine-loc loc (load-single-reg (or bsgl (car sgl*)) offset #t)))]))] + [else + ;; General case; use integer registers while available, + ;; possibly splitting between registers and stack + (let obj-loop ([size size] [offset 0] [loc #f] + [live* live*] [int* int*] [isp isp]) + (cond + [(fx<= size 0) + (loop (cdr types) (cons loc locs) live* int* sgl* bsgl isp)] + [else + (if (or (null? int*) floats?) + (obj-loop (fx- size 4) (fx+ offset 4) + (combine-loc loc (load-int-indirect-stack isp offset (fxmin size 4))) + live* int* (fx+ isp 4)) + (obj-loop (fx- size 4) (fx+ offset 4) + (combine-loc loc (load-int-indirect-reg (car int*) offset (fxmin size 4))) + (cons (car int*) live*) (cdr int*) isp))]))]))]))] + [else (if (nanopass-case (Ltype Type) (car types) [(fp-integer ,bits) (fx= bits 64)] [(fp-unsigned ,bits) (fx= bits 64)] @@ -2406,14 +2543,62 @@ live* '() sgl* bsgl (fx+ isp 4)) (loop (cdr types) (cons (load-int-reg (car int*)) locs) - (cons (car int*) live*) (cdr int*) sgl* bsgl isp)))]))))]) + (cons (car int*) live*) (cdr int*) sgl* bsgl isp)))]))))] + [add-fill-result + (lambda (fill-result-here? result-type args-frame-size e) + (cond + [fill-result-here? + (nanopass-case (Ltype Type) result-type + [(fp-ftd& ,ftd) + (let* ([members ($ftd->members ftd)] + [num-members (length members)] + ;; result pointer is stashed on the stack after all arguments: + [dest-x %r2] + [init-dest-e `(seq ,e (set! ,dest-x ,(%mref ,%sp ,args-frame-size)))]) + (cond + [(and (fx<= num-members 4) + (or (andmap double-member? members) + (andmap float-member? members))) + ;; double/float results are in floating-point registers + (let ([double? (and (pair? members) (double-member? (car members)))]) + (let loop ([members members] [sgl* (sgl-regs)] [offset 0] [e init-dest-e]) + (cond + [(null? members) e] + [else + (loop (cdr members) + (if double? (cddr sgl*) (cdr sgl*)) + (fx+ offset (if double? 8 4)) + `(seq + ,e + (inline ,(make-info-loadfl (car sgl*)) ,(if double? %store-double %store-single) + ,dest-x ,%zero (immediate ,offset))))])))] + [else + ;; result is in %Cretval and maybe %r1 + `(seq + ,init-dest-e + ,(case ($ftd-size ftd) + [(1) `(inline ,(make-info-load 'integer-8 #f) ,%store ,dest-x ,%zero (immediate 0) ,%Cretval)] + [(2) `(inline ,(make-info-load 'integer-16 #f) ,%store ,dest-x ,%zero (immediate 0) ,%Cretval)] + [(3) (%seq + (inline ,(make-info-load 'integer-16 #f) ,%store ,dest-x ,%zero (immediate 0) ,%Cretval) + (set! ,%Cretval ,(%inline srl ,%Cretval (immediate 16))) + (inline ,(make-info-load 'integer-8 #f) ,%store ,dest-x ,%zero (immediate 2) ,%Cretval))] + [(4) `(set! ,(%mref ,dest-x ,0) ,%Cretval)] + [(8) `(seq + (set! ,(%mref ,dest-x ,0) ,%Cretval) + (set! ,(%mref ,dest-x ,4) ,%r1))]))]))])] + [else e]))]) (lambda (info) (safe-assert (reg-callee-save? %tc)) ; no need to save-restore - (let ([arg-type* (info-foreign-arg-type* info)] - [result-type (info-foreign-result-type info)]) - (with-values (do-args arg-type*) - (lambda (frame-size locs live*) - (let* ([frame-size (align 8 frame-size)] + (let* ([arg-type* (info-foreign-arg-type* info)] + [result-type (info-foreign-result-type info)] + [fill-result-here? (indirect-result-that-fits-in-registers? result-type)]) + (with-values (do-args (if fill-result-here? (cdr arg-type*) arg-type*)) + (lambda (args-frame-size locs live*) + (let* ([frame-size (align 8 (+ args-frame-size + (if fill-result-here? + 4 + 0)))] [adjust-frame (lambda (op) (lambda () (if (fx= frame-size 0) @@ -2421,9 +2606,15 @@ `(set! ,%sp (inline ,null-info ,op ,%sp (immediate ,frame-size))))))]) (values (adjust-frame %-) - (reverse locs) + (let ([locs (reverse locs)]) + (cond + [fill-result-here? + ;; stash extra argument on the stack to be retrieved after call and filled with the result: + (cons (load-int-stack args-frame-size) locs)] + [else locs])) (lambda (t0) - `(inline ,(make-info-kill*-live* (reg-list %r0) live*) ,%c-call ,t0)) + (add-fill-result fill-result-here? result-type args-frame-size + `(inline ,(make-info-kill*-live* (reg-list %r0) live*) ,%c-call ,t0))) (nanopass-case (Ltype Type) result-type [(fp-double-float) (lambda (lvalue) @@ -2463,18 +2654,26 @@ +---------------------------+ | | | incoming stack args | - sp+36+X+Y+Z: | | - +---------------------------+<- 8-byte boundary - | | - | saved float reg args | 0-16 words - sp+36+X+Y: | | + sp+36+R+X+Y+Z+W: | | +---------------------------+<- 8-byte boundary | | | saved int reg args | 0-4 words - sp+36+X: | | + sp+36+R+X+Y+Z: | | +---------------------------+ | | | pad word if necessary | 0-1 words + sp+36+R+X+Y: | | + +---------------------------+<- 8-byte boundary + | | + | saved float reg args | 0-16 words + sp+36+R+X: | | + +---------------------------+<- 8-byte boundary + | | + | &-return space | up to 8 words + sp+36+R: | | + +---------------------------+<- 8-byte boundary + | | + | pad word if necessary | 0-1 words sp+36: | | +---------------------------+ | | @@ -2523,10 +2722,14 @@ (%seq (set! ,lolvalue ,(%mref ,%sp ,offset)) (set! ,hilvalue ,(%mref ,%sp ,(fx+ offset 4))))))) + (define load-stack-address + (lambda (offset) + (lambda (lvalue) + `(set! ,lvalue ,(%inline + ,%sp (immediate ,offset)))))) (define count-reg-args - (lambda (types) + (lambda (types synthesize-first?) ; bsgl? is #t iff we have a "b" single (second half of double) float reg to fill - (let f ([types types] [iint 0] [idbl 0] [bsgl? #f]) + (let f ([types types] [iint (if synthesize-first? -1 0)] [idbl 0] [bsgl? #f]) (if (null? types) (values iint idbl) (nanopass-case (Ltype Type) (car types) @@ -2540,6 +2743,34 @@ (if (fx< idbl 8) (f (cdr types) iint (fx+ idbl 1) #t) (f (cdr types) iint idbl #f)))] + [(fp-ftd& ,ftd) + (let* ([size ($ftd-size ftd)] + [members ($ftd->members ftd)] + [num-members (length members)]) + (cond + [(and (fx<= num-members 4) + (andmap double-member? members)) + ;; doubles are either in registers or all on stack + (if (fx<= (fx+ idbl num-members) 8) + (f (cdr types) iint (fx+ idbl num-members) #f) + ;; no more floating-point registers should be used, but ok if we count more + (f (cdr types) iint idbl #f))] + [(and (fx<= num-members 4) + (andmap float-member? members)) + ;; floats are either in registers or all on stack + (let ([amt (fxsrl (align 2 (fx- num-members (if bsgl? 1 0))) 1)]) + (if (fx<= (fx+ idbl amt) 8) + (let ([odd-floats? (fxodd? num-members)]) + (if bsgl? + (f (cdr types) iint (+ idbl amt) (not odd-floats?)) + (f (cdr types) iint (+ idbl amt) odd-floats?))) + ;; no more floating-point registers should be used, but ok if we count more + (f (cdr types) iint idbl #f)))] + [(fx= 8 ($ftd-alignment ftd)) + (f (cdr types) (fxmin 4 (fx+ (align 2 iint) (fxsrl size 2))) idbl bsgl?)] + [else + (let ([size (align 4 size)]) + (f (cdr types) (fxmin 4 (fx+ iint (fxsrl size 2))) idbl bsgl?))]))] [else (if (nanopass-case (Ltype Type) (car types) [(fp-integer ,bits) (fx= bits 64)] @@ -2551,12 +2782,16 @@ (define do-stack ; all of the args are on the stack at this point, though not contiguous since ; we push all of the int reg args with one push instruction and all of the - ; float reg args with another (v)push instruction - (lambda (types saved-reg-bytes pad-bytes int-reg-bytes float-reg-bytes) - (let* ([int-reg-offset (fx+ saved-reg-bytes pad-bytes)] - [float-reg-offset (fx+ int-reg-offset int-reg-bytes)] - [stack-arg-offset (fx+ float-reg-offset float-reg-bytes)]) - (let loop ([types types] + ; float reg args with another (v)push instruction; the saved int regs + ; continue on into the stack variables, which is convenient when a struct + ; argument is split across registers and the stack + (lambda (types saved-reg-bytes pre-pad-bytes return-bytes float-reg-bytes post-pad-bytes int-reg-bytes + synthesize-first?) + (let* ([return-space-offset (fx+ saved-reg-bytes pre-pad-bytes)] + [float-reg-offset (fx+ return-space-offset return-bytes)] + [int-reg-offset (fx+ float-reg-offset float-reg-bytes post-pad-bytes)] + [stack-arg-offset (fx+ int-reg-offset int-reg-bytes)]) + (let loop ([types (if synthesize-first? (cdr types) types)] [locs '()] [iint 0] [idbl 0] @@ -2565,7 +2800,11 @@ [float-reg-offset float-reg-offset] [stack-arg-offset stack-arg-offset]) (if (null? types) - (reverse locs) + (let ([locs (reverse locs)]) + (if synthesize-first? + (cons (load-stack-address return-space-offset) + locs) + locs)) (nanopass-case (Ltype Type) (car types) [(fp-double-float) (if (< idbl 8) @@ -2590,12 +2829,73 @@ (loop (cdr types) (cons (load-single-stack stack-arg-offset) locs) iint 8 #f int-reg-offset float-reg-offset (fx+ stack-arg-offset 4))))] + [(fp-ftd& ,ftd) + (let* ([size ($ftd-size ftd)] + [members ($ftd->members ftd)] + [num-members (length members)]) + (cond + [(and (fx<= num-members 4) + (andmap double-member? members)) + ;; doubles are either in registers or all on stack + (if (fx<= (fx+ idbl num-members) 8) + (loop (cdr types) + (cons (load-stack-address float-reg-offset) locs) + iint (fx+ idbl num-members) #f int-reg-offset (fx+ float-reg-offset size) stack-arg-offset) + (let ([stack-arg-offset (align 8 stack-arg-offset)]) + (loop (cdr types) + (cons (load-stack-address stack-arg-offset) locs) + iint 8 #f int-reg-offset #f (fx+ stack-arg-offset size))))] + [(and (fx<= num-members 4) + (andmap float-member? members)) + ;; floats are either in registers or all on stack + (let ([amt (fxsrl (align 2 (fx- num-members (if bsgl-offset 1 0))) 1)]) + (if (fx<= (fx+ idbl amt) 8) + (let ([odd-floats? (fxodd? num-members)]) + (if bsgl-offset + (let ([dbl-size (align 8 (fx- size 4))]) + (loop (cdr types) + (cons (load-stack-address bsgl-offset) locs) + iint (fx+ idbl amt) (if odd-floats? #f (+ bsgl-offset size)) int-reg-offset + (fx+ float-reg-offset dbl-size) stack-arg-offset)) + (let ([dbl-size (align 8 size)]) + (loop (cdr types) + (cons (load-stack-address float-reg-offset) locs) + iint (fx+ idbl amt) (and odd-floats? (fx+ float-reg-offset size)) int-reg-offset + (fx+ float-reg-offset dbl-size) stack-arg-offset)))) + (loop (cdr types) + (cons (load-stack-address stack-arg-offset) locs) + iint 8 #f int-reg-offset float-reg-offset (fx+ stack-arg-offset size))))] + [(fx= 8 ($ftd-alignment ftd)) + (let ([int-reg-offset (if (fxeven? iint) int-reg-offset (fx+ int-reg-offset 4))] + [iint (align 2 iint)] + [amt (fxsrl size 2)]) + (if (fx< iint 4) ; argument starts in registers, may continue on stack + (loop (cdr types) + (cons (load-stack-address int-reg-offset) locs) + (fxmin 4 (fx+ iint amt)) idbl bsgl-offset (fx+ int-reg-offset size) float-reg-offset + (fx+ stack-arg-offset (fxmax 0 (fx* 4 (fx- (fx+ iint amt) 4))))) + (let ([stack-arg-offset (align 8 stack-arg-offset)]) + (loop (cdr types) + (cons (load-stack-address stack-arg-offset) locs) + iint idbl bsgl-offset int-reg-offset float-reg-offset (fx+ stack-arg-offset size)))))] + [else + (let* ([size (align 4 size)] + [amt (fxsrl size 2)]) + (if (fx< iint 4) ; argument starts in registers, may continue on stack + (loop (cdr types) + (cons (load-stack-address int-reg-offset) locs) + (fxmin 4 (fx+ iint amt)) idbl bsgl-offset (fx+ int-reg-offset size) float-reg-offset + (fx+ stack-arg-offset (fxmax 0 (fx* 4 (fx- (fx+ iint amt) 4))))) + (loop (cdr types) + (cons (load-stack-address stack-arg-offset) locs) + iint idbl bsgl-offset int-reg-offset float-reg-offset (fx+ stack-arg-offset size))))]))] [else (if (nanopass-case (Ltype Type) (car types) [(fp-integer ,bits) (fx= bits 64)] [(fp-unsigned ,bits) (fx= bits 64)] [else #f]) - (let ([iint (align 2 iint)]) + (let ([int-reg-offset (if (fxeven? iint) int-reg-offset (fx+ int-reg-offset 4))] + [iint (align 2 iint)]) (if (fx= iint 4) (let ([stack-arg-offset (align 8 stack-arg-offset)]) (loop (cdr types) @@ -2611,44 +2911,127 @@ (loop (cdr types) (cons (load-int-stack (car types) int-reg-offset) locs) (fx+ iint 1) idbl bsgl-offset (fx+ int-reg-offset 4) float-reg-offset stack-arg-offset)))])))))) + (define do-result + (lambda (result-type synthesize-first? return-stack-offset) + (nanopass-case (Ltype Type) result-type + [(fp-ftd& ,ftd) + (let* ([members ($ftd->members ftd)] + [num-members (length members)]) + (cond + [(and (fx<= 1 num-members 4) + (or (andmap double-member? members) + (andmap float-member? members))) + ;; double/float results returned in floating-point registers + (values + (lambda () + (let ([double? (and (pair? members) (double-member? (car members)))]) + (let loop ([members members] [sgl* (sgl-regs)] [offset return-stack-offset] [e #f]) + (cond + [(null? members) e] + [else + (loop (cdr members) + (if double? (cddr sgl*) (cdr sgl*)) + (fx+ offset (if double? 8 4)) + (let ([new-e + `(inline ,(make-info-loadfl (car sgl*)) ,(if double? %load-double %load-single) + ,%sp ,%zero (immediate ,offset))]) + (if e `(seq ,e ,new-e) new-e)))])))) + '() + ($ftd-size ftd))] + [else + (case ($ftd-size ftd) + [(8) + (values (lambda () + `(seq + (set! ,%Cretval ,(%mref ,%sp ,return-stack-offset)) + (set! ,%r1 ,(%mref ,%sp ,(fx+ 4 return-stack-offset))))) + (list %Cretval %r1) + 8)] + [else + (values (lambda () + `(set! ,%Cretval ,(%mref ,%sp ,return-stack-offset))) + (list %Cretval %r1) + 4)])]))] + [(fp-double-float) + (values (lambda (rhs) + `(inline ,(make-info-loadfl %Cfpretval) ,%load-double + ,rhs ,%zero ,(%constant flonum-data-disp))) + '() + 0)] + [(fp-single-float) + (values (lambda (rhs) + `(inline ,(make-info-loadfl %Cfpretval) ,%load-double->single + ,rhs ,%zero ,(%constant flonum-data-disp))) + '() + 0)] + [(fp-void) + (values (lambda () `(nop)) + '() + 0)] + [else + (cond + [(nanopass-case (Ltype Type) result-type + [(fp-integer ,bits) (fx= bits 64)] + [(fp-unsigned ,bits) (fx= bits 64)] + [else #f]) + (values (lambda (lo hi) + `(seq + (set! ,%Cretval ,lo) + (set! ,%r1 ,hi))) + (list %Cretval %r1) + 0)] + [else + (values (lambda (x) + `(set! ,%Cretval ,x)) + (list %Cretval %r1) + 0)])]))) (lambda (info) (define callee-save-regs+lr (list %r4 %r5 %r6 %r7 %r8 %r9 %r10 %r11 %lr)) (define isaved (length callee-save-regs+lr)) - (let ([arg-type* (info-foreign-arg-type* info)] - [result-type (info-foreign-result-type info)]) - (let-values ([(iint idbl) (count-reg-args arg-type*)]) + (let* ([arg-type* (info-foreign-arg-type* info)] + [result-type (info-foreign-result-type info)] + [synthesize-first? (indirect-result-that-fits-in-registers? result-type)]) + (let-values ([(iint idbl) (count-reg-args arg-type* synthesize-first?)]) (let ([saved-reg-bytes (fx* isaved 4)] - [pad-bytes (if (fxeven? (fx+ isaved iint)) 0 4)] + [pre-pad-bytes (if (fxeven? isaved) 0 4)] [int-reg-bytes (fx* iint 4)] + [post-pad-bytes (if (fxeven? iint) 0 4)] [float-reg-bytes (fx* idbl 8)]) - (values - (lambda () - (%seq - ; save argument register values to the stack so we don't lose the values - ; across possible calls to C while setting up the tc and allocating memory - ,(if (fx= idbl 0) `(nop) `(inline ,(make-info-vpush %Cfparg1 idbl) ,%vpush-multiple)) - ,(if (fx= iint 0) `(nop) `(inline ,(make-info-kill*-live* '() (list-head (list %Carg1 %Carg2 %Carg3 %Carg4) iint)) ,%push-multiple)) - ; pad if necessary to force 8-byte boundardy after saving callee-save-regs+lr - ,(if (fx= pad-bytes 0) `(nop) `(set! ,%sp ,(%inline - ,%sp (immediate 4)))) - ; save the callee save registers & return address - (inline ,(make-info-kill*-live* '() callee-save-regs+lr) ,%push-multiple) - ; set up tc for benefit of argument-conversion code, which might allocate - ,(if-feature pthreads - (%seq - (set! ,%r0 ,(%inline get-tc)) - (set! ,%tc ,%r0)) - `(set! ,%tc (literal ,(make-info-literal #f 'entry (lookup-c-entry thread-context) 0)))))) - ; list of procedures that marshal arguments from their C stack locations - ; to the Scheme argument locations - (do-stack arg-type* saved-reg-bytes pad-bytes int-reg-bytes float-reg-bytes) - (lambda (fv* Scall->result-type) - (in-context Tail - (%seq - ; restore the callee save registers - (inline ,(make-info-kill* callee-save-regs+lr) ,%pop-multiple) - ; deallocate space for pad & arg reg values - (set! ,%sp ,(%inline + ,%sp (immediate ,(fx+ pad-bytes int-reg-bytes float-reg-bytes)))) - ; tail call the C helper that calls the Scheme procedure - (jump (literal ,(make-info-literal #f 'entry Scall->result-type 0)) - (,callee-save-regs+lr ... ,fv* ...)))))))))))))) + (let-values ([(get-result result-regs return-bytes) (do-result result-type synthesize-first? + (fx+ saved-reg-bytes pre-pad-bytes))]) + (let ([return-bytes (align 8 return-bytes)]) + (values + (lambda () + (%seq + ; save argument register values to the stack so we don't lose the values + ; across possible calls to C while setting up the tc and allocating memory + ,(if (fx= iint 0) `(nop) `(inline ,(make-info-kill*-live* '() (list-head (list %Carg1 %Carg2 %Carg3 %Carg4) iint)) ,%push-multiple)) + ; pad if necessary to force 8-byte boundary, and make room for indirect return: + ,(let ([len (+ post-pad-bytes return-bytes)]) + (if (fx= len 0) `(nop) `(set! ,%sp ,(%inline - ,%sp (immediate ,len))))) + ,(if (fx= idbl 0) `(nop) `(inline ,(make-info-vpush %Cfparg1 idbl) ,%vpush-multiple)) + ; pad if necessary to force 8-byte boundardy after saving callee-save-regs+lr + ,(if (fx= pre-pad-bytes 0) `(nop) `(set! ,%sp ,(%inline - ,%sp (immediate 4)))) + ; save the callee save registers & return address + (inline ,(make-info-kill*-live* '() callee-save-regs+lr) ,%push-multiple) + ; set up tc for benefit of argument-conversion code, which might allocate + ,(if-feature pthreads + (%seq + (set! ,%r0 ,(%inline get-tc)) + (set! ,%tc ,%r0)) + `(set! ,%tc (literal ,(make-info-literal #f 'entry (lookup-c-entry thread-context) 0)))))) + ; list of procedures that marshal arguments from their C stack locations + ; to the Scheme argument locations + (do-stack arg-type* saved-reg-bytes pre-pad-bytes return-bytes float-reg-bytes post-pad-bytes int-reg-bytes + synthesize-first?) + get-result + (lambda () + (in-context Tail + (%seq + ; restore the callee save registers + (inline ,(make-info-kill* callee-save-regs+lr) ,%pop-multiple) + ; deallocate space for pad & arg reg values + (set! ,%sp ,(%inline + ,%sp (immediate ,(fx+ pre-pad-bytes int-reg-bytes post-pad-bytes float-reg-bytes)))) + ; done + (asm-c-return ,null-info ,callee-save-regs+lr ... ,result-regs ...))))))))))))))) ) diff --git a/s/base-lang.ss b/s/base-lang.ss index 829c509c93..8a18331ca6 100644 --- a/s/base-lang.ss +++ b/s/base-lang.ss @@ -184,7 +184,7 @@ ; language of foreign types (define-language Ltype - (nongenerative-id #{Ltype czp82kxwe75y4e18-0}) + (nongenerative-id #{Ltype czp82kxwe75y4e18-1}) (terminals (exact-integer (bits)) ($ftd (ftd))) @@ -199,7 +199,8 @@ (fp-fixnum) (fp-double-float) (fp-single-float) - (fp-ftd ftd))) + (fp-ftd ftd) + (fp-ftd& ftd))) (define arity? (lambda (x) diff --git a/s/cmacros.ss b/s/cmacros.ss index 5dc426ab99..3802efef7c 100644 --- a/s/cmacros.ss +++ b/s/cmacros.ss @@ -2633,16 +2633,7 @@ scan-remembered-set instantiate-code-object Sreturn - Scall->ptr - Scall->fptr - Scall->bytevector - Scall->fixnum - Scall->int32 - Scall->uns32 - Scall->double - Scall->single - Scall->int64 - Scall->uns64 - Scall->void + Scall-one-result + Scall-any-results )) ) diff --git a/s/cpnanopass.ss b/s/cpnanopass.ss index 1e84cc6d64..7ccd098fe1 100644 --- a/s/cpnanopass.ss +++ b/s/cpnanopass.ss @@ -972,9 +972,19 @@ (fields type reversed? invertible?)) (define-record-type info-c-simple-call (nongenerative) + (parent info-kill*-live*) + (sealed #t) + (fields save-ra? entry) + (protocol + (lambda (new) + (case-lambda + [(save-ra? entry) ((new '() '()) save-ra? entry)] + [(live* save-ra? entry) ((new '() live*) save-ra? entry)])))) + + (define-record-type info-c-return (nongenerative) (parent info) (sealed #t) - (fields save-ra? entry)) + (fields offset)) (module () (record-writer (record-type-descriptor info-load) @@ -10472,7 +10482,23 @@ (set! ,x ,t) ,(toC (in-context Rhs (%mref ,x ,(constant record-data-disp))))))] + [(fp-ftd& ,ftd) + (let ([x (make-tmp 't)]) + (%seq + (set! ,x ,t) + (set! ,x ,(%mref ,x ,(constant record-data-disp))) + ,(toC x)))] [else ($oops who "invalid parameter type specifier ~s" type)]))) + (define Scheme->C-for-result + (lambda (type toC t) + (nanopass-case (Ltype Type) type + [(fp-void) (toC)] + [(fp-ftd& ,ftd) + ;; pointer isn't received as a result, but instead passed + ;; to the function as its first argument (or simulated as such) + (toC)] + [else + (Scheme->C type toC t)]))) (define C->Scheme ; ASSUMPTIONS: ac0, ac1, and xp are not C argument registers (lambda (type fromC lvalue) @@ -10540,6 +10566,15 @@ ,(e1 `(goto ,Lbig)) (seq (label ,Lbig) ,e2))))) (e1 e2)))))) + (define (alloc-fptr ftd) + (%seq + (set! ,%xp + ,(%constant-alloc type-typed-object (fx* (constant ptr-bytes) 2) #f)) + (set! + ,(%mref ,%xp ,(constant record-type-disp)) + (literal ,(make-info-literal #f 'object ftd 0))) + (set! ,(%mref ,%xp ,(constant record-data-disp)) ,%ac0) + (set! ,lvalue ,%xp))) (nanopass-case (Ltype Type) type [(fp-void) `(set! ,lvalue ,(%constant svoid))] [(fp-scheme-object) (fromC lvalue)] @@ -10587,15 +10622,17 @@ (set! ,lvalue ,%xp))] [(fp-ftd ,ftd) (%seq - ,(fromC %ac0) ; C integer return might be wiped out by alloc - (set! ,%xp - ,(%constant-alloc type-typed-object (fx* (constant ptr-bytes) 2) #f)) - (set! - ,(%mref ,%xp ,(constant record-type-disp)) - (literal ,(make-info-literal #f 'object ftd 0))) - (set! ,(%mref ,%xp ,(constant record-data-disp)) ,%ac0) - (set! ,lvalue ,%xp))] + ,(fromC %ac0) ; C integer return might be wiped out by alloc + ,(alloc-fptr ftd))] + [(fp-ftd& ,ftd) + (%seq + ,(fromC %ac0) + ,(alloc-fptr ftd))] [else ($oops who "invalid result type specifier ~s" type)])))) + (define (pick-Scall result-type) + (nanopass-case (Ltype Type) result-type + [(fp-void) (lookup-c-entry Scall-any-results)] + [else (lookup-c-entry Scall-one-result)])) (define build-foreign-call (with-output-language (L13 Effect) (lambda (info t0 t1* maybe-lvalue new-frame?) @@ -10615,7 +10652,13 @@ (ccall t0) t1* arg-type* c-args)) ,(let ([e (deallocate)]) (if maybe-lvalue - `(seq ,(C->Scheme result-type c-res maybe-lvalue) ,e) + (nanopass-case (Ltype Type) result-type + [(fp-ftd& ,ftd) + ;; Don't actually return a value, because the result + ;; was instead installed in the first argument. + `(seq (set! ,maybe-lvalue ,(%constant svoid)) ,e)] + [else + `(seq ,(C->Scheme result-type c-res maybe-lvalue) ,e)]) e))))]) (if new-frame? (sorry! who "can't handle nontail foreign calls") @@ -10640,7 +10683,7 @@ (cons (get-fv i) (f (cdr frame-x*) i)))))]) ; add 2 for the old RA and cchain (set! max-fv (fx+ max-fv 2)) - (let-values ([(c-init c-args c-scall) (asm-foreign-callable info)]) + (let-values ([(c-init c-args c-result c-return) (asm-foreign-callable info)]) ; c-init save C callee-save registers and restores tc ; each of c-args sets a variable to one of the C arguments ; c-scall restores callee-save registers and tail-calls C @@ -10669,28 +10712,12 @@ ,(save-scheme-state (in %ac0 %ac1) (out %cp %xp %yp %ts %td scheme-args extra-regs)) - ,(c-scall fv* - (nanopass-case (Ltype Type) result-type - [(fp-scheme-object) (lookup-c-entry Scall->ptr)] - [(fp-void) (lookup-c-entry Scall->void)] - [(fp-fixnum) (lookup-c-entry Scall->fixnum)] - [(fp-integer ,bits) - (case bits - [(8 16 32) (lookup-c-entry Scall->int32)] - [(64) (lookup-c-entry Scall->int64)] - [else ($oops 'foreign-callable "unsupported result type specifier integer-~s" bits)])] - [(fp-unsigned ,bits) - (case bits - [(8 16 32) (lookup-c-entry Scall->uns32)] - [(64) (lookup-c-entry Scall->uns64)] - [else ($oops 'foreign-callable "unsupported result type specifier unsigned-~s" bits)])] - [(fp-double-float) (lookup-c-entry Scall->double)] - [(fp-single-float) (lookup-c-entry Scall->single)] - [(fp-u8*) (lookup-c-entry Scall->bytevector)] - [(fp-u16*) (lookup-c-entry Scall->bytevector)] - [(fp-u32*) (lookup-c-entry Scall->bytevector)] - [(fp-ftd ,ftd) (lookup-c-entry Scall->fptr)] - [else ($oops 'compiler-internal "invalid result type specifier ~s" result-type)])))))))))))) + (inline ,(make-info-c-simple-call fv* #f (pick-Scall result-type)) ,%c-simple-call) + ,(restore-scheme-state + (in %ac0) + (out %ac1 %cp %xp %yp %ts %td scheme-args extra-regs)) + ,(Scheme->C-for-result result-type c-result %ac0) + ,(c-return))))))))))) (define handle-do-rest (lambda (fixed-args offset save-asm-ra?) (with-output-language (L13 Effect) @@ -12497,6 +12524,10 @@ (let ([block (make-tail-block)]) (tail-block-tail-set! block (with-output-language (L15a Tail) `(asm-return ,reg* ...))) (values block (cons block block*)))] + [(asm-c-return ,info ,reg* ...) + (let ([block (make-tail-block)]) + (tail-block-tail-set! block (with-output-language (L15a Tail) `(asm-c-return ,info ,reg* ...))) + (values block (cons block block*)))] [else ($oops who "unexpected Tail ~s" ir)]) (Effect : Effect (ir target block*) -> * (target block*) [(nop) (values target block*)] @@ -13810,6 +13841,7 @@ [else (sorry! who "unrecognized block ~s" block)])))) (Tail : Tail (ir chunk* offset) -> * (code* chunk* offset) [(asm-return) (values (asm-return) chunk* offset)] + [(asm-c-return ,info) (values (asm-c-return info) chunk* offset)] [(jump (label-ref ,l ,offset0)) (values (asm-direct-jump l offset0) chunk* offset)] [(jump (literal ,info)) @@ -14095,6 +14127,9 @@ [(asm-return ,reg* ...) (safe-assert (eq? out no-live*)) (fold-left add-var no-live* reg*)] + [(asm-c-return ,info ,reg* ...) + (safe-assert (eq? out no-live*)) + (fold-left add-var no-live* reg*)] [(jump ,live-info ,t (,var* ...)) (let ([out (fold-left add-var out var*)]) (live-info-live-set! live-info out) @@ -14665,7 +14700,8 @@ (Pred : Pred (ir) -> Pred ()) (Tail : Tail (ir) -> Tail () [(jump ,live-info ,[t] (,var* ...)) `(jump ,live-info ,t)] - [(asm-return ,reg* ...) `(asm-return)]) + [(asm-return ,reg* ...) `(asm-return)] + [(asm-c-return ,info ,reg* ...) `(asm-c-return ,info)]) (Effect : Effect (ir) -> Effect ()) (foldable-Effect : Effect (ir new-effect*) -> * (new-effect*) [(return-point ,info ,rpl ,mrvl (,cnfv* ...)) @@ -15064,7 +15100,8 @@ (Tail : Tail (ir) -> Tail () [(jump ,live-info ,t) (handle-jump t (live-info-live live-info))] [(goto ,l) (values '() `(goto ,l))] - [(asm-return) (values '() `(asm-return))]) + [(asm-return) (values '() `(asm-return))] + [(asm-c-return ,info) (values '() `(asm-c-return ,info))]) (Effect : Effect (ir new-effect*) -> * (new-effect*) [(set! ,live-info ,lvalue ,rhs) (Rhs rhs lvalue new-effect* (live-info-live live-info))] [(inline ,live-info ,info ,effect-prim ,t* ...) diff --git a/s/cprep.ss b/s/cprep.ss index 09e76f3431..dca079e1af 100644 --- a/s/cprep.ss +++ b/s/cprep.ss @@ -115,7 +115,8 @@ [(fp-fixnum) 'fixnum] [(fp-double-float) 'double-float] [(fp-single-float) 'single-float] - [(fp-ftd ,ftd) 'ftype]))) + [(fp-ftd ,ftd) 'ftype] + [(fp-ftd& ,ftd) 'ftype]))) (define uncprep (lambda (x) (define keyword? diff --git a/s/ftype.ss b/s/ftype.ss index 27b2e54517..b7d84424b1 100644 --- a/s/ftype.ss +++ b/s/ftype.ss @@ -560,21 +560,32 @@ ftype operators: (define expand-fp-ftype (lambda (who what r ftype def-alist) (syntax-case ftype () - [(*-kwd ftype-name) - (and (eq? (datum *-kwd) '*) (identifier? #'ftype-name)) - (let ([stype (syntax->datum ftype)]) - (cond - [(assp (lambda (x) (bound-identifier=? #'ftype-name x)) def-alist) => - (lambda (a) - (if (ftd? (cdr a)) - (make-ftd-pointer rtd/fptr #f stype pointer-size pointer-alignment (cdr a)) - (let ([ftd (make-ftd-pointer rtd/fptr #f stype pointer-size pointer-alignment #f)]) - (set-cdr! a (cons ftd (cdr a))) - ftd)))] - [(expand-ftype-name r #'ftype-name #f) => - (lambda (ftd) - (make-ftd-pointer rtd/fptr #f stype pointer-size pointer-alignment ftd))] - [else (syntax-error #'ftype-name (format "unrecognized ~s ~s ftype name" who what))]))] + [(*/&-kwd ftype-name) + (and (or (eq? (datum */&-kwd) '*) + (eq? (datum */&-kwd) '&)) + (identifier? #'ftype-name)) + (let* ([stype (syntax->datum ftype)] + [ftd + (cond + [(assp (lambda (x) (bound-identifier=? #'ftype-name x)) def-alist) => + (lambda (a) + (if (ftd? (cdr a)) + (make-ftd-pointer rtd/fptr #f stype pointer-size pointer-alignment (cdr a)) + (let ([ftd (make-ftd-pointer rtd/fptr #f stype pointer-size pointer-alignment #f)]) + (set-cdr! a (cons ftd (cdr a))) + ftd)))] + [(expand-ftype-name r #'ftype-name #f) => + (lambda (ftd) + (make-ftd-pointer rtd/fptr #f stype pointer-size pointer-alignment ftd))] + [else (syntax-error #'ftype-name (format "unrecognized ~s ~s ftype name" who what))])]) + ;; Scheme-side argument is a pointer to a value, but foreign side has two variants: + (if (eq? (datum */&-kwd) '&) + (cond + [(ftd-array? (ftd-pointer-ftd ftd)) + (syntax-error ftype (format "array value invalid as ~a ~s" who what))] + [else + (box ftd)]) ; boxed ftd => pass/receive the value (as opposed to a pointer to the value) + ftd))] ; plain ftd => pass/receive a pointer to the value [_ (cond [(and (identifier? ftype) (expand-ftype-name r ftype #f)) => (lambda (ftd) @@ -586,11 +597,14 @@ ftype operators: [else (syntax->datum ftype)])]))) (define-who indirect-ftd-pointer (lambda (x) - (if (ftd? x) - (if (ftd-pointer? x) - (ftd-pointer-ftd x) - ($oops who "~s is not an ftd-pointer" x)) - x))) + (cond + [(ftd? x) + (if (ftd-pointer? x) + (ftd-pointer-ftd x) + ($oops who "~s is not an ftd-pointer" x))] + [(box? x) + (box (indirect-ftd-pointer (unbox x)))] + [else x]))) (define-who expand-ftype-defns (lambda (r defid* ftype*) (define patch-pointer-ftds! @@ -926,6 +940,74 @@ ftype operators: (set! $ftd? (lambda (x) (ftd? x))) + (set! $ftd-as-box? ; represents `(& )` from `$expand-fp-ftype` + (lambda (x) + (and (box? x) (ftd? (unbox x))))) + (set! $ftd-size + (lambda (x) + (ftd-size x))) + (set! $ftd-alignment + (lambda (x) + (ftd-alignment x))) + (set! $ftd-compound? + (lambda (x) + (or (ftd-struct? x) + (ftd-union? x) + (ftd-array? x)))) + (set! $ftd->members + (lambda (x) + ;; Currently used for x86_64 and arm32 ABI: Returns a list of + ;; (list 'integer/'float size offset) + (let loop ([x x] [offset 0] [accum '()]) + (cond + [(ftd-base? x) + (cons (list (case (ftd-base-type x) + [(double double-float float single-float) + 'float] + [else 'integer]) + (ftd-size x) + offset) + accum)] + [(ftd-struct? x) + (let struct-loop ([field* (ftd-struct-field* x)] [accum accum]) + (cond + [(null? field*) accum] + [else (let* ([fld (car field*)] + [sub-ftd (caddr fld)] + [sub-offset (cadr fld)]) + (struct-loop (cdr field*) + (loop sub-ftd (+ offset sub-offset) accum)))]))] + [(ftd-union? x) + (let union-loop ([field* (ftd-union-field* x)] [accum accum]) + (cond + [(null? field*) accum] + [else (let* ([fld (car field*)] + [sub-ftd (cdr fld)]) + (union-loop (cdr field*) + (loop sub-ftd offset accum)))]))] + [(ftd-array? x) + (let ([elem-ftd (ftd-array-ftd x)]) + (let array-loop ([len (ftd-array-length x)] [offset offset] [accum accum]) + (cond + [(fx= len 0) accum] + [else (array-loop (fx- len 1) + (+ offset (ftd-size elem-ftd)) + (loop elem-ftd offset accum))])))] + [else (cons (list 'integer (ftd-size x) offset) accum)])))) + (set! $ftd-atomic-category + (lambda (x) + ;; Currently used for PowerPC32 ABI + (cond + [(ftd-base? x) + (case (ftd-base-type x) + [(double double-float float single-float) + 'float] + [(unsigned-short unsigned unsigned-int + unsigned-long unsigned-long-long + unsigned-8 unsigned-16 unsigned-32 unsigned-64) + 'unsigned] + [else 'integer])] + [else 'integer]))) (set! $expand-fp-ftype ; for foreign-procedure, foreign-callable (lambda (who what r ftype) (indirect-ftd-pointer diff --git a/s/np-languages.ss b/s/np-languages.ss index c6e9de1cb9..a5c00e0a8b 100644 --- a/s/np-languages.ss +++ b/s/np-languages.ss @@ -488,10 +488,13 @@ (declare-primitive asmlibcall! effect #f) (declare-primitive c-call effect #f) (declare-primitive c-simple-call effect #f) + (declare-primitive c-simple-return effect #f) (declare-primitive fl* effect #f) (declare-primitive fl+ effect #f) (declare-primitive fl- effect #f) (declare-primitive fl/ effect #f) + (declare-primitive fldl effect #f) ; x86 + (declare-primitive flds effect #f) ; x86 (declare-primitive flsqrt effect #f) ; not implemented for some ppc32 (so we don't use it) (declare-primitive flt effect #f) (declare-primitive inc-cc-counter effect #f) @@ -544,6 +547,7 @@ (declare-primitive -/eq value #f) (declare-primitive asmlibcall value #f) (declare-primitive fstpl value #f) ; x86 only + (declare-primitive fstps value #f) ; x86 only (declare-primitive get-double value #t) ; x86_64 (declare-primitive get-tc value #f) ; threaded version only (declare-primitive lea1 value #t) @@ -849,6 +853,7 @@ (jump t (var* ...)) (joto l (nfv* ...)) (asm-return reg* ...) + (asm-c-return info reg* ...) (if p0 tl1 tl2) (seq e0 tl1) (goto l))) @@ -961,7 +966,8 @@ (Tail (tl) (goto l) (jump live-info t (var* ...)) - (asm-return reg* ...))) + (asm-return reg* ...) + (asm-c-return info reg* ...))) (define-language L15b (extends L15a) (terminals @@ -979,9 +985,11 @@ (+ (fp-offset live-info imm))) (Tail (tl) (- (jump live-info t (var* ...)) - (asm-return reg* ...)) + (asm-return reg* ...) + (asm-c-return info reg* ...)) (+ (jump live-info t) - (asm-return)))) + (asm-return) + (asm-c-return info)))) (define ur? (lambda (x) diff --git a/s/ppc32.ss b/s/ppc32.ss index 4bcbe8f8f9..815fae889b 100644 --- a/s/ppc32.ss +++ b/s/ppc32.ss @@ -810,7 +810,7 @@ asm-lock asm-lock+/- asm-fl-load/store asm-flop-2 asm-c-simple-call - asm-save-flrv asm-restore-flrv asm-return asm-size + asm-save-flrv asm-restore-flrv asm-return asm-c-return asm-size asm-enter asm-foreign-call asm-foreign-callable asm-read-counter asm-read-time-base @@ -2077,6 +2077,10 @@ (lambda () (emit blr '()))) + (define asm-c-return + (lambda (info) + (emit blr '()))) + (define asm-lognot (lambda (code* dest src) (Trivit (dest src) @@ -2129,19 +2133,27 @@ (define align (lambda (b x) (let ([k (- b 1)]) (fxlogand (fx+ x k) (fxlognot k))))) (define gp-parameter-regs (lambda () (list %Carg1 %Carg2 %Carg3 %Carg4 %Carg5 %Carg6 %Carg7 %Carg8))) (define fp-parameter-regs (lambda () (list %Cfparg1 %Cfparg2 %Cfparg3 %Cfparg4 %Cfparg5 %Cfparg6 %Cfparg7 %Cfparg8))) + (define (indirect-result-that-fits-in-registers? result-type) + (nanopass-case (Ltype Type) result-type + [(fp-ftd& ,ftd) (not ($ftd-compound? ftd))] + [else #f])) + (define (indirect-result-to-pointer? result-type) + (nanopass-case (Ltype Type) result-type + [(fp-ftd& ,ftd) ($ftd-compound? ftd)] + [else #f])) (define-who asm-foreign-call (with-output-language (L13 Effect) (define load-double-stack - (lambda (offset) + (lambda (offset fp-disp) (lambda (x) ; requires var (%seq - (inline ,(make-info-loadfl %flreg1) ,%load-double ,x ,%zero ,(%constant flonum-data-disp)) + (inline ,(make-info-loadfl %flreg1) ,%load-double ,x ,%zero (immediate ,fp-disp)) (inline ,(make-info-loadfl %flreg1) ,%store-double ,%sp ,%zero (immediate ,offset)))))) (define load-single-stack - (lambda (offset) + (lambda (offset fp-disp single?) (lambda (x) ; requires var (%seq - (inline ,(make-info-loadfl %flreg1) ,%load-double->single ,x ,%zero ,(%constant flonum-data-disp)) + (inline ,(make-info-loadfl %flreg1) ,(if single? %load-single %load-double->single) ,x ,%zero (immediate ,fp-disp)) (inline ,(make-info-loadfl %flreg1) ,%store-single ,%sp ,%zero (immediate ,offset)))))) (define load-int-stack (lambda (offset) @@ -2153,25 +2165,39 @@ (%seq (set! ,(%mref ,%sp ,(fx+ offset 4)) ,lorhs) (set! ,(%mref ,%sp ,offset) ,hirhs))))) - (define load-double-reg - (lambda (fpreg) + (define load-indirect-int-stack + (lambda (offset size) + (lambda (rhs) ; requires rhs + (let ([int-type (case size + [(1) 'integer-8] + [(2) 'integer-16] + [else 'integer-32])]) + `(set! ,(%mref ,%sp ,offset) (inline ,(make-info-load int-type #f) ,%load ,rhs ,%zero (immediate ,0))))))) + (define load-indirect-int64-stack + (lambda (offset) (lambda (x) ; requires var - `(inline ,(make-info-loadfl fpreg) ,%load-double ,x ,%zero ,(%constant flonum-data-disp))))) + `(seq + (set! ,(%mref ,%sp ,offset) ,(%mref ,x 0)) + (set! ,(%mref ,%sp ,(fx+ offset 4)) ,(%mref ,x 4)))))) + (define load-double-reg + (lambda (fpreg fp-disp) + (lambda (x) ; requires var + `(inline ,(make-info-loadfl fpreg) ,%load-double ,x ,%zero (immediate ,fp-disp))))) (define load-soft-double-reg - (lambda (loreg hireg) + (lambda (loreg hireg fp-disp) (lambda (x) (%seq - (set! ,loreg ,(%mref ,x ,(fx+ (constant flonum-data-disp) 4))) - (set! ,hireg ,(%mref ,x ,(constant flonum-data-disp))))))) + (set! ,loreg ,(%mref ,x ,(fx+ fp-disp 4))) + (set! ,hireg ,(%mref ,x ,fp-disp)))))) (define load-single-reg - (lambda (fpreg) + (lambda (fpreg fp-disp single?) (lambda (x) ; requires var - `(inline ,(make-info-loadfl fpreg) ,%load-double->single ,x ,%zero ,(%constant flonum-data-disp))))) + `(inline ,(make-info-loadfl fpreg) ,(if single? %load-single %load-double->single) ,x ,%zero (immediate ,fp-disp))))) (define load-soft-single-reg - (lambda (ireg) + (lambda (ireg fp-disp single?) (lambda (x) (%seq - (inline ,(make-info-loadfl %flreg1) ,%load-double->single ,x ,%zero ,(%constant flonum-data-disp)) + (inline ,(make-info-loadfl %flreg1) ,(if single? %load-single %load-double->single) ,x ,%zero (immediate ,fp-disp)) (inline ,(make-info-loadfl %flreg1) ,%store-single ,%tc ,%zero (immediate ,(constant tc-ac0-disp))) (set! ,ireg ,(%tc-ref ac0)))))) (define load-int-reg @@ -2184,10 +2210,31 @@ (%seq (set! ,loreg ,lo) (set! ,hireg ,hi))))) + (define load-indirect-int-reg + (lambda (ireg size category) + (lambda (rhs) ; requires var + (let ([int-type (case category + [(unsigned) (case size + [(1) 'unsigned-8] + [(2) 'unsigned-16] + [else 'unsigned-32])] + [else (case size + [(1) 'integer-8] + [(2) 'integer-16] + [else 'integer-32])])]) + `(set! ,ireg (inline ,(make-info-load int-type #f) ,%load ,rhs ,%zero (immediate ,0))))))) + (define load-indirect-int64-reg + (lambda (loreg hireg) + (lambda (x) ; requires var + `(seq + (set! ,hireg ,(%mref ,x 0)) + (set! ,loreg ,(%mref ,x 4)))))) (define do-args (lambda (types) ;; NB: start stack pointer at 8 to put arguments above the linkage area - (let loop ([types types] [locs '()] [live* '()] [int* (gp-parameter-regs)] [flt* (fp-parameter-regs)] [isp 8]) + (let loop ([types types] [locs '()] [live* '()] [int* (gp-parameter-regs)] [flt* (fp-parameter-regs)] [isp 8] + ;; configured for `ftd-fp&` unpacking of floats: + [fp-disp (constant flonum-data-disp)] [single? #f]) (if (null? types) (values isp locs live*) (nanopass-case (Ltype Type) (car types) @@ -2197,38 +2244,91 @@ (if (null? int*) (let ([isp (align 8 isp)]) (loop (cdr types) - (cons (load-double-stack isp) locs) - live* '() flt* (fx+ isp 8))) + (cons (load-double-stack isp fp-disp) locs) + live* '() flt* (fx+ isp 8) + (constant flonum-data-disp) #f)) (loop (cdr types) - (cons (load-soft-double-reg (cadr int*) (car int*)) locs) - (cons* (car int*) (cadr int*) live*) (cddr int*) flt* isp))) + (cons (load-soft-double-reg (cadr int*) (car int*) fp-disp) locs) + (cons* (car int*) (cadr int*) live*) (cddr int*) flt* isp + (constant flonum-data-disp) #f))) (if (null? flt*) (let ([isp (align 8 isp)]) (loop (cdr types) - (cons (load-double-stack isp) locs) - live* int* '() (fx+ isp 8))) + (cons (load-double-stack isp fp-disp) locs) + live* int* '() (fx+ isp 8) + (constant flonum-data-disp) #f)) (loop (cdr types) - (cons (load-double-reg (car flt*)) locs) - live* int* (cdr flt*) isp)))] + (cons (load-double-reg (car flt*) fp-disp) locs) + live* int* (cdr flt*) isp + (constant flonum-data-disp) #f)))] [(fp-single-float) (if (constant software-floating-point) (if (null? int*) ; NB: ABI says singles are passed as doubles on the stack, but gcc/linux doesn't (loop (cdr types) - (cons (load-single-stack isp) locs) - live* '() flt* (fx+ isp 4)) + (cons (load-single-stack isp fp-disp single?) locs) + live* '() flt* (fx+ isp 4) + (constant flonum-data-disp) #f) (loop (cdr types) - (cons (load-soft-single-reg (car int*)) locs) - (cons (car int*) live*) (cdr int*) flt* isp)) + (cons (load-soft-single-reg (car int*) fp-disp single?) locs) + (cons (car int*) live*) (cdr int*) flt* isp + (constant flonum-data-disp) #f)) (if (null? flt*) ; NB: ABI says singles are passed as doubles on the stack, but gcc/linux doesn't (let ([isp (align 4 isp)]) (loop (cdr types) - (cons (load-single-stack isp) locs) - live* int* '() (fx+ isp 4))) + (cons (load-single-stack isp fp-disp single?) locs) + live* int* '() (fx+ isp 4) + (constant flonum-data-disp) #f)) (loop (cdr types) - (cons (load-single-reg (car flt*)) locs) - live* int* (cdr flt*) isp)))] + (cons (load-single-reg (car flt*) fp-disp single?) locs) + live* int* (cdr flt*) isp + (constant flonum-data-disp) #f)))] + [(fp-ftd& ,ftd) + (cond + [($ftd-compound? ftd) + ;; pass as pointer + (let ([pointer-type (with-output-language (Ltype Type) `(fp-integer 32))]) + (loop (cons pointer-type (cdr types)) locs live* int* flt* isp + (constant flonum-data-disp) #f))] + [else + ;; extract content and pass that content + (let ([category ($ftd-atomic-category ftd)]) + (cond + [(eq? category 'float) + ;; piggy-back on unboxed handler + (let ([unpacked-type (with-output-language (Ltype Type) + (case ($ftd-size ftd) + [(4) `(fp-single-float)] + [else `(fp-double-float)]))]) + (loop (cons unpacked-type (cdr types)) locs live* int* flt* isp + ;; no floating displacement within pointer: + 0 + ;; in case of float, load as single-float: + (= ($ftd-size ftd) 4)))] + [(and (memq category '(integer unsigned)) + (fx= 8 ($ftd-size ftd))) + (let ([int* (if (even? (length int*)) int* (cdr int*))]) + (if (null? int*) + (let ([isp (align 8 isp)]) + (loop (cdr types) + (cons (load-indirect-int64-stack isp) locs) + live* '() flt* (fx+ isp 8) + (constant flonum-data-disp) #f)) + (loop (cdr types) + (cons (load-indirect-int64-reg (cadr int*) (car int*)) locs) + (cons* (car int*) (cadr int*) live*) (cddr int*) flt* isp + (constant flonum-data-disp) #f)))] + [else + (if (null? int*) + (loop (cdr types) + (cons (load-indirect-int-stack isp ($ftd-size ftd)) locs) + live* '() flt* (fx+ isp 4) + (constant flonum-data-disp) #f) + (loop (cdr types) + (cons (load-indirect-int-reg (car int*) ($ftd-size ftd) category) locs) + (cons (car int*) live*) (cdr int*) flt* isp + (constant flonum-data-disp) #f))]))])] [else (if (nanopass-case (Ltype Type) (car types) [(fp-integer ,bits) (fx= bits 64)] @@ -2239,28 +2339,58 @@ (let ([isp (align 8 isp)]) (loop (cdr types) (cons (load-int64-stack isp) locs) - live* '() flt* (fx+ isp 8))) + live* '() flt* (fx+ isp 8) + (constant flonum-data-disp) #f)) (loop (cdr types) (cons (load-int64-reg (cadr int*) (car int*)) locs) - (cons* (car int*) (cadr int*) live*) (cddr int*) flt* isp))) + (cons* (car int*) (cadr int*) live*) (cddr int*) flt* isp + (constant flonum-data-disp) #f))) (if (null? int*) (loop (cdr types) (cons (load-int-stack isp) locs) - live* '() flt* (fx+ isp 4)) + live* '() flt* (fx+ isp 4) + (constant flonum-data-disp) #f) (loop (cdr types) (cons (load-int-reg (car int*)) locs) - (cons (car int*) live*) (cdr int*) flt* isp)))]))))) + (cons (car int*) live*) (cdr int*) flt* isp + (constant flonum-data-disp) #f)))]))))) + (define do-indirect-result-from-registers + (lambda (ftd offset) + (let ([tmp %Carg8]) + (%seq + (set! ,tmp ,(%mref ,%sp ,offset)) + ,(cond + [(and (not (constant software-floating-point)) + (eq? 'float ($ftd-atomic-category ftd))) + `(inline ,(make-info-loadfl %Cfpretval) ,(if (= 4 ($ftd-size ftd)) %store-single %store-double) + ,tmp ,%zero (immediate 0))] + [else + (case ($ftd-size ftd) + [(1) `(inline ,(make-info-load 'integer-8 #f) ,%store ,tmp ,%zero (immediate 0) ,%Cretval)] + [(2) `(inline ,(make-info-load 'integer-16 #f) ,%store ,tmp ,%zero (immediate 0) ,%Cretval)] + [(4) `(inline ,(make-info-load 'integer-32 #f) ,%store ,tmp ,%zero (immediate 0) ,%Cretval)] + [(8) + (%seq + (inline ,(make-info-load 'integer-32 #f) ,%store ,tmp ,%zero (immediate 0) ,%Cretval-high) + (inline ,(make-info-load 'integer-32 #f) ,%store ,tmp ,%zero (immediate 4) ,%Cretval-low))] + [else (sorry! who "unexpected result size")])]))))) (lambda (info) (safe-assert (reg-callee-save? %tc)) ; no need to save-restore - (let ([arg-type* (info-foreign-arg-type* info)] - [result-type (info-foreign-result-type info)]) - (with-values (do-args arg-type*) + (let* ([arg-type* (info-foreign-arg-type* info)] + [result-type (info-foreign-result-type info)] + [fill-result-here? (indirect-result-that-fits-in-registers? result-type)]) + (with-values (do-args (if fill-result-here? (cdr arg-type*) arg-type*)) (lambda (frame-size locs live*) ;; NB: add 4 to frame size for CR save word - (let ([frame-size (align 16 (fx+ frame-size 4))]) + (let ([frame-size (align 16 (fx+ frame-size 4 (if fill-result-here? 4 0)))]) (values (lambda () (%inline store-with-update ,%Csp ,%Csp (immediate ,(fx- frame-size)))) - (reverse locs) + (let ([locs (reverse locs)]) + (cond + [fill-result-here? + ;; stash extra argument on the stack to be retrieved after call and filled with the result: + (cons (load-int-stack frame-size) locs)] + [else locs])) (lambda (t0) (if (constant software-floating-point) (let () @@ -2276,11 +2406,21 @@ [(8 16 32) (handle-32-bit)] [(64) (handle-64-bit)] [else (sorry! who "unexpected asm-foriegn-procedures fp-integer size ~s" bits)]))) + (define (handle-ftd&-case ftd) + (cond + [fill-result-here? + (%seq + ,(if (> ($ftd-size ftd) 4) + (handle-64-bit) + (handle-32-bit)) + ,(do-indirect-result-from-registers ftd frame-size))] + [else `(inline ,(make-info-kill*-live* (reg-list) live*) ,%c-call ,t0)])) (nanopass-case (Ltype Type) result-type [(fp-double-float) (handle-64-bit)] [(fp-single-float) (handle-32-bit)] [(fp-integer ,bits) (handle-integer-cases bits)] [(fp-integer ,bits) (handle-integer-cases bits)] + [(fp-ftd& ,ftd) (handle-ftd&-case ftd)] [else `(inline ,(make-info-kill*-live* (reg-list %Cretval) live*) ,%c-call ,t0)])) (let () (define handle-integer-cases @@ -2288,12 +2428,22 @@ (case bits [(8 16 32) `(inline ,(make-info-kill*-live* (reg-list %Cretval) live*) ,%c-call ,t0)] [(64) `(inline ,(make-info-kill*-live* (reg-list %Cretval-high %Cretval-low) live*) ,%c-call ,t0)] - [else (sorry! who "unexpected asm-foreign-procedures fp-integer size ~s" bits)]))) + [else (sorry! who "unexpected asm-foreign-procedures fp-integer size ~s" bits)]))) + (define (handle-ftd&-case ftd) + (cond + [fill-result-here? + (%seq + ,(if (not (eq? 'float ($ftd-atomic-category ftd))) + (handle-integer-cases (* 8 ($ftd-size ftd))) + `(inline ,(make-info-kill*-live* (reg-list) live*) ,%c-call ,t0)) + ,(do-indirect-result-from-registers ftd frame-size))] + [else `(inline ,(make-info-kill*-live* (reg-list) live*) ,%c-call ,t0)])) (nanopass-case (Ltype Type) result-type [(fp-double-float) `(inline ,(make-info-kill*-live* (reg-list) live*) ,%c-call ,t0)] [(fp-single-float) `(inline ,(make-info-kill*-live* (reg-list) live*) ,%c-call ,t0)] [(fp-integer ,bits) (handle-integer-cases bits)] [(fp-unsigned ,bits) (handle-integer-cases bits)] + [(fp-ftd& ,ftd) (handle-ftd&-case ftd)] [else `(inline ,(make-info-kill*-live* (reg-list %Cretval) live*) ,%c-call ,t0)])))) (nanopass-case (Ltype Type) result-type [(fp-double-float) @@ -2396,40 +2546,36 @@ +---------------------------+ | | | lr | 1 word - sp+184: | | + sp+X+4: | | +---------------------------+ | | | back chain | 1 word - sp+180: | | + sp+X: | | +---------------------------+ + +---------------------------+ <- 16-byte aligned + | | + | &-return space | 2 words, if needed + | | + +---------------------------+ <- 8-byte aligned + | | + | callee-save regs | + | | +---------------------------+ | | - | floating-point regs | 0 words - sp+180: | | - +---------------------------+ + | floating-point arg regs | | | - | integer regs | 18 words - sp+108: | | - +---------------------------+ + +---------------------------+ <- 8-byte aligned | | - | control register | 1 word - sp+104: | | - +---------------------------+ + | integer argument regs | | | - | local variable space | 24 words: 8 words for gp arg regs, 8 double words for fp arg regs, 0 for padding - sp+8: | (and padding) | - +---------------------------+ - | | - | parameter list | 0 words - sp+8: | | - +---------------------------+ + sp+8: +---------------------------+ <-- 8-byte aligned | | | lr | 1 word (place for get-thread-context to store lr) - sp+4: | | + | | +---------------------------+ | | | back chain | 1 word - sp+0: | [sp+176] | + sp+0: | [sp+X-4] | +---------------------------+ FOR foreign callable (nb: assuming flreg1 & flreg2 are caller-save): @@ -2438,14 +2584,14 @@ save fp arg regs (based on number declared by foreign-callable form) at sp+40 don't bother saving cr save callee-save gp registers at sp+108 (could avoid those we don't use during argument conversion, if we knew what they were) - save lr at sp[180] (actually sp 4, before sp is moved) + save lr at sp[188] (actually sp 4, before sp is moved) if threaded: call get-thread-context else tc <- thread-context endif ... - restore lr from sp[180] + restore lr from sp[188] INVARIANTS stack grows down @@ -2488,9 +2634,22 @@ (%seq (set! ,lolvalue ,(%mref ,%sp ,(fx+ offset 4))) (set! ,hilvalue ,(%mref ,%sp ,offset)))))) + (define load-stack-address + (lambda (offset) + (lambda (lvalue) + `(set! ,lvalue ,(%inline + ,%sp (immediate ,offset)))))) + (define load-stack-address/convert-float + (lambda (offset) + (lambda (lvalue) + (%seq + ;; Overwrite argument on stack with single-precision version + ;; FIXME: is the callee allowed to do this if the argument is passed on the stack? + (inline ,(make-info-loadfl %flreg1) ,%load-double->single ,%sp ,%zero (immediate ,offset)) + (inline ,(make-info-loadfl %flreg1) ,%store-single ,%sp ,%zero (immediate ,offset)) + (set! ,lvalue ,(%inline + ,%sp (immediate ,offset))))))) (define count-reg-args - (lambda (types gp-reg-count fp-reg-count) - (let f ([types types] [iint 0] [iflt 0]) + (lambda (types gp-reg-count fp-reg-count synthesize-first-argument?) + (let f ([types types] [iint (if synthesize-first-argument? -1 0)] [iflt 0]) (if (null? types) (values iint iflt) (cond @@ -2498,11 +2657,14 @@ (nanopass-case (Ltype Type) (car types) [(fp-double-float) #t] [(fp-single-float) #t] + [(fp-ftd& ,ftd) (eq? 'float ($ftd-atomic-category ftd))] [else #f])) (f (cdr types) iint (if (fx< iflt fp-reg-count) (fx+ iflt 1) iflt))] [(or (nanopass-case (Ltype Type) (car types) [(fp-integer ,bits) (fx= bits 64)] [(fp-unsigned ,bits) (fx= bits 64)] + [(fp-ftd& ,ftd) (and (not ($ftd-compound? ftd)) + (fx= 8 ($ftd-size ftd)))] [else #f]) (and (constant software-floating-point) (nanopass-case (Ltype Type) (car types) @@ -2515,8 +2677,9 @@ ; all of the args are on the stack at this point, though not contiguous since ; we push all of the int reg args with one push instruction and all of the ; float reg args with another (v)push instruction - (lambda (types gp-reg-count fp-reg-count int-reg-offset float-reg-offset stack-arg-offset) - (let loop ([types types] + (lambda (types gp-reg-count fp-reg-count int-reg-offset float-reg-offset stack-arg-offset + synthesize-first-argument? return-space-offset) + (let loop ([types (if synthesize-first-argument? (cdr types) types)] [locs '()] [iint 0] [iflt 0] @@ -2524,7 +2687,11 @@ [float-reg-offset float-reg-offset] [stack-arg-offset stack-arg-offset]) (if (null? types) - (reverse locs) + (let ([locs (reverse locs)]) + (if synthesize-first-argument? + (cons (load-stack-address return-space-offset) + locs) + locs)) (cond [(and (not (constant software-floating-point)) (nanopass-case (Ltype Type) (car types) @@ -2564,7 +2731,49 @@ (loop (cdr types) (cons (load-soft-single-stack stack-arg-offset) locs) iint iflt int-reg-offset float-reg-offset (fx+ stack-arg-offset 4)))] - [(nanopass-case (Ltype Type) (car types) + [(nanopass-case (Ltype Type) (car types) + [(fp-ftd& ,ftd) (not ($ftd-compound? ftd))] + [else #f]) + ;; load pointer to address on the stack + (let ([ftd (nanopass-case (Ltype Type) (car types) + [(fp-ftd& ,ftd) ftd])]) + (case (and (not (constant software-floating-point)) + ($ftd-atomic-category ftd)) + [(float) + (let ([load-address (case ($ftd-size ftd) + [(4) load-stack-address/convert-float] + [else load-stack-address])]) + (if (fx< iflt fp-reg-count) + (loop (cdr types) + (cons (load-address float-reg-offset) locs) + iint (fx+ iflt 1) int-reg-offset (fx+ float-reg-offset 8) stack-arg-offset) + (let ([stack-arg-offset (align 8 stack-arg-offset)]) + (loop (cdr types) + (cons (load-address stack-arg-offset) locs) + iint iflt int-reg-offset float-reg-offset (fx+ stack-arg-offset 8)))))] + [else + (case ($ftd-size ftd) + [(8) + (let ([iint (align 2 iint)]) + (if (fx< iint gp-reg-count) + (let ([int-reg-offset (align 8 int-reg-offset)]) + (loop (cdr types) + (cons (load-stack-address int-reg-offset) locs) + (fx+ iint 2) iflt (fx+ int-reg-offset 8) float-reg-offset stack-arg-offset)) + (let ([stack-arg-offset (align 8 stack-arg-offset)]) + (loop (cdr types) + (cons (load-stack-address stack-arg-offset) locs) + iint iflt int-reg-offset float-reg-offset (fx+ stack-arg-offset 8)))))] + [else + (let ([byte-offset (- 4 ($ftd-size ftd))]) + (if (fx< iint gp-reg-count) + (loop (cdr types) + (cons (load-stack-address (+ int-reg-offset byte-offset)) locs) + (fx+ iint 1) iflt (fx+ int-reg-offset 4) float-reg-offset stack-arg-offset) + (loop (cdr types) + (cons (load-stack-address (+ stack-arg-offset byte-offset)) locs) + iint iflt int-reg-offset float-reg-offset (fx+ stack-arg-offset 4))))])]))] + [(nanopass-case (Ltype Type) (car types) [(fp-integer ,bits) (fx= bits 64)] [(fp-unsigned ,bits) (fx= bits 64)] [else #f]) @@ -2616,48 +2825,114 @@ (if (null? regs) inline (%seq ,inline ,(f regs (fx+ offset 4)))))))))) + (define do-result + (lambda (result-type return-space-offset int-reg-offset) + (nanopass-case (Ltype Type) result-type + [(fp-ftd& ,ftd) + (case ($ftd-atomic-category ftd) + [(float) + (values + (lambda () + (case ($ftd-size ftd) + [(4) `(inline ,(make-info-loadfl %Cfpretval) ,%load-single ,%sp ,%zero (immediate ,return-space-offset))] + [else `(inline ,(make-info-loadfl %Cfpretval) ,%load-double ,%sp ,%zero (immediate ,return-space-offset))])) + '())] + [else + (cond + [($ftd-compound? ftd) + ;; return pointer + (values + (lambda () `(set! ,%Cretval ,(%mref ,%sp ,int-reg-offset))) + (list %Cretval))] + [(fx= 8 ($ftd-size ftd)) + (values (lambda () + (%seq + (set! ,%Cretval-high ,(%mref ,%sp ,return-space-offset)) + (set! ,%Cretval-low ,(%mref ,%sp ,(fx+ return-space-offset 4))))) + (list %Cretval-high %Cretval-low))] + [else + (values + (lambda () + (case ($ftd-size ftd) + [(1) `(set! ,%Cretval (inline ,(make-info-load 'integer-8 #f) ,%load ,%sp ,%zero (immediate ,return-space-offset)))] + [(2) `(set! ,%Cretval (inline ,(make-info-load 'integer-16 #f) ,%load ,%sp ,%zero (immediate ,return-space-offset)))] + [else `(set! ,%Cretval ,(%mref ,%sp ,return-space-offset))])) + (list %Cretval))])])] + [(fp-double-float) + (values (lambda (x) + `(inline ,(make-info-loadfl %Cfpretval) ,%load-double ,x ,%zero ,(%constant flonum-data-disp))) + '())] + [(fp-single-float) + (values (lambda (x) + `(inline ,(make-info-loadfl %Cfpretval) ,%load-double->single ,x ,%zero ,(%constant flonum-data-disp))) + '())] + [(fp-void) + (values (lambda () `(nop)) + '())] + [else + (cond + [(nanopass-case (Ltype Type) result-type + [(fp-integer ,bits) (fx= bits 64)] + [(fp-unsigned ,bits) (fx= bits 64)] + [else #f]) + (values (lambda (lo-rhs hi-rhs) + (%seq + (set! ,%Cretval-low ,lo-rhs) + (set! ,%Cretval-high ,hi-rhs))) + (list %Cretval-high %Cretval-low))] + [else + (values (lambda (rhs) + `(set! ,%Cretval ,rhs)) + (list %Cretval))])]))) (lambda (info) (define callee-save-regs (list %r14 %r15 %r16 %r17 %r18 %r19 %r20 %r21 %r22 %r23 %r24 %r25 %r26 %r27 %r28 %r29 %r30 %r31)) (define isaved (length callee-save-regs)) (let ([arg-type* (info-foreign-arg-type* info)] + [result-type (info-foreign-result-type info)] [gp-reg-count (length (gp-parameter-regs))] [fp-reg-count (length (fp-parameter-regs))]) - (let-values ([(iint iflt) (count-reg-args arg-type* gp-reg-count fp-reg-count)]) + (let-values ([(iint iflt) (count-reg-args arg-type* gp-reg-count fp-reg-count (indirect-result-that-fits-in-registers? result-type))]) (let* ([int-reg-offset 8] ; initial offset for calling conventions - [float-reg-offset (fx+ (fx* gp-reg-count 4) int-reg-offset)] + [float-reg-offset (align 8 (fx+ (fx* gp-reg-count 4) int-reg-offset))] [callee-save-offset (if (constant software-floating-point) float-reg-offset (fx+ (fx* fp-reg-count 8) float-reg-offset))] - [stack-size (align 16 (fx+ (fx* isaved 4) callee-save-offset))] + [synthesize-first-argument? (indirect-result-that-fits-in-registers? result-type)] + [return-space-offset (align 8 (fx+ (fx* isaved 4) callee-save-offset))] + [stack-size (align 16 (if synthesize-first-argument? + (fx+ return-space-offset 8) + return-space-offset))] [stack-arg-offset (fx+ stack-size 8)]) - (values - (lambda () - (%seq - ,(%inline save-lr (immediate 4)) - ,(%inline store-with-update ,%Csp ,%Csp (immediate ,(fx- stack-size))) - ,(save-regs (list-head (gp-parameter-regs) iint) int-reg-offset) - ,(save-fp-regs (list-head (fp-parameter-regs) iflt) float-reg-offset) - ; not bothering with callee-save floating point regs right now - ; not bothering with cr, because we don't update nonvolatile fields - ,(save-regs callee-save-regs callee-save-offset) - ,(if-feature pthreads - (%seq - (set! ,%Cretval ,(%inline get-tc)) - (set! ,%tc ,%Cretval)) - `(set! ,%tc (literal ,(make-info-literal #f 'entry (lookup-c-entry thread-context) 0)))))) - ; list of procedures that marshal arguments from their C stack locations - ; to the Scheme argument locations - (do-stack arg-type* gp-reg-count fp-reg-count int-reg-offset float-reg-offset stack-arg-offset) - (lambda (fv* Scall->result-type) - (in-context Tail - (%seq - ; restore the lr - (inline ,null-info ,%restore-lr (immediate ,(fx+ stack-size 4))) - ; restore the callee save registers - ,(restore-regs callee-save-regs callee-save-offset) - ; deallocate space for pad & arg reg values - (set! ,%Csp ,(%inline + ,%Csp (immediate ,stack-size))) - ; tail call the C helper that calls the Scheme procedure - (jump (literal ,(make-info-literal #f 'entry Scall->result-type 0)) - (,callee-save-regs ... ,fv* ...)))))))))))))) + (let-values ([(get-result result-regs) (do-result result-type return-space-offset int-reg-offset)]) + (values + (lambda () + (%seq + ,(%inline save-lr (immediate 4)) + ,(%inline store-with-update ,%Csp ,%Csp (immediate ,(fx- stack-size))) + ,(save-regs (list-head (gp-parameter-regs) iint) int-reg-offset) + ,(save-fp-regs (list-head (fp-parameter-regs) iflt) float-reg-offset) + ; not bothering with callee-save floating point regs right now + ; not bothering with cr, because we don't update nonvolatile fields + ,(save-regs callee-save-regs callee-save-offset) + ,(if-feature pthreads + (%seq + (set! ,%Cretval ,(%inline get-tc)) + (set! ,%tc ,%Cretval)) + `(set! ,%tc (literal ,(make-info-literal #f 'entry (lookup-c-entry thread-context) 0)))))) + ; list of procedures that marshal arguments from their C stack locations + ; to the Scheme argument locations + (do-stack arg-type* gp-reg-count fp-reg-count int-reg-offset float-reg-offset stack-arg-offset + synthesize-first-argument? return-space-offset) + get-result + (lambda () + (in-context Tail + (%seq + ; restore the lr + (inline ,null-info ,%restore-lr (immediate ,(fx+ stack-size 4))) + ; restore the callee save registers + ,(restore-regs callee-save-regs callee-save-offset) + ; deallocate space for pad & arg reg values + (set! ,%Csp ,(%inline + ,%Csp (immediate ,stack-size))) + ; done + (asm-c-return ,null-info ,callee-save-regs ... ,result-regs ...)))))))))))))) ) diff --git a/s/primdata.ss b/s/primdata.ss index 46003bfd56..2399bd24ba 100644 --- a/s/primdata.ss +++ b/s/primdata.ss @@ -1967,6 +1967,12 @@ ($fptr-unlock! [flags]) ($fp-type->pred [flags]) ($ftd? [flags]) + ($ftd-alignment [flags]) + ($ftd-as-box? [flags]) + ($ftd-atomic-category [flags]) + ($ftd-compound? [flags]) + ($ftd-size [flags]) + ($ftd->members [flags]) ($ftype-pointer? [flags]) ($fxaddress [flags unrestricted alloc]) ($fx-? [flags]) diff --git a/s/syntax.ss b/s/syntax.ss index bbec064e3a..ae0b68ea08 100644 --- a/s/syntax.ss +++ b/s/syntax.ss @@ -679,7 +679,11 @@ [(integer-40 integer-48 integer-56 integer-64) `(fp-integer 64)] [(unsigned-40 unsigned-48 unsigned-56 unsigned-64) `(fp-unsigned 64)] [(void) (and void-okay? `(fp-void))] - [else (and ($ftd? x) `(fp-ftd ,x))]) + [else + (cond + [($ftd? x) `(fp-ftd ,x)] + [($ftd-as-box? x) `(fp-ftd& ,(unbox x))] + [else #f])]) ($oops #f "invalid ~a ~a specifier ~s" who what x))))) (define build-foreign-procedure @@ -8508,7 +8512,9 @@ (constant-case native-endianness [(little) 'utf-32le] [(big) 'utf-32be])])] - [else (and ($ftd? type) type)]))) + [else + (and (or ($ftd? type) ($ftd-as-box? type)) + type)]))) (define $fp-type->pred (lambda (type) @@ -8649,10 +8655,11 @@ (err ($moi) x))))) (u32*))] [else #f]) - (if ($ftd? type) - #`(#,(if unsafe? #'() #`((unless (record? x '#,type) (err ($moi) x)))) - (x) - (#,type)) + (if (or ($ftd? type) ($ftd-as-box? type)) + (let ([ftd (if ($ftd? type) type (unbox type))]) + #`(#,(if unsafe? #'() #`((unless (record? x '#,ftd) (err ($moi) x)))) + (x) + (#,type))) (with-syntax ([pred (datum->syntax #'foreign-procedure ($fp-type->pred type))] [type (datum->syntax #'foreign-procedure type)]) #`(#,(if unsafe? #'() #'((unless (pred x) (err ($moi) x)))) @@ -8684,15 +8691,36 @@ [(unsigned-48) #`((lambda (x) (mod x #x1000000000000)) unsigned-64)] [(integer-56) #`((lambda (x) (mod0 x #x100000000000000)) integer-64)] [(unsigned-56) #`((lambda (x) (mod x #x100000000000000)) unsigned-64)] - [else #`(values #,(datum->syntax #'foreign-procedure result-type))])]) - #`(let ([p ($foreign-procedure conv foreign-name ?foreign-addr (arg ... ...) result)] + [else + (cond + [($ftd-as-box? result-type) + ;; Return void, since an extra first argument receives the result, + ;; but tell `$foreign-procedure` that the result is actually an & form + #`((lambda (r) (void)) #,(datum->syntax #'foreign-procedure result-type))] + [else + #`(values #,(datum->syntax #'foreign-procedure result-type))])])] + [([extra ...] [extra-arg ...] [extra-check ...]) + ;; When the result type is `(& )`, the `$foreign-procedure` result + ;; expects an extra argument as a `(* )` that it uses to store the + ;; foreign-procedure result, and it returns void. The extra argument is made + ;; explicit for `$foreign-procedure`, and the return type is preserved as-is + ;; to let `$foreign-procedure` know that it needs to fill the first argument. + (cond + [($ftd-as-box? result-type) + #`([&-result] + [#,(unbox result-type)] + #,(if unsafe? + #`[] + #`[(unless (record? &-result '#,(unbox result-type)) (err ($moi) &-result))]))] + [else #'([] [] [])])]) + #`(let ([p ($foreign-procedure conv foreign-name ?foreign-addr (extra-arg ... arg ... ...) result)] #,@(if unsafe? #'() #'([err (lambda (who x) ($oops (or who foreign-name) "invalid foreign-procedure argument ~s" x))]))) - (lambda (t ...) check ... ... (result-filter (p actual ... ...))))))))) + (lambda (extra ... t ...) extra-check ... check ... ... (result-filter (p extra ... actual ... ...))))))))) (define-syntax foreign-procedure (lambda (x) @@ -8810,12 +8838,13 @@ (with-syntax ([(x) (generate-temporaries #'(*))]) #`(x (x) (#,(datum->syntax #'foreign-callable type)))))) type*)] - [(result-filter result) + [(result-filter result [extra-arg ...] [extra ...]) (case result-type [(boolean) #`((lambda (x) (if x 1 0)) #,(constant-case int-bits [(32) #'integer-32] - [(64) #'integer-64]))] + [(64) #'integer-64]) + [] [])] [(char) #`((lambda (x) #,(if unsafe? @@ -8824,7 +8853,8 @@ (let ([x (char->integer x)]) (and (fx<= x #xff) x))) (err x)))) - unsigned-8)] + unsigned-8 + [] [])] [(wchar) (constant-case wchar-bits [(16) #`((lambda (x) @@ -8834,14 +8864,16 @@ (let ([x (char->integer x)]) (and (fx<= x #xffff) x))) (err x)))) - unsigned-16)] + unsigned-16 + [] [])] [(32) #`((lambda (x) #,(if unsafe? #'(char->integer x) #'(if (char? x) (char->integer x) (err x)))) - unsigned-16)])] + unsigned-16 + [] [])])] [(utf-8) #`((lambda (x) (if (eq? x #f) @@ -8851,7 +8883,8 @@ #'(if (string? x) ($fp-string->utf8 x) (err x))))) - u8*)] + u8* + [] [])] [(utf-16le) #`((lambda (x) (if (eq? x #f) @@ -8861,7 +8894,8 @@ #'(if (string? x) ($fp-string->utf16 x 'little) (err x))))) - u16*)] + u16* + [] [])] [(utf-16be) #`((lambda (x) (if (eq? x #f) @@ -8871,7 +8905,8 @@ #'(if (string? x) ($fp-string->utf16 x 'big) (err x))))) - u16*)] + u16* + [] [])] [(utf-32le) #`((lambda (x) (if (eq? x #f) @@ -8881,7 +8916,8 @@ #'(if (string? x) ($fp-string->utf32 x 'little) (err x))))) - u32*)] + u32* + [] [])] [(utf-32be) #`((lambda (x) (if (eq? x #f) @@ -8891,21 +8927,37 @@ #'(if (string? x) ($fp-string->utf32 x 'big) (err x))))) - u32*)] + u32* + [] [])] [else - (if ($ftd? result-type) - (with-syntax ([type (datum->syntax #'foreign-callable result-type)]) - #`((lambda (x) - #,@(if unsafe? #'() #'((unless (record? x 'type) (err x)))) - x) - type)) - (with-syntax ([pred (datum->syntax #'foreign-callable ($fp-type->pred result-type))] - [type (datum->syntax #'foreign-callable result-type)]) - #`((lambda (x) - #,@(if unsafe? #'() #'((unless (pred x) (err x)))) - x) - type)))])]) - ; use a gensym to avoid giving the procedure a confusing namej + (cond + [($ftd? result-type) + (with-syntax ([type (datum->syntax #'foreign-callable result-type)]) + #`((lambda (x) + #,@(if unsafe? #'() #'((unless (record? x 'type) (err x)))) + x) + type + [] []))] + [($ftd-as-box? result-type) + ;; callable receives an extra pointer argument to fill with the result; + ;; we add this type to `$foreign-callable` as an initial address argument, + ;; which may be actually provided by the caller or synthesized by the + ;; back end, depending on the type and architecture + (with-syntax ([type (datum->syntax #'foreign-callable result-type)] + [ftd (datum->syntax #'foreign-callable (unbox result-type))]) + #`((lambda (x) (void)) ; callable result is ignored + type + [ftd] + [&-result]))] + [else + (with-syntax ([pred (datum->syntax #'foreign-callable ($fp-type->pred result-type))] + [type (datum->syntax #'foreign-callable result-type)]) + #`((lambda (x) + #,@(if unsafe? #'() #'((unless (pred x) (err x)))) + x) + type + [] []))])])]) + ; use a gensym to avoid giving the procedure a confusing name (with-syntax ([p (datum->syntax #'foreign-callable (gensym))]) #`($foreign-callable conv (let ([p ?proc]) @@ -8914,8 +8966,8 @@ "invalid return value ~s from ~s" x p)) #,@(if unsafe? #'() #'((unless (procedure? p) ($oops 'foreign-callable "~s is not a procedure" p)))) - (lambda (t ... ...) (result-filter (p actual ...)))) - (arg ... ...) + (lambda (extra ... t ... ...) (result-filter (p extra ... actual ...)))) + (extra-arg ... arg ... ...) result))))))) (define-syntax foreign-callable diff --git a/s/x86.ss b/s/x86.ss index 28962e4893..07b83be780 100644 --- a/s/x86.ss +++ b/s/x86.ss @@ -733,6 +733,15 @@ (define-instruction value (fstpl) [(op (z mem)) `(set! ,(make-live-info) ,z (asm ,info ,asm-fstpl))]) + (define-instruction value (fstps) + [(op (z mem)) `(set! ,(make-live-info) ,z (asm ,info ,asm-fstps))]) + + (define-instruction effect (fldl) + [(op (z mem)) `(asm ,info ,asm-fldl ,z)]) + + (define-instruction effect (flds) + [(op (z mem)) `(asm ,info ,asm-flds ,z)]) + (define-instruction effect (load-single->double load-double->single) [(op (x ur) (y ur) (z imm32)) `(asm ,info ,(asm-fl-cvt op (info-loadfl-flreg info)) ,x ,y ,z)]) @@ -907,11 +916,11 @@ asm-pop asm-shiftop asm-sll asm-logand asm-lognot asm-logtest asm-fl-relop asm-relop asm-push asm-indirect-jump asm-literal-jump asm-direct-jump asm-return-address asm-jump asm-conditional-jump asm-data-label asm-rp-header - asm-lea1 asm-lea2 asm-indirect-call asm-fstpl asm-condition-code + asm-lea1 asm-lea2 asm-indirect-call asm-fstpl asm-fstps asm-fldl asm-flds asm-condition-code asm-fl-cvt asm-fl-store asm-fl-load asm-flt asm-trunc asm-div asm-exchange asm-pause asm-locked-incr asm-locked-decr asm-flop-2 asm-flsqrt asm-c-simple-call - asm-save-flrv asm-restore-flrv asm-return asm-size + asm-save-flrv asm-restore-flrv asm-return asm-c-return asm-size asm-enter asm-foreign-call asm-foreign-callable asm-inc-profile-counter asm-inc-cc-counter asm-read-time-stamp-counter asm-read-performance-monitoring-counter @@ -1039,6 +1048,7 @@ (define-op popf byte-op #b10011101) (define-op nop byte-op #b10010000) (define-op ret byte-op #b11000011) + (define-op retl byte+short-op #b11000010) (define-op sahf byte-op #b10011110) (define-op extad byte-op #b10011001) ; extend eax to edx @@ -1076,7 +1086,9 @@ ; coprocessor ops required to handle calling conventions (define-op fldl float-op2 #b101 #b000) ; double memory push => ST[0] + (define-op flds float-op2 #b001 #b000) ; single memory push => ST[0] (define-op fstpl float-op2 #b101 #b011) ; ST[0] => double memory, pop + (define-op fstps float-op2 #b001 #b011) ; ST[0] => single memory, pop ; SSE2 instructions (pulled from x86_64macros.ss) (define-op sse.addsd sse-op1 #xF2 #x58) @@ -1434,6 +1446,13 @@ (build byte op-code1) (build byte op-code2)))) + (define byte+short-op + (lambda (op op-code1 t code*) + (emit-code (op code*) + (build byte op-code1) + (build byte (fxand (cadr t) #xFF)) + (build byte (fxsrl (cadr t) 16))))) + (define byte-reg-op1 (lambda (op op-code1 reg code*) (begin @@ -1629,6 +1648,21 @@ (Trivit (dest) (emit fstpl dest code*)))) + (define asm-fstps + (lambda (code* dest) + (Trivit (dest) + (emit fstps dest code*)))) + + (define asm-fldl + (lambda (code* src) + (Trivit (src) + (emit fldl src code*)))) + + (define asm-flds + (lambda (code* src) + (Trivit (src) + (emit flds src code*)))) + (define asm-fl-cvt (lambda (op flreg) (lambda (code* base index offset) @@ -1849,6 +1883,14 @@ [(i3osx ti3osx) (emit addi '(imm 12) (cons 'reg %sp) (emit ret '()))] [else (emit ret '())]))) + (define asm-c-return + (lambda (info) + (if (info-c-return? info) + (let ([offset (info-c-return-offset info)]) + (safe-assert (<= 0 offset #xFFFF)) + (emit retl `(imm ,offset) '())) + (emit ret '())))) + (define asm-locked-incr (lambda (code* base index offset) (let ([dest (build-mem-opnd base index offset)]) @@ -2220,6 +2262,25 @@ ,e))])))] [else (define asm-enter values)]) + (define callee-expects-result-pointer? + (lambda (result-type) + (nanopass-case (Ltype Type) result-type + [(fp-ftd& ,ftd) (constant-case machine-type-name + [(i3osx ti3osx i3nt ti3nt) + (case ($ftd-size ftd) + [(1 2 4 8) #f] + [else #t])] + [else ($ftd-compound? ftd)])] + [else #f]))) + (define callee-pops-result-pointer? + (lambda (result-type) + (callee-expects-result-pointer? result-type))) + (define fill-result-pointer-from-registers? + (lambda (result-type) + (nanopass-case (Ltype Type) result-type + [(fp-ftd& ,ftd) (not (callee-expects-result-pointer? result-type))] + [else #f]))) + (define asm-foreign-call (with-output-language (L13 Effect) (letrec ([load-double-stack @@ -2244,19 +2305,74 @@ (%seq (set! ,(%mref ,%sp ,offset) ,lorhs) (set! ,(%mref ,%sp ,(fx+ offset 4)) ,hirhs))))] + [load-content + (lambda (offset len) + (lambda (x) ; requires var + (let loop ([offset offset] [x-offset 0] [len len]) + (cond + [(= len 0) `(nop)] + [(>= len 4) + `(seq + (set! ,(%mref ,%sp ,offset) (inline ,(make-info-load 'integer-32 #f) + ,%load ,x ,%zero (immediate ,x-offset))) + ,(loop (fx+ offset 4) (fx+ x-offset 4) (fx- len 4)))] + [(>= len 2) + (%seq + (set! ,%eax (inline ,(make-info-load 'integer-16 #f) + ,%load ,x ,%zero (immediate ,x-offset))) + (inline ,(make-info-load 'integer-16 #f) + ,%store ,%sp ,%zero (immediate ,offset) + ,%eax) + ,(loop (fx+ offset 2) (fx+ x-offset 2) (fx- len 2)))] + [else + (%seq + (set! ,%eax (inline ,(make-info-load 'integer-8 #f) + ,%load ,x ,%zero (immediate ,x-offset))) + (inline ,(make-info-load 'integer-8 #f) + ,%store ,%sp ,%zero (immediate ,offset) + ,%eax))]))))] [do-stack - (lambda (types locs n) + (lambda (types locs n result-type) (if (null? types) (values n locs) (nanopass-case (Ltype Type) (car types) [(fp-double-float) (do-stack (cdr types) (cons (load-double-stack n) locs) - (fx+ n 8))] + (fx+ n 8) + #f)] [(fp-single-float) (do-stack (cdr types) (cons (load-single-stack n) locs) - (fx+ n 4))] + (fx+ n 4) + #f)] + [(fp-ftd& ,ftd) + (do-stack (cdr types) + (cons (load-content n ($ftd-size ftd)) locs) + (fx+ n (fxlogand (fx+ ($ftd-size ftd) 3) -4)) + #f)] + [(fp-ftd ,ftd) + (cond + [(and result-type + (fill-result-pointer-from-registers? result-type)) + ;; Callee doesn't expect this argument; move + ;; it to the end just to save it for filling + ;; when the callee returns + (let ([end-n 0]) + (with-values (do-stack (cdr types) + (cons (lambda (rhs) + ((load-stack end-n) rhs)) + locs) + n + #f) + (lambda (frame-size locs) + (set! end-n frame-size) + (values (fx+ frame-size 4) locs))))] + [else + (do-stack (cdr types) + (cons (load-stack n) locs) + (fx+ n 4) + #f)])] [else (if (nanopass-case (Ltype Type) (car types) [(fp-integer ,bits) (fx= bits 64)] @@ -2264,17 +2380,19 @@ [else #f]) (do-stack (cdr types) (cons (load-stack64 n) locs) - (fx+ n 8)) + (fx+ n 8) + #f) (do-stack (cdr types) (cons (load-stack n) locs) - (fx+ n 4)))])))]) + (fx+ n 4) + #f))])))]) (define returnem - (lambda (conv frame-size locs ccall r-loc) + (lambda (conv orig-frame-size locs result-type ccall r-loc) (let ([frame-size (constant-case machine-type-name ; maintain 16-byte alignment not including the return address pushed ; by the call instruction, which counts as part of callee's frame - [(i3osx ti3osx) (fxlogand (fx+ frame-size 15) -16)] - [else frame-size])]) + [(i3osx ti3osx) (fxlogand (fx+ orig-frame-size 15) -16)] + [else orig-frame-size])]) (values (lambda () (if (fx= frame-size 0) `(nop) @@ -2286,28 +2404,64 @@ (lambda () (if (or (fx= frame-size 0) (memq conv '(i3nt-stdcall i3nt-com))) `(nop) - `(set! ,%sp ,(%inline + ,%sp (immediate ,frame-size))))))))) + (let ([frame-size (if (callee-pops-result-pointer? result-type) + (fx- frame-size (constant ptr-bytes)) + frame-size)]) + `(set! ,%sp ,(%inline + ,%sp (immediate ,frame-size)))))))))) (lambda (info) (safe-assert (reg-callee-save? %tc)) ; no need to save-restore (let ([conv (info-foreign-conv info)] [arg-type* (info-foreign-arg-type* info)] [result-type (info-foreign-result-type info)]) - (with-values (do-stack arg-type* '() 0) + (with-values (do-stack arg-type* '() 0 result-type) (lambda (frame-size locs) - (returnem conv frame-size locs + (returnem conv frame-size locs result-type (lambda (t0) - (case conv - [(i3nt-com) - (when (null? arg-type*) - ($oops 'foreign-procedure - "__com convention requires instance argument")) - ; jump indirect - (%seq - (set! ,%eax ,(%mref ,%sp 0)) - (set! ,%eax ,(%mref ,%eax 0)) - (set! ,%eax ,(%inline + ,%eax ,t0)) - (inline ,(make-info-kill*-live* (reg-list %eax %edx) '()) ,%c-call ,(%mref ,%eax 0)))] - [else `(inline ,(make-info-kill*-live* (reg-list %eax %edx) '()) ,%c-call ,t0)])) + (let ([call + (case conv + [(i3nt-com) + (when (null? arg-type*) + ($oops 'foreign-procedure + "__com convention requires instance argument")) + ; jump indirect + (%seq + (set! ,%eax ,(%mref ,%sp 0)) + (set! ,%eax ,(%mref ,%eax 0)) + (set! ,%eax ,(%inline + ,%eax ,t0)) + (inline ,(make-info-kill*-live* (reg-list %eax %edx) '()) ,%c-call ,(%mref ,%eax 0)))] + [else `(inline ,(make-info-kill*-live* (reg-list %eax %edx) '()) ,%c-call ,t0)])]) + (cond + [(fill-result-pointer-from-registers? result-type) + (let* ([ftd (nanopass-case (Ltype Type) result-type + [(fp-ftd& ,ftd) ftd])] + [size ($ftd-size ftd)]) + (%seq + ,call + (set! ,%ecx ,(%mref ,%sp ,(fx- frame-size (constant ptr-bytes)))) + ,(case size + [(1) + `(inline ,(make-info-load 'integer-8 #f) ,%store + ,%ecx ,%zero (immediate ,0) ,%eax)] + [(2) + `(inline ,(make-info-load 'integer-16 #f) ,%store + ,%ecx ,%zero (immediate ,0) ,%eax)] + [(4) + (cond + [(and (if-feature windows (not ($ftd-compound? ftd)) #t) + (equal? '((float 4 0)) ($ftd->members ftd))) + `(set! ,(%mref ,%ecx 0) ,(%inline fstps))] + [else + `(set! ,(%mref ,%ecx 0) ,%eax)])] + [(8) + (cond + [(and (if-feature windows (not ($ftd-compound? ftd)) #t) + (equal? '((float 8 0)) ($ftd->members ftd))) + `(set! ,(%mref ,%ecx 0) ,(%inline fstpl))] + [else + `(seq + (set! ,(%mref ,%ecx 0) ,%eax) + (set! ,(%mref ,%ecx 4) ,%edx))])])))] + [else call]))) (nanopass-case (Ltype Type) result-type [(fp-double-float) (lambda (x) @@ -2350,6 +2504,25 @@ [else (lambda (lvalue) `(set! ,lvalue ,%eax))]))))))))) (define asm-foreign-callable + #| + Frame Layout + +---------------------------+ + | | + | incoming stack args | + sp+X+Y: | | + +---------------------------+ <- i3osx: 16-byte boundary + | incoming return address | one word + +---------------------------+ + | | + | callee-save registers | EBP, ESI, EDI, EBX (4 words) + sp+X: | | + +---------------------------+ + | indirect result space | i3osx: 3 words + | (for & results via regs) | other: 2 words + sp+0: +---------------------------+<- i3osx: 16-byte boundary + |# + + (with-output-language (L13 Effect) (let () (define load-double-stack @@ -2389,6 +2562,10 @@ "unexpected load-int-stack fp-unsigned size ~s" bits)])] [else `(set! ,lvalue ,(%mref ,%sp ,offset))])))) + (define load-stack-address + (lambda (offset) + (lambda (lvalue) ; requires lvalue + `(set! ,lvalue ,(%inline + ,%sp (immediate ,offset)))))) (define load-stack64 (lambda (type offset) (lambda (lolvalue hilvalue) ; requires lvalue @@ -2408,6 +2585,10 @@ (do-stack (cdr types) (cons (load-single-stack n) locs) (fx+ n 4))] + [(fp-ftd& ,ftd) + (do-stack (cdr types) + (cons (load-stack-address n) locs) + (fx+ n (fxlogand (fx+ ($ftd-size ftd) 3) -4)))] [else (if (nanopass-case (Ltype Type) (car types) [(fp-integer ,bits) (fx= bits 64)] @@ -2419,61 +2600,127 @@ (do-stack (cdr types) (cons (load-stack (car types) n) locs) (fx+ n 4)))])))) + (define (do-result result-type init-stack-offset indirect-result-to-registers?) + (nanopass-case (Ltype Type) result-type + [(fp-ftd& ,ftd) + (cond + [indirect-result-to-registers? + (cond + [(and (if-feature windows (not ($ftd-compound? ftd)) #t) + (equal? '((float 4 0)) ($ftd->members ftd))) + (values (lambda () + (%inline flds ,(%mref ,%sp 0))) + '())] + [(and (if-feature windows (not ($ftd-compound? ftd)) #t) + (equal? '((float 8 0)) ($ftd->members ftd))) + (values (lambda () + (%inline fldl ,(%mref ,%sp 0))) + '())] + [(fx= ($ftd-size ftd) 8) + (values (lambda () + `(seq + (set! ,%eax ,(%mref ,%sp 0)) + (set! ,%edx ,(%mref ,%sp 4)))) + (list %eax %edx))] + [else + (values (lambda () + `(set! ,%eax ,(%mref ,%sp 0))) + (list %eax))])] + [else + (values (lambda () + ;; Return pointer that was filled; destination was the first argument + `(set! ,%eax ,(%mref ,%sp ,init-stack-offset))) + (list %eax))])] + [(fp-double-float) + (values (lambda (x) + (%inline fldl ,(%mref ,x ,(constant flonum-data-disp)))) + '())] + [(fp-single-float) + (values (lambda (x) + (%inline fldl ,(%mref ,x ,(constant flonum-data-disp)))) + '())] + [(fp-void) + (values (lambda () `(nop)) + '())] + [else + (cond + [(nanopass-case (Ltype Type) result-type + [(fp-integer ,bits) (fx= bits 64)] + [(fp-unsigned ,bits) (fx= bits 64)] + [else #f]) + (values (lambda (lorhs hirhs) ; requires rhs + (%seq + (set! ,%eax ,lorhs) + (set! ,%edx ,hirhs))) + (list %eax %edx))] + [else + (values (lambda (x) + `(set! ,%eax ,x)) + (list %eax))])])) (lambda (info) (let ([conv (info-foreign-conv info)] [arg-type* (info-foreign-arg-type* info)] - [result-type (info-foreign-result-type info)]) - (with-values (do-stack arg-type* '() - (constant-case machine-type-name [(i3osx ti3osx) 32] [else 20])) - (lambda (frame-size locs) - (values - (lambda () - (%seq - ,(%inline push ,%ebp) - ,(%inline push ,%esi) - ,(%inline push ,%edi) - ,(%inline push ,%ebx) - ,((lambda (e) - (constant-case machine-type-name - [(i3osx ti3osx) - ; maintain 16-bit alignment for i3osx, taking into account - ; 16 bytes pushed below + 4 for RA pushed by asmCcall - (%seq - (set! ,%sp ,(%inline - ,%sp (immediate 12))) - ,e)] - [else e])) - (if-feature pthreads - `(seq - (set! ,%eax ,(%inline get-tc)) - (set! ,%tc ,%eax)) - `(set! ,%tc (literal ,(make-info-literal #f 'entry (lookup-c-entry thread-context) 0))))))) - (reverse locs) - (lambda (fv* Scall->result-type) - (in-context Tail - ((lambda (e) - (constant-case machine-type-name - [(i3osx ti3osx) - (%seq - (set! ,%sp ,(%inline + ,%sp (immediate 12))) - ,e)] - [else e])) + [result-type (info-foreign-result-type info)] + [init-stack-offset (constant-case machine-type-name [(i3osx ti3osx) 32] [else 28])] + [indirect-result-space (constant-case machine-type-name + [(i3osx ti3osx) + ;; maintain 16-bit alignment for i3osx, taking into account + ;; 16 bytes pushed above + 4 for RA pushed by asmCcall; + ;; 8 of these bytes are used for &-return space, if needed + 12] + [else 8])]) + (let ([indirect-result-to-registers? (fill-result-pointer-from-registers? result-type)]) + (let-values ([(get-result result-regs) (do-result result-type init-stack-offset indirect-result-to-registers?)]) + (with-values (do-stack (if indirect-result-to-registers? + (cdr arg-type*) + arg-type*) + '() + init-stack-offset) + (lambda (frame-size locs) + (values + (lambda () (%seq - (set! ,%ebx ,(%inline pop)) - (set! ,%edi ,(%inline pop)) - (set! ,%esi ,(%inline pop)) - (set! ,%ebp ,(%inline pop)) - ; Windows __stdcall convention requires callee to clean up - ,((lambda (e) - (if (memq conv '(i3nt-stdcall i3nt-com)) + ,(%inline push ,%ebp) + ,(%inline push ,%esi) + ,(%inline push ,%edi) + ,(%inline push ,%ebx) + (set! ,%sp ,(%inline - ,%sp (immediate ,indirect-result-space))) + ,(if-feature pthreads + `(seq + (set! ,%eax ,(%inline get-tc)) + (set! ,%tc ,%eax)) + `(set! ,%tc (literal ,(make-info-literal #f 'entry (lookup-c-entry thread-context) 0)))))) + (let ([locs (reverse locs)]) + (if indirect-result-to-registers? + (cons (load-stack-address 0) ; use the &-return space + locs) + locs)) + get-result + (lambda () + (in-context Tail + (%seq + (set! ,%sp ,(%inline + ,%sp (immediate ,indirect-result-space))) + (set! ,%ebx ,(%inline pop)) + (set! ,%edi ,(%inline pop)) + (set! ,%esi ,(%inline pop)) + (set! ,%ebp ,(%inline pop)) + ; Windows __stdcall convention requires callee to clean up + ,((lambda (e) + (if (memq conv '(i3nt-stdcall i3nt-com)) (let ([arg-size (fx- frame-size 20)]) (if (fx> arg-size 0) (%seq - (set! - ,(%mref ,%sp ,arg-size) - ,(%mref ,%sp 0)) - (set! ,%sp ,(%inline + ,%sp (immediate ,arg-size))) - ,e) + (set! + ,(%mref ,%sp ,arg-size) + ,(%mref ,%sp 0)) + (set! ,%sp ,(%inline + ,%sp (immediate ,arg-size))) + ,e) e)) e)) - `(jump (literal ,(make-info-literal #f 'entry Scall->result-type 0)) - (,%ebx ,%edi ,%esi ,%ebp ,fv* ...)))))))))))))))) + `(asm-c-return ,(if (callee-pops-result-pointer? result-type) + ;; remove the pointer argument provided by the caller + ;; after popping the return address + (make-info-c-return 4) + null-info) + ,result-regs ...))))))))))))))) + ) diff --git a/s/x86_64.ss b/s/x86_64.ss index 3162df302a..b066f09f2b 100644 --- a/s/x86_64.ss +++ b/s/x86_64.ss @@ -977,7 +977,7 @@ asm-fl-cvt asm-fl-store asm-fl-load asm-flt asm-trunc asm-div asm-exchange asm-pause asm-locked-incr asm-locked-decr asm-flop-2 asm-flsqrt asm-c-simple-call - asm-save-flrv asm-restore-flrv asm-return asm-size + asm-save-flrv asm-restore-flrv asm-return asm-c-return asm-size asm-enter asm-foreign-call asm-foreign-callable asm-inc-profile-counter asm-inc-cc-counter asm-read-time-stamp-counter asm-read-performance-monitoring-counter @@ -1991,6 +1991,10 @@ (emit addi '(imm 8) (cons 'reg %sp) (emit ret '())))) + (define asm-c-return + (lambda (info) + (emit ret '()))) + (define asm-locked-incr (lambda (code* base index offset) (let ([dest (build-mem-opnd base index offset)]) @@ -2408,6 +2412,88 @@ (define make-vint (lambda () (vector %Carg1 %Carg2 %Carg3 %Carg4 %Carg5 %Carg6))) (define make-vfp (lambda () (vector %Cfparg1 %Cfparg2 %Cfparg3 %Cfparg4 %Cfparg5 %Cfparg6 %Cfparg7 %Cfparg8))))) + (define (align n size) + (fxlogand (fx+ n (fx- size 1)) (fx- size))) + + (define (classify-type type) + (nanopass-case (Ltype Type) type + [(fp-ftd& ,ftd) (classify-eightbytes ftd)] + [else #f])) + + ;; classify-eightbytes: returns '(memory) or a nonemtpy list of 'integer/'sse + (if-feature windows + ;; Windows: either passed in one register or not + (define (classify-eightbytes ftd) + (cond + [($ftd-compound? ftd) + (if (memv ($ftd-size ftd) '(1 2 4 8)) + '(integer) + '(memory))] + [(eq? 'float (caar ($ftd->members ftd))) + '(sse)] + [else '(integer)])) + ;; Non-Windows: SYSV ABI is a more general classification of + ;; 8-byte segments into 'integer, 'sse, or 'memory modes + (define (classify-eightbytes ftd) + (define (merge t1 t2) + (cond + [(eq? t1 t2) t1] + [(eq? t1 'no-class) t2] + [(eq? t2 'no-class) t1] + [(eq? t1 'memory) 'memory] + [(eq? t2 'memory) 'memory] + [else 'integer])) + (cond + [(or (> ($ftd-size ftd) 16) ; more than 2 eightbytes => passed in memory + (fx= 0 ($ftd-size ftd))) + '(memory)] + [else + (let ([classes (make-vector (fxsrl (align ($ftd-size ftd) 8) 3) 'no-class)]) + (let loop ([mbrs ($ftd->members ftd)]) + (cond + [(null? mbrs) + (vector->list classes)] + [else + (let ([kind (caar mbrs)] + [size (cadar mbrs)] + [offset (caddar mbrs)]) + (cond + [(not (fx= offset (align offset size))) + ;; misaligned + '(memory)] + [else + (let* ([pos (fxsrl offset 3)] + [class (vector-ref classes pos)] + [new-class (merge class (if (eq? kind 'float) 'sse 'integer))]) + (cond + [(eq? new-class 'memory) + '(memory)] + [else + (vector-set! classes pos new-class) + (loop (cdr mbrs))]))]))])))]))) + + (define (count v l) + (cond + [(null? l) 0] + [(eq? (car l) v) (fx+ 1 (count v (cdr l)))] + [else (count v (cdr l))])) + + ;; A result is put in registers if it has up to two + ;; eightbytes, each 'integer or 'sse. On Windows, + ;; `result-classes` always has only one item. + (define (result-fits-in-registers? result-classes) + (and result-classes + (not (eq? 'memory (car result-classes))) + (or (null? (cdr result-classes)) + (null? (cddr result-classes))))) + + ;; An argument is put in registeres depending on how many + ;; registers are left + (define (pass-here-by-stack? classes iint ints ifp fps) + (or (eq? 'memory (car classes)) + (fx> (fx+ iint ints) 6) + (fx> (fx+ ifp fps) 8))) + (define asm-foreign-call (with-output-language (L13 Effect) (letrec ([load-double-stack @@ -2452,6 +2538,87 @@ ; x is a non-triv right-hand-side [else (%seq (set! ,ireg ,x) (set! ,ireg ,(%inline zext32 ,ireg)))])] [else `(set! ,ireg ,x)])))] + [load-content-stack + (lambda (offset len) + (lambda (x) ; requires var + (let loop ([offset offset] [x-offset 0] [len len]) + (cond + [(= len 0) `(nop)] + [(>= len 8) + `(seq + (set! ,(%mref ,%sp ,offset) (inline ,(make-info-load 'integer-64 #f) + ,%load ,x ,%zero (immediate ,x-offset))) + ,(loop (fx+ offset 8) (fx+ x-offset 8) (fx- len 8)))] + [(>= len 4) + `(seq + (set! ,(%mref ,%sp ,offset) (inline ,(make-info-load 'integer-32 #f) + ,%load ,x ,%zero (immediate ,x-offset))) + ,(loop (fx+ offset 4) (fx+ x-offset 4) (fx- len 4)))] + [(>= len 2) + `(seq + (set! ,(%mref ,%sp ,offset) (inline ,(make-info-load 'integer-16 #f) + ,%load ,x ,%zero (immediate ,x-offset))) + ,(loop (fx+ offset 2) (fx+ x-offset 2) (fx- len 2)))] + [else + `(set! ,(%mref ,%sp ,offset) (inline ,(make-info-load 'integer-8 #f) + ,%load ,x ,%zero (immediate ,x-offset)))]))))] + [load-content-regs + (lambda (classes size iint ifp vint vfp) + (lambda (x) ; requires var + (let loop ([size size] [iint iint] [ifp ifp] [classes classes] [x-offset 0]) + (cond + [(null? classes) `(nop)] + [(eq? 'sse (car classes)) + (cond + [(fx= size 4) + ;; Must be the last element + `(inline ,(make-info-loadfl (vector-ref vfp ifp)) ,%load-single ,x ,%zero (immediate ,x-offset))] + [else + `(seq + (inline ,(make-info-loadfl (vector-ref vfp ifp)) ,%load-double ,x ,%zero (immediate ,x-offset)) + ,(loop (fx- size 8) iint (fx+ ifp 1) (cdr classes) (fx+ x-offset 8)))])] + ;; Remaining cases are integers: + [(>= size 8) + `(seq + (set! ,(vector-ref vint iint) (inline ,(make-info-load 'integer-64 #f) + ,%load ,x ,%zero (immediate ,x-offset))) + ,(loop (fx- size 8) (fx+ iint 1) ifp (cdr classes) (fx+ x-offset 8)))] + ;; Remaining cases must be the last element + [else + (let loop ([reg (vector-ref vint iint)] [size size] [x-offset x-offset]) + (cond + [(= size 4) + `(set! ,reg (inline ,(make-info-load 'unsigned-32 #f) + ,%load ,x ,%zero (immediate ,x-offset)))] + [(= size 2) + `(set! ,reg (inline ,(make-info-load 'unsigned-16 #f) + ,%load ,x ,%zero (immediate ,x-offset)))] + [(= size 1) + `(set! ,reg (inline ,(make-info-load 'unsigned-8 #f) + ,%load ,x ,%zero (immediate ,x-offset)))] + [(> size 4) + ;; 5, 6, or 7: multiple steps to avoid reading too many bytes + (let ([tmp %rax]) ;; ?? ok to use %rax? + (%seq + ,(loop reg (fx- size 4) (fx+ x-offset 4)) + (set! ,reg ,(%inline sll ,reg (immediate 32))) + ,(loop tmp 4 x-offset) + (set! ,reg ,(%inline + ,reg ,tmp))))] + [else + ;; 3: multiple steps to avoid reading too many bytes + (let ([tmp %rax]) ;; ?? ok to use %rax? + (%seq + ,(loop reg (fx- size 2) (fx+ x-offset 2)) + (set! ,reg ,(%inline sll ,reg (immediate 16))) + ,(loop tmp 2 x-offset) + (set! ,reg ,(%inline + ,reg ,tmp))))]))]))))] + [add-int-regs + (lambda (ints iint vint regs) + (cond + [(fx= 0 ints) regs] + [else + (add-int-regs (fx- ints 1) (fx+ iint 1) vint + (cons (vector-ref vint iint) regs))]))] [do-args (lambda (types vint vfp) (if-feature windows @@ -2476,6 +2643,44 @@ (loop (cdr types) (cons (load-single-stack isp) locs) regs i (fx+ isp 8)))] + [(fp-ftd& ,ftd) + (cond + [(memv ($ftd-size ftd) '(1 2 4 8)) + ;; pass as value in register or as value on the stack + (cond + [(< i 4) + ;; pass as value in register + (cond + [(and (not ($ftd-compound? ftd)) + (eq? 'float (caar ($ftd->members ftd)))) + ;; float or double + (loop (cdr types) + (cons (load-content-regs '(sse) ($ftd-size ftd) i i vint vfp) locs) + (add-int-regs 1 i vint regs) (fx+ i 1) isp)] + [else + ;; integer + (loop (cdr types) + (cons (load-content-regs '(integer) ($ftd-size ftd) i i vint vfp) locs) + (add-int-regs 1 i vint regs) (fx+ i 1) isp)])] + [else + ;; pass as value on the stack + (loop (cdr types) + (cons (load-content-stack isp ($ftd-size ftd)) locs) + regs i (fx+ isp (align ($ftd-size ftd) 8)))])] + [else + ;; pass by reference in register or by reference on the stack + (cond + [(< i 4) + ;; pass by reference in a register + (let ([reg (vector-ref vint i)]) + (loop (cdr types) + (cons (load-int-reg (car types) reg) locs) + (cons reg regs) (fx+ i 1) isp))] + [else + ;; pass by reference on the stack + (loop (cdr types) + (cons (load-int-stack isp) locs) + regs i (fx+ isp 8))])])] [else (if (< i 4) (let ([reg (vector-ref vint i)]) @@ -2506,6 +2711,22 @@ (loop (cdr types) (cons (load-single-stack isp) locs) regs iint ifp (fx+ isp 8)))] + [(fp-ftd& ,ftd) + (let* ([classes (classify-eightbytes ftd)] + [ints (count 'integer classes)] + [fps (count 'sse classes)]) + (cond + [(pass-here-by-stack? classes iint ints ifp fps) + ;; pass on the stack + (loop (cdr types) + (cons (load-content-stack isp ($ftd-size ftd)) locs) + regs iint ifp (fx+ isp (align ($ftd-size ftd) 8)))] + [else + ;; pass in registers + (loop (cdr types) + (cons (load-content-regs classes ($ftd-size ftd) iint ifp vint vfp) locs) + (add-int-regs ints iint vint regs) + (fx+ iint ints) (fx+ ifp fps) isp)]))] [else (if (< iint 6) (let ([reg (vector-ref vint iint)]) @@ -2516,6 +2737,35 @@ (loop (cdr types) (cons (load-int-stack isp) locs) regs iint ifp (fx+ isp 8)))])))))]) + (define (add-save-fill-target fill-result-here? frame-size locs) + (cond + [fill-result-here? + ;; The callee isn't expecting a pointer to fill with the result. + ;; Stash the pointer as an extra argument, and then when the + ;; function returns, we'll move register content for the result + ;; into the pointer's target + (values (fx+ frame-size (constant ptr-bytes)) + (append locs + (list + (lambda (x) ; requires var + `(set! ,(%mref ,%sp ,frame-size) ,x)))))] + [else + (values frame-size locs)])) + (define (add-fill-result c-call saved-offset classes) + (let loop ([classes classes] [offset 0] [iregs (reg-list %rax %rdx)] [fpregs (reg-list %Cfparg1 %Cfparg2)]) + (cond + [(null? classes) + `(seq + ,c-call + (set! ,%rcx ,(%mref ,%sp ,saved-offset)))] + [(eq? 'sse (car classes)) + `(seq + ,(loop (cdr classes) (fx+ offset 8) iregs (cdr fpregs)) + (inline ,(make-info-loadfl (car fpregs)) ,%store-double ,%rcx ,%zero (immediate ,offset)))] + [else + `(seq + ,(loop (cdr classes) (fx+ offset 8) (cdr iregs) fpregs) + (set! ,(%mref ,%rcx ,offset) ,(car iregs)))]))) (define returnem (lambda (frame-size locs ccall r-loc) ; need to maintain 16-byte alignment, ignoring the return address @@ -2535,51 +2785,60 @@ `(set! ,%sp ,(%inline + ,%sp (immediate ,frame-size))))))))) (lambda (info) (safe-assert (reg-callee-save? %tc)) ; no need to save-restore - (let ([conv (info-foreign-conv info)] - [arg-type* (info-foreign-arg-type* info)] - [result-type (info-foreign-result-type info)]) - (with-values (do-args arg-type* (make-vint) (make-vfp)) + (let* ([conv (info-foreign-conv info)] + [arg-type* (info-foreign-arg-type* info)] + [result-type (info-foreign-result-type info)] + [result-classes (classify-type result-type)] + [fill-result-here? (result-fits-in-registers? result-classes)]) + (with-values (do-args (if fill-result-here? (cdr arg-type*) arg-type*) (make-vint) (make-vfp)) (lambda (frame-size nfp locs live*) - (returnem frame-size locs - (lambda (t0) - (if-feature windows - (%seq - (set! ,%sp ,(%inline - ,%sp (immediate 32))) - (inline ,(make-info-kill*-live* (reg-list %rax) live*) ,%c-call ,t0) - (set! ,%sp ,(%inline + ,%sp (immediate 32)))) - (%seq - ; System V ABI varargs functions require count of fp regs used in %al register. - ; since we don't know if the callee is a varargs function, we always set it. - (set! ,%rax (immediate ,nfp)) - (inline ,(make-info-kill*-live* (reg-list %rax) (cons %rax live*)) ,%c-call ,t0)))) - (nanopass-case (Ltype Type) result-type - [(fp-double-float) - (lambda (lvalue) - `(inline ,(make-info-loadfl %Cfpretval) ,%store-double ,lvalue ,%zero - ,(%constant flonum-data-disp)))] - [(fp-single-float) - (lambda (lvalue) - `(inline ,(make-info-loadfl %Cfpretval) ,%store-single->double ,lvalue ,%zero - ,(%constant flonum-data-disp)))] - [(fp-integer ,bits) - (case bits - [(8) (lambda (lvalue) `(set! ,lvalue ,(%inline sext8 ,%rax)))] - [(16) (lambda (lvalue) `(set! ,lvalue ,(%inline sext16 ,%rax)))] - [(32) (lambda (lvalue) `(set! ,lvalue ,(%inline sext32 ,%rax)))] - [(64) (lambda (lvalue) `(set! ,lvalue ,%rax))] - [else ($oops 'assembler-internal - "unexpected asm-foreign-procedures fp-integer size ~s" - bits)])] - [(fp-unsigned ,bits) - (case bits - [(8) (lambda (lvalue) `(set! ,lvalue ,(%inline zext8 ,%rax)))] - [(16) (lambda (lvalue) `(set! ,lvalue ,(%inline zext16 ,%rax)))] - [(32) (lambda (lvalue) `(set! ,lvalue ,(%inline zext32 ,%rax)))] - [(64) (lambda (lvalue) `(set! ,lvalue ,%rax))] - [else ($oops 'assembler-internal - "unexpected asm-foreign-procedures fp-unsigned size ~s" - bits)])] - [else (lambda (lvalue) `(set! ,lvalue ,%rax))]))))))))) + (with-values (add-save-fill-target fill-result-here? frame-size locs) + (lambda (frame-size locs) + (returnem frame-size locs + (lambda (t0) + (let ([c-call + (if-feature windows + (%seq + (set! ,%sp ,(%inline - ,%sp (immediate 32))) + (inline ,(make-info-kill*-live* (reg-list %rax) live*) ,%c-call ,t0) + (set! ,%sp ,(%inline + ,%sp (immediate 32)))) + (%seq + ;; System V ABI varargs functions require count of fp regs used in %al register. + ;; since we don't know if the callee is a varargs function, we always set it. + (set! ,%rax (immediate ,nfp)) + (inline ,(make-info-kill*-live* (reg-list %rax) (cons %rax live*)) ,%c-call ,t0)))]) + (cond + [fill-result-here? + (add-fill-result c-call (fx- frame-size (constant ptr-bytes)) result-classes)] + [else c-call]))) + (nanopass-case (Ltype Type) result-type + [(fp-double-float) + (lambda (lvalue) + `(inline ,(make-info-loadfl %Cfpretval) ,%store-double ,lvalue ,%zero + ,(%constant flonum-data-disp)))] + [(fp-single-float) + (lambda (lvalue) + `(inline ,(make-info-loadfl %Cfpretval) ,%store-single->double ,lvalue ,%zero + ,(%constant flonum-data-disp)))] + [(fp-integer ,bits) + (case bits + [(8) (lambda (lvalue) `(set! ,lvalue ,(%inline sext8 ,%rax)))] + [(16) (lambda (lvalue) `(set! ,lvalue ,(%inline sext16 ,%rax)))] + [(32) (lambda (lvalue) `(set! ,lvalue ,(%inline sext32 ,%rax)))] + [(64) (lambda (lvalue) `(set! ,lvalue ,%rax))] + [else ($oops 'assembler-internal + "unexpected asm-foreign-procedures fp-integer size ~s" + bits)])] + [(fp-unsigned ,bits) + (case bits + [(8) (lambda (lvalue) `(set! ,lvalue ,(%inline zext8 ,%rax)))] + [(16) (lambda (lvalue) `(set! ,lvalue ,(%inline zext16 ,%rax)))] + [(32) (lambda (lvalue) `(set! ,lvalue ,(%inline zext32 ,%rax)))] + [(64) (lambda (lvalue) `(set! ,lvalue ,%rax))] + [else ($oops 'assembler-internal + "unexpected asm-foreign-procedures fp-unsigned size ~s" + bits)])] + [else (lambda (lvalue) `(set! ,lvalue ,%rax))]))))))))))) (define asm-foreign-callable #| @@ -2600,7 +2859,7 @@ | callee-save registers | RBX, RBP, RDI, RSI, R12, R13, R14, R15 (8 quads) | | +---------------------------+ - | pad word | one quad + | pad word / indirect space | one quad sp+0: +---------------------------+<- 16-byte boundary @@ -2609,11 +2868,14 @@ +---------------------------+ | | | incoming stack args | - sp+176: | | + sp+192: | | +---------------------------+ <- 16-byte boundary | incoming return address | one quad +---------------------------+ | pad word | one quad + +---------------------------+ + | indirect result space | two quads + sp+160 | (for & results via regs) | +---------------------------+<- 16-byte boundary | | | saved register args | space for Carg*, Cfparg* (14 quads) @@ -2661,6 +2923,10 @@ "unexpected load-int-stack fp-unsigned size ~s" bits)])] [else `(set! ,lvalue ,(%mref ,%sp ,offset))])))) + (define load-stack-address + (lambda (offset) + (lambda (lvalue) ; requires lvalue + `(set! ,lvalue ,(%inline + ,%sp (immediate ,offset)))))) (define save-arg-regs (lambda (types) (define vint (make-vint)) @@ -2684,6 +2950,40 @@ ,%sp ,%zero (immediate ,isp)) ,(f (cdr types) (fx+ i 1) (fx+ isp 8))) (f (cdr types) i isp))] + [(fp-ftd& ,ftd) + (cond + [(memv ($ftd-size ftd) '(1 2 4 8)) + ;; receive as value in register or on the stack + (cond + [(< i 4) + ;; receive in register + (cond + [(and (not ($ftd-compound? ftd)) + (eq? 'float (caar ($ftd->members ftd)))) + ;; float or double + `(seq + (inline ,(make-info-loadfl (vector-ref vfp i)) ,%store-double + ,%sp ,%zero (immediate ,isp)) + ,(f (cdr types) (fx+ i 1) (fx+ isp 8)))] + [else + ;; integer + `(seq + (set! ,(%mref ,%sp ,isp) ,(vector-ref vint i)) + ,(f (cdr types) (fx+ i 1) (fx+ isp 8)))])] + [else + ;; receive by value on the stack + (f (cdr types) i isp)])] + [else + ;; receive by reference in register or on the stack + (cond + [(< i 4) + ;; receive by reference in register + `(seq + (set! ,(%mref ,%sp ,isp) ,(vector-ref vint i)) + ,(f (cdr types) (fx+ i 1) (fx+ isp 8)))] + [else + ;; receive by reference on the stack + (f (cdr types) i isp)])])] [else (if (< i 4) (%seq @@ -2708,6 +3008,29 @@ ,%sp ,%zero (immediate ,isp)) ,(f (cdr types) iint (fx+ ifp 1) (fx+ isp 8))) (f (cdr types) iint ifp isp))] + [(fp-ftd& ,ftd) + (let* ([classes (classify-eightbytes ftd)] + [ints (count 'integer classes)] + [fps (count 'sse classes)]) + (cond + [(pass-here-by-stack? classes iint ints ifp fps) + ;; receive on the stack + (f (cdr types) iint ifp isp)] + [else + ;; receive via registers + (let reg-loop ([classes classes] [iint iint] [ifp ifp] [isp isp]) + (cond + [(null? classes) + (f (cdr types) iint ifp isp)] + [(eq? (car classes) 'sse) + `(seq + (inline ,(make-info-loadfl (vector-ref vfp ifp)) ,%store-double + ,%sp ,%zero (immediate ,isp)) + ,(reg-loop (cdr classes) iint (fx+ ifp 1) (+ isp 8)))] + [else + `(seq + (set! ,(%mref ,%sp ,isp) ,(vector-ref vint iint)) + ,(reg-loop (cdr classes) (fx+ iint 1) ifp (+ isp 8)))]))]))] [else (if (< iint 6) (%seq @@ -2727,10 +3050,23 @@ (nanopass-case (Ltype Type) (car types) [(fp-double-float) (load-double-stack isp)] [(fp-single-float) (load-single-stack isp)] + [(fp-ftd& ,ftd) + (cond + [(memq ($ftd-size ftd) '(1 2 4 8)) + ;; passed by value + (load-stack-address isp)] + [else + ;; passed by reference + (load-int-stack (car types) isp)])] [else (load-int-stack (car types) isp)]) locs) (fx+ isp 8)))) - (let f ([types types] [locs '()] [iint 0] [ifp 0] [risp 48] [sisp 176]) + (let f ([types types] + [locs '()] + [iint 0] + [ifp 0] + [risp 48] + [sisp 192]) (if (null? types) locs (nanopass-case (Ltype Type) (car types) @@ -2750,6 +3086,23 @@ (f (cdr types) (cons (load-single-stack risp) locs) iint (fx+ ifp 1) (fx+ risp 8) sisp))] + [(fp-ftd& ,ftd) + (let* ([classes (classify-eightbytes ftd)] + [ints (count 'integer classes)] + [fps (count 'sse classes)]) + (cond + [(pass-here-by-stack? classes iint ints ifp fps) + ;; receive on the stack + (f (cdr types) + (cons (load-stack-address sisp) locs) + iint ifp risp (fx+ sisp ($ftd-size ftd)))] + [else + ;; receive via registers; `save-args-regs` has saved + ;; the registers in a suitable order so that the data + ;; is contiguous on the stack + (f (cdr types) + (cons (load-stack-address risp) locs) + (fx+ iint ints) (fx+ ifp fps) (fx+ risp (fx* 8 (fx+ ints fps))) sisp)]))] [else (if (= iint 6) (f (cdr types) @@ -2758,14 +3111,74 @@ (f (cdr types) (cons (load-int-stack (car types) risp) locs) (fx+ iint 1) ifp (fx+ risp 8) sisp))])))))) + (define (do-result result-type result-classes) + (nanopass-case (Ltype Type) result-type + [(fp-ftd& ,ftd) + (cond + [(result-fits-in-registers? result-classes) + ;; Copy content of result area on stack into + ;; the integer and floating-point registers + (let loop ([result-classes result-classes] + [offset (if-feature windows 0 160)] + [int* (list %rax %rdx)] + [fp* (list %Cfpretval %Cfparg2)] + [accum '()] + [live* '()]) + (cond + [(null? result-classes) + (values (lambda () + (if (pair? (cdr accum)) `(seq ,(car accum) ,(cadr accum)) (car accum))) + live*)] + [(eq? (car result-classes) 'integer) + (loop (cdr result-classes) + (fx+ offset 8) + (cdr int*) + fp* + (cons `(set! ,(car int*) ,(%mref ,%sp ,offset)) + accum) + (cons (car int*) live*))] + [(eq? (car result-classes) 'sse) + (loop (cdr result-classes) + (fx+ offset 8) + int* + (cdr fp*) + (cons `(inline ,(make-info-loadfl (car fp*)) ,%load-double ,%sp ,%zero (immediate ,offset)) + accum) + live*)]))] + [else + (values (lambda () + ;; Return pointer that was filled; destination was the first argument + `(set! ,%Cretval ,(%mref ,%sp ,(if-feature windows 80 48)))) + (list %Cretval))])] + [(fp-double-float) + (values + (lambda (x) + `(inline ,(make-info-loadfl %Cfpretval) ,%load-double ,x ,%zero ,(%constant flonum-data-disp))) + '())] + [(fp-single-float) + (values + (lambda (x) + `(inline ,(make-info-loadfl %Cfpretval) ,%load-double->single ,x ,%zero ,(%constant flonum-data-disp))) + '())] + [(fp-void) + (values (lambda () `(nop)) + '())] + [else + (values(lambda (x) + `(set! ,%Cretval ,x)) + (list %Cretval))])) (lambda (info) (let ([conv (info-foreign-conv info)] [arg-type* (info-foreign-arg-type* info)] [result-type (info-foreign-result-type info)]) - (let ([locs (do-stack arg-type*)]) - (values - (lambda () - (%seq + (let* ([result-classes (classify-type result-type)] + [synthesize-first? (and result-classes + (result-fits-in-registers? result-classes))] + [locs (do-stack (if synthesize-first? (cdr arg-type*) arg-type*))]) + (let-values ([(get-result result-regs) (do-result result-type result-classes)]) + (values + (lambda () + (%seq ,(if-feature windows (%seq ,(save-arg-regs arg-type*) @@ -2779,7 +3192,7 @@ ,(%inline push ,%r15) (set! ,%sp ,(%inline - ,%sp (immediate 8)))) (%seq - (set! ,%sp ,(%inline - ,%sp (immediate 120))) + (set! ,%sp ,(%inline - ,%sp (immediate 136))) ,(%inline push ,%rbx) ,(%inline push ,%rbp) ,(%inline push ,%r12) @@ -2792,9 +3205,14 @@ (set! ,%rax ,(%inline get-tc)) (set! ,%tc ,%rax)) `(set! ,%tc (literal ,(make-info-literal #f 'entry (lookup-c-entry thread-context) 0)))))) - (reverse locs) - (lambda (fv* Scall->result-type) - (in-context Tail + (let ([locs (reverse locs)]) + (if synthesize-first? + (cons (load-stack-address (if-feature windows 0 160)) ; space on stack for results to be returned via registers + locs) + locs)) + get-result + (lambda () + (in-context Tail (%seq ,(if-feature windows (%seq @@ -2814,7 +3232,6 @@ (set! ,%r12 ,(%inline pop)) (set! ,%rbp ,(%inline pop)) (set! ,%rbx ,(%inline pop)) - (set! ,%sp ,(%inline + ,%sp (immediate 120))))) - (jump (literal ,(make-info-literal #f 'entry Scall->result-type 0)) - (,%rbx ,%rbp ,%r12 ,%r13 ,%r14 ,%r15 ,fv* ...))))))))))))) + (set! ,%sp ,(%inline + ,%sp (immediate 136))))) + (asm-c-return ,null-info ,result-regs ...))))))))))))) )