slimy pthread_getspecific()-inlining trick to practically eliminate the overhead of futures support under OS X

svn: r16952
This commit is contained in:
Matthew Flatt 2009-11-22 02:34:40 +00:00
parent 28f5070d5e
commit 79817a2087
5 changed files with 118 additions and 42 deletions

View File

@ -585,20 +585,22 @@
;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(define per-block-push? #t)
(define gc-var-stack-through-table?
(define gc-var-stack-mode
(ormap (lambda (e)
(and (pragma? e)
(regexp-match #rx"GC_VARIABLE_STACK_THOUGH_TABLE" (pragma-s e))))
e-raw))
(define gc-var-stack-through-thread-local?
(ormap (lambda (e)
(and (tok? e)
(eq? (tok-n e) 'XFORM_GC_VARIABLE_STACK_THROUGH_THREAD_LOCAL)))
e-raw))
(define gc-var-stack-through-getspecific?
(ormap (lambda (e)
(and (tok? e)
(eq? (tok-n e) 'XFORM_GC_VARIABLE_STACK_THROUGH_GETSPECIFIC)))
(cond
[(and (pragma? e)
(regexp-match #rx"GC_VARIABLE_STACK_THOUGH_TABLE" (pragma-s e)))
'table]
[(and (tok? e)
(eq? (tok-n e) 'XFORM_GC_VARIABLE_STACK_THROUGH_THREAD_LOCAL))
'thread-local]
[(and (tok? e)
(eq? (tok-n e) 'XFORM_GC_VARIABLE_STACK_THROUGH_GETSPECIFIC))
'getspecific]
[(and (tok? e)
(eq? (tok-n e) 'XFORM_GC_VARIABLE_STACK_THROUGH_FUNCTION))
'function]
[else #f]))
e-raw))
;; The code produced by xform uses a number of macros. These macros
@ -608,12 +610,14 @@
(when (and pgc? (not precompiled-header))
;; Setup GC_variable_stack macro
(printf (cond
[gc-var-stack-through-table?
(printf (case gc-var-stack-mode
[(table)
"#define GC_VARIABLE_STACK (scheme_extension_table->GC_variable_stack)~n"]
[gc-var-stack-through-getspecific?
[(getspecific)
"#define GC_VARIABLE_STACK (((Thread_Local_Variables *)pthread_getspecific(scheme_thread_local_key))->GC_variable_stack_)~n"]
[gc-var-stack-through-thread-local?
[(function)
"#define GC_VARIABLE_STACK ((scheme_get_thread_local_variables())->GC_variable_stack_)~n"]
[(thread-local)
"#define GC_VARIABLE_STACK ((&scheme_thread_locals)->GC_variable_stack_)~n"]
[else "#define GC_VARIABLE_STACK GC_variable_stack~n"]))
@ -1075,8 +1079,7 @@
(set! non-gcing-functions (hash-table-copy (list-ref l 7)))
(set! gc-var-stack-through-thread-local? (list-ref l 8))
(set! gc-var-stack-through-getspecific? (list-ref l 9))))))
(set! gc-var-stack-mode (list-ref l 8))))))
;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Pretty-printing output
@ -1611,6 +1614,7 @@
;; threadlocal-decl? : any -> boolean
;; Recognizes a non-empty token sequence whose first token names one of
;; the XFORM_GC_VARIABLE_STACK_THROUGH_* markers (getspecific, function,
;; or thread-local flavor). Anything that is not a pair yields #f.
(define (threadlocal-decl? e)
  (and (pair? e)
       ;; `memq' compares with `eq?', just like testing each symbol in turn
       (memq (tok-n (car e))
             '(XFORM_GC_VARIABLE_STACK_THROUGH_GETSPECIFIC
               XFORM_GC_VARIABLE_STACK_THROUGH_FUNCTION
               XFORM_GC_VARIABLE_STACK_THROUGH_THREAD_LOCAL))
       ;; collapse the list returned by `memq' to a plain boolean
       #t))
(define (access-modifier? e)
@ -4003,8 +4007,7 @@
(marshall non-pointer-types)
(marshall struct-defs)
non-gcing-functions
gc-var-stack-through-thread-local?
gc-var-stack-through-getspecific?)])
(list 'quote gc-var-stack-mode))])
(with-output-to-file (change-suffix file-out #".zo")
(lambda ()
(let ([orig (current-namespace)])

View File

@ -3756,4 +3756,4 @@
; To run program, evaluate: (run)
(time (run))
(time (let loop ([i 10]) (if (zero? i) 'done (begin (run) (loop (- i 1))))))

View File

@ -25,6 +25,9 @@
# define THREAD_LOCAL __declspec(thread)
# elif defined(OS_X)
# define IMPLEMENT_THREAD_LOCAL_VIA_PTHREADS
# if defined(__x86_64__) || defined(__i386__)
# define INLINE_GETSPECIFIC_ASSEMBLY_CODE
# endif
# else
# define THREAD_LOCAL __thread
# endif
@ -219,9 +222,29 @@ typedef struct Thread_Local_Variables {
/* Using Pthread getspecific() */
# include <pthread.h>
MZ_EXTERN pthread_key_t scheme_thread_local_key;
# define scheme_get_thread_local_variables() ((Thread_Local_Variables *)pthread_getspecific(scheme_thread_local_key))
#ifdef MZ_XFORM
# ifndef INLINE_GETSPECIFIC_ASSEMBLY_CODE
# define scheme_get_thread_local_variables() ((Thread_Local_Variables *)pthread_getspecific(scheme_thread_local_key))
# ifdef MZ_XFORM
XFORM_GC_VARIABLE_STACK_THROUGH_GETSPECIFIC;
# endif
# else
# ifdef MZ_XFORM
START_XFORM_SKIP;
# endif
/* Inlined stand-in for
   (Thread_Local_Variables *)pthread_getspecific(scheme_thread_local_key),
   used only when INLINE_GETSPECIFIC_ASSEMBLY_CODE is defined.

   Instead of calling into the pthread library, it loads the pointer
   directly from a fixed %gs-relative slot: offset 0x8A0 on x86_64 and
   0x468 on i386.

   NOTE(review): these offsets presumably correspond to the hard-wired
   key value 0x108 that scheme_main_stack_setup() insists on obtaining
   and then double-checks at startup; confirm before changing either
   side.

   NOTE(review): the separate prototype carrying __attribute__((used))
   is presumably there so that including files which never call this
   static function do not trigger unused-function diagnostics -- TODO
   confirm. */
static inline Thread_Local_Variables *scheme_get_thread_local_variables() __attribute__((used));
static inline Thread_Local_Variables *scheme_get_thread_local_variables() {
Thread_Local_Variables *x;
# if defined(__x86_64__)
/* 64-bit: fetch the slot with a quad-word move */
asm volatile("movq %%gs:0x8A0, %0" : "=r"(x));
# else
/* 32-bit: fetch the slot with a long-word move */
asm volatile("movl %%gs:0x468, %0" : "=r"(x));
# endif
return x;
}
# ifdef MZ_XFORM
END_XFORM_SKIP;
XFORM_GC_VARIABLE_STACK_THROUGH_FUNCTION;
# endif
# endif
#else
/* Using `THREAD_LOCAL' variable: */

View File

@ -248,6 +248,10 @@ typedef struct {
MAIN_char **argv;
} Main_Args;
# ifdef MZ_PRECISE_GC
START_XFORM_SKIP;
# endif
static int main_after_dlls(int argc, MAIN_char **argv)
{
Main_Args ma;
@ -256,6 +260,10 @@ static int main_after_dlls(int argc, MAIN_char **argv)
return scheme_main_stack_setup(1, main_after_stack, &ma);
}
# ifdef MZ_PRECISE_GC
END_XFORM_SKIP;
# endif
/************************ main_after_stack *************************/
/* Setup, parse command-line, and go to cont_run */

View File

@ -157,20 +157,11 @@ int scheme_main_setup(int no_auto_statics, Scheme_Env_Main _main, int argc, char
return scheme_main_stack_setup(no_auto_statics, call_with_basic, &d);
}
int scheme_main_stack_setup(int no_auto_statics, Scheme_Nested_Main _main, void *data)
static int do_main_stack_setup(int no_auto_statics, Scheme_Nested_Main _main, void *data)
{
void *stack_start;
int volatile return_code;
#ifdef IMPLEMENT_THREAD_LOCAL_VIA_PTHREADS
if (pthread_key_create(&scheme_thread_local_key, NULL)) {
fprintf(stderr, "pthread key create failed");
abort();
}
#endif
scheme_init_os_thread();
#ifdef USE_THREAD_LOCAL
scheme_vars = scheme_get_thread_local_variables();
#endif
@ -187,6 +178,65 @@ int scheme_main_stack_setup(int no_auto_statics, Scheme_Nested_Main _main, void
return return_code;
}
#ifdef MZ_PRECISE_GC
START_XFORM_SKIP;
#endif
/* Public entry point for starting the runtime on the main OS thread.

   When thread-local state is implemented via pthreads, this first
   creates `scheme_thread_local_key'. It then initializes the per-thread
   state with scheme_init_os_thread() and hands off to
   do_main_stack_setup(), whose result is returned unchanged.

   no_auto_statics, _main, data: passed through to do_main_stack_setup().

   Any failure while setting up the key prints a message to stderr and
   aborts the process. */
int scheme_main_stack_setup(int no_auto_statics, Scheme_Nested_Main _main, void *data)
{
#ifdef IMPLEMENT_THREAD_LOCAL_VIA_PTHREADS
# ifdef INLINE_GETSPECIFIC_ASSEMBLY_CODE
  /* The [highly questionable] trick of inlining pthread_getspecific()
     is borrowed from the Go implementation (see
     "http://golang.org/src/libcgo/darwin_386.c").
     In short: thread-local values are assumed to be reachable through
     the gs segment register at offset 0x48 (i386) or 0x60 (x86_64),
     and the thread-local key is hardwired to 0x108. So we must keep
     asking for keys until we are handed exactly that one, and then
     double-check that the inlined read really sees what we stored. */
  pthread_key_t rejected_keys[16];
  int rejected_count = 0;
# endif

  for (;;) {
    if (pthread_key_create(&scheme_thread_local_key, NULL) != 0) {
      fprintf(stderr, "pthread key create failed\n");
      abort();
    }
# ifdef INLINE_GETSPECIFIC_ASSEMBLY_CODE
    if (scheme_thread_local_key == 0x108)
      break;

    /* Not the key we need: hold on to it (so the next create yields a
       different one) unless we have already used up all our attempts. */
    if (rejected_count == 16) {
      fprintf(stderr, "pthread key create never produced 0x108 for inline hack\n");
      abort();
    }
    rejected_keys[rejected_count] = scheme_thread_local_key;
    rejected_count++;
# else
    break;
# endif
  }

# ifdef INLINE_GETSPECIFIC_ASSEMBLY_CODE
  /* Sanity check: store a sentinel through the real pthread API and
     make sure the inlined accessor reads the very same value back. */
  pthread_setspecific(scheme_thread_local_key, (void *)0xaced);
  if (scheme_get_thread_local_variables() != (Thread_Local_Variables *)0xaced) {
    fprintf(stderr, "pthread getspecific inline hack failed\n");
    abort();
  }

  /* Release the keys we created but did not want, newest first. */
  while (rejected_count > 0) {
    rejected_count--;
    pthread_key_delete(rejected_keys[rejected_count]);
  }
# endif
#endif

  scheme_init_os_thread();

  return do_main_stack_setup(no_auto_statics, _main, data);
}
#ifdef MZ_PRECISE_GC
END_XFORM_SKIP;
#endif
void scheme_set_stack_bounds(void *base, void *deepest, int no_auto_statics)
{
scheme_set_stack_base(base, no_auto_statics);
@ -243,14 +293,6 @@ void scheme_init_os_thread()
vars = (Thread_Local_Variables *)malloc(sizeof(Thread_Local_Variables));
memset(vars, 0, sizeof(Thread_Local_Variables));
pthread_setspecific(scheme_thread_local_key, vars);
# ifdef OS_X
/* A hack that somehow avoids a problem with calling vm_allocate()
later. There must be some deeper bug that I haven't found yet. */
if (1) {
void *r;
vm_allocate(mach_task_self(), (vm_address_t*)&r, 4096, TRUE);
}
# endif
#endif
#ifdef OS_X
# ifdef MZ_PRECISE_GC