slimy pthread_getspecific()-inlining trick to practically eliminate the overhead of futures support under OS X
svn: r16952
This commit is contained in:
parent
28f5070d5e
commit
79817a2087
|
@ -585,20 +585,22 @@
|
|||
;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(define per-block-push? #t)
|
||||
(define gc-var-stack-through-table?
|
||||
(define gc-var-stack-mode
|
||||
(ormap (lambda (e)
|
||||
(and (pragma? e)
|
||||
(regexp-match #rx"GC_VARIABLE_STACK_THOUGH_TABLE" (pragma-s e))))
|
||||
e-raw))
|
||||
(define gc-var-stack-through-thread-local?
|
||||
(ormap (lambda (e)
|
||||
(and (tok? e)
|
||||
(eq? (tok-n e) 'XFORM_GC_VARIABLE_STACK_THROUGH_THREAD_LOCAL)))
|
||||
e-raw))
|
||||
(define gc-var-stack-through-getspecific?
|
||||
(ormap (lambda (e)
|
||||
(and (tok? e)
|
||||
(eq? (tok-n e) 'XFORM_GC_VARIABLE_STACK_THROUGH_GETSPECIFIC)))
|
||||
(cond
|
||||
[(and (pragma? e)
|
||||
(regexp-match #rx"GC_VARIABLE_STACK_THOUGH_TABLE" (pragma-s e)))
|
||||
'table]
|
||||
[(and (tok? e)
|
||||
(eq? (tok-n e) 'XFORM_GC_VARIABLE_STACK_THROUGH_THREAD_LOCAL))
|
||||
'thread-local]
|
||||
[(and (tok? e)
|
||||
(eq? (tok-n e) 'XFORM_GC_VARIABLE_STACK_THROUGH_GETSPECIFIC))
|
||||
'getspecific]
|
||||
[(and (tok? e)
|
||||
(eq? (tok-n e) 'XFORM_GC_VARIABLE_STACK_THROUGH_FUNCTION))
|
||||
'function]
|
||||
[else #f]))
|
||||
e-raw))
|
||||
|
||||
;; The code produced by xform uses a number of macros. These macros
|
||||
|
@ -608,12 +610,14 @@
|
|||
|
||||
(when (and pgc? (not precompiled-header))
|
||||
;; Setup GC_variable_stack macro
|
||||
(printf (cond
|
||||
[gc-var-stack-through-table?
|
||||
(printf (case gc-var-stack-mode
|
||||
[(table)
|
||||
"#define GC_VARIABLE_STACK (scheme_extension_table->GC_variable_stack)~n"]
|
||||
[gc-var-stack-through-getspecific?
|
||||
[(getspecific)
|
||||
"#define GC_VARIABLE_STACK (((Thread_Local_Variables *)pthread_getspecific(scheme_thread_local_key))->GC_variable_stack_)~n"]
|
||||
[gc-var-stack-through-thread-local?
|
||||
[(function)
|
||||
"#define GC_VARIABLE_STACK ((scheme_get_thread_local_variables())->GC_variable_stack_)~n"]
|
||||
[(thread-local)
|
||||
"#define GC_VARIABLE_STACK ((&scheme_thread_locals)->GC_variable_stack_)~n"]
|
||||
[else "#define GC_VARIABLE_STACK GC_variable_stack~n"]))
|
||||
|
||||
|
@ -1075,8 +1079,7 @@
|
|||
|
||||
(set! non-gcing-functions (hash-table-copy (list-ref l 7)))
|
||||
|
||||
(set! gc-var-stack-through-thread-local? (list-ref l 8))
|
||||
(set! gc-var-stack-through-getspecific? (list-ref l 9))))))
|
||||
(set! gc-var-stack-mode (list-ref l 8))))))
|
||||
|
||||
;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; Pretty-printing output
|
||||
|
@ -1611,6 +1614,7 @@
|
|||
(define (threadlocal-decl? e)
|
||||
(and (pair? e)
|
||||
(or (eq? 'XFORM_GC_VARIABLE_STACK_THROUGH_GETSPECIFIC (tok-n (car e)))
|
||||
(eq? 'XFORM_GC_VARIABLE_STACK_THROUGH_FUNCTION (tok-n (car e)))
|
||||
(eq? 'XFORM_GC_VARIABLE_STACK_THROUGH_THREAD_LOCAL (tok-n (car e))))))
|
||||
|
||||
(define (access-modifier? e)
|
||||
|
@ -4003,8 +4007,7 @@
|
|||
(marshall non-pointer-types)
|
||||
(marshall struct-defs)
|
||||
non-gcing-functions
|
||||
gc-var-stack-through-thread-local?
|
||||
gc-var-stack-through-getspecific?)])
|
||||
(list 'quote gc-var-stack-mode))])
|
||||
(with-output-to-file (change-suffix file-out #".zo")
|
||||
(lambda ()
|
||||
(let ([orig (current-namespace)])
|
||||
|
|
|
@ -3756,4 +3756,4 @@
|
|||
|
||||
; To run program, evaluate: (run)
|
||||
|
||||
(time (run))
|
||||
(time (let loop ([i 10]) (if (zero? i) 'done (begin (run) (loop (- i 1))))))
|
||||
|
|
|
@ -25,6 +25,9 @@
|
|||
# define THREAD_LOCAL __declspec(thread)
|
||||
# elif defined(OS_X)
|
||||
# define IMPLEMENT_THREAD_LOCAL_VIA_PTHREADS
|
||||
# if defined(__x86_64__) || defined(__i386__)
|
||||
# define INLINE_GETSPECIFIC_ASSEMBLY_CODE
|
||||
# endif
|
||||
# else
|
||||
# define THREAD_LOCAL __thread
|
||||
# endif
|
||||
|
@ -219,9 +222,29 @@ typedef struct Thread_Local_Variables {
|
|||
/* Using Pthread getspecific() */
|
||||
# include <pthread.h>
|
||||
MZ_EXTERN pthread_key_t scheme_thread_local_key;
|
||||
# define scheme_get_thread_local_variables() ((Thread_Local_Variables *)pthread_getspecific(scheme_thread_local_key))
|
||||
#ifdef MZ_XFORM
|
||||
# ifndef INLINE_GETSPECIFIC_ASSEMBLY_CODE
|
||||
# define scheme_get_thread_local_variables() ((Thread_Local_Variables *)pthread_getspecific(scheme_thread_local_key))
|
||||
# ifdef MZ_XFORM
|
||||
XFORM_GC_VARIABLE_STACK_THROUGH_GETSPECIFIC;
|
||||
# endif
|
||||
# else
|
||||
# ifdef MZ_XFORM
|
||||
START_XFORM_SKIP;
|
||||
# endif
|
||||
static inline Thread_Local_Variables *scheme_get_thread_local_variables() __attribute__((used));
|
||||
static inline Thread_Local_Variables *scheme_get_thread_local_variables() {
|
||||
Thread_Local_Variables *x;
|
||||
# if defined(__x86_64__)
|
||||
asm volatile("movq %%gs:0x8A0, %0" : "=r"(x));
|
||||
# else
|
||||
asm volatile("movl %%gs:0x468, %0" : "=r"(x));
|
||||
# endif
|
||||
return x;
|
||||
}
|
||||
# ifdef MZ_XFORM
|
||||
END_XFORM_SKIP;
|
||||
XFORM_GC_VARIABLE_STACK_THROUGH_FUNCTION;
|
||||
# endif
|
||||
# endif
|
||||
#else
|
||||
/* Using `THREAD_LOCAL' variable: */
|
||||
|
|
|
@ -248,6 +248,10 @@ typedef struct {
|
|||
MAIN_char **argv;
|
||||
} Main_Args;
|
||||
|
||||
# ifdef MZ_PRECISE_GC
|
||||
START_XFORM_SKIP;
|
||||
# endif
|
||||
|
||||
static int main_after_dlls(int argc, MAIN_char **argv)
|
||||
{
|
||||
Main_Args ma;
|
||||
|
@ -256,6 +260,10 @@ static int main_after_dlls(int argc, MAIN_char **argv)
|
|||
return scheme_main_stack_setup(1, main_after_stack, &ma);
|
||||
}
|
||||
|
||||
# ifdef MZ_PRECISE_GC
|
||||
END_XFORM_SKIP;
|
||||
# endif
|
||||
|
||||
/************************ main_after_stack *************************/
|
||||
/* Setup, parse command-line, and go to cont_run */
|
||||
|
||||
|
|
|
@ -157,20 +157,11 @@ int scheme_main_setup(int no_auto_statics, Scheme_Env_Main _main, int argc, char
|
|||
return scheme_main_stack_setup(no_auto_statics, call_with_basic, &d);
|
||||
}
|
||||
|
||||
int scheme_main_stack_setup(int no_auto_statics, Scheme_Nested_Main _main, void *data)
|
||||
static int do_main_stack_setup(int no_auto_statics, Scheme_Nested_Main _main, void *data)
|
||||
{
|
||||
void *stack_start;
|
||||
int volatile return_code;
|
||||
|
||||
#ifdef IMPLEMENT_THREAD_LOCAL_VIA_PTHREADS
|
||||
if (pthread_key_create(&scheme_thread_local_key, NULL)) {
|
||||
fprintf(stderr, "pthread key create failed");
|
||||
abort();
|
||||
}
|
||||
#endif
|
||||
|
||||
scheme_init_os_thread();
|
||||
|
||||
#ifdef USE_THREAD_LOCAL
|
||||
scheme_vars = scheme_get_thread_local_variables();
|
||||
#endif
|
||||
|
@ -187,6 +178,65 @@ int scheme_main_stack_setup(int no_auto_statics, Scheme_Nested_Main _main, void
|
|||
return return_code;
|
||||
}
|
||||
|
||||
#ifdef MZ_PRECISE_GC
|
||||
START_XFORM_SKIP;
|
||||
#endif
|
||||
|
||||
int scheme_main_stack_setup(int no_auto_statics, Scheme_Nested_Main _main, void *data)
|
||||
{
|
||||
#ifdef IMPLEMENT_THREAD_LOCAL_VIA_PTHREADS
|
||||
# ifdef INLINE_GETSPECIFIC_ASSEMBLY_CODE
|
||||
/* Our [highly questionable] strategy for inlining pthread_getspecific() is taken from
|
||||
the Go implementation (see "http://golang.org/src/libcgo/darwin_386.c").
|
||||
In brief, we assume that thread-local variables are going to be
|
||||
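accessed via the gs segment register at offset 0x48 (i386) or 0x60 (x86_64),
|
||||
and we also hardwire the thread-local key 0x108, so the slot read by the inline
|
||||
assembly is 0x48 + 4*0x108 = 0x468 (i386) or 0x60 + 8*0x108 = 0x8A0 (x86_64).
|
||||
Here we have to try to get
|
||||
that particular key and double-check that it worked. */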
|
||||
pthread_key_t unwanted[16];
|
||||
int num_unwanted = 0;
|
||||
# endif
|
||||
|
||||
while (1) {
|
||||
if (pthread_key_create(&scheme_thread_local_key, NULL)) {
|
||||
fprintf(stderr, "pthread key create failed\n");
|
||||
abort();
|
||||
}
|
||||
# ifdef INLINE_GETSPECIFIC_ASSEMBLY_CODE
|
||||
if (scheme_thread_local_key == 0x108)
|
||||
break;
|
||||
else {
|
||||
if (num_unwanted == 16) {
|
||||
fprintf(stderr, "pthread key create never produced 0x108 for inline hack\n");
|
||||
abort();
|
||||
}
|
||||
unwanted[num_unwanted++] = scheme_thread_local_key;
|
||||
}
|
||||
# else
|
||||
break;
|
||||
# endif
|
||||
}
|
||||
|
||||
# ifdef INLINE_GETSPECIFIC_ASSEMBLY_CODE
|
||||
pthread_setspecific(scheme_thread_local_key, (void *)0xaced);
|
||||
if (scheme_get_thread_local_variables() != (Thread_Local_Variables *)0xaced) {
|
||||
fprintf(stderr, "pthread getspecific inline hack failed\n");
|
||||
abort();
|
||||
}
|
||||
while (num_unwanted--) {
|
||||
pthread_key_delete(unwanted[num_unwanted]);
|
||||
}
|
||||
# endif
|
||||
#endif
|
||||
|
||||
scheme_init_os_thread();
|
||||
|
||||
return do_main_stack_setup(no_auto_statics, _main, data);
|
||||
}
|
||||
|
||||
#ifdef MZ_PRECISE_GC
|
||||
END_XFORM_SKIP;
|
||||
#endif
|
||||
|
||||
void scheme_set_stack_bounds(void *base, void *deepest, int no_auto_statics)
|
||||
{
|
||||
scheme_set_stack_base(base, no_auto_statics);
|
||||
|
@ -243,14 +293,6 @@ void scheme_init_os_thread()
|
|||
vars = (Thread_Local_Variables *)malloc(sizeof(Thread_Local_Variables));
|
||||
memset(vars, 0, sizeof(Thread_Local_Variables));
|
||||
pthread_setspecific(scheme_thread_local_key, vars);
|
||||
# ifdef OS_X
|
||||
/* A hack that somehow avoids a problem with calling vm_allocate()
|
||||
later. There must be some deeper bug that I haven't found, yet. */
|
||||
if (1) {
|
||||
void *r;
|
||||
vm_allocate(mach_task_self(), (vm_address_t*)&r, 4096, TRUE);
|
||||
}
|
||||
# endif
|
||||
#endif
|
||||
#ifdef OS_X
|
||||
# ifdef MZ_PRECISE_GC
|
||||
|
|
Loading…
Reference in New Issue
Block a user