From 3fb02ecb2e161c64d924fd72b06fe8085f62f54c Mon Sep 17 00:00:00 2001 From: Matthew Flatt Date: Thu, 20 Dec 2012 16:37:23 -0700 Subject: [PATCH] mingw32: use same strategy for thread-locals as MSVC The gcc implementation of `__thread' seems to be much slower, so we use the inline-assembly technique from the MSVC build plus a hack in "main.c". --- src/racket/include/schthread.h | 20 +++++++++++++++----- src/racket/main.c | 16 ++++++++++++++-- src/racket/src/salloc.c | 5 +++++ 3 files changed, 34 insertions(+), 7 deletions(-) diff --git a/src/racket/include/schthread.h b/src/racket/include/schthread.h index 1cd3592520..cc6d423b09 100644 --- a/src/racket/include/schthread.h +++ b/src/racket/include/schthread.h @@ -27,7 +27,7 @@ extern "C" { #if defined(MZ_USE_PLACES) || defined(MZ_USE_FUTURES) # define USE_THREAD_LOCAL -# if _MSC_VER +# ifdef _WIN32 # ifdef _WIN64 # define THREAD_LOCAL __declspec(thread) # define MZ_THREAD_EXTERN extern @@ -36,10 +36,6 @@ extern "C" { # define THREAD_LOCAL /* empty */ # define IMPLEMENT_THREAD_LOCAL_VIA_WIN_TLS # endif -# elif __MINGW32__ -# define THREAD_LOCAL __thread -# define MZ_THREAD_EXTERN extern -# define IMPLEMENT_THREAD_LOCAL_EXTERNALLY_VIA_PROC # elif (defined(__APPLE__) && defined(__MACH__)) || defined(GC2_PLACES_TESTING) # define IMPLEMENT_THREAD_LOCAL_VIA_PTHREADS # if defined(__x86_64__) || defined(__i386__) @@ -422,6 +418,19 @@ MZ_EXTERN uintptr_t scheme_tls_delta; MZ_EXTERN int scheme_tls_index; # endif static __inline Thread_Local_Variables **scheme_get_thread_local_variables_ptr(void) { +# ifdef __MINGW32__ + Thread_Local_Variables **x; + asm ( + "mov %%fs:(0x2C), %%eax;" + "mov (%%eax), %%eax;" + "add %1, %%eax;" + "mov %%eax, %0;" + :"=r"(x) /* output */ + :"r"(scheme_tls_delta) + :"%eax" /* clobbered register */ + ); + return x; +# else __asm { mov eax, FS:[0x2C] # ifdef MZ_USE_WIN_TLS_VIA_DLL add eax, scheme_tls_index @@ -429,6 +438,7 @@ static __inline Thread_Local_Variables 
**scheme_get_thread_local_variables_ptr(v mov eax, [eax] add eax, scheme_tls_delta } /* result is in eax */ +# endif } static __inline Thread_Local_Variables *scheme_get_thread_local_variables(void) { return *scheme_get_thread_local_variables_ptr(); diff --git a/src/racket/main.c b/src/racket/main.c index 498866115b..ee119e8432 100644 --- a/src/racket/main.c +++ b/src/racket/main.c @@ -292,11 +292,11 @@ START_XFORM_SKIP; #ifdef IMPLEMENT_THREAD_LOCAL_VIA_WIN_TLS extern intptr_t _tls_index; -static __declspec(thread) void *tls_space; +static __thread void *tls_space; #endif #ifdef DOS_FILE_SYSTEM -static void load_delayed() +void load_delayed() { /* Order matters: load dependencies first */ # ifndef MZ_PRECISE_GC @@ -305,7 +305,19 @@ static void load_delayed() load_delayed_dll(NULL, "libracket" DLL_3M_SUFFIX "xxxxxxx.dll"); record_dll_path(); # ifdef IMPLEMENT_THREAD_LOCAL_VIA_WIN_TLS +# ifdef __MINGW32__ + { + /* gcc declares space for the thread-local variable in a way that + the OS can set up, but it doesn't actually map variables + through the OS-supplied mechanism. Just assume that the first + thread-local variable is ours. */ + void **base; + asm("mov %%fs:(0x2C), %0;" :"=r"(base)); + scheme_register_tls_space(*base, _tls_index); + } +# else scheme_register_tls_space(&tls_space, _tls_index); +# endif # endif } #endif diff --git a/src/racket/src/salloc.c b/src/racket/src/salloc.c index 33bd627318..9e47d977ea 100644 --- a/src/racket/src/salloc.c +++ b/src/racket/src/salloc.c @@ -281,8 +281,13 @@ void scheme_setup_thread_local_key_if_needed() XFORM_SKIP_PROC { void **base; +# ifdef __MINGW32__ + asm("mov %%fs:(0x2C), %0;" + :"=r"(base)); /* output */ +# else __asm { mov ecx, FS:[0x2C] mov base, ecx } +# endif scheme_tls_delta -= (uintptr_t)base[scheme_tls_index]; scheme_tls_index *= sizeof(void*); }