From 4fc71951ee974766c04f87d86b077eb0b894f338 Mon Sep 17 00:00:00 2001 From: Matthew Flatt Date: Wed, 19 Dec 2012 08:54:32 -0700 Subject: [PATCH] change subprocess handling to avoid waitpid(0, ...) Keep track of whether any Racket-managed subprocesses are pending, and use waitpid(0, ...) only if there is one, to better cooperate with an embedding environment. Also, add a chapter to the "Inside" manual to explain the issues. --- collects/scribblings/inside/inside.scrbl | 1 + .../scribblings/inside/subprocesses.scrbl | 39 ++++++++++++++++ collects/tests/racket/sp.rktl | 44 +++++++++++++++++++ src/racket/src/place.c | 41 ++++++++++++++--- src/racket/src/port.c | 15 +++++-- src/racket/src/schpriv.h | 2 + 6 files changed, 132 insertions(+), 10 deletions(-) create mode 100644 collects/scribblings/inside/subprocesses.scrbl create mode 100644 collects/tests/racket/sp.rktl diff --git a/collects/scribblings/inside/inside.scrbl b/collects/scribblings/inside/inside.scrbl index 4e1cdd3a63..c75d98d7b2 100644 --- a/collects/scribblings/inside/inside.scrbl +++ b/collects/scribblings/inside/inside.scrbl @@ -36,6 +36,7 @@ low-level libraries and structures purely through Racket code. @include-section["structures.scrbl"] @include-section["security.scrbl"] @include-section["custodians.scrbl"] +@include-section["subprocesses.scrbl"] @include-section["misc.scrbl"] @include-section["hooks.scrbl"] diff --git a/collects/scribblings/inside/subprocesses.scrbl b/collects/scribblings/inside/subprocesses.scrbl new file mode 100644 index 0000000000..defe01ce4d --- /dev/null +++ b/collects/scribblings/inside/subprocesses.scrbl @@ -0,0 +1,39 @@ +#lang scribble/doc +@(require "utils.rkt" + (for-label racket/system)) + +@title{Subprocesses} + +On Unix and Mac OS X, subprocess handling involves +@as-index[@cpp{fork}], @as-index[@cpp{waitpid}], and +@as-index[@cpp{SIGCHLD}], which creates a variety of issues within an +embedding application. On Windows, subprocess handling is more +straightforward, since no @cpp{fork} is required, and since Windows +provides an abstraction that is a close fit to Racket's subprocess +values. + +After Racket creates a subprocess via @racket[subprocess] (or +@racket[system], @racket[process], etc.), it periodically polls the +process status using @cpp{waitpid}. If the process is created as its +own group, then the call to @cpp{waitpid} uses the created +subprocess's process ID; for all other subprocesses, polling uses a +single call to @cpp{waitpid} with the first argument as @cpp{0}. Using +@cpp{0}, in particular, can interfere with other libraries in an +embedding context, so Racket refrains from calling @cpp{waitpid} if no +subprocesses are pending. + +Racket may or may not rely on a @cpp{SIGCHLD} handler, and it may or +may not block @cpp{SIGCHLD}. Currently, when Racket is compiled to +support @|tech-place|s, Racket blocks @cpp{SIGCHLD} on start up with +the expectation that all created threads have @cpp{SIGCHLD} blocked. +When Racket is not compiled to support @|tech-place|s, then a +@cpp{SIGCHLD} handler is installed. + +Using @cpp{fork} in an application that embeds Racket is problematic +for several reasons: Racket may install a @cpp{SIGALRM} handler and +schedule alarms to implement context switches, it may have file +descriptors open that should be closed in a child process, and it may +have changed the disposition of signals such as +@cpp{SIGCHLD}. Consequently, embedding Racket in a process that +@cpp{fork}s is technically not supported; in the future, Racket may +provide better support for such applications. diff --git a/collects/tests/racket/sp.rktl b/collects/tests/racket/sp.rktl new file mode 100644 index 0000000000..6d4d9065bf --- /dev/null +++ b/collects/tests/racket/sp.rktl @@ -0,0 +1,44 @@ + +(load-relative "testing.rktl") + +(require mzlib/process) + +(Section 'subprocess) + +(define self + (parameterize ([current-directory (find-system-path 'orig-dir)]) + (find-executable-path (find-system-path 'exec-file) #f))) + +(unless (eq? 'windows (system-type)) + (let ([try + (lambda (post-shutdown?) + (let ([l (parameterize ([subprocess-group-enabled (not post-shutdown?)]) + (process* self + "-e" + (format "(define l (process* \"~a\" \"-e\" \"(let loop () (loop))\"))" self) + "-e" + "(displayln (list-ref l 2))" + "-e" + "(flush-output)" + "-e" + "(let loop () (loop))"))] + [running? (lambda (sub-pid) + (regexp-match? + (format "(?m:^ *~a(?=[^0-9]))" sub-pid) + (let ([s (open-output-string)]) + (parameterize ([current-output-port s] + [current-input-port (open-input-string "")]) + (system (format "ps x"))) + (get-output-string s))))]) + (let ([sub-pid (read (car l))]) + (test 'running (list-ref l 4) 'status) + (test #t running? sub-pid) + ((list-ref l 4) 'kill) + ((list-ref l 4) 'wait) + (test 'done-error (list-ref l 4) 'status) + (test post-shutdown? running? sub-pid) + (when post-shutdown? + (parameterize ([current-input-port (open-input-string "")]) + (system (format "kill ~a" sub-pid)))))))]) + (try #t) + (try #f))) diff --git a/src/racket/src/place.c b/src/racket/src/place.c index 598c1a73c8..180289c8d6 100644 --- a/src/racket/src/place.c +++ b/src/racket/src/place.c @@ -688,6 +688,8 @@ SHARED_OK static Child_Status *child_statuses = NULL; SHARED_OK static mzrt_mutex* child_status_lock = NULL; SHARED_OK static mzrt_mutex* child_wait_lock = NULL; /* ordered before status lock */ +SHARED_OK static int started_thread, pending_children; + /* When the Racket process value for a process in a different group becomes GC-unreachable before a waitpid() on the process, then we need to keep waiting on the pid to let the OS gc the process. @@ -861,11 +863,18 @@ static void *mz_proc_thread_signal_worker(void *data) { /* We wait only on processes in the same group as Racket, because detecting the termination of a group's main process disables our ability to terminate all processes in the group. */ - check_pid = 0; /* => processes in the same group as Racket */ + if (pending_children) + check_pid = 0; /* => processes in the same group as Racket */ + else + check_pid = -1; /* don't check */ is_group = 0; } - pid = waitpid(check_pid, &status, WNOHANG); + if (check_pid == -1) { + pid = -1; + errno = ECHILD; + } else + pid = waitpid(check_pid, &status, WNOHANG); if (pid == -1) { if (errno == EINTR) { @@ -994,13 +1003,8 @@ void scheme_places_unblock_child_signal() XFORM_SKIP_PROC void scheme_places_start_child_signal_handler() { - mz_proc_thread *signal_thread; - mzrt_mutex_create(&child_status_lock); mzrt_mutex_create(&child_wait_lock); - - signal_thread = mz_proc_thread_create(mz_proc_thread_signal_worker, NULL); - mz_proc_thread_detach(signal_thread); } void scheme_wait_suspend() @@ -1013,6 +1017,29 @@ void scheme_wait_resume() mzrt_mutex_unlock(child_wait_lock); } +void scheme_starting_child() +{ + mzrt_mutex_lock(child_wait_lock); + + if (!started_thread) { + mz_proc_thread *signal_thread; + + signal_thread = mz_proc_thread_create(mz_proc_thread_signal_worker, NULL); + mz_proc_thread_detach(signal_thread); + } + + pending_children++; + + mzrt_mutex_unlock(child_wait_lock); +} + +void scheme_ended_child() +{ + mzrt_mutex_lock(child_wait_lock); + --pending_children; + mzrt_mutex_unlock(child_wait_lock); +} + /* ---------------------------------------------------------------------- */ /* When a place has a process-group that it may be waiting on, the we diff --git a/src/racket/src/port.c b/src/racket/src/port.c index 96fd412fdf..cd9f40f327 100644 --- a/src/racket/src/port.c +++ b/src/racket/src/port.c @@ -8551,6 +8551,7 @@ static int subp_done(Scheme_Object *so) sp->done = 1; sp->status = status; child_mref_done(sp); + scheme_ended_child(); return 1; } return 0; @@ -8618,6 +8619,7 @@ static Scheme_Object *subprocess_status(int argc, Scheme_Object **argv) child_mref_done(sp); sp->done = 1; sp->status = status; + scheme_ended_child(); } } # else @@ -8707,6 +8709,7 @@ static Scheme_Object *do_subprocess_kill(Scheme_Object *_sp, Scheme_Object *kill sp->done = 1; child_mref_done(sp); scheme_wait_resume(); + scheme_ended_child(); return scheme_void; } } @@ -8853,8 +8856,10 @@ static void unused_process_record(void *_sp, void *ignored) Scheme_Subprocess *sp = (Scheme_Subprocess *)_sp; # if defined(MZ_PLACES_WAITPID) - if (!sp->done) + if (!sp->done) { scheme_done_with_process_id(sp->pid, sp->is_group); + scheme_ended_child(); + } # else if (!((System_Child *)sp->handle)->done) { void **unused_pid; @@ -9308,13 +9313,15 @@ static Scheme_Object *subprocess(int c, Scheme_Object *args[]) init_sigchld(); sc = MALLOC_ONE_RT(System_Child); -#ifdef MZTAG_REQUIRED +# ifdef MZTAG_REQUIRED sc->type = scheme_rt_system_child; -#endif +# endif sc->id = 0; sc->done = 0; scheme_block_child_signals(1); +#else + scheme_starting_child(); #endif #if !defined(__QNX__) @@ -9388,6 +9395,8 @@ static Scheme_Object *subprocess(int c, Scheme_Object *args[]) #else if (!pid) scheme_places_unblock_child_signal(); + else if (pid == -1) + scheme_ended_child(); #endif } diff --git a/src/racket/src/schpriv.h b/src/racket/src/schpriv.h index 503764e402..42e3a0763d 100644 --- a/src/racket/src/schpriv.h +++ b/src/racket/src/schpriv.h @@ -3898,6 +3898,8 @@ int scheme_places_register_child(int pid, int is_group, void *signal_fd, int *st void scheme_wait_suspend(); void scheme_wait_resume(); void scheme_done_with_process_id(int pid, int is_group); +void scheme_starting_child(); +void scheme_ended_child(); # endif #endif