change subprocess handling to avoid waitpid(0, ...)

Keep track of whether any Racket-managed subprocesses are pending,
and use waitpid(0, ...) only if there is one, to better cooperate
with an embedding environment.

Also, add a chapter to the "Inside" manual to explain the issues.
This commit is contained in:
Matthew Flatt 2012-12-19 08:54:32 -07:00
parent dc3f142f31
commit 4fc71951ee
6 changed files with 132 additions and 10 deletions

View File

@ -36,6 +36,7 @@ low-level libraries and structures purely through Racket code.
@include-section["structures.scrbl"]
@include-section["security.scrbl"]
@include-section["custodians.scrbl"]
@include-section["subprocesses.scrbl"]
@include-section["misc.scrbl"]
@include-section["hooks.scrbl"]

View File

@ -0,0 +1,39 @@
#lang scribble/doc
@(require "utils.rkt"
(for-label racket/system))
@title{Subprocesses}
On Unix and Mac OS X, subprocess handling involves
@as-index[@cpp{fork}], @as-index[@cpp{waitpid}], and
@as-index[@cpp{SIGCHLD}], which creates a variety of issues within an
embedding application. On Windows, subprocess handling is more
straightforward, since no @cpp{fork} is required, and since Windows
provides an abstraction that is a close fit to Racket's subprocess
values.
After Racket creates a subprocess via @racket[subprocess] (or
@racket[system], @racket[process], etc.), it periodically polls the
process status using @cpp{waitpid}. If the process is created as its
own group, then the call to @cpp{waitpid} uses the created
subprocess's process ID; for all other subprocesses, polling uses a
single call to @cpp{waitpid} with the first argument as @cpp{0}. Using
@cpp{0}, in particular, can interfere with other libraries in an
embedding context, so Racket refrains from calling @cpp{waitpid} if no
subprocesses are pending.
Racket may or may not rely on a @cpp{SIGCHLD} handler, and it may or
may not block @cpp{SIGCHLD}. Currently, when Racket is compiled to
support @|tech-place|s, Racket blocks @cpp{SIGCHLD} on start up with
the expectation that all created threads have @cpp{SIGCHLD} blocked.
When Racket is not compiled to support @|tech-place|s, then a
@cpp{SIGCHLD} handler is installed.
Using @cpp{fork} in an application that embeds Racket is problematic
for several reasons: Racket may install a @cpp{SIGALRM} handler and
schedule alarms to implement context switches, it may have file
descriptors open that should be closed in a child process, and it may
have changed the disposition of signals such as
@cpp{SIGCHLD}. Consequently, embedding Racket in a process that
@cpp{fork}s is technically not supported; in the future, Racket may
provide better support for such applications.

View File

@ -0,0 +1,44 @@
(load-relative "testing.rktl")
(require mzlib/process)
(Section 'subprocess)
(define self
(parameterize ([current-directory (find-system-path 'orig-dir)])
(find-executable-path (find-system-path 'exec-file) #f)))
(unless (eq? 'windows (system-type))
(let ([try
(lambda (post-shutdown?)
(let ([l (parameterize ([subprocess-group-enabled (not post-shutdown?)])
(process* self
"-e"
(format "(define l (process* \"~a\" \"-e\" \"(let loop () (loop))\"))" self)
"-e"
"(displayln (list-ref l 2))"
"-e"
"(flush-output)"
"-e"
"(let loop () (loop))"))]
[running? (lambda (sub-pid)
(regexp-match?
(format "(?m:^ *~a(?=[^0-9]))" sub-pid)
(let ([s (open-output-string)])
(parameterize ([current-output-port s]
[current-input-port (open-input-string "")])
(system (format "ps x")))
(get-output-string s))))])
(let ([sub-pid (read (car l))])
(test 'running (list-ref l 4) 'status)
(test #t running? sub-pid)
((list-ref l 4) 'kill)
((list-ref l 4) 'wait)
(test 'done-error (list-ref l 4) 'status)
(test post-shutdown? running? sub-pid)
(when post-shutdown?
(parameterize ([current-input-port (open-input-string "")])
(system (format "kill ~a" sub-pid)))))))])
(try #t)
(try #f)))

View File

@ -688,6 +688,8 @@ SHARED_OK static Child_Status *child_statuses = NULL;
SHARED_OK static mzrt_mutex* child_status_lock = NULL;
SHARED_OK static mzrt_mutex* child_wait_lock = NULL; /* ordered before status lock */
SHARED_OK static int started_thread, pending_children;
/* When the Racket process value for a process in a different group becomes
GC-unreachable before a waitpid() on the process, then we
need to keep waiting on the pid to let the OS gc the process.
@ -861,11 +863,18 @@ static void *mz_proc_thread_signal_worker(void *data) {
/* We wait only on processes in the same group as Racket,
because detecting the termination of a group's main process
disables our ability to terminate all processes in the group. */
check_pid = 0; /* => processes in the same group as Racket */
if (pending_children)
check_pid = 0; /* => processes in the same group as Racket */
else
check_pid = -1; /* don't check */
is_group = 0;
}
pid = waitpid(check_pid, &status, WNOHANG);
if (check_pid == -1) {
pid = -1;
errno = ECHILD;
} else
pid = waitpid(check_pid, &status, WNOHANG);
if (pid == -1) {
if (errno == EINTR) {
@ -994,13 +1003,8 @@ void scheme_places_unblock_child_signal() XFORM_SKIP_PROC
void scheme_places_start_child_signal_handler()
{
mz_proc_thread *signal_thread;
mzrt_mutex_create(&child_status_lock);
mzrt_mutex_create(&child_wait_lock);
signal_thread = mz_proc_thread_create(mz_proc_thread_signal_worker, NULL);
mz_proc_thread_detach(signal_thread);
}
void scheme_wait_suspend()
@ -1013,6 +1017,29 @@ void scheme_wait_resume()
mzrt_mutex_unlock(child_wait_lock);
}
void scheme_starting_child()
{
mzrt_mutex_lock(child_wait_lock);
if (!started_thread) {
mz_proc_thread *signal_thread;
signal_thread = mz_proc_thread_create(mz_proc_thread_signal_worker, NULL);
mz_proc_thread_detach(signal_thread);
}
pending_children++;
mzrt_mutex_unlock(child_wait_lock);
}
void scheme_ended_child()
{
mzrt_mutex_lock(child_wait_lock);
--pending_children;
mzrt_mutex_unlock(child_wait_lock);
}
/* ---------------------------------------------------------------------- */
/* When a place has a process-group that it may be waiting on, the we

View File

@ -8551,6 +8551,7 @@ static int subp_done(Scheme_Object *so)
sp->done = 1;
sp->status = status;
child_mref_done(sp);
scheme_ended_child();
return 1;
}
return 0;
@ -8618,6 +8619,7 @@ static Scheme_Object *subprocess_status(int argc, Scheme_Object **argv)
child_mref_done(sp);
sp->done = 1;
sp->status = status;
scheme_ended_child();
}
}
# else
@ -8707,6 +8709,7 @@ static Scheme_Object *do_subprocess_kill(Scheme_Object *_sp, Scheme_Object *kill
sp->done = 1;
child_mref_done(sp);
scheme_wait_resume();
scheme_ended_child();
return scheme_void;
}
}
@ -8853,8 +8856,10 @@ static void unused_process_record(void *_sp, void *ignored)
Scheme_Subprocess *sp = (Scheme_Subprocess *)_sp;
# if defined(MZ_PLACES_WAITPID)
if (!sp->done)
if (!sp->done) {
scheme_done_with_process_id(sp->pid, sp->is_group);
scheme_ended_child();
}
# else
if (!((System_Child *)sp->handle)->done) {
void **unused_pid;
@ -9308,13 +9313,15 @@ static Scheme_Object *subprocess(int c, Scheme_Object *args[])
init_sigchld();
sc = MALLOC_ONE_RT(System_Child);
#ifdef MZTAG_REQUIRED
# ifdef MZTAG_REQUIRED
sc->type = scheme_rt_system_child;
#endif
# endif
sc->id = 0;
sc->done = 0;
scheme_block_child_signals(1);
#else
scheme_starting_child();
#endif
#if !defined(__QNX__)
@ -9388,6 +9395,8 @@ static Scheme_Object *subprocess(int c, Scheme_Object *args[])
#else
if (!pid)
scheme_places_unblock_child_signal();
else if (pid == -1)
scheme_ended_child();
#endif
}

View File

@ -3898,6 +3898,8 @@ int scheme_places_register_child(int pid, int is_group, void *signal_fd, int *st
void scheme_wait_suspend();
void scheme_wait_resume();
void scheme_done_with_process_id(int pid, int is_group);
void scheme_starting_child();
void scheme_ended_child();
# endif
#endif