reaper: monitor SIGCHLD using the FDM instead of via a signalfd

In addition to letting the FDM do the low-level signal watching, this
patch also fixes a bug; multiple SIGCHLDs, be it delivered either through a
signal, or via a signalfd, can be coalesced, like all signals.

This means we need to loop on waitpid() with WNOHANG until there are
no more processes to reap.

This in turn requires a small change to the way reaper callbacks are
implemented.

Previously, the callback was allowed to do the wait(). This was
signalled back to the reaper through the callback’s return value.

Now, since we’ve already wait():ed, the process’ exit status is passed
as an argument to the reaper callback.

The callback for the client application has been updated accordingly;
it sets a flag in the terminal struct, telling term_destroy() that the
process has already been wait():ed on, and also stores the exit
status.
This commit is contained in:
Daniel Eklöf 2021-02-10 16:22:51 +01:00
parent 37220fc189
commit 79e3a46943
No known key found for this signature in database
GPG key ID: 5BBD4992C116573F
4 changed files with 92 additions and 140 deletions

116
reaper.c
View file

@ -3,13 +3,13 @@
#include <stdlib.h>
#include <unistd.h>
#include <sys/epoll.h>
#include <signal.h>
#include <sys/wait.h>
#include <sys/signalfd.h>
#include <tllist.h>
#define LOG_MODULE "reaper"
#define LOG_ENABLE_DBG 0
#include "log.h"
#include "tllist.h"
struct child {
pid_t pid;
@ -19,56 +19,32 @@ struct child {
struct reaper {
struct fdm *fdm;
int fd;
tll(struct child) children;
};
static bool fdm_reap(struct fdm *fdm, int fd, int events, void *data);
static bool fdm_reap(struct fdm *fdm, int signo, void *data);
struct reaper *
reaper_init(struct fdm *fdm)
{
sigset_t mask;
sigemptyset(&mask);
sigaddset(&mask, SIGCHLD);
/* Block normal signal handling - we're using a signalfd instead */
if (sigprocmask(SIG_BLOCK, &mask, NULL) < 0) {
LOG_ERRNO("failed to block SIGCHLD");
return NULL;
}
int fd = signalfd(-1, &mask, SFD_NONBLOCK | SFD_CLOEXEC);
if (fd < 0) {
LOG_ERRNO("failed to create signal FD");
sigprocmask(SIG_UNBLOCK, &mask, NULL);
return NULL;
}
struct reaper *reaper = malloc(sizeof(*reaper));
if (unlikely(reaper == NULL)) {
LOG_ERRNO("malloc() failed");
close(fd);
sigprocmask(SIG_UNBLOCK, &mask, NULL);
return NULL;
}
*reaper = (struct reaper){
.fdm = fdm,
.fd = fd,
.children = tll_init(),
};
if (!fdm_add(fdm, fd, EPOLLIN, &fdm_reap, reaper)){
LOG_ERR("failed to register with the FDM");
if (!fdm_signal_add(fdm, SIGCHLD, &fdm_reap, reaper))
goto err;
}
return reaper;
err:
tll_free(reaper->children);
close(fd);
free(reaper);
return NULL;
}
@ -79,14 +55,9 @@ reaper_destroy(struct reaper *reaper)
if (reaper == NULL)
return;
fdm_del(reaper->fdm, reaper->fd);
fdm_signal_del(reaper->fdm, SIGCHLD);
tll_free(reaper->children);
free(reaper);
sigset_t mask;
sigemptyset(&mask);
sigaddset(&mask, SIGCHLD);
sigprocmask(SIG_UNBLOCK, &mask, NULL);
}
void
@ -110,69 +81,40 @@ reaper_del(struct reaper *reaper, pid_t pid)
}
static bool
fdm_reap(struct fdm *fdm, int fd, int events, void *data)
fdm_reap(struct fdm *fdm, int signo, void *data)
{
struct reaper *reaper = data;
bool pollin = events & EPOLLIN;
bool hup = events & EPOLLHUP;
while (true) {
int status;
pid_t pid = waitpid(-1, &status, WNOHANG);
if (pid <= 0)
break;
if (hup && !pollin)
return false;
if (WIFEXITED(status))
LOG_DBG("pid=%d: exited with status=%d", pid, WEXITSTATUS(status));
else if (WIFSIGNALED(status))
LOG_DBG("pid=%d: killed by signal=%d", pid, WTERMSIG(status));
else
LOG_DBG("pid=%d: died of unknown resason", pid);
assert(pollin);
tll_foreach(reaper->children, it) {
struct child *_child = &it->item;
struct signalfd_siginfo info;
ssize_t amount = read(reaper->fd, &info, sizeof(info));
if (amount < 0) {
LOG_ERRNO("failed to read");
return false;
}
assert((size_t)amount >= sizeof(info));
if (info.ssi_signo != SIGCHLD) {
LOG_WARN("got non-SIGCHLD signal: %d", info.ssi_signo);
return true;
}
tll_foreach(reaper->children, it) {
struct child *_child = &it->item;
if (_child->pid != (pid_t)info.ssi_pid)
continue;
/* Make sure we remove it *before* the callback, since it too
* may remove it */
struct child child = it->item;
tll_remove(reaper->children, it);
bool reap_ourselves = true;
if (child.cb != NULL)
reap_ourselves = !child.cb(reaper, child.pid, child.cb_data);
if (reap_ourselves) {
int result;
int res = waitpid(child.pid, &result, WNOHANG);
if (res <= 0) {
if (res < 0)
LOG_ERRNO("waitpid failed for pid=%d", child.pid);
if (_child->pid != pid)
continue;
}
else if (WIFEXITED(result))
LOG_DBG("pid=%d: exited with status=%d", pid, WEXITSTATUS(result));
else if (WIFSIGNALED(result))
LOG_DBG("pid=%d: killed by signal=%d", pid, WTERMSIG(result));
else
LOG_DBG("pid=%d: died of unknown resason", pid);
/* Make sure we remove it *before* the callback, since it too
* may remove it */
struct child child = it->item;
tll_remove(reaper->children, it);
if (child.cb != NULL)
child.cb(reaper, child.pid, status, child.cb_data);
break;
}
}
if (hup)
return false;
return true;
}