term: asynchronous client application termination

When the foot window is closed, and we need to terminate the client application,
do this in an asynchronous fashion:

* Don’t do a blocking call to waitpid(), instead, rely on the reaper callback
* Use a timer FD to implement the timeout before sending SIGKILL (instead of
  using SIGALRM).
* Send SIGTERM immediately (we used to *just* close the PTY, and then wait 2
  seconds before sending SIGTERM).
* Raise the timeout from 2 seconds to 60

Full shutdown now depends on *two* asynchronous tasks - unmapping the window,
and waiting for the client application to terminate.

Only when *both* of these have completed do we proceed and call term_destroy(),
and the user provided shutdown callback.
This commit is contained in:
Daniel Eklöf 2021-07-31 18:18:48 +02:00
parent 35041cd431
commit 384b1c330f
No known key found for this signature in database
GPG key ID: 5BBD4992C116573F
3 changed files with 170 additions and 43 deletions

View file

@ -3640,7 +3640,7 @@ fdm_hook_refresh_pending_terminals(struct fdm *fdm, void *data)
tll_foreach(renderer->wayl->terms, it) {
struct terminal *term = it->item;
if (unlikely(!term->window->is_configured))
if (unlikely(term->is_shutting_down || !term->window->is_configured))
continue;
bool grid = term->render.refresh.grid;

View file

@ -982,18 +982,7 @@ load_fonts_from_conf(struct terminal *term)
return reload_fonts(term);
}
/*
 * Reaper callback for the client application (the "slave").
 *
 * Stores the exit status on the terminal and flags the slave as reaped.
 * Unless the configuration asks us to hold at exit, the terminal
 * shutdown is then initiated.
 *
 * 'data' is the terminal instance the slave belongs to.
 */
static void
slave_died(struct reaper *reaper, pid_t pid, int status, void *data)
{
    struct terminal *terminal = data;

    LOG_DBG("slave (PID=%u) died", pid);

    terminal->exit_status = status;
    terminal->slave_has_been_reaped = true;

    /* hold-at-exit: keep the terminal around */
    if (terminal->conf->hold_at_exit)
        return;

    term_shutdown(terminal);
}
static void slave_died(struct reaper *reaper, pid_t pid, int status, void *data);
struct terminal *
term_init(const struct config *conf, struct fdm *fdm, struct reaper *reaper,
@ -1176,6 +1165,7 @@ term_init(const struct config *conf, struct fdm *fdm, struct reaper *reaper,
.max_width = SIXEL_MAX_WIDTH,
.max_height = SIXEL_MAX_HEIGHT,
},
.slave_terminate_timeout_fd = -1,
.shutdown_cb = shutdown_cb,
.shutdown_data = shutdown_data,
.foot_exe = xstrdup(foot_exe),
@ -1278,10 +1268,115 @@ term_window_configured(struct terminal *term)
}
}
/*
* Shutdown logic
*
* A foot instance can be terminated in two ways:
*
* - the client application terminates (user types exit, or pressed C-d in the
* shell, etc)
* - the foot window is closed
*
* Both variants need to trigger the other action. I.e. if the client
* application is terminated, then we need to close the window. If the window is
* closed, we need to terminate the client application.
*
* Only when *both* tasks have completed do we consider ourselves fully
* shutdown. This is when we can call term_destroy(), and the user provided
* shutdown callback.
*
* The functions involved with this are:
*
* - shutdown_maybe_done(): called after any of the two tasks above have
* completed. When it determines that *both* tasks are done, it calls
* term_destroy() and the user provided shutdown callback.
*
* - slave_died(): reaper callback, called when the client application has
* terminated.
*
* + Kills the terminate timeout timer
* + Calls shutdown_maybe_done() if the shutdown procedure has already
* started (i.e. the window being closed initiated the shutdown)
* -OR-
* Initiates the shutdown itself, by calling term_shutdown() (client
* application termination initiated the shutdown).
*
* - term_shutdown(): unregisters all FDM callbacks, sends SIGTERM to the client
* application and installs a terminate timeout timer (if it hasn't already
* terminated). Finally registers an event FD with the FDM, which is
* immediately triggered. This is done to ensure any pending FDM events are
* handled before shutting down.
*
* - fdm_shutdown(): FDM callback, triggered by the event FD in
* term_shutdown(). Unmaps and destroys the window resources, and ensures the
* seats' focused pointers don't reference us. Finally calls
* shutdown_maybe_done().
*
* - fdm_terminate_timeout(): FDM callback for the terminate timeout
* timer. This function is called when the client application hasn't
* terminated after 60 seconds (after the SIGTERM). Sends SIGKILL to the
* client application.
*
* - term_destroy(): normally called from shutdown_maybe_done(), when both the
* window has been unmapped, and the client application has terminated. In
* this case, it simply destroys all resources.
*
* It may however also be called without term_shutdown() having been called
* (typically in error code paths - for example, when the Wayland connection
* is closed by the compositor). In this case, the client application is
* typically still running, and we can't assume the FDM is running. To handle
* this, we configure a 60 second SIGALRM, send SIGTERM to the client
* application, and then enter a blocking waitpid().
*
* If the alarm triggers, we send SIGKILL and once again enter a blocking
* waitpid().
*/
/*
 * Completes the shutdown once both asynchronous tasks have finished:
 * the window is gone (term->window == NULL) and the client application
 * has been reaped.
 *
 * While either of them is still pending, this is a no-op (apart from
 * the debug log). When both are done, term_destroy() is called and its
 * return value is passed on to the user provided shutdown callback, if
 * one is set.
 */
static void
shutdown_maybe_done(struct terminal *term)
{
    const bool window_gone = term->window == NULL;
    const bool all_done = window_gone && term->slave_has_been_reaped;

    LOG_DBG("window=%p, slave-has-been-reaped=%d --> %s",
            (void *)term->window, term->slave_has_been_reaped,
            (all_done
             ? "shutdown done, calling term_destroy()"
             : "no action"));

    if (all_done) {
        /*
         * Fetch the callback and its argument *before* calling
         * term_destroy(); 'term' is not accessed after that call
         * (presumably it is no longer valid - verify against
         * term_destroy()).
         */
        void (*notify)(void *, int) = term->shutdown_cb;
        void *notify_arg = term->shutdown_data;

        const int exit_code = term_destroy(term);

        if (notify != NULL)
            notify(notify_arg, exit_code);
    }
}
/*
 * Reaper callback, invoked when the client application (the "slave")
 * has terminated.
 *
 * - Records the exit status and marks the slave as reaped.
 * - Removes the terminate timeout timer from the FDM, if one is
 *   installed.
 * - If a shutdown is already in progress, checks whether it can now be
 *   completed. Otherwise, initiates the shutdown itself - unless
 *   hold-at-exit is enabled in the configuration.
 *
 * 'data' is the terminal instance the slave belongs to.
 */
static void
slave_died(struct reaper *reaper, pid_t pid, int status, void *data)
{
    struct terminal *terminal = data;

    LOG_DBG("slave (PID=%u) died", pid);

    terminal->exit_status = status;
    terminal->slave_has_been_reaped = true;

    /* The SIGKILL timeout is no longer needed */
    const int timer_fd = terminal->slave_terminate_timeout_fd;
    if (timer_fd >= 0) {
        fdm_del(terminal->fdm, timer_fd);
        terminal->slave_terminate_timeout_fd = -1;
    }

    if (terminal->is_shutting_down) {
        /* Shutdown was initiated by someone else; we may be the last piece */
        shutdown_maybe_done(terminal);
        return;
    }

    if (!terminal->conf->hold_at_exit)
        term_shutdown(terminal);
}
static bool
fdm_shutdown(struct fdm *fdm, int fd, int events, void *data)
{
LOG_DBG("FDM shutdown");
struct terminal *term = data;
/* Kill the event FD */
@ -1307,13 +1402,27 @@ fdm_shutdown(struct fdm *fdm, int fd, int events, void *data)
it->item.mouse_focus = NULL;
}
void (*cb)(void *, int) = term->shutdown_cb;
void *cb_data = term->shutdown_data;
shutdown_maybe_done(term);
return true;
}
int exit_code = term_destroy(term);
if (cb != NULL)
cb(cb_data, exit_code);
/*
 * FDM callback for the slave terminate timeout timer FD.
 *
 * Called when the timer installed by term_shutdown() becomes readable,
 * i.e. the client application has not terminated within the timeout
 * following the SIGTERM. Sends SIGKILL to the client application.
 *
 * Returns true when the event was handled (including the case where
 * there was nothing to read yet), and false if reading from the timer
 * FD failed.
 */
static bool
fdm_terminate_timeout(struct fdm *fdm, int fd, int events, void *data)
{
    uint64_t unused;
    ssize_t bytes = read(fd, &unused, sizeof(unused));

    if (bytes < 0) {
        /*
         * The timer FD is non-blocking (TFD_NONBLOCK): a read with no
         * pending expiration fails with EAGAIN. That, and an
         * interrupted read, are not errors - the timer simply has not
         * fired (yet), so do not send SIGKILL and do not report
         * failure.
         */
        if (errno == EAGAIN || errno == EINTR)
            return true;

        LOG_ERRNO("failed to read from slave terminate timeout FD");
        return false;
    }

    struct terminal *term = data;

    /* slave_died() removes this timer, so we must not get here after a reap */
    xassert(!term->slave_has_been_reaped);

    LOG_DBG("slave (PID=%u) has not terminated, sending SIGKILL (%d)",
            term->slave, SIGKILL);

    kill(term->slave, SIGKILL);
    return true;
}
@ -1341,14 +1450,34 @@ term_shutdown(struct terminal *term)
fdm_del(term->fdm, term->blink.fd);
fdm_del(term->fdm, term->flash.fd);
/* We'll deal with this explicitly */
reaper_del(term->reaper, term->slave);
if (term->window != NULL && term->window->is_configured)
fdm_del(term->fdm, term->ptmx);
else
close(term->ptmx);
if (!term->slave_has_been_reaped) {
LOG_DBG("initiating asynchronous terminate of slave (PID=%u)",
term->slave);
kill(term->slave, SIGTERM);
const struct itimerspec timeout = {.it_value = {.tv_sec = 60}};
int timeout_fd = timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC | TFD_NONBLOCK);
if (timeout_fd < 0 ||
timerfd_settime(timeout_fd, 0, &timeout, NULL) < 0 ||
!fdm_add(term->fdm, timeout_fd, EPOLLIN, &fdm_terminate_timeout, term))
{
if (timeout_fd >= 0)
close(timeout_fd);
LOG_ERRNO("failed to create slave terminate timeout FD");
return false;
}
xassert(term->slave_terminate_timeout_fd < 0);
term->slave_terminate_timeout_fd = timeout_fd;
}
term->selection.auto_scroll.fd = -1;
term->render.app_sync_updates.timer_fd = -1;
term->render.title.timer_fd = -1;
@ -1409,6 +1538,7 @@ term_destroy(struct terminal *term)
fdm_del(term->fdm, term->blink.fd);
fdm_del(term->fdm, term->flash.fd);
fdm_del(term->fdm, term->ptmx);
xassert(term->slave_terminate_timeout_fd < 0);
if (term->window != NULL) {
wayl_win_destroy(term->window);
@ -1499,24 +1629,29 @@ term_destroy(struct terminal *term)
int ret = EXIT_SUCCESS;
if (term->slave > 0) {
/* We'll deal with this explicitly */
reaper_del(term->reaper, term->slave);
int exit_status;
if (term->slave_has_been_reaped)
exit_status = term->exit_status;
else {
LOG_DBG("waiting for slave (PID=%u) to die", term->slave);
LOG_DBG("initiating blocking terminate of slave (PID=%u)",
term->slave);
kill(term->slave, SIGTERM);
/*
* Note: we've closed ptmx, so the slave *should* exit...
* we've closed the ptmx, and sent SIGTERM to the client
* application. It *should* exit...
*
* But, since it is possible to write clients that ignore
* this, we need to handle it in *some* way.
*
* So, what we do is register a SIGALRM handler, and configure
* a 2 second alarm. If the slave hasn't died after this time,
* we send it a SIGTERM, then wait another 2 seconds (using
* the same alarm mechanism). If it still hasn't died, we send
* it a SIGKILL.
* So, what we do is register a SIGALRM handler, and configure a 60
* second alarm. If the slave hasn't died after this time, we send
* it a SIGKILL.
*
* Note that this solution is *not* asynchronous, and any
* other events etc will be ignored during this time. This of
@ -1524,9 +1659,7 @@ term_destroy(struct terminal *term)
* there might be other terminals running.
*/
sigaction(SIGALRM, &(const struct sigaction){.sa_handler = &sig_alarm}, NULL);
alarm(2);
int kill_signal = SIGTERM;
alarm(60);
while (true) {
int r = waitpid(term->slave, &exit_status, 0);
@ -1538,18 +1671,11 @@ term_destroy(struct terminal *term)
xassert(errno == EINTR);
if (alarm_raised) {
LOG_DBG("slave hasn't died yet, sending: %s (%d)",
kill_signal == SIGTERM ? "SIGTERM" : "SIGKILL",
kill_signal);
kill(term->slave, kill_signal);
alarm_raised = 0;
if (kill_signal != SIGKILL)
alarm(2);
kill_signal = SIGKILL;
LOG_DBG(
"slave (PID=%u) has not terminate yet, "
"sending: SIGKILL (%d)", term->slave, SIGKILL);
kill(term->slave, SIGKILL);
}
}
}

View file

@ -595,6 +595,7 @@ struct terminal {
bool is_shutting_down;
bool slave_has_been_reaped;
int slave_terminate_timeout_fd;
int exit_status;
void (*shutdown_cb)(void *data, int exit_code);
void *shutdown_data;