mirror of
https://github.com/frappe/gunicorn.git
synced 2026-07-04 19:51:30 +08:00
feat(companion): Add parent-death cleanup for manager and companions
Stop orphaned processes from lingering when their parent dies. set_parent_death_signal arms Linux prctl(PR_SET_PDEATHSIG) so a process is signalled the moment its parent exits, returning False off Linux so callers fall back to polling getppid. The manager records its parent pid, arms a SIGTERM parent-death signal, and checks getppid each tick: if the arbiter dies, the manager stops its companions and exits instead of running on under a dead arbiter. Each companion arms the same parent-death signal and rechecks getppid right after the fork, exiting if the manager already died before the signal was armed. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
parent
f21d0310be
commit
9c4d81726d
@ -693,7 +693,7 @@ No per-companion logic in Arbiter.
|
||||
- [x] Wire Gunicorn reload to manager `reread` or restart.
|
||||
- [x] Close Gunicorn-only fds in manager child.
|
||||
- [x] Close manager-only fds in companion child.
|
||||
- [ ] Add parent-death cleanup.
|
||||
- [x] Add parent-death cleanup.
|
||||
- [ ] Add lifecycle logs.
|
||||
- [ ] Add tests for config validation.
|
||||
- [ ] Add tests for state transitions.
|
||||
|
||||
@ -5,10 +5,12 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import ctypes
|
||||
import importlib
|
||||
import os
|
||||
import select
|
||||
import signal
|
||||
import sys
|
||||
import time
|
||||
from typing import TYPE_CHECKING, Callable, Iterable, Union
|
||||
|
||||
@ -19,6 +21,26 @@ from gunicorn.companion.process import CompanionProcess, State
|
||||
if TYPE_CHECKING:
|
||||
from gunicorn.companion.config import CompanionConfig
|
||||
|
||||
# prctl(2) option number for PR_SET_PDEATHSIG: "send me this signal when my
# parent dies".
PR_SET_PDEATHSIG = 1


def set_parent_death_signal(stop_signal) -> bool:
    """Ask the kernel to send ``stop_signal`` when this process's parent dies.

    Uses Linux ``prctl(PR_SET_PDEATHSIG)`` so an orphaned manager or companion
    is signalled the moment its parent goes away, rather than lingering.

    Arming only covers a parent that dies *after* this call. Callers that may
    race with the parent's exit should still compare ``os.getppid()`` with the
    expected parent pid afterwards.

    Args:
        stop_signal: Signal number (or ``signal.Signals`` member) to deliver.

    Returns:
        True when the kernel accepted the request. False on any non-Linux
        platform, when no C library exposing ``prctl`` could be loaded, or
        when ``prctl`` itself reported failure, so callers can fall back to
        polling ``os.getppid()``.
    """
    if not sys.platform.startswith("linux"):
        return False
    # glibc installs the C library as "libc.so.6"; musl-based systems do not.
    # ``CDLL(None)`` resolves symbols from the already-loaded process image,
    # which covers those systems instead of silently never arming.
    for library in ("libc.so.6", None):
        try:
            libc = ctypes.CDLL(library, use_errno=True)
            # prctl returns 0 on success and -1 on failure.
            return libc.prctl(PR_SET_PDEATHSIG, int(stop_signal), 0, 0, 0) == 0
        except (OSError, AttributeError):
            # Library missing or it has no prctl symbol: try the next one.
            continue
    return False
|
||||
|
||||
|
||||
class CompanionManager:
|
||||
"""Forks and supervises companion processes.
|
||||
@ -41,6 +63,7 @@ class CompanionManager:
|
||||
self.control = None
|
||||
self.stopping = False
|
||||
self._wakeup_pipe = None
|
||||
self.parent_pid = None
|
||||
|
||||
def run(self) -> None:
|
||||
"""Run the manager's supervision loop. This is the forked child body.
|
||||
@ -51,8 +74,14 @@ class CompanionManager:
|
||||
companions down and returns. Each tick reaps exited companions,
|
||||
retries any that are backing off, promotes those past ``startsecs``,
|
||||
and kills any that overran their stop deadline.
|
||||
|
||||
If the arbiter dies, the manager stops too: it arms a parent-death
|
||||
signal on Linux and, as a portable fallback, watches ``getppid`` each
|
||||
tick so it never keeps companions running under a dead arbiter.
|
||||
"""
|
||||
self.parent_pid = os.getppid()
|
||||
self._install_signals()
|
||||
set_parent_death_signal(signal.SIGTERM)
|
||||
if self.control is not None:
|
||||
self.control.create()
|
||||
for process in self.processes.values():
|
||||
@ -60,6 +89,9 @@ class CompanionManager:
|
||||
self.log.info("companion manager running (pid %s)", self.pid)
|
||||
try:
|
||||
while not self.stopping:
|
||||
if self._parent_gone():
|
||||
self.log.info("companion manager parent gone, stopping")
|
||||
break
|
||||
self._tick()
|
||||
self._wait()
|
||||
self.stop_all()
|
||||
@ -67,6 +99,10 @@ class CompanionManager:
|
||||
if self.control is not None:
|
||||
self.control.close()
|
||||
|
||||
def _parent_gone(self) -> bool:
|
||||
"""True once the arbiter that forked the manager has exited."""
|
||||
return os.getppid() != self.parent_pid
|
||||
|
||||
def _tick(self, now: float = None) -> None:
|
||||
"""One supervision pass over every companion."""
|
||||
now = now or time.time()
|
||||
@ -283,6 +319,10 @@ class CompanionManager:
|
||||
|
||||
try:
|
||||
self._close_manager_fds()
|
||||
set_parent_death_signal(signal.SIGTERM)
|
||||
if os.getppid() != self.pid:
|
||||
# Manager already died between fork and arming: do not run.
|
||||
os._exit(0)
|
||||
self._apply_environment(process.config)
|
||||
self._redirect_output(process.config)
|
||||
target = self._resolve_target(process.config.target)
|
||||
|
||||
@ -9,7 +9,7 @@ from unittest import mock
|
||||
import pytest
|
||||
|
||||
from gunicorn.companion.control import CommandError
|
||||
from gunicorn.companion.manager import CompanionManager
|
||||
from gunicorn.companion.manager import CompanionManager, set_parent_death_signal
|
||||
from gunicorn.companion.config import CompanionConfig
|
||||
from gunicorn.companion.process import State
|
||||
|
||||
@ -40,6 +40,29 @@ def test_resolve_target_rejects_bad_string():
|
||||
CompanionManager._resolve_target("no_colon")
|
||||
|
||||
|
||||
def test_set_parent_death_signal_noop_off_linux():
    """Off Linux the helper returns False without ever loading libc."""
    with mock.patch("sys.platform", "darwin"), \
            mock.patch("ctypes.CDLL") as load_library:
        assert set_parent_death_signal(signal.SIGTERM) is False
    # The platform guard must short-circuit before any ctypes work happens.
    load_library.assert_not_called()
|
||||
|
||||
|
||||
def test_set_parent_death_signal_arms_on_linux():
    """On Linux the helper calls prctl with the death-signal option."""
    libc = mock.Mock()
    libc.prctl.return_value = 0
    with mock.patch("sys.platform", "linux"), \
            mock.patch("ctypes.CDLL", return_value=libc):
        assert set_parent_death_signal(signal.SIGTERM) is True
    # 1 is PR_SET_PDEATHSIG; the trailing prctl arguments are unused zeros.
    # Pinning the arguments catches a wrong option number or signal, which a
    # bare assert_called_once() would let through.
    libc.prctl.assert_called_once_with(1, int(signal.SIGTERM), 0, 0, 0)
|
||||
|
||||
|
||||
def test_parent_gone_detects_reparenting():
    """``_parent_gone`` turns True once getppid stops matching parent_pid."""
    recorded_parent = 4242
    manager = make_manager("rq")
    manager.parent_pid = recorded_parent
    # First case: parent unchanged. Second case: reparented to pid 1.
    for reported_ppid, expected in ((recorded_parent, False), (1, True)):
        with mock.patch("os.getppid", return_value=reported_ppid):
            assert manager._parent_gone() is expected
|
||||
|
||||
|
||||
def test_close_manager_fds_closes_control_and_pipe():
|
||||
manager = make_manager("rq")
|
||||
manager.control = mock.Mock()
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user