diff --git a/docs/design/companion-process-manager.md b/docs/design/companion-process-manager.md
index 6d4fc89c..e97255e3 100644
--- a/docs/design/companion-process-manager.md
+++ b/docs/design/companion-process-manager.md
@@ -693,7 +693,7 @@ No per-companion logic in Arbiter.
 - [x] Wire Gunicorn reload to manager `reread` or restart.
 - [x] Close Gunicorn-only fds in manager child.
 - [x] Close manager-only fds in companion child.
-- [ ] Add parent-death cleanup.
+- [x] Add parent-death cleanup.
 - [ ] Add lifecycle logs.
 - [ ] Add tests for config validation.
 - [ ] Add tests for state transitions.
diff --git a/gunicorn/companion/manager.py b/gunicorn/companion/manager.py
index 62049c2f..b7026299 100644
--- a/gunicorn/companion/manager.py
+++ b/gunicorn/companion/manager.py
@@ -5,10 +5,12 @@
 
 from __future__ import annotations
 
+import ctypes
 import importlib
 import os
 import select
 import signal
+import sys
 import time
 from typing import TYPE_CHECKING, Callable, Iterable, Union
 
@@ -19,6 +21,31 @@ from gunicorn.companion.process import CompanionProcess, State
 if TYPE_CHECKING:
     from gunicorn.companion.config import CompanionConfig
 
+# prctl option number for "send me this signal when my parent dies".
+PR_SET_PDEATHSIG = 1
+
+
+def set_parent_death_signal(stop_signal) -> bool:
+    """Ask the kernel to send ``stop_signal`` when this process's parent dies.
+
+    Uses Linux ``prctl(PR_SET_PDEATHSIG)`` so an orphaned manager or companion
+    is signalled the moment its parent goes away, rather than lingering.
+
+    Returns True when the signal was armed, and False on any non-Linux
+    platform or on error, so callers can fall back to polling
+    ``os.getppid()``.
+    """
+    if not sys.platform.startswith("linux"):
+        return False
+    try:
+        # Look prctl up in the already-loaded C library (dlopen(NULL))
+        # rather than opening "libc.so.6" by name: that soname is glibc-only,
+        # so on musl-based Linux the load failed and nothing was ever armed.
+        libc = ctypes.CDLL(None, use_errno=True)
+        return libc.prctl(PR_SET_PDEATHSIG, int(stop_signal), 0, 0, 0) == 0
+    except (OSError, AttributeError):
+        return False
+
 
 class CompanionManager:
     """Forks and supervises companion processes.
@@ -41,6 +68,7 @@ class CompanionManager:
         self.control = None
         self.stopping = False
         self._wakeup_pipe = None
+        self.parent_pid = None
 
     def run(self) -> None:
         """Run the manager's supervision loop. This is the forked child body.
@@ -51,8 +79,14 @@ class CompanionManager:
         companions down and returns. Each tick reaps exited companions, retries
         any that are backing off, promotes those past ``startsecs``, and kills
         any that overran their stop deadline.
+
+        If the arbiter dies, the manager stops too: it arms a parent-death
+        signal on Linux and, as a portable fallback, watches ``getppid`` each
+        tick so it never keeps companions running under a dead arbiter.
         """
+        self.parent_pid = os.getppid()
         self._install_signals()
+        set_parent_death_signal(signal.SIGTERM)
         if self.control is not None:
             self.control.create()
         for process in self.processes.values():
@@ -60,6 +94,9 @@ class CompanionManager:
         self.log.info("companion manager running (pid %s)", self.pid)
         try:
             while not self.stopping:
+                if self._parent_gone():
+                    self.log.info("companion manager parent gone, stopping")
+                    break
                 self._tick()
                 self._wait()
             self.stop_all()
@@ -67,6 +104,10 @@ class CompanionManager:
             if self.control is not None:
                 self.control.close()
 
+    def _parent_gone(self) -> bool:
+        """True once the arbiter that forked the manager has exited."""
+        return os.getppid() != self.parent_pid
+
     def _tick(self, now: float = None) -> None:
         """One supervision pass over every companion."""
         now = now or time.time()
@@ -283,6 +324,10 @@ class CompanionManager:
 
         try:
             self._close_manager_fds()
+            set_parent_death_signal(signal.SIGTERM)
+            if os.getppid() != self.pid:
+                # Manager already died between fork and arming: do not run.
+                os._exit(0)
             self._apply_environment(process.config)
             self._redirect_output(process.config)
             target = self._resolve_target(process.config.target)
diff --git a/tests/test_companion_manager.py b/tests/test_companion_manager.py
index 4d3d4f9c..d564839b 100644
--- a/tests/test_companion_manager.py
+++ b/tests/test_companion_manager.py
@@ -9,7 +9,11 @@ from unittest import mock
 import pytest
 
 from gunicorn.companion.control import CommandError
-from gunicorn.companion.manager import CompanionManager
+from gunicorn.companion.manager import (
+    PR_SET_PDEATHSIG,
+    CompanionManager,
+    set_parent_death_signal,
+)
 from gunicorn.companion.config import CompanionConfig
 from gunicorn.companion.process import State
 
@@ -40,6 +44,46 @@ def test_resolve_target_rejects_bad_string():
         CompanionManager._resolve_target("no_colon")
 
 
+def test_set_parent_death_signal_noop_off_linux():
+    with mock.patch("sys.platform", "darwin"):
+        assert set_parent_death_signal(signal.SIGTERM) is False
+
+
+def test_set_parent_death_signal_arms_on_linux():
+    libc = mock.Mock()
+    libc.prctl.return_value = 0
+    with mock.patch("sys.platform", "linux"), \
+            mock.patch("ctypes.CDLL", return_value=libc) as cdll:
+        assert set_parent_death_signal(signal.SIGTERM) is True
+    # dlopen(NULL) rather than a glibc-only soname, so musl works too.
+    cdll.assert_called_once_with(None, use_errno=True)
+    libc.prctl.assert_called_once_with(
+        PR_SET_PDEATHSIG, int(signal.SIGTERM), 0, 0, 0)
+
+
+def test_set_parent_death_signal_reports_prctl_failure():
+    libc = mock.Mock()
+    libc.prctl.return_value = -1
+    with mock.patch("sys.platform", "linux"), \
+            mock.patch("ctypes.CDLL", return_value=libc):
+        assert set_parent_death_signal(signal.SIGTERM) is False
+
+
+def test_set_parent_death_signal_reports_missing_libc():
+    with mock.patch("sys.platform", "linux"), \
+            mock.patch("ctypes.CDLL", side_effect=OSError("no libc")):
+        assert set_parent_death_signal(signal.SIGTERM) is False
+
+
+def test_parent_gone_detects_reparenting():
+    manager = make_manager("rq")
+    manager.parent_pid = 4242
+    with mock.patch("os.getppid", return_value=4242):
+        assert manager._parent_gone() is False
+    with mock.patch("os.getppid", return_value=1):
+        assert manager._parent_gone() is True
+
+
 def test_close_manager_fds_closes_control_and_pipe():
     manager = make_manager("rq")
     manager.control = mock.Mock()