From 8e0ca342774630d4bf4a394d9bdd70f9472c6af7 Mon Sep 17 00:00:00 2001 From: Tanmoy Sarkar Date: Tue, 9 Jun 2026 18:10:40 +0530 Subject: [PATCH] feat(companion): Implement restart_process control command Add restart_process(name) following supervisor's restart rules: it always clears manual_stop. RUNNING/STARTING are sent their stop_signal and enter STOPPING with restart_pending set and a deadline from reload_timeout; the reaper respawns them immediately once the old child exits. BACKOFF and STOPPED start again right away. STOPPING is rejected. It never rereads config. handle_exit now honors restart_pending first, respawning immediately (bumping restart_count) instead of going to STOPPED or BACKOFF. Add a restart_pending field on CompanionProcess. Add tests for the running, pending-reap, stopped, backoff, and stopping cases. Co-Authored-By: Claude Opus 4.8 --- docs/design/companion-process-manager.md | 2 +- gunicorn/companion/manager.py | 41 +++++++++++++++-- gunicorn/companion/process.py | 1 + tests/test_companion_manager.py | 56 ++++++++++++++++++++++++ 4 files changed, 96 insertions(+), 4 deletions(-) diff --git a/docs/design/companion-process-manager.md b/docs/design/companion-process-manager.md index cdfdb25f..8c3c690b 100644 --- a/docs/design/companion-process-manager.md +++ b/docs/design/companion-process-manager.md @@ -679,7 +679,7 @@ No per-companion logic in Arbiter. - [x] Implement `BACKOFF` with fixed `companion_restart_delay`. - [x] Implement `start_process`. - [x] Implement `stop_process`. -- [ ] Implement `restart_process`. +- [x] Implement `restart_process`. - [ ] Preserve and clear `manual_stop` correctly. - [ ] Add Unix control socket. - [ ] Implement JSON command protocol. 
diff --git a/gunicorn/companion/manager.py b/gunicorn/companion/manager.py
index a3a4034e..65ba365f 100644
--- a/gunicorn/companion/manager.py
+++ b/gunicorn/companion/manager.py
@@ -107,6 +107,34 @@ class CompanionManager:
         self.log.info("companion %s stopping (pid %s)", name, proc.pid)
         return True, "%s stopping" % name
 
+    def restart_process(self, name: str, now: float = None):
+        """Restart a companion by name (the control ``restart`` command).
+
+        Always clears ``manual_stop`` so the companion comes back. A live
+        companion (RUNNING or STARTING) is sent its stop signal first; only then
+        does it go STOPPING with ``restart_pending`` set and a deadline from
+        ``reload_timeout``; the reaper respawns it once the old child exits.
+        BACKOFF and STOPPED start again immediately. STOPPING is rejected so the
+        caller retries. This never rereads config. Returns ``(ok, message)``.
+        """
+        proc = self.processes.get(name)
+        if proc is None:
+            return False, "unknown companion %s" % name
+        if proc.state == State.STOPPING:
+            return False, "%s is stopping; retry" % name
+        proc.manual_stop = False
+        if proc.state in (State.RUNNING, State.STARTING):
+            now = time.time() if now is None else now
+            os.kill(proc.pid, self._signal_number(proc.config.stop_signal))
+            proc.restart_pending = True
+            proc.state = State.STOPPING
+            proc.stop_deadline = now + proc.config.reload_timeout
+            self.log.info("companion %s restarting (pid %s)", name, proc.pid)
+            return True, "%s restarting" % name
+        proc.next_retry_at = None
+        self.spawn_process(proc)
+        return True, "%s started" % name
+
     @staticmethod
     def _signal_number(sig) -> int:
         """Resolve a stop signal to its number, e.g. ``"SIGTERM"`` -> 15.
@@ -157,14 +185,21 @@ class CompanionManager:
         return reaped
 
     def handle_exit(self, proc: CompanionProcess, now: float = None) -> None:
-        """Decide a companion's fate after it exits: stay stopped or back off.
+        """Decide a companion's fate after it exits: restart, stop, or back off.
 
-        A companion that was stopped on purpose settles in STOPPED and stays
-        there. Any other exit is unexpected, so it enters BACKOFF and is
+        A pending restart wins: the old child was asked to stop only so a fresh
+        one could take its place, so it is respawned immediately. Otherwise a
+        companion that was stopped on purpose settles in STOPPED and stays
+        there, and any other exit is unexpected, so it enters BACKOFF and is
         scheduled to restart after a fixed ``restart_delay`` (no exponential
         backoff, no retry cap).
         """
         now = now or time.time()
+        if proc.restart_pending:
+            proc.restart_pending = False
+            proc.restart_count += 1
+            self.spawn_process(proc)
+            return
         if proc.manual_stop:
             proc.state = State.STOPPED
             proc.next_retry_at = None
diff --git a/gunicorn/companion/process.py b/gunicorn/companion/process.py
index adb30647..e8c733ea 100644
--- a/gunicorn/companion/process.py
+++ b/gunicorn/companion/process.py
@@ -120,6 +120,7 @@ class CompanionProcess:
         self.last_exit_signal = None
 
         self.manual_stop = False
+        self.restart_pending = False
 
     @property
     def name(self):
diff --git a/tests/test_companion_manager.py b/tests/test_companion_manager.py
index ee9ed8e0..13247e55 100644
--- a/tests/test_companion_manager.py
+++ b/tests/test_companion_manager.py
@@ -218,6 +218,62 @@ def test_signal_number_rejects_bad():
         CompanionManager._signal_number("SIGTRM")
 
 
+def test_restart_process_running_stops_with_reload_timeout():
+    mgr = make_manager("rq")
+    proc = mgr.processes["rq"]
+    proc.state = State.RUNNING
+    proc.pid = 90
+    proc.config.reload_timeout = 30
+    proc.manual_stop = True
+    with mock.patch("os.kill") as kill:
+        ok, _ = mgr.restart_process("rq", now=300.0)
+    kill.assert_called_once_with(90, signal.SIGTERM)
+    assert ok and proc.state == State.STOPPING
+    assert proc.restart_pending is True and proc.stop_deadline == 330.0
+    assert proc.manual_stop is False
+
+
+def test_restart_pending_reap_respawns_immediately():
+    mgr = make_manager("rq")
+    proc = mgr.processes["rq"]
+    proc.state = State.STOPPING
+    proc.restart_pending = True
+    proc.pid = 91
+    with mock.patch("os.waitpid", side_effect=[(91, 0), (0, 0)]), \
+            mock.patch("os.fork", return_value=92):
+        mgr.reap_processes()
+    assert proc.state == State.STARTING
+    assert proc.pid == 92
+    assert proc.restart_pending is False
+    assert proc.restart_count == 1
+
+
+def test_restart_process_stopped_starts_now():
+    mgr = make_manager("rq")
+    proc = mgr.processes["rq"]
+    with mock.patch("os.fork", return_value=93), mock.patch("os.kill") as kill:
+        ok, _ = mgr.restart_process("rq")
+    kill.assert_not_called()
+    assert ok and proc.state == State.STARTING
+
+
+def test_restart_process_backoff_starts_now():
+    mgr = make_manager("rq")
+    proc = mgr.processes["rq"]
+    proc.state = State.BACKOFF
+    proc.next_retry_at = 999.0
+    with mock.patch("os.fork", return_value=94):
+        ok, _ = mgr.restart_process("rq")
+    assert ok and proc.state == State.STARTING and proc.next_retry_at is None
+
+
+def test_restart_process_stopping_rejected():
+    mgr = make_manager("rq")
+    mgr.processes["rq"].state = State.STOPPING
+    ok, msg = mgr.restart_process("rq")
+    assert not ok and "stopping" in msg
+
+
 def test_handle_exit_unexpected_backoff():
     mgr = make_manager("rq")
     proc = mgr.processes["rq"]