feat(companion): Implement stop_process control command

Add stop_process(name) following supervisor's stop rules: it always sets
manual_stop so the companion will not auto-restart. RUNNING/STARTING are sent
their stop_signal and moved to STOPPING with a stop_deadline (now +
stop_timeout) for the run loop to reap or SIGKILL; BACKOFF cancels its pending
retry and settles in STOPPED; STOPPED and STOPPING are success no-ops. Add
_signal_number to resolve a signal name and a stop_deadline field on
CompanionProcess.

Add tests for the running, backoff, already-stopped, unknown, and signal-name
cases.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
Tanmoy Sarkar 2026-06-09 18:06:58 +05:30
parent 8c9aa962ae
commit 8d9eb76e3d
4 changed files with 93 additions and 1 deletions

View File

@ -678,7 +678,7 @@ No per-companion logic in Arbiter.
- [x] Implement `STARTING -> RUNNING` using `startsecs`.
- [x] Implement `BACKOFF` with fixed `companion_restart_delay`.
- [x] Implement `start_process`.
- [ ] Implement `stop_process`.
- [x] Implement `stop_process`.
- [ ] Implement `restart_process`.
- [ ] Preserve and clear `manual_stop` correctly.
- [ ] Add Unix control socket.

View File

@ -7,6 +7,7 @@ from __future__ import annotations
import importlib
import os
import signal
import time
from typing import TYPE_CHECKING, Callable, Iterable, Union
@ -79,6 +80,46 @@ class CompanionManager:
self.spawn_process(proc)
return True, "%s started" % name
def stop_process(self, name: str, now: Union[float, None] = None):
    """Stop a companion by name (the control ``stop`` command).

    Sets ``manual_stop`` so the companion will not auto-restart. A live
    companion (RUNNING or STARTING) is sent its ``stop_signal`` and moved
    to STOPPING with a ``stop_deadline``; the run loop reaps it, or SIGKILLs
    it once the deadline passes. BACKOFF just cancels the pending retry and
    settles in STOPPED. STOPPED and STOPPING are already-there success
    no-ops.

    Args:
        name: Key of the companion in ``self.processes``.
        now: Timestamp used as the base for ``stop_deadline``. When omitted
            (``None``) the current ``time.time()`` is used; an explicit
            value, including ``0.0``, is honoured as given.

    Returns:
        ``(ok, message)``. ``ok`` is ``False`` only for an unknown name.

    Raises:
        ValueError: if the configured ``stop_signal`` cannot be resolved.
    """
    proc = self.processes.get(name)
    if proc is None:
        return False, "unknown companion %s" % name

    # Recorded on every path for a known companion, including the no-ops,
    # so a stop request always suppresses auto-restart.
    proc.manual_stop = True

    if proc.state in (State.STOPPED, State.STOPPING):
        return True, "%s already %s" % (name, proc.state.lower())

    if proc.state == State.BACKOFF:
        # Nothing is running: drop the scheduled retry and settle.
        proc.next_retry_at = None
        proc.state = State.STOPPED
        return True, "%s stopped" % name

    # Compare against None rather than relying on truthiness, so that a
    # caller-supplied timestamp of 0 is not replaced by the wall clock.
    if now is None:
        now = time.time()

    # NOTE(review): os.kill raises ProcessLookupError if the pid no longer
    # exists; that propagates to the caller here -- confirm this is intended.
    os.kill(proc.pid, self._signal_number(proc.config.stop_signal))
    proc.state = State.STOPPING
    proc.stop_deadline = now + proc.config.stop_timeout
    self.log.info("companion %s stopping (pid %s)", name, proc.pid)
    return True, "%s stopping" % name
@staticmethod
def _signal_number(sig) -> int:
    """Resolve a stop signal to its number, e.g. ``"SIGTERM"`` -> 15.

    Accepts a signal name or a raw number and validates both against the
    real signal table, so a typo like ``"SIGTRM"`` fails loudly here rather
    than silently sending the wrong signal (or none).

    Args:
        sig: A signal name as a string (looked up by name in
            ``signal.Signals``) or any other value (looked up by value).

    Returns:
        The matching ``signal.Signals`` member, which compares equal to
        its integer signal number.

    Raises:
        ValueError: if ``sig`` names or numbers no known signal.
    """
    try:
        if isinstance(sig, str):
            return signal.Signals[sig]
        return signal.Signals(sig)
    except (KeyError, ValueError):
        # The message already carries the offending value; suppress the
        # internal enum lookup error so the traceback shows only this one.
        raise ValueError("unknown stop signal %r" % (sig,)) from None
def reap_processes(self) -> list:
"""Reap any companions that have exited and record their exit info.

View File

@ -110,6 +110,7 @@ class CompanionProcess:
self.started_at = None
self.exited_at = None
self.next_retry_at = None
self.stop_deadline = None
self.restart_count = 0
self.exit_count = 0

View File

@ -3,6 +3,7 @@
# See the NOTICE for more information.
import os
import signal
from unittest import mock
import pytest
@ -168,6 +169,55 @@ def test_start_process_unknown():
assert not ok
def test_stop_process_running_signals_and_stopping():
    """A RUNNING companion is signalled and parked in STOPPING."""
    manager = make_manager("rq")
    companion = manager.processes["rq"]
    companion.state = State.RUNNING
    companion.pid = 80
    companion.config.stop_timeout = 60

    with mock.patch("os.kill") as fake_kill:
        result = manager.stop_process("rq", now=200.0)
    ok = result[0]

    # Exactly one signal, sent to the companion's pid.
    fake_kill.assert_called_once_with(80, signal.SIGTERM)
    assert ok
    assert companion.state == State.STOPPING
    assert companion.manual_stop is True
    # Deadline is the supplied timestamp plus the configured timeout.
    assert companion.stop_deadline == 260.0
def test_stop_process_backoff_to_stopped():
    """Stopping a BACKOFF companion cancels the retry without signalling."""
    manager = make_manager("rq")
    companion = manager.processes["rq"]
    companion.next_retry_at = 999.0
    companion.state = State.BACKOFF

    with mock.patch("os.kill") as fake_kill:
        result = manager.stop_process("rq")
    ok = result[0]

    # No process is alive in BACKOFF, so nothing may be signalled.
    fake_kill.assert_not_called()
    assert ok
    assert companion.state == State.STOPPED
    assert companion.next_retry_at is None
    assert companion.manual_stop is True
def test_stop_process_already_stopped():
    """Stopping a freshly made manager's companion succeeds without a signal."""
    manager = make_manager("rq")

    with mock.patch("os.kill") as fake_kill:
        result = manager.stop_process("rq")
    ok = result[0]

    fake_kill.assert_not_called()
    assert ok
    # manual_stop is recorded even though no signal was sent.
    assert manager.processes["rq"].manual_stop is True
def test_stop_process_unknown():
    """An unrecognised companion name yields a falsy ``ok``."""
    manager = make_manager("rq")
    result = manager.stop_process("nope")
    ok, _message = result
    assert not ok
def test_signal_number_resolves_name():
    """Both a signal name and a raw number resolve to the same signal."""
    resolve = CompanionManager._signal_number

    from_name = resolve("SIGKILL")
    assert from_name == signal.SIGKILL

    from_number = resolve(9)
    assert from_number == 9
def test_signal_number_rejects_bad():
    """A misspelled signal name is rejected with ValueError."""
    misspelled = "SIGTRM"
    with pytest.raises(ValueError):
        CompanionManager._signal_number(misspelled)
def test_handle_exit_unexpected_backoff():
mgr = make_manager("rq")
proc = mgr.processes["rq"]