feat(companion): Implement start_process control command

Add start_process(name) following supervisor's start rules: STOPPED and
BACKOFF clear manual_stop, drop any pending retry, and spawn now; RUNNING and
STARTING report success without acting; STOPPING is rejected so the caller
retries. Returns (ok, message).

Add tests for the stopped, backoff, running, stopping, and unknown cases.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
Tanmoy Sarkar 2026-06-09 17:59:35 +05:30
parent 87bc4cf70e
commit 8c9aa962ae
3 changed files with 65 additions and 1 deletions

View File

@ -677,7 +677,7 @@ No per-companion logic in Arbiter.
- [x] Reap exited companion processes.
- [x] Implement `STARTING -> RUNNING` using `startsecs`.
- [x] Implement `BACKOFF` with fixed `companion_restart_delay`.
- [ ] Implement `start_process`.
- [x] Implement `start_process`.
- [ ] Implement `stop_process`.
- [ ] Implement `restart_process`.
- [ ] Preserve and clear `manual_stop` correctly.

View File

@ -58,6 +58,27 @@ class CompanionManager:
os._exit(1)
os._exit(0)
def start_process(self, name: str):
    """Handle the control ``start`` command for the companion called ``name``.

    The outcome depends on the companion's current state:

    * not present in ``self.processes`` -- rejected as unknown;
    * STOPPING -- rejected, and the message asks the caller to retry;
    * RUNNING or STARTING -- reported as success; nothing is spawned;
    * any other state (STOPPED and BACKOFF are the intended cases) --
      ``manual_stop`` is cleared, the pending ``next_retry_at`` is dropped,
      and the companion is handed to ``self.spawn_process`` right away.

    Returns an ``(ok, message)`` tuple.
    """
    companion = self.processes.get(name)
    if companion is None:
        return False, "unknown companion %s" % name

    current = companion.state

    # A companion that is still shutting down cannot be started yet.
    if current == State.STOPPING:
        return False, "%s is stopping; retry" % name

    # Already up (or on its way up): succeed without touching anything.
    if current in (State.RUNNING, State.STARTING):
        return True, "%s already %s" % (name, current.lower())

    # An explicit start overrides an earlier manual stop and any scheduled
    # retry, then launches immediately.
    companion.manual_stop = False
    companion.next_retry_at = None
    self.spawn_process(companion)
    return True, "%s started" % name
def reap_processes(self) -> list:
"""Reap any companions that have exited and record their exit info.

View File

@ -125,6 +125,49 @@ def test_reap_no_children():
assert mgr.reap_processes() == []
def test_start_process_stopped_spawns():
    """Starting the companion built by ``make_manager`` forks exactly once.

    NOTE(review): relies on ``make_manager`` yielding a companion that is not
    already running (STOPPED, going by the test name) -- confirm in the helper.
    """
    manager = make_manager("rq")
    companion = manager.processes["rq"]

    with mock.patch("os.fork", return_value=70) as fake_fork:
        started, _message = manager.start_process("rq")

    fake_fork.assert_called_once()
    assert started
    assert companion.state == State.STARTING
    assert companion.manual_stop is False
def test_start_process_backoff_cancels_retry():
    """A BACKOFF companion is spawned at once; retry and manual_stop are reset."""
    manager = make_manager("rq")
    companion = manager.processes["rq"]

    # Put the companion in BACKOFF with a retry scheduled and the
    # manual-stop flag set, so the test can see both being cleared.
    companion.manual_stop = True
    companion.next_retry_at = 999.0
    companion.state = State.BACKOFF

    with mock.patch("os.fork", return_value=71):
        started, _message = manager.start_process("rq")

    assert started
    assert companion.state == State.STARTING
    assert companion.next_retry_at is None
    assert companion.manual_stop is False
def test_start_process_running_is_noop():
    """``start_process`` succeeds without forking for already-up companions.

    ``start_process`` treats RUNNING and STARTING the same way, so both are
    exercised here; each must report success, leave the state untouched, and
    never reach ``os.fork``.
    """
    for already_up in (State.RUNNING, State.STARTING):
        mgr = make_manager("rq")
        proc = mgr.processes["rq"]
        proc.state = already_up

        with mock.patch("os.fork") as fork:
            ok, _ = mgr.start_process("rq")

        assert ok
        assert proc.state == already_up
        fork.assert_not_called()
def test_start_process_stopping_rejected():
    """A STOPPING companion is refused, and nothing is spawned.

    ``os.fork`` is patched so that a regression which spawns anyway is caught
    by the assertion instead of really forking inside the test run.
    """
    mgr = make_manager("rq")
    proc = mgr.processes["rq"]
    proc.state = State.STOPPING

    with mock.patch("os.fork") as fork:
        ok, msg = mgr.start_process("rq")

    assert not ok and "stopping" in msg
    fork.assert_not_called()
    # The rejection must not disturb the in-progress stop.
    assert proc.state == State.STOPPING
def test_start_process_unknown():
    """Asking to start a name that is not registered reports failure."""
    manager = make_manager("rq")
    started, _message = manager.start_process("nope")
    assert not started
def test_handle_exit_unexpected_backoff():
mgr = make_manager("rq")
proc = mgr.processes["rq"]