fix(dirty): detect parent death and self-terminate

Add ppid monitoring to the dirty arbiter's worker monitor loop. If the
main arbiter dies unexpectedly (SIGKILL, crash, OOM), the dirty arbiter
sees that os.getppid() no longer matches its recorded parent PID, logs a
warning, and shuts itself down gracefully.

This complements the existing orphan cleanup on startup.
This commit is contained in:
Benoit Chesneau 2026-01-24 22:36:50 +01:00
parent b67ff0b31d
commit 79f85af55e
2 changed files with 41 additions and 0 deletions

View File

@ -196,6 +196,14 @@ class DirtyArbiter:
"""Periodically check worker health and manage pool."""
while self.alive:
await asyncio.sleep(1.0)
# Check if parent (main arbiter) died unexpectedly
if os.getppid() != self.ppid:
self.log.warning("Parent changed, shutting down dirty arbiter")
self.alive = False
self._shutdown()
return
await self.murder_workers()
await self.manage_workers()

View File

@ -894,6 +894,7 @@ class TestDirtyArbiterWorkerMonitor:
arbiter = DirtyArbiter(cfg=cfg, log=log)
arbiter.pid = os.getpid()
arbiter.ppid = os.getppid() # Match actual parent for ppid check
arbiter.alive = True
monitor_calls = 0
@ -917,6 +918,38 @@ class TestDirtyArbiterWorkerMonitor:
arbiter._cleanup_sync()
@pytest.mark.asyncio
async def test_worker_monitor_detects_parent_death(self):
    """Test worker monitor exits when parent dies.

    The arbiter is given a recorded parent PID that differs from the
    real one. Running ``_worker_monitor`` is then expected to log a
    "Parent changed" warning, clear ``alive``, call ``_shutdown``
    exactly once, and return.
    """
    cfg = Config()
    cfg.set("dirty_workers", 0)
    log = MockLog()

    arbiter = DirtyArbiter(cfg=cfg, log=log)
    arbiter.pid = os.getpid()
    # Derive the fake parent PID from the real one so it can never equal
    # os.getppid(). A fixed value such as 99999 is a valid PID and could
    # coincide with the real parent, in which case the ppid check would
    # not fire and the monitor would not exit through this path.
    arbiter.ppid = os.getppid() + 1
    arbiter.alive = True

    shutdown_called = []

    def mock_shutdown():
        # Record the call instead of performing a real shutdown.
        shutdown_called.append(True)

    arbiter._shutdown = mock_shutdown

    # Run monitor - should detect parent change and exit
    await arbiter._worker_monitor()

    # Should have detected parent death
    assert arbiter.alive is False
    assert len(shutdown_called) == 1

    # Check log message
    log_messages = [msg for level, msg in log.messages if level == "warning"]
    assert any("Parent changed" in msg for msg in log_messages)

    arbiter._cleanup_sync()
class TestDirtyArbiterHandleSigchld:
"""Tests for SIGCHLD handling."""