mirror of
https://github.com/frappe/gunicorn.git
synced 2026-07-04 03:31:29 +08:00
fix(dirty): add orphan cleanup via well-known PID file
When the main arbiter crashes and restarts, orphaned dirty arbiters may continue running. This adds detection and cleanup: - Add well-known PID file location based on proc_name - Dirty arbiter writes PID on startup, removes on exit - Main arbiter checks for orphans on fresh start (not USR2) - Uses self.proc_name for USR2 compatibility (myapp vs myapp.2) During USR2 upgrade, old and new dirty arbiters coexist with separate PID files, preventing the old from removing the new's file.
This commit is contained in:
parent
f6418d4eb0
commit
e21d23bfa6
@ -71,6 +71,7 @@ class Arbiter:
|
||||
# Dirty arbiter process
|
||||
self.dirty_arbiter_pid = 0
|
||||
self.dirty_arbiter = None
|
||||
self.dirty_pidfile = None # Well-known location for orphan detection
|
||||
|
||||
cwd = util.getcwd()
|
||||
|
||||
@ -735,6 +736,57 @@ class Arbiter:
|
||||
# Dirty Arbiter Management
|
||||
# =========================================================================
|
||||
|
||||
def _get_dirty_pidfile_path(self):
|
||||
"""Get the well-known PID file path for orphan detection.
|
||||
|
||||
Uses self.proc_name (not self.cfg.proc_name) so that during USR2
|
||||
the new master gets a different PID file path ("myapp.2" vs "myapp").
|
||||
This prevents the old dirty arbiter from removing the new one's PID file.
|
||||
"""
|
||||
import tempfile
|
||||
safe_name = self.proc_name.replace('/', '_').replace(' ', '_')
|
||||
return os.path.join(tempfile.gettempdir(), f"gunicorn-dirty-{safe_name}.pid")
|
||||
|
||||
def _cleanup_orphaned_dirty_arbiter(self):
|
||||
"""Kill any orphaned dirty arbiter from a previous crash.
|
||||
|
||||
Only runs on fresh start (master_pid == 0), not during USR2.
|
||||
"""
|
||||
# During USR2, master_pid is set - don't cleanup old dirty arbiter
|
||||
if self.master_pid != 0:
|
||||
return
|
||||
|
||||
pidfile = self._get_dirty_pidfile_path()
|
||||
if not os.path.exists(pidfile):
|
||||
return
|
||||
|
||||
try:
|
||||
with open(pidfile) as f:
|
||||
old_pid = int(f.read().strip())
|
||||
|
||||
# Check if process exists
|
||||
os.kill(old_pid, 0)
|
||||
# Process exists - kill orphan
|
||||
self.log.warning("Killing orphaned dirty arbiter (pid: %s)", old_pid)
|
||||
os.kill(old_pid, signal.SIGTERM)
|
||||
# Wait briefly for graceful exit
|
||||
for _ in range(10):
|
||||
time.sleep(0.1)
|
||||
try:
|
||||
os.kill(old_pid, 0)
|
||||
except OSError:
|
||||
break
|
||||
else:
|
||||
os.kill(old_pid, signal.SIGKILL)
|
||||
except (ValueError, IOError, OSError):
|
||||
pass
|
||||
|
||||
# Remove stale PID file
|
||||
try:
|
||||
os.unlink(pidfile)
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
def spawn_dirty_arbiter(self):
|
||||
"""\
|
||||
Spawn the dirty arbiter process.
|
||||
@ -745,7 +797,16 @@ class Arbiter:
|
||||
if self.dirty_arbiter_pid:
|
||||
return # Already running
|
||||
|
||||
self.dirty_arbiter = DirtyArbiter(self.cfg, self.log)
|
||||
# Cleanup any orphaned dirty arbiter from previous crash
|
||||
self._cleanup_orphaned_dirty_arbiter()
|
||||
|
||||
# Get well-known PID file path
|
||||
self.dirty_pidfile = self._get_dirty_pidfile_path()
|
||||
|
||||
self.dirty_arbiter = DirtyArbiter(
|
||||
self.cfg, self.log,
|
||||
pidfile=self.dirty_pidfile
|
||||
)
|
||||
socket_path = self.dirty_arbiter.socket_path
|
||||
|
||||
pid = os.fork()
|
||||
|
||||
@ -44,7 +44,7 @@ class DirtyArbiter:
|
||||
# Worker boot error code
|
||||
WORKER_BOOT_ERROR = 3
|
||||
|
||||
def __init__(self, cfg, log, socket_path=None):
|
||||
def __init__(self, cfg, log, socket_path=None, pidfile=None):
|
||||
"""
|
||||
Initialize the dirty arbiter.
|
||||
|
||||
@ -52,11 +52,13 @@ class DirtyArbiter:
|
||||
cfg: Gunicorn config
|
||||
log: Logger
|
||||
socket_path: Path to the arbiter's Unix socket
|
||||
pidfile: Well-known PID file location for orphan detection
|
||||
"""
|
||||
self.cfg = cfg
|
||||
self.log = log
|
||||
self.pid = None
|
||||
self.ppid = os.getpid()
|
||||
self.pidfile = pidfile # Well-known location for orphan detection
|
||||
|
||||
# Use a temp directory for sockets
|
||||
self.tmpdir = tempfile.mkdtemp(prefix="gunicorn-dirty-")
|
||||
@ -82,6 +84,14 @@ class DirtyArbiter:
|
||||
self.pid = os.getpid()
|
||||
self.log.info("Dirty arbiter starting (pid: %s)", self.pid)
|
||||
|
||||
# Write PID to well-known location for orphan detection
|
||||
if self.pidfile:
|
||||
try:
|
||||
with open(self.pidfile, 'w') as f:
|
||||
f.write(str(self.pid))
|
||||
except IOError as e:
|
||||
self.log.warning("Failed to write PID file: %s", e)
|
||||
|
||||
# Call hook
|
||||
self.cfg.on_dirty_starting(self)
|
||||
|
||||
@ -565,6 +575,13 @@ class DirtyArbiter:
|
||||
|
||||
def _cleanup_sync(self):
|
||||
"""Synchronous cleanup on exit."""
|
||||
# Remove PID file
|
||||
if self.pidfile and os.path.exists(self.pidfile):
|
||||
try:
|
||||
os.unlink(self.pidfile)
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
# Clean up socket
|
||||
if os.path.exists(self.socket_path):
|
||||
try:
|
||||
|
||||
@ -552,3 +552,187 @@ class TestWorkerLifecycle:
|
||||
arbiter.murder_workers()
|
||||
|
||||
mock_kill.assert_called_once_with(42, signal.SIGKILL)
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Dirty Arbiter Orphan Cleanup Tests
|
||||
# ============================================================================
|
||||
|
||||
class TestDirtyArbiterOrphanCleanup:
|
||||
"""Tests for dirty arbiter orphan detection and cleanup."""
|
||||
|
||||
def test_get_dirty_pidfile_path(self):
|
||||
"""Verify pidfile path is generated correctly."""
|
||||
arbiter = gunicorn.arbiter.Arbiter(DummyApplication())
|
||||
arbiter.proc_name = 'myapp'
|
||||
|
||||
path = arbiter._get_dirty_pidfile_path()
|
||||
|
||||
import tempfile
|
||||
expected = os.path.join(tempfile.gettempdir(), 'gunicorn-dirty-myapp.pid')
|
||||
assert path == expected
|
||||
|
||||
def test_get_dirty_pidfile_path_sanitizes_name(self):
|
||||
"""Verify special characters in proc_name are sanitized."""
|
||||
arbiter = gunicorn.arbiter.Arbiter(DummyApplication())
|
||||
arbiter.proc_name = 'my/app name'
|
||||
|
||||
path = arbiter._get_dirty_pidfile_path()
|
||||
|
||||
import tempfile
|
||||
expected = os.path.join(tempfile.gettempdir(), 'gunicorn-dirty-my_app_name.pid')
|
||||
assert path == expected
|
||||
|
||||
def test_get_dirty_pidfile_path_uses_proc_name_not_cfg(self):
|
||||
"""Verify pidfile path uses self.proc_name for USR2 compatibility.
|
||||
|
||||
During USR2, self.proc_name becomes 'myapp.2' while self.cfg.proc_name
|
||||
stays 'myapp'. Using self.proc_name ensures new and old dirty arbiters
|
||||
have different PID file paths.
|
||||
"""
|
||||
arbiter = gunicorn.arbiter.Arbiter(DummyApplication())
|
||||
arbiter.cfg.set('proc_name', 'myapp')
|
||||
arbiter.proc_name = 'myapp.2' # Simulates USR2 child
|
||||
|
||||
path = arbiter._get_dirty_pidfile_path()
|
||||
|
||||
import tempfile
|
||||
# Should use self.proc_name, not self.cfg.proc_name
|
||||
expected = os.path.join(tempfile.gettempdir(), 'gunicorn-dirty-myapp.2.pid')
|
||||
assert path == expected
|
||||
|
||||
def test_cleanup_orphaned_skipped_during_usr2(self):
|
||||
"""Verify cleanup is skipped during USR2 upgrade (master_pid != 0)."""
|
||||
arbiter = gunicorn.arbiter.Arbiter(DummyApplication())
|
||||
arbiter.master_pid = 12345 # Indicates USR2 upgrade in progress
|
||||
|
||||
with mock.patch.object(arbiter, '_get_dirty_pidfile_path') as mock_path:
|
||||
arbiter._cleanup_orphaned_dirty_arbiter()
|
||||
|
||||
# Should not even check the pidfile path
|
||||
mock_path.assert_not_called()
|
||||
|
||||
def test_cleanup_orphaned_no_pidfile(self):
|
||||
"""Verify cleanup handles missing pidfile gracefully."""
|
||||
arbiter = gunicorn.arbiter.Arbiter(DummyApplication())
|
||||
arbiter.master_pid = 0
|
||||
|
||||
with mock.patch('os.path.exists', return_value=False):
|
||||
# Should not raise any exception
|
||||
arbiter._cleanup_orphaned_dirty_arbiter()
|
||||
|
||||
@mock.patch('os.unlink')
|
||||
@mock.patch('os.kill')
|
||||
def test_cleanup_orphaned_kills_existing_process(self, mock_kill, mock_unlink):
|
||||
"""Verify cleanup kills orphaned dirty arbiter process."""
|
||||
arbiter = gunicorn.arbiter.Arbiter(DummyApplication())
|
||||
arbiter.master_pid = 0
|
||||
|
||||
# First kill(pid, 0) succeeds (process exists), then SIGTERM causes exit
|
||||
mock_kill.side_effect = [None, None, OSError(3, "No such process")]
|
||||
|
||||
import tempfile
|
||||
pidfile = os.path.join(tempfile.gettempdir(), 'gunicorn-dirty-test.pid')
|
||||
|
||||
with mock.patch('os.path.exists', return_value=True), \
|
||||
mock.patch('builtins.open', mock.mock_open(read_data='12345')), \
|
||||
mock.patch.object(arbiter, '_get_dirty_pidfile_path', return_value=pidfile), \
|
||||
mock.patch('time.sleep'):
|
||||
arbiter._cleanup_orphaned_dirty_arbiter()
|
||||
|
||||
# Should have sent signal 0 (check), then SIGTERM
|
||||
assert mock_kill.call_args_list[0] == mock.call(12345, 0)
|
||||
assert mock_kill.call_args_list[1] == mock.call(12345, signal.SIGTERM)
|
||||
# Should unlink the stale pidfile
|
||||
mock_unlink.assert_called_with(pidfile)
|
||||
|
||||
@mock.patch('os.unlink')
|
||||
@mock.patch('os.kill')
|
||||
def test_cleanup_orphaned_sigkill_if_sigterm_fails(self, mock_kill, mock_unlink):
|
||||
"""Verify cleanup sends SIGKILL if SIGTERM doesn't work."""
|
||||
arbiter = gunicorn.arbiter.Arbiter(DummyApplication())
|
||||
arbiter.master_pid = 0
|
||||
|
||||
# Process exists on all checks until SIGKILL
|
||||
def kill_side_effect(pid, sig):
|
||||
if sig == signal.SIGKILL:
|
||||
return None
|
||||
return None # Process still running
|
||||
|
||||
mock_kill.side_effect = kill_side_effect
|
||||
|
||||
import tempfile
|
||||
pidfile = os.path.join(tempfile.gettempdir(), 'gunicorn-dirty-test.pid')
|
||||
|
||||
with mock.patch('os.path.exists', return_value=True), \
|
||||
mock.patch('builtins.open', mock.mock_open(read_data='12345')), \
|
||||
mock.patch.object(arbiter, '_get_dirty_pidfile_path', return_value=pidfile), \
|
||||
mock.patch('time.sleep'):
|
||||
arbiter._cleanup_orphaned_dirty_arbiter()
|
||||
|
||||
# Should end with SIGKILL
|
||||
kill_calls = [c for c in mock_kill.call_args_list if c[0][1] == signal.SIGKILL]
|
||||
assert len(kill_calls) == 1
|
||||
|
||||
@mock.patch('os.unlink')
|
||||
def test_cleanup_orphaned_stale_pidfile_no_process(self, mock_unlink):
|
||||
"""Verify cleanup removes stale pidfile when process doesn't exist."""
|
||||
arbiter = gunicorn.arbiter.Arbiter(DummyApplication())
|
||||
arbiter.master_pid = 0
|
||||
|
||||
import tempfile
|
||||
pidfile = os.path.join(tempfile.gettempdir(), 'gunicorn-dirty-test.pid')
|
||||
|
||||
with mock.patch('os.path.exists', return_value=True), \
|
||||
mock.patch('builtins.open', mock.mock_open(read_data='12345')), \
|
||||
mock.patch.object(arbiter, '_get_dirty_pidfile_path', return_value=pidfile), \
|
||||
mock.patch('os.kill', side_effect=OSError(3, "No such process")):
|
||||
arbiter._cleanup_orphaned_dirty_arbiter()
|
||||
|
||||
# Should still unlink the stale pidfile
|
||||
mock_unlink.assert_called_with(pidfile)
|
||||
|
||||
@mock.patch('gunicorn.dirty.DirtyArbiter')
|
||||
@mock.patch('os.fork')
|
||||
def test_spawn_dirty_arbiter_calls_cleanup(self, mock_fork, mock_dirty_arbiter):
|
||||
"""Verify spawn_dirty_arbiter calls orphan cleanup before spawning."""
|
||||
mock_fork.return_value = 12345 # Parent process
|
||||
mock_arbiter_instance = mock.Mock()
|
||||
mock_arbiter_instance.socket_path = '/tmp/test.sock'
|
||||
mock_dirty_arbiter.return_value = mock_arbiter_instance
|
||||
|
||||
arbiter = gunicorn.arbiter.Arbiter(DummyApplication())
|
||||
arbiter.cfg.set('dirty_workers', 1)
|
||||
arbiter.cfg.set('dirty_apps', ['test:app'])
|
||||
|
||||
with mock.patch.object(arbiter, '_cleanup_orphaned_dirty_arbiter') as mock_cleanup, \
|
||||
mock.patch.object(arbiter, '_get_dirty_pidfile_path', return_value='/tmp/test.pid'), \
|
||||
mock.patch('gunicorn.dirty.set_dirty_socket_path'):
|
||||
arbiter.spawn_dirty_arbiter()
|
||||
|
||||
mock_cleanup.assert_called_once()
|
||||
|
||||
@mock.patch('os.fork')
|
||||
def test_spawn_dirty_arbiter_passes_pidfile(self, mock_fork):
|
||||
"""Verify spawn_dirty_arbiter passes pidfile to DirtyArbiter."""
|
||||
mock_fork.return_value = 12345 # Parent process
|
||||
|
||||
arbiter = gunicorn.arbiter.Arbiter(DummyApplication())
|
||||
arbiter.cfg.set('dirty_workers', 1)
|
||||
arbiter.cfg.set('dirty_apps', ['test:app'])
|
||||
|
||||
pidfile_path = '/tmp/gunicorn-dirty-test.pid'
|
||||
with mock.patch.object(arbiter, '_cleanup_orphaned_dirty_arbiter'), \
|
||||
mock.patch.object(arbiter, '_get_dirty_pidfile_path', return_value=pidfile_path), \
|
||||
mock.patch('gunicorn.arbiter.DirtyArbiter') as mock_dirty_arbiter, \
|
||||
mock.patch('gunicorn.arbiter.set_dirty_socket_path'):
|
||||
mock_arbiter_instance = mock.Mock()
|
||||
mock_arbiter_instance.socket_path = '/tmp/test.sock'
|
||||
mock_dirty_arbiter.return_value = mock_arbiter_instance
|
||||
|
||||
arbiter.spawn_dirty_arbiter()
|
||||
|
||||
# Verify DirtyArbiter was called with pidfile parameter
|
||||
mock_dirty_arbiter.assert_called_once()
|
||||
call_kwargs = mock_dirty_arbiter.call_args[1]
|
||||
assert call_kwargs.get('pidfile') == pidfile_path
|
||||
|
||||
@ -120,6 +120,32 @@ class TestDirtyArbiterInit:
|
||||
# Cleanup
|
||||
arbiter._cleanup_sync()
|
||||
|
||||
def test_init_with_pidfile(self):
|
||||
"""Test initialization with pidfile parameter."""
|
||||
cfg = Config()
|
||||
log = MockLog()
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
pidfile = os.path.join(tmpdir, "dirty.pid")
|
||||
arbiter = DirtyArbiter(cfg=cfg, log=log, pidfile=pidfile)
|
||||
|
||||
assert arbiter.pidfile == pidfile
|
||||
|
||||
# Cleanup
|
||||
arbiter._cleanup_sync()
|
||||
|
||||
def test_init_without_pidfile(self):
|
||||
"""Test initialization without pidfile parameter defaults to None."""
|
||||
cfg = Config()
|
||||
log = MockLog()
|
||||
|
||||
arbiter = DirtyArbiter(cfg=cfg, log=log)
|
||||
|
||||
assert arbiter.pidfile is None
|
||||
|
||||
# Cleanup
|
||||
arbiter._cleanup_sync()
|
||||
|
||||
|
||||
class TestDirtyArbiterCleanup:
|
||||
"""Tests for arbiter cleanup."""
|
||||
@ -157,6 +183,100 @@ class TestDirtyArbiterCleanup:
|
||||
|
||||
assert not os.path.exists(tmpdir)
|
||||
|
||||
def test_cleanup_removes_pidfile(self):
|
||||
"""Test that cleanup removes the PID file."""
|
||||
cfg = Config()
|
||||
log = MockLog()
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
pidfile = os.path.join(tmpdir, "dirty.pid")
|
||||
arbiter = DirtyArbiter(cfg=cfg, log=log, pidfile=pidfile)
|
||||
|
||||
# Create pidfile
|
||||
with open(pidfile, 'w') as f:
|
||||
f.write('12345')
|
||||
|
||||
assert os.path.exists(pidfile)
|
||||
|
||||
arbiter._cleanup_sync()
|
||||
|
||||
assert not os.path.exists(pidfile)
|
||||
|
||||
def test_cleanup_handles_missing_pidfile(self):
|
||||
"""Test that cleanup handles non-existent pidfile gracefully."""
|
||||
cfg = Config()
|
||||
log = MockLog()
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
pidfile = os.path.join(tmpdir, "nonexistent.pid")
|
||||
arbiter = DirtyArbiter(cfg=cfg, log=log, pidfile=pidfile)
|
||||
|
||||
# Don't create the file
|
||||
assert not os.path.exists(pidfile)
|
||||
|
||||
# Should not raise
|
||||
arbiter._cleanup_sync()
|
||||
|
||||
def test_cleanup_without_pidfile(self):
|
||||
"""Test that cleanup works when no pidfile configured."""
|
||||
cfg = Config()
|
||||
log = MockLog()
|
||||
|
||||
arbiter = DirtyArbiter(cfg=cfg, log=log)
|
||||
assert arbiter.pidfile is None
|
||||
|
||||
# Should not raise
|
||||
arbiter._cleanup_sync()
|
||||
|
||||
|
||||
class TestDirtyArbiterPidfileWrite:
|
||||
"""Tests for PID file writing during run()."""
|
||||
|
||||
def test_run_writes_pidfile(self):
|
||||
"""Test that run() writes the PID to the pidfile."""
|
||||
from unittest import mock
|
||||
|
||||
cfg = Config()
|
||||
cfg.set("dirty_workers", 0)
|
||||
cfg.set("dirty_apps", ["tests.support_dirty_app:TestDirtyApp"])
|
||||
log = MockLog()
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
pidfile = os.path.join(tmpdir, "dirty.pid")
|
||||
arbiter = DirtyArbiter(cfg=cfg, log=log, pidfile=pidfile)
|
||||
|
||||
# Track if PID file was written correctly
|
||||
pid_written = None
|
||||
|
||||
def mock_asyncio_run(coro):
|
||||
nonlocal pid_written
|
||||
# At this point, PID file should have been written
|
||||
if os.path.exists(pidfile):
|
||||
with open(pidfile) as f:
|
||||
pid_written = int(f.read().strip())
|
||||
|
||||
# Mock asyncio.run to check PID file before cleanup runs
|
||||
with mock.patch.object(asyncio, 'run', side_effect=mock_asyncio_run):
|
||||
arbiter.run()
|
||||
|
||||
# Check PID was written correctly
|
||||
assert pid_written == os.getpid()
|
||||
|
||||
def test_run_without_pidfile_does_not_fail(self):
|
||||
"""Test that run() works when no pidfile configured."""
|
||||
from unittest import mock
|
||||
|
||||
cfg = Config()
|
||||
cfg.set("dirty_workers", 0)
|
||||
cfg.set("dirty_apps", ["tests.support_dirty_app:TestDirtyApp"])
|
||||
log = MockLog()
|
||||
|
||||
arbiter = DirtyArbiter(cfg=cfg, log=log)
|
||||
|
||||
with mock.patch.object(asyncio, 'run'):
|
||||
# Should not raise
|
||||
arbiter.run()
|
||||
|
||||
|
||||
class TestDirtyArbiterRouteRequest:
|
||||
"""Tests for request routing."""
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user