fix(companion): Only reload companion manager when its config changed

A SIGHUP web reload recycles HTTP workers and re-reads config, but with
--preload it does not re-import application code: the WSGI callable is
loaded once and cached, so running companions are already current.
Restarting the manager on every reload bounced all companions for
nothing, slowing the common fast-reload path Frappe relies on.

reload_companion_manager now rebuilds the companion configs and compares
their sorted config_hash against the running manager's. Unchanged ->
leave the manager and its companions running. Changed (field, added, or
removed name) -> restart the manager from the fresh cfg, as before.
Per-companion reread via the control socket is unchanged.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
Tanmoy Sarkar 2026-06-12 23:44:32 +05:30
parent 910c6ada40
commit 220bbfe150
4 changed files with 77 additions and 11 deletions

View File

@ -689,7 +689,7 @@ No per-companion logic in Arbiter.
- [x] Implement transactional `reread`.
- [x] Add manager spawn/reap logic in Arbiter.
- [x] Add manager shutdown handling in Arbiter.
- [x] Wire Gunicorn reload to manager `reread` or restart.
- [x] Wire Gunicorn reload to restart the manager only when companion config changed.
- [x] Close Gunicorn-only fds in manager child.
- [x] Close manager-only fds in companion child.
- [x] Add parent-death cleanup.

View File

@ -735,20 +735,47 @@ class Arbiter:
worker.tmp.close()
def reload_companion_manager(self):
"""Restart the companion manager so it picks up the new configuration.
"""Reconcile the companion manager with the reloaded configuration.
Gunicorn reload (SIGHUP) rebuilds cfg. A running manager is asked to
stop -- it drains its companions first -- and the SIGCHLD reaper then
clears its pid so manage_companion_manager respawns it from the fresh
cfg. If companions were added where none ran, a new manager starts
right away. Per-companion transactional reread stays available
separately through the control socket.
A web reload (SIGHUP) recycles HTTP workers and re-reads config, but
with ``--preload`` it does not reload application code -- the WSGI
callable is loaded once and cached -- so the running companions are
already current. Restarting them on every reload would bounce them for
nothing, so the manager is only reloaded when the companion config
actually changed.
When it did change, a running manager is asked to stop (it drains its
companions first); the SIGCHLD reaper clears its pid so the main loop
respawns it from the fresh cfg, and a manager started where none ran
comes up right away. An unchanged config leaves the manager and its
companions untouched. Per-companion transactional reread stays
available separately through the control socket.
"""
try:
new_configs = build_companion_configs(self.cfg)
except Exception:
self.log.exception(
"Could not read companion config on reload; "
"leaving companion manager unchanged")
return
if not self._companion_configs_changed(new_configs):
return
if self.companion_manager_pid != 0:
self.log.info("Reloading companion manager")
self.log.info("Companion config changed, reloading companion manager")
self.stop_companion_manager(signal.SIGTERM)
self.manage_companion_manager()
def _companion_configs_changed(self, new_configs):
"""True when the companion config differs from the running manager's.
Compares the sorted ``config_hash`` of every companion, so a changed
field, an added name, or a removed name all count, while a pure web
reload with the same companion specs does not.
"""
old_hashes = sorted(c.config_hash for c in self._companion_configs)
new_hashes = sorted(c.config_hash for c in new_configs)
return old_hashes != new_hashes
def stop_companion_manager(self, sig):
"""Signal the companion manager to exit, if it is running.

View File

@ -121,8 +121,17 @@ echo '{"cmd": "status"}' | socat - UNIX-CONNECT:/run/gunicorn/companion.sock
Commands: `status`, `start <name>`, `stop <name>`, `restart <name>`, `reread`.
`reread` is transactional: the new config is validated first, and on any error
nothing changes and the old config keeps running. A `SIGHUP` to Gunicorn
restarts the manager with the reloaded config.
nothing changes and the old config keeps running.
A `SIGHUP` to Gunicorn recycles the HTTP workers, then reloads the companion
manager **only if the companion config changed**. With `--preload`, a reload
does not re-import application code (the WSGI callable is loaded once and
cached), so unchanged companions are already current and are left running --
the common web reload never bounces them. When the companion specs do change,
the manager is restarted with the new config. Note that, like the HTTP
workers, companions pick up new application code only on a full restart, not on
`SIGHUP`. Use `reread` for finer, per-companion changes without touching the
others.
## Files

View File

@ -9,6 +9,7 @@ from unittest import mock
import gunicorn.app.base
import gunicorn.arbiter
from gunicorn.companion.config import build_companion_configs
from gunicorn.config import ReusePort
@ -272,6 +273,35 @@ def test_reload_companion_manager_starts_when_none_running():
arbiter.spawn_companion_manager.assert_called_once_with()
def test_reload_companion_manager_noop_when_config_unchanged():
arbiter = gunicorn.arbiter.Arbiter(DummyApplication())
arbiter.cfg.set("companion_workers", [{"name": "rq", "target": "pkg:run"}])
# The running manager was spawned with this exact config.
arbiter._companion_configs = build_companion_configs(arbiter.cfg)
arbiter.companion_manager_pid = 4242
arbiter.stop_companion_manager = mock.Mock()
arbiter.spawn_companion_manager = mock.Mock()
arbiter.reload_companion_manager()
# A web reload with unchanged companion specs leaves the manager alone.
arbiter.stop_companion_manager.assert_not_called()
arbiter.spawn_companion_manager.assert_not_called()
def test_reload_companion_manager_restarts_when_field_changed():
arbiter = gunicorn.arbiter.Arbiter(DummyApplication())
arbiter.cfg.set("companion_workers",
[{"name": "rq", "target": "pkg:run", "startsecs": 1}])
arbiter._companion_configs = build_companion_configs(arbiter.cfg)
arbiter.companion_manager_pid = 4242
# Same name, changed field -> different config_hash -> reload.
arbiter.cfg.set("companion_workers",
[{"name": "rq", "target": "pkg:run", "startsecs": 9}])
arbiter.stop_companion_manager = mock.Mock()
arbiter.spawn_companion_manager = mock.Mock()
arbiter.reload_companion_manager()
arbiter.stop_companion_manager.assert_called_once_with(signal.SIGTERM)
@mock.patch('gunicorn.sock.close_sockets')
def test_arbiter_stop_signals_companion_manager(close_sockets):
arbiter = gunicorn.arbiter.Arbiter(DummyApplication())