fix(asgi): quick shutdown on SIGINT/SIGQUIT, graceful on SIGTERM

- SIGINT/SIGQUIT triggers immediate shutdown, skipping connection waits
- SIGTERM triggers graceful shutdown, waiting for connections
- Arbiter sends SIGQUIT to workers (and the dirty arbiter, if any) when SIGINT/SIGQUIT is received during graceful shutdown
- Workers have 2s to exit cleanly after quick shutdown before SIGKILL
This commit is contained in:
Benoit Chesneau 2026-02-01 10:06:23 +01:00
parent 98ef198baa
commit ee933f2e30
2 changed files with 51 additions and 14 deletions

View File

@@ -414,7 +414,22 @@ class Arbiter:
# instruct the workers to exit # instruct the workers to exit
self.kill_workers(sig) self.kill_workers(sig)
# wait until the graceful timeout # wait until the graceful timeout
quick_shutdown = not graceful
while (self.WORKERS or self.dirty_arbiter_pid) and time.time() < limit: while (self.WORKERS or self.dirty_arbiter_pid) and time.time() < limit:
# Check for SIGINT/SIGQUIT to trigger quick shutdown
if not quick_shutdown:
try:
pending_sig = self.SIG_QUEUE.get_nowait()
if pending_sig in (signal.SIGINT, signal.SIGQUIT):
self.log.info("Quick shutdown requested")
quick_shutdown = True
self.kill_workers(signal.SIGQUIT)
if self.dirty_arbiter_pid:
self.kill_dirty_arbiter(signal.SIGQUIT)
# Give workers a short time to exit cleanly
limit = time.time() + 2.0
except Exception:
pass
self.reap_workers() self.reap_workers()
self.reap_dirty_arbiter() self.reap_dirty_arbiter()
time.sleep(0.1) time.sleep(0.1)

View File

@@ -36,6 +36,7 @@ class ASGIWorker(base.Worker):
self.nr_conns = 0 self.nr_conns = 0
self.lifespan = None self.lifespan = None
self.state = {} # Shared state for lifespan self.state = {} # Shared state for lifespan
self._quick_shutdown = False # True for SIGINT/SIGQUIT (immediate), False for SIGTERM (graceful)
@classmethod @classmethod
def check_config(cls, cfg, log): def check_config(cls, cfg, log):
@@ -122,7 +123,11 @@ class ASGIWorker(base.Worker):
self.loop.add_signal_handler(signal.SIGABRT, self.handle_abort_signal) self.loop.add_signal_handler(signal.SIGABRT, self.handle_abort_signal)
def handle_quit_signal(self): def handle_quit_signal(self):
"""Handle SIGQUIT - immediate shutdown.""" """Handle SIGQUIT/SIGINT - immediate shutdown."""
self._quick_shutdown = True
if not self.alive:
# Already shutting down (SIGTERM was sent) - wake up the loop
return
self.alive = False self.alive = False
self.cfg.worker_int(self) self.cfg.worker_int(self)
@@ -221,23 +226,32 @@ class ASGIWorker(base.Worker):
for server in self.servers: for server in self.servers:
server.close() server.close()
# Wait for servers to close # Wait for servers to close (skip on quick shutdown)
if not self._quick_shutdown:
for server in self.servers: for server in self.servers:
await server.wait_closed() if self._quick_shutdown:
break
try:
await asyncio.wait_for(server.wait_closed(), timeout=0.5)
except asyncio.TimeoutError:
pass # Check _quick_shutdown on next iteration
# Wait for in-flight connections (with timeout) # Wait for in-flight connections (skip on quick shutdown)
if self.nr_conns > 0 and not self._quick_shutdown:
graceful_timeout = self.cfg.graceful_timeout graceful_timeout = self.cfg.graceful_timeout
if self.nr_conns > 0:
self.log.info("Waiting for %d connections to finish...", self.nr_conns) self.log.info("Waiting for %d connections to finish...", self.nr_conns)
deadline = self.loop.time() + graceful_timeout deadline = self.loop.time() + graceful_timeout
while self.nr_conns > 0 and self.loop.time() < deadline: while self.nr_conns > 0 and self.loop.time() < deadline:
if self._quick_shutdown:
self.log.info("Quick shutdown requested")
break
await asyncio.sleep(0.1) await asyncio.sleep(0.1)
if self.nr_conns > 0: if self.nr_conns > 0:
self.log.warning("Closing %d connections after timeout", self.nr_conns) self.log.warning("Forcing close of %d connections", self.nr_conns)
# Run lifespan shutdown # Run lifespan shutdown (skip on quick shutdown)
if self.lifespan: if self.lifespan and not self._quick_shutdown:
try: try:
await self.lifespan.shutdown() await self.lifespan.shutdown()
except Exception as e: except Exception as e:
@@ -263,11 +277,19 @@ class ASGIWorker(base.Worker):
for task in pending: for task in pending:
task.cancel() task.cancel()
# Run loop until all tasks are cancelled # Run loop until all tasks are cancelled (with timeout on quick exit)
if pending: if pending:
gather = asyncio.gather(*pending, return_exceptions=True)
if self._quick_shutdown:
# Quick exit - don't wait long for tasks to cancel
try:
self.loop.run_until_complete( self.loop.run_until_complete(
asyncio.gather(*pending, return_exceptions=True) asyncio.wait_for(gather, timeout=1.0)
) )
except asyncio.TimeoutError:
self.log.debug("Timeout waiting for tasks to cancel")
else:
self.loop.run_until_complete(gather)
self.loop.close() self.loop.close()
except Exception as e: except Exception as e: