From 8e25cb2400392b8953b09d007b65d229b387a106 Mon Sep 17 00:00:00 2001 From: Benoit Chesneau Date: Sun, 3 May 2026 18:37:45 +0200 Subject: [PATCH] fix: tighten keepalive gate and scope finish_body byte cap - ASGI keepalive gate now keys on receiver._complete only. _closed is overloaded across transport disconnect and receive timeout; treating either as 'message complete' would re-enable the smuggling vector the previous PR was meant to close. - Parser.finish_body's 64 KiB byte cap now applies only when an explicit deadline is given. Default invocations (notably __next__, used by base_async / sync workers) regain the prior unbounded drain so a partial drain does not silently desync the next request. --- gunicorn/asgi/protocol.py | 14 ++++++------- gunicorn/http/parser.py | 15 ++++++++++---- tests/test_asgi_disconnect.py | 26 ++++++++++++++++++++++++ tests/test_http.py | 38 +++++++++++++++++++++++++++++++++++ 4 files changed, 81 insertions(+), 12 deletions(-) diff --git a/gunicorn/asgi/protocol.py b/gunicorn/asgi/protocol.py index 85b6ae39..789fbc46 100644 --- a/gunicorn/asgi/protocol.py +++ b/gunicorn/asgi/protocol.py @@ -782,15 +782,13 @@ class ASGIProtocol(asyncio.Protocol): break # Refuse keepalive if the previous request body was not fully - # framed: residual bytes left in the transport stream would be - # parsed as the start of the next request (smuggling). + # framed: residual bytes in the transport stream would be parsed + # as the start of the next request (smuggling). Only _complete + # signals a cleanly framed message; _closed is set on transport + # disconnect *and* on receive timeout, neither of which means + # the body finished framing. receiver = self._body_receiver - message_complete = ( - receiver is None - or receiver._complete - or receiver._closed - ) - if not message_complete: + if receiver is not None and not receiver._complete: break # Resume reading if paused during body consumption diff --git a/gunicorn/http/parser.py b/gunicorn/http/parser.py index 0d774e93..a11ea91c 100644 --- a/gunicorn/http/parser.py +++ b/gunicorn/http/parser.py @@ -35,7 +35,7 @@ class Parser: def __iter__(self): return self - def finish_body(self, deadline=None, max_bytes=_DRAIN_MAX_BYTES): + def finish_body(self, deadline=None, max_bytes=None): """Discard any unread body of the current message. Called before returning a keepalive connection to the poller so the @@ -43,8 +43,12 @@ class Parser: ``deadline`` is an absolute ``time.monotonic()`` value; when set the socket read timeout is bounded by the remaining time before each read. - ``max_bytes`` caps the total drained bytes to defend against a slow - client that keeps trickling under the deadline. + ``max_bytes`` caps the total drained bytes; when a deadline is given + and ``max_bytes`` is left at the default, ``_DRAIN_MAX_BYTES`` applies + to defend against a slow client that keeps trickling under it. When + called without a deadline (the default invocation from ``__next__``), + no byte cap is applied so the prior unbounded drain semantics are + preserved for callers that don't know how to react to a partial drain. Returns ``True`` when the body was fully drained, ``False`` when the drain was abandoned (deadline, byte cap, or socket timeout). Callers @@ -54,6 +58,9 @@ class Parser: if not self.mesg: return True + if max_bytes is None and deadline is not None: + max_bytes = _DRAIN_MAX_BYTES + sock = getattr(self.unreader, "sock", None) # gettimeout/settimeout only matter when bounding a real socket; a # mock or non-socket source skips the timeout plumbing. @@ -82,7 +89,7 @@ class Parser: if not data: return True drained += len(data) - if drained >= max_bytes: + if max_bytes is not None and drained >= max_bytes: return False finally: if timeoutable_sock is not None: diff --git a/tests/test_asgi_disconnect.py b/tests/test_asgi_disconnect.py index a0270051..02bda059 100644 --- a/tests/test_asgi_disconnect.py +++ b/tests/test_asgi_disconnect.py @@ -266,3 +266,29 @@ class TestBodyReceiverIncompleteBody: assert msg["body"] == b"hello" # more_body may be False since the body is complete assert msg["more_body"] is False + + def test_keepalive_gate_refuses_after_receive_timeout(self, mock_worker): + """The keepalive completion check must NOT treat a receive-timeout + as a framed-complete message: residual body bytes on the wire would + be misparsed as the next pipelined request (smuggling). + + BodyReceiver._closed is overloaded across transport-disconnect and + receive-timeout, so the gate keys on _complete only. + """ + from gunicorn.asgi.protocol import ASGIProtocol, BodyReceiver + + protocol = ASGIProtocol(mock_worker) + protocol.reader = mock.Mock() + + request = mock.Mock() + request.content_length = 100 + request.chunked = False + + receiver = BodyReceiver(request, protocol) + receiver._closed = True # simulate _wait_for_data timeout + receiver._complete = False # body never finished framing + + # The gate inlined in _handle_connection: refuse keepalive when + # the receiver exists and the message wasn't framed complete. + message_complete = receiver is None or receiver._complete + assert message_complete is False diff --git a/tests/test_http.py b/tests/test_http.py index 1b3dc176..afc1843b 100644 --- a/tests/test_http.py +++ b/tests/test_http.py @@ -289,6 +289,44 @@ def test_finish_body_returns_false_when_byte_cap_exceeded(): assert parser.finish_body(max_bytes=512) is False +def test_finish_body_no_cap_without_deadline(): + """Without a deadline, finish_body MUST drain the full body even when it + exceeds _DRAIN_MAX_BYTES. The byte cap only applies under a deadline. + + Regression: a 64 KiB cap on every call silently desynced base_async/sync + workers that iterate the parser via __next__ (which discards the return + value), leading to the next request being misparsed from residual body + bytes left on the wire. + """ + body = b"x" * (128 * 1024) # well over _DRAIN_MAX_BYTES + payload = ( + b"POST / HTTP/1.1\r\n" + b"Host: example.com\r\n" + b"Content-Length: %d\r\n\r\n%s" % (len(body), body) + ) + parser = _build_request_parser(payload) + assert parser.finish_body() is True + + +def test_finish_body_applies_cap_only_under_deadline(): + """When a deadline is set and max_bytes is left at the default, the + implicit _DRAIN_MAX_BYTES cap kicks in to defend against a slow client + trickling under the deadline.""" + from gunicorn.http.parser import _DRAIN_MAX_BYTES + + body = b"x" * (_DRAIN_MAX_BYTES + 1024) + payload = ( + b"POST / HTTP/1.1\r\n" + b"Host: example.com\r\n" + b"Content-Length: %d\r\n\r\n%s" % (len(body), body) + ) + import time as _time + far_future = _time.monotonic() + 60.0 + + parser = _build_request_parser(payload) + assert parser.finish_body(deadline=far_future) is False + + def test_finish_body_returns_false_on_expired_deadline(): payload = ( b"POST / HTTP/1.1\r\n"