diff --git a/gunicorn/asgi/protocol.py b/gunicorn/asgi/protocol.py index 85b6ae39..789fbc46 100644 --- a/gunicorn/asgi/protocol.py +++ b/gunicorn/asgi/protocol.py @@ -782,15 +782,13 @@ class ASGIProtocol(asyncio.Protocol): break # Refuse keepalive if the previous request body was not fully - # framed: residual bytes left in the transport stream would be - # parsed as the start of the next request (smuggling). + # framed: residual bytes in the transport stream would be parsed + # as the start of the next request (smuggling). Only _complete + # signals a cleanly framed message; _closed is set on transport + # disconnect *and* on receive timeout, neither of which means + # the body finished framing. receiver = self._body_receiver - message_complete = ( - receiver is None - or receiver._complete - or receiver._closed - ) - if not message_complete: + if receiver is not None and not receiver._complete: break # Resume reading if paused during body consumption diff --git a/gunicorn/http/parser.py b/gunicorn/http/parser.py index 0d774e93..a11ea91c 100644 --- a/gunicorn/http/parser.py +++ b/gunicorn/http/parser.py @@ -35,7 +35,7 @@ class Parser: def __iter__(self): return self - def finish_body(self, deadline=None, max_bytes=_DRAIN_MAX_BYTES): + def finish_body(self, deadline=None, max_bytes=None): """Discard any unread body of the current message. Called before returning a keepalive connection to the poller so the @@ -43,8 +43,12 @@ class Parser: ``deadline`` is an absolute ``time.monotonic()`` value; when set the socket read timeout is bounded by the remaining time before each read. - ``max_bytes`` caps the total drained bytes to defend against a slow - client that keeps trickling under the deadline. + ``max_bytes`` caps the total drained bytes; when a deadline is given + and ``max_bytes`` is left at the default, ``_DRAIN_MAX_BYTES`` applies + to defend against a slow client that keeps trickling under it. When + called without a deadline (the default invocation from ``__next__``), + no byte cap is applied so the prior unbounded drain semantics are + preserved for callers that don't know how to react to a partial drain. Returns ``True`` when the body was fully drained, ``False`` when the drain was abandoned (deadline, byte cap, or socket timeout). Callers @@ -54,6 +58,9 @@ class Parser: if not self.mesg: return True + if max_bytes is None and deadline is not None: + max_bytes = _DRAIN_MAX_BYTES + sock = getattr(self.unreader, "sock", None) # gettimeout/settimeout only matter when bounding a real socket; a # mock or non-socket source skips the timeout plumbing. @@ -82,7 +89,7 @@ class Parser: if not data: return True drained += len(data) - if drained >= max_bytes: + if max_bytes is not None and drained >= max_bytes: return False finally: if timeoutable_sock is not None: diff --git a/tests/test_asgi_disconnect.py b/tests/test_asgi_disconnect.py index a0270051..02bda059 100644 --- a/tests/test_asgi_disconnect.py +++ b/tests/test_asgi_disconnect.py @@ -266,3 +266,29 @@ class TestBodyReceiverIncompleteBody: assert msg["body"] == b"hello" # more_body may be False since the body is complete assert msg["more_body"] is False + + def test_keepalive_gate_refuses_after_receive_timeout(self, mock_worker): + """The keepalive completion check must NOT treat a receive-timeout + as a framed-complete message: residual body bytes on the wire would + be misparsed as the next pipelined request (smuggling). + + BodyReceiver._closed is overloaded across transport-disconnect and + receive-timeout, so the gate keys on _complete only. + """ + from gunicorn.asgi.protocol import ASGIProtocol, BodyReceiver + + protocol = ASGIProtocol(mock_worker) + protocol.reader = mock.Mock() + + request = mock.Mock() + request.content_length = 100 + request.chunked = False + + receiver = BodyReceiver(request, protocol) + receiver._closed = True # simulate _wait_for_data timeout + receiver._complete = False # body never finished framing + + # The gate inlined in _handle_connection: refuse keepalive when + # the receiver exists and the message wasn't framed complete. + message_complete = receiver is None or receiver._complete + assert message_complete is False diff --git a/tests/test_http.py b/tests/test_http.py index 1b3dc176..afc1843b 100644 --- a/tests/test_http.py +++ b/tests/test_http.py @@ -289,6 +289,44 @@ def test_finish_body_returns_false_when_byte_cap_exceeded(): assert parser.finish_body(max_bytes=512) is False +def test_finish_body_no_cap_without_deadline(): + """Without a deadline, finish_body MUST drain the full body even when it + exceeds _DRAIN_MAX_BYTES. The byte cap only applies under a deadline. + + Regression: a 64 KiB cap on every call silently desynced base_async/sync + workers that iterate the parser via __next__ (which discards the return + value), leading to the next request being misparsed from residual body + bytes left on the wire. + """ + body = b"x" * (128 * 1024) # well over _DRAIN_MAX_BYTES + payload = ( + b"POST / HTTP/1.1\r\n" + b"Host: example.com\r\n" + b"Content-Length: %d\r\n\r\n%s" % (len(body), body) + ) + parser = _build_request_parser(payload) + assert parser.finish_body() is True + + +def test_finish_body_applies_cap_only_under_deadline(): + """When a deadline is set and max_bytes is left at the default, the + implicit _DRAIN_MAX_BYTES cap kicks in to defend against a slow client + trickling under the deadline.""" + from gunicorn.http.parser import _DRAIN_MAX_BYTES + + body = b"x" * (_DRAIN_MAX_BYTES + 1024) + payload = ( + b"POST / HTTP/1.1\r\n" + b"Host: example.com\r\n" + b"Content-Length: %d\r\n\r\n%s" % (len(body), body) + ) + import time as _time + far_future = _time.monotonic() + 60.0 + + parser = _build_request_parser(payload) + assert parser.finish_body(deadline=far_future) is False + + def test_finish_body_returns_false_on_expired_deadline(): payload = ( b"POST / HTTP/1.1\r\n"