fix: prevent HTTP/2 ASGI request body duplication

receive_data() stores every DATA frame in both _body_chunks (list)
and request_body (BytesIO). The receive() closure in
_handle_http2_request() has two read paths: a streaming path that
pops from _body_chunks, and a fast path that reads from BytesIO.

After the streaming path consumed the body, the fast path could
re-read the same data from BytesIO because body_received was never
set in the streaming return path. This caused the application to
receive a doubled request body (e.g. 18 bytes sent, 36 bytes
received), breaking JSON parsing with "Extra data" errors.

Fix: set body_received = True in the streaming path when
_body_complete is True, preventing the fast path from re-reading
already-consumed data.

Fixes #3558
This commit is contained in:
Ben Leembruggen 2026-03-26 14:15:14 +11:00
parent a49a46fc19
commit 8fba44cf02
3 changed files with 70 additions and 0 deletions

View File

@ -1328,6 +1328,9 @@ class ASGIProtocol(asyncio.Protocol):
"more_body": False,
}
if stream._body_complete:
body_received = True
return {
"type": "http.request",
"body": chunk,

View File

@ -406,3 +406,35 @@ class TestHTTP2Direct:
response = http_client.get(f"{gunicorn_ssl_url}/stream/streaming?chunks=3")
assert response.status_code == 200
assert "Chunk" in response.text
def test_direct_https_post_echo(self, http_client, gunicorn_ssl_url):
    """POST raw bytes straight to gunicorn over HTTPS and expect them echoed.

    The response body must be byte-for-byte equal to the request body.
    """
    payload = b"HTTP/2 direct echo test"
    echo_url = f"{gunicorn_ssl_url}/http/echo"

    reply = http_client.post(echo_url, content=payload)

    assert reply.status_code == 200
    assert reply.content == payload
def test_direct_https_post_json(self, http_client, gunicorn_ssl_url):
    """POST a JSON document straight to gunicorn over HTTPS.

    The endpoint's JSON reply is expected to carry the submitted fields
    back under its "received" key.
    """
    payload = {"message": "http2 direct post", "number": 42}

    reply = http_client.post(f"{gunicorn_ssl_url}/http/post-json", json=payload)

    assert reply.status_code == 200
    decoded = reply.json()
    assert decoded["received"]["message"] == "http2 direct post"
    assert decoded["received"]["number"] == 42
def test_direct_https_post_large_body(self, http_client, gunicorn_ssl_url):
    """Test large POST body directly to gunicorn over HTTPS.

    Sends a body to the echo endpoint and requires the response to match
    it exactly, so a duplicated, truncated, or corrupted body all fail.
    """
    body = b"x" * 100000  # 100KB, spans multiple HTTP/2 DATA frames
    response = http_client.post(
        f"{gunicorn_ssl_url}/http/echo",
        content=body,
    )
    assert response.status_code == 200
    # Compare lengths first: a mismatch here yields a short, readable failure
    # instead of a 100KB bytes diff.
    assert len(response.content) == len(body)
    # A length check alone would accept wrong bytes of the right size, so
    # also require exact equality, as the small-body echo test does.
    assert response.content == body

View File

@ -477,6 +477,41 @@ class TestGetRequestBody:
assert stream.get_request_body() == b"Test body content"
class TestReadBodyChunk:
    """Test read_body_chunk method."""

    @pytest.mark.asyncio
    async def test_read_body_chunk_returns_data(self):
        """A single DATA frame that ends the stream is read back as one chunk."""
        stream = HTTP2Stream(stream_id=1, connection=MockConnection())
        stream.state = StreamState.OPEN
        stream.receive_data(b"chunk1", end_stream=True)

        assert await stream.read_body_chunk() == b"chunk1"

    @pytest.mark.asyncio
    async def test_read_body_chunk_multi_frame(self):
        """A body received as three DATA frames is read back complete and in order."""
        stream = HTTP2Stream(stream_id=1, connection=MockConnection())
        stream.state = StreamState.OPEN

        # Only the final frame carries end_stream; the first two leave the
        # stream open.
        for payload in (b"part1", b"part2"):
            stream.receive_data(payload)
        stream.receive_data(b"part3", end_stream=True)

        # Read at most three times, stopping early if a read yields None.
        received = []
        reads_left = 3
        while reads_left:
            piece = await stream.read_body_chunk()
            if piece is None:
                break
            received.append(piece)
            reads_left -= 1

        assert b"".join(received) == b"part1part2part3"
class TestGetPseudoHeaders:
"""Test get_pseudo_headers method."""