mirror of
https://github.com/frappe/gunicorn.git
synced 2026-07-02 10:41:30 +08:00
_handle_stream_ended() in async_connection.py (used by the ASGI worker) did not set stream._body_complete = True or signal stream._body_event. This caused the receive() closure in protocol.py to never see the body as complete via the streaming path, so on the next call the fast path re-read the entire body from BytesIO, doubling it. The sync handler in connection.py already had a partial fix from #3559 but was also missing _body_event signalling, which is needed to unblock any pending read_body_chunk() await. Fixes https://github.com/benoitc/gunicorn/discussions/3567
324 lines
10 KiB
Python
324 lines
10 KiB
Python
#!/usr/bin/env python
|
|
# -*- coding: utf-8 -
|
|
#
|
|
# This file is part of gunicorn released under the MIT license.
|
|
# See the NOTICE for more information.
|
|
|
|
"""
|
|
Reproduction and regression test for HTTP/2 ASGI body duplication.
|
|
|
|
https://github.com/benoitc/gunicorn/discussions/3567
|
|
|
|
Bug: When an HTTP/2 POST with a JSON body is sent to a gunicorn ASGI server,
|
|
the receive() closure reads the body via read_body_chunk() (streaming path),
|
|
but _body_complete is never set to True by _handle_stream_ended() in
|
|
async_connection.py, so the fast path re-reads the same data from BytesIO
|
|
- doubling the request body.
|
|
|
|
This test starts a real gunicorn ASGI server with TLS + HTTP/2, sends a POST
|
|
request with a JSON body, and verifies the server sees the correct body (not
|
|
doubled).
|
|
"""
|
|
|
|
import json
|
|
import os
|
|
import ssl
|
|
import subprocess
|
|
import sys
|
|
import tempfile
|
|
import time
|
|
|
|
import pytest
|
|
|
|
try:
|
|
import httpx
|
|
HTTPX_AVAILABLE = True
|
|
except ImportError:
|
|
HTTPX_AVAILABLE = False
|
|
|
|
try:
|
|
import h2 # noqa: F401
|
|
H2_AVAILABLE = True
|
|
except ImportError:
|
|
H2_AVAILABLE = False
|
|
|
|
|
|
pytestmark = [
|
|
pytest.mark.skipif(not HTTPX_AVAILABLE, reason="httpx not available"),
|
|
pytest.mark.skipif(not H2_AVAILABLE, reason="h2 not available"),
|
|
]
|
|
|
|
|
|
def _httpx_body_len(payload):
|
|
"""Return the byte length that httpx will actually send for a JSON payload.
|
|
|
|
httpx uses compact JSON serialisation (no spaces), which differs from
|
|
Python's default json.dumps().
|
|
"""
|
|
req = httpx.Request("POST", "https://localhost/", json=payload)
|
|
return len(req.content)
|
|
|
|
|
|
# --- Inline ASGI app (echoes the raw request body back) ---
|
|
|
|
ASGI_APP_CODE = '''
|
|
import json
|
|
|
|
async def app(scope, receive, send):
|
|
"""Minimal ASGI app that echoes the request body and its length."""
|
|
if scope["type"] == "lifespan":
|
|
while True:
|
|
message = await receive()
|
|
if message["type"] == "lifespan.startup":
|
|
await send({"type": "lifespan.startup.complete"})
|
|
elif message["type"] == "lifespan.shutdown":
|
|
await send({"type": "lifespan.shutdown.complete"})
|
|
return
|
|
|
|
if scope["type"] != "http":
|
|
return
|
|
|
|
# Read the full request body
|
|
body = b""
|
|
while True:
|
|
message = await receive()
|
|
body += message.get("body", b"")
|
|
if not message.get("more_body", False):
|
|
break
|
|
|
|
# Echo it back with metadata
|
|
response = {
|
|
"body_length": len(body),
|
|
"body": body.decode("utf-8", errors="replace"),
|
|
}
|
|
response_body = json.dumps(response).encode("utf-8")
|
|
|
|
await send({
|
|
"type": "http.response.start",
|
|
"status": 200,
|
|
"headers": [
|
|
[b"content-type", b"application/json"],
|
|
[b"content-length", str(len(response_body)).encode()],
|
|
],
|
|
})
|
|
await send({
|
|
"type": "http.response.body",
|
|
"body": response_body,
|
|
})
|
|
'''
|
|
|
|
|
|
@pytest.fixture(scope="module")
|
|
def tls_certs():
|
|
"""Generate self-signed TLS certificates for testing."""
|
|
with tempfile.TemporaryDirectory() as tmpdir:
|
|
ca_key = os.path.join(tmpdir, "ca.key")
|
|
ca_crt = os.path.join(tmpdir, "ca.crt")
|
|
server_key = os.path.join(tmpdir, "server.key")
|
|
server_csr = os.path.join(tmpdir, "server.csr")
|
|
server_crt = os.path.join(tmpdir, "server.crt")
|
|
|
|
# Generate CA
|
|
subprocess.run([
|
|
"openssl", "genrsa", "-out", ca_key, "2048"
|
|
], check=True, capture_output=True)
|
|
subprocess.run([
|
|
"openssl", "req", "-x509", "-new", "-nodes",
|
|
"-key", ca_key, "-sha256", "-days", "1",
|
|
"-out", ca_crt, "-subj", "/CN=Test CA"
|
|
], check=True, capture_output=True)
|
|
|
|
# Generate server cert
|
|
subprocess.run([
|
|
"openssl", "genrsa", "-out", server_key, "2048"
|
|
], check=True, capture_output=True)
|
|
subprocess.run([
|
|
"openssl", "req", "-new", "-key", server_key,
|
|
"-out", server_csr, "-subj", "/CN=localhost",
|
|
"-addext", "subjectAltName=DNS:localhost,IP:127.0.0.1"
|
|
], check=True, capture_output=True)
|
|
subprocess.run([
|
|
"openssl", "x509", "-req", "-in", server_csr,
|
|
"-CA", ca_crt, "-CAkey", ca_key, "-CAcreateserial",
|
|
"-out", server_crt, "-days", "1", "-sha256",
|
|
"-copy_extensions", "copyall"
|
|
], check=True, capture_output=True)
|
|
|
|
yield {
|
|
"ca_crt": ca_crt,
|
|
"server_crt": server_crt,
|
|
"server_key": server_key,
|
|
"tmpdir": tmpdir,
|
|
}
|
|
|
|
|
|
@pytest.fixture(scope="module")
|
|
def gunicorn_server(tls_certs):
|
|
"""Start a gunicorn ASGI server with TLS and HTTP/2."""
|
|
# Write the ASGI app to a temp file
|
|
app_file = os.path.join(tls_certs["tmpdir"], "asgi_app.py")
|
|
with open(app_file, "w") as f:
|
|
f.write(ASGI_APP_CODE)
|
|
|
|
port = 9443 # Use a high port to avoid conflicts
|
|
|
|
env = os.environ.copy()
|
|
# Ensure the temp dir is on the Python path so gunicorn can import the app
|
|
env["PYTHONPATH"] = tls_certs["tmpdir"]
|
|
|
|
proc = subprocess.Popen(
|
|
[
|
|
sys.executable, "-m", "gunicorn",
|
|
"--worker-class", "gunicorn.workers.gasgi.ASGIWorker",
|
|
"--bind", f"127.0.0.1:{port}",
|
|
"--certfile", tls_certs["server_crt"],
|
|
"--keyfile", tls_certs["server_key"],
|
|
"--http-protocols", "h2,h1",
|
|
"--workers", "1",
|
|
"--log-level", "debug",
|
|
"--chdir", tls_certs["tmpdir"],
|
|
"asgi_app:app",
|
|
],
|
|
cwd=tls_certs["tmpdir"],
|
|
env=env,
|
|
stdout=subprocess.PIPE,
|
|
stderr=subprocess.PIPE,
|
|
)
|
|
|
|
# Wait for server to start
|
|
ca_ctx = ssl.create_default_context(cafile=tls_certs["ca_crt"])
|
|
base_url = f"https://127.0.0.1:{port}"
|
|
|
|
for _ in range(40): # Up to 8 seconds
|
|
time.sleep(0.2)
|
|
try:
|
|
with httpx.Client(verify=ca_ctx, timeout=2.0) as client:
|
|
resp = client.get(base_url + "/")
|
|
if resp.status_code in (200, 404):
|
|
break
|
|
except Exception:
|
|
if proc.poll() is not None:
|
|
stdout = proc.stdout.read().decode()
|
|
stderr = proc.stderr.read().decode()
|
|
pytest.fail(
|
|
f"gunicorn exited early (code {proc.returncode}).\n"
|
|
f"stdout: {stdout}\nstderr: {stderr}"
|
|
)
|
|
continue
|
|
else:
|
|
proc.terminate()
|
|
proc.wait()
|
|
stdout = proc.stdout.read().decode()
|
|
stderr = proc.stderr.read().decode()
|
|
pytest.fail(
|
|
f"gunicorn did not start within 8 seconds.\n"
|
|
f"stdout: {stdout}\nstderr: {stderr}"
|
|
)
|
|
|
|
yield {"base_url": base_url, "ca_crt": tls_certs["ca_crt"]}
|
|
|
|
proc.terminate()
|
|
proc.wait(timeout=5)
|
|
|
|
|
|
class TestH2BodyDuplicationFix:
|
|
"""End-to-end tests proving HTTP/2 POST body is not duplicated."""
|
|
|
|
def test_h2_post_json_body_not_duplicated(self, gunicorn_server):
|
|
"""POST a JSON body over HTTP/2 and verify it arrives intact.
|
|
|
|
Before the fix, the server would see the body twice:
|
|
{"input":["hello world"]}{"input":["hello world"]}
|
|
causing a JSON decode error.
|
|
"""
|
|
ca_ctx = ssl.create_default_context(
|
|
cafile=gunicorn_server["ca_crt"]
|
|
)
|
|
body = {"input": ["hello world"]}
|
|
expected_len = _httpx_body_len(body)
|
|
|
|
with httpx.Client(http2=True, verify=ca_ctx, timeout=10.0) as client:
|
|
resp = client.post(
|
|
gunicorn_server["base_url"] + "/",
|
|
json=body,
|
|
)
|
|
|
|
assert resp.status_code == 200, f"Expected 200, got {resp.status_code}: {resp.text}"
|
|
assert resp.http_version == "HTTP/2", (
|
|
f"Expected HTTP/2, got {resp.http_version} - "
|
|
"test is not exercising the h2 code path"
|
|
)
|
|
|
|
data = resp.json()
|
|
assert data["body_length"] == expected_len, (
|
|
f"Body length mismatch: server received {data['body_length']} bytes "
|
|
f"but sent {expected_len} bytes. "
|
|
f"Server saw: {data['body']!r}"
|
|
)
|
|
# Verify the JSON round-trips correctly
|
|
assert json.loads(data["body"]) == body
|
|
|
|
def test_h2_post_json_round_trip(self, gunicorn_server):
|
|
"""Verify JSON payload round-trips correctly over HTTP/2."""
|
|
ca_ctx = ssl.create_default_context(
|
|
cafile=gunicorn_server["ca_crt"]
|
|
)
|
|
body = {"input": ["hello world"], "model": "voyage-3"}
|
|
|
|
with httpx.Client(http2=True, verify=ca_ctx, timeout=10.0) as client:
|
|
resp = client.post(
|
|
gunicorn_server["base_url"] + "/",
|
|
json=body,
|
|
)
|
|
|
|
assert resp.status_code == 200
|
|
assert resp.http_version == "HTTP/2"
|
|
data = resp.json()
|
|
assert json.loads(data["body"]) == body
|
|
|
|
def test_h2_multiple_sequential_posts(self, gunicorn_server):
|
|
"""Multiple POSTs on the same connection must all succeed."""
|
|
ca_ctx = ssl.create_default_context(
|
|
cafile=gunicorn_server["ca_crt"]
|
|
)
|
|
|
|
with httpx.Client(http2=True, verify=ca_ctx, timeout=10.0) as client:
|
|
for i in range(5):
|
|
body = {"request_number": i, "input": [f"test {i}"]}
|
|
expected_len = _httpx_body_len(body)
|
|
|
|
resp = client.post(
|
|
gunicorn_server["base_url"] + "/",
|
|
json=body,
|
|
)
|
|
|
|
assert resp.status_code == 200, f"Request {i} failed: {resp.text}"
|
|
assert resp.http_version == "HTTP/2"
|
|
data = resp.json()
|
|
assert data["body_length"] == expected_len, (
|
|
f"Request {i}: body length mismatch "
|
|
f"({data['body_length']} != {expected_len})"
|
|
)
|
|
assert json.loads(data["body"]) == body
|
|
|
|
def test_h1_post_still_works(self, gunicorn_server):
|
|
"""HTTP/1.1 POST must continue to work (no regression)."""
|
|
ca_ctx = ssl.create_default_context(
|
|
cafile=gunicorn_server["ca_crt"]
|
|
)
|
|
body = {"input": ["hello world"]}
|
|
expected_len = _httpx_body_len(body)
|
|
|
|
# http2=False forces HTTP/1.1
|
|
with httpx.Client(http2=False, verify=ca_ctx, timeout=10.0) as client:
|
|
resp = client.post(
|
|
gunicorn_server["base_url"] + "/",
|
|
json=body,
|
|
)
|
|
|
|
assert resp.status_code == 200
|
|
assert resp.http_version == "HTTP/1.1"
|
|
data = resp.json()
|
|
assert data["body_length"] == expected_len
|
|
assert json.loads(data["body"]) == body
|