mirror of
https://github.com/frappe/gunicorn.git
synced 2026-07-01 18:21:30 +08:00
Fix non-ASCII URL handling in ASGI worker
Percent-decode path to UTF-8 and preserve raw_path as original bytes per ASGI spec. Fixes #3543
This commit is contained in:
parent
932331d8a4
commit
ba1aaa5e33
@ -443,7 +443,7 @@ class CallbackRequest:
|
||||
|
||||
__slots__ = (
|
||||
'method', 'uri', 'path', 'query', 'fragment', 'version',
|
||||
'headers', 'headers_bytes', 'scheme',
|
||||
'headers', 'headers_bytes', 'scheme', 'raw_path',
|
||||
'content_length', 'chunked', 'must_close',
|
||||
'proxy_protocol_info', '_expect_100_continue',
|
||||
)
|
||||
@ -458,6 +458,7 @@ class CallbackRequest:
|
||||
self.headers = []
|
||||
self.headers_bytes = []
|
||||
self.scheme = "http"
|
||||
self.raw_path = b''
|
||||
self.content_length = 0
|
||||
self.chunked = False
|
||||
self.must_close = False
|
||||
@ -475,20 +476,27 @@ class CallbackRequest:
|
||||
Returns:
|
||||
CallbackRequest instance
|
||||
"""
|
||||
from urllib.parse import unquote_to_bytes
|
||||
|
||||
req = cls()
|
||||
req.method = parser.method.decode('ascii')
|
||||
|
||||
# Parse path and query from URL
|
||||
raw_path = parser.path
|
||||
if b'?' in raw_path:
|
||||
path_part, query_part = raw_path.split(b'?', 1)
|
||||
req.path = path_part.decode('latin-1')
|
||||
# Per ASGI spec:
|
||||
# - path: percent-decoded UTF-8 string
|
||||
# - raw_path: original bytes as received
|
||||
raw_url = parser.path
|
||||
if b'?' in raw_url:
|
||||
path_part, query_part = raw_url.split(b'?', 1)
|
||||
req.raw_path = path_part # Store original bytes
|
||||
req.path = unquote_to_bytes(path_part).decode('utf-8', errors='replace')
|
||||
req.query = query_part.decode('latin-1')
|
||||
else:
|
||||
req.path = raw_path.decode('latin-1')
|
||||
req.raw_path = raw_url # Store original bytes
|
||||
req.path = unquote_to_bytes(raw_url).decode('utf-8', errors='replace')
|
||||
req.query = ''
|
||||
|
||||
req.uri = raw_path.decode('latin-1')
|
||||
req.uri = raw_url.decode('latin-1')
|
||||
req.fragment = ''
|
||||
req.version = parser.http_version
|
||||
|
||||
|
||||
@ -921,7 +921,7 @@ class ASGIProtocol(asyncio.Protocol):
|
||||
"method": request.method,
|
||||
"scheme": request.scheme,
|
||||
"path": request.path,
|
||||
"raw_path": request.path.encode("latin-1") if request.path else b"",
|
||||
"raw_path": request.raw_path if request.raw_path else b"",
|
||||
"query_string": request.query.encode("latin-1") if request.query else b"",
|
||||
"root_path": self.cfg.root_path or "",
|
||||
"headers": headers,
|
||||
@ -985,7 +985,7 @@ class ASGIProtocol(asyncio.Protocol):
|
||||
"http_version": f"{request.version[0]}.{request.version[1]}",
|
||||
"scheme": "wss" if request.scheme == "https" else "ws",
|
||||
"path": request.path,
|
||||
"raw_path": request.path.encode("latin-1") if request.path else b"",
|
||||
"raw_path": request.raw_path if request.raw_path else b"",
|
||||
"query_string": request.query.encode("latin-1") if request.query else b"",
|
||||
"root_path": self.cfg.root_path or "",
|
||||
"headers": headers,
|
||||
@ -1457,7 +1457,7 @@ class ASGIProtocol(asyncio.Protocol):
|
||||
"method": request.method,
|
||||
"scheme": request.scheme,
|
||||
"path": request.path,
|
||||
"raw_path": request.path.encode("latin-1") if request.path else b"",
|
||||
"raw_path": getattr(request, 'raw_path', None) or (request.path.encode("latin-1") if request.path else b""),
|
||||
"query_string": request.query.encode("latin-1") if request.query else b"",
|
||||
"root_path": self.cfg.root_path or "",
|
||||
"headers": headers,
|
||||
|
||||
@ -429,3 +429,90 @@ class TestCallbackBehavior:
|
||||
|
||||
assert parser.is_complete
|
||||
assert body_chunks == [] # Body was skipped
|
||||
|
||||
|
||||
class TestCallbackRequest:
    """Test CallbackRequest building from parser state.

    Per ASGI spec:
    - path: percent-decoded UTF-8 string
    - raw_path: original bytes as received
    """

    @staticmethod
    def _build_request(http_parser, target):
        """Parse a minimal GET request for *target* into a CallbackRequest.

        Args:
            http_parser: Value of the ``http_parser`` fixture; it is handed
                to ``get_parser_class`` to pick the parser implementation.
            target: Request target as raw bytes, exactly as it should
                appear on the request line (path plus optional query).

        Returns:
            The object returned by ``CallbackRequest.from_parser`` after the
            parser has been fed the complete request head.
        """
        # Kept function-local, as in the original per-test imports.
        from gunicorn.asgi.parser import CallbackRequest

        parser = get_parser_class(http_parser)()
        parser.feed(b"GET " + target + b" HTTP/1.1\r\nHost: test\r\n\r\n")
        return CallbackRequest.from_parser(parser)

    def test_non_ascii_path_decoding(self, http_parser):
        """Test that percent-encoded UTF-8 paths are decoded correctly."""
        # ö = %C3%B6 in UTF-8 percent-encoded
        request = self._build_request(http_parser, b"/%C3%B6/")

        # path should be percent-decoded UTF-8 string
        assert request.path == "/\u00f6/"  # /ö/
        # raw_path should be original bytes
        assert request.raw_path == b"/%C3%B6/"

    def test_non_ascii_path_with_query(self, http_parser):
        """Test percent-encoded path with query string."""
        # Japanese: /日本/ = /%E6%97%A5%E6%9C%AC/
        request = self._build_request(
            http_parser, b"/%E6%97%A5%E6%9C%AC/?q=test"
        )

        assert request.path == "/\u65e5\u672c/"  # /日本/
        # The query string must not leak into raw_path.
        assert request.raw_path == b"/%E6%97%A5%E6%9C%AC/"
        assert request.query == "q=test"

    def test_invalid_utf8_path(self, http_parser):
        """Test that invalid UTF-8 sequences use replacement character."""
        # %FF is invalid UTF-8
        request = self._build_request(http_parser, b"/%FF")

        # The single invalid byte becomes exactly one U+FFFD; pin the whole
        # path so a mangled prefix cannot slip past a substring check.
        assert request.path == "/\ufffd"
        assert request.raw_path == b"/%FF"

    def test_simple_ascii_path(self, http_parser):
        """Test that simple ASCII paths work unchanged."""
        request = self._build_request(http_parser, b"/api/users")

        assert request.path == "/api/users"
        assert request.raw_path == b"/api/users"

    def test_percent_encoded_ascii(self, http_parser):
        """Test percent-encoded ASCII characters."""
        # Space encoded as %20
        request = self._build_request(http_parser, b"/hello%20world")

        assert request.path == "/hello world"
        assert request.raw_path == b"/hello%20world"
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user