mirror of
https://github.com/frappe/gunicorn.git
synced 2026-07-02 18:51:31 +08:00
Fix non-ASCII URL handling in ASGI worker
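Percent-decode the request path and decode it as UTF-8 (invalid byte sequences become U+FFFD), and preserve raw_path as the original path bytes, excluding the query string, per the ASGI spec. Fixes #3543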
This commit is contained in:
parent
932331d8a4
commit
ba1aaa5e33
@ -443,7 +443,7 @@ class CallbackRequest:
|
|||||||
|
|
||||||
__slots__ = (
|
__slots__ = (
|
||||||
'method', 'uri', 'path', 'query', 'fragment', 'version',
|
'method', 'uri', 'path', 'query', 'fragment', 'version',
|
||||||
'headers', 'headers_bytes', 'scheme',
|
'headers', 'headers_bytes', 'scheme', 'raw_path',
|
||||||
'content_length', 'chunked', 'must_close',
|
'content_length', 'chunked', 'must_close',
|
||||||
'proxy_protocol_info', '_expect_100_continue',
|
'proxy_protocol_info', '_expect_100_continue',
|
||||||
)
|
)
|
||||||
@ -458,6 +458,7 @@ class CallbackRequest:
|
|||||||
self.headers = []
|
self.headers = []
|
||||||
self.headers_bytes = []
|
self.headers_bytes = []
|
||||||
self.scheme = "http"
|
self.scheme = "http"
|
||||||
|
self.raw_path = b''
|
||||||
self.content_length = 0
|
self.content_length = 0
|
||||||
self.chunked = False
|
self.chunked = False
|
||||||
self.must_close = False
|
self.must_close = False
|
||||||
@ -475,20 +476,27 @@ class CallbackRequest:
|
|||||||
Returns:
|
Returns:
|
||||||
CallbackRequest instance
|
CallbackRequest instance
|
||||||
"""
|
"""
|
||||||
|
from urllib.parse import unquote_to_bytes
|
||||||
|
|
||||||
req = cls()
|
req = cls()
|
||||||
req.method = parser.method.decode('ascii')
|
req.method = parser.method.decode('ascii')
|
||||||
|
|
||||||
# Parse path and query from URL
|
# Parse path and query from URL
|
||||||
raw_path = parser.path
|
# Per ASGI spec:
|
||||||
if b'?' in raw_path:
|
# - path: percent-decoded UTF-8 string
|
||||||
path_part, query_part = raw_path.split(b'?', 1)
|
# - raw_path: original bytes as received
|
||||||
req.path = path_part.decode('latin-1')
|
raw_url = parser.path
|
||||||
|
if b'?' in raw_url:
|
||||||
|
path_part, query_part = raw_url.split(b'?', 1)
|
||||||
|
req.raw_path = path_part # Store original bytes
|
||||||
|
req.path = unquote_to_bytes(path_part).decode('utf-8', errors='replace')
|
||||||
req.query = query_part.decode('latin-1')
|
req.query = query_part.decode('latin-1')
|
||||||
else:
|
else:
|
||||||
req.path = raw_path.decode('latin-1')
|
req.raw_path = raw_url # Store original bytes
|
||||||
|
req.path = unquote_to_bytes(raw_url).decode('utf-8', errors='replace')
|
||||||
req.query = ''
|
req.query = ''
|
||||||
|
|
||||||
req.uri = raw_path.decode('latin-1')
|
req.uri = raw_url.decode('latin-1')
|
||||||
req.fragment = ''
|
req.fragment = ''
|
||||||
req.version = parser.http_version
|
req.version = parser.http_version
|
||||||
|
|
||||||
|
|||||||
@ -921,7 +921,7 @@ class ASGIProtocol(asyncio.Protocol):
|
|||||||
"method": request.method,
|
"method": request.method,
|
||||||
"scheme": request.scheme,
|
"scheme": request.scheme,
|
||||||
"path": request.path,
|
"path": request.path,
|
||||||
"raw_path": request.path.encode("latin-1") if request.path else b"",
|
"raw_path": request.raw_path if request.raw_path else b"",
|
||||||
"query_string": request.query.encode("latin-1") if request.query else b"",
|
"query_string": request.query.encode("latin-1") if request.query else b"",
|
||||||
"root_path": self.cfg.root_path or "",
|
"root_path": self.cfg.root_path or "",
|
||||||
"headers": headers,
|
"headers": headers,
|
||||||
@ -985,7 +985,7 @@ class ASGIProtocol(asyncio.Protocol):
|
|||||||
"http_version": f"{request.version[0]}.{request.version[1]}",
|
"http_version": f"{request.version[0]}.{request.version[1]}",
|
||||||
"scheme": "wss" if request.scheme == "https" else "ws",
|
"scheme": "wss" if request.scheme == "https" else "ws",
|
||||||
"path": request.path,
|
"path": request.path,
|
||||||
"raw_path": request.path.encode("latin-1") if request.path else b"",
|
"raw_path": request.raw_path if request.raw_path else b"",
|
||||||
"query_string": request.query.encode("latin-1") if request.query else b"",
|
"query_string": request.query.encode("latin-1") if request.query else b"",
|
||||||
"root_path": self.cfg.root_path or "",
|
"root_path": self.cfg.root_path or "",
|
||||||
"headers": headers,
|
"headers": headers,
|
||||||
@ -1457,7 +1457,7 @@ class ASGIProtocol(asyncio.Protocol):
|
|||||||
"method": request.method,
|
"method": request.method,
|
||||||
"scheme": request.scheme,
|
"scheme": request.scheme,
|
||||||
"path": request.path,
|
"path": request.path,
|
||||||
"raw_path": request.path.encode("latin-1") if request.path else b"",
|
"raw_path": getattr(request, 'raw_path', None) or (request.path.encode("latin-1") if request.path else b""),
|
||||||
"query_string": request.query.encode("latin-1") if request.query else b"",
|
"query_string": request.query.encode("latin-1") if request.query else b"",
|
||||||
"root_path": self.cfg.root_path or "",
|
"root_path": self.cfg.root_path or "",
|
||||||
"headers": headers,
|
"headers": headers,
|
||||||
|
|||||||
@ -429,3 +429,90 @@ class TestCallbackBehavior:
|
|||||||
|
|
||||||
assert parser.is_complete
|
assert parser.is_complete
|
||||||
assert body_chunks == [] # Body was skipped
|
assert body_chunks == [] # Body was skipped
|
||||||
|
|
||||||
|
|
||||||
|
class TestCallbackRequest:
|
||||||
|
"""Test CallbackRequest building from parser state."""
|
||||||
|
|
||||||
|
def test_non_ascii_path_decoding(self, http_parser):
|
||||||
|
"""Test that percent-encoded UTF-8 paths are decoded correctly.
|
||||||
|
|
||||||
|
Per ASGI spec:
|
||||||
|
- path: percent-decoded UTF-8 string
|
||||||
|
- raw_path: original bytes as received
|
||||||
|
"""
|
||||||
|
from gunicorn.asgi.parser import CallbackRequest
|
||||||
|
|
||||||
|
parser_class = get_parser_class(http_parser)
|
||||||
|
parser = parser_class()
|
||||||
|
|
||||||
|
# ö = %C3%B6 in UTF-8 percent-encoded
|
||||||
|
parser.feed(b"GET /%C3%B6/ HTTP/1.1\r\nHost: test\r\n\r\n")
|
||||||
|
|
||||||
|
request = CallbackRequest.from_parser(parser)
|
||||||
|
|
||||||
|
# path should be percent-decoded UTF-8 string
|
||||||
|
assert request.path == "/\u00f6/" # /ö/
|
||||||
|
# raw_path should be original bytes
|
||||||
|
assert request.raw_path == b"/%C3%B6/"
|
||||||
|
|
||||||
|
def test_non_ascii_path_with_query(self, http_parser):
|
||||||
|
"""Test percent-encoded path with query string."""
|
||||||
|
from gunicorn.asgi.parser import CallbackRequest
|
||||||
|
|
||||||
|
parser_class = get_parser_class(http_parser)
|
||||||
|
parser = parser_class()
|
||||||
|
|
||||||
|
# Japanese: /日本/ = /%E6%97%A5%E6%9C%AC/
|
||||||
|
parser.feed(b"GET /%E6%97%A5%E6%9C%AC/?q=test HTTP/1.1\r\nHost: test\r\n\r\n")
|
||||||
|
|
||||||
|
request = CallbackRequest.from_parser(parser)
|
||||||
|
|
||||||
|
assert request.path == "/\u65e5\u672c/" # /日本/
|
||||||
|
assert request.raw_path == b"/%E6%97%A5%E6%9C%AC/"
|
||||||
|
assert request.query == "q=test"
|
||||||
|
|
||||||
|
def test_invalid_utf8_path(self, http_parser):
|
||||||
|
"""Test that invalid UTF-8 sequences use replacement character."""
|
||||||
|
from gunicorn.asgi.parser import CallbackRequest
|
||||||
|
|
||||||
|
parser_class = get_parser_class(http_parser)
|
||||||
|
parser = parser_class()
|
||||||
|
|
||||||
|
# %FF is invalid UTF-8
|
||||||
|
parser.feed(b"GET /%FF HTTP/1.1\r\nHost: test\r\n\r\n")
|
||||||
|
|
||||||
|
request = CallbackRequest.from_parser(parser)
|
||||||
|
|
||||||
|
# Should use replacement character for invalid bytes
|
||||||
|
assert "\ufffd" in request.path
|
||||||
|
assert request.raw_path == b"/%FF"
|
||||||
|
|
||||||
|
def test_simple_ascii_path(self, http_parser):
|
||||||
|
"""Test that simple ASCII paths work unchanged."""
|
||||||
|
from gunicorn.asgi.parser import CallbackRequest
|
||||||
|
|
||||||
|
parser_class = get_parser_class(http_parser)
|
||||||
|
parser = parser_class()
|
||||||
|
|
||||||
|
parser.feed(b"GET /api/users HTTP/1.1\r\nHost: test\r\n\r\n")
|
||||||
|
|
||||||
|
request = CallbackRequest.from_parser(parser)
|
||||||
|
|
||||||
|
assert request.path == "/api/users"
|
||||||
|
assert request.raw_path == b"/api/users"
|
||||||
|
|
||||||
|
def test_percent_encoded_ascii(self, http_parser):
|
||||||
|
"""Test percent-encoded ASCII characters."""
|
||||||
|
from gunicorn.asgi.parser import CallbackRequest
|
||||||
|
|
||||||
|
parser_class = get_parser_class(http_parser)
|
||||||
|
parser = parser_class()
|
||||||
|
|
||||||
|
# Space encoded as %20
|
||||||
|
parser.feed(b"GET /hello%20world HTTP/1.1\r\nHost: test\r\n\r\n")
|
||||||
|
|
||||||
|
request = CallbackRequest.from_parser(parser)
|
||||||
|
|
||||||
|
assert request.path == "/hello world"
|
||||||
|
assert request.raw_path == b"/hello%20world"
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user