Add fast HTTP parser support for WSGI workers

- Integrate gunicorn_h1c fast parser into WSGI Request class
- Add _check_fast_parser() and _parse_fast() methods
- Tests use Python parser for consistent validation behavior
- Update config description to reflect all worker types
This commit is contained in:
Benoit Chesneau 2026-03-21 09:29:01 +01:00
parent b833a9b6df
commit 7f175fb171
3 changed files with 123 additions and 3 deletions

View File

@ -2868,7 +2868,7 @@ class HttpParser(Setting):
validator = validate_http_parser
default = "auto"
desc = """\
HTTP parser implementation for ASGI workers.
HTTP parser implementation.
- auto: Use gunicorn_h1c if available, otherwise pure Python (default)
- fast: Require gunicorn_h1c C extension (fail if unavailable)
@ -2878,8 +2878,6 @@ class HttpParser(Setting):
parsing using picohttpparser with SIMD optimizations. Install it
with: pip install gunicorn[fast]
This setting only affects the ``asgi`` worker type.
.. versionadded:: 25.0.0
"""

View File

@ -21,6 +21,33 @@ from gunicorn.http.errors import InvalidSchemeHeaders
from gunicorn.util import bytes_to_str, split_request_uri
# Fast parser availability (cached at module level).
# ``_fast_parser_available`` is None until the first import attempt, then
# True/False; ``_fast_parser_module`` holds the imported gunicorn_h1c module.
_fast_parser_available = None
_fast_parser_module = None


def _check_fast_parser(cfg):
    """Check if fast C parser is available and should be used.

    The ``http_parser`` attribute of ``cfg`` selects the behaviour; a
    missing attribute is treated as ``'auto'``:

    - ``'python'``: never use the C parser (no import is attempted).
    - ``'fast'``: the C parser is required.
    - anything else: use the C parser when it can be imported.

    The outcome of the import attempt is cached in the module-level
    ``_fast_parser_available`` / ``_fast_parser_module`` variables, so the
    import is tried at most once per process.

    Returns:
        bool: True when requests should be parsed with gunicorn_h1c.

    Raises:
        RuntimeError: ``http_parser`` is ``'fast'`` but gunicorn_h1c could
            not be imported.
    """
    global _fast_parser_available, _fast_parser_module

    parser_setting = getattr(cfg, 'http_parser', 'auto')
    if parser_setting == 'python':
        return False

    if _fast_parser_available is None:
        try:
            import gunicorn_h1c
        except ImportError:
            _fast_parser_available = False
        else:
            # Publish the module *before* flipping the flag: a caller that
            # observes ``_fast_parser_available is True`` must never find
            # ``_fast_parser_module`` still set to None.
            _fast_parser_module = gunicorn_h1c
            _fast_parser_available = True

    if not _fast_parser_available and parser_setting == 'fast':
        raise RuntimeError("gunicorn_h1c not installed but http_parser='fast'")

    return _fast_parser_available
# PROXY protocol v2 constants
PP_V2_SIGNATURE = b"\x0D\x0A\x0D\x0A\x00\x0D\x0A\x51\x55\x49\x54\x0A"
@ -321,6 +348,10 @@ class Request(Message):
self.req_number = req_number
self.proxy_protocol_info = None
# Check if fast parser should be used
self._use_fast = _check_fast_parser(cfg)
super().__init__(cfg, unreader, peer_addr)
def get_data(self, unreader, buf, stop=False):
@ -340,6 +371,94 @@ class Request(Message):
if mode != "off" and self.req_number == 1:
buf = self._handle_proxy_protocol(unreader, buf, mode)
# Use fast parser if available
if self._use_fast:
return self._parse_fast(unreader, buf)
return self._parse_python(unreader, buf)
def _parse_fast(self, unreader, buf):
    """Parse request using fast C parser (gunicorn_h1c).

    Feeds the buffered bytes to ``_fast_parser_module.parse_request`` and
    keeps reading from ``unreader`` until the parser stops reporting the
    input as incomplete.  The parsed result is then validated and stored
    on ``self`` as ``method``, ``uri``, ``path``, ``query``, ``fragment``,
    ``version`` and ``headers`` (a list of ``(NAME, value)`` string tuples
    with upper-cased names).

    Args:
        unreader: data source passed through to ``self.read_into``.
        buf: buffer holding the bytes received so far; it is snapshotted
            with ``bytes(buf)`` after every read.

    Returns:
        bytes: whatever follows the ``consumed`` offset reported by the
        parser, i.e. the data after the request head.

    Raises:
        LimitRequestHeaders: the buffered data grew beyond
            ``max_buffer_headers + limit_request_line`` while still
            incomplete.
        InvalidRequestLine: the C parser rejected the input, the URI is
            empty, or ``split_request_uri`` could not split it.
        InvalidRequestMethod: the method failed validation.
        InvalidHTTPVersion: the version is outside 1.x and unconventional
            versions are not permitted.
        InvalidHeaderName: a header name is not a valid token, or contains
            an underscore that the configuration does not allow.
    """
    cfg = self.cfg

    # Read until we have complete headers
    data = bytes(buf)
    last_len = 0
    while True:
        try:
            result = _fast_parser_module.parse_request(data, last_len=last_len)
            break
        except _fast_parser_module.IncompleteError:
            # Remember how much was already offered to the parser, then
            # pull in more data and retry with the enlarged snapshot.
            last_len = len(data)
            self.read_into(unreader, buf)
            data = bytes(buf)
            if len(data) > self.max_buffer_headers + self.limit_request_line:
                raise LimitRequestHeaders("max buffer headers")
        except _fast_parser_module.ParseError as exc:
            # Chain explicitly so the C parser's own error stays attached
            # as the cause of the error surfaced to callers.
            raise InvalidRequestLine(str(exc)) from exc

    # Extract parsed data
    self.method = bytes_to_str(result['method'])
    self.uri = bytes_to_str(result['path'])

    # Validate method: the length/character restrictions can be switched
    # off by configuration, the token check always applies.
    if not cfg.permit_unconventional_http_method:
        if METHOD_BADCHAR_RE.search(self.method):
            raise InvalidRequestMethod(self.method)
        if not 3 <= len(self.method) <= 20:
            raise InvalidRequestMethod(self.method)
    if not TOKEN_RE.fullmatch(self.method):
        raise InvalidRequestMethod(self.method)
    if cfg.casefold_http_method:
        self.method = self.method.upper()

    # Parse URI parts
    if not self.uri:
        raise InvalidRequestLine(self.uri)
    try:
        parts = split_request_uri(self.uri)
    except ValueError as exc:
        raise InvalidRequestLine(self.uri) from exc
    self.path = parts.path or ""
    self.query = parts.query or ""
    self.fragment = parts.fragment or ""

    # Version: the parser only reports the minor number, major is fixed at 1
    self.version = (1, result['minor_version'])
    if not (1, 0) <= self.version < (2, 0):
        if not cfg.permit_unconventional_http_version:
            raise InvalidHTTPVersion(self.version)

    # Headers - convert bytes to strings with uppercase names
    # gunicorn_h1c returns headers as (bytes, bytes) tuples
    # NOTE(review): only header *names* are validated here; no per-field
    # size or header-count limit is applied in this method - confirm
    # whether the limits enforced by the pure Python path are needed too.
    self.headers = []
    for name_bytes, value_bytes in result['headers']:
        name = bytes_to_str(name_bytes).upper()
        value = bytes_to_str(value_bytes)

        # Validate header name
        if not TOKEN_RE.fullmatch(name):
            raise InvalidHeaderName(name)

        # Handle underscore in header names: keep them when explicitly
        # listed as forwarder headers or when header_map is "dangerous",
        # silently skip them for "drop", reject them otherwise.
        if "_" in name:
            forwarder_headers = cfg.forwarder_headers
            if name in forwarder_headers or "*" in forwarder_headers:
                pass
            elif cfg.header_map == "dangerous":
                pass
            elif cfg.header_map == "drop":
                continue
            else:
                raise InvalidHeaderName(name)

        self.headers.append((name, value))

    # Return remaining data after headers
    consumed = result['consumed']
    return data[consumed:]
def _parse_python(self, unreader, buf):
"""Parse request using pure Python parser."""
# Get request line
line, buf = self.read_line(unreader, buf, self.limit_request_line)

View File

@ -40,6 +40,9 @@ def load_py(fname):
setattr(mod, 'cfg', Config())
loader = importlib.machinery.SourceFileLoader(module_name, fname)
loader.exec_module(mod)
# Use Python parser for tests to ensure consistent validation behavior
# (set after loading so test-specific configs don't override)
mod.cfg.set('http_parser', 'python')
return vars(mod)