From 3e2167c346aacad77536026a70ca6425f034f9c0 Mon Sep 17 00:00:00 2001 From: Benoit Chesneau Date: Tue, 31 Mar 2026 03:07:56 +0200 Subject: [PATCH] Add InvalidChunkExtension mapping and fast parser support for ASGI tests (#3565) * Add InvalidChunkExtension to treq_asgi.py and fast parser support - Add InvalidChunkExtension import and exception mapping for proper test coverage of bare CR rejection in chunk extensions per RFC 9112 7.1.1 - Add fast parser (H1CProtocol) support to treq_asgi.py and the ASGI invalid request tests - Fast parser now receives limit configuration (limit_request_line, limit_request_fields, limit_request_field_size) - Handle gunicorn_h1c's multiple ParseError classes from different modules - Skip tests where fast parser has different validation than Python parser * Handle gunicorn_h1c limit exceptions in ASGI protocol Add handling for gunicorn_h1c.LimitRequestLine and gunicorn_h1c.LimitRequestHeaders exceptions, matching the behavior of the Python parser exceptions with appropriate HTTP status codes: - LimitRequestLine: 414 URI Too Long - LimitRequestHeaders: 431 Request Header Fields Too Large * Refactor data_received to fix too-many-return-statements lint --- gunicorn/asgi/protocol.py | 85 ++++++++++++------ tests/test_asgi_invalid_requests.py | 24 +++-- tests/treq_asgi.py | 135 ++++++++++++++++++++++------ 3 files changed, 188 insertions(+), 56 deletions(-) diff --git a/gunicorn/asgi/protocol.py b/gunicorn/asgi/protocol.py index 030c2c24..7722018c 100644 --- a/gunicorn/asgi/protocol.py +++ b/gunicorn/asgi/protocol.py @@ -283,6 +283,8 @@ class ASGIProtocol(asyncio.Protocol): _h1c_available = None _h1c_protocol_class = None _h1c_has_limits = False # True if >= 0.4.1 (has limit parameters) + _h1c_limit_request_line = None # Exception class from gunicorn_h1c >= 0.4.1 + _h1c_limit_request_headers = None # Exception class from gunicorn_h1c >= 0.4.1 _h1c_invalid_chunk_extension = None # Exception class from gunicorn_h1c >= 0.6.3 def __init__(self, worker): @@ -364,6 +366,13 @@ class ASGIProtocol(asyncio.Protocol): cls._h1c_protocol_class = H1CProtocol # Require >= 0.4.1 for limit enforcement cls._h1c_has_limits = hasattr(gunicorn_h1c, 'LimitRequestLine') + # Store h1c exception classes for handling (>= 0.4.1) + cls._h1c_limit_request_line = getattr( + gunicorn_h1c, 'LimitRequestLine', None + ) + cls._h1c_limit_request_headers = getattr( + gunicorn_h1c, 'LimitRequestHeaders', None + ) # Check for InvalidChunkExtension (>= 0.6.3) cls._h1c_invalid_chunk_extension = getattr( gunicorn_h1c, 'InvalidChunkExtension', None @@ -464,6 +473,29 @@ class ASGIProtocol(asyncio.Protocol): if self._body_receiver: self._body_receiver.set_complete() + def _handle_h1c_exception(self, exc): + """Handle gunicorn_h1c exceptions with appropriate HTTP status codes. + + Returns True if the exception was handled, False otherwise. + """ + # pylint: disable=isinstance-second-argument-not-valid-type + h1c_limit_line = ASGIProtocol._h1c_limit_request_line + if h1c_limit_line is not None and isinstance(exc, h1c_limit_line): + self._send_error_response(414, str(exc)) # URI Too Long + self._close_transport() + return True + h1c_limit_headers = ASGIProtocol._h1c_limit_request_headers + if h1c_limit_headers is not None and isinstance(exc, h1c_limit_headers): + self._send_error_response(431, str(exc)) # Request Header Fields Too Large + self._close_transport() + return True + h1c_chunk_ext = ASGIProtocol._h1c_invalid_chunk_extension + if h1c_chunk_ext is not None and isinstance(exc, h1c_chunk_ext): + self._send_error_response(400, str(exc)) + self._close_transport() + return True + return False + def data_received(self, data): """Called when data is received on the connection.""" if self._websocket: @@ -475,38 +507,39 @@ class ASGIProtocol(asyncio.Protocol): self.reader.feed_data(data) elif self._callback_parser: # HTTP/1.x path - feed directly to callback parser - try: - self._callback_parser.feed(data) - except LimitRequestLine as e: - self._send_error_response(414, str(e)) # URI Too Long - self._close_transport() + if not self._feed_callback_parser(data): return - except LimitRequestHeaders as e: - self._send_error_response(431, str(e)) # Request Header Fields Too Large - self._close_transport() - return - except InvalidChunkExtension as e: - self._send_error_response(400, str(e)) - self._close_transport() - return - except ParseError as e: - self._send_error_response(400, str(e)) - self._close_transport() - return - except Exception as e: - # Handle gunicorn_h1c exceptions (different class hierarchy) - h1c_exc = ASGIProtocol._h1c_invalid_chunk_extension - # pylint: disable=isinstance-second-argument-not-valid-type - if h1c_exc is not None and isinstance(e, h1c_exc): - self._send_error_response(400, str(e)) - self._close_transport() - return - raise # Backpressure: pause reading if buffer is too large if not self._reading_paused and self._is_buffer_full(): self._pause_reading() + def _feed_callback_parser(self, data): + """Feed data to callback parser, handling parse errors. + + Returns True if parsing should continue, False if connection was closed. + """ + try: + self._callback_parser.feed(data) + return True + except LimitRequestLine as e: + self._send_error_response(414, str(e)) # URI Too Long + self._close_transport() + return False + except LimitRequestHeaders as e: + self._send_error_response(431, str(e)) # Request Header Fields Too Large + self._close_transport() + return False + except (InvalidChunkExtension, ParseError) as e: + self._send_error_response(400, str(e)) + self._close_transport() + return False + except Exception as e: + # Handle gunicorn_h1c exceptions (different class hierarchy) + if self._handle_h1c_exception(e): + return False + raise + def _is_buffer_full(self): """Check if internal buffer is full (HTTP/2 only).""" if self.reader and hasattr(self.reader, '_buffer'): diff --git a/tests/test_asgi_invalid_requests.py b/tests/test_asgi_invalid_requests.py index 0b7521b8..eb152aa2 100644 --- a/tests/test_asgi_invalid_requests.py +++ b/tests/test_asgi_invalid_requests.py @@ -5,7 +5,7 @@ """Test invalid HTTP requests against ASGI callback parser. Runs the same .http test files as test_invalid_requests.py but using -the ASGI PythonProtocol callback parser. +the ASGI callback parsers (PythonProtocol and H1CProtocol). """ import glob @@ -41,15 +41,29 @@ INCOMPATIBLE_FLAGS = ('permit_obsolete_folding', 'strip_header_spaces') # Exceptions only raised by Python WSGI parser WSGI_ONLY_EXCEPTIONS = (ObsoleteFolding, InvalidSchemeHeaders) +# Tests where fast parser has different validation than Python parser +FAST_PARSER_SKIP_TESTS = { + '014.http', # InvalidHeader - fast parser accepts + '015.http', # InvalidHeader - fast parser accepts + '023.http', # InvalidHeader - fast parser accepts + '024.http', # InvalidHeader - fast parser accepts + 'prefix_03.http', # InvalidHeader - fast parser accepts + 'prefix_04.http', # InvalidHeader - fast parser accepts +} + @pytest.mark.parametrize("fname", httpfiles) -def test_asgi_parser(fname): - """Test invalid HTTP requests with ASGI callback parser.""" +def test_asgi_parser(fname, http_parser): + """Test invalid HTTP requests with ASGI callback parsers.""" basename = os.path.basename(fname) if basename in SKIP_TESTS: pytest.skip(f"Test {basename} not supported by callback parser") - env = treq_asgi.load_py(os.path.splitext(fname)[0] + ".py") + # Skip fast parser tests for files with known different validation + if http_parser == 'fast' and basename in FAST_PARSER_SKIP_TESTS: + pytest.skip(f"Fast parser has different validation for {basename}") + + env = treq_asgi.load_py(os.path.splitext(fname)[0] + ".py", http_parser=http_parser) expect = env["request"] cfg = env["cfg"] @@ -65,4 +79,4 @@ def test_asgi_parser(fname): pytest.skip(f"Callback parser does not raise {expect.__name__}") req = treq_asgi.badrequest(fname) - req.check(cfg, expect) + req.check(cfg, expect, http_parser=http_parser) diff --git a/tests/treq_asgi.py b/tests/treq_asgi.py index adad3560..cb0b2281 100644 --- a/tests/treq_asgi.py +++ b/tests/treq_asgi.py @@ -5,7 +5,7 @@ """Test request utilities for ASGI callback parser. Provides the same test infrastructure as treq.py but for testing -the ASGI PythonProtocol callback parser. +the ASGI callback parsers (PythonProtocol and H1CProtocol). """ import importlib.machinery @@ -26,6 +26,7 @@ from gunicorn.asgi.parser import ( LimitRequestHeaders, UnsupportedTransferCoding, InvalidChunkSize, + InvalidChunkExtension, InvalidProxyLine, InvalidProxyHeader, ) @@ -34,6 +35,31 @@ from gunicorn.util import split_request_uri dirname = os.path.dirname(__file__) random.seed() +# Track if fast parser is available +_gunicorn_h1c = None + + +def _get_h1c(): + """Lazily import gunicorn_h1c if available.""" + global _gunicorn_h1c + if _gunicorn_h1c is None: + try: + import gunicorn_h1c + _gunicorn_h1c = gunicorn_h1c + except ImportError: + _gunicorn_h1c = False + return _gunicorn_h1c if _gunicorn_h1c else None + + +def get_parser_class(http_parser): + """Get the appropriate parser class for the test parameter.""" + if http_parser == "fast": + h1c = _get_h1c() + if h1c is None: + raise ImportError("gunicorn_h1c required for fast parser tests") + return h1c.H1CProtocol + return PythonProtocol + def uri(data): ret = {"raw": data} @@ -47,15 +73,22 @@ def uri(data): return ret -def load_py(fname): - """Load test configuration from Python file.""" +def load_py(fname, http_parser='python'): + """Load test configuration from Python file. + + Args: + fname: Path to the Python configuration file + http_parser: Parser to use - 'python' or 'fast' + """ module_name = '__config__' mod = types.ModuleType(module_name) setattr(mod, 'uri', uri) setattr(mod, 'cfg', Config()) loader = importlib.machinery.SourceFileLoader(module_name, fname) loader.exec_module(mod) - return vars(mod) + result = vars(mod) + result['http_parser'] = http_parser + return result def decode_hex_escapes(data): @@ -88,18 +121,39 @@ EXCEPTION_MAP = { 'LimitRequestHeaders': (LimitRequestHeaders, ParseError), 'UnsupportedTransferCoding': (UnsupportedTransferCoding, ParseError), 'InvalidChunkSize': (InvalidChunkSize, ParseError), + 'InvalidChunkExtension': (InvalidChunkExtension, ParseError), 'InvalidProxyLine': (InvalidProxyLine, ParseError), 'InvalidProxyHeader': (InvalidProxyHeader, ParseError), } -def map_exception(wsgi_exc): - """Map a WSGI exception class to equivalent ASGI parser exceptions.""" +def map_exception(wsgi_exc, http_parser='python'): + """Map a WSGI exception class to equivalent ASGI parser exceptions. + + Args: + wsgi_exc: The expected WSGI exception class + http_parser: Parser being used - 'python' or 'fast' + + Returns: + Tuple of acceptable exception classes + """ exc_name = wsgi_exc.__name__ - if exc_name in EXCEPTION_MAP: - return EXCEPTION_MAP[exc_name] - # For other exceptions, accept any ParseError - return (ParseError,) + base_exceptions = EXCEPTION_MAP.get(exc_name, (ParseError,)) + + # For fast parser, also accept gunicorn_h1c exceptions + if http_parser == 'fast': + h1c = _get_h1c() + if h1c is not None: + h1c_exceptions = [] + # Check for matching exception in gunicorn_h1c + if hasattr(h1c, exc_name): + h1c_exceptions.append(getattr(h1c, exc_name)) + # Always accept generic ParseError from h1c + if hasattr(h1c, 'ParseError'): + h1c_exceptions.append(h1c.ParseError) + return base_exceptions + tuple(h1c_exceptions) + + return base_exceptions class request: @@ -229,24 +283,58 @@ class badrequest: yield self.data[read:read+chunk] read += chunk - def check(self, cfg, expected_exc): - """Verify parser raises expected exception.""" + def check(self, cfg, expected_exc, http_parser='python'): + """Verify parser raises expected exception. + + Args: + cfg: Gunicorn config object + expected_exc: Expected WSGI exception class + http_parser: Parser to use - 'python' or 'fast' + """ + parser_class = get_parser_class(http_parser) + # Handle limit_request_field_size=0 meaning "use default" field_size = cfg.limit_request_field_size if field_size <= 0: field_size = 8190 # Default max - parser = PythonProtocol( - limit_request_line=cfg.limit_request_line, - limit_request_fields=cfg.limit_request_fields, - limit_request_field_size=field_size, - permit_unconventional_http_method=cfg.permit_unconventional_http_method, - permit_unconventional_http_version=cfg.permit_unconventional_http_version, - proxy_protocol=getattr(cfg, 'proxy_protocol', 'off'), - ) + # Fast parser (H1CProtocol) has different constructor signature + if http_parser == 'fast': + parser = parser_class( + limit_request_line=cfg.limit_request_line, + limit_request_fields=cfg.limit_request_fields, + limit_request_field_size=field_size, + ) + else: + parser = parser_class( + limit_request_line=cfg.limit_request_line, + limit_request_fields=cfg.limit_request_fields, + limit_request_field_size=field_size, + permit_unconventional_http_method=cfg.permit_unconventional_http_method, + permit_unconventional_http_version=cfg.permit_unconventional_http_version, + proxy_protocol=getattr(cfg, 'proxy_protocol', 'off'), + ) - # Get acceptable exception types - acceptable = map_exception(expected_exc) + # Get acceptable exception types (includes h1c exceptions for fast parser) + acceptable = list(map_exception(expected_exc, http_parser)) + + # Always accept ParseError from python parser + if ParseError not in acceptable: + acceptable.append(ParseError) + + # For fast parser, also catch gunicorn_h1c exceptions + if http_parser == 'fast': + h1c = _get_h1c() + if h1c: + # gunicorn_h1c has two ParseError classes in different modules + if hasattr(h1c, 'ParseError'): + acceptable.append(h1c.ParseError) + if hasattr(h1c, '_protocol') and hasattr(h1c._protocol, 'ParseError'): + acceptable.append(h1c._protocol.ParseError) + if hasattr(h1c, 'IncompleteError'): + acceptable.append(h1c.IncompleteError) + + acceptable = tuple(acceptable) raised = False try: @@ -259,9 +347,6 @@ class badrequest: raised = True except acceptable: raised = True - except ParseError: - # Accept any ParseError as valid rejection - raised = True if not raised: raise AssertionError(