gunicorn/gunicorn/http/message.py

#
# This file is part of gunicorn released under the MIT license.
# See the NOTICE for more information.

from enum import IntEnum
import ipaddress
import re
import socket
import struct

from gunicorn.http.body import ChunkedReader, LengthReader, EOFReader, Body
from gunicorn.http.errors import (
    InvalidHeader, InvalidHeaderName, NoMoreData,
    InvalidRequestLine, InvalidRequestMethod, InvalidHTTPVersion,
    LimitRequestLine, LimitRequestHeaders,
    UnsupportedTransferCoding, ObsoleteFolding,
    ExpectationFailed,
)
from gunicorn.http.errors import InvalidProxyLine, InvalidProxyHeader, ForbiddenProxyRequest
from gunicorn.http.errors import InvalidSchemeHeaders
from gunicorn.util import bytes_to_str, split_request_uri


# Fast parser availability (cached at module level)
# None means "not probed yet"; _check_fast_parser() stores True/False here
# after its first import attempt so the import is tried only once.
_fast_parser_available = None
# The imported gunicorn_h1c module after a successful probe, otherwise None.
_fast_parser_module = None

# Compatibility flags not supported by the fast parser
# If any of these config attributes is truthy, _check_fast_parser() falls
# back to the Python parser in 'auto' mode and raises in 'fast' mode.
_FAST_PARSER_INCOMPATIBLE_FLAGS = (
    'permit_obsolete_folding',
    'strip_header_spaces',
)


def _check_fast_parser(cfg):
    """Check if fast C parser is available and should be used.

    Returns False if:
    - http_parser='python' is explicitly set
    - gunicorn_h1c is not installed (in 'auto' mode)
    - gunicorn_h1c < 0.4.1 (in 'auto' mode)
    - Incompatible compatibility flags are enabled (in 'auto' mode)

    Raises RuntimeError if:
    - http_parser='fast' but gunicorn_h1c is not installed
    - http_parser='fast' but gunicorn_h1c < 0.4.1
    - http_parser='fast' but incompatible flags are enabled
    """
    global _fast_parser_available, _fast_parser_module  # pylint: disable=global-statement

    parser_setting = getattr(cfg, 'http_parser', 'auto')
    if parser_setting == 'python':
        return False

    if _fast_parser_available is None:
        try:
            import gunicorn_h1c
            _fast_parser_available = True
            _fast_parser_module = gunicorn_h1c
        except ImportError:
            _fast_parser_available = False

    if not _fast_parser_available and parser_setting == 'fast':
        raise RuntimeError("gunicorn_h1c not installed but http_parser='fast'")

    if not _fast_parser_available:
        return False

    # Require >= 0.4.1 for limit enforcement
    if not hasattr(_fast_parser_module, 'LimitRequestLine'):
        if parser_setting == 'fast':
            raise RuntimeError(
                "gunicorn_h1c >= 0.4.1 required for http_parser='fast'. "
                "Please upgrade: pip install --upgrade gunicorn_h1c"
            )
        # In 'auto' mode, fall back to Python parser
        return False

    # Check for incompatible compatibility flags
    incompatible = []
    for flag in _FAST_PARSER_INCOMPATIBLE_FLAGS:
        if getattr(cfg, flag, False):
            incompatible.append(flag)

    if incompatible:
        if parser_setting == 'fast':
            raise RuntimeError(
                "http_parser='fast' is incompatible with compatibility flags: %s. "
                "Use http_parser='python' or disable these flags."
                % ', '.join(incompatible)
            )
        # In 'auto' mode, fall back to Python parser
        return False

    return True


# PROXY protocol v2 constants
# 12-byte binary signature; compared against the first 12 bytes of the
# connection to detect a v2 header.
PP_V2_SIGNATURE = b"\x0D\x0A\x0D\x0A\x00\x0D\x0A\x51\x55\x49\x54\x0A"


class PPCommand(IntEnum):
    """PROXY protocol v2 command (low nibble of the version/command byte)."""

    LOCAL = 0
    PROXY = 1


class PPFamily(IntEnum):
    """PROXY protocol v2 address family (high nibble of the family/protocol byte)."""

    UNSPEC = 0
    INET = 1   # IPv4
    INET6 = 2  # IPv6
    UNIX = 3


class PPProtocol(IntEnum):
    """PROXY protocol v2 transport protocol (low nibble of the family/protocol byte)."""

    UNSPEC = 0
    STREAM = 1  # TCP
    DGRAM = 2   # UDP


# Upper bound applied to cfg.limit_request_line (also used when it is negative).
MAX_REQUEST_LINE = 8190
# Upper bound applied to cfg.limit_request_fields (also used when it is <= 0).
MAX_HEADERS = 32768
# Fallback for cfg.limit_request_field_size when it is <= 0.
DEFAULT_MAX_HEADERFIELD_SIZE = 8190

# verbosely on purpose, avoid backslash ambiguity
RFC9110_5_6_2_TOKEN_SPECIALS = r"!#$%&'*+-.^_`|~"
# One or more token characters; used with fullmatch() on methods and header names.
TOKEN_RE = re.compile(r"[%s0-9a-zA-Z]+" % (re.escape(RFC9110_5_6_2_TOKEN_SPECIALS)))
# Characters rejected in a method unless permit_unconventional_http_method is set.
METHOD_BADCHAR_RE = re.compile("[a-z#]")
# usually 1.0 or 1.1 - RFC9112 permits restricting to single-digit versions
VERSION_RE = re.compile(r"HTTP/(\d)\.(\d)")
# RFC 9110 section 5.5: field-vchar = VCHAR / obs-text; SP and HTAB are the
# only non-VCHAR bytes allowed in a field-value. Anything else in the
# control range (0x00-0x1F except HTAB, plus DEL 0x7F) must be rejected.
RFC9110_5_5_INVALID_AND_DANGEROUS = re.compile(r"[\x00-\x08\x0a-\x1f\x7f]")

# RFC 9110 section 6.5.1: fields forbidden in trailers because they alter
# routing, framing, or authentication. Using the uppercased names stored
# by parse_headers.
RFC9110_6_5_1_FORBIDDEN_TRAILER = frozenset((
    "HOST",
    "CONTENT-LENGTH",
    "TRANSFER-ENCODING",
    "TRAILER",
    "AUTHORIZATION",
    "TE",
))


def _ip_in_allow_list(ip_str, allow_list, networks):
    """Check if IP address is in the allow list.

    Args:
        ip_str: The IP address string to check
        allow_list: The original allow list (strings, may contain "*")
        networks: Pre-computed ipaddress.ip_network objects from config
    """
    if '*' in allow_list:
        return True
    try:
        ip = ipaddress.ip_address(ip_str)
    except ValueError:
        return False
    for network in networks:
        if ip in network:
            return True
    return False


class Message:
    def __init__(self, cfg, unreader, peer_addr):
        """Record connection context, normalise header limits, then parse.

        Construction is not passive: it calls ``self.parse`` on the
        unreader, pushes the unused bytes back with ``unreader.unread`` and
        finally selects a body reader via ``self.set_body_reader``.
        """
        self.cfg = cfg
        self.unreader = unreader
        self.peer_addr = peer_addr
        self.remote_addr = peer_addr
        self.version = None
        self.headers = []
        self.trailers = []
        self.body = None
        if cfg.is_ssl:
            self.scheme = "https"
        else:
            self.scheme = "http"
        self.must_close = False
        self._expected_100_continue = False

        # number of header fields: out-of-range settings become MAX_HEADERS
        field_limit = cfg.limit_request_fields
        if not 0 < field_limit <= MAX_HEADERS:
            field_limit = MAX_HEADERS
        self.limit_request_fields = field_limit

        # size of one header field: non-positive settings become the default
        size_limit = cfg.limit_request_field_size
        if size_limit <= 0:
            size_limit = DEFAULT_MAX_HEADERFIELD_SIZE
        self.limit_request_field_size = size_limit

        # whole header block: each field plus its CRLF, plus 4 more bytes
        per_field = (size_limit or DEFAULT_MAX_HEADERFIELD_SIZE) + 2
        self.max_buffer_headers = field_limit * per_field + 4

        leftover = self.parse(self.unreader)
        self.unreader.unread(leftover)
        self.set_body_reader()

    def force_close(self):
        """Mark the message so that should_close() returns True unconditionally."""
        self.must_close = True

    def parse(self, unreader):
        """Parse the message head; must be overridden by subclasses.

        __init__ passes the return value to ``unreader.unread``, so an
        implementation returns the bytes it read but did not consume.
        """
        raise NotImplementedError()

    def parse_headers(self, data, from_trailer=False):
        """Parse a raw header (or trailer) block into ``(NAME, value)`` pairs.

        Args:
            data: bytes of the field block; it is split on CRLF and every
                line is decoded with ``bytes_to_str``.
            from_trailer: True when parsing trailer fields. In that mode the
                scheme/forwarder configuration is ignored, ``Expect`` is not
                interpreted and names listed in
                ``RFC9110_6_5_1_FORBIDDEN_TRAILER`` are rejected.

        Returns:
            A list of ``(name, value)`` tuples; names are upper-cased and
            values have surrounding spaces/tabs removed (folded lines are
            joined with a single space).

        Raises:
            LimitRequestHeaders: too many fields, or one field too large.
            InvalidHeader: line without a name/colon, or a forbidden control
                character in the value.
            InvalidHeaderName: name is not a token, is forbidden in a
                trailer, or contains "_" and the header_map policy refuses it.
            ObsoleteFolding: continuation line while
                ``permit_obsolete_folding`` is off.
            ExpectationFailed: ``Expect`` with a value other than
                ``100-continue``.
            InvalidSchemeHeaders: secure scheme headers disagree.

        Side effects: may update ``self.scheme`` and set
        ``self._expected_100_continue``.
        """
        cfg = self.cfg
        headers = []

        # Split lines on \r\n
        lines = [bytes_to_str(line) for line in data.split(b"\r\n")]

        # handle scheme headers
        scheme_header = False
        secure_scheme_headers = {}
        forwarder_headers = []
        if from_trailer:
            # nonsense. either a request is https from the beginning
            #  .. or we are just behind a proxy who does not remove conflicting trailers
            pass
        elif (not isinstance(self.peer_addr, tuple)
              or _ip_in_allow_list(self.peer_addr[0], cfg.forwarded_allow_ips,
                                   cfg.forwarded_allow_networks())):
            # peer is trusted (non-tuple address or allow-listed IP):
            # only then are the proxy-supplied headers honoured
            secure_scheme_headers = cfg.secure_scheme_headers
            forwarder_headers = cfg.forwarder_headers

        # Parse headers into key/value pairs paying attention
        # to continuation lines.
        while lines:
            if len(headers) >= self.limit_request_fields:
                raise LimitRequestHeaders("limit request headers fields")

            # Parse initial header name: value pair.
            curr = lines.pop(0)
            header_length = len(curr) + len("\r\n")
            # "<= 0" rejects both a missing colon (-1) and an empty name (0)
            if curr.find(":") <= 0:
                raise InvalidHeader(curr)
            name, value = curr.split(":", 1)
            if self.cfg.strip_header_spaces:
                name = name.rstrip(" \t")
            if not TOKEN_RE.fullmatch(name):
                raise InvalidHeaderName(name)

            # this is still a dangerous place to do this
            #  but it is more correct than doing it before the pattern match:
            # after we entered Unicode wonderland, 8bits could case-shift into ASCII:
            # b"\xDF".decode("latin-1").upper().encode("ascii") == b"SS"
            name = name.upper()

            # RFC 9110 section 6.5.1
            if from_trailer and name in RFC9110_6_5_1_FORBIDDEN_TRAILER:
                raise InvalidHeaderName(name)

            value = [value.strip(" \t")]

            # Consume value continuation lines..
            while lines and lines[0].startswith((" ", "\t")):
                # .. which is obsolete here, and no longer done by default
                if not self.cfg.permit_obsolete_folding:
                    raise ObsoleteFolding(name)
                curr = lines.pop(0)
                header_length += len(curr) + len("\r\n")
                if header_length > self.limit_request_field_size > 0:
                    raise LimitRequestHeaders("limit request headers "
                                              "fields size")
                value.append(curr.strip("\t "))
            value = " ".join(value)

            if RFC9110_5_5_INVALID_AND_DANGEROUS.search(value):
                raise InvalidHeader(name)

            if header_length > self.limit_request_field_size > 0:
                raise LimitRequestHeaders("limit request headers fields size")

            if not from_trailer and name == "EXPECT":
                # https://datatracker.ietf.org/doc/html/rfc9110#section-10.1.1
                # "The Expect field value is case-insensitive."
                if value.lower() == "100-continue":
                    if self.version < (1, 1):
                        # https://datatracker.ietf.org/doc/html/rfc9110#section-10.1.1-12
                        # "A server that receives a 100-continue expectation
                        #  in an HTTP/1.0 request MUST ignore that expectation."
                        pass
                    else:
                        self._expected_100_continue = True
                    # N.B. understood but ignored expect header does not return 417
                else:
                    raise ExpectationFailed(value)

            if name in secure_scheme_headers:
                secure = value == secure_scheme_headers[name]
                scheme = "https" if secure else "http"
                # the first scheme header decides; later ones must agree
                if scheme_header:
                    if scheme != self.scheme:
                        raise InvalidSchemeHeaders()
                else:
                    scheme_header = True
                    self.scheme = scheme

            # ambiguous mapping allows fooling downstream, e.g. merging non-identical headers:
            # X-Forwarded-For: 2001:db8::ha:cc:ed
            # X_Forwarded_For: 127.0.0.1,::1
            # HTTP_X_FORWARDED_FOR = 2001:db8::ha:cc:ed,127.0.0.1,::1
            # Only modify after fixing *ALL* header transformations; network to wsgi env
            if "_" in name:
                if name in forwarder_headers or "*" in forwarder_headers:
                    # This forwarder may override our environment
                    pass
                elif self.cfg.header_map == "dangerous":
                    # as if we did not know we cannot safely map this
                    pass
                elif self.cfg.header_map == "drop":
                    # almost as if it never had been there
                    # but still counts against resource limits
                    continue
                else:
                    # fail-safe fallthrough: refuse
                    raise InvalidHeaderName(name)

            headers.append((name, value))

        return headers

    def set_body_reader(self):
        """Choose ``self.body`` from the framing headers in ``self.headers``.

        - ``Transfer-Encoding`` ending in ``chunked`` -> ``ChunkedReader``
        - otherwise a valid ``Content-Length`` -> ``LengthReader``
        - otherwise -> ``EOFReader``

        Raises ``InvalidHeader`` for ambiguous or malformed framing
        (duplicate Content-Length, a coding listed after ``chunked``,
        chunked on a version below 1.1, chunked together with
        Content-Length, non-numeric length) and
        ``UnsupportedTransferCoding`` for an unknown coding. A compression
        coding forces the connection to close.
        """
        chunked = False
        content_length = None
        compressions = ('compress', 'deflate', 'gzip')

        for name, value in self.headers:
            if name == "CONTENT-LENGTH":
                if content_length is not None:
                    raise InvalidHeader("CONTENT-LENGTH", req=self)
                content_length = value
                continue
            if name != "TRANSFER-ENCODING":
                continue

            # T-E can be a list
            # https://datatracker.ietf.org/doc/html/rfc9112#name-transfer-encoding
            for coding in (item.strip().lower() for item in value.split(',')):
                if coding not in ("chunked", "identity") and coding not in compressions:
                    raise UnsupportedTransferCoding(value)
                # DANGER: transfer codings stack; chunked must be the last
                # one and stacked chunking is never intended
                if chunked:
                    raise InvalidHeader("TRANSFER-ENCODING", req=self)
                if coding == "chunked":
                    chunked = True
                elif coding in compressions:
                    self.force_close()
                # "identity" does not do much and needs no further action

        if chunked:
            # two potentially dangerous cases:
            #  a) CL + TE (TE overrides CL.. only safe if the recipient sees it that way too)
            #  b) chunked HTTP/1.0 (always faulty)
            if self.version < (1, 1):
                # framing wonky, see RFC 9112 Section 6.1
                raise InvalidHeader("TRANSFER-ENCODING", req=self)
            if content_length is not None:
                # we cannot be certain the message framing we understood matches proxy intent
                #  -> whatever happens next, remaining input must not be trusted
                raise InvalidHeader("CONTENT-LENGTH", req=self)
            self.body = Body(ChunkedReader(self, self.unreader))
            return

        if content_length is None:
            self.body = Body(EOFReader(self.unreader))
            return

        if not str(content_length).isnumeric():
            raise InvalidHeader("CONTENT-LENGTH", req=self)
        try:
            content_length = int(content_length)
        except ValueError:
            # numeric-looking text that int() still refuses
            raise InvalidHeader("CONTENT-LENGTH", req=self)

        if content_length < 0:
            raise InvalidHeader("CONTENT-LENGTH", req=self)

        self.body = Body(LengthReader(self.unreader, content_length))

    def should_close(self):
        """Report whether the connection must be closed after this message.

        A forced close always wins. Otherwise only the first ``CONNECTION``
        header is consulted: ``close`` gives True and ``keep-alive`` gives
        False. With any other value, or no such header, the answer depends
        on the version alone (True for versions up to 1.0).
        """
        if self.must_close:
            return True

        first_connection = next(
            (value for name, value in self.headers if name == "CONNECTION"),
            None,
        )
        if first_connection is not None:
            token = first_connection.lower().strip(" \t")
            if token == "close":
                return True
            if token == "keep-alive":
                return False

        return self.version <= (1, 0)


class Request(Message):
    def __init__(self, cfg, unreader, peer_addr, req_number=1):
        """Prepare request-specific state, then let ``Message`` parse.

        ``req_number`` is the position of this request on its connection;
        ``parse`` only looks for a PROXY protocol header when it is 1.
        """
        self.method = None
        self.uri = None
        self.path = None
        self.query = None
        self.fragment = None

        # get max request line size (0 means unlimited per documentation)
        line_limit = cfg.limit_request_line
        if line_limit < 0:
            line_limit = MAX_REQUEST_LINE

        if line_limit == 0:
            # For fast parser: use large value when unlimited (0), since C parser
            # doesn't support 0 as unlimited. 1MB should be more than enough.
            fast_limit = 1024 * 1024  # 1MB
        else:
            # both parsers share the configured limit, capped at MAX_REQUEST_LINE
            line_limit = fast_limit = min(line_limit, MAX_REQUEST_LINE)

        self.limit_request_line = line_limit
        self._fast_limit_request_line = fast_limit

        self.req_number = req_number
        self.proxy_protocol_info = None

        # Decide on the parser before Message.__init__ triggers parse()
        self._use_fast = _check_fast_parser(cfg)

        super().__init__(cfg, unreader, peer_addr)

    def get_data(self, unreader, buf, stop=False):
        data = unreader.read()
        if not data:
            if stop:
                raise StopIteration()
            raise NoMoreData(buf.getvalue())
        buf.write(data)

    def parse(self, unreader):
        buf = bytearray()
        self.read_into(unreader, buf, stop=True)

        # Handle proxy protocol if enabled and this is the first request
        mode = self.cfg.proxy_protocol
        if mode != "off" and self.req_number == 1:
            buf = self._handle_proxy_protocol(unreader, buf, mode)

        # Use fast parser if available
        if self._use_fast:
            return self._parse_fast(unreader, buf)

        return self._parse_python(unreader, buf)

    def _parse_fast(self, unreader, buf):
        """Parse request using fast C parser (gunicorn_h1c >= 0.4.1).

        The C parser validates the request line and header block. This
        method maps its exceptions onto gunicorn's, fills in ``method``,
        ``uri``, ``path``, ``query``, ``fragment``, ``version`` and
        ``headers``, and then applies the configuration-driven header
        policies exactly like ``parse_headers`` does for the Python parser:

        - ``Expect``: sets ``_expected_100_continue`` (HTTP/1.1 and up) or
          raises ``ExpectationFailed`` for any other expectation;
        - ``secure_scheme_headers``: may switch ``self.scheme`` and raises
          ``InvalidSchemeHeaders`` when such headers disagree;
        - headers containing "_": accepted, dropped or refused according
          to ``forwarder_headers`` and ``header_map``.

        ``secure_scheme_headers`` and ``forwarder_headers`` are honoured
        only for trusted peers, i.e. when ``peer_addr`` is not a tuple or
        its IP is listed in ``forwarded_allow_ips``.

        Returns the bytes that follow the header block.
        """
        cfg = self.cfg

        # Read until we have complete headers
        data = bytes(buf)
        last_len = 0

        while True:
            try:
                # Pass all limit parameters to C parser
                # Use _fast_limit_request_line which handles 0=unlimited
                result = _fast_parser_module.parse_request(
                    data,
                    last_len=last_len,
                    limit_request_line=self._fast_limit_request_line,
                    limit_request_fields=self.limit_request_fields,
                    limit_request_field_size=self.limit_request_field_size,
                    permit_unconventional_http_method=cfg.permit_unconventional_http_method,
                    permit_unconventional_http_version=cfg.permit_unconventional_http_version,
                )
                break
            except _fast_parser_module.IncompleteError:
                # Need more input; remember how much was already scanned
                last_len = len(data)
                self.read_into(unreader, buf)
                data = bytes(buf)
                if len(data) > self.max_buffer_headers + self._fast_limit_request_line:
                    raise LimitRequestHeaders("max buffer headers")
            except _fast_parser_module.LimitRequestLine as e:
                # NOTE(review): read_line() raises LimitRequestLine(size, limit)
                # with two arguments - confirm the one-argument form is accepted.
                raise LimitRequestLine(str(e))
            except _fast_parser_module.LimitRequestHeaders as e:
                raise LimitRequestHeaders(str(e))
            except _fast_parser_module.InvalidRequestMethod as e:
                raise InvalidRequestMethod(str(e))
            except _fast_parser_module.InvalidHTTPVersion as e:
                raise InvalidHTTPVersion(str(e))
            except _fast_parser_module.InvalidHeaderName as e:
                raise InvalidHeaderName(str(e))
            except _fast_parser_module.InvalidHeader as e:
                raise InvalidHeader(str(e))
            except _fast_parser_module.ParseError as e:
                raise InvalidRequestLine(str(e))

        # Extract parsed data
        self.method = bytes_to_str(result['method'])
        self.uri = bytes_to_str(result['path'])

        # Casefold method if configured (validation done by C parser)
        if cfg.casefold_http_method:
            self.method = self.method.upper()

        # Parse URI parts
        if len(self.uri) == 0:
            raise InvalidRequestLine(self.uri)
        try:
            parts = split_request_uri(self.uri)
        except ValueError:
            raise InvalidRequestLine(self.uri)
        self.path = parts.path or ""
        self.query = parts.query or ""
        self.fragment = parts.fragment or ""

        # Version (validation done by C parser)
        self.version = (1, result['minor_version'])

        # Proxy-supplied headers are only honoured for trusted peers, the
        # same rule parse_headers applies. Untrusted peers get empty
        # settings, so their underscore headers fall under header_map.
        secure_scheme_headers = {}
        forwarder_headers = []
        if (not isinstance(self.peer_addr, tuple)
                or _ip_in_allow_list(self.peer_addr[0], cfg.forwarded_allow_ips,
                                     cfg.forwarded_allow_networks())):
            secure_scheme_headers = cfg.secure_scheme_headers
            forwarder_headers = cfg.forwarder_headers

        # Headers - convert bytes to strings with uppercase names
        # gunicorn_h1c returns headers as (bytes, bytes) tuples
        # Header name/value validation done by C parser
        scheme_header = False
        self.headers = []
        for name_bytes, value_bytes in result['headers']:
            name = bytes_to_str(name_bytes).upper()
            value = bytes_to_str(value_bytes)

            if name == "EXPECT":
                # RFC 9110 section 10.1.1: the value is case-insensitive
                if value.lower() == "100-continue":
                    # an HTTP/1.0 request must have the expectation ignored
                    if self.version >= (1, 1):
                        self._expected_100_continue = True
                else:
                    raise ExpectationFailed(value)

            if name in secure_scheme_headers:
                secure = value == secure_scheme_headers[name]
                scheme = "https" if secure else "http"
                # the first scheme header decides; later ones must agree
                if scheme_header:
                    if scheme != self.scheme:
                        raise InvalidSchemeHeaders()
                else:
                    scheme_header = True
                    self.scheme = scheme

            # Handle underscore in header names (policy decision, not validation)
            if "_" in name:
                if name in forwarder_headers or "*" in forwarder_headers:
                    # This forwarder may override our environment
                    pass
                elif cfg.header_map == "dangerous":
                    pass
                elif cfg.header_map == "drop":
                    continue
                else:
                    # fail-safe fallthrough: refuse
                    raise InvalidHeaderName(name)

            self.headers.append((name, value))

        # Return remaining data after headers
        consumed = result['consumed']
        return data[consumed:]

    def _parse_python(self, unreader, buf):
        """Parse request using pure Python parser."""
        # Get request line
        line, buf = self.read_line(unreader, buf, self.limit_request_line)

        self.parse_request_line(line)

        # Headers
        data = bytes(buf)

        done = data[:2] == b"\r\n"
        while True:
            idx = data.find(b"\r\n\r\n")
            done = data[:2] == b"\r\n"

            if idx < 0 and not done:
                self.read_into(unreader, buf)
                data = bytes(buf)
                if len(data) > self.max_buffer_headers:
                    raise LimitRequestHeaders("max buffer headers")
            else:
                break

        if done:
            self.unreader.unread(data[2:])
            return b""

        self.headers = self.parse_headers(data[:idx], from_trailer=False)

        ret = data[idx + 4:]
        return ret

    def read_into(self, unreader, buf, stop=False):
        """Read data from unreader and append to bytearray buffer."""
        data = unreader.read()
        if not data:
            if stop:
                raise StopIteration()
            raise NoMoreData(bytes(buf))
        buf.extend(data)

    def read_line(self, unreader, buf, limit=0):
        """Read a line from buffer, returning (line, remaining_buffer)."""
        data = bytes(buf)

        while True:
            idx = data.find(b"\r\n")
            if idx >= 0:
                # check if the request line is too large
                if idx > limit > 0:
                    raise LimitRequestLine(idx, limit)
                break
            if len(data) - 2 > limit > 0:
                raise LimitRequestLine(len(data), limit)
            self.read_into(unreader, buf)
            data = bytes(buf)

        return (data[:idx],  # request line,
                bytearray(data[idx + 2:]))  # residue in the buffer, skip \r\n

    def read_bytes(self, unreader, buf, count):
        """Read exactly count bytes from buffer/unreader."""
        while len(buf) < count:
            self.read_into(unreader, buf)
        return bytes(buf[:count]), bytearray(buf[count:])

    def _handle_proxy_protocol(self, unreader, buf, mode):
        """Handle PROXY protocol detection and parsing.

        Returns the buffer with proxy protocol data consumed.
        """
        # Ensure we have enough data to detect v2 signature (12 bytes)
        while len(buf) < 12:
            self.read_into(unreader, buf)

        # Check for v2 signature first
        if mode in ("v2", "auto") and buf[:12] == PP_V2_SIGNATURE:
            self.proxy_protocol_access_check()
            return self._parse_proxy_protocol_v2(unreader, buf)

        # Check for v1 prefix
        if mode in ("v1", "auto") and buf[:6] == b"PROXY ":
            self.proxy_protocol_access_check()
            return self._parse_proxy_protocol_v1(unreader, buf)

        # Not proxy protocol - return buffer unchanged
        return buf

    def proxy_protocol_access_check(self):
        """Check if proxy protocol is allowed from this peer."""
        if (isinstance(self.peer_addr, tuple) and
                not _ip_in_allow_list(self.peer_addr[0], self.cfg.proxy_allow_ips,
                                      self.cfg.proxy_allow_networks())):
            raise ForbiddenProxyRequest(self.peer_addr[0])

    def _parse_proxy_protocol_v1(self, unreader, buf):
        """Parse PROXY protocol v1 (text format).

        Expected line: ``PROXY <TCP4|TCP6> <src> <dst> <sport> <dport>``
        terminated by CRLF. On success ``self.proxy_protocol_info`` is set
        to a dict with the keys ``proxy_protocol``, ``client_addr``,
        ``client_port``, ``proxy_addr`` and ``proxy_port``.

        The specification caps a v1 line at 107 bytes including the CRLF.
        Reading stops with ``InvalidProxyLine`` as soon as that cap is
        exceeded, so a peer cannot make the worker buffer unbounded data
        by withholding the terminator.

        Returns buffer with v1 header consumed.

        Raises:
            InvalidProxyLine: over-long line, wrong field count,
                unsupported protocol, malformed address or port.
        """
        # Longest line the specification allows, CRLF included
        max_line_size = 107

        data = bytes(buf)
        idx = data.find(b"\r\n")
        while idx < 0:
            # With max_line_size bytes buffered and no CRLF among them the
            # line can no longer terminate within the allowed size.
            if len(data) >= max_line_size:
                raise InvalidProxyLine("line exceeds %d bytes" % max_line_size)
            self.read_into(unreader, buf)
            data = bytes(buf)
            idx = data.find(b"\r\n")

        if idx + 2 > max_line_size:
            raise InvalidProxyLine("line exceeds %d bytes" % max_line_size)

        line = bytes_to_str(data[:idx])
        remaining = bytearray(data[idx + 2:])

        bits = line.split(" ")

        if len(bits) != 6:
            raise InvalidProxyLine(line)

        # Extract data
        proto = bits[1]
        s_addr = bits[2]
        d_addr = bits[3]

        # Validation
        families = {"TCP4": socket.AF_INET, "TCP6": socket.AF_INET6}
        if proto not in families:
            raise InvalidProxyLine("protocol '%s' not supported" % proto)
        try:
            socket.inet_pton(families[proto], s_addr)
            socket.inet_pton(families[proto], d_addr)
        except (OSError, ValueError):
            # ValueError: inet_pton refuses strings with an embedded NUL
            raise InvalidProxyLine(line)

        try:
            s_port = int(bits[4])
            d_port = int(bits[5])
        except ValueError:
            raise InvalidProxyLine("invalid port %s" % line)

        if not ((0 <= s_port <= 65535) and (0 <= d_port <= 65535)):
            raise InvalidProxyLine("invalid port %s" % line)

        # Set data
        self.proxy_protocol_info = {
            "proxy_protocol": proto,
            "client_addr": s_addr,
            "client_port": s_port,
            "proxy_addr": d_addr,
            "proxy_port": d_port
        }

        return remaining

    def _parse_proxy_protocol_v2(self, unreader, buf):
        """Parse PROXY protocol v2 (binary format).

        Layout after the 12-byte signature: one version/command byte, one
        family/transport byte, a big-endian 16-bit length, then ``length``
        bytes of address data.

        On success ``self.proxy_protocol_info`` is set to a dict with the
        keys ``proxy_protocol``, ``client_addr``, ``client_port``,
        ``proxy_addr`` and ``proxy_port``. The address values are None for
        the LOCAL command and for the UNSPEC address family.

        An UNSPEC family is accepted with either the UNSPEC or the STREAM
        transport, so the specification's "unspecified" byte 0x00 is
        accepted rather than rejected as non-TCP. Datagram transport is
        always rejected.

        Returns buffer with v2 header consumed.

        Raises:
            InvalidProxyHeader: unsupported version, command, transport or
                address family, or too little address data.
        """
        # We need at least 16 bytes for the header (12 signature + 4 header)
        while len(buf) < 16:
            self.read_into(unreader, buf)

        # Parse header fields (after 12-byte signature)
        ver_cmd = buf[12]
        fam_proto = buf[13]
        length = struct.unpack(">H", bytes(buf[14:16]))[0]

        # Validate version (high nibble must be 0x2)
        version = (ver_cmd & 0xF0) >> 4
        if version != 2:
            raise InvalidProxyHeader("unsupported version %d" % version)

        # Extract command (low nibble)
        command = ver_cmd & 0x0F
        if command not in (PPCommand.LOCAL, PPCommand.PROXY):
            raise InvalidProxyHeader("unsupported command %d" % command)

        # Ensure we have the complete header
        total_header_size = 16 + length
        while len(buf) < total_header_size:
            self.read_into(unreader, buf)

        # For LOCAL command, no address info is provided
        if command == PPCommand.LOCAL:
            self.proxy_protocol_info = {
                "proxy_protocol": "LOCAL",
                "client_addr": None,
                "client_port": None,
                "proxy_addr": None,
                "proxy_port": None
            }
            return bytearray(buf[total_header_size:])

        # Extract address family and protocol
        family = (fam_proto & 0xF0) >> 4
        protocol = fam_proto & 0x0F

        # UNSPEC family carries no usable address; decide this before the
        # transport check, which would otherwise reject byte 0x00.
        if (family == PPFamily.UNSPEC
                and protocol in (PPProtocol.UNSPEC, PPProtocol.STREAM)):
            self.proxy_protocol_info = {
                "proxy_protocol": "UNSPEC",
                "client_addr": None,
                "client_port": None,
                "proxy_addr": None,
                "proxy_port": None
            }
            return bytearray(buf[total_header_size:])

        # We only support TCP (STREAM)
        if protocol != PPProtocol.STREAM:
            raise InvalidProxyHeader("only TCP protocol is supported")

        addr_data = bytes(buf[16:16 + length])

        if family == PPFamily.INET:  # IPv4
            if length < 12:  # 4+4+2+2
                raise InvalidProxyHeader("insufficient address data for IPv4")
            s_addr = socket.inet_ntop(socket.AF_INET, addr_data[0:4])
            d_addr = socket.inet_ntop(socket.AF_INET, addr_data[4:8])
            s_port = struct.unpack(">H", addr_data[8:10])[0]
            d_port = struct.unpack(">H", addr_data[10:12])[0]
            proto = "TCP4"

        elif family == PPFamily.INET6:  # IPv6
            if length < 36:  # 16+16+2+2
                raise InvalidProxyHeader("insufficient address data for IPv6")
            s_addr = socket.inet_ntop(socket.AF_INET6, addr_data[0:16])
            d_addr = socket.inet_ntop(socket.AF_INET6, addr_data[16:32])
            s_port = struct.unpack(">H", addr_data[32:34])[0]
            d_port = struct.unpack(">H", addr_data[34:36])[0]
            proto = "TCP6"

        else:
            raise InvalidProxyHeader("unsupported address family %d" % family)

        # Set data
        self.proxy_protocol_info = {
            "proxy_protocol": proto,
            "client_addr": s_addr,
            "client_port": s_port,
            "proxy_addr": d_addr,
            "proxy_port": d_port
        }

        return bytearray(buf[total_header_size:])

    def parse_request_line(self, line_bytes):
        """Parse ``METHOD SP request-target SP HTTP-version`` (without CRLF).

        Sets ``self.method``, ``self.uri``, ``self.path``, ``self.query``,
        ``self.fragment`` and ``self.version`` (a tuple of two ints).

        Raises:
            InvalidRequestLine: not exactly three parts, empty target,
                asterisk-form without OPTIONS, authority-form without
                CONNECT, or a target ``split_request_uri`` rejects.
            InvalidRequestMethod: method is not a token, or breaks the
                conventional restrictions while
                ``permit_unconventional_http_method`` is off.
            InvalidHTTPVersion: version does not match ``HTTP/d.d``, or is
                outside 1.x while ``permit_unconventional_http_version``
                is off.
        """
        # maxsplit=2: any further spaces stay inside the third part, which
        # then fails the version check below
        bits = [bytes_to_str(bit) for bit in line_bytes.split(b" ", 2)]
        if len(bits) != 3:
            raise InvalidRequestLine(bytes_to_str(line_bytes))

        # Method: RFC9110 Section 9
        self.method = bits[0]

        # nonstandard restriction, suitable for all IANA registered methods
        # partially enforced in previous gunicorn versions
        if not self.cfg.permit_unconventional_http_method:
            if METHOD_BADCHAR_RE.search(self.method):
                raise InvalidRequestMethod(self.method)
            if not 3 <= len(bits[0]) <= 20:
                raise InvalidRequestMethod(self.method)
        # standard restriction: RFC9110 token
        if not TOKEN_RE.fullmatch(self.method):
            raise InvalidRequestMethod(self.method)
        # nonstandard and dangerous
        # methods are merely uppercase by convention, no case-insensitive treatment is intended
        if self.cfg.casefold_http_method:
            self.method = self.method.upper()

        # URI
        self.uri = bits[1]

        # Python stdlib explicitly tells us it will not perform validation.
        # https://docs.python.org/3/library/urllib.parse.html#url-parsing-security
        # There are *four* `request-target` forms in rfc9112, none of them can be empty:
        # 1. origin-form, which starts with a slash
        # 2. absolute-form, which starts with a non-empty scheme
        # 3. authority-form, (for CONNECT) which contains a colon after the host
        # 4. asterisk-form, which is an asterisk (`\x2A`)
        # => manually reject one always invalid URI: empty
        if len(self.uri) == 0:
            raise InvalidRequestLine(bytes_to_str(line_bytes))

        # RFC 9112 section 3.2.4: asterisk-form is only valid with OPTIONS.
        if self.uri == "*" and self.method != "OPTIONS":
            raise InvalidRequestLine(bytes_to_str(line_bytes))

        # RFC 9112 section 3.2.3: authority-form ("host:port") is only valid
        # with CONNECT. origin-form starts with "/"; absolute-form contains
        # "://". Anything else on a non-CONNECT request is authority-form.
        if (self.method != "CONNECT"
                and self.uri != "*"
                and not self.uri.startswith("/")
                and "://" not in self.uri):
            raise InvalidRequestLine(bytes_to_str(line_bytes))

        try:
            parts = split_request_uri(self.uri)
        except ValueError:
            raise InvalidRequestLine(bytes_to_str(line_bytes))
        self.path = parts.path or ""
        self.query = parts.query or ""
        self.fragment = parts.fragment or ""

        # Version
        match = VERSION_RE.fullmatch(bits[2])
        if match is None:
            raise InvalidHTTPVersion(bits[2])
        self.version = (int(match.group(1)), int(match.group(2)))
        if not (1, 0) <= self.version < (2, 0):
            # if ever relaxing this, carefully review Content-Encoding processing
            if not self.cfg.permit_unconventional_http_version:
                raise InvalidHTTPVersion(self.version)

    def set_body_reader(self):
        """Select the body reader; a request without framing has no body.

        The base class falls back to an ``EOFReader`` when it finds neither
        chunked encoding nor a Content-Length. For a request that fallback
        is replaced by a zero-length ``LengthReader``.
        """
        super().set_body_reader()
        reader = self.body.reader
        if isinstance(reader, EOFReader):
            self.body = Body(LengthReader(self.unreader, 0))