From 9ca4f1fdfc48c32eb4cc6d3e637cf4e5dcf5a923 Mon Sep 17 00:00:00 2001 From: "Paul J. Dorn" Date: Wed, 31 Jul 2024 02:14:35 +0200 Subject: [PATCH] refuse empty request-target in HTTP request A single slash is a valid request-target, but an empty request-target can safely be refused. The Python stdlib explicitly tells us it will not perform validation: https://docs.python.org/3/library/urllib.parse.html#url-parsing-security There are *four* `request-target` forms in RFC 9112; none of them can be empty. --- gunicorn/http/message.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/gunicorn/http/message.py b/gunicorn/http/message.py index 88ffa5a2..3ca0f614 100644 --- a/gunicorn/http/message.py +++ b/gunicorn/http/message.py @@ -426,6 +426,17 @@ class Request(Message): # URI self.uri = bits[1] + # Python stdlib explicitly tells us it will not perform validation. + # https://docs.python.org/3/library/urllib.parse.html#url-parsing-security + # There are *four* `request-target` forms in rfc9112, none of them can be empty: + # 1. origin-form, which starts with a slash + # 2. absolute-form, which starts with a non-empty scheme + # 3. authority-form, (for CONNECT) which contains a colon after the host + # 4. asterisk-form, which is an asterisk (`\x2A`) + # => manually reject one always invalid URI: empty + if len(self.uri) == 0: + raise InvalidRequestLine(bytes_to_str(line_bytes)) + try: parts = split_request_uri(self.uri) except ValueError: