diff --git a/gunicorn/http/message.py b/gunicorn/http/message.py index b50da949..acd73873 100644 --- a/gunicorn/http/message.py +++ b/gunicorn/http/message.py @@ -15,7 +15,7 @@ from gunicorn.http.errors import (InvalidHeader, InvalidHeaderName, NoMoreData, LimitRequestLine, LimitRequestHeaders) from gunicorn.http.errors import InvalidProxyLine, ForbiddenProxyRequest from gunicorn.six import BytesIO -from gunicorn._compat import urlsplit +from gunicorn.util import split_request_uri MAX_REQUEST_LINE = 8190 MAX_HEADERS = 32768 @@ -312,18 +312,10 @@ class Request(Message): self.method = bits[0].upper() # URI - # When the path starts with //, urlsplit considers it as a - # relative uri while the RDF says it shouldnt - # http://www.w3.org/Protocols/rfc2616/rfc2616-sec5.html#sec5.1.2 - # considers it as an absolute url. - # fix issue #297 - if bits[1].startswith("//"): - self.uri = bits[1][1:] - else: - self.uri = bits[1] + self.uri = bits[1] try: - parts = urlsplit(self.uri) + parts = split_request_uri(self.uri) except ValueError: raise InvalidRequestLine(bytes_to_str(line_bytes)) self.path = parts.path or "" diff --git a/gunicorn/util.py b/gunicorn/util.py index df93e875..e73d3502 100644 --- a/gunicorn/util.py +++ b/gunicorn/util.py @@ -543,3 +543,15 @@ def make_fail_app(msg): return [msg] return app + + +def split_request_uri(uri): + if uri.startswith("//"): + # When the path starts with //, urlsplit considers it as a + # relative uri, while the RFC says we should consider it as abs_path: + # http://www.w3.org/Protocols/rfc2616/rfc2616-sec5.html#sec5.1.2 + # We use a temporary dot prefix to work around this behaviour. + parts = _compat.urlsplit("." 
+ uri) + return parts._replace(path=parts.path[1:]) + + return _compat.urlsplit(uri) diff --git a/tests/requests/invalid/016.http b/tests/requests/invalid/016.http index b35adb0e..38297ada 100644 --- a/tests/requests/invalid/016.http +++ b/tests/requests/invalid/016.http @@ -1,2 +1,2 @@ -PUT ///]ufd/: HTTP/1.1\r\n -\r\n \ No newline at end of file +PUT s://]ufd/: HTTP/1.1\r\n +\r\n diff --git a/tests/requests/valid/100.http b/tests/requests/valid/100.http new file mode 100644 index 00000000..c040a0f0 --- /dev/null +++ b/tests/requests/valid/100.http @@ -0,0 +1,2 @@ +GET ///keeping_slashes HTTP/1.1\r\n +\r\n diff --git a/tests/requests/valid/100.py b/tests/requests/valid/100.py new file mode 100644 index 00000000..f526f9a9 --- /dev/null +++ b/tests/requests/valid/100.py @@ -0,0 +1,7 @@ +request = { + "method": "GET", + "uri": uri("///keeping_slashes"), + "version": (1, 1), + "headers": [], + "body": b"" +} diff --git a/tests/test_util.py b/tests/test_util.py index 51307423..5d3c8db5 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -7,6 +7,7 @@ import pytest from gunicorn import util from gunicorn.errors import AppImportError +from gunicorn.six.moves.urllib.parse import SplitResult @pytest.mark.parametrize('test_input, expected', [ @@ -69,3 +70,19 @@ def test_to_bytestring(): util.to_bytestring(100) msg = '100 is not a string' assert msg in str(err) + + +@pytest.mark.parametrize('test_input, expected', [ + ('https://example.org/a/b?c=1#d', + SplitResult(scheme='https', netloc='example.org', path='/a/b', query='c=1', fragment='d')), + ('a/b?c=1#d', + SplitResult(scheme='', netloc='', path='a/b', query='c=1', fragment='d')), + ('/a/b?c=1#d', + SplitResult(scheme='', netloc='', path='/a/b', query='c=1', fragment='d')), + ('//a/b?c=1#d', + SplitResult(scheme='', netloc='', path='//a/b', query='c=1', fragment='d')), + ('///a/b?c=1#d', + SplitResult(scheme='', netloc='', path='///a/b', query='c=1', fragment='d')), +]) +def test_split_request_uri(test_input, 
expected): + assert util.split_request_uri(test_input) == expected diff --git a/tests/treq.py b/tests/treq.py index fb714ac3..46f2d5c2 100644 --- a/tests/treq.py +++ b/tests/treq.py @@ -10,7 +10,7 @@ import random from gunicorn._compat import execfile_ from gunicorn.config import Config from gunicorn.http.parser import RequestParser -from gunicorn.six.moves.urllib.parse import urlparse +from gunicorn.util import split_request_uri from gunicorn import six dirname = os.path.dirname(__file__) @@ -19,19 +19,11 @@ random.seed() def uri(data): ret = {"raw": data} - parts = urlparse(data) + parts = split_request_uri(data) ret["scheme"] = parts.scheme or '' ret["host"] = parts.netloc.rsplit(":", 1)[0] or None ret["port"] = parts.port or 80 - if parts.path and parts.params: - ret["path"] = ";".join([parts.path, parts.params]) - elif parts.path: - ret["path"] = parts.path - elif parts.params: - # Don't think this can happen - ret["path"] = ";" + parts.path - else: - ret["path"] = '' + ret["path"] = parts.path or '' ret["query"] = parts.query or '' ret["fragment"] = parts.fragment or '' return ret