Do not strip leading slash from path (#1511)

Fixes #1512
2026-01-14 11:09:11 +08:00 · 2017-12-28 13:32:47 +05:00 · 2017-12-28 13:32:47 +05:00 · 5953148573
commit 5953148573
parent 953bb9e8d8
7 changed files with 46 additions and 24 deletions
--- a/gunicorn/http/message.py
+++ b/gunicorn/http/message.py
@ -15,7 +15,7 @@ from gunicorn.http.errors import (InvalidHeader, InvalidHeaderName, NoMoreData,
    LimitRequestLine, LimitRequestHeaders)
 from gunicorn.http.errors import InvalidProxyLine, ForbiddenProxyRequest
 from gunicorn.six import BytesIO
-from gunicorn._compat import urlsplit
+from gunicorn.util import split_request_uri

 MAX_REQUEST_LINE = 8190
 MAX_HEADERS = 32768
@ -312,18 +312,10 @@ class Request(Message):
        self.method = bits[0].upper()

        # URI
-        # When the path starts with //, urlsplit considers it as a
-        # relative uri while the RDF says it shouldnt
-        # http://www.w3.org/Protocols/rfc2616/rfc2616-sec5.html#sec5.1.2
-        # considers it as an absolute url.
-        # fix issue #297
-        if bits[1].startswith("//"):
-            self.uri = bits[1][1:]
-        else:
-            self.uri = bits[1]
+        self.uri = bits[1]

        try:
-            parts = urlsplit(self.uri)
+            parts = split_request_uri(self.uri)
        except ValueError:
            raise InvalidRequestLine(bytes_to_str(line_bytes))
        self.path = parts.path or ""
--- a/gunicorn/util.py
+++ b/gunicorn/util.py
@ -543,3 +543,15 @@ def make_fail_app(msg):
        return [msg]

    return app
+
+
+def split_request_uri(uri):
+    if uri.startswith("//"):
+        # When the path starts with //, urlsplit treats what follows as a
+        # network location, while the RFC says it should be parsed as abs_path:
+        # http://www.w3.org/Protocols/rfc2616/rfc2616-sec5.html#sec5.1.2
+        # We add a temporary dot prefix to work around this behaviour.
+        parts = _compat.urlsplit("." + uri)
+        return parts._replace(path=parts.path[1:])
+
+    return _compat.urlsplit(uri)
--- a/tests/requests/invalid/016.http
+++ b/tests/requests/invalid/016.http
@ -1,2 +1,2 @@
-PUT ///]ufd/: HTTP/1.1\r\n
-\r\n
+PUT s://]ufd/: HTTP/1.1\r\n
+\r\n
--- a/tests/requests/valid/100.http
+++ b/tests/requests/valid/100.http
@ -0,0 +1,2 @@
+GET ///keeping_slashes HTTP/1.1\r\n
+\r\n
--- a/tests/requests/valid/100.py
+++ b/tests/requests/valid/100.py
@ -0,0 +1,7 @@
+request = {
+    "method": "GET",
+    "uri": uri("///keeping_slashes"),
+    "version": (1, 1),
+    "headers": [],
+    "body": b""
+}
--- a/tests/test_util.py
+++ b/tests/test_util.py
@ -7,6 +7,7 @@ import pytest

 from gunicorn import util
 from gunicorn.errors import AppImportError
+from gunicorn.six.moves.urllib.parse import SplitResult


@pytest.mark.parametrize('test_input, expected', [
@ -69,3 +70,19 @@ def test_to_bytestring():
        util.to_bytestring(100)
    msg = '100 is not a string'
    assert msg in str(err)
+
+
+@pytest.mark.parametrize('test_input, expected', [
+    ('https://example.org/a/b?c=1#d',
+     SplitResult(scheme='https', netloc='example.org', path='/a/b', query='c=1', fragment='d')),
+    ('a/b?c=1#d',
+     SplitResult(scheme='', netloc='', path='a/b', query='c=1', fragment='d')),
+    ('/a/b?c=1#d',
+     SplitResult(scheme='', netloc='', path='/a/b', query='c=1', fragment='d')),
+    ('//a/b?c=1#d',
+     SplitResult(scheme='', netloc='', path='//a/b', query='c=1', fragment='d')),
+    ('///a/b?c=1#d',
+     SplitResult(scheme='', netloc='', path='///a/b', query='c=1', fragment='d')),
+])
+def test_split_request_uri(test_input, expected):
+    assert util.split_request_uri(test_input) == expected
--- a/tests/treq.py
+++ b/tests/treq.py
@ -10,7 +10,7 @@ import random
 from gunicorn._compat import execfile_
 from gunicorn.config import Config
 from gunicorn.http.parser import RequestParser
-from gunicorn.six.moves.urllib.parse import urlparse
+from gunicorn.util import split_request_uri
 from gunicorn import six

 dirname = os.path.dirname(__file__)
@ -19,19 +19,11 @@ random.seed()

 def uri(data):
    ret = {"raw": data}
-    parts = urlparse(data)
+    parts = split_request_uri(data)
    ret["scheme"] = parts.scheme or ''
    ret["host"] = parts.netloc.rsplit(":", 1)[0] or None
    ret["port"] = parts.port or 80
-    if parts.path and parts.params:
-        ret["path"] = ";".join([parts.path, parts.params])
-    elif parts.path:
-        ret["path"] = parts.path
-    elif parts.params:
-        # Don't think this can happen
-        ret["path"] = ";" + parts.path
-    else:
-        ret["path"] = ''
+    ret["path"] = parts.path or ''
    ret["query"] = parts.query or ''
    ret["fragment"] = parts.fragment or ''
    return ret