Backport urlparse.urlsplit from Python 2.7 to Python 2.6.

urlsplit doesn't reject invalid IPv6 addresses in Python 2.6.
2026-07-03 11:11:30 +08:00 · 2015-05-10 10:17:28 +03:00 · 2015-05-10 10:17:28 +03:00 · 95b5f78ecb
commit 95b5f78ecb
parent 945d2ecfc3
2 changed files with 66 additions and 1 deletions
--- a/gunicorn/_compat.py
+++ b/gunicorn/_compat.py
@ -2,6 +2,7 @@ import sys

 from gunicorn import six

+PY26 = (sys.version_info[:2] == (2, 6))
 PY33 = (sys.version_info >= (3, 3))


@ -200,3 +201,67 @@ else:
            if exc.args:
                _wrap_error(exc, _MAP_ERRNO, exc.args[0])
            raise
+
+if PY26:
+    from urlparse import (
+        _parse_cache, MAX_CACHE_SIZE, clear_cache, _splitnetloc, SplitResult,
+        scheme_chars,
+    )
+
+    def urlsplit(url, scheme='', allow_fragments=True):
+        """Parse a URL into 5 components:
+        <scheme>://<netloc>/<path>?<query>#<fragment>
+        Return a 5-tuple: (scheme, netloc, path, query, fragment).
+        Note that we don't break the components up in smaller bits
+        (e.g. netloc is a single string) and we don't expand % escapes."""
+        allow_fragments = bool(allow_fragments)
+        key = url, scheme, allow_fragments, type(url), type(scheme)
+        cached = _parse_cache.get(key, None)
+        if cached:
+            return cached
+        if len(_parse_cache) >= MAX_CACHE_SIZE: # avoid runaway growth
+            clear_cache()
+        netloc = query = fragment = ''
+        i = url.find(':')
+        if i > 0:
+            if url[:i] == 'http': # optimize the common case
+                scheme = url[:i].lower()
+                url = url[i+1:]
+                if url[:2] == '//':
+                    netloc, url = _splitnetloc(url, 2)
+                    if (('[' in netloc and ']' not in netloc) or
+                            (']' in netloc and '[' not in netloc)):
+                        raise ValueError("Invalid IPv6 URL")
+                if allow_fragments and '#' in url:
+                    url, fragment = url.split('#', 1)
+                if '?' in url:
+                    url, query = url.split('?', 1)
+                v = SplitResult(scheme, netloc, url, query, fragment)
+                _parse_cache[key] = v
+                return v
+            for c in url[:i]:
+                if c not in scheme_chars:
+                    break
+            else:
+                # make sure "url" is not actually a port number (in which case
+                # "scheme" is really part of the path)
+                rest = url[i+1:]
+                if not rest or any(c not in '0123456789' for c in rest):
+                    # not a port number
+                    scheme, url = url[:i].lower(), rest
+
+        if url[:2] == '//':
+            netloc, url = _splitnetloc(url, 2)
+            if (('[' in netloc and ']' not in netloc) or
+                    (']' in netloc and '[' not in netloc)):
+                raise ValueError("Invalid IPv6 URL")
+        if allow_fragments and '#' in url:
+            url, fragment = url.split('#', 1)
+        if '?' in url:
+            url, query = url.split('?', 1)
+        v = SplitResult(scheme, netloc, url, query, fragment)
+        _parse_cache[key] = v
+        return v
+
+else:
+    from gunicorn.six.moves.urllib.parse import urlsplit
--- a/gunicorn/http/message.py
+++ b/gunicorn/http/message.py
@ -15,7 +15,7 @@ from gunicorn.http.errors import (InvalidHeader, InvalidHeaderName, NoMoreData,
    LimitRequestLine, LimitRequestHeaders)
 from gunicorn.http.errors import InvalidProxyLine, ForbiddenProxyRequest
 from gunicorn.six import BytesIO
-from gunicorn.six.moves.urllib.parse import urlsplit
+from gunicorn._compat import urlsplit

 MAX_REQUEST_LINE = 8190
 MAX_HEADERS = 32768