Backport urlparse.urlsplit from Python 2.7 to Python 2.6.

urlsplit doesn't reject invalid IPv6 addresses in Python 2.6.
This commit is contained in:
Berker Peksag 2015-05-10 10:17:28 +03:00
parent 945d2ecfc3
commit 95b5f78ecb
2 changed files with 66 additions and 1 deletions

View File

@ -2,6 +2,7 @@ import sys
from gunicorn import six
PY26 = (sys.version_info[:2] == (2, 6))
PY33 = (sys.version_info >= (3, 3))
@ -200,3 +201,67 @@ else:
if exc.args:
_wrap_error(exc, _MAP_ERRNO, exc.args[0])
raise
if PY26:
from urlparse import (
_parse_cache, MAX_CACHE_SIZE, clear_cache, _splitnetloc, SplitResult,
scheme_chars,
)
def urlsplit(url, scheme='', allow_fragments=True):
"""Parse a URL into 5 components:
<scheme>://<netloc>/<path>?<query>#<fragment>
Return a 5-tuple: (scheme, netloc, path, query, fragment).
Note that we don't break the components up in smaller bits
(e.g. netloc is a single string) and we don't expand % escapes."""
allow_fragments = bool(allow_fragments)
key = url, scheme, allow_fragments, type(url), type(scheme)
cached = _parse_cache.get(key, None)
if cached:
return cached
if len(_parse_cache) >= MAX_CACHE_SIZE: # avoid runaway growth
clear_cache()
netloc = query = fragment = ''
i = url.find(':')
if i > 0:
if url[:i] == 'http': # optimize the common case
scheme = url[:i].lower()
url = url[i+1:]
if url[:2] == '//':
netloc, url = _splitnetloc(url, 2)
if (('[' in netloc and ']' not in netloc) or
(']' in netloc and '[' not in netloc)):
raise ValueError("Invalid IPv6 URL")
if allow_fragments and '#' in url:
url, fragment = url.split('#', 1)
if '?' in url:
url, query = url.split('?', 1)
v = SplitResult(scheme, netloc, url, query, fragment)
_parse_cache[key] = v
return v
for c in url[:i]:
if c not in scheme_chars:
break
else:
# make sure "url" is not actually a port number (in which case
# "scheme" is really part of the path)
rest = url[i+1:]
if not rest or any(c not in '0123456789' for c in rest):
# not a port number
scheme, url = url[:i].lower(), rest
if url[:2] == '//':
netloc, url = _splitnetloc(url, 2)
if (('[' in netloc and ']' not in netloc) or
(']' in netloc and '[' not in netloc)):
raise ValueError("Invalid IPv6 URL")
if allow_fragments and '#' in url:
url, fragment = url.split('#', 1)
if '?' in url:
url, query = url.split('?', 1)
v = SplitResult(scheme, netloc, url, query, fragment)
_parse_cache[key] = v
return v
else:
from gunicorn.six.moves.urllib.parse import urlsplit

View File

@ -15,7 +15,7 @@ from gunicorn.http.errors import (InvalidHeader, InvalidHeaderName, NoMoreData,
LimitRequestLine, LimitRequestHeaders)
from gunicorn.http.errors import InvalidProxyLine, ForbiddenProxyRequest
from gunicorn.six import BytesIO
from gunicorn.six.moves.urllib.parse import urlsplit
from gunicorn._compat import urlsplit
MAX_REQUEST_LINE = 8190
MAX_HEADERS = 32768