Add HTTP parser unit tests and fix parser issues found along the way

This commit is contained in:
Benoit Chesneau 2010-01-20 14:10:38 +01:00
parent c613b826c8
commit 6eecc37fce
8 changed files with 20038 additions and 50 deletions

16
examples/nohup.out Normal file
View File

@ -0,0 +1,16 @@
INFO Booted Arbiter: 8103
INFO Worker 8104 booting
INFO Worker 8105 booting
INFO Worker 8105 exiting.
241
INFO Handling signal: int
INFO Worker 8104 exiting.
241
INFO Master is shutting down.
241
INFO Booted Arbiter: 8106
INFO Worker 8107 booting
INFO Worker 8108 booting
INFO Booted Arbiter: 8115
INFO Worker 8116 booting
INFO Worker 8117 booting

19943
examples/test.log Normal file

File diff suppressed because it is too large Load Diff

View File

@ -3,6 +3,6 @@
# This file is part of gunicorn released under the MIT license.
# See the NOTICE for more information.
from gunicorn.http.http_parser import HttpParser
from gunicorn.http.request import HTTPRequest, RequestError
from gunicorn.http.response import HTTPResponse

View File

@ -3,27 +3,39 @@
# This file is part of gunicorn released under the MIT license.
# See the NOTICE for more information.
import urlparse
from gunicorn.util import normalize_name
class HttpParserError(Exception):
""" Error raised when parsing fails."""
class HttpParser(object):
def __init__(self):
self._headers = {}
self.status = ""
self.headers = []
self.headers_dict = {}
self.raw_version = ""
self.raw_path = ""
self.version = None
self.method = None
self.path = None
self.method = ""
self.path = ""
self.query_string = ""
self.fragment = ""
self._content_len = None
self.start_offset = 0
self.chunk_size = 0
self._chunk_eof = False
def headers(self, headers, buf):
def filter_headers(self, headers, buf):
""" take a string as buffer and an header dict
(empty or not). It return new position or -1
if parsing isn't done. headers dict is updated
with new headers.
"""
if self._headers:
return self._headers
if self.headers:
return self.headers
ld = len("\r\n\r\n")
i = buf.find("\r\n\r\n")
@ -43,57 +55,73 @@ class HttpParser(object):
# parse headers. We silently ignore
# bad headers' lines
_headers = {}
hname = ""
for line in lines:
if line == "\t":
self._headers[hname] += line.strip()
headers[hname] += line.strip()
else:
try:
hname =self._parse_headerl(line)
hname =self._parse_headerl(_headers, line)
except ValueError:
# bad headers
pass
headers.update(self._headers)
self._content_len = int(self._headers.get('Content-Length') or 0)
self.headers_dict = _headers
headers.extend(list(_headers.items()))
self.headers = headers
self._content_len = int(_headers.get('Content-Length') or 0)
(_, _, self.path, self.query_string, self.fragment) = urlparse.urlsplit(self.raw_path)
return pos
def _first_line(self, line):
""" parse first line """
method, path, version = line.strip().split(" ")
self.version = version.strip()
self.method = method.upper()
self.path = path
self.status = status = line.strip()
def _parse_headerl(self, line):
method, path, version = status.split(" ")
version = version.strip()
self.raw_version = version
try:
major, minor = version.split("HTTP/")[1].split(".")
version = (int(major), int(minor))
except IndexError:
version = (1, 0)
self.version = version
self.method = method.upper()
self.raw_path = path
def _parse_headerl(self, hdrs, line):
""" parse header line"""
name, value = line.split(": ", 1)
name = name.strip()
self._headers[name] = value.strip()
name, value = line.split(":", 1)
name = normalize_name(name.strip())
hdrs[name] = value.rsplit("\r\n",1)[0].strip()
return name
@property
def should_close(self):
if self._should_close:
return True
if self._headers.get("Connection") == "close":
if self.headers_dict.get("Connection") == "close":
return True
if self._headers.get("Connection") == "Keep-Alive":
if self.headers_dict.get("Connection") == "Keep-Alive":
return False
if self.version < "HTTP/1.1":
if int("%s%s" % self.version) < 11:
return True
@property
def is_chunked(self):
""" is TE: chunked ?"""
transfert_encoding = self._headers.get('Transfer-Encoding', False)
transfert_encoding = self.headers_dict.get('Transfer-Encoding', False)
return (transfert_encoding == "chunked")
@property
def content_len(self):
""" return content length as integer or
None."""
transfert_encoding = self._headers.get('Transfer-Encoding')
content_length = self._headers.get('Content-Length')
transfert_encoding = self.headers_dict.get('Transfer-Encoding')
content_length = self.headers_dict.get('Content-Length')
if transfert_encoding is None:
if content_length is None:
return 0

View File

@ -17,15 +17,14 @@ import logging
from gunicorn import __version__
from gunicorn.http.http_parser import HttpParser
from gunicorn.http.tee import TeeInput
from gunicorn.util import CHUNK_SIZE, read_partial
from gunicorn.util import CHUNK_SIZE, read_partial, normalize_name
NORMALIZE_SPACE = re.compile(r'(?:\r\n)?[ \t]+')
def _normalize_name(name):
return "-".join([w.lower().capitalize() for w in name.split("-")])
class RequestError(Exception):
""" Raised when something went wrong while handling a request."""
@ -62,40 +61,35 @@ class HTTPRequest(object):
def read(self):
environ = {}
headers = {}
headers = []
remain = CHUNK_SIZE
buf = ""
buf = read_partial(self.socket, CHUNK_SIZE)
i = self.parser.headers(headers, buf)
i = self.parser.filter_headers(headers, buf)
if i == -1 and buf:
while True:
data = read_partial(self.socket, CHUNK_SIZE)
if not data: break
buf += data
i = self.parser.headers(headers, buf)
i = self.parser.filter_headers(headers, buf)
if i != -1: break
if not headers:
print "ici :()"
environ.update(self.DEFAULTS)
return environ
buf = buf[i:]
self.log.info("%s", self.parser.status)
self.log.info("Got headers:\n%s" % headers)
if headers.get('Except', '').lower() == "100-continue":
if self.parser.headers_dict.get('Except', '').lower() == "100-continue":
self.socket.send("100 Continue\n")
if "?" in self.parser.path:
path_info, query = self.parser.path.split('?', 1)
else:
path_info = self.parser.path
query = ""
if not self.parser.content_len and not self.parser.is_chunked:
wsgi_input = StringIO.StringIO()
else:
wsgi_input = TeeInput(self.socket, self.parser, buf)
wsgi_input = TeeInput(self.socket, self.parser, buf[i:])
environ = {
"wsgi.url_scheme": 'http',
@ -108,28 +102,29 @@ class HTTPRequest(object):
"SCRIPT_NAME": "",
"SERVER_SOFTWARE": self.SERVER_VERSION,
"REQUEST_METHOD": self.parser.method,
"PATH_INFO": unquote(path_info),
"QUERY_STRING": query,
"RAW_URI": self.parser.path,
"CONTENT_TYPE": headers.get('Content-Type', ''),
"PATH_INFO": unquote(self.parser.path),
"QUERY_STRING": self.parser.query_string,
"RAW_URI": self.parser.raw_path,
"CONTENT_TYPE": self.parser.headers_dict.get('Content-Type', ''),
"CONTENT_LENGTH": str(wsgi_input.len),
"REMOTE_ADDR": self.client_address[0],
"REMOTE_PORT": self.client_address[1],
"SERVER_NAME": self.server_address[0],
"SERVER_PORT": self.server_address[1],
"SERVER_PROTOCOL": self.parser.version
"SERVER_PROTOCOL": self.parser.raw_version
}
for key, value in headers.items():
for key, value in self.parser.headers:
key = 'HTTP_' + key.upper().replace('-', '_')
if key not in ('HTTP_CONTENT_TYPE', 'HTTP_CONTENT_LENGTH'):
environ[key] = value
return environ
def start_response(self, status, response_headers):
self.response_status = status
for name, value in response_headers:
name = _normalize_name(name)
name = normalize_name(name)
if not isinstance(value, basestring):
value = str(value)
self.response_headers[name] = value.strip()

View File

@ -15,7 +15,7 @@ class HTTPResponse(object):
def __init__(self, sock, response, req):
self.req = req
self.sock = sock.dup()
self.sock = sock
self.data = response
self.headers = req.response_headers or {}
self.status = req.response_status

View File

@ -73,10 +73,14 @@ def write_nonblock(sock, data):
if ret[1]: break
except socket.error, e:
if e[0] == errno.EINTR:
break
continue
raise
write(sock, data)
def normalize_name(name):
return "-".join([w.lower().capitalize() for w in name.split("-")])
def import_app(module):
parts = module.rsplit(":", 1)
if len(parts) == 1:

View File

@ -39,6 +39,8 @@ setup(
packages = find_packages(),
include_package_data = True,
scripts = ['bin/gunicorn', 'bin/gunicorn_django'],
test_suite = 'nose.collector',
)