Add HTTP parser unit tests and fix parser issues found along the way

This commit is contained in:
Benoit Chesneau 2010-01-20 14:10:38 +01:00
parent c613b826c8
commit 6eecc37fce
8 changed files with 20038 additions and 50 deletions

16
examples/nohup.out Normal file
View File

@ -0,0 +1,16 @@
INFO Booted Arbiter: 8103
INFO Worker 8104 booting
INFO Worker 8105 booting
INFO Worker 8105 exiting.
241
INFO Handling signal: int
INFO Worker 8104 exiting.
241
INFO Master is shutting down.
241
INFO Booted Arbiter: 8106
INFO Worker 8107 booting
INFO Worker 8108 booting
INFO Booted Arbiter: 8115
INFO Worker 8116 booting
INFO Worker 8117 booting

19943
examples/test.log Normal file

File diff suppressed because it is too large Load Diff

View File

@ -3,6 +3,6 @@
# This file is part of gunicorn released under the MIT license.
# See the NOTICE for more information.
from gunicorn.http.http_parser import HttpParser
from gunicorn.http.request import HTTPRequest, RequestError
from gunicorn.http.response import HTTPResponse

View File

@ -3,27 +3,39 @@
# This file is part of gunicorn released under the MIT license.
# See the NOTICE for more information.
import urlparse
from gunicorn.util import normalize_name
class HttpParserError(Exception):
""" Error raised when parsing fails."""
class HttpParser(object):
def __init__(self):
self._headers = {}
self.status = ""
self.headers = []
self.headers_dict = {}
self.raw_version = ""
self.raw_path = ""
self.version = None
self.method = None
self.path = None
self.method = ""
self.path = ""
self.query_string = ""
self.fragment = ""
self._content_len = None
self.start_offset = 0
self.chunk_size = 0
self._chunk_eof = False
def headers(self, headers, buf):
def filter_headers(self, headers, buf):
""" take a string as buffer and an header dict
(empty or not). It return new position or -1
if parsing isn't done. headers dict is updated
with new headers.
"""
if self._headers:
return self._headers
if self.headers:
return self.headers
ld = len("\r\n\r\n")
i = buf.find("\r\n\r\n")
@ -43,57 +55,73 @@ class HttpParser(object):
# parse headers. We silently ignore
# bad headers' lines
_headers = {}
hname = ""
for line in lines:
if line == "\t":
self._headers[hname] += line.strip()
headers[hname] += line.strip()
else:
try:
hname =self._parse_headerl(line)
hname =self._parse_headerl(_headers, line)
except ValueError:
# bad headers
pass
headers.update(self._headers)
self._content_len = int(self._headers.get('Content-Length') or 0)
self.headers_dict = _headers
headers.extend(list(_headers.items()))
self.headers = headers
self._content_len = int(_headers.get('Content-Length') or 0)
(_, _, self.path, self.query_string, self.fragment) = urlparse.urlsplit(self.raw_path)
return pos
def _first_line(self, line):
""" parse first line """
method, path, version = line.strip().split(" ")
self.version = version.strip()
self.method = method.upper()
self.path = path
self.status = status = line.strip()
def _parse_headerl(self, line):
method, path, version = status.split(" ")
version = version.strip()
self.raw_version = version
try:
major, minor = version.split("HTTP/")[1].split(".")
version = (int(major), int(minor))
except IndexError:
version = (1, 0)
self.version = version
self.method = method.upper()
self.raw_path = path
def _parse_headerl(self, hdrs, line):
""" parse header line"""
name, value = line.split(": ", 1)
name = name.strip()
self._headers[name] = value.strip()
name, value = line.split(":", 1)
name = normalize_name(name.strip())
hdrs[name] = value.rsplit("\r\n",1)[0].strip()
return name
@property
def should_close(self):
if self._should_close:
return True
if self._headers.get("Connection") == "close":
if self.headers_dict.get("Connection") == "close":
return True
if self._headers.get("Connection") == "Keep-Alive":
if self.headers_dict.get("Connection") == "Keep-Alive":
return False
if self.version < "HTTP/1.1":
if int("%s%s" % self.version) < 11:
return True
@property
def is_chunked(self):
""" is TE: chunked ?"""
transfert_encoding = self._headers.get('Transfer-Encoding', False)
transfert_encoding = self.headers_dict.get('Transfer-Encoding', False)
return (transfert_encoding == "chunked")
@property
def content_len(self):
""" return content length as integer or
None."""
transfert_encoding = self._headers.get('Transfer-Encoding')
content_length = self._headers.get('Content-Length')
transfert_encoding = self.headers_dict.get('Transfer-Encoding')
content_length = self.headers_dict.get('Content-Length')
if transfert_encoding is None:
if content_length is None:
return 0

View File

@ -17,15 +17,14 @@ import logging
from gunicorn import __version__
from gunicorn.http.http_parser import HttpParser
from gunicorn.http.tee import TeeInput
from gunicorn.util import CHUNK_SIZE, read_partial
from gunicorn.util import CHUNK_SIZE, read_partial, normalize_name
NORMALIZE_SPACE = re.compile(r'(?:\r\n)?[ \t]+')
def _normalize_name(name):
return "-".join([w.lower().capitalize() for w in name.split("-")])
class RequestError(Exception):
""" Raised when something went wrong while handling a request."""
@ -62,40 +61,35 @@ class HTTPRequest(object):
def read(self):
environ = {}
headers = {}
headers = []
remain = CHUNK_SIZE
buf = ""
buf = read_partial(self.socket, CHUNK_SIZE)
i = self.parser.headers(headers, buf)
i = self.parser.filter_headers(headers, buf)
if i == -1 and buf:
while True:
data = read_partial(self.socket, CHUNK_SIZE)
if not data: break
buf += data
i = self.parser.headers(headers, buf)
i = self.parser.filter_headers(headers, buf)
if i != -1: break
if not headers:
print "ici :()"
environ.update(self.DEFAULTS)
return environ
buf = buf[i:]
self.log.info("%s", self.parser.status)
self.log.info("Got headers:\n%s" % headers)
if headers.get('Except', '').lower() == "100-continue":
if self.parser.headers_dict.get('Except', '').lower() == "100-continue":
self.socket.send("100 Continue\n")
if "?" in self.parser.path:
path_info, query = self.parser.path.split('?', 1)
else:
path_info = self.parser.path
query = ""
if not self.parser.content_len and not self.parser.is_chunked:
wsgi_input = StringIO.StringIO()
else:
wsgi_input = TeeInput(self.socket, self.parser, buf)
wsgi_input = TeeInput(self.socket, self.parser, buf[i:])
environ = {
"wsgi.url_scheme": 'http',
@ -108,28 +102,29 @@ class HTTPRequest(object):
"SCRIPT_NAME": "",
"SERVER_SOFTWARE": self.SERVER_VERSION,
"REQUEST_METHOD": self.parser.method,
"PATH_INFO": unquote(path_info),
"QUERY_STRING": query,
"RAW_URI": self.parser.path,
"CONTENT_TYPE": headers.get('Content-Type', ''),
"PATH_INFO": unquote(self.parser.path),
"QUERY_STRING": self.parser.query_string,
"RAW_URI": self.parser.raw_path,
"CONTENT_TYPE": self.parser.headers_dict.get('Content-Type', ''),
"CONTENT_LENGTH": str(wsgi_input.len),
"REMOTE_ADDR": self.client_address[0],
"REMOTE_PORT": self.client_address[1],
"SERVER_NAME": self.server_address[0],
"SERVER_PORT": self.server_address[1],
"SERVER_PROTOCOL": self.parser.version
"SERVER_PROTOCOL": self.parser.raw_version
}
for key, value in headers.items():
for key, value in self.parser.headers:
key = 'HTTP_' + key.upper().replace('-', '_')
if key not in ('HTTP_CONTENT_TYPE', 'HTTP_CONTENT_LENGTH'):
environ[key] = value
return environ
def start_response(self, status, response_headers):
self.response_status = status
for name, value in response_headers:
name = _normalize_name(name)
name = normalize_name(name)
if not isinstance(value, basestring):
value = str(value)
self.response_headers[name] = value.strip()

View File

@ -15,7 +15,7 @@ class HTTPResponse(object):
def __init__(self, sock, response, req):
self.req = req
self.sock = sock.dup()
self.sock = sock
self.data = response
self.headers = req.response_headers or {}
self.status = req.response_status

View File

@ -73,10 +73,14 @@ def write_nonblock(sock, data):
if ret[1]: break
except socket.error, e:
if e[0] == errno.EINTR:
break
continue
raise
write(sock, data)
def normalize_name(name):
return "-".join([w.lower().capitalize() for w in name.split("-")])
def import_app(module):
parts = module.rsplit(":", 1)
if len(parts) == 1:

View File

@ -39,6 +39,8 @@ setup(
packages = find_packages(),
include_package_data = True,
scripts = ['bin/gunicorn', 'bin/gunicorn_django'],
test_suite = 'nose.collector',
)