From 0d32ab1356bd178925d15ba52df91daedfac2aad Mon Sep 17 00:00:00 2001
From: "Paul J. Davis"
Date: Thu, 6 May 2010 18:14:45 -0400
Subject: [PATCH] Initial steps for importing simplehttp.

---
 gunicorn/http/__init__.py   |   3 +
 gunicorn/http/body.py       | 279 ++++++++++++++++++++++++++++++++++++
 gunicorn/http/errors.py     |  60 ++++++++
 gunicorn/http/message.py    | 215 ++++++++++++++++++++++++++++
 gunicorn/http/parser.py     |  49 ++++++
 gunicorn/http/unreader.py   |  87 +++++++++++
 gunicorn/{ => http}/wsgi.py |   0
 gunicorn/workers/sync.py    |  11 +-
 8 files changed, 698 insertions(+), 6 deletions(-)
 create mode 100644 gunicorn/http/__init__.py
 create mode 100644 gunicorn/http/body.py
 create mode 100644 gunicorn/http/errors.py
 create mode 100644 gunicorn/http/message.py
 create mode 100644 gunicorn/http/parser.py
 create mode 100644 gunicorn/http/unreader.py
 rename gunicorn/{ => http}/wsgi.py (100%)

diff --git a/gunicorn/http/__init__.py b/gunicorn/http/__init__.py
new file mode 100644
index 00000000..14e2939a
--- /dev/null
+++ b/gunicorn/http/__init__.py
@@ -0,0 +1,3 @@
+
+from message import Message, Request
+from parser import Parser, RequestParser
\ No newline at end of file
diff --git a/gunicorn/http/body.py b/gunicorn/http/body.py
new file mode 100644
index 00000000..06650aae
--- /dev/null
+++ b/gunicorn/http/body.py
@@ -0,0 +1,279 @@
+
+import re
+
+try:
+    from cStringIO import StringIO
+except ImportError:
+    from StringIO import StringIO
+
+from errors import *
+
+class ChunkedReader(object):
+    """Reads a body sent with ``Transfer-Encoding: chunked``."""
+
+    def __init__(self, req, unreader):
+        self.req = req
+        self.parser = self.parse_chunked(unreader)
+        self.buf = StringIO()
+
+    def read(self, size=None):
+        """Return up to ``size`` decoded bytes, or everything that is
+        left when ``size`` is ``None`` or negative."""
+        if size == 0:
+            return ""
+        if size is not None and size < 0:
+            size = None
+
+        # Pull decoded chunks until enough is buffered or the generator
+        # is exhausted.
+        while self.parser and (size is None or self.buf.tell() < size):
+            try:
+                self.buf.write(self.parser.next())
+            except StopIteration:
+                self.parser = None
+
+        # Always drain through the buffer so data left over from an
+        # earlier sized read is handed out exactly once.
+        data = self.buf.getvalue()
+        self.buf.truncate(0)
+        if size is None or len(data) <= size:
+            return data
+        self.buf.write(data[size:])
+        return data[:size]
+
+    def parse_trailers(self, unreader, data):
+        """Consume the trailer section that follows the last chunk.
+
+        Parsed trailers are stored on ``self.req.trailers``; bytes read
+        past the section are pushed back onto ``unreader``."""
+        buf = StringIO()
+        buf.write(data)
+
+        idx = buf.getvalue().find("\r\n\r\n")
+        done = buf.getvalue()[:2] == "\r\n"
+        while idx < 0 and not done:
+            self.get_data(unreader, buf)
+            idx = buf.getvalue().find("\r\n\r\n")
+            done = buf.getvalue()[:2] == "\r\n"
+        if done:
+            # Empty trailer section: only the closing CRLF is present.
+            unreader.unread(buf.getvalue()[2:])
+            return ""
+        self.req.trailers = self.req.parse_headers(buf.getvalue()[:idx])
+        unreader.unread(buf.getvalue()[idx+4:])
+
+    def parse_chunked(self, unreader):
+        """Generator yielding the decoded payload of each chunk."""
+        (size, rest) = self.parse_chunk_size(unreader)
+        while size > 0:
+            while size > len(rest):
+                size -= len(rest)
+                yield rest
+                rest = unreader.read()
+                if not rest:
+                    raise NoMoreData(rest)
+            yield rest[:size]
+            # Remove \r\n after chunk
+            rest = rest[size:]
+            while len(rest) < 2:
+                data = unreader.read()
+                if not data:
+                    # Without this check a truncated body would make
+                    # the loop spin forever.
+                    raise NoMoreData(rest)
+                rest += data
+            if rest[:2] != '\r\n':
+                raise ChunkMissingTerminator(rest[:2])
+            (size, rest) = self.parse_chunk_size(unreader, data=rest[2:])
+
+    def parse_chunk_size(self, unreader, data=None):
+        """Read one chunk-size line.
+
+        Returns ``(size, rest)`` where ``rest`` is the data already read
+        past the line, or ``(0, None)`` after the last chunk, in which
+        case the trailers have been consumed too."""
+        buf = StringIO()
+        if data is not None:
+            buf.write(data)
+
+        idx = buf.getvalue().find("\r\n")
+        while idx < 0:
+            self.get_data(unreader, buf)
+            idx = buf.getvalue().find("\r\n")
+
+        data = buf.getvalue()
+        line, rest_chunk = data[:idx], data[idx+2:]
+
+        # Anything after ";" on the size line is ignored.
+        chunk_size = line.split(";", 1)[0].strip()
+        try:
+            chunk_size = int(chunk_size, 16)
+        except ValueError:
+            raise InvalidChunkSize(chunk_size)
+
+        if chunk_size == 0:
+            self.parse_trailers(unreader, rest_chunk)
+            return (0, None)
+        return (chunk_size, rest_chunk)
+
+    def get_data(self, unreader, buf):
+        """Append the next read from ``unreader`` to ``buf``; raise
+        NoMoreData when the source is exhausted."""
+        data = unreader.read()
+        if not data:
+            raise NoMoreData(buf.getvalue())
+        buf.write(data)
+
+class LengthReader(object):
+    """Reads a body whose size is given by ``Content-Length``."""
+
+    def __init__(self, unreader, length):
+        self.unreader = unreader
+        self.length = length
+
+    def read(self, size=None):
+        """Return up to ``size`` bytes without reading past the declared
+        length; ``None`` or a negative size means the whole remainder."""
+        if size is not None and not isinstance(size, (int, long)):
+            raise TypeError("size must be an integral type")
+
+        if size == 0 or self.length <= 0:
+            return ""
+        if size is None or size < 0 or size > self.length:
+            # Never hand out bytes that belong to the next message.
+            size = self.length
+
+        buf = StringIO()
+        data = self.unreader.read()
+        while data:
+            buf.write(data)
+            if buf.tell() >= size:
+                break
+            data = self.unreader.read()
+
+        buf = buf.getvalue()
+        ret, rest = buf[:size], buf[size:]
+        self.unreader.unread(rest)
+        # Count what was delivered; the source may end early.
+        self.length -= len(ret)
+        return ret
+
+class EOFReader(object):
+    """Reads a body that is terminated by the end of the stream."""
+
+    def __init__(self, unreader):
+        self.unreader = unreader
+        self.buf = StringIO()
+        self.finished = False
+
+    def read(self, size=None):
+        """Return up to ``size`` bytes, or everything up to the end of
+        the stream when ``size`` is ``None`` or negative."""
+        if size == 0:
+            return ""
+        if size is not None and size < 0:
+            size = None
+
+        # ``finished`` only records that the source is exhausted; data
+        # buffered by an earlier sized read must still be delivered.
+        while not self.finished and (size is None or self.buf.tell() < size):
+            data = self.unreader.read()
+            if data:
+                self.buf.write(data)
+            else:
+                self.finished = True
+
+        data = self.buf.getvalue()
+        self.buf.truncate(0)
+        if size is None or len(data) <= size:
+            return data
+        self.buf.write(data[size:])
+        return data[:size]
+
+class Body(object):
+    """File-like wrapper around one of the readers in this module."""
+
+    def __init__(self, reader):
+        self.reader = reader
+        self.buf = StringIO()
+
+    def __iter__(self):
+        return self
+
+    def next(self):
+        ret = self.readline()
+        if not ret:
+            raise StopIteration()
+        return ret
+
+    def read(self, size=None):
+        """Return up to ``size`` bytes; ``None`` or a negative size reads
+        the rest of the body."""
+        if size is not None and not isinstance(size, (int, long)):
+            raise TypeError("size must be an integral type")
+
+        if size == 0:
+            # Must not fall through to the "read everything" branch.
+            return ""
+
+        if size is not None and 0 < size < self.buf.tell():
+            data = self.buf.getvalue()
+            ret, rest = data[:size], data[size:]
+            self.buf.truncate(0)
+            self.buf.write(rest)
+            return ret
+
+        if size is not None and size > 0:
+            size -= self.buf.tell()
+        else:
+            size = None
+
+        ret = self.buf.getvalue() + self.reader.read(size=size)
+        self.buf.truncate(0)
+        return ret
+
+    def readline(self, size=None):
+        """Return the next line including its trailing "\\n", capped at
+        ``size`` bytes when given; "" at the end of the body."""
+        if size == 0:
+            return ""
+        if size is not None and size < 0:
+            size = None
+
+        # Look at buffered data first: an earlier call may already have
+        # read past the end of the line it returned.
+        idx = self.buf.getvalue().find("\n")
+        while idx < 0:
+            if size is not None and self.buf.tell() >= size:
+                break
+            data = self.reader.read(1024)
+            if not data:
+                break
+            self.buf.write(data)
+            idx = self.buf.getvalue().find("\n")
+
+        data = self.buf.getvalue()
+        # Keep the terminator with the line, as readlines() does.
+        end = len(data) if idx < 0 else idx + 1
+        if size is not None:
+            end = min(end, size)
+        ret, rest = data[:end], data[end:]
+        self.buf.truncate(0)
+        self.buf.write(rest)
+        return ret
+
+    def readlines(self, size=None):
+        """Return the rest of the body as a list of lines. ``size`` is
+        accepted but ignored."""
+        ret = []
+        data = self.read()
+        while len(data):
+            pos = data.find("\n")
+            if pos < 0:
+                ret.append(data)
+                data = ""
+            else:
+                line, data = data[:pos+1], data[pos+1:]
+                ret.append(line)
+        return ret
+
diff --git a/gunicorn/http/errors.py b/gunicorn/http/errors.py
new file mode 100644
index 00000000..57cd9932
--- /dev/null
+++ b/gunicorn/http/errors.py
@@ -0,0 +1,60 @@
+
+class ParseException(Exception):
+    """Base class for every error raised while parsing a message."""
+
+class NoMoreData(ParseException):
+    """The source ended before a complete element could be read."""
+    def __init__(self, buf=None):
+        self.buf = buf
+    def __str__(self):
+        return "No more data after: %r" % self.buf
+
+class InvalidRequestLine(ParseException):
+    def __init__(self, req):
+        self.req = req
+        self.code = 400
+
+    def __str__(self):
+        return "Invalid HTTP request line: %r" % self.req
+
+class InvalidRequestMethod(ParseException):
+    def __init__(self, method):
+        self.method = method
+
+    def __str__(self):
+        return "Invalid HTTP method: %r" % self.method
+
+class InvalidHTTPVersion(ParseException):
+    def __init__(self, version):
+        self.version = version
+
+    def __str__(self):
+        return "Invalid HTTP Version: %r" % self.version
+
+class InvalidHeader(ParseException):
+    def __init__(self, hdr):
+        self.hdr = hdr
+
+    def __str__(self):
+        return "Invalid HTTP Header: %r" % self.hdr
+
+class InvalidHeaderName(ParseException):
+    def __init__(self, hdr):
+        self.hdr = hdr
+
+    def __str__(self):
+        return "Invalid HTTP header name: %r" % self.hdr
+
+class InvalidChunkSize(ParseException):
+    def __init__(self, data):
+        self.data = data
+
+    def __str__(self):
+        return "Invalid chunk size: %r" % self.data
+
+class ChunkMissingTerminator(ParseException):
+    def __init__(self, term):
+        self.term = term
+
+    def __str__(self):
+        return "Invalid chunk terminator is not '\\r\\n': %r" % self.term
diff --git a/gunicorn/http/message.py b/gunicorn/http/message.py
new file mode 100644
index 00000000..cb703f83
--- /dev/null
+++ b/gunicorn/http/message.py
@@ -0,0 +1,215 @@
+
+import os
+import re
+import urlparse
+
+try:
+    from cStringIO import StringIO
+except ImportError:
+    from StringIO import StringIO
+
+from body import ChunkedReader, LengthReader, EOFReader, Body
+from errors import *
+
+class Message(object):
+    """Base class for a parsed HTTP message.
+
+    Construction parses the start line and headers from ``unreader``,
+    pushes unconsumed bytes back onto it and attaches a body reader."""
+
+    def __init__(self, unreader):
+        self.unreader = unreader
+        self.version = None
+        self.headers = []
+        self.trailers = []
+        self.body = None
+
+        self.hdrre = re.compile("[\x00-\x1F\x7F()<>@,;:\[\]={} \t\\\\\"]")
+
+        unused = self.parse(self.unreader)
+        self.unreader.unread(unused)
+        self.set_body_reader()
+
+    def parse(self, unreader):
+        """Parse the message head from ``unreader`` and return the bytes
+        read past it. Subclasses must override this."""
+        raise NotImplementedError()
+
+    def parse_headers(self, data):
+        """Turn a raw header block into a list of ``(NAME, value)`` pairs
+        with upper-cased names and continuation lines folded in."""
+        headers = []
+
+        # Split lines on \r\n keeping the \r\n on each line
+        lines = []
+        while len(data):
+            pos = data.find("\r\n")
+            if pos < 0:
+                lines.append(data)
+                data = ""
+            else:
+                lines.append(data[:pos+2])
+                data = data[pos+2:]
+
+        # Parse headers into key/value pairs paying attention
+        # to continuation lines.
+        while len(lines):
+            # Parse initial header name : value pair.
+            curr = lines.pop(0)
+            if curr.find(":") < 0:
+                raise InvalidHeader(curr.strip())
+            name, value = curr.split(":", 1)
+            name = name.rstrip(" \t").upper()
+            if self.hdrre.search(name):
+                raise InvalidHeaderName(name)
+            name, value = name.strip(), [value.lstrip()]
+
+            # Consume value continuation lines
+            while len(lines) and lines[0].startswith((" ", "\t")):
+                value.append(lines.pop(0))
+            value = ''.join(value).rstrip()
+
+            headers.append((name, value))
+        return headers
+
+    def set_body_reader(self):
+        """Pick the body reader: chunked encoding wins over
+        Content-Length, and without either the body runs to EOF."""
+        chunked = False
+        clength = None
+
+        for (name, value) in self.headers:
+            if name.upper() == "CONTENT-LENGTH":
+                try:
+                    clength = int(value)
+                except ValueError:
+                    clength = None
+            elif name.upper() == "TRANSFER-ENCODING":
+                chunked = value.lower() == "chunked"
+
+        if chunked:
+            self.body = Body(ChunkedReader(self, self.unreader))
+        elif clength is not None:
+            self.body = Body(LengthReader(self.unreader, clength))
+        else:
+            self.body = Body(EOFReader(self.unreader))
+
+    def should_close(self):
+        """Return True when the connection must not be reused. An
+        explicit Connection header wins; otherwise HTTP/1.0 closes."""
+        for (h, v) in self.headers:
+            if h.lower() == "connection":
+                if v.lower().strip() == "close":
+                    return True
+                elif v.lower().strip() == "keep-alive":
+                    return False
+        return self.version <= (1, 0)
+
+
+class Request(Message):
+    """An HTTP request: request line plus headers and body."""
+
+    def __init__(self, unreader):
+        # NOTE(review): "$-_" inside the class is a character range, so
+        # this accepts more than the listed characters -- confirm intent.
+        self.methre = re.compile("[A-Z0-9$-_.]{3,20}")
+        self.versre = re.compile(r"HTTP/(\d+)\.(\d+)")
+
+        self.method = None
+        self.uri = None
+        self.scheme = None
+        self.host = None
+        self.port = 80
+        self.path = None
+        self.query = None
+        self.fragment = None
+
+        super(Request, self).__init__(unreader)
+
+
+    def get_data(self, unreader, buf, stop=False):
+        """Append the next read from ``unreader`` to ``buf``.
+
+        At end of input raises StopIteration when ``stop`` is true
+        (nothing of a new request was read yet), else NoMoreData."""
+        data = unreader.read()
+        if not data:
+            if stop:
+                raise StopIteration()
+            raise NoMoreData(buf.getvalue())
+        buf.write(data)
+
+    def parse(self, unreader):
+        """Parse the request line and headers; return the bytes that
+        were read beyond the blank line ending the headers."""
+        buf = StringIO()
+
+        self.get_data(unreader, buf, stop=True)
+
+        # Request line
+        idx = buf.getvalue().find("\r\n")
+        while idx < 0:
+            self.get_data(unreader, buf)
+            idx = buf.getvalue().find("\r\n")
+        self.parse_request_line(buf.getvalue()[:idx])
+        rest = buf.getvalue()[idx+2:] # Skip \r\n
+        buf.truncate(0)
+        buf.write(rest)
+
+        # Headers
+        idx = buf.getvalue().find("\r\n\r\n")
+        done = buf.getvalue()[:2] == "\r\n"
+        while idx < 0 and not done:
+            self.get_data(unreader, buf)
+            idx = buf.getvalue().find("\r\n\r\n")
+            done = buf.getvalue()[:2] == "\r\n"
+        if done:
+            unreader.unread(buf.getvalue()[2:])
+            return ""
+        self.headers = self.parse_headers(buf.getvalue()[:idx])
+
+        ret = buf.getvalue()[idx+4:]
+        buf.truncate(0)
+        return ret
+
+    def parse_request_line(self, line):
+        """Split ``line`` into method, URI and version and store the
+        pieces on the instance; raises a ParseException subclass when
+        any part is malformed."""
+        bits = line.split(None, 2)
+        if len(bits) != 3:
+            raise InvalidRequestLine(line)
+
+        # Method
+        if not self.methre.match(bits[0]):
+            raise InvalidRequestMethod(bits[0])
+        self.method = bits[0].upper()
+
+        # URI
+        self.uri = bits[1]
+        parts = urlparse.urlparse(bits[1])
+        self.scheme = parts.scheme or None
+        self.host = parts.netloc or None
+        if parts.port is None:
+            self.port = 80
+        else:
+            self.host = self.host.rsplit(":", 1)[0]
+            self.port = parts.port
+        self.path = parts.path or None
+        self.query = parts.query or None
+        self.fragment = parts.fragment or None
+
+        # Version
+        match = self.versre.match(bits[2])
+        if match is None:
+            raise InvalidHTTPVersion(bits[2])
+        self.version = (int(match.group(1)), int(match.group(2)))
+
+    def set_body_reader(self):
+        """Requests never read to EOF: without chunking or a
+        Content-Length the body is treated as empty."""
+        super(Request, self).set_body_reader()
+        if isinstance(self.body.reader, EOFReader):
+            self.body = Body(LengthReader(self.unreader, 0))
+
+
diff --git a/gunicorn/http/parser.py b/gunicorn/http/parser.py
new file mode 100644
index 00000000..a5e31bc3
--- /dev/null
+++ b/gunicorn/http/parser.py
@@ -0,0 +1,49 @@
+
+import socket
+
+from message import Request
+from unreader import SocketUnreader, IterUnreader
+
+class Parser(object):
+    """Iterator producing successive messages from a socket or from
+    an iterable of strings."""
+
+    def __init__(self, mesg_class, source):
+        self.mesg_class = mesg_class
+        if isinstance(source, socket.socket):
+            self.unreader = SocketUnreader(source)
+        else:
+            self.unreader = IterUnreader(source)
+        self.mesg = None
+
+    def __iter__(self):
+        return self
+
+    def next(self):
+        """Return the next message; stop once the previous one asked
+        for the connection to be closed or the source is exhausted."""
+        # There is no previous message on the first call.
+        if self.mesg is not None and self.mesg.should_close():
+            raise StopIteration()
+        self.discard()
+        self.mesg = self.mesg_class(self.unreader)
+        if not self.mesg:
+            raise StopIteration()
+        return self.mesg
+
+    def discard(self):
+        """Read and drop what is left of the current message body so
+        the next message starts at the right offset."""
+        if self.mesg is not None:
+            # The message itself has no read(); its body does.
+            data = self.mesg.body.read(8192)
+            while data:
+                data = self.mesg.body.read(8192)
+        self.mesg = None
+
+class RequestParser(Parser):
+    """Parser that yields ``Request`` objects."""
+
+    def __init__(self, *args, **kwargs):
+        super(RequestParser, self).__init__(Request, *args, **kwargs)
+
diff --git a/gunicorn/http/unreader.py b/gunicorn/http/unreader.py
new file mode 100644
index 00000000..838452e9
--- /dev/null
+++ b/gunicorn/http/unreader.py
@@ -0,0 +1,87 @@
+
+import os
+
+try:
+    from cStringIO import StringIO
+except ImportError:
+    from StringIO import StringIO
+
+# Classes that can undo reading data from
+# a given type of data source.
+
+class Unreader(object):
+    """Reader that lets callers push data back with unread().
+
+    Subclasses implement chunk() to fetch more data from the source."""
+
+    def __init__(self):
+        self.buf = StringIO()
+
+    def chunk(self):
+        """Return the next piece of data, or "" at end of input."""
+        raise NotImplementedError()
+
+    def read(self, size=None):
+        """Return pushed-back data or the next chunk when ``size`` is
+        ``None`` or negative; otherwise up to ``size`` bytes (fewer
+        only at end of input)."""
+        if size is not None and not isinstance(size, (int, long)):
+            raise TypeError("size parameter must be an int or long.")
+        if size == 0:
+            return ""
+        if size < 0:
+            size = None
+
+        self.buf.seek(0, os.SEEK_END)
+
+        if size is None and self.buf.tell():
+            ret = self.buf.getvalue()
+            self.buf.truncate(0)
+            return ret
+        if size is None:
+            return self.chunk()
+
+        while self.buf.tell() < size:
+            chunk = self.chunk()
+            if not len(chunk):
+                ret = self.buf.getvalue()
+                self.buf.truncate(0)
+                return ret
+            self.buf.write(chunk)
+        data = self.buf.getvalue()
+        self.buf.truncate(0)
+        self.buf.write(data[size:])
+        return data[:size]
+
+    def unread(self, data):
+        """Push ``data`` back so a later read() returns it. Note that it
+        is appended after anything already buffered."""
+        self.buf.seek(0, os.SEEK_END)
+        self.buf.write(data)
+
+class SocketUnreader(Unreader):
+    """Unreader fed by ``sock.recv(max_chunk)``."""
+
+    def __init__(self, sock, max_chunk=8192):
+        super(SocketUnreader, self).__init__()
+        self.sock = sock
+        self.mxchunk = max_chunk
+
+    def chunk(self):
+        return self.sock.recv(self.mxchunk)
+
+class IterUnreader(Unreader):
+    """Unreader fed by an iterable of strings."""
+
+    def __init__(self, iterable):
+        super(IterUnreader, self).__init__()
+        self.iter = iter(iterable)
+
+    def chunk(self):
+        if not self.iter:
+            return ""
+        try:
+            return self.iter.next()
+        except StopIteration:
+            self.iter = None
+            return ""
diff --git a/gunicorn/wsgi.py b/gunicorn/http/wsgi.py
similarity index 100%
rename from gunicorn/wsgi.py
rename to gunicorn/http/wsgi.py
diff --git a/gunicorn/workers/sync.py b/gunicorn/workers/sync.py
index 32d25daf..7380ae44 100644
--- a/gunicorn/workers/sync.py
+++ b/gunicorn/workers/sync.py
@@ -10,13 +10,12 @@ import select
 import socket
 import traceback
 
-from simplehttp import RequestParser
-
+import gunicorn.http.parser as http_parser
+import gunicorn.http.wsgi as wsgi
 import gunicorn.util as util
-import gunicorn.wsgi as wsgi
-from gunicorn.workers.base import Worker
+import gunicorn.workers.base as base
 
-class SyncWorker(Worker):
+class SyncWorker(base.Worker):
 
     def run(self):
         self.nr = 0
@@ -71,7 +70,7 @@ class SyncWorker(Worker):
 
     def handle(self, client, addr):
         try:
-            parser = RequestParser(client)
+            parser = http_parser.RequestParser(client)
             req = parser.next()
             self.handle_request(req, client, addr)
         except socket.error, e: