From 003c474fe2270fe67393a630516e2db659cd3918 Mon Sep 17 00:00:00 2001 From: benoitc Date: Wed, 24 Oct 2012 13:51:35 +0200 Subject: [PATCH] handle bytes & native strings This patch makes sure that we now correctly handle bytes and native strings on Python 3: - In Python 3, sockets now take and return bytes. - According to PEP 3333, headers should be native strings and the body should be bytes. --- examples/longpoll.py | 2 +- examples/multiapp.py | 2 +- examples/multidomainapp.py | 8 +++---- examples/slowclient.py | 2 +- examples/test.py | 2 +- gunicorn/http/body.py | 46 +++++++++++++++++--------------------- gunicorn/http/message.py | 27 +++++++++------------- gunicorn/http/wsgi.py | 19 +++++++++++----- gunicorn/six.py | 34 ++++++++++++++++++++++++++++ gunicorn/util.py | 10 ++++++--- 10 files changed, 94 insertions(+), 58 deletions(-) diff --git a/examples/longpoll.py b/examples/longpoll.py index 19559d1b..97d6647f 100644 --- a/examples/longpoll.py +++ b/examples/longpoll.py @@ -17,7 +17,7 @@ class TestIter(object): def app(environ, start_response): """Application which cooperatively pauses 20 seconds (needed to surpass normal timeouts) before responding""" - data = 'Hello, World!\n' + data = b'Hello, World!\n' status = '200 OK' response_headers = [ ('Content-type','text/plain'), diff --git a/examples/multiapp.py b/examples/multiapp.py index e48a253d..c6a4c90b 100644 --- a/examples/multiapp.py +++ b/examples/multiapp.py @@ -38,7 +38,7 @@ class Application(object): return match[0]['app'](environ, start_response) def error404(self, environ, start_response): - html = """\ + html = b"""\ 404 - Not Found diff --git a/examples/multidomainapp.py b/examples/multidomainapp.py index 89e59e2b..948a5359 100644 --- a/examples/multidomainapp.py +++ b/examples/multidomainapp.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 - # -# This file is part of gunicorn released under the MIT license. +# This file is part of gunicorn released under the MIT license. # See the NOTICE for more information. 
import re @@ -20,15 +20,15 @@ class SubDomainApp: return app(environ, start_response) else: start_response("404 Not Found", []) - return [""] + return [b""] def hello(environ, start_response): start_response("200 OK", [("Content-Type", "text/plain")]) - return ["Hello, world\n"] + return [b"Hello, world\n"] def bye(environ, start_response): start_response("200 OK", [("Content-Type", "text/plain")]) - return ["Goodbye!\n"] + return [b"Goodbye!\n"] app = SubDomainApp([ ("localhost", hello), diff --git a/examples/slowclient.py b/examples/slowclient.py index 6f612bea..15729d93 100644 --- a/examples/slowclient.py +++ b/examples/slowclient.py @@ -9,7 +9,7 @@ import time def app(environ, start_response): """Application which cooperatively pauses 10 seconds before responding""" - data = 'Hello, World!\n' + data = b'Hello, World!\n' status = '200 OK' response_headers = [ ('Content-type','text/plain'), diff --git a/examples/test.py b/examples/test.py index 8972f68d..c61f672e 100644 --- a/examples/test.py +++ b/examples/test.py @@ -12,7 +12,7 @@ from gunicorn import __version__ #@validator def app(environ, start_response): """Simplest possible application object""" - data = 'Hello, World!\n' + data = b'Hello, World!\n' status = '200 OK' response_headers = [ diff --git a/gunicorn/http/body.py b/gunicorn/http/body.py index b331f292..82797e4c 100644 --- a/gunicorn/http/body.py +++ b/gunicorn/http/body.py @@ -5,13 +5,9 @@ import sys -try: - from cStringIO import StringIO -except ImportError: - from StringIO import StringIO - -from gunicorn.http.errors import NoMoreData, ChunkMissingTerminator, \ -InvalidChunkSize +from gunicorn.http.errors import (NoMoreData, ChunkMissingTerminator, + InvalidChunkSize) +from gunicorn.six import StringIO, bytes_to_str class ChunkedReader(object): def __init__(self, req, unreader): @@ -25,7 +21,7 @@ class ChunkedReader(object): if size <= 0: raise ValueError("Size must be positive.") if size == 0: - return "" + return b"" if self.parser: while 
self.buf.tell() < size: @@ -45,16 +41,17 @@ class ChunkedReader(object): buf = StringIO() buf.write(data) - idx = buf.getvalue().find("\r\n\r\n") - done = buf.getvalue()[:2] == "\r\n" + idx = buf.getvalue().find(b"\r\n\r\n") + done = buf.getvalue()[:2] == b"\r\n" while idx < 0 and not done: self.get_data(unreader, buf) - idx = buf.getvalue().find("\r\n\r\n") - done = buf.getvalue()[:2] == "\r\n" + idx = buf.getvalue().find(b"\r\n\r\n") + done = buf.getvalue()[:2] == b"\r\n" if done: unreader.unread(buf.getvalue()[2:]) - return "" - self.req.trailers = self.req.parse_headers(buf.getvalue()[:idx]) + return b"" + self.req.trailers = self.req.parse_headers( + bytes_to_str(buf.getvalue()[:idx])) unreader.unread(buf.getvalue()[idx+4:]) def parse_chunked(self, unreader): @@ -71,7 +68,7 @@ class ChunkedReader(object): rest = rest[size:] while len(rest) < 2: rest += unreader.read() - if rest[:2] != '\r\n': + if rest[:2] != b'\r\n': raise ChunkMissingTerminator(rest[:2]) (size, rest) = self.parse_chunk_size(unreader, data=rest[2:]) @@ -80,15 +77,15 @@ class ChunkedReader(object): if data is not None: buf.write(data) - idx = buf.getvalue().find("\r\n") + idx = buf.getvalue().find(b"\r\n") while idx < 0: self.get_data(unreader, buf) - idx = buf.getvalue().find("\r\n") + idx = buf.getvalue().find(b"\r\n") data = buf.getvalue() line, rest_chunk = data[:idx], data[idx+2:] - chunk_size = line.split(";", 1)[0].strip() + chunk_size = line.split(b";", 1)[0].strip() try: chunk_size = int(chunk_size, 16) except ValueError: @@ -121,7 +118,7 @@ class LengthReader(object): if size < 0: raise ValueError("Size must be positive.") if size == 0: - return "" + return b"" buf = StringIO() @@ -201,7 +198,7 @@ class Body(object): def read(self, size=None): size = self.getsize(size) if size == 0: - return "" + return b"" if size < self.buf.tell(): data = self.buf.getvalue() @@ -225,7 +222,7 @@ class Body(object): def readline(self, size=None): size = self.getsize(size) if size == 0: - return "" + 
return b"" line = self.buf.getvalue() self.buf.truncate(0) @@ -234,7 +231,7 @@ class Body(object): extra_buf_data = line[size:] line = line[:size] - idx = line.find("\n") + idx = line.find(b"\n") if idx >= 0: ret = line[:idx+1] self.buf.write(line[idx+1:]) @@ -247,12 +244,11 @@ class Body(object): ret = [] data = self.read() while len(data): - pos = data.find("\n") + pos = data.find(b"\n") if pos < 0: ret.append(data) - data = "" + data = b"" else: line, data = data[:pos+1], data[pos+1:] ret.append(line) return ret - diff --git a/gunicorn/http/message.py b/gunicorn/http/message.py index d24ea5d0..b32dbc2b 100644 --- a/gunicorn/http/message.py +++ b/gunicorn/http/message.py @@ -4,21 +4,16 @@ # See the NOTICE for more information. import re -import urlparse import socket from errno import ENOTCONN -try: - from cStringIO import StringIO -except ImportError: - from StringIO import StringIO - from gunicorn.http.unreader import SocketUnreader from gunicorn.http.body import ChunkedReader, LengthReader, EOFReader, Body from gunicorn.http.errors import InvalidHeader, InvalidHeaderName, NoMoreData, \ InvalidRequestLine, InvalidRequestMethod, InvalidHTTPVersion, \ LimitRequestLine, LimitRequestHeaders from gunicorn.http.errors import InvalidProxyLine, ForbiddenProxyRequest +from gunicorn.six import StringIO, urlsplit, bytes_to_str MAX_REQUEST_LINE = 8190 MAX_HEADERS = 32768 @@ -61,7 +56,7 @@ class Message(object): headers = [] # Split lines on \r\n keeping the \r\n on each line - lines = [line + "\r\n" for line in data.split("\r\n")] + lines = [bytes_to_str(line) + "\r\n" for line in data.split(b"\r\n")] # Parse headers into key/value pairs paying attention # to continuation lines. 
@@ -173,24 +168,24 @@ class Request(Message): line, rbuf = self.read_line(unreader, buf, self.limit_request_line) # proxy protocol - if self.proxy_protocol(line): + if self.proxy_protocol(bytes_to_str(line)): # get next request line buf = StringIO() buf.write(rbuf) line, rbuf = self.read_line(unreader, buf, self.limit_request_line) - self.parse_request_line(line) + self.parse_request_line(bytes_to_str(line)) buf = StringIO() buf.write(rbuf) # Headers data = buf.getvalue() - idx = data.find("\r\n\r\n") + idx = data.find(b"\r\n\r\n") - done = data[:2] == "\r\n" + done = data[:2] == b"\r\n" while True: - idx = data.find("\r\n\r\n") - done = data[:2] == "\r\n" + idx = data.find(b"\r\n\r\n") + done = data[:2] == b"\r\n" if idx < 0 and not done: self.get_data(unreader, buf) @@ -202,7 +197,7 @@ class Request(Message): if done: self.unreader.unread(data[2:]) - return "" + return b"" self.headers = self.parse_headers(data[:idx]) @@ -214,7 +209,7 @@ class Request(Message): data = buf.getvalue() while True: - idx = data.find("\r\n") + idx = data.find(b"\r\n") if idx >= 0: # check if the request line is too large if idx > limit > 0: @@ -328,7 +323,7 @@ class Request(Message): else: self.uri = bits[1] - parts = urlparse.urlsplit(self.uri) + parts = urlsplit(self.uri) self.path = parts.path or "" self.query = parts.query or "" self.fragment = parts.fragment or "" diff --git a/gunicorn/http/wsgi.py b/gunicorn/http/wsgi.py index 228ed941..026a9c98 100644 --- a/gunicorn/http/wsgi.py +++ b/gunicorn/http/wsgi.py @@ -7,8 +7,8 @@ import logging import os import re import sys -from urllib import unquote +from gunicorn.six import unquote, string_types, binary_type from gunicorn import SERVER_SOFTWARE import gunicorn.util as util @@ -265,12 +265,18 @@ class Response(object): return tosend = self.default_headers() tosend.extend(["%s: %s\r\n" % (n, v) for n, v in self.headers]) - util.write(self.sock, "%s\r\n" % "".join(tosend)) + + header_str = "%s\r\n" % "".join(tosend) + 
util.write(self.sock, header_str.encode('latin1')) self.headers_sent = True def write(self, arg): self.send_headers() - assert isinstance(arg, basestring), "%r is not a string." % arg + + if isinstance(arg, text_type): + arg = arg.decode('utf-8') + + assert isinstance(arg, binary_type), "%r is not a byte." % arg arglen = len(arg) tosend = arglen @@ -328,12 +334,13 @@ class Response(object): self.send_headers() if self.is_chunked(): - self.sock.sendall("%X\r\n" % nbytes) + chunk_size = "%X\r\n" % nbytes + self.sock.sendall(chunk_size.encode('utf-8')) self.sendfile_all(fileno, self.sock.fileno(), fo_offset, nbytes) if self.is_chunked(): - self.sock.sendall("\r\n") + self.sock.sendall(b"\r\n") os.lseek(fileno, fd_offset, os.SEEK_SET) else: @@ -344,4 +351,4 @@ class Response(object): if not self.headers_sent: self.send_headers() if self.chunked: - util.write_chunk(self.sock, "") + util.write_chunk(self.sock, b"") diff --git a/gunicorn/six.py b/gunicorn/six.py index 44b80a44..e82ddce2 100644 --- a/gunicorn/six.py +++ b/gunicorn/six.py @@ -364,3 +364,37 @@ _add_doc(reraise, """Reraise an exception.""") def with_metaclass(meta, base=object): """Create a base class with a metaclass.""" return meta("NewBase", (base,), {}) + + +# specific to gunicorn +if PY3: + import io + StringIO = io.BytesIO + + def bytes_to_str(b): + return str(b, 'latin1') + + import urllib.parse + + unquote = urllib.parse.unquote + urlsplit = urllib.parse.urlsplit + +else: + try: + import cStringIO as StringIO + except ImportError: + import StringIO + + StringIO = StringIO + + + def bytestring(s): + if isinstance(s, unicode): + return s.encode('utf-8') + return s + + import urlparse as orig_urlparse + urlsplit = orig_urlparse.urlsplit + + import urllib + urlunquote = urllib.unquote diff --git a/gunicorn/util.py b/gunicorn/util.py index e6fdb5ed..11731327 100644 --- a/gunicorn/util.py +++ b/gunicorn/util.py @@ -25,6 +25,7 @@ import textwrap import time import inspect +from gunicorn.six import text_type 
MAXFD = 1024 if (hasattr(os, "devnull")): @@ -223,7 +224,10 @@ except ImportError: pass def write_chunk(sock, data): - chunk = "".join(("%X\r\n" % len(data), data, "\r\n")) + if instance(data, text_type): + data = data.decode('utf-8') + chunk_size = "%X\r\n" % len(data) + chunk = b"".join([chunk_size.decode('utf-8'), data, b"\r\n"]) sock.sendall(chunk) def write(sock, data, chunked=False): @@ -259,7 +263,7 @@ def write_error(sock, status_int, reason, mesg): """) % {"reason": reason, "mesg": mesg} - http = textwrap.dedent("""\ + headers = textwrap.dedent("""\ HTTP/1.1 %s %s\r Connection: close\r Content-Type: text/html\r @@ -267,7 +271,7 @@ def write_error(sock, status_int, reason, mesg): \r %s """) % (str(status_int), reason, len(html), html) - write_nonblock(sock, http) + write_nonblock(sock, http.encode('latin1')) def normalize_name(name): return "-".join([w.lower().capitalize() for w in name.split("-")])