handle bytes & native strings

This patch makes sure that we now correctly handle bytes and native
strings on Python 3:

- In Python 3, sockets take and return bytes.
- According to PEP 3333, headers should be native strings and the body
  should be bytes.
This commit is contained in:
benoitc 2012-10-24 13:51:35 +02:00
parent 5759d59f08
commit 003c474fe2
10 changed files with 94 additions and 58 deletions

View File

@ -17,7 +17,7 @@ class TestIter(object):
def app(environ, start_response): def app(environ, start_response):
"""Application which cooperatively pauses 20 seconds (needed to surpass normal timeouts) before responding""" """Application which cooperatively pauses 20 seconds (needed to surpass normal timeouts) before responding"""
data = 'Hello, World!\n' data = b'Hello, World!\n'
status = '200 OK' status = '200 OK'
response_headers = [ response_headers = [
('Content-type','text/plain'), ('Content-type','text/plain'),

View File

@ -38,7 +38,7 @@ class Application(object):
return match[0]['app'](environ, start_response) return match[0]['app'](environ, start_response)
def error404(self, environ, start_response): def error404(self, environ, start_response):
html = """\ html = b"""\
<html> <html>
<head> <head>
<title>404 - Not Found</title> <title>404 - Not Found</title>

View File

@ -1,6 +1,6 @@
# -*- coding: utf-8 - # -*- coding: utf-8 -
# #
# This file is part of gunicorn released under the MIT license. # This file is part of gunicorn released under the MIT license.
# See the NOTICE for more information. # See the NOTICE for more information.
import re import re
@ -20,15 +20,15 @@ class SubDomainApp:
return app(environ, start_response) return app(environ, start_response)
else: else:
start_response("404 Not Found", []) start_response("404 Not Found", [])
return [""] return [b""]
def hello(environ, start_response): def hello(environ, start_response):
start_response("200 OK", [("Content-Type", "text/plain")]) start_response("200 OK", [("Content-Type", "text/plain")])
return ["Hello, world\n"] return [b"Hello, world\n"]
def bye(environ, start_response): def bye(environ, start_response):
start_response("200 OK", [("Content-Type", "text/plain")]) start_response("200 OK", [("Content-Type", "text/plain")])
return ["Goodbye!\n"] return [b"Goodbye!\n"]
app = SubDomainApp([ app = SubDomainApp([
("localhost", hello), ("localhost", hello),

View File

@ -9,7 +9,7 @@ import time
def app(environ, start_response): def app(environ, start_response):
"""Application which cooperatively pauses 10 seconds before responding""" """Application which cooperatively pauses 10 seconds before responding"""
data = 'Hello, World!\n' data = b'Hello, World!\n'
status = '200 OK' status = '200 OK'
response_headers = [ response_headers = [
('Content-type','text/plain'), ('Content-type','text/plain'),

View File

@ -12,7 +12,7 @@ from gunicorn import __version__
#@validator #@validator
def app(environ, start_response): def app(environ, start_response):
"""Simplest possible application object""" """Simplest possible application object"""
data = 'Hello, World!\n' data = b'Hello, World!\n'
status = '200 OK' status = '200 OK'
response_headers = [ response_headers = [

View File

@ -5,13 +5,9 @@
import sys import sys
try: from gunicorn.http.errors import (NoMoreData, ChunkMissingTerminator,
from cStringIO import StringIO InvalidChunkSize)
except ImportError: from gunicorn.six import StringIO, bytes_to_str
from StringIO import StringIO
from gunicorn.http.errors import NoMoreData, ChunkMissingTerminator, \
InvalidChunkSize
class ChunkedReader(object): class ChunkedReader(object):
def __init__(self, req, unreader): def __init__(self, req, unreader):
@ -25,7 +21,7 @@ class ChunkedReader(object):
if size <= 0: if size <= 0:
raise ValueError("Size must be positive.") raise ValueError("Size must be positive.")
if size == 0: if size == 0:
return "" return b""
if self.parser: if self.parser:
while self.buf.tell() < size: while self.buf.tell() < size:
@ -45,16 +41,17 @@ class ChunkedReader(object):
buf = StringIO() buf = StringIO()
buf.write(data) buf.write(data)
idx = buf.getvalue().find("\r\n\r\n") idx = buf.getvalue().find(b"\r\n\r\n")
done = buf.getvalue()[:2] == "\r\n" done = buf.getvalue()[:2] == b"\r\n"
while idx < 0 and not done: while idx < 0 and not done:
self.get_data(unreader, buf) self.get_data(unreader, buf)
idx = buf.getvalue().find("\r\n\r\n") idx = buf.getvalue().find(b"\r\n\r\n")
done = buf.getvalue()[:2] == "\r\n" done = buf.getvalue()[:2] == b"\r\n"
if done: if done:
unreader.unread(buf.getvalue()[2:]) unreader.unread(buf.getvalue()[2:])
return "" return b""
self.req.trailers = self.req.parse_headers(buf.getvalue()[:idx]) self.req.trailers = self.req.parse_headers(
bytes_to_str(buf.getvalue()[:idx]))
unreader.unread(buf.getvalue()[idx+4:]) unreader.unread(buf.getvalue()[idx+4:])
def parse_chunked(self, unreader): def parse_chunked(self, unreader):
@ -71,7 +68,7 @@ class ChunkedReader(object):
rest = rest[size:] rest = rest[size:]
while len(rest) < 2: while len(rest) < 2:
rest += unreader.read() rest += unreader.read()
if rest[:2] != '\r\n': if rest[:2] != b'\r\n':
raise ChunkMissingTerminator(rest[:2]) raise ChunkMissingTerminator(rest[:2])
(size, rest) = self.parse_chunk_size(unreader, data=rest[2:]) (size, rest) = self.parse_chunk_size(unreader, data=rest[2:])
@ -80,15 +77,15 @@ class ChunkedReader(object):
if data is not None: if data is not None:
buf.write(data) buf.write(data)
idx = buf.getvalue().find("\r\n") idx = buf.getvalue().find(b"\r\n")
while idx < 0: while idx < 0:
self.get_data(unreader, buf) self.get_data(unreader, buf)
idx = buf.getvalue().find("\r\n") idx = buf.getvalue().find(b"\r\n")
data = buf.getvalue() data = buf.getvalue()
line, rest_chunk = data[:idx], data[idx+2:] line, rest_chunk = data[:idx], data[idx+2:]
chunk_size = line.split(";", 1)[0].strip() chunk_size = line.split(b";", 1)[0].strip()
try: try:
chunk_size = int(chunk_size, 16) chunk_size = int(chunk_size, 16)
except ValueError: except ValueError:
@ -121,7 +118,7 @@ class LengthReader(object):
if size < 0: if size < 0:
raise ValueError("Size must be positive.") raise ValueError("Size must be positive.")
if size == 0: if size == 0:
return "" return b""
buf = StringIO() buf = StringIO()
@ -201,7 +198,7 @@ class Body(object):
def read(self, size=None): def read(self, size=None):
size = self.getsize(size) size = self.getsize(size)
if size == 0: if size == 0:
return "" return b""
if size < self.buf.tell(): if size < self.buf.tell():
data = self.buf.getvalue() data = self.buf.getvalue()
@ -225,7 +222,7 @@ class Body(object):
def readline(self, size=None): def readline(self, size=None):
size = self.getsize(size) size = self.getsize(size)
if size == 0: if size == 0:
return "" return b""
line = self.buf.getvalue() line = self.buf.getvalue()
self.buf.truncate(0) self.buf.truncate(0)
@ -234,7 +231,7 @@ class Body(object):
extra_buf_data = line[size:] extra_buf_data = line[size:]
line = line[:size] line = line[:size]
idx = line.find("\n") idx = line.find(b"\n")
if idx >= 0: if idx >= 0:
ret = line[:idx+1] ret = line[:idx+1]
self.buf.write(line[idx+1:]) self.buf.write(line[idx+1:])
@ -247,12 +244,11 @@ class Body(object):
ret = [] ret = []
data = self.read() data = self.read()
while len(data): while len(data):
pos = data.find("\n") pos = data.find(b"\n")
if pos < 0: if pos < 0:
ret.append(data) ret.append(data)
data = "" data = b""
else: else:
line, data = data[:pos+1], data[pos+1:] line, data = data[:pos+1], data[pos+1:]
ret.append(line) ret.append(line)
return ret return ret

View File

@ -4,21 +4,16 @@
# See the NOTICE for more information. # See the NOTICE for more information.
import re import re
import urlparse
import socket import socket
from errno import ENOTCONN from errno import ENOTCONN
try:
from cStringIO import StringIO
except ImportError:
from StringIO import StringIO
from gunicorn.http.unreader import SocketUnreader from gunicorn.http.unreader import SocketUnreader
from gunicorn.http.body import ChunkedReader, LengthReader, EOFReader, Body from gunicorn.http.body import ChunkedReader, LengthReader, EOFReader, Body
from gunicorn.http.errors import InvalidHeader, InvalidHeaderName, NoMoreData, \ from gunicorn.http.errors import InvalidHeader, InvalidHeaderName, NoMoreData, \
InvalidRequestLine, InvalidRequestMethod, InvalidHTTPVersion, \ InvalidRequestLine, InvalidRequestMethod, InvalidHTTPVersion, \
LimitRequestLine, LimitRequestHeaders LimitRequestLine, LimitRequestHeaders
from gunicorn.http.errors import InvalidProxyLine, ForbiddenProxyRequest from gunicorn.http.errors import InvalidProxyLine, ForbiddenProxyRequest
from gunicorn.six import StringIO, urlsplit, bytes_to_str
MAX_REQUEST_LINE = 8190 MAX_REQUEST_LINE = 8190
MAX_HEADERS = 32768 MAX_HEADERS = 32768
@ -61,7 +56,7 @@ class Message(object):
headers = [] headers = []
# Split lines on \r\n keeping the \r\n on each line # Split lines on \r\n keeping the \r\n on each line
lines = [line + "\r\n" for line in data.split("\r\n")] lines = [bytes_to_str(line) + "\r\n" for line in data.split(b"\r\n")]
# Parse headers into key/value pairs paying attention # Parse headers into key/value pairs paying attention
# to continuation lines. # to continuation lines.
@ -173,24 +168,24 @@ class Request(Message):
line, rbuf = self.read_line(unreader, buf, self.limit_request_line) line, rbuf = self.read_line(unreader, buf, self.limit_request_line)
# proxy protocol # proxy protocol
if self.proxy_protocol(line): if self.proxy_protocol(bytes_to_str(line)):
# get next request line # get next request line
buf = StringIO() buf = StringIO()
buf.write(rbuf) buf.write(rbuf)
line, rbuf = self.read_line(unreader, buf, self.limit_request_line) line, rbuf = self.read_line(unreader, buf, self.limit_request_line)
self.parse_request_line(line) self.parse_request_line(bytes_to_str(line))
buf = StringIO() buf = StringIO()
buf.write(rbuf) buf.write(rbuf)
# Headers # Headers
data = buf.getvalue() data = buf.getvalue()
idx = data.find("\r\n\r\n") idx = data.find(b"\r\n\r\n")
done = data[:2] == "\r\n" done = data[:2] == b"\r\n"
while True: while True:
idx = data.find("\r\n\r\n") idx = data.find(b"\r\n\r\n")
done = data[:2] == "\r\n" done = data[:2] == b"\r\n"
if idx < 0 and not done: if idx < 0 and not done:
self.get_data(unreader, buf) self.get_data(unreader, buf)
@ -202,7 +197,7 @@ class Request(Message):
if done: if done:
self.unreader.unread(data[2:]) self.unreader.unread(data[2:])
return "" return b""
self.headers = self.parse_headers(data[:idx]) self.headers = self.parse_headers(data[:idx])
@ -214,7 +209,7 @@ class Request(Message):
data = buf.getvalue() data = buf.getvalue()
while True: while True:
idx = data.find("\r\n") idx = data.find(b"\r\n")
if idx >= 0: if idx >= 0:
# check if the request line is too large # check if the request line is too large
if idx > limit > 0: if idx > limit > 0:
@ -328,7 +323,7 @@ class Request(Message):
else: else:
self.uri = bits[1] self.uri = bits[1]
parts = urlparse.urlsplit(self.uri) parts = urlsplit(self.uri)
self.path = parts.path or "" self.path = parts.path or ""
self.query = parts.query or "" self.query = parts.query or ""
self.fragment = parts.fragment or "" self.fragment = parts.fragment or ""

View File

@ -7,8 +7,8 @@ import logging
import os import os
import re import re
import sys import sys
from urllib import unquote
from gunicorn.six import unquote, string_types, binary_type
from gunicorn import SERVER_SOFTWARE from gunicorn import SERVER_SOFTWARE
import gunicorn.util as util import gunicorn.util as util
@ -265,12 +265,18 @@ class Response(object):
return return
tosend = self.default_headers() tosend = self.default_headers()
tosend.extend(["%s: %s\r\n" % (n, v) for n, v in self.headers]) tosend.extend(["%s: %s\r\n" % (n, v) for n, v in self.headers])
util.write(self.sock, "%s\r\n" % "".join(tosend))
header_str = "%s\r\n" % "".join(tosend)
util.write(self.sock, header_str.encode('latin1'))
self.headers_sent = True self.headers_sent = True
def write(self, arg): def write(self, arg):
self.send_headers() self.send_headers()
assert isinstance(arg, basestring), "%r is not a string." % arg
if isinstance(arg, text_type):
arg = arg.decode('utf-8')
assert isinstance(arg, binary_type), "%r is not a byte." % arg
arglen = len(arg) arglen = len(arg)
tosend = arglen tosend = arglen
@ -328,12 +334,13 @@ class Response(object):
self.send_headers() self.send_headers()
if self.is_chunked(): if self.is_chunked():
self.sock.sendall("%X\r\n" % nbytes) chunk_size = "%X\r\n" % nbytes
self.sock.sendall(chunk_size.encode('utf-8'))
self.sendfile_all(fileno, self.sock.fileno(), fo_offset, nbytes) self.sendfile_all(fileno, self.sock.fileno(), fo_offset, nbytes)
if self.is_chunked(): if self.is_chunked():
self.sock.sendall("\r\n") self.sock.sendall(b"\r\n")
os.lseek(fileno, fd_offset, os.SEEK_SET) os.lseek(fileno, fd_offset, os.SEEK_SET)
else: else:
@ -344,4 +351,4 @@ class Response(object):
if not self.headers_sent: if not self.headers_sent:
self.send_headers() self.send_headers()
if self.chunked: if self.chunked:
util.write_chunk(self.sock, "") util.write_chunk(self.sock, b"")

View File

@ -364,3 +364,37 @@ _add_doc(reraise, """Reraise an exception.""")
def with_metaclass(meta, base=object): def with_metaclass(meta, base=object):
"""Create a base class with a metaclass.""" """Create a base class with a metaclass."""
return meta("NewBase", (base,), {}) return meta("NewBase", (base,), {})
# specific to gunicorn
#
# Both branches expose the same names -- StringIO, bytes_to_str, unquote and
# urlsplit -- because other gunicorn modules import them from here
# unconditionally, whatever the interpreter version.
if PY3:
    import io
    import urllib.parse

    # Sockets take and return bytes on Python 3, so the shared in-memory
    # buffer type has to be a bytes buffer.
    StringIO = io.BytesIO

    def bytes_to_str(b):
        """Decode bytes to a native string using latin1."""
        return str(b, 'latin1')

    unquote = urllib.parse.unquote
    urlsplit = urllib.parse.urlsplit
else:
    # Bind the StringIO callable itself, not the module that contains it;
    # callers instantiate it directly with ``StringIO()``.
    try:
        from cStringIO import StringIO
    except ImportError:
        from StringIO import StringIO

    def bytestring(s):
        """Return ``s`` as a byte string, encoding unicode as UTF-8."""
        if isinstance(s, unicode):
            return s.encode('utf-8')
        return s

    def bytes_to_str(b):
        """Return ``b`` as a native string.

        Byte strings already are native strings on Python 2; unicode input
        is encoded as UTF-8.
        """
        return bytestring(b)

    import urlparse as orig_urlparse
    urlsplit = orig_urlparse.urlsplit

    import urllib
    unquote = urllib.unquote
    # Kept so existing references to the old name continue to work.
    urlunquote = unquote

View File

@ -25,6 +25,7 @@ import textwrap
import time import time
import inspect import inspect
from gunicorn.six import text_type
MAXFD = 1024 MAXFD = 1024
if (hasattr(os, "devnull")): if (hasattr(os, "devnull")):
@ -223,7 +224,10 @@ except ImportError:
pass pass
def write_chunk(sock, data):
    """Send ``data`` on ``sock`` framed as a single HTTP chunk.

    The frame is the payload length in uppercase hexadecimal, CRLF, the
    payload itself, and a closing CRLF. Text payloads are encoded as UTF-8
    first so that the advertised length counts bytes, not characters.
    """
    if isinstance(data, text_type):
        data = data.encode('utf-8')
    chunk_size = "%X\r\n" % len(data)
    # The size line is built as a native string and must be turned into
    # bytes before it can be joined with the payload.
    chunk = b"".join([chunk_size.encode('utf-8'), data, b"\r\n"])
    sock.sendall(chunk)
def write(sock, data, chunked=False): def write(sock, data, chunked=False):
@ -259,7 +263,7 @@ def write_error(sock, status_int, reason, mesg):
</html> </html>
""") % {"reason": reason, "mesg": mesg} """) % {"reason": reason, "mesg": mesg}
http = textwrap.dedent("""\ headers = textwrap.dedent("""\
HTTP/1.1 %s %s\r HTTP/1.1 %s %s\r
Connection: close\r Connection: close\r
Content-Type: text/html\r Content-Type: text/html\r
@ -267,7 +271,7 @@ def write_error(sock, status_int, reason, mesg):
\r \r
%s %s
""") % (str(status_int), reason, len(html), html) """) % (str(status_int), reason, len(html), html)
write_nonblock(sock, http) write_nonblock(sock, http.encode('latin1'))
def normalize_name(name): def normalize_name(name):
return "-".join([w.lower().capitalize() for w in name.split("-")]) return "-".join([w.lower().capitalize() for w in name.split("-")])