handle bytes & native strings

This patch makes sure that we now correctly handle bytes and native strings on Python 3: - In Python 3, sockets take and return bytes. - According to PEP 3333, headers should be native strings and the body should be bytes.
2026-01-14 11:09:11 +08:00 · 2012-10-24 13:51:35 +02:00 · 2012-10-24 13:51:35 +02:00 · 003c474fe2
commit 003c474fe2
parent 5759d59f08
10 changed files with 94 additions and 58 deletions
--- a/examples/longpoll.py
+++ b/examples/longpoll.py
@ -17,7 +17,7 @@ class TestIter(object):

 def app(environ, start_response):
    """Application which cooperatively pauses 20 seconds (needed to surpass normal timeouts) before responding"""
-    data = 'Hello, World!\n'
+    data = b'Hello, World!\n'
    status = '200 OK'
    response_headers = [
        ('Content-type','text/plain'),
--- a/examples/multiapp.py
+++ b/examples/multiapp.py
@ -38,7 +38,7 @@ class Application(object):
        return match[0]['app'](environ, start_response)

    def error404(self, environ, start_response):
-        html = """\
+        html = b"""\
        <html>
          <head>
            <title>404 - Not Found</title>
--- a/examples/multidomainapp.py
+++ b/examples/multidomainapp.py
@ -1,6 +1,6 @@
 # -*- coding: utf-8 -
 #
-# This file is part of gunicorn released under the MIT license. 
+# This file is part of gunicorn released under the MIT license.
 # See the NOTICE for more information.

 import re
@ -20,15 +20,15 @@ class SubDomainApp:
                return app(environ, start_response)
        else:
            start_response("404 Not Found", [])
-            return [""]
+            return [b""]

 def hello(environ, start_response):
    start_response("200 OK", [("Content-Type", "text/plain")])
-    return ["Hello, world\n"]
+    return [b"Hello, world\n"]

 def bye(environ, start_response):
    start_response("200 OK", [("Content-Type", "text/plain")])
-    return ["Goodbye!\n"]
+    return [b"Goodbye!\n"]

 app = SubDomainApp([
    ("localhost", hello),
--- a/examples/slowclient.py
+++ b/examples/slowclient.py
@ -9,7 +9,7 @@ import time

 def app(environ, start_response):
    """Application which cooperatively pauses 10 seconds before responding"""
-    data = 'Hello, World!\n'
+    data = b'Hello, World!\n'
    status = '200 OK'
    response_headers = [
        ('Content-type','text/plain'),
--- a/examples/test.py
+++ b/examples/test.py
@ -12,7 +12,7 @@ from gunicorn import __version__
 #@validator
 def app(environ, start_response):
    """Simplest possible application object"""
-    data = 'Hello, World!\n'
+    data = b'Hello, World!\n'
    status = '200 OK'

    response_headers = [
--- a/gunicorn/http/body.py
+++ b/gunicorn/http/body.py
@ -5,13 +5,9 @@

 import sys

-try:
-    from cStringIO import StringIO
-except ImportError:
-    from StringIO import StringIO
-
-from gunicorn.http.errors import NoMoreData, ChunkMissingTerminator, \
-InvalidChunkSize
+from gunicorn.http.errors import (NoMoreData, ChunkMissingTerminator,
+        InvalidChunkSize)
+from gunicorn.six import StringIO, bytes_to_str

 class ChunkedReader(object):
    def __init__(self, req, unreader):
@ -25,7 +21,7 @@ class ChunkedReader(object):
        if size <= 0:
            raise ValueError("Size must be positive.")
        if size == 0:
-            return ""
+            return b""

        if self.parser:
            while self.buf.tell() < size:
@ -45,16 +41,17 @@ class ChunkedReader(object):
        buf = StringIO()
        buf.write(data)

-        idx = buf.getvalue().find("\r\n\r\n")
-        done = buf.getvalue()[:2] == "\r\n"
+        idx = buf.getvalue().find(b"\r\n\r\n")
+        done = buf.getvalue()[:2] == b"\r\n"
        while idx < 0 and not done:
            self.get_data(unreader, buf)
-            idx = buf.getvalue().find("\r\n\r\n")
-            done = buf.getvalue()[:2] == "\r\n"
+            idx = buf.getvalue().find(b"\r\n\r\n")
+            done = buf.getvalue()[:2] == b"\r\n"
        if done:
            unreader.unread(buf.getvalue()[2:])
-            return ""
-        self.req.trailers = self.req.parse_headers(buf.getvalue()[:idx])
+        self.req.trailers = self.req.parse_headers(
+                buf.getvalue()[:idx])  # raw bytes: parse_headers splits on b"\r\n" and decodes each line
        unreader.unread(buf.getvalue()[idx+4:])

    def parse_chunked(self, unreader):
@ -71,7 +68,7 @@ class ChunkedReader(object):
            rest = rest[size:]
            while len(rest) < 2:
                rest += unreader.read()
-            if rest[:2] != '\r\n':
+            if rest[:2] != b'\r\n':
                raise ChunkMissingTerminator(rest[:2])
            (size, rest) = self.parse_chunk_size(unreader, data=rest[2:])

@ -80,15 +77,15 @@ class ChunkedReader(object):
        if data is not None:
            buf.write(data)

-        idx = buf.getvalue().find("\r\n")
+        idx = buf.getvalue().find(b"\r\n")
        while idx < 0:
            self.get_data(unreader, buf)
-            idx = buf.getvalue().find("\r\n")
+            idx = buf.getvalue().find(b"\r\n")

        data = buf.getvalue()
        line, rest_chunk = data[:idx], data[idx+2:]

-        chunk_size = line.split(";", 1)[0].strip()
+        chunk_size = line.split(b";", 1)[0].strip()
        try:
            chunk_size = int(chunk_size, 16)
        except ValueError:
@ -121,7 +118,7 @@ class LengthReader(object):
        if size < 0:
            raise ValueError("Size must be positive.")
        if size == 0:
-            return ""
+            return b""


        buf = StringIO()
@ -201,7 +198,7 @@ class Body(object):
    def read(self, size=None):
        size = self.getsize(size)
        if size == 0:
-            return ""
+            return b""

        if size < self.buf.tell():
            data = self.buf.getvalue()
@ -225,7 +222,7 @@ class Body(object):
    def readline(self, size=None):
        size = self.getsize(size)
        if size == 0:
-            return ""
+            return b""

        line = self.buf.getvalue()
        self.buf.truncate(0)
@ -234,7 +231,7 @@ class Body(object):
        extra_buf_data = line[size:]
        line = line[:size]

-        idx = line.find("\n")
+        idx = line.find(b"\n")
        if idx >= 0:
            ret = line[:idx+1]
            self.buf.write(line[idx+1:])
@ -247,12 +244,11 @@ class Body(object):
        ret = []
        data = self.read()
        while len(data):
-            pos = data.find("\n")
+            pos = data.find(b"\n")
            if pos < 0:
                ret.append(data)
-                data = ""
+                data = b""
            else:
                line, data = data[:pos+1], data[pos+1:]
                ret.append(line)
        return ret
-
--- a/gunicorn/http/message.py
+++ b/gunicorn/http/message.py
@ -4,21 +4,16 @@
 # See the NOTICE for more information.

 import re
-import urlparse
 import socket
 from errno import ENOTCONN

-try:
-    from cStringIO import StringIO
-except ImportError:
-    from StringIO import StringIO
-
 from gunicorn.http.unreader import SocketUnreader
 from gunicorn.http.body import ChunkedReader, LengthReader, EOFReader, Body
 from gunicorn.http.errors import InvalidHeader, InvalidHeaderName, NoMoreData, \
 InvalidRequestLine, InvalidRequestMethod, InvalidHTTPVersion, \
 LimitRequestLine, LimitRequestHeaders
 from gunicorn.http.errors import InvalidProxyLine, ForbiddenProxyRequest
+from gunicorn.six import StringIO, urlsplit, bytes_to_str

 MAX_REQUEST_LINE = 8190
 MAX_HEADERS = 32768
@ -61,7 +56,7 @@ class Message(object):
        headers = []

        # Split lines on \r\n keeping the \r\n on each line
-        lines = [line + "\r\n" for line in data.split("\r\n")]
+        lines = [bytes_to_str(line) + "\r\n" for line in data.split(b"\r\n")]

        # Parse headers into key/value pairs paying attention
        # to continuation lines.
@ -173,24 +168,24 @@ class Request(Message):
        line, rbuf = self.read_line(unreader, buf, self.limit_request_line)

        # proxy protocol
-        if self.proxy_protocol(line):
+        if self.proxy_protocol(bytes_to_str(line)):
            # get next request line
            buf = StringIO()
            buf.write(rbuf)
            line, rbuf = self.read_line(unreader, buf, self.limit_request_line)

-        self.parse_request_line(line)
+        self.parse_request_line(bytes_to_str(line))
        buf = StringIO()
        buf.write(rbuf)

        # Headers
        data = buf.getvalue()
-        idx = data.find("\r\n\r\n")
+        idx = data.find(b"\r\n\r\n")

-        done = data[:2] == "\r\n"
+        done = data[:2] == b"\r\n"
        while True:
-            idx = data.find("\r\n\r\n")
-            done = data[:2] == "\r\n"
+            idx = data.find(b"\r\n\r\n")
+            done = data[:2] == b"\r\n"

            if idx < 0 and not done:
                self.get_data(unreader, buf)
@ -202,7 +197,7 @@ class Request(Message):

        if done:
            self.unreader.unread(data[2:])
-            return ""
+            return b""

        self.headers = self.parse_headers(data[:idx])

@ -214,7 +209,7 @@ class Request(Message):
        data = buf.getvalue()

        while True:
-            idx = data.find("\r\n")
+            idx = data.find(b"\r\n")
            if idx >= 0:
                # check if the request line is too large
                if idx > limit > 0:
@ -328,7 +323,7 @@ class Request(Message):
        else:
            self.uri = bits[1]

-        parts = urlparse.urlsplit(self.uri)
+        parts = urlsplit(self.uri)
        self.path = parts.path or ""
        self.query = parts.query or ""
        self.fragment = parts.fragment or ""
--- a/gunicorn/http/wsgi.py
+++ b/gunicorn/http/wsgi.py
@ -7,8 +7,8 @@ import logging
 import os
 import re
 import sys
-from urllib import unquote

+from gunicorn.six import unquote, string_types, binary_type, text_type
 from gunicorn import SERVER_SOFTWARE
 import gunicorn.util as util

@ -265,12 +265,18 @@ class Response(object):
            return
        tosend = self.default_headers()
        tosend.extend(["%s: %s\r\n" % (n, v) for n, v in self.headers])
-        util.write(self.sock, "%s\r\n" % "".join(tosend))
+
+        header_str = "%s\r\n" % "".join(tosend)
+        util.write(self.sock, header_str.encode('latin1'))
        self.headers_sent = True

    def write(self, arg):
        self.send_headers()
-        assert isinstance(arg, basestring), "%r is not a string." % arg
+
+        if isinstance(arg, text_type):
+            arg = arg.encode('utf-8')  # text must be encoded (not decoded) to get bytes
+
+        assert isinstance(arg, binary_type), "%r is not a byte." % arg

        arglen = len(arg)
        tosend = arglen
@ -328,12 +334,13 @@ class Response(object):
            self.send_headers()

            if self.is_chunked():
-                self.sock.sendall("%X\r\n" % nbytes)
+                chunk_size = "%X\r\n" % nbytes
+                self.sock.sendall(chunk_size.encode('utf-8'))

            self.sendfile_all(fileno, self.sock.fileno(), fo_offset, nbytes)

            if self.is_chunked():
-                self.sock.sendall("\r\n")
+                self.sock.sendall(b"\r\n")

            os.lseek(fileno, fd_offset, os.SEEK_SET)
        else:
@ -344,4 +351,4 @@ class Response(object):
        if not self.headers_sent:
            self.send_headers()
        if self.chunked:
-            util.write_chunk(self.sock, "")
+            util.write_chunk(self.sock, b"")
--- a/gunicorn/six.py
+++ b/gunicorn/six.py
@ -364,3 +364,37 @@ _add_doc(reraise, """Reraise an exception.""")
 def with_metaclass(meta, base=object):
    """Create a base class with a metaclass."""
    return meta("NewBase", (base,), {})
+
+
+# specific to gunicorn
+if PY3:
+    import io
+    StringIO = io.BytesIO
+
+    def bytes_to_str(b):
+        return str(b, 'latin1')
+
+    import urllib.parse
+
+    unquote = urllib.parse.unquote
+    urlsplit = urllib.parse.urlsplit
+
+else:
+    try:
+        from cStringIO import StringIO  # the class, not the module: callers do StringIO()
+    except ImportError:
+        from StringIO import StringIO
+
+    def bytes_to_str(s):  # same name as the PY3 branch so imports work on both
+        if isinstance(s, unicode):
+            return s.encode('utf-8')
+        return s
+
+    # keep the previous py2-only name importable
+    bytestring = bytes_to_str
+
+    import urlparse as orig_urlparse
+    urlsplit = orig_urlparse.urlsplit
+
+    import urllib
+    unquote = urlunquote = urllib.unquote  # `unquote` is the name wsgi.py imports
--- a/gunicorn/util.py
+++ b/gunicorn/util.py
@ -25,6 +25,7 @@ import textwrap
 import time
 import inspect

+from gunicorn.six import text_type

 MAXFD = 1024
 if (hasattr(os, "devnull")):
@ -223,7 +224,10 @@ except ImportError:
                pass

 def write_chunk(sock, data):
-    chunk = "".join(("%X\r\n" % len(data), data, "\r\n"))
+    if isinstance(data, text_type):
+        data = data.encode('utf-8')  # sockets take bytes; size below must count encoded bytes
+    chunk_size = "%X\r\n" % len(data)
+    chunk = b"".join([chunk_size.encode('utf-8'), data, b"\r\n"])
    sock.sendall(chunk)

 def write(sock, data, chunked=False):
@ -259,7 +263,7 @@ def write_error(sock, status_int, reason, mesg):
    </html>
    """) % {"reason": reason, "mesg": mesg}

-    http = textwrap.dedent("""\
+    headers = textwrap.dedent("""\
    HTTP/1.1 %s %s\r
    Connection: close\r
    Content-Type: text/html\r
@ -267,7 +271,7 @@ def write_error(sock, status_int, reason, mesg):
    \r
    %s
    """) % (str(status_int), reason, len(html), html)
-    write_nonblock(sock, http)
+    write_nonblock(sock, headers.encode('latin1'))

 def normalize_name(name):
    return  "-".join([w.lower().capitalize() for w in name.split("-")])