From 003c474fe2270fe67393a630516e2db659cd3918 Mon Sep 17 00:00:00 2001
From: benoitc <bchesneau@gmail.com>
Date: Wed, 24 Oct 2012 13:51:35 +0200
Subject: [PATCH] handle bytes & native strings

This patch makes sure that we now correctly handle bytes and native
strings on Python 3:

- In Python 3, sockets take and return bytes.
- According to PEP 3333, headers should be native strings and the body
  should be bytes.
---
 examples/longpoll.py       |  2 +-
 examples/multiapp.py       |  2 +-
 examples/multidomainapp.py |  8 +++----
 examples/slowclient.py     |  2 +-
 examples/test.py           |  2 +-
 gunicorn/http/body.py      | 46 +++++++++++++++++---------------------
 gunicorn/http/message.py   | 27 +++++++++-------------
 gunicorn/http/wsgi.py      | 19 +++++++++++-----
 gunicorn/six.py            | 34 ++++++++++++++++++++++++++++
 gunicorn/util.py           | 10 ++++++---
 10 files changed, 94 insertions(+), 58 deletions(-)
diff --git a/examples/longpoll.py b/examples/longpoll.py
index 19559d1b..97d6647f 100644
--- a/examples/longpoll.py
+++ b/examples/longpoll.py
@@ -17,7 +17,7 @@ class TestIter(object):
 
 def app(environ, start_response):
     """Application which cooperatively pauses 20 seconds (needed to surpass normal timeouts) before responding"""
-    data = 'Hello, World!\n'
+    data = b'Hello, World!\n'
     status = '200 OK'
     response_headers = [
         ('Content-type','text/plain'),
diff --git a/examples/multiapp.py b/examples/multiapp.py
index e48a253d..c6a4c90b 100644
--- a/examples/multiapp.py
+++ b/examples/multiapp.py
@@ -38,7 +38,7 @@ class Application(object):
         return match[0]['app'](environ, start_response)
 
     def error404(self, environ, start_response):
-        html = """\
+        html = b"""\
         <html>
           <head>
             <title>404 - Not Found</title>
diff --git a/examples/multidomainapp.py b/examples/multidomainapp.py
index 89e59e2b..948a5359 100644
--- a/examples/multidomainapp.py
+++ b/examples/multidomainapp.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -
 #
-# This file is part of gunicorn released under the MIT license. 
+# This file is part of gunicorn released under the MIT license.
 # See the NOTICE for more information.
 
 import re
@@ -20,15 +20,15 @@ class SubDomainApp:
                 return app(environ, start_response)
         else:
             start_response("404 Not Found", [])
-            return [""]
+            return [b""]
 
 def hello(environ, start_response):
     start_response("200 OK", [("Content-Type", "text/plain")])
-    return ["Hello, world\n"]
+    return [b"Hello, world\n"]
 
 def bye(environ, start_response):
     start_response("200 OK", [("Content-Type", "text/plain")])
-    return ["Goodbye!\n"]
+    return [b"Goodbye!\n"]
 
 app = SubDomainApp([
     ("localhost", hello),
diff --git a/examples/slowclient.py b/examples/slowclient.py
index 6f612bea..15729d93 100644
--- a/examples/slowclient.py
+++ b/examples/slowclient.py
@@ -9,7 +9,7 @@ import time
 
 def app(environ, start_response):
     """Application which cooperatively pauses 10 seconds before responding"""
-    data = 'Hello, World!\n'
+    data = b'Hello, World!\n'
     status = '200 OK'
     response_headers = [
         ('Content-type','text/plain'),
diff --git a/examples/test.py b/examples/test.py
index 8972f68d..c61f672e 100644
--- a/examples/test.py
+++ b/examples/test.py
@@ -12,7 +12,7 @@ from gunicorn import __version__
 #@validator
 def app(environ, start_response):
     """Simplest possible application object"""
-    data = 'Hello, World!\n'
+    data = b'Hello, World!\n'
     status = '200 OK'
 
     response_headers = [
diff --git a/gunicorn/http/body.py b/gunicorn/http/body.py
index b331f292..82797e4c 100644
--- a/gunicorn/http/body.py
+++ b/gunicorn/http/body.py
@@ -5,13 +5,9 @@
 
 import sys
 
-try:
-    from cStringIO import StringIO
-except ImportError:
-    from StringIO import StringIO
-
-from gunicorn.http.errors import NoMoreData, ChunkMissingTerminator, \
-InvalidChunkSize
+from gunicorn.http.errors import (NoMoreData, ChunkMissingTerminator,
+        InvalidChunkSize)
+from gunicorn.six import StringIO, bytes_to_str
 
 class ChunkedReader(object):
     def __init__(self, req, unreader):
@@ -25,7 +21,7 @@ class ChunkedReader(object):
         if size <= 0:
             raise ValueError("Size must be positive.")
         if size == 0:
-            return ""
+            return b""
 
         if self.parser:
             while self.buf.tell() < size:
@@ -45,16 +41,17 @@ class ChunkedReader(object):
         buf = StringIO()
         buf.write(data)
 
-        idx = buf.getvalue().find("\r\n\r\n")
-        done = buf.getvalue()[:2] == "\r\n"
+        idx = buf.getvalue().find(b"\r\n\r\n")
+        done = buf.getvalue()[:2] == b"\r\n"
         while idx < 0 and not done:
             self.get_data(unreader, buf)
-            idx = buf.getvalue().find("\r\n\r\n")
-            done = buf.getvalue()[:2] == "\r\n"
+            idx = buf.getvalue().find(b"\r\n\r\n")
+            done = buf.getvalue()[:2] == b"\r\n"
         if done:
             unreader.unread(buf.getvalue()[2:])
-            return ""
-        self.req.trailers = self.req.parse_headers(buf.getvalue()[:idx])
+            return b""
+        # parse_headers() splits on b"\r\n" and decodes each line itself.
+        self.req.trailers = self.req.parse_headers(buf.getvalue()[:idx])
         unreader.unread(buf.getvalue()[idx+4:])
 
     def parse_chunked(self, unreader):
@@ -71,7 +68,7 @@ class ChunkedReader(object):
             rest = rest[size:]
             while len(rest) < 2:
                 rest += unreader.read()
-            if rest[:2] != '\r\n':
+            if rest[:2] != b'\r\n':
                 raise ChunkMissingTerminator(rest[:2])
             (size, rest) = self.parse_chunk_size(unreader, data=rest[2:])
 
@@ -80,15 +77,15 @@ class ChunkedReader(object):
         if data is not None:
             buf.write(data)
 
-        idx = buf.getvalue().find("\r\n")
+        idx = buf.getvalue().find(b"\r\n")
         while idx < 0:
             self.get_data(unreader, buf)
-            idx = buf.getvalue().find("\r\n")
+            idx = buf.getvalue().find(b"\r\n")
 
         data = buf.getvalue()
         line, rest_chunk = data[:idx], data[idx+2:]
 
-        chunk_size = line.split(";", 1)[0].strip()
+        chunk_size = line.split(b";", 1)[0].strip()
         try:
             chunk_size = int(chunk_size, 16)
         except ValueError:
@@ -121,7 +118,7 @@ class LengthReader(object):
         if size < 0:
             raise ValueError("Size must be positive.")
         if size == 0:
-            return ""
+            return b""
 
 
         buf = StringIO()
@@ -201,7 +198,7 @@ class Body(object):
     def read(self, size=None):
         size = self.getsize(size)
         if size == 0:
-            return ""
+            return b""
 
         if size < self.buf.tell():
             data = self.buf.getvalue()
@@ -225,7 +222,7 @@ class Body(object):
     def readline(self, size=None):
         size = self.getsize(size)
         if size == 0:
-            return ""
+            return b""
 
         line = self.buf.getvalue()
         self.buf.truncate(0)
@@ -234,7 +231,7 @@ class Body(object):
         extra_buf_data = line[size:]
         line = line[:size]
 
-        idx = line.find("\n")
+        idx = line.find(b"\n")
         if idx >= 0:
             ret = line[:idx+1]
             self.buf.write(line[idx+1:])
@@ -247,12 +244,11 @@ class Body(object):
         ret = []
         data = self.read()
         while len(data):
-            pos = data.find("\n")
+            pos = data.find(b"\n")
             if pos < 0:
                 ret.append(data)
-                data = ""
+                data = b""
             else:
                 line, data = data[:pos+1], data[pos+1:]
                 ret.append(line)
         return ret
-
diff --git a/gunicorn/http/message.py b/gunicorn/http/message.py
index d24ea5d0..b32dbc2b 100644
--- a/gunicorn/http/message.py
+++ b/gunicorn/http/message.py
@@ -4,21 +4,16 @@
 # See the NOTICE for more information.
 
 import re
-import urlparse
 import socket
 from errno import ENOTCONN
 
-try:
-    from cStringIO import StringIO
-except ImportError:
-    from StringIO import StringIO
-
 from gunicorn.http.unreader import SocketUnreader
 from gunicorn.http.body import ChunkedReader, LengthReader, EOFReader, Body
 from gunicorn.http.errors import InvalidHeader, InvalidHeaderName, NoMoreData, \
 InvalidRequestLine, InvalidRequestMethod, InvalidHTTPVersion, \
 LimitRequestLine, LimitRequestHeaders
 from gunicorn.http.errors import InvalidProxyLine, ForbiddenProxyRequest
+from gunicorn.six import StringIO, urlsplit, bytes_to_str
 
 MAX_REQUEST_LINE = 8190
 MAX_HEADERS = 32768
@@ -61,7 +56,7 @@ class Message(object):
         headers = []
 
         # Split lines on \r\n keeping the \r\n on each line
-        lines = [line + "\r\n" for line in data.split("\r\n")]
+        lines = [bytes_to_str(line) + "\r\n" for line in data.split(b"\r\n")]
 
         # Parse headers into key/value pairs paying attention
         # to continuation lines.
@@ -173,24 +168,24 @@ class Request(Message):
         line, rbuf = self.read_line(unreader, buf, self.limit_request_line)
 
         # proxy protocol
-        if self.proxy_protocol(line):
+        if self.proxy_protocol(bytes_to_str(line)):
             # get next request line
             buf = StringIO()
             buf.write(rbuf)
             line, rbuf = self.read_line(unreader, buf, self.limit_request_line)
 
-        self.parse_request_line(line)
+        self.parse_request_line(bytes_to_str(line))
         buf = StringIO()
         buf.write(rbuf)
 
         # Headers
         data = buf.getvalue()
-        idx = data.find("\r\n\r\n")
+        idx = data.find(b"\r\n\r\n")
 
-        done = data[:2] == "\r\n"
+        done = data[:2] == b"\r\n"
         while True:
-            idx = data.find("\r\n\r\n")
-            done = data[:2] == "\r\n"
+            idx = data.find(b"\r\n\r\n")
+            done = data[:2] == b"\r\n"
 
             if idx < 0 and not done:
                 self.get_data(unreader, buf)
@@ -202,7 +197,7 @@ class Request(Message):
 
         if done:
             self.unreader.unread(data[2:])
-            return ""
+            return b""
 
         self.headers = self.parse_headers(data[:idx])
 
@@ -214,7 +209,7 @@ class Request(Message):
         data = buf.getvalue()
 
         while True:
-            idx = data.find("\r\n")
+            idx = data.find(b"\r\n")
             if idx >= 0:
                 # check if the request line is too large
                 if idx > limit > 0:
@@ -328,7 +323,7 @@ class Request(Message):
         else:
             self.uri = bits[1]
 
-        parts = urlparse.urlsplit(self.uri)
+        parts = urlsplit(self.uri)
         self.path = parts.path or ""
         self.query = parts.query or ""
         self.fragment = parts.fragment or ""
diff --git a/gunicorn/http/wsgi.py b/gunicorn/http/wsgi.py
index 228ed941..026a9c98 100644
--- a/gunicorn/http/wsgi.py
+++ b/gunicorn/http/wsgi.py
@@ -7,8 +7,8 @@ import logging
 import os
 import re
 import sys
-from urllib import unquote
 
+from gunicorn.six import unquote, string_types, binary_type, text_type
 from gunicorn import SERVER_SOFTWARE
 import gunicorn.util as util
 
@@ -265,12 +265,18 @@ class Response(object):
             return
         tosend = self.default_headers()
         tosend.extend(["%s: %s\r\n" % (n, v) for n, v in self.headers])
-        util.write(self.sock, "%s\r\n" % "".join(tosend))
+
+        header_str = "%s\r\n" % "".join(tosend)
+        util.write(self.sock, header_str.encode('latin1'))
         self.headers_sent = True
 
     def write(self, arg):
         self.send_headers()
-        assert isinstance(arg, basestring), "%r is not a string." % arg
+        # The socket only accepts bytes: encode text before sending it.
+        if isinstance(arg, text_type):
+            arg = arg.encode('utf-8')
+
+        assert isinstance(arg, binary_type), "%r is not a byte." % arg
 
         arglen = len(arg)
         tosend = arglen
@@ -328,12 +334,13 @@ class Response(object):
             self.send_headers()
 
             if self.is_chunked():
-                self.sock.sendall("%X\r\n" % nbytes)
+                chunk_size = "%X\r\n" % nbytes
+                self.sock.sendall(chunk_size.encode('utf-8'))
 
             self.sendfile_all(fileno, self.sock.fileno(), fo_offset, nbytes)
 
             if self.is_chunked():
-                self.sock.sendall("\r\n")
+                self.sock.sendall(b"\r\n")
 
             os.lseek(fileno, fd_offset, os.SEEK_SET)
         else:
@@ -344,4 +351,4 @@ class Response(object):
         if not self.headers_sent:
             self.send_headers()
         if self.chunked:
-            util.write_chunk(self.sock, "")
+            util.write_chunk(self.sock, b"")
diff --git a/gunicorn/six.py b/gunicorn/six.py
index 44b80a44..e82ddce2 100644
--- a/gunicorn/six.py
+++ b/gunicorn/six.py
@@ -364,3 +364,37 @@ _add_doc(reraise, """Reraise an exception.""")
 def with_metaclass(meta, base=object):
     """Create a base class with a metaclass."""
     return meta("NewBase", (base,), {})
+
+
+# specific to gunicorn
+if PY3:
+    import io
+    StringIO = io.BytesIO
+
+    def bytes_to_str(b):
+        return str(b, 'latin1')
+
+    import urllib.parse
+
+    unquote = urllib.parse.unquote
+    urlsplit = urllib.parse.urlsplit
+
+else:
+    try:
+        import cStringIO as _stringio
+    except ImportError:
+        import StringIO as _stringio
+
+    StringIO = _stringio.StringIO
+
+    # Python 2 str is already a byte string; only unicode needs encoding.
+    def bytes_to_str(s):
+        if isinstance(s, unicode):
+            return s.encode('utf-8')
+        return s
+
+    import urlparse as orig_urlparse
+    urlsplit = orig_urlparse.urlsplit
+
+    import urllib
+    unquote = urllib.unquote
diff --git a/gunicorn/util.py b/gunicorn/util.py
index e6fdb5ed..11731327 100644
--- a/gunicorn/util.py
+++ b/gunicorn/util.py
@@ -25,6 +25,7 @@ import textwrap
 import time
 import inspect
 
+from gunicorn.six import text_type
 
 MAXFD = 1024
 if (hasattr(os, "devnull")):
@@ -223,7 +224,10 @@ except ImportError:
                 pass
 
 def write_chunk(sock, data):
-    chunk = "".join(("%X\r\n" % len(data), data, "\r\n"))
+    if isinstance(data, text_type):
+        data = data.encode('utf-8')  # sockets only send bytes
+    chunk_size = "%X\r\n" % len(data)
+    chunk = b"".join([chunk_size.encode('utf-8'), data, b"\r\n"])
     sock.sendall(chunk)
 
 def write(sock, data, chunked=False):
@@ -259,7 +263,7 @@ def write_error(sock, status_int, reason, mesg):
     </html>
     """) % {"reason": reason, "mesg": mesg}
 
-    http = textwrap.dedent("""\
+    headers = textwrap.dedent("""\
     HTTP/1.1 %s %s\r
     Connection: close\r
     Content-Type: text/html\r
@@ -267,7 +271,7 @@ def write_error(sock, status_int, reason, mesg):
     \r
     %s
     """) % (str(status_int), reason, len(html), html)
-    write_nonblock(sock, http)
+    write_nonblock(sock, headers.encode('latin1'))
 
 def normalize_name(name):
     return  "-".join([w.lower().capitalize() for w in name.split("-")])