From 6d76ed8e6c6bf789e3a9e39031d0312856e6ae8d Mon Sep 17 00:00:00 2001 From: Ron DuPlain Date: Fri, 16 Nov 2018 11:52:37 -0500 Subject: [PATCH 001/263] Provide guidance on X-Forwarded-For access log. (#1906) The doc change introduced in #1037 is initially helpful but then internally inconsistent. It correctly points out that X-Forwarded-For is no longer used in setting REMOTE_ADDR (c4873681299212d6082cd9902740eef18c2f14f1), but then confusingly indicates a solution using the X-Forwarded-For header. The deployment doc provides a full configuration example which includes proxy headers as set by nginx. What is missing, before this patch, is a clear suggestion on how to make use of the header in Gunicorn's access log. Accordingly, remove the confusing suggestion and replace it with a drop-in replacement for Gunicorn's default access log format. --- docs/source/deploy.rst | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/source/deploy.rst b/docs/source/deploy.rst index b6355a1c..c7a0212c 100644 --- a/docs/source/deploy.rst +++ b/docs/source/deploy.rst @@ -67,13 +67,13 @@ Gunicorn 19 introduced a breaking change concerning how ``REMOTE_ADDR`` is handled. Previous to Gunicorn 19 this was set to the value of ``X-Forwarded-For`` if received from a trusted proxy. However, this was not in compliance with :rfc:`3875` which is why the ``REMOTE_ADDR`` is now the IP -address of **the proxy** and **not the actual user**. You should instead -configure Nginx to send the user's IP address through the ``X-Forwarded-For`` -header like this:: +address of **the proxy** and **not the actual user**. - ... - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - ... +To have access logs indicate **the actual user** IP when proxied, set +:ref:`access-log-format` with a format which includes ``X-Forwarded-For``. 
For +example, this format uses ``X-Forwarded-For`` in place of ``REMOTE_ADDR``:: + + %({x-forwarded-for}i)s %(l)s %(u)s %(t)s "%(r)s" %(s)s %(b)s "%(f)s" "%(a)s" It is also worth noting that the ``REMOTE_ADDR`` will be completely empty if you bind Gunicorn to a UNIX socket and not a TCP ``host:port`` tuple. From efdb5acdd08cd79e61adbb746a7ca1247511a5b2 Mon Sep 17 00:00:00 2001 From: Jake <16889000+jakethedev@users.noreply.github.com> Date: Fri, 16 Nov 2018 10:11:22 -0700 Subject: [PATCH 002/263] Docs: Serving WSGI app modules from Gunicorn (#1817) --- docs/source/custom.rst | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/docs/source/custom.rst b/docs/source/custom.rst index 20dae4db..36b2c9fd 100644 --- a/docs/source/custom.rst +++ b/docs/source/custom.rst @@ -14,3 +14,37 @@ a custom Application: .. literalinclude:: ../../examples/standalone_app.py :lines: 11-60 + +Direct Usage of Existing WSGI Apps +---------------------------------- + +If necessary, you can run Gunicorn straight from Python, allowing you to +specify a WSGI-compatible application at runtime. This can be handy for +rolling deploys or in the case of using PEX files to deploy your application, +as the app and Gunicorn can be bundled in the same PEX file. Gunicorn has +this functionality built-in as a first class citizen known as +:class:`gunicorn.app.wsgiapp`. This can be used to run WSGI-compatible app +instances such as those produced by Flask or Django. Assuming your WSGI API +package is *exampleapi*, and your application instance is *app*, this is all +you need to get going:: + + gunicorn.app.wsgiapp exampleapi:app + +This command will work with any Gunicorn CLI parameters or a config file - just +pass them along as if you're directly giving them to Gunicorn: + +.. 
code-block:: bash + # Custom parameters + $ python gunicorn.app.wsgiapp exampleapi:app --bind=0.0.0.0:8081 --workers=4 + # Using a config file + $ python gunicorn.app.wsgiapp exampleapi:app -c config.py + +Note for those using PEX: use ``-c gunicorn`` as your entry at build +time, and your compiled app should work with the entry point passed to it at +run time. + +.. code-block:: bash + # Generic pex build command via bash from root of exampleapi project + $ pex . -v -c gunicorn -o compiledapp.pex + # Running it + ./compiledapp.pex exampleapi:app -c gunicorn_config.py From ee7af1247b8cc289af7b4bcfd2ff93ecfaf8c9df Mon Sep 17 00:00:00 2001 From: Florian Apolloner Date: Fri, 16 Nov 2018 18:21:13 +0100 Subject: [PATCH 003/263] Added support to --bind to open file descriptors (#1809) Fixes #1107 --- THANKS | 1 + docs/source/news.rst | 5 +++ docs/source/run.rst | 4 +- docs/source/settings.rst | 7 ++- gunicorn/config.py | 7 ++- gunicorn/sock.py | 13 ++++-- gunicorn/socketfromfd.py | 96 ++++++++++++++++++++++++++++++++++++++++ gunicorn/util.py | 7 +++ tests/test_config.py | 6 +++ tests/test_util.py | 9 +++- 10 files changed, 144 insertions(+), 11 deletions(-) create mode 100644 gunicorn/socketfromfd.py diff --git a/THANKS b/THANKS index 8f0c944a..57ed4dab 100644 --- a/THANKS +++ b/THANKS @@ -60,6 +60,7 @@ Eric Florenzano Eric Shull Eugene Obukhov Evan Mezeske +Florian Apolloner Gaurav Kumar George Kollias George Notaras diff --git a/docs/source/news.rst b/docs/source/news.rst index e4386ab4..049e66cf 100644 --- a/docs/source/news.rst +++ b/docs/source/news.rst @@ -2,6 +2,11 @@ Changelog ========= +20.0 / not released +=================== + +- fix: Added support for binding to file descriptors (:issue:`1107`, :pr:`1809`) + 19.9.0 / 2018/07/03 =================== diff --git a/docs/source/run.rst b/docs/source/run.rst index 3100c526..d0799fa0 100644 --- a/docs/source/run.rst +++ b/docs/source/run.rst @@ -52,8 +52,8 @@ Commonly Used Arguments * ``-c CONFIG, --config=CONFIG`` - 
Specify a config file in the form ``$(PATH)``, ``file:$(PATH)``, or ``python:$(MODULE_NAME)``. * ``-b BIND, --bind=BIND`` - Specify a server socket to bind. Server sockets - can be any of ``$(HOST)``, ``$(HOST):$(PORT)``, or ``unix:$(PATH)``. - An IP is a valid ``$(HOST)``. + can be any of ``$(HOST)``, ``$(HOST):$(PORT)``, ``fd://$(FD)``, or + ``unix:$(PATH)``. An IP is a valid ``$(HOST)``. * ``-w WORKERS, --workers=WORKERS`` - The number of worker processes. This number should generally be between 2-4 workers per core in the server. Check the :ref:`faq` for ideas on tuning this parameter. diff --git a/docs/source/settings.rst b/docs/source/settings.rst index b9bd56de..bdc62f1e 100644 --- a/docs/source/settings.rst +++ b/docs/source/settings.rst @@ -1108,8 +1108,11 @@ bind The socket to bind. -A string of the form: ``HOST``, ``HOST:PORT``, ``unix:PATH``. An IP is -a valid ``HOST``. +A string of the form: ``HOST``, ``HOST:PORT``, ``unix:PATH``, +``fd://FD``. An IP is a valid ``HOST``. + +.. versionchanged:: 20.0 + Support for ``fd://FD`` got added. Multiple addresses can be bound. ex.:: diff --git a/gunicorn/config.py b/gunicorn/config.py index e14161b6..a9b18afe 100644 --- a/gunicorn/config.py +++ b/gunicorn/config.py @@ -557,8 +557,11 @@ class Bind(Setting): desc = """\ The socket to bind. - A string of the form: ``HOST``, ``HOST:PORT``, ``unix:PATH``. An IP is - a valid ``HOST``. + A string of the form: ``HOST``, ``HOST:PORT``, ``unix:PATH``, + ``fd://FD``. An IP is a valid ``HOST``. + + .. versionchanged:: 20.0 + Support for ``fd://FD`` got added. Multiple addresses can be bound. 
ex.:: diff --git a/gunicorn/sock.py b/gunicorn/sock.py index f61443a1..8d35c4d4 100644 --- a/gunicorn/sock.py +++ b/gunicorn/sock.py @@ -11,6 +11,7 @@ import sys import time from gunicorn import util +from gunicorn.socketfromfd import fromfd class BaseSocket(object): @@ -150,7 +151,11 @@ def create_sockets(conf, log, fds=None): listeners = [] # get it only once - laddr = conf.address + addr = conf.address + fdaddr = [bind for bind in addr if isinstance(bind, int)] + if fds: + fdaddr += list(fds) + laddr = [bind for bind in addr if not isinstance(bind, int)] # check ssl config early to raise the error on startup # only the certfile is needed since it can contains the keyfile @@ -161,9 +166,9 @@ def create_sockets(conf, log, fds=None): raise ValueError('keyfile "%s" does not exist' % conf.keyfile) # sockets are already bound - if fds is not None: - for fd in fds: - sock = socket.fromfd(fd, socket.AF_UNIX, socket.SOCK_STREAM) + if fdaddr: + for fd in fdaddr: + sock = fromfd(fd) sock_name = sock.getsockname() sock_type = _sock_type(sock_name) listener = sock_type(sock_name, conf, log, fd=fd) diff --git a/gunicorn/socketfromfd.py b/gunicorn/socketfromfd.py new file mode 100644 index 00000000..4c2847b2 --- /dev/null +++ b/gunicorn/socketfromfd.py @@ -0,0 +1,96 @@ +# Copyright (C) 2016 Christian Heimes +"""socketfromfd -- socket.fromfd() with auto-discovery + +ATTENTION: Do not remove this backport till the minimum required version is + Python 3.7. See https://bugs.python.org/issue28134 for details. 
+""" +from __future__ import print_function + +import ctypes +import os +import socket +import sys +from ctypes.util import find_library + +__all__ = ('fromfd',) + +SO_DOMAIN = getattr(socket, 'SO_DOMAIN', 39) +SO_TYPE = getattr(socket, 'SO_TYPE', 3) +SO_PROTOCOL = getattr(socket, 'SO_PROTOCOL', 38) + + +_libc_name = find_library('c') +if _libc_name is not None: + libc = ctypes.CDLL(_libc_name, use_errno=True) +else: + raise OSError('libc not found') + + +def _errcheck_errno(result, func, arguments): + """Raise OSError by errno for -1 + """ + if result == -1: + errno = ctypes.get_errno() + raise OSError(errno, os.strerror(errno)) + return arguments + + +_libc_getsockopt = libc.getsockopt +_libc_getsockopt.argtypes = [ + ctypes.c_int, # int sockfd + ctypes.c_int, # int level + ctypes.c_int, # int optname + ctypes.c_void_p, # void *optval + ctypes.POINTER(ctypes.c_uint32) # socklen_t *optlen +] +_libc_getsockopt.restype = ctypes.c_int # 0: ok, -1: err +_libc_getsockopt.errcheck = _errcheck_errno + + +def _raw_getsockopt(fd, level, optname): + """Make raw getsockopt() call for int32 optval + + :param fd: socket fd + :param level: SOL_* + :param optname: SO_* + :return: value as int + """ + optval = ctypes.c_int(0) + optlen = ctypes.c_uint32(4) + _libc_getsockopt(fd, level, optname, + ctypes.byref(optval), ctypes.byref(optlen)) + return optval.value + + +def fromfd(fd, keep_fd=True): + """Create a socket from a file descriptor + + socket domain (family), type and protocol are auto-detected. By default + the socket uses a dup()ed fd. The original fd can be closed. + + The parameter `keep_fd` influences fd duplication. Under Python 2 the + fd is still duplicated but the input fd is closed. Under Python 3 and + with `keep_fd=True`, the new socket object uses the same fd. 
+ + :param fd: socket fd + :type fd: int + :param keep_fd: keep input fd + :type keep_fd: bool + :return: socket.socket instance + :raises OSError: for invalid socket fd + """ + family = _raw_getsockopt(fd, socket.SOL_SOCKET, SO_DOMAIN) + typ = _raw_getsockopt(fd, socket.SOL_SOCKET, SO_TYPE) + proto = _raw_getsockopt(fd, socket.SOL_SOCKET, SO_PROTOCOL) + if sys.version_info.major == 2: + # Python 2 has no fileno argument and always duplicates the fd + sockobj = socket.fromfd(fd, family, typ, proto) + sock = socket.socket(None, None, None, _sock=sockobj) + if not keep_fd: + os.close(fd) + return sock + else: + if keep_fd: + return socket.fromfd(fd, family, typ, proto) + else: + return socket.socket(family, typ, proto, fileno=fd) diff --git a/gunicorn/util.py b/gunicorn/util.py index 973d7ed3..899416ad 100644 --- a/gunicorn/util.py +++ b/gunicorn/util.py @@ -251,6 +251,13 @@ def parse_address(netloc, default_port=8000): if re.match(r'unix:(//)?', netloc): return re.split(r'unix:(//)?', netloc)[-1] + if netloc.startswith("fd://"): + fd = netloc[5:] + try: + return int(fd) + except ValueError: + raise RuntimeError("%r is not a valid file descriptor." 
% fd) from None + if netloc.startswith("tcp://"): netloc = netloc.split("tcp://")[1] diff --git a/tests/test_config.py b/tests/test_config.py index 2d009088..98420bd0 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -429,3 +429,9 @@ def _test_ssl_version(options, expected): with AltArgs(cmdline): app = NoConfigApp() assert app.cfg.ssl_version == expected + + +def test_bind_fd(): + with AltArgs(["prog_name", "-b", "fd://42"]): + app = NoConfigApp() + assert app.cfg.bind == ["fd://42"] diff --git a/tests/test_util.py b/tests/test_util.py index 4d977981..3b8be0c3 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -17,7 +17,8 @@ from urllib.parse import SplitResult ('[::1]:8000', ('::1', 8000)), ('localhost:8000', ('localhost', 8000)), ('127.0.0.1:8000', ('127.0.0.1', 8000)), - ('localhost', ('localhost', 8000)) + ('localhost', ('localhost', 8000)), + ('fd://33', 33), ]) def test_parse_address(test_input, expected): assert util.parse_address(test_input) == expected @@ -29,6 +30,12 @@ def test_parse_address_invalid(): assert "'test' is not a valid port number." in str(err) +def test_parse_fd_invalid(): + with pytest.raises(RuntimeError) as err: + util.parse_address('fd://asd') + assert "'asd' is not a valid file descriptor." in str(err) + + def test_http_date(): assert util.http_date(1508607753.740316) == 'Sat, 21 Oct 2017 17:42:33 GMT' From ba9a4462c6ccfb090d975fe7ca76d526d09c0776 Mon Sep 17 00:00:00 2001 From: John Sirois Date: Fri, 7 Dec 2018 01:43:27 -0800 Subject: [PATCH 004/263] Declare our setuptools dependency (#1931) We rely on setuptools' pkg_resources in a few places so we declare our dependency so that packaging and installation tools work. 
Fixes #1716 --- setup.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/setup.py b/setup.py index f7e5d526..19e85c94 100644 --- a/setup.py +++ b/setup.py @@ -65,6 +65,14 @@ class PyTestCommand(TestCommand): sys.exit(errno) +install_requires = [ + # We depend on functioning pkg_resources.working_set.add_entry() and + # pkg_resources.load_entry_point(). These both work as of 3.0 which + # is the first version to support Python 3.4 which we require as a + # floor. + 'setuptools>=3.0', +] + extra_require = { 'gevent': ['gevent>=0.13'], 'eventlet': ['eventlet>=0.9.7'], @@ -84,6 +92,7 @@ setup( url='http://gunicorn.org', python_requires='>=3.4', + install_requires=install_requires, classifiers=CLASSIFIERS, zip_safe=False, packages=find_packages(exclude=['examples', 'tests']), From 33025cf610bb7a6f1cb307644c1881863c2fddc4 Mon Sep 17 00:00:00 2001 From: Niklas B Date: Sun, 9 Dec 2018 10:23:56 +0100 Subject: [PATCH 005/263] tornado 5 support (#1918) tornado 5 support by keeping track of the periodic callbacks --- gunicorn/workers/gtornado.py | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/gunicorn/workers/gtornado.py b/gunicorn/workers/gtornado.py index f15712d3..9c8379c2 100644 --- a/gunicorn/workers/gtornado.py +++ b/gunicorn/workers/gtornado.py @@ -19,9 +19,13 @@ from gunicorn.workers.base import Worker from gunicorn import __version__ as gversion -# `io_loop` arguments to many Tornado functions have been removed in Tornado 5.0 -# -IOLOOP_PARAMETER_REMOVED = tornado.version_info >= (5, 0, 0) +# Tornado 5.0 updated its IOLoop, and the `io_loop` arguments to many +# Tornado functions have been removed in Tornado 5.0. Also, they no +# longer store PeriodCallbacks in ioloop._callbacks. Instead we store +# them on our side, and use stop() on them when stopping the worker. +# See https://www.tornadoweb.org/en/stable/releases/v5.0.0.html#backwards-compatibility-notes +# for more details. 
+TORNADO5 = tornado.version_info >= (5, 0, 0) class TornadoWorker(Worker): @@ -66,8 +70,13 @@ class TornadoWorker(Worker): pass self.server_alive = False else: - if not self.ioloop._callbacks: + if TORNADO5: + for callback in self.callbacks: + callback.stop() self.ioloop.stop() + else: + if not self.ioloop._callbacks: + self.ioloop.stop() def init_process(self): # IOLoop cannot survive a fork or be shared across processes @@ -81,9 +90,13 @@ class TornadoWorker(Worker): self.ioloop = IOLoop.instance() self.alive = True self.server_alive = False - if IOLOOP_PARAMETER_REMOVED: - PeriodicCallback(self.watchdog, 1000).start() - PeriodicCallback(self.heartbeat, 1000).start() + + if TORNADO5: + self.callbacks = [] + self.callbacks.append(PeriodicCallback(self.watchdog, 1000)) + self.callbacks.append(PeriodicCallback(self.heartbeat, 1000)) + for callback in self.callbacks: + callback.start() else: PeriodicCallback(self.watchdog, 1000, io_loop=self.ioloop).start() PeriodicCallback(self.heartbeat, 1000, io_loop=self.ioloop).start() @@ -127,13 +140,13 @@ class TornadoWorker(Worker): # options del _ssl_opt["do_handshake_on_connect"] del _ssl_opt["suppress_ragged_eofs"] - if IOLOOP_PARAMETER_REMOVED: + if TORNADO5: server = server_class(app, ssl_options=_ssl_opt) else: server = server_class(app, io_loop=self.ioloop, ssl_options=_ssl_opt) else: - if IOLOOP_PARAMETER_REMOVED: + if TORNADO5: server = server_class(app) else: server = server_class(app, io_loop=self.ioloop) From ad1afe7b797bb5b5f2af297610077bdee05357a4 Mon Sep 17 00:00:00 2001 From: Pengpeng Zuo Date: Sun, 23 Dec 2018 16:37:47 +0800 Subject: [PATCH 006/263] Fix typo in gthread.py --- gunicorn/workers/gthread.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/gunicorn/workers/gthread.py b/gunicorn/workers/gthread.py index 257499e6..5625a91e 100644 --- a/gunicorn/workers/gthread.py +++ b/gunicorn/workers/gthread.py @@ -4,10 +4,10 @@ # See the NOTICE for more information. 
# design: -# a threaded worker accepts connections in the main loop, accepted -# connections are are added to the thread pool as a connection job. On -# keepalive connections are put back in the loop waiting for an event. -# If no event happen after the keep alive timeout, the connectoin is +# A threaded worker accepts connections in the main loop, accepted +# connections are added to the thread pool as a connection job. +# Keepalive connections are put back in the loop waiting for an event. +# If no event happens after the keep alive timeout, the connection is # closed. import errno From 9184ae889894912b105e8b39a3dde8c07c780047 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=94=D0=B0=D0=BC=D1=98=D0=B0=D0=BD=20=D0=93=D0=B5=D0=BE?= =?UTF-8?q?=D1=80=D0=B3=D0=B8=D0=B5=D0=B2=D1=81=D0=BA=D0=B8?= Date: Fri, 11 Jan 2019 04:41:09 +0100 Subject: [PATCH 007/263] add systemd sd_notify support (#1897) * add systemd sd_notify support roughly based on sd_notify() from systemd and https://github.com/bb4242/sdnotify only implements `READY=1` and `STATUS=Gunicorn arbiter booted` of the protocol in the arbiter. in the future, reloads can be notified, and possibly also other statuses. 
see https://www.freedesktop.org/software/systemd/man/sd_notify.html for more info sd_notify() is a noop when not run in a systemd service (i.e NOTIFY_SOCKET environment variable is not set) --- docs/source/deploy.rst | 2 +- gunicorn/arbiter.py | 1 + gunicorn/systemd.py | 32 ++++++++++++++++++++++++++++++++ 3 files changed, 34 insertions(+), 1 deletion(-) diff --git a/docs/source/deploy.rst b/docs/source/deploy.rst index c7a0212c..7a289aed 100644 --- a/docs/source/deploy.rst +++ b/docs/source/deploy.rst @@ -227,7 +227,7 @@ unix socket: After=network.target [Service] - PIDFile=/run/gunicorn/pid + Type=notify User=someuser Group=someuser RuntimeDirectory=gunicorn diff --git a/gunicorn/arbiter.py b/gunicorn/arbiter.py index adca13d3..7eaa2c17 100644 --- a/gunicorn/arbiter.py +++ b/gunicorn/arbiter.py @@ -158,6 +158,7 @@ class Arbiter(object): self.log.debug("Arbiter booted") self.log.info("Listening at: %s (%s)", listeners_str, self.pid) self.log.info("Using worker: %s", self.cfg.worker_class_str) + systemd.sd_notify("READY=1\nSTATUS=Gunicorn arbiter booted", self.log) # check worker class requirements if hasattr(self.worker_class, "check_config"): diff --git a/gunicorn/systemd.py b/gunicorn/systemd.py index 10ffb8d8..cea48220 100644 --- a/gunicorn/systemd.py +++ b/gunicorn/systemd.py @@ -4,6 +4,7 @@ # See the NOTICE for more information. import os +import socket SD_LISTEN_FDS_START = 3 @@ -43,3 +44,34 @@ def listen_fds(unset_environment=True): os.environ.pop('LISTEN_FDS', None) return fds + + +def sd_notify(state, logger, unset_environment=False): + """Send a notification to systemd. state is a string; see + the man page of sd_notify (http://www.freedesktop.org/software/systemd/man/sd_notify.html) + for a description of the allowable values. + + If the unset_environment parameter is True, sd_notify() will unset + the $NOTIFY_SOCKET environment variable before returning (regardless of + whether the function call itself succeeded or not). 
Further calls to + sd_notify() will then fail, but the variable is no longer inherited by + child processes. + """ + + + addr = os.environ.get('NOTIFY_SOCKET') + if addr is None: + # not run in a service, just a noop + return + try: + sock = socket.socket(socket.AF_UNIX, socket.SOCK_DGRAM | socket.SOCK_CLOEXEC) + if addr[0] == '@': + addr = '\0' + addr[1:] + sock.connect(addr) + sock.sendall(state.encode('utf-8')) + except: + logger.debug("Exception while invoking sd_notify()", exc_info=True) + finally: + if unset_environment: + os.environ.pop('NOTIFY_SOCKET') + sock.close() From ba0d784960be9d1e30e47e0d8b03f52c30590a0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=94=D0=B0=D0=BC=D1=98=D0=B0=D0=BD=20=D0=93=D0=B5=D0=BE?= =?UTF-8?q?=D1=80=D0=B3=D0=B8=D0=B5=D0=B2=D1=81=D0=BA=D0=B8?= Date: Fri, 11 Jan 2019 13:52:06 +0100 Subject: [PATCH 008/263] documentation: update systemd deploy unit examples (#1950) There's no need for PIDFile=, especially not for Type=notify services. systemd knows the correct pid of the process it manages. No need for the `--bind` option either, since gunicorn supports the LISTEN_FDS environment variable and will use all of the sockets that systemd provides. This way, it's also more flexible, since we can specify several sockets in a .socket unit. The .socket unit should specify User=www-data so that nginx can connect to the socket. The service (gunicorn process) will inherit the file descriptor so it doesn't even need permissions for the socket (it's nginx which needs permissions). tmpfiles.d is not needed. replace ExecStop=/bin/kill with KillMode=mixed --- docs/source/deploy.rst | 73 +++++++++++++++++++++++++++--------------- 1 file changed, 47 insertions(+), 26 deletions(-) diff --git a/docs/source/deploy.rst b/docs/source/deploy.rst index 7a289aed..7be1b9c7 100644 --- a/docs/source/deploy.rst +++ b/docs/source/deploy.rst @@ -212,12 +212,15 @@ Using Gunicorn with upstart is simple. 
In this example we will run the app Systemd ------- -A tool that is starting to be common on linux systems is Systemd_. Below are -configurations files and instructions for using systemd to create a unix socket -for incoming Gunicorn requests. Systemd will listen on this socket and start -gunicorn automatically in response to traffic. Later in this section are -instructions for configuring Nginx to forward web traffic to the newly created -unix socket: +A tool that is starting to be common on linux systems is Systemd_. It is a +system services manager that allows for strict process management, resources +and permissions control. + +Below are configurations files and instructions for using systemd to create +a unix socket for incoming Gunicorn requests. Systemd will listen on this +socket and start gunicorn automatically in response to traffic. Later in +this section are instructions for configuring Nginx to forward web traffic +to the newly created unix socket: **/etc/systemd/system/gunicorn.service**:: @@ -228,14 +231,18 @@ unix socket: [Service] Type=notify + # the specific user that our service will run as User=someuser Group=someuser + # another option for an even more restricted service is + # DynamicUser=yes + # see http://0pointer.net/blog/dynamic-users-with-systemd.html RuntimeDirectory=gunicorn WorkingDirectory=/home/someuser/applicationroot - ExecStart=/usr/bin/gunicorn --pid /run/gunicorn/pid \ - --bind unix:/run/gunicorn.sock applicationname.wsgi + ExecStart=/usr/bin/gunicorn applicationname.wsgi ExecReload=/bin/kill -s HUP $MAINPID - ExecStop=/bin/kill -s TERM $MAINPID + KillMode=mixed + TimeoutStopSec=5 PrivateTmp=true [Install] @@ -248,33 +255,47 @@ unix socket: [Socket] ListenStream=/run/gunicorn.sock - User=someuser - Group=someuser - + # Our service won't need permissions for the socket, since it + # inherits the file descriptor by socket activation + # only the nginx daemon will need access to the socket + User=www-data + # Optionally restrict 
the socket permissions even more. + # Mode=600 + [Install] WantedBy=sockets.target -**/etc/tmpfiles.d/gunicorn.conf**:: - d /run/gunicorn 0755 someuser somegroup - +Next enable and start the socket (it will autostart at boot too):: -Next enable the socket so it autostarts at boot:: - - systemctl enable gunicorn.socket - -Either reboot, or start the services manually:: - - systemctl start gunicorn.socket + systemctl enable --now gunicorn.socket -After running ``curl --unix-socket /run/gunicorn.sock http``, Gunicorn -should start and you should see some HTML from your server in the terminal. +Now let's see if the nginx daemon will be able to connect to the socket. +Running ``sudo -u www-data curl --unix-socket /run/gunicorn.sock http``, +our Gunicorn service will be automatically started and you should see some +HTML from your server in the terminal. + +.. note:: + + systemd employs cgroups to track the processes of a service, so it doesn't + need pid files. In the rare case that you need to find out the service main + pid, you can use ``systemctl show --value -p MainPID gunicorn.service``, but + if you only want to send a signal an even better option is + ``systemctl kill -s HUP gunicorn.service``. + +.. note:: + + ``www-data`` is the default nginx user in debian, other distributions use + different users (for example: ``http`` or ``nginx``). Check your distro to + know what to put for the socket user, and for the sudo command. You must now configure your web proxy to send traffic to the new Gunicorn socket. Edit your ``nginx.conf`` to include the following: **/etc/nginx/nginx.conf**:: + user www-data; ... http { server { @@ -292,15 +313,15 @@ socket. Edit your ``nginx.conf`` to include the following: The listen and server_name used here are configured for a local machine. In a production server you will most likely listen on port 80, and use your URL as the server_name. 
- + Now make sure you enable the nginx service so it automatically starts at boot:: systemctl enable nginx.service - + Either reboot, or start Nginx with the following command:: systemctl start nginx - + Now you should be able to test Nginx with Gunicorn by visiting http://127.0.0.1:8000/ in any web browser. Systemd is now set up. From c85bfba8b22c34fefb7cd812230448256cea828a Mon Sep 17 00:00:00 2001 From: Brett Randall Date: Tue, 22 Jan 2019 10:03:38 +1100 Subject: [PATCH 009/263] Fixed typo in ssl_version doc, TLSv2 -> TLSv1_2 (TLS 1.2). (#1959) * Fixed typo in ssl_version doc, TLSv2 -> TLSv1_2 (TLS 1.2). Signed-off-by: Brett Randall * revert chdir change --- docs/source/settings.rst | 20 +++++++++++++++++++- gunicorn/config.py | 3 ++- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/docs/source/settings.rst b/docs/source/settings.rst index bdc62f1e..a9f05899 100644 --- a/docs/source/settings.rst +++ b/docs/source/settings.rst @@ -436,11 +436,29 @@ ssl_version * ``--ssl-version`` * ``_SSLMethod.PROTOCOL_TLS`` -SSL version to use (see stdlib ssl module's) +SSL version to use. + +============= ============ +--ssl-version Description +============= ============ +SSLv3 SSLv3 is not-secure and is strongly discouraged. +SSLv23 Alias for TLS. Deprecated in Python 3.6, use TLS. +TLS Negotiate highest possible version between client/server. + Can yield SSL. (Python 3.6+) +TLSv1 TLS 1.0 +TLSv1_1 TLS 1.1 (Python 3.4+) +TLSv1_2 TLS 1.2 (Python 3.4+) +TLS_SERVER Auto-negotiate the highest protocol version like TLS, + but only support server-side SSLSocket connections. + (Python 3.6+) +============= ============ .. versionchanged:: 19.7 The default value has been changed from ``ssl.PROTOCOL_TLSv1`` to ``ssl.PROTOCOL_SSLv23``. +.. versionchanged:: 20.0 + This setting now accepts string names based on ``ssl.PROTOCOL_`` + constants. .. 
_cert-reqs: diff --git a/gunicorn/config.py b/gunicorn/config.py index a9b18afe..00710524 100644 --- a/gunicorn/config.py +++ b/gunicorn/config.py @@ -1900,10 +1900,11 @@ class SSLVersion(Setting): Can yield SSL. (Python 3.6+) TLSv1 TLS 1.0 TLSv1_1 TLS 1.1 (Python 3.4+) - TLSv2 TLS 1.2 (Python 3.4+) + TLSv1_2 TLS 1.2 (Python 3.4+) TLS_SERVER Auto-negotiate the highest protocol version like TLS, but only support server-side SSLSocket connections. (Python 3.6+) + ============= ============ .. versionchanged:: 19.7 The default value has been changed from ``ssl.PROTOCOL_TLSv1`` to From 6da84c614d5a5b60ebed146301e4ac7c436f8fb3 Mon Sep 17 00:00:00 2001 From: fangfei Date: Mon, 21 Jan 2019 14:35:36 +0800 Subject: [PATCH 010/263] Fix root logging root and logger are same level. https://docs.python.org/2/library/logging.config.html#dictionary-schema-details --- gunicorn/glogging.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gunicorn/glogging.py b/gunicorn/glogging.py index 3f266977..56cc5bd0 100644 --- a/gunicorn/glogging.py +++ b/gunicorn/glogging.py @@ -49,8 +49,8 @@ CONFIG_DEFAULTS = dict( version=1, disable_existing_loggers=False, + root={"level": "INFO", "handlers": ["console"]}, loggers={ - "root": {"level": "INFO", "handlers": ["console"]}, "gunicorn.error": { "level": "INFO", "handlers": ["error_console"], From e0f177ca45592583c262a1546cb41103e128dd57 Mon Sep 17 00:00:00 2001 From: Randall Leeds Date: Tue, 22 Jan 2019 00:59:41 -0800 Subject: [PATCH 011/263] Fix code block formatting in docs --- docs/source/custom.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/source/custom.rst b/docs/source/custom.rst index 36b2c9fd..0b8c366c 100644 --- a/docs/source/custom.rst +++ b/docs/source/custom.rst @@ -34,6 +34,7 @@ This command will work with any Gunicorn CLI parameters or a config file - just pass them along as if you're directly giving them to Gunicorn: .. 
code-block:: bash + # Custom parameters $ python gunicorn.app.wsgiapp exampleapi:app --bind=0.0.0.0:8081 --workers=4 # Using a config file @@ -44,6 +45,7 @@ time, and your compiled app should work with the entry point passed to it at run time. .. code-block:: bash + # Generic pex build command via bash from root of exampleapi project $ pex . -v -c gunicorn -o compiledapp.pex # Running it From 7af6f651c04511e631cbb888f527897d4273703c Mon Sep 17 00:00:00 2001 From: Takuya N Date: Tue, 22 Jan 2019 19:56:49 +0900 Subject: [PATCH 012/263] Add restructuredtext_lint to test (#1598) Signed-off-by: Takuya Noguchi --- .travis.yml | 4 ++++ tox.ini | 12 ++++++++++++ 2 files changed, 16 insertions(+) diff --git a/.travis.yml b/.travis.yml index 48c83880..97c578aa 100644 --- a/.travis.yml +++ b/.travis.yml @@ -20,6 +20,10 @@ matrix: env: TOXENV=py38-dev dist: xenial sudo: true + - python: 3.7 + env: TOXENV=docs-lint + dist: xenial + sudo: true allow_failures: - env: TOXENV=py38-dev install: pip install tox diff --git a/tox.ini b/tox.ini index cd694555..d2868ddb 100644 --- a/tox.ini +++ b/tox.ini @@ -27,3 +27,15 @@ commands = tests/test_valid_requests.py deps = pylint + +[testenv:docs-lint] +whitelist_externals = + rst-lint + bash + grep +deps = + restructuredtext_lint + pygments +commands = + rst-lint README.rst docs/README.rst + bash -c "(set -o pipefail; rst-lint --encoding utf-8 docs/source/*.rst | grep -v 'Unknown interpreted text role\|Unknown directive type'); test $? == 1" From 61e136b92250ead629ff0439be7447301fcc0440 Mon Sep 17 00:00:00 2001 From: Randall Leeds Date: Sun, 12 Mar 2017 17:14:36 -0700 Subject: [PATCH 013/263] Simplify Paste Deployment integration Remove the `gunicorn_paster` command. With the `--paste` option to the `gunicorn` command, Gunicorn will no longer read the server section of the configuration. Instead, server configuration must be done with Gunicorn configuration files, command line switches, and environment variables. 
The use of Gunicorn as a Paste Deployment server factory is no longer deprecated. It allows specifying `host` and `port`, as well as `bind`, but is otherwise more strict with options than in the past. Rather than ignoring unknown options it will raise an error. Close #1189 --- gunicorn/app/pasterapp.py | 237 +++++++++----------------------------- gunicorn/app/wsgiapp.py | 31 +++-- setup.py | 3 +- 3 files changed, 67 insertions(+), 204 deletions(-) diff --git a/gunicorn/app/pasterapp.py b/gunicorn/app/pasterapp.py index 0f9de435..4c9fc7de 100644 --- a/gunicorn/app/pasterapp.py +++ b/gunicorn/app/pasterapp.py @@ -3,206 +3,73 @@ # This file is part of gunicorn released under the MIT license. # See the NOTICE for more information. -# pylint: skip-file - +import configparser import os -import pkg_resources -import sys -try: - import configparser as ConfigParser -except ImportError: - import ConfigParser +from paste.deploy import loadapp -from paste.deploy import loadapp, loadwsgi -SERVER = loadwsgi.SERVER - -from gunicorn.app.base import Application -from gunicorn.config import Config, get_default_config_file -from gunicorn import util +from gunicorn.app.wsgiapp import WSGIApplication +from gunicorn.config import get_default_config_file -def _has_logging_config(paste_file): - cfg_parser = ConfigParser.ConfigParser() - cfg_parser.read([paste_file]) - return cfg_parser.has_section('loggers') +def get_wsgi_app(config_uri, name=None, defaults=None): + if ':' not in config_uri: + config_uri = "config:%s" % config_uri + + return loadapp( + config_uri, + name=name, + relative_to=os.getcwd(), + global_conf=defaults, + ) -def paste_config(gconfig, config_url, relative_to, global_conf=None): - # add entry to pkg_resources - sys.path.insert(0, relative_to) - pkg_resources.working_set.add_entry(relative_to) +def has_logging_config(config_file): + parser = configparser.ConfigParser() + parser.read([config_file]) + return parser.has_section('loggers') - config_url = 
config_url.split('#')[0] - cx = loadwsgi.loadcontext(SERVER, config_url, relative_to=relative_to, - global_conf=global_conf) - gc, lc = cx.global_conf.copy(), cx.local_conf.copy() - cfg = {} - host, port = lc.pop('host', ''), lc.pop('port', '') +def serve(app, global_conf, **local_conf): + """\ + A Paste Deployment server runner. + + Example configuration: + + [server:main] + use = egg:gunicorn#main + host = 127.0.0.1 + port = 5000 + """ + config_file = global_conf['__file__'] + gunicorn_config_file = local_conf.pop('config', None) + + host = local_conf.pop('host', '') + port = local_conf.pop('port', '') if host and port: - cfg['bind'] = '%s:%s' % (host, port) + local_conf['bind'] = '%s:%s' % (host, port) elif host: - cfg['bind'] = host.split(',') + local_conf['bind'] = host.split(',') - cfg['default_proc_name'] = gc.get('__file__') + class PasterServerApplication(WSGIApplication): + def load_config(self): + self.cfg.set("default_proc_name", config_file) - # init logging configuration - config_file = config_url.split(':')[1] - if _has_logging_config(config_file): - cfg.setdefault('logconfig', config_file) + if has_logging_config(config_file): + self.cfg.set("logconfig", config_file) - for k, v in gc.items(): - if k not in gconfig.settings: - continue - cfg[k] = v + if gunicorn_config_file: + self.load_config_from_file(gunicorn_config_file) + else: + default_gunicorn_config_file = get_default_config_file() + if default_gunicorn_config_file is not None: + self.load_config_from_file(default_gunicorn_config_file) - for k, v in lc.items(): - if k not in gconfig.settings: - continue - cfg[k] = v - - return cfg - - -def load_pasteapp(config_url, relative_to, global_conf=None): - return loadapp(config_url, relative_to=relative_to, - global_conf=global_conf) - -class PasterBaseApplication(Application): - gcfg = None - - def app_config(self): - return paste_config(self.cfg, self.cfgurl, self.relpath, - global_conf=self.gcfg) - - def load_config(self): - 
super(PasterBaseApplication, self).load_config() - - # reload logging conf - if hasattr(self, "cfgfname"): - parser = ConfigParser.ConfigParser() - parser.read([self.cfgfname]) - if parser.has_section('loggers'): - from logging.config import fileConfig - config_file = os.path.abspath(self.cfgfname) - fileConfig(config_file, dict(__file__=config_file, - here=os.path.dirname(config_file))) - - -class PasterApplication(PasterBaseApplication): - - def init(self, parser, opts, args): - if len(args) != 1: - parser.error("No application name specified.") - - cwd = util.getcwd() - cfgfname = os.path.normpath(os.path.join(cwd, args[0])) - cfgfname = os.path.abspath(cfgfname) - if not os.path.exists(cfgfname): - parser.error("Config file not found: %s" % cfgfname) - - self.cfgurl = 'config:%s' % cfgfname - self.relpath = os.path.dirname(cfgfname) - self.cfgfname = cfgfname - - sys.path.insert(0, self.relpath) - pkg_resources.working_set.add_entry(self.relpath) - - return self.app_config() - - def load(self): - # chdir to the configured path before loading, - # default is the current dir - os.chdir(self.cfg.chdir) - - return load_pasteapp(self.cfgurl, self.relpath, global_conf=self.gcfg) - - -class PasterServerApplication(PasterBaseApplication): - - def __init__(self, app, gcfg=None, host="127.0.0.1", port=None, **kwargs): - # pylint: disable=super-init-not-called - self.cfg = Config() - self.gcfg = gcfg # need to hold this for app_config - self.app = app - self.callable = None - - gcfg = gcfg or {} - cfgfname = gcfg.get("__file__") - if cfgfname is not None: - self.cfgurl = 'config:%s' % cfgfname - self.relpath = os.path.dirname(cfgfname) - self.cfgfname = cfgfname - - cfg = kwargs.copy() - - if port and not host.startswith("unix:"): - bind = "%s:%s" % (host, port) - else: - bind = host - cfg["bind"] = bind.split(',') - - if gcfg: - for k, v in gcfg.items(): - cfg[k] = v - cfg["default_proc_name"] = cfg['__file__'] - - try: - for k, v in cfg.items(): - if k.lower() in 
self.cfg.settings and v is not None: + for k, v in local_conf.items(): + if v is not None: self.cfg.set(k.lower(), v) - except Exception as e: - print("\nConfig error: %s" % str(e), file=sys.stderr) - sys.stderr.flush() - sys.exit(1) - if cfg.get("config"): - self.load_config_from_file(cfg["config"]) - else: - default_config = get_default_config_file() - if default_config is not None: - self.load_config_from_file(default_config) + def load(self): + return app - def load(self): - return self.app - - -def run(): - """\ - The ``gunicorn_paster`` command for launching Paster compatible - applications like Pylons or Turbogears2 - """ - util.warn("""This command is deprecated. - - You should now use the `--paste` option. Ex.: - - gunicorn --paste development.ini - """) - - from gunicorn.app.pasterapp import PasterApplication - PasterApplication("%(prog)s [OPTIONS] pasteconfig.ini").run() - - -def paste_server(app, gcfg=None, host="127.0.0.1", port=None, **kwargs): - """\ - A paster server. - - Then entry point in your paster ini file should looks like this: - - [server:main] - use = egg:gunicorn#main - host = 127.0.0.1 - port = 5000 - - """ - - util.warn("""This command is deprecated. - - You should now use the `--paste` option. 
Ex.: - - gunicorn --paste development.ini - """) - - from gunicorn.app.pasterapp import PasterServerApplication - PasterServerApplication(app, gcfg=gcfg, host=host, port=port, **kwargs).run() + PasterServerApplication().run() diff --git a/gunicorn/app/wsgiapp.py b/gunicorn/app/wsgiapp.py index 916a2b1d..c8501e5f 100644 --- a/gunicorn/app/wsgiapp.py +++ b/gunicorn/app/wsgiapp.py @@ -13,22 +13,21 @@ from gunicorn import util class WSGIApplication(Application): def init(self, parser, opts, args): if opts.paste: - app_name = 'main' - path = opts.paste - if '#' in path: - path, app_name = path.split('#') - path = os.path.abspath(os.path.normpath( - os.path.join(util.getcwd(), path))) + from .pasterapp import has_logging_config - if not os.path.exists(path): - raise ConfigError("%r not found" % path) + config_uri = os.path.abspath(opts.paste) + config_file = config_uri.split('#')[0] - # paste application, load the config - self.cfgurl = 'config:%s#%s' % (path, app_name) - self.relpath = os.path.dirname(path) + if not os.path.exists(config_file): + raise ConfigError("%r not found" % config_file) - from .pasterapp import paste_config - return paste_config(self.cfg, self.cfgurl, self.relpath) + self.cfg.set("default_proc_name", config_file) + self.app_uri = config_uri + + if has_logging_config(config_file): + self.cfg.set("logconfig", config_file) + + return if not args: parser.error("No application module specified.") @@ -37,13 +36,11 @@ class WSGIApplication(Application): self.app_uri = args[0] def load_wsgiapp(self): - # load the app return util.import_app(self.app_uri) def load_pasteapp(self): - # load the paste app - from .pasterapp import load_pasteapp - return load_pasteapp(self.cfgurl, self.relpath, global_conf=self.cfg.paste_global_conf) + from .pasterapp import get_wsgi_app + return get_wsgi_app(self.app_uri, defaults=self.cfg.paste_global_conf) def load(self): if self.cfg.paste is not None: diff --git a/setup.py b/setup.py index 19e85c94..fd24c0df 100644 --- 
a/setup.py +++ b/setup.py @@ -104,10 +104,9 @@ setup( entry_points=""" [console_scripts] gunicorn=gunicorn.app.wsgiapp:run - gunicorn_paster=gunicorn.app.pasterapp:run [paste.server_runner] - main=gunicorn.app.pasterapp:paste_server + main=gunicorn.app.pasterapp:serve """, extras_require=extra_require, ) From 47e208717b85a8feaccd160eefc4575a5551a55e Mon Sep 17 00:00:00 2001 From: Randall Leeds Date: Tue, 22 Jan 2019 03:21:52 -0800 Subject: [PATCH 014/263] Update integration docs for Paste Deployment --- docs/source/run.rst | 57 +++++++++++++++++++++++++++++++++++++-------- 1 file changed, 47 insertions(+), 10 deletions(-) diff --git a/docs/source/run.rst b/docs/source/run.rst index d0799fa0..1ee68ee9 100644 --- a/docs/source/run.rst +++ b/docs/source/run.rst @@ -4,8 +4,9 @@ Running Gunicorn .. highlight:: bash -You can run Gunicorn by using commands or integrate with Django or Paster. For -deploying Gunicorn in production see :doc:`deploy`. +You can run Gunicorn by using commands or integrate with popular frameworks +like Django, Pyramid, or TurboGears. For deploying Gunicorn in production see +:doc:`deploy`. Commands ======== @@ -78,7 +79,7 @@ See :ref:`configuration` and :ref:`settings` for detailed usage. Integration =========== -We also provide integration for both Django and Paster applications. +Gunicorn also provides integration for Django and Paste Deploy applications. Django ------ @@ -104,13 +105,40 @@ option:: $ gunicorn --env DJANGO_SETTINGS_MODULE=myproject.settings myproject.wsgi -Paste ------ +Paste Deployment +---------------- -If you are a user/developer of a paste-compatible framework/app (as -Pyramid, Pylons and Turbogears) you can use the -`--paste <http://docs.gunicorn.org/en/latest/settings.html#paste>`_ option -to run your application. +Frameworks such as Pyramid and Turbogears are typically configured using Paste +Deployment configuration files. If you would like to use these files with +Gunicorn, there are two approaches. 
+ +As a server runner, Gunicorn can serve your application using the commands from +your framework, such as ``pserve`` or ``gearbox``. To use Gunicorn with these +commands, specify it as a server in your configuration file: + +.. code-block:: ini + + [server:main] + use = egg:gunicorn#main + host = 127.0.0.1 + port = 8080 + workers = 3 + +This approach is the quickest way to get started with Gunicorn, but there are +some limitations. Gunicorn will have no control over how the application is +loaded, so settings such as reload_ will have no effect and Gunicorn will be +unable to hot upgrade a running application. Using the daemon_ option may +confuse your command line tool. Instead, use the built-in support for these +features provided by that tool. For example, run ``pserve --reload`` instead of +specifying ``reload = True`` in the server configuration block. For advanced +configuration of Gunicorn, such as `Server Hooks`_ specifying a Gunicorn +configuration file using the ``config`` key is supported. + +To use the full power of Gunicorn's reloading and hot code upgrades, use the +`paste option`_ to run your application instead. When used this way, Gunicorn +will use the application defined by the PasteDeploy configuration file, but +Gunicorn will not use any server configuration defined in the file. Instead, +`configure gunicorn`_. For example:: @@ -120,4 +148,13 @@ Or use a different application:: $ gunicorn --paste development.ini#admin -b :8080 --chdir /path/to/project -It is all here. No configuration files nor additional Python modules to write! +With both approaches, Gunicorn will use any loggers section found in Paste +Deployment configuration file, unless instructed otherwise by specifying +additional `logging settings`_. + +.. _reload: http://docs.gunicorn.org/en/latest/settings.html#reload +.. _daemon: http://docs.gunicorn.org/en/latest/settings.html#daemon +.. _Server Hooks: http://docs.gunicorn.org/en/latest/settings.html#server-hooks +.. 
_paste option: http://docs.gunicorn.org/en/latest/settings.html#paste +.. _configure gunicorn: http://docs.gunicorn.org/en/latest/configure.html +.. _logging settings: http://docs.gunicorn.org/en/latest/settings.html#logging From e5141a1c5a44299472abf910a3bd84d7d13b2399 Mon Sep 17 00:00:00 2001 From: Benoit Chesneau Date: Thu, 24 Jan 2019 15:34:20 +0100 Subject: [PATCH 015/263] handle `wsgi.input_terminated` extension fix #1653 --- gunicorn/http/wsgi.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/gunicorn/http/wsgi.py b/gunicorn/http/wsgi.py index 593c8f24..3694eb9e 100644 --- a/gunicorn/http/wsgi.py +++ b/gunicorn/http/wsgi.py @@ -73,6 +73,7 @@ def base_environ(cfg): "wsgi.multiprocess": (cfg.workers > 1), "wsgi.run_once": False, "wsgi.file_wrapper": FileWrapper, + "wsgi.input_terminated": True, "SERVER_SOFTWARE": SERVER_SOFTWARE, } @@ -130,6 +131,7 @@ def create(req, sock, client, server, cfg): continue elif hdr_name == "CONTENT-LENGTH": environ['CONTENT_LENGTH'] = hdr_value + environ['wsgi.input_terminated'] = False continue key = 'HTTP_' + hdr_name.replace('-', '_') From 2ea5fbdc86e3e0d3d7f6d89c6da7ae0d80a36dbc Mon Sep 17 00:00:00 2001 From: Daniel Pope Date: Thu, 24 Jan 2019 21:41:04 +0000 Subject: [PATCH 016/263] Use Python's default SSL cipher list by default (#1970) --- docs/source/settings.rst | 17 +++++++++++++++-- gunicorn/config.py | 17 +++++++++++++++-- tests/test_ssl.py | 2 +- 3 files changed, 31 insertions(+), 5 deletions(-) diff --git a/docs/source/settings.rst b/docs/source/settings.rst index a9f05899..f67c5363 100644 --- a/docs/source/settings.rst +++ b/docs/source/settings.rst @@ -506,9 +506,22 @@ ciphers ~~~~~~~ * ``--ciphers`` -* ``TLSv1`` +* ``None`` -Ciphers to use (see stdlib ssl module's) +SSL Cipher suite to use, in the format of an OpenSSL cipher list. + +By default we use the default cipher list from Python's ``ssl`` module, +which contains ciphers considered strong at the time of each Python +release. 
+ +As a recommended alternative, the Open Web App Security Project (OWASP) +offers `a vetted set of strong cipher strings rated A+ to C- +<https://www.owasp.org/index.php/TLS_Cipher_String_Cheat_Sheet>`_. +OWASP provides details on user-agent compatibility at each security level. + +See the `OpenSSL Cipher List Format Documentation +<https://www.openssl.org/docs/manmaster/man1/ciphers.html#CIPHER-LIST-FORMAT>`_ +for details on the format of an OpenSSL cipher list. Security -------- diff --git a/gunicorn/config.py b/gunicorn/config.py index 00710524..e460e627 100644 --- a/gunicorn/config.py +++ b/gunicorn/config.py @@ -1963,9 +1963,22 @@ class Ciphers(Setting): section = "SSL" cli = ["--ciphers"] validator = validate_string - default = 'TLSv1' + default = None desc = """\ - Ciphers to use (see stdlib ssl module's) + SSL Cipher suite to use, in the format of an OpenSSL cipher list. + + By default we use the default cipher list from Python's ``ssl`` module, + which contains ciphers considered strong at the time of each Python + release. + + As a recommended alternative, the Open Web App Security Project (OWASP) + offers `a vetted set of strong cipher strings rated A+ to C- + <https://www.owasp.org/index.php/TLS_Cipher_String_Cheat_Sheet>`_. + OWASP provides details on user-agent compatibility at each security level. + + See the `OpenSSL Cipher List Format Documentation + <https://www.openssl.org/docs/manmaster/man1/ciphers.html#CIPHER-LIST-FORMAT>`_ + for details on the format of an OpenSSL cipher list. """ diff --git a/tests/test_ssl.py b/tests/test_ssl.py index 6c16212a..97e05d86 100644 --- a/tests/test_ssl.py +++ b/tests/test_ssl.py @@ -72,4 +72,4 @@ def test_ciphers(): assert Ciphers.name == 'ciphers' assert Ciphers.section == 'SSL' assert Ciphers.cli == ['--ciphers'] - assert Ciphers.default == 'TLSv1' + assert Ciphers.default is None From 97a45805f85830d1f80bf769f5787704daa635d3 Mon Sep 17 00:00:00 2001 From: Benoit Chesneau Date: Thu, 24 Jan 2019 23:05:28 +0100 Subject: [PATCH 017/263] remove gaiohttp worker (#1971) * remove gaiohttp worker worker is deprecated and won't work on latest version. 
--- docs/source/design.rst | 27 +---- docs/source/run.rst | 2 +- gunicorn/config.py | 14 +-- gunicorn/workers/__init__.py | 1 - gunicorn/workers/_gaiohttp.py | 168 ----------------------------- gunicorn/workers/gaiohttp.py | 22 ---- tests/test_gaiohttp.py | 193 ---------------------------------- tox.ini | 1 - 8 files changed, 10 insertions(+), 418 deletions(-) delete mode 100644 gunicorn/workers/_gaiohttp.py delete mode 100644 gunicorn/workers/gaiohttp.py delete mode 100644 tests/test_gaiohttp.py diff --git a/docs/source/design.rst b/docs/source/design.rst index 88180a82..85157666 100644 --- a/docs/source/design.rst +++ b/docs/source/design.rst @@ -59,7 +59,7 @@ WSGI application, this is not a recommended configuration. AsyncIO Workers --------------- -These workers are compatible with python3. You have two kind of workers. +These workers are compatible with Python 3. The worker `gthread` is a threaded worker. It accepts connections in the main loop, accepted connections are added to the thread pool as a @@ -67,24 +67,8 @@ connection job. On keepalive connections are put back in the loop waiting for an event. If no event happen after the keep alive timeout, the connection is closed. -The worker `gaiohttp` is a full asyncio worker using aiohttp_. - -.. note:: - The ``gaiohttp`` worker requires the aiohttp_ module to be installed. - aiohttp_ has removed its native WSGI application support in version 2. - If you want to continue to use the ``gaiohttp`` worker with your WSGI - application (e.g. an application that uses Flask or Django), there are - three options available: - - #. Install aiohttp_ version 1.3.5 instead of version 2:: - - $ pip install aiohttp==1.3.5 - - #. Use aiohttp_wsgi_ to wrap your WSGI application. You can take a look - at the `example`_ in the Gunicorn repository. - #. Port your application to use aiohttp_'s ``web.Application`` API. - #. Use the ``aiohttp.worker.GunicornWebWorker`` worker instead of the - deprecated ``gaiohttp`` worker. 
+You can port also your application to use aiohttp_'s `web.Application`` API and use the +``aiohttp.worker.GunicornWebWorker`` worker. Choosing a Worker Type ====================== @@ -150,13 +134,12 @@ the worker processes (unlike when using the preload setting, which loads the code in the master process). .. note:: - Under Python 2.x, you need to install the 'futures' package to use this + Under Python 2.x, you need to install the 'futures' package to use this feature. .. _Greenlets: https://github.com/python-greenlet/greenlet .. _Eventlet: http://eventlet.net/ .. _Gevent: http://www.gevent.org/ .. _Hey: https://github.com/rakyll/hey -.. _aiohttp: https://aiohttp.readthedocs.io/en/stable/ -.. _aiohttp_wsgi: https://aiohttp-wsgi.readthedocs.io/en/stable/index.html +.. _aiohttp: https://docs.aiohttp.org/en/stable/deployment.html#nginx-gunicorn .. _`example`: https://github.com/benoitc/gunicorn/blob/master/examples/frameworks/flaskapp_aiohttp_wsgi.py diff --git a/docs/source/run.rst b/docs/source/run.rst index d0799fa0..070f6ea5 100644 --- a/docs/source/run.rst +++ b/docs/source/run.rst @@ -61,7 +61,7 @@ Commonly Used Arguments to run. You'll definitely want to read the production page for the implications of this parameter. You can set this to ``$(NAME)`` where ``$(NAME)`` is one of ``sync``, ``eventlet``, ``gevent``, - ``tornado``, ``gthread``, ``gaiohttp`` (deprecated). + ``tornado``, ``gthread``. ``sync`` is the default. See the :ref:`worker-class` documentation for more information. 
* ``-n APP_NAME, --name=APP_NAME`` - If setproctitle_ is installed you can diff --git a/gunicorn/config.py b/gunicorn/config.py index e460e627..29a42f23 100644 --- a/gunicorn/config.py +++ b/gunicorn/config.py @@ -630,25 +630,19 @@ class WorkerClass(Setting): A string referring to one of the following bundled classes: * ``sync`` - * ``eventlet`` - Requires eventlet >= 0.9.7 (or install it via + * ``eventlet`` - Requires eventlet >= 0.9.7 (or install it via ``pip install gunicorn[eventlet]``) - * ``gevent`` - Requires gevent >= 0.13 (or install it via + * ``gevent`` - Requires gevent >= 0.13 (or install it via ``pip install gunicorn[gevent]``) - * ``tornado`` - Requires tornado >= 0.2 (or install it via + * ``tornado`` - Requires tornado >= 0.2 (or install it via ``pip install gunicorn[tornado]``) * ``gthread`` - Python 2 requires the futures package to be installed (or install it via ``pip install gunicorn[gthread]``) - * ``gaiohttp`` - Deprecated. Optionally, you can provide your own worker by giving Gunicorn a Python path to a subclass of ``gunicorn.workers.base.Worker``. This alternative syntax will load the gevent class: ``gunicorn.workers.ggevent.GeventWorker``. - - .. deprecated:: 19.8 - The ``gaiohttp`` worker is deprecated. Please use - ``aiohttp.worker.GunicornWebWorker`` instead. See - :ref:`asyncio-workers` for more information on how to use it. """ class WorkerThreads(Setting): @@ -671,7 +665,7 @@ class WorkerThreads(Setting): If it is not defined, the default is ``1``. This setting only affects the Gthread worker type. - + .. 
note:: If you try to use the ``sync`` worker type and set the ``threads`` setting to more than 1, the ``gthread`` worker type will be used diff --git a/gunicorn/workers/__init__.py b/gunicorn/workers/__init__.py index 29c04c2a..ae753e1c 100644 --- a/gunicorn/workers/__init__.py +++ b/gunicorn/workers/__init__.py @@ -7,7 +7,6 @@ SUPPORTED_WORKERS = { "sync": "gunicorn.workers.sync.SyncWorker", "eventlet": "gunicorn.workers.geventlet.EventletWorker", - "gaiohttp": "gunicorn.workers.gaiohttp.AiohttpWorker", "gevent": "gunicorn.workers.ggevent.GeventWorker", "gevent_wsgi": "gunicorn.workers.ggevent.GeventPyWSGIWorker", "gevent_pywsgi": "gunicorn.workers.ggevent.GeventPyWSGIWorker", diff --git a/gunicorn/workers/_gaiohttp.py b/gunicorn/workers/_gaiohttp.py deleted file mode 100644 index fe378c35..00000000 --- a/gunicorn/workers/_gaiohttp.py +++ /dev/null @@ -1,168 +0,0 @@ -# -*- coding: utf-8 - -# -# This file is part of gunicorn released under the MIT license. -# See the NOTICE for more information. 
- -import asyncio -import datetime -import functools -import logging -import os - -try: - import ssl -except ImportError: - ssl = None - -import gunicorn.workers.base as base - -from aiohttp.wsgi import WSGIServerHttpProtocol as OldWSGIServerHttpProtocol - - -class WSGIServerHttpProtocol(OldWSGIServerHttpProtocol): - def log_access(self, request, environ, response, time): - self.logger.access(response, request, environ, datetime.timedelta(0, 0, time)) - - -class AiohttpWorker(base.Worker): - - def __init__(self, *args, **kw): # pragma: no cover - super().__init__(*args, **kw) - cfg = self.cfg - if cfg.is_ssl: - self.ssl_context = self._create_ssl_context(cfg) - else: - self.ssl_context = None - self.servers = [] - self.connections = {} - - def init_process(self): - # create new event_loop after fork - asyncio.get_event_loop().close() - - self.loop = asyncio.new_event_loop() - asyncio.set_event_loop(self.loop) - - super().init_process() - - def run(self): - self._runner = asyncio.ensure_future(self._run(), loop=self.loop) - - try: - self.loop.run_until_complete(self._runner) - finally: - self.loop.close() - - def wrap_protocol(self, proto): - proto.connection_made = _wrp( - proto, proto.connection_made, self.connections) - proto.connection_lost = _wrp( - proto, proto.connection_lost, self.connections, False) - return proto - - def factory(self, wsgi, addr): - # are we in debug level - is_debug = self.log.loglevel == logging.DEBUG - - proto = WSGIServerHttpProtocol( - wsgi, readpayload=True, - loop=self.loop, - log=self.log, - debug=is_debug, - keep_alive=self.cfg.keepalive, - access_log=self.log.access_log, - access_log_format=self.cfg.access_log_format) - return self.wrap_protocol(proto) - - def get_factory(self, sock, addr): - return functools.partial(self.factory, self.wsgi, addr) - - @asyncio.coroutine - def close(self): - try: - if hasattr(self.wsgi, 'close'): - yield from self.wsgi.close() - except: - self.log.exception('Process shutdown exception') - - 
@asyncio.coroutine - def _run(self): - for sock in self.sockets: - factory = self.get_factory(sock.sock, sock.cfg_addr) - self.servers.append( - (yield from self._create_server(factory, sock))) - - # If our parent changed then we shut down. - pid = os.getpid() - try: - while self.alive or self.connections: - self.notify() - - if (self.alive and - pid == os.getpid() and self.ppid != os.getppid()): - self.log.info("Parent changed, shutting down: %s", self) - self.alive = False - - # stop accepting requests - if not self.alive: - if self.servers: - self.log.info( - "Stopping server: %s, connections: %s", - pid, len(self.connections)) - for server in self.servers: - server.close() - self.servers.clear() - - # prepare connections for closing - for conn in self.connections.values(): - if hasattr(conn, 'closing'): - conn.closing() - - yield from asyncio.sleep(1.0, loop=self.loop) - except KeyboardInterrupt: - pass - - if self.servers: - for server in self.servers: - server.close() - - yield from self.close() - - @asyncio.coroutine - def _create_server(self, factory, sock): - return self.loop.create_server(factory, sock=sock.sock, - ssl=self.ssl_context) - - @staticmethod - def _create_ssl_context(cfg): - """ Creates SSLContext instance for usage in asyncio.create_server. - - See ssl.SSLSocket.__init__ for more details. 
- """ - ctx = ssl.SSLContext(cfg.ssl_version) - ctx.load_cert_chain(cfg.certfile, cfg.keyfile) - ctx.verify_mode = cfg.cert_reqs - if cfg.ca_certs: - ctx.load_verify_locations(cfg.ca_certs) - if cfg.ciphers: - ctx.set_ciphers(cfg.ciphers) - return ctx - - -class _wrp: - - def __init__(self, proto, meth, tracking, add=True): - self._proto = proto - self._id = id(proto) - self._meth = meth - self._tracking = tracking - self._add = add - - def __call__(self, *args): - if self._add: - self._tracking[self._id] = self._proto - elif self._id in self._tracking: - del self._tracking[self._id] - - conn = self._meth(*args) - return conn diff --git a/gunicorn/workers/gaiohttp.py b/gunicorn/workers/gaiohttp.py deleted file mode 100644 index b8248259..00000000 --- a/gunicorn/workers/gaiohttp.py +++ /dev/null @@ -1,22 +0,0 @@ -# -*- coding: utf-8 - -# -# This file is part of gunicorn released under the MIT license. -# See the NOTICE for more information. - -from gunicorn import util - -try: - import aiohttp # pylint: disable=unused-import -except ImportError: - raise RuntimeError("You need aiohttp installed to use this worker.") -else: - try: - from aiohttp.worker import GunicornWebWorker as AiohttpWorker - except ImportError: - from gunicorn.workers._gaiohttp import AiohttpWorker - - util.warn( - "The 'gaiohttp' worker is deprecated. See --worker-class " - "documentation for more information." - ) - __all__ = ['AiohttpWorker'] diff --git a/tests/test_gaiohttp.py b/tests/test_gaiohttp.py deleted file mode 100644 index e58f36c1..00000000 --- a/tests/test_gaiohttp.py +++ /dev/null @@ -1,193 +0,0 @@ -# -*- coding: utf-8 - -# -# This file is part of gunicorn released under the MIT license. -# See the NOTICE for more information. 
- -import unittest -import pytest - -aiohttp = pytest.importorskip("aiohttp") -WSGIServerHttpProtocol = pytest.importorskip("aiohttp.wsgi.WSGIServerHttpProtocol") - -import asyncio -from gunicorn.workers import gaiohttp -from gunicorn.workers._gaiohttp import _wrp -from gunicorn.config import Config -from unittest import mock - - -class WorkerTests(unittest.TestCase): - - def setUp(self): - self.loop = asyncio.new_event_loop() - asyncio.set_event_loop(None) - self.worker = gaiohttp.AiohttpWorker('age', - 'ppid', - 'sockets', - 'app', - 'timeout', - Config(), - 'log') - - def tearDown(self): - self.loop.close() - - @mock.patch('gunicorn.workers._gaiohttp.asyncio') - def test_init_process(self, m_asyncio): - try: - self.worker.init_process() - except TypeError: - # to mask incomplete initialization of AiohttWorker instance: - # we pass invalid values for ctor args - pass - - self.assertTrue(m_asyncio.get_event_loop.return_value.close.called) - self.assertTrue(m_asyncio.new_event_loop.called) - self.assertTrue(m_asyncio.set_event_loop.called) - - @mock.patch('gunicorn.workers._gaiohttp.asyncio') - def test_run(self, m_asyncio): - self.worker.loop = mock.Mock() - self.worker.run() - - self.assertTrue(m_asyncio.ensure_future.called) - self.assertTrue(self.worker.loop.run_until_complete.called) - self.assertTrue(self.worker.loop.close.called) - - def test_factory(self): - self.worker.wsgi = mock.Mock() - self.worker.loop = mock.Mock() - self.worker.log = mock.Mock() - self.worker.cfg = Config() - - f = self.worker.factory( - self.worker.wsgi, ('localhost', 8080)) - self.assertIsInstance(f, WSGIServerHttpProtocol) - - @mock.patch('gunicorn.workers._gaiohttp.asyncio') - def test__run(self, m_asyncio): - self.worker.ppid = 1 - self.worker.alive = True - self.worker.servers = [] - sock = mock.Mock() - sock.cfg_addr = ('localhost', 8080) - self.worker.sockets = [sock] - self.worker.wsgi = mock.Mock() - self.worker.log = mock.Mock() - self.worker.notify = mock.Mock() - loop = 
self.worker.loop = mock.Mock() - loop.create_server.return_value = asyncio.Future(loop=self.loop) - loop.create_server.return_value.set_result(sock) - - self.loop.run_until_complete(self.worker._run()) - - self.assertTrue(self.worker.log.info.called) - self.assertTrue(self.worker.notify.called) - - @mock.patch('gunicorn.workers._gaiohttp.asyncio') - def test__run_unix_socket(self, m_asyncio): - self.worker.ppid = 1 - self.worker.alive = True - self.worker.servers = [] - sock = mock.Mock() - sock.cfg_addr = '/tmp/gunicorn.sock' - self.worker.sockets = [sock] - self.worker.wsgi = mock.Mock() - self.worker.log = mock.Mock() - self.worker.notify = mock.Mock() - loop = self.worker.loop = mock.Mock() - loop.create_server.return_value = asyncio.Future(loop=self.loop) - loop.create_server.return_value.set_result(sock) - - self.loop.run_until_complete(self.worker._run()) - - self.assertTrue(self.worker.log.info.called) - self.assertTrue(self.worker.notify.called) - - def test__run_connections(self): - conn = mock.Mock() - self.worker.ppid = 1 - self.worker.alive = False - self.worker.servers = [mock.Mock()] - self.worker.connections = {1: conn} - self.worker.sockets = [] - self.worker.wsgi = mock.Mock() - self.worker.log = mock.Mock() - self.worker.loop = self.loop - self.worker.loop.create_server = mock.Mock() - self.worker.notify = mock.Mock() - - def _close_conns(): - self.worker.connections = {} - - self.loop.call_later(0.1, _close_conns) - self.loop.run_until_complete(self.worker._run()) - - self.assertTrue(self.worker.log.info.called) - self.assertTrue(self.worker.notify.called) - self.assertFalse(self.worker.servers) - self.assertTrue(conn.closing.called) - - @mock.patch('gunicorn.workers._gaiohttp.os') - @mock.patch('gunicorn.workers._gaiohttp.asyncio.sleep') - def test__run_exc(self, m_sleep, m_os): - m_os.getpid.return_value = 1 - m_os.getppid.return_value = 1 - - self.worker.servers = [mock.Mock()] - self.worker.ppid = 1 - self.worker.alive = True - 
self.worker.sockets = [] - self.worker.log = mock.Mock() - self.worker.loop = mock.Mock() - self.worker.notify = mock.Mock() - - slp = asyncio.Future(loop=self.loop) - slp.set_exception(KeyboardInterrupt) - m_sleep.return_value = slp - - self.loop.run_until_complete(self.worker._run()) - self.assertTrue(m_sleep.called) - self.assertTrue(self.worker.servers[0].close.called) - - def test_close_wsgi_app(self): - self.worker.ppid = 1 - self.worker.alive = False - self.worker.servers = [mock.Mock()] - self.worker.connections = {} - self.worker.sockets = [] - self.worker.log = mock.Mock() - self.worker.loop = self.loop - self.worker.loop.create_server = mock.Mock() - self.worker.notify = mock.Mock() - - self.worker.wsgi = mock.Mock() - self.worker.wsgi.close.return_value = asyncio.Future(loop=self.loop) - self.worker.wsgi.close.return_value.set_result(1) - - self.loop.run_until_complete(self.worker._run()) - self.assertTrue(self.worker.wsgi.close.called) - - self.worker.wsgi = mock.Mock() - self.worker.wsgi.close.return_value = asyncio.Future(loop=self.loop) - self.worker.wsgi.close.return_value.set_exception(ValueError()) - - self.loop.run_until_complete(self.worker._run()) - self.assertTrue(self.worker.wsgi.close.called) - - def test_wrp(self): - conn = object() - tracking = {} - meth = mock.Mock() - wrp = _wrp(conn, meth, tracking) - wrp() - - self.assertIn(id(conn), tracking) - self.assertTrue(meth.called) - - meth = mock.Mock() - wrp = _wrp(conn, meth, tracking, False) - wrp() - - self.assertNotIn(1, tracking) - self.assertTrue(meth.called) diff --git a/tox.ini b/tox.ini index d2868ddb..47249d6e 100644 --- a/tox.ini +++ b/tox.ini @@ -14,7 +14,6 @@ commands = gunicorn \ tests/test_arbiter.py \ tests/test_config.py \ - tests/test_gaiohttp.py \ tests/test_http.py \ tests/test_invalid_requests.py \ tests/test_logger.py \ From fe7632fe3738dfcc1aaf5c2fc7ced3a3c7f42214 Mon Sep 17 00:00:00 2001 From: Benoit Chesneau Date: Thu, 24 Jan 2019 23:15:19 +0100 Subject: [PATCH 
018/263] make sure all examples work on python 3 fix #1961 --- examples/echo.py | 3 +-- examples/log_app.py | 2 +- examples/{readline.py => readline_app.py} | 7 +++---- examples/test.py | 3 +-- 4 files changed, 6 insertions(+), 9 deletions(-) rename examples/{readline.py => readline_app.py} (86%) diff --git a/examples/echo.py b/examples/echo.py index 5f7d79e2..06f61602 100644 --- a/examples/echo.py +++ b/examples/echo.py @@ -24,8 +24,7 @@ def app(environ, start_response): response_headers = [ ('Content-type', 'text/plain'), ('Content-Length', str(len(data))), - ('X-Gunicorn-Version', __version__), - ("Test", "test тест"), + ('X-Gunicorn-Version', __version__) ] start_response(status, response_headers) return iter([data]) diff --git a/examples/log_app.py b/examples/log_app.py index a1cc3d6e..c7e97992 100644 --- a/examples/log_app.py +++ b/examples/log_app.py @@ -13,4 +13,4 @@ def app(environ, start_response): log.info("Hello Info!") log.warn("Hello Warn!") log.error("Hello Error!") - return ["Hello World!\n"] + return [b"Hello World!\n"] diff --git a/examples/readline.py b/examples/readline_app.py similarity index 86% rename from examples/readline.py rename to examples/readline_app.py index 75d18f31..4a87851b 100644 --- a/examples/readline.py +++ b/examples/readline_app.py @@ -9,7 +9,7 @@ # # Launch a server with the app in a terminal # -# $ gunicorn -w3 readline:app +# $ gunicorn -w3 readline_app:app # # Then in another terminal launch the following command: # @@ -27,8 +27,7 @@ def app(environ, start_response): response_headers = [ ('Content-type', 'text/plain'), ('Transfer-Encoding', "chunked"), - ('X-Gunicorn-Version', __version__), - #("Test", "test тест"), + ('X-Gunicorn-Version', __version__) ] start_response(status, response_headers) @@ -42,4 +41,4 @@ def app(environ, start_response): print(line) lines.append(line) - return iter(lines) \ No newline at end of file + return iter(lines) diff --git a/examples/test.py b/examples/test.py index 476fd62f..ffface08 
100644 --- a/examples/test.py +++ b/examples/test.py @@ -20,8 +20,7 @@ def app(environ, start_response): response_headers = [ ('Content-type', 'text/plain'), ('Content-Length', str(len(data))), - ('X-Gunicorn-Version', __version__), - #("Test", "test тест"), + ('X-Gunicorn-Version', __version__) ] start_response(status, response_headers) return iter([data]) From 9f87c88819b901d4cd8ddf6c2bba1198b68903cb Mon Sep 17 00:00:00 2001 From: Randall Leeds Date: Thu, 24 Jan 2019 23:24:14 -0800 Subject: [PATCH 019/263] Bump minimum Eventlet and Gevent versions (#1962) Update the Eventlet and Gevent worker classes to check for versions of Eventlet and Gevent that are stable under Python 3 and remove outdated compatibility code. --- gunicorn/config.py | 4 +-- gunicorn/workers/geventlet.py | 14 ++++---- gunicorn/workers/ggevent.py | 62 +++++++++-------------------------- 3 files changed, 24 insertions(+), 56 deletions(-) diff --git a/gunicorn/config.py b/gunicorn/config.py index 29a42f23..03fa887b 100644 --- a/gunicorn/config.py +++ b/gunicorn/config.py @@ -630,9 +630,9 @@ class WorkerClass(Setting): A string referring to one of the following bundled classes: * ``sync`` - * ``eventlet`` - Requires eventlet >= 0.9.7 (or install it via + * ``eventlet`` - Requires eventlet >= 0.24 (or install it via ``pip install gunicorn[eventlet]``) - * ``gevent`` - Requires gevent >= 0.13 (or install it via + * ``gevent`` - Requires gevent >= 1.4 (or install it via ``pip install gunicorn[gevent]``) * ``tornado`` - Requires tornado >= 0.2 (or install it via ``pip install gunicorn[tornado]``) diff --git a/gunicorn/workers/geventlet.py b/gunicorn/workers/geventlet.py index b0cf4b83..eda95992 100644 --- a/gunicorn/workers/geventlet.py +++ b/gunicorn/workers/geventlet.py @@ -11,12 +11,11 @@ import sys try: import eventlet except ImportError: - raise RuntimeError("You need eventlet installed to use this worker.") - -# validate the eventlet version -if eventlet.version_info < (0, 9, 7): - raise 
RuntimeError("You need eventlet >= 0.9.7") - + raise RuntimeError("eventlet worker requires eventlet 0.24 or higher") +else: + from pkg_resources import parse_version + if parse_version(eventlet.__version__) < parse_version('0.24'): + raise RuntimeError("eventlet worker requires eventlet 0.24 or higher") from eventlet import hubs, greenthread from eventlet.greenio import GreenSocket @@ -26,6 +25,7 @@ import greenlet from gunicorn.workers.base_async import AsyncWorker + def _eventlet_sendfile(fdout, fdin, offset, nbytes): while True: try: @@ -86,7 +86,7 @@ class EventletWorker(AsyncWorker): def patch(self): hubs.use_hub() - eventlet.monkey_patch(os=False) + eventlet.monkey_patch() patch_sendfile() def is_already_handled(self, respiter): diff --git a/gunicorn/workers/ggevent.py b/gunicorn/workers/ggevent.py index 16dea592..6c614fe2 100644 --- a/gunicorn/workers/ggevent.py +++ b/gunicorn/workers/ggevent.py @@ -10,20 +10,18 @@ from datetime import datetime from functools import partial import time -_socket = __import__("socket") - -# workaround on osx, disable kqueue -if sys.platform == "darwin": - os.environ['EVENT_NOKQUEUE'] = "1" - try: import gevent except ImportError: - raise RuntimeError("You need gevent installed to use this worker.") + raise RuntimeError("gevent worker requires gevent 1.4 or higher") +else: + from pkg_resources import parse_version + if parse_version(gevent.__version__) < parse_version('1.4'): + raise RuntimeError("gevent worker requires gevent 1.4 or higher") + from gevent.pool import Pool from gevent.server import StreamServer -from gevent.socket import wait_write, socket -from gevent import pywsgi +from gevent import hub, monkey, socket, pywsgi import gunicorn from gunicorn.http.wsgi import base_environ @@ -31,13 +29,14 @@ from gunicorn.workers.base_async import AsyncWorker VERSION = "gevent/%s gunicorn/%s" % (gevent.__version__, gunicorn.__version__) + def _gevent_sendfile(fdout, fdin, offset, nbytes): while True: try: return 
os.sendfile(fdout, fdin, offset, nbytes) except OSError as e: if e.args[0] == errno.EAGAIN: - wait_write(fdout) + socket.wait_write(fdout) else: raise @@ -51,14 +50,7 @@ class GeventWorker(AsyncWorker): wsgi_handler = None def patch(self): - from gevent import monkey - monkey.noisy = False - - # if the new version is used make sure to patch subprocess - if gevent.version_info[0] == 0: - monkey.patch_all() - else: - monkey.patch_all(subprocess=True) + monkey.patch_all() # monkey patch sendfile to make it none blocking patch_sendfile() @@ -66,7 +58,7 @@ class GeventWorker(AsyncWorker): # patch sockets sockets = [] for s in self.sockets: - sockets.append(socket(s.FAMILY, _socket.SOCK_STREAM, + sockets.append(socket.socket(s.FAMILY, socket.SOCK_STREAM, fileno=s.sock.fileno())) self.sockets = sockets @@ -165,34 +157,10 @@ class GeventWorker(AsyncWorker): # by deferring to a new greenlet. See #1645 gevent.spawn(super(GeventWorker, self).handle_usr1, sig, frame) - if gevent.version_info[0] == 0: - - def init_process(self): - # monkey patch here - self.patch() - - # reinit the hub - import gevent.core - gevent.core.reinit() - - #gevent 0.13 and older doesn't reinitialize dns for us after forking - #here's the workaround - gevent.core.dns_shutdown(fail_requests=1) - gevent.core.dns_init() - super(GeventWorker, self).init_process() - - else: - - def init_process(self): - # monkey patch here - self.patch() - - # reinit the hub - from gevent import hub - hub.reinit() - - # then initialize the process - super(GeventWorker, self).init_process() + def init_process(self): + self.patch() + hub.reinit() + super(GeventWorker, self).init_process() class GeventResponse(object): From 09cc75f94f0e2c122ea7276d7e2979654255b5a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Szalski?= Date: Wed, 6 Feb 2019 08:19:49 +0100 Subject: [PATCH 020/263] Fix typo in README.rst --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 
f88697ea..6b9bcaf1 100644 --- a/README.rst +++ b/README.rst @@ -28,7 +28,7 @@ The documentation is hosted at http://docs.gunicorn.org. Installation ------------ -Gunicorn requires **Python **Python 3.x >= 3.4**. +Gunicorn requires **Python 3.x >= 3.4**. Install from PyPI:: From 7ed20f6833c12d216a3b7241ff0f7e63f4eb50d2 Mon Sep 17 00:00:00 2001 From: Benoit Chesneau Date: Thu, 7 Feb 2019 16:59:37 +0100 Subject: [PATCH 021/263] corretly set max_requests fix #1978 --- gunicorn/workers/base.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/gunicorn/workers/base.py b/gunicorn/workers/base.py index 934ad723..3e9fb1e9 100644 --- a/gunicorn/workers/base.py +++ b/gunicorn/workers/base.py @@ -51,8 +51,13 @@ class Worker(object): self.reloader = None self.nr = 0 - jitter = randint(0, cfg.max_requests_jitter) - self.max_requests = cfg.max_requests + jitter or sys.maxsize + + if cfg.max_requests > 0: + jitter = randint(0, cfg.max_requests_jitter) + self.max_requests = cfg.max_requests + jitter + else: + self.max_requests = sys.maxsize + self.alive = True self.log = log self.tmp = WorkerTmp(cfg) From a1ffebcdb2a493b4fc891c88dce897920bb8988d Mon Sep 17 00:00:00 2001 From: Jat Date: Wed, 13 Feb 2019 22:43:21 +0800 Subject: [PATCH 022/263] worker_connections also affects the Gthread worker type --- docs/source/settings.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/settings.rst b/docs/source/settings.rst index f67c5363..6b42b3cc 100644 --- a/docs/source/settings.rst +++ b/docs/source/settings.rst @@ -1262,7 +1262,7 @@ worker_connections The maximum number of simultaneous clients. -This setting only affects the Eventlet and Gevent worker types. +This setting only affects the Eventlet, Gevent and Gthread worker types. .. 
_max-requests: From 5680320e5c11570f616bd0293ffe3edf9ab65e73 Mon Sep 17 00:00:00 2001 From: Andrew Widdersheim Date: Tue, 19 Feb 2019 16:09:46 -0500 Subject: [PATCH 023/263] Simplify futures import Commits e974f305 and 78208c8c removed support for Python 2 and Python <3.4 respectively so the conditional logic for importing `concurrent.futures` is no longer necessary as it has been part of the standard library since Python 3.2. --- gunicorn/workers/gthread.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/gunicorn/workers/gthread.py b/gunicorn/workers/gthread.py index 5625a91e..48e93764 100644 --- a/gunicorn/workers/gthread.py +++ b/gunicorn/workers/gthread.py @@ -10,6 +10,7 @@ # If no event happen after the keep alive timeout, the connection is # closed. +import concurrent.futures as futures import errno import os import selectors @@ -27,13 +28,6 @@ from .. import http from .. import util from ..http import wsgi -try: - import concurrent.futures as futures -except ImportError: - raise RuntimeError(""" - You need to install the 'futures' package to use this worker with this - Python version. - """) class TConn(object): From a2a8bc1ae6fa89110f6b2030d8fb7f3a75f1f5b7 Mon Sep 17 00:00:00 2001 From: Andrew Widdersheim Date: Wed, 20 Feb 2019 15:55:31 -0500 Subject: [PATCH 024/263] Move ThreadPoolExecutor() creation into own method Move ThreadPoolExecutor() creation into it's own method so it is easier to override when subclassing. 
--- gunicorn/workers/gthread.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/gunicorn/workers/gthread.py b/gunicorn/workers/gthread.py index 48e93764..a4c4c771 100644 --- a/gunicorn/workers/gthread.py +++ b/gunicorn/workers/gthread.py @@ -85,11 +85,15 @@ class ThreadWorker(base.Worker): "Check the number of worker connections and threads.") def init_process(self): - self.tpool = futures.ThreadPoolExecutor(max_workers=self.cfg.threads) + self.tpool = self.get_thread_pool() self.poller = selectors.DefaultSelector() self._lock = RLock() super(ThreadWorker, self).init_process() + def get_thread_pool(self): + """Override this method to customize how the thread pool is created""" + return futures.ThreadPoolExecutor(max_workers=self.cfg.threads) + def handle_quit(self, sig, frame): self.alive = False # worker_int callback From a8963ef1a5a76f3df75ce477b55fe0297e3b617d Mon Sep 17 00:00:00 2001 From: Marat Date: Sat, 23 Feb 2019 23:28:56 +0300 Subject: [PATCH 025/263] Remove redundant super() arguments --- .../testing/testing/apps/someapp/middleware.py | 2 +- examples/standalone_app.py | 2 +- examples/when_ready.conf.py | 2 +- gunicorn/app/base.py | 2 +- gunicorn/config.py | 4 ++-- gunicorn/glogging.py | 4 ++-- gunicorn/http/message.py | 4 ++-- gunicorn/http/unreader.py | 4 ++-- gunicorn/reloader.py | 4 ++-- gunicorn/sock.py | 4 ++-- gunicorn/workers/base.py | 3 +-- gunicorn/workers/base_async.py | 2 +- gunicorn/workers/geventlet.py | 10 +++++----- gunicorn/workers/ggevent.py | 15 +++++++-------- gunicorn/workers/gthread.py | 4 ++-- gunicorn/workers/gtornado.py | 4 ++-- tests/test_arbiter.py | 2 +- tests/test_config.py | 2 +- 18 files changed, 36 insertions(+), 38 deletions(-) diff --git a/examples/frameworks/django/testing/testing/apps/someapp/middleware.py b/examples/frameworks/django/testing/testing/apps/someapp/middleware.py index 39a4e234..ddc667a9 100644 --- a/examples/frameworks/django/testing/testing/apps/someapp/middleware.py +++ 
b/examples/frameworks/django/testing/testing/apps/someapp/middleware.py @@ -10,7 +10,7 @@ def child_process(queue): class GunicornSubProcessTestMiddleware(object): def __init__(self): - super(GunicornSubProcessTestMiddleware, self).__init__() + super().__init__() self.queue = Queue() self.process = Process(target=child_process, args=(self.queue,)) self.process.start() diff --git a/examples/standalone_app.py b/examples/standalone_app.py index 9abda283..7731aaf5 100644 --- a/examples/standalone_app.py +++ b/examples/standalone_app.py @@ -35,7 +35,7 @@ class StandaloneApplication(gunicorn.app.base.BaseApplication): def __init__(self, app, options=None): self.options = options or {} self.application = app - super(StandaloneApplication, self).__init__() + super().__init__() def load_config(self): config = {key: value for key, value in self.options.items() diff --git a/examples/when_ready.conf.py b/examples/when_ready.conf.py index 195284b3..1531ec14 100644 --- a/examples/when_ready.conf.py +++ b/examples/when_ready.conf.py @@ -8,7 +8,7 @@ max_mem = 100000 class MemoryWatch(threading.Thread): def __init__(self, server, max_mem): - super(MemoryWatch, self).__init__() + super().__init__() self.daemon = True self.server = server self.max_mem = max_mem diff --git a/gunicorn/app/base.py b/gunicorn/app/base.py index 1d6b3897..0276b009 100644 --- a/gunicorn/app/base.py +++ b/gunicorn/app/base.py @@ -218,4 +218,4 @@ class Application(BaseApplication): if pythonpath not in sys.path: sys.path.insert(0, pythonpath) - super(Application, self).run() + super().run() diff --git a/gunicorn/config.py b/gunicorn/config.py index 03fa887b..8dda626e 100644 --- a/gunicorn/config.py +++ b/gunicorn/config.py @@ -59,7 +59,7 @@ class Config(object): def __setattr__(self, name, value): if name != "settings" and name in self.settings: raise AttributeError("Invalid access!") - super(Config, self).__setattr__(name, value) + super().__setattr__(name, value) def set(self, name, value): if name not in 
self.settings: @@ -224,7 +224,7 @@ class Config(object): class SettingMeta(type): def __new__(cls, name, bases, attrs): - super_new = super(SettingMeta, cls).__new__ + super_new = super().__new__ parents = [b for b in bases if isinstance(b, SettingMeta)] if not parents: return super_new(cls, name, bases, attrs) diff --git a/gunicorn/glogging.py b/gunicorn/glogging.py index 56cc5bd0..3f7b4ac7 100644 --- a/gunicorn/glogging.py +++ b/gunicorn/glogging.py @@ -108,11 +108,11 @@ class SafeAtoms(dict): if k.startswith("{"): kl = k.lower() if kl in self: - return super(SafeAtoms, self).__getitem__(kl) + return super().__getitem__(kl) else: return "-" if k in self: - return super(SafeAtoms, self).__getitem__(k) + return super().__getitem__(k) else: return '-' diff --git a/gunicorn/http/message.py b/gunicorn/http/message.py index 0dda58db..3c080207 100644 --- a/gunicorn/http/message.py +++ b/gunicorn/http/message.py @@ -177,7 +177,7 @@ class Request(Message): self.req_number = req_number self.proxy_protocol_info = None - super(Request, self).__init__(cfg, unreader) + super().__init__(cfg, unreader) def get_data(self, unreader, buf, stop=False): data = unreader.read() @@ -357,6 +357,6 @@ class Request(Message): self.version = (int(match.group(1)), int(match.group(2))) def set_body_reader(self): - super(Request, self).set_body_reader() + super().set_body_reader() if isinstance(self.body.reader, EOFReader): self.body = Body(LengthReader(self.unreader, 0)) diff --git a/gunicorn/http/unreader.py b/gunicorn/http/unreader.py index be07e7ac..273bfc31 100644 --- a/gunicorn/http/unreader.py +++ b/gunicorn/http/unreader.py @@ -56,7 +56,7 @@ class Unreader(object): class SocketUnreader(Unreader): def __init__(self, sock, max_chunk=8192): - super(SocketUnreader, self).__init__() + super().__init__() self.sock = sock self.mxchunk = max_chunk @@ -66,7 +66,7 @@ class SocketUnreader(Unreader): class IterUnreader(Unreader): def __init__(self, iterable): - super(IterUnreader, self).__init__() 
+ super().__init__() self.iter = iter(iterable) def chunk(self): diff --git a/gunicorn/reloader.py b/gunicorn/reloader.py index 96b7d288..500b22ed 100644 --- a/gunicorn/reloader.py +++ b/gunicorn/reloader.py @@ -15,7 +15,7 @@ COMPILED_EXT_RE = re.compile(r'py[co]$') class Reloader(threading.Thread): def __init__(self, extra_files=None, interval=1, callback=None): - super(Reloader, self).__init__() + super().__init__() self.setDaemon(True) self._extra_files = set(extra_files or ()) self._extra_files_lock = threading.RLock() @@ -74,7 +74,7 @@ if has_inotify: | inotify.constants.IN_MOVED_TO) def __init__(self, extra_files=None, callback=None): - super(InotifyReloader, self).__init__() + super().__init__() self.setDaemon(True) self._callback = callback self._dirs = set() diff --git a/gunicorn/sock.py b/gunicorn/sock.py index 8d35c4d4..e53e578e 100644 --- a/gunicorn/sock.py +++ b/gunicorn/sock.py @@ -87,7 +87,7 @@ class TCPSocket(BaseSocket): def set_options(self, sock, bound=False): sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1) - return super(TCPSocket, self).set_options(sock, bound=bound) + return super().set_options(sock, bound=bound) class TCP6Socket(TCPSocket): @@ -115,7 +115,7 @@ class UnixSocket(BaseSocket): os.remove(addr) else: raise ValueError("%r is not a socket" % addr) - super(UnixSocket, self).__init__(addr, conf, log, fd=fd) + super().__init__(addr, conf, log, fd=fd) def __str__(self): return "unix:%s" % self.cfg_addr diff --git a/gunicorn/workers/base.py b/gunicorn/workers/base.py index 3e9fb1e9..f95994bc 100644 --- a/gunicorn/workers/base.py +++ b/gunicorn/workers/base.py @@ -85,8 +85,7 @@ class Worker(object): """\ If you override this method in a subclass, the last statement in the function should be to call this method with - super(MyWorkerClass, self).init_process() so that the ``run()`` - loop is initiated. + super().init_process() so that the ``run()`` loop is initiated. 
""" # set environment' variables diff --git a/gunicorn/workers/base_async.py b/gunicorn/workers/base_async.py index 05f4799a..ebd0fc10 100644 --- a/gunicorn/workers/base_async.py +++ b/gunicorn/workers/base_async.py @@ -20,7 +20,7 @@ ALREADY_HANDLED = object() class AsyncWorker(base.Worker): def __init__(self, *args, **kwargs): - super(AsyncWorker, self).__init__(*args, **kwargs) + super().__init__(*args, **kwargs) self.worker_connections = self.cfg.worker_connections def timeout_ctx(self): diff --git a/gunicorn/workers/geventlet.py b/gunicorn/workers/geventlet.py index eda95992..91d2f333 100644 --- a/gunicorn/workers/geventlet.py +++ b/gunicorn/workers/geventlet.py @@ -93,17 +93,17 @@ class EventletWorker(AsyncWorker): if respiter == EVENTLET_ALREADY_HANDLED: raise StopIteration() else: - return super(EventletWorker, self).is_already_handled(respiter) + return super().is_already_handled(respiter) def init_process(self): - super(EventletWorker, self).init_process() + super().init_process() self.patch() def handle_quit(self, sig, frame): - eventlet.spawn(super(EventletWorker, self).handle_quit, sig, frame) + eventlet.spawn(super().handle_quit, sig, frame) def handle_usr1(self, sig, frame): - eventlet.spawn(super(EventletWorker, self).handle_usr1, sig, frame) + eventlet.spawn(super().handle_usr1, sig, frame) def timeout_ctx(self): return eventlet.Timeout(self.cfg.keepalive or None, False) @@ -113,7 +113,7 @@ class EventletWorker(AsyncWorker): client = eventlet.wrap_ssl(client, server_side=True, **self.cfg.ssl_options) - super(EventletWorker, self).handle(listener, client, addr) + super().handle(listener, client, addr) def run(self): acceptors = [] diff --git a/gunicorn/workers/ggevent.py b/gunicorn/workers/ggevent.py index 6c614fe2..5dfec5e1 100644 --- a/gunicorn/workers/ggevent.py +++ b/gunicorn/workers/ggevent.py @@ -63,7 +63,7 @@ class GeventWorker(AsyncWorker): self.sockets = sockets def notify(self): - super(GeventWorker, self).notify() + super().notify() if 
self.ppid != os.getppid(): self.log.info("Parent changed, shutting down: %s", self) sys.exit(0) @@ -136,12 +136,11 @@ class GeventWorker(AsyncWorker): # Connected socket timeout defaults to socket.getdefaulttimeout(). # This forces to blocking mode. client.setblocking(1) - super(GeventWorker, self).handle(listener, client, addr) + super().handle(listener, client, addr) def handle_request(self, listener_name, req, sock, addr): try: - super(GeventWorker, self).handle_request(listener_name, req, sock, - addr) + super().handle_request(listener_name, req, sock, addr) except gevent.GreenletExit: pass except SystemExit: @@ -150,17 +149,17 @@ class GeventWorker(AsyncWorker): def handle_quit(self, sig, frame): # Move this out of the signal handler so we can use # blocking calls. See #1126 - gevent.spawn(super(GeventWorker, self).handle_quit, sig, frame) + gevent.spawn(super().handle_quit, sig, frame) def handle_usr1(self, sig, frame): # Make the gevent workers handle the usr1 signal # by deferring to a new greenlet. 
See #1645 - gevent.spawn(super(GeventWorker, self).handle_usr1, sig, frame) + gevent.spawn(super().handle_usr1, sig, frame) def init_process(self): self.patch() hub.reinit() - super(GeventWorker, self).init_process() + super().init_process() class GeventResponse(object): @@ -190,7 +189,7 @@ class PyWSGIHandler(pywsgi.WSGIHandler): self.server.log.access(resp, req_headers, self.environ, response_time) def get_environ(self): - env = super(PyWSGIHandler, self).get_environ() + env = super().get_environ() env['gunicorn.sock'] = self.socket env['RAW_URI'] = self.path return env diff --git a/gunicorn/workers/gthread.py b/gunicorn/workers/gthread.py index a4c4c771..30d2745e 100644 --- a/gunicorn/workers/gthread.py +++ b/gunicorn/workers/gthread.py @@ -65,7 +65,7 @@ class TConn(object): class ThreadWorker(base.Worker): def __init__(self, *args, **kwargs): - super(ThreadWorker, self).__init__(*args, **kwargs) + super().__init__(*args, **kwargs) self.worker_connections = self.cfg.worker_connections self.max_keepalived = self.cfg.worker_connections - self.cfg.threads # initialise the pool @@ -88,7 +88,7 @@ class ThreadWorker(base.Worker): self.tpool = self.get_thread_pool() self.poller = selectors.DefaultSelector() self._lock = RLock() - super(ThreadWorker, self).init_process() + super().init_process() def get_thread_pool(self): """Override this method to customize how the thread pool is created""" diff --git a/gunicorn/workers/gtornado.py b/gunicorn/workers/gtornado.py index 9c8379c2..40e5572a 100644 --- a/gunicorn/workers/gtornado.py +++ b/gunicorn/workers/gtornado.py @@ -44,7 +44,7 @@ class TornadoWorker(Worker): def handle_exit(self, sig, frame): if self.alive: - super(TornadoWorker, self).handle_exit(sig, frame) + super().handle_exit(sig, frame) def handle_request(self): self.nr += 1 @@ -84,7 +84,7 @@ class TornadoWorker(Worker): # should create its own IOLoop. We should clear current IOLoop # if exists before os.fork. 
IOLoop.clear_current() - super(TornadoWorker, self).init_process() + super().init_process() def run(self): self.ioloop = IOLoop.instance() diff --git a/tests/test_arbiter.py b/tests/test_arbiter.py index fa376ce7..bfd9fe65 100644 --- a/tests/test_arbiter.py +++ b/tests/test_arbiter.py @@ -173,7 +173,7 @@ class PreloadedAppWithEnvSettings(DummyApplication): 'preloaded' application. """ verify_env_vars() - return super(PreloadedAppWithEnvSettings, self).wsgi() + return super().wsgi() def verify_env_vars(): diff --git a/tests/test_config.py b/tests/test_config.py index 98420bd0..0587c63c 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -42,7 +42,7 @@ class AltArgs(object): class NoConfigApp(Application): def __init__(self): - super(NoConfigApp, self).__init__("no_usage", prog="gunicorn_test") + super().__init__("no_usage", prog="gunicorn_test") def init(self, parser, opts, args): pass From 2b07f2be287ed8d1f9ee3af3646a3158af67186d Mon Sep 17 00:00:00 2001 From: Hasan Ramezani Date: Tue, 2 Jan 2018 00:07:06 +0330 Subject: [PATCH 026/263] Rewrite `parse_address` util and add one test --- gunicorn/util.py | 27 +++++++++++---------------- tests/test_util.py | 4 ++++ 2 files changed, 15 insertions(+), 16 deletions(-) diff --git a/gunicorn/util.py b/gunicorn/util.py index 899416ad..37e586a8 100644 --- a/gunicorn/util.py +++ b/gunicorn/util.py @@ -247,7 +247,7 @@ def is_ipv6(addr): return True -def parse_address(netloc, default_port=8000): +def parse_address(netloc, default_port='8000'): if re.match(r'unix:(//)?', netloc): return re.split(r'unix:(//)?', netloc)[-1] @@ -260,27 +260,22 @@ def parse_address(netloc, default_port=8000): if netloc.startswith("tcp://"): netloc = netloc.split("tcp://")[1] + host, port = netloc, default_port - # get host if '[' in netloc and ']' in netloc: - host = netloc.split(']')[0][1:].lower() + host = netloc.split(']')[0][1:] + port = (netloc.split(']:') + [default_port])[1] elif ':' in netloc: - host = netloc.split(':')[0].lower() 
+ host, port = (netloc.split(':') + [default_port])[:2] elif netloc == "": - host = "0.0.0.0" - else: - host = netloc.lower() + host, port = "0.0.0.0", default_port - #get port - netloc = netloc.split(']')[-1] - if ":" in netloc: - port = netloc.split(':', 1)[1] - if not port.isdigit(): - raise RuntimeError("%r is not a valid port number." % port) + try: port = int(port) - else: - port = default_port - return (host, port) + except ValueError: + raise RuntimeError("%r is not a valid port number." % port) + + return host.lower(), port def close_on_exec(fd): diff --git a/tests/test_util.py b/tests/test_util.py index 3b8be0c3..3b8688a2 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -13,8 +13,12 @@ from urllib.parse import SplitResult @pytest.mark.parametrize('test_input, expected', [ ('unix://var/run/test.sock', 'var/run/test.sock'), ('unix:/var/run/test.sock', '/var/run/test.sock'), + ('tcp://localhost', ('localhost', 8000)), + ('tcp://localhost:5000', ('localhost', 5000)), ('', ('0.0.0.0', 8000)), ('[::1]:8000', ('::1', 8000)), + ('[::1]:5000', ('::1', 5000)), + ('[::1]', ('::1', 8000)), ('localhost:8000', ('localhost', 8000)), ('127.0.0.1:8000', ('127.0.0.1', 8000)), ('localhost', ('localhost', 8000)), From cc8e67ea83ce1064ef605d82130ace2a3670d68a Mon Sep 17 00:00:00 2001 From: Randall Leeds Date: Sat, 16 Mar 2019 16:11:02 -0700 Subject: [PATCH 027/263] Fix new pylint errors --- gunicorn/util.py | 3 +-- gunicorn/workers/geventlet.py | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/gunicorn/util.py b/gunicorn/util.py index 37e586a8..3ec54af0 100644 --- a/gunicorn/util.py +++ b/gunicorn/util.py @@ -369,8 +369,7 @@ def import_app(module): if module.endswith(".py") and os.path.exists(module): msg = "Failed to find application, did you mean '%s:%s'?" 
raise ImportError(msg % (module.rsplit(".", 1)[0], obj)) - else: - raise + raise mod = sys.modules[module] diff --git a/gunicorn/workers/geventlet.py b/gunicorn/workers/geventlet.py index 91d2f333..20f29734 100644 --- a/gunicorn/workers/geventlet.py +++ b/gunicorn/workers/geventlet.py @@ -92,8 +92,7 @@ class EventletWorker(AsyncWorker): def is_already_handled(self, respiter): if respiter == EVENTLET_ALREADY_HANDLED: raise StopIteration() - else: - return super().is_already_handled(respiter) + return super().is_already_handled(respiter) def init_process(self): super().init_process() From 879651bb6f873631725cc31cbda783349fdf9dc3 Mon Sep 17 00:00:00 2001 From: Brett Randall Date: Fri, 9 Nov 2018 12:49:38 +1100 Subject: [PATCH 028/263] Header values are encoded using latin-1, not ascii. This commit reverts one aspect changed by 5f4ebd2eb2b08783a5fbefe79d09fcb3fc1fbc73 (#1151); header-values are again encoded as latin-1 and not ascii. Test is restored but uses a latin-1-mappable test-character, not a general utf8 character. Fixed #1778. 
Signed-off-by: Brett Randall --- examples/test.py | 3 ++- gunicorn/http/wsgi.py | 2 +- tests/test_http.py | 7 ++++++- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/examples/test.py b/examples/test.py index ffface08..09ea06f0 100644 --- a/examples/test.py +++ b/examples/test.py @@ -20,7 +20,8 @@ def app(environ, start_response): response_headers = [ ('Content-type', 'text/plain'), ('Content-Length', str(len(data))), - ('X-Gunicorn-Version', __version__) + ('X-Gunicorn-Version', __version__), + ('Foo', 'B\u00e5r'), # Foo: Bår ] start_response(status, response_headers) return iter([data]) diff --git a/gunicorn/http/wsgi.py b/gunicorn/http/wsgi.py index 593c8f24..1b8b9cc0 100644 --- a/gunicorn/http/wsgi.py +++ b/gunicorn/http/wsgi.py @@ -315,7 +315,7 @@ class Response(object): tosend.extend(["%s: %s\r\n" % (k, v) for k, v in self.headers]) header_str = "%s\r\n" % "".join(tosend) - util.write(self.sock, util.to_bytestring(header_str, "ascii")) + util.write(self.sock, util.to_bytestring(header_str, "latin-1")) self.headers_sent = True def write(self, arg): diff --git a/tests/test_http.py b/tests/test_http.py index a91f4794..6bcb5f09 100644 --- a/tests/test_http.py +++ b/tests/test_http.py @@ -81,8 +81,13 @@ def test_http_header_encoding(): mocked_request = mock.MagicMock() response = Response(mocked_request, mocked_socket, None) - # set umlaut header + # set umlaut header value - latin-1 is OK response.headers.append(('foo', 'häder')) + response.send_headers() + + # set a-breve header value - unicode, non-latin-1 fails + response = Response(mocked_request, mocked_socket, None) + response.headers.append(('apple', 'măr')) with pytest.raises(UnicodeEncodeError): response.send_headers() From 96dde54af1350860791f6ac275a1d56a8d0e1da9 Mon Sep 17 00:00:00 2001 From: dblack Date: Fri, 21 Apr 2017 08:35:24 -0700 Subject: [PATCH 029/263] optional datadog tags for statsd metrics --- THANKS | 1 + docs/source/settings.rst | 11 +++++++++++ gunicorn/config.py | 14 
++++++++++++++ gunicorn/instrument/statsd.py | 7 +++++++ tests/test_statsd.py | 11 +++++++++++ 5 files changed, 44 insertions(+) diff --git a/THANKS b/THANKS index 57ed4dab..2c2a6de5 100644 --- a/THANKS +++ b/THANKS @@ -47,6 +47,7 @@ Dan Callaghan Dan Sully Daniel Quinn Dariusz Suchojad +David Black David Vincelli David Wolever Denis Bilenko diff --git a/docs/source/settings.rst b/docs/source/settings.rst index 6b42b3cc..5e11b421 100644 --- a/docs/source/settings.rst +++ b/docs/source/settings.rst @@ -376,6 +376,17 @@ if not provided). .. versionadded:: 19.2 +dogstatsd_tags +~~~~~~~~~~~~~~ + +* ``--dogstatsd-tags DOGSTATSD_TAGS`` +* ``(empty string)`` + +Comma-delimited list of static dogstatsd (datadog statsd) tags sent with all statsd metrics +See: `Datadog Docs ` + +.. versionadded:: 20 + Process Naming -------------- diff --git a/gunicorn/config.py b/gunicorn/config.py index 8dda626e..52095801 100644 --- a/gunicorn/config.py +++ b/gunicorn/config.py @@ -1478,6 +1478,20 @@ class StatsdHost(Setting): .. versionadded:: 19.1 """ +# Datadog Statsd (dogstatsd) tags. https://docs.datadoghq.com/developers/dogstatsd/ +class DogstatsdTags(Setting): + name = "dogstatsd_tags" + section = "Logging" + cli = ["--dogstatsd-tags"] + meta = "DOGSTATSD_TAGS" + default = "" + validator = validate_string + desc = """\ + A comma-delimited list of datadog statsd (dogstatsd) tags to append to statsd metrics. + + .. 
versionadded:: 20 + """ + class StatsdPrefix(Setting): name = "statsd_prefix" section = "Logging" diff --git a/gunicorn/instrument/statsd.py b/gunicorn/instrument/statsd.py index 12b6de4e..7ef0bbb6 100644 --- a/gunicorn/instrument/statsd.py +++ b/gunicorn/instrument/statsd.py @@ -34,6 +34,8 @@ class Statsd(Logger): except Exception: self.sock = None + self.dogstatsd_tags = cfg.dogstatsd_tags + # Log errors and warnings def critical(self, msg, *args, **kwargs): Logger.critical(self, msg, *args, **kwargs) @@ -116,6 +118,11 @@ class Statsd(Logger): try: if isinstance(msg, str): msg = msg.encode("ascii") + + # http://docs.datadoghq.com/guides/dogstatsd/#datagram-format + if self.dogstatsd_tags: + msg = msg + b"|#" + self.dogstatsd_tags.encode('ascii') + if self.sock: self.sock.send(msg) except Exception: diff --git a/tests/test_statsd.py b/tests/test_statsd.py index b75057c8..130bfa6b 100644 --- a/tests/test_statsd.py +++ b/tests/test_statsd.py @@ -59,6 +59,17 @@ def test_statsd_fail(): logger.exception("No impact on logging") +def test_dogstatsd_tags(): + c = Config() + tags = 'yucatan,libertine:rhubarb' + c.set('dogstatsd_tags', tags) + logger = Statsd(c) + logger.sock = MockSocket(False) + logger.info("Twill", extra={"mtype": "gauge", "metric": "barb.westerly", + "value": 2}) + assert logger.sock.msgs[0] == b"barb.westerly:2|g|#" + tags.encode('ascii') + + def test_instrument(): logger = Statsd(Config()) # Capture logged messages From c2e39fd1a786b547b89735d7e3a78c3c44950f94 Mon Sep 17 00:00:00 2001 From: Armin Berres <20811121+aberres@users.noreply.github.com> Date: Tue, 30 Apr 2019 16:32:35 +0200 Subject: [PATCH 030/263] Emphasize that full greenlet support might need additional work --- docs/source/design.rst | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/docs/source/design.rst b/docs/source/design.rst index 85157666..079e65ad 100644 --- a/docs/source/design.rst +++ b/docs/source/design.rst @@ -46,6 +46,13 @@ Gevent_). 
Greenlets are an implementation of cooperative multi-threading for Python. In general, an application should be able to make use of these worker classes with no changes. +For full greenlet support applications might need to be adapted. +When using, e.g., Gevent_ and Psycopg_ it makes sense to ensure psycogreen_ is +installed and `setup `_. + +Other applications might not be compatible at all as they, e.g., rely on +the original unpatched behavior. + Tornado Workers --------------- @@ -143,3 +150,5 @@ code in the master process). .. _Hey: https://github.com/rakyll/hey .. _aiohttp: https://docs.aiohttp.org/en/stable/deployment.html#nginx-gunicorn .. _`example`: https://github.com/benoitc/gunicorn/blob/master/examples/frameworks/flaskapp_aiohttp_wsgi.py +.. _Psycopg: http://initd.org/psycopg/ +.. _psycogreen: https://bitbucket.org/dvarrazzo/psycogreen From 78737074cfaddb33f29759f7448597cc69d3281b Mon Sep 17 00:00:00 2001 From: Jason Madden Date: Mon, 6 May 2019 14:40:38 -0500 Subject: [PATCH 031/263] Add Jason Madden to MAINTAINERS. --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 1799675e..004fa6e5 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7,3 +7,4 @@ Nikolay Kim Andrew Svetlov Stéphane Wirtel Berker Peksağ +Jason Madden From a542a07773adce68574ff9e1ef64824a8b811cbc Mon Sep 17 00:00:00 2001 From: Randall Leeds Date: Wed, 8 May 2019 19:23:13 -0700 Subject: [PATCH 032/263] Fix eventlet patching and bump required version Fix eventlet patching by performing the patching before the `init_process` method, which does not return, is called. This fix depends on bumping the required version to 0.24.1, which contains a fix for the monotonic clock. 
See the relevant commit: eventlet/eventlet@82f1877 This commit reverts 120f503f68929ef98d721a406745a1dbbb9d83fe Fix #1847 Fix #1884 --- gunicorn/config.py | 2 +- gunicorn/workers/geventlet.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/gunicorn/config.py b/gunicorn/config.py index 52095801..aae5270c 100644 --- a/gunicorn/config.py +++ b/gunicorn/config.py @@ -630,7 +630,7 @@ class WorkerClass(Setting): A string referring to one of the following bundled classes: * ``sync`` - * ``eventlet`` - Requires eventlet >= 0.24 (or install it via + * ``eventlet`` - Requires eventlet >= 0.24.1 (or install it via ``pip install gunicorn[eventlet]``) * ``gevent`` - Requires gevent >= 1.4 (or install it via ``pip install gunicorn[gevent]``) diff --git a/gunicorn/workers/geventlet.py b/gunicorn/workers/geventlet.py index 20f29734..968868a3 100644 --- a/gunicorn/workers/geventlet.py +++ b/gunicorn/workers/geventlet.py @@ -11,11 +11,11 @@ import sys try: import eventlet except ImportError: - raise RuntimeError("eventlet worker requires eventlet 0.24 or higher") + raise RuntimeError("eventlet worker requires eventlet 0.24.1 or higher") else: from pkg_resources import parse_version - if parse_version(eventlet.__version__) < parse_version('0.24'): - raise RuntimeError("eventlet worker requires eventlet 0.24 or higher") + if parse_version(eventlet.__version__) < parse_version('0.24.1'): + raise RuntimeError("eventlet worker requires eventlet 0.24.1 or higher") from eventlet import hubs, greenthread from eventlet.greenio import GreenSocket @@ -95,8 +95,8 @@ class EventletWorker(AsyncWorker): return super().is_already_handled(respiter) def init_process(self): - super().init_process() self.patch() + super().init_process() def handle_quit(self, sig, frame): eventlet.spawn(super().handle_quit, sig, frame) From 678b326dc030b450717ec505df69863dcd6fb716 Mon Sep 17 00:00:00 2001 From: Sebastien Williams-Wynn Date: Thu, 9 May 2019 19:13:31 +0100 Subject: [PATCH 
033/263] Fix typo in --worker-class documentation (#2030) --- docs/source/settings.rst | 39 +++++++++++++++++---------------------- gunicorn/config.py | 2 +- setup.py | 4 ++-- 3 files changed, 20 insertions(+), 25 deletions(-) diff --git a/docs/source/settings.rst b/docs/source/settings.rst index 5e11b421..16d8961a 100644 --- a/docs/source/settings.rst +++ b/docs/source/settings.rst @@ -363,6 +363,18 @@ statsd_host .. versionadded:: 19.1 +.. _dogstatsd-tags: + +dogstatsd_tags +~~~~~~~~~~~~~~ + +* ``--dogstatsd-tags DOGSTATSD_TAGS`` +* ``(empty string)`` + +A comma-delimited list of datadog statsd (dogstatsd) tags to append to statsd metrics. + +.. versionadded:: 20 + .. _statsd-prefix: statsd_prefix @@ -376,17 +388,6 @@ if not provided). .. versionadded:: 19.2 -dogstatsd_tags -~~~~~~~~~~~~~~ - -* ``--dogstatsd-tags DOGSTATSD_TAGS`` -* ``(empty string)`` - -Comma-delimited list of static dogstatsd (datadog statsd) tags sent with all statsd metrics -See: `Datadog Docs ` - -.. versionadded:: 20 - Process Naming -------------- @@ -1213,31 +1214,25 @@ The type of workers to use. The default class (``sync``) should handle most "normal" types of workloads. You'll want to read :doc:`design` for information on when you might want to choose one of the other worker classes. Required -libraries may be installed using setuptools' ``extra_require`` feature. +libraries may be installed using setuptools' ``extras_require`` feature. 
A string referring to one of the following bundled classes: * ``sync`` -* ``eventlet`` - Requires eventlet >= 0.9.7 (or install it via +* ``eventlet`` - Requires eventlet >= 0.24.1 (or install it via ``pip install gunicorn[eventlet]``) -* ``gevent`` - Requires gevent >= 0.13 (or install it via +* ``gevent`` - Requires gevent >= 1.4 (or install it via ``pip install gunicorn[gevent]``) -* ``tornado`` - Requires tornado >= 0.2 (or install it via +* ``tornado`` - Requires tornado >= 0.2 (or install it via ``pip install gunicorn[tornado]``) * ``gthread`` - Python 2 requires the futures package to be installed (or install it via ``pip install gunicorn[gthread]``) -* ``gaiohttp`` - Deprecated. Optionally, you can provide your own worker by giving Gunicorn a Python path to a subclass of ``gunicorn.workers.base.Worker``. This alternative syntax will load the gevent class: ``gunicorn.workers.ggevent.GeventWorker``. -.. deprecated:: 19.8 - The ``gaiohttp`` worker is deprecated. Please use - ``aiohttp.worker.GunicornWebWorker`` instead. See - :ref:`asyncio-workers` for more information on how to use it. - .. _threads: threads @@ -1273,7 +1268,7 @@ worker_connections The maximum number of simultaneous clients. -This setting only affects the Eventlet, Gevent and Gthread worker types. +This setting only affects the Eventlet and Gevent worker types. .. _max-requests: diff --git a/gunicorn/config.py b/gunicorn/config.py index aae5270c..e8e0f926 100644 --- a/gunicorn/config.py +++ b/gunicorn/config.py @@ -625,7 +625,7 @@ class WorkerClass(Setting): The default class (``sync``) should handle most "normal" types of workloads. You'll want to read :doc:`design` for information on when you might want to choose one of the other worker classes. Required - libraries may be installed using setuptools' ``extra_require`` feature. + libraries may be installed using setuptools' ``extras_require`` feature. 
A string referring to one of the following bundled classes: diff --git a/setup.py b/setup.py index fd24c0df..ee898d88 100644 --- a/setup.py +++ b/setup.py @@ -73,7 +73,7 @@ install_requires = [ 'setuptools>=3.0', ] -extra_require = { +extras_require = { 'gevent': ['gevent>=0.13'], 'eventlet': ['eventlet>=0.9.7'], 'tornado': ['tornado>=0.2'], @@ -108,5 +108,5 @@ setup( [paste.server_runner] main=gunicorn.app.pasterapp:serve """, - extras_require=extra_require, + extras_require=extras_require, ) From 89c13491afcb712c9070663eba6c17601d45aeff Mon Sep 17 00:00:00 2001 From: DeltaF1 Date: Thu, 16 May 2019 14:05:50 -0400 Subject: [PATCH 034/263] Update install.rst Minor grammar/punctuation fixes --- docs/source/install.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/source/install.rst b/docs/source/install.rst index 4a9b0a8c..3002a611 100644 --- a/docs/source/install.rst +++ b/docs/source/install.rst @@ -40,7 +40,7 @@ want to consider one of the alternate worker types. $ pip install gunicorn[gevent] # Or, using extra .. note:: - Both require ``greenlet``, which should get installed automatically, + Both require ``greenlet``, which should get installed automatically. If its installation fails, you probably need to install the Python headers. These headers are available in most package managers. On Ubuntu the package name for ``apt-get`` is @@ -55,7 +55,7 @@ want to consider one of the alternate worker types. Debian GNU/Linux ================ -If you are using Debian GNU/Linux and it is recommended that you use +If you are using Debian GNU/Linux it is recommended that you use system packages to install Gunicorn except maybe when you want to use different versions of Gunicorn with virtualenv. This has a number of advantages: @@ -99,7 +99,7 @@ oldstable ("jessie") -------------------- The version of Gunicorn in the Debian_ "oldstable" distribution is 19.0 (June -2014). you can install it using:: +2014). 
You can install it using:: $ sudo apt-get install gunicorn @@ -128,7 +128,7 @@ install it in the usual way:: Ubuntu ====== -Ubuntu_ 12.04 (trusty) or later contains Gunicorn package by default so that +Ubuntu_ 12.04 (trusty) or later contains the Gunicorn package by default so that you can install it in the usual way:: $ sudo apt-get update From dc639e0d0eec0bd265037f54415014deb28bceb7 Mon Sep 17 00:00:00 2001 From: c-bata Date: Mon, 20 May 2019 15:26:35 +0900 Subject: [PATCH 035/263] Fix typo: hanle --- gunicorn/workers/gthread.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gunicorn/workers/gthread.py b/gunicorn/workers/gthread.py index 30d2745e..40960f00 100644 --- a/gunicorn/workers/gthread.py +++ b/gunicorn/workers/gthread.py @@ -216,7 +216,7 @@ class ThreadWorker(base.Worker): if not self.is_parent_alive(): break - # hanle keepalive timeouts + # handle keepalive timeouts self.murder_keepalived() self.tpool.shutdown(False) From d4ff4b419cd42f4fe9be28998c3e4fb965edf8e7 Mon Sep 17 00:00:00 2001 From: c-bata Date: Mon, 20 May 2019 15:35:08 +0900 Subject: [PATCH 036/263] Fix typo: peform to perform --- gunicorn/util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gunicorn/util.py b/gunicorn/util.py index 3ec54af0..d5d1fcfd 100644 --- a/gunicorn/util.py +++ b/gunicorn/util.py @@ -190,7 +190,7 @@ def chown(path, uid, gid): if sys.platform.startswith("win"): def _waitfor(func, pathname, waitall=False): - # Peform the operation + # Perform the operation func(pathname) # Now setup the wait loop if waitall: From aa8b258f937867a8a453b426e5c26db84a8ab879 Mon Sep 17 00:00:00 2001 From: c-bata Date: Mon, 20 May 2019 15:35:42 +0900 Subject: [PATCH 037/263] Fix typo: treatement to treatment --- gunicorn/instrument/statsd.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gunicorn/instrument/statsd.py b/gunicorn/instrument/statsd.py index 7ef0bbb6..9a537205 100644 --- a/gunicorn/instrument/statsd.py +++ 
b/gunicorn/instrument/statsd.py @@ -53,7 +53,7 @@ class Statsd(Logger): Logger.exception(self, msg, *args, **kwargs) self.increment("gunicorn.log.exception", 1) - # Special treatement for info, the most common log level + # Special treatment for info, the most common log level def info(self, msg, *args, **kwargs): self.log(logging.INFO, msg, *args, **kwargs) From 85ec74fc10ff5b56f3ac674a1b8491f5f068659c Mon Sep 17 00:00:00 2001 From: c-bata Date: Mon, 20 May 2019 22:24:45 +0900 Subject: [PATCH 038/263] Fix typo: connnection to connection --- gunicorn/workers/sync.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gunicorn/workers/sync.py b/gunicorn/workers/sync.py index 7efa354d..ab9e0856 100644 --- a/gunicorn/workers/sync.py +++ b/gunicorn/workers/sync.py @@ -18,7 +18,7 @@ import gunicorn.util as util import gunicorn.workers.base as base class StopWaiting(Exception): - """ exception raised to stop waiting for a connnection """ + """ exception raised to stop waiting for a connection """ class SyncWorker(base.Worker): From 3701ad9f26a7a4c0a081dfd0f6e97ecb272de515 Mon Sep 17 00:00:00 2001 From: Masashi SHIBATA Date: Thu, 23 May 2019 04:07:02 +0900 Subject: [PATCH 039/263] Use importlib instead of __import__ and eval (#2043) --- gunicorn/util.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/gunicorn/util.py b/gunicorn/util.py index d5d1fcfd..cea69794 100644 --- a/gunicorn/util.py +++ b/gunicorn/util.py @@ -7,6 +7,7 @@ import email.utils import errno import fcntl import html +import importlib import inspect import io import logging @@ -364,19 +365,17 @@ def import_app(module): module, obj = parts[0], parts[1] try: - __import__(module) + mod = importlib.import_module(module) except ImportError: if module.endswith(".py") and os.path.exists(module): msg = "Failed to find application, did you mean '%s:%s'?" 
raise ImportError(msg % (module.rsplit(".", 1)[0], obj)) raise - mod = sys.modules[module] - is_debug = logging.root.level == logging.DEBUG try: - app = eval(obj, vars(mod)) - except NameError: + app = getattr(mod, obj) + except AttributeError: if is_debug: traceback.print_exception(*sys.exc_info()) raise AppImportError("Failed to find application object %r in %r" % (obj, module)) From a13a2096edc2c7358ed218ef138cf571f2e7a888 Mon Sep 17 00:00:00 2001 From: Masashi SHIBATA Date: Mon, 27 May 2019 04:09:52 +0900 Subject: [PATCH 040/263] Use SourceFileLoader instead instead of execfile_ (#2046) --- gunicorn/_compat.py | 65 -------------------------------------------- gunicorn/app/base.py | 18 ++++++------ tests/treq.py | 15 ++++++---- 3 files changed, 17 insertions(+), 81 deletions(-) delete mode 100644 gunicorn/_compat.py diff --git a/gunicorn/_compat.py b/gunicorn/_compat.py deleted file mode 100644 index 2487e7b7..00000000 --- a/gunicorn/_compat.py +++ /dev/null @@ -1,65 +0,0 @@ -def _check_if_pyc(fname): - """Return True if the extension is .pyc, False if .py - and None if otherwise""" - from imp import find_module - from os.path import realpath, dirname, basename, splitext - - # Normalize the file-path for the find_module() - filepath = realpath(fname) - dirpath = dirname(filepath) - module_name = splitext(basename(filepath))[0] - - # Validate and fetch - try: - fileobj, fullpath, (_, _, pytype) = find_module(module_name, [dirpath]) - except ImportError: - raise IOError("Cannot find config file. " - "Path maybe incorrect! : {0}".format(filepath)) - return pytype, fileobj, fullpath - - -def _get_codeobj(pyfile): - """ Returns the code object, given a python file """ - from imp import PY_COMPILED, PY_SOURCE - - result, fileobj, fullpath = _check_if_pyc(pyfile) - - # WARNING: - # fp.read() can blowup if the module is extremely large file. - # Lookout for overflow errors. - try: - data = fileobj.read() - finally: - fileobj.close() - - # This is a .pyc file. 
Treat accordingly. - if result is PY_COMPILED: - # .pyc format is as follows: - # 0 - 4 bytes: Magic number, which changes with each create of .pyc file. - # First 2 bytes change with each marshal of .pyc file. Last 2 bytes is "\r\n". - # 4 - 8 bytes: Datetime value, when the .py was last changed. - # 8 - EOF: Marshalled code object data. - # So to get code object, just read the 8th byte onwards till EOF, and - # UN-marshal it. - import marshal - code_obj = marshal.loads(data[8:]) - - elif result is PY_SOURCE: - # This is a .py file. - code_obj = compile(data, fullpath, 'exec') - - else: - # Unsupported extension - raise Exception("Input file is unknown format: {0}".format(fullpath)) - - # Return code object - return code_obj - - -def execfile_(fname, *args): - if fname.endswith(".pyc"): - code = _get_codeobj(fname) - else: - with open(fname, 'rb') as file: - code = compile(file.read(), fname, 'exec') - return exec(code, *args) diff --git a/gunicorn/app/base.py b/gunicorn/app/base.py index 0276b009..a9e18859 100644 --- a/gunicorn/app/base.py +++ b/gunicorn/app/base.py @@ -2,16 +2,18 @@ # # This file is part of gunicorn released under the MIT license. # See the NOTICE for more information. 
+import importlib.machinery import os import sys import traceback +import types -from gunicorn._compat import execfile_ from gunicorn import util from gunicorn.arbiter import Arbiter from gunicorn.config import Config, get_default_config_file from gunicorn import debug + class BaseApplication(object): """ An application interface for configuring and loading @@ -93,22 +95,18 @@ class Application(BaseApplication): if not os.path.exists(filename): raise RuntimeError("%r doesn't exist" % filename) - cfg = { - "__builtins__": __builtins__, - "__name__": "__config__", - "__file__": filename, - "__doc__": None, - "__package__": None - } try: - execfile_(filename, cfg, cfg) + module_name = '__config__' + mod = types.ModuleType(module_name) + loader = importlib.machinery.SourceFileLoader(module_name, filename) + loader.exec_module(mod) except Exception: print("Failed to read config file: %s" % filename, file=sys.stderr) traceback.print_exc() sys.stderr.flush() sys.exit(1) - return cfg + return vars(mod) def get_config_from_module_name(self, module_name): return vars(util.import_module(module_name)) diff --git a/tests/treq.py b/tests/treq.py index d82d036f..71466855 100644 --- a/tests/treq.py +++ b/tests/treq.py @@ -4,10 +4,11 @@ # under the MIT license. 
import inspect +import importlib.machinery import os import random +import types -from gunicorn._compat import execfile_ from gunicorn.config import Config from gunicorn.http.parser import RequestParser from gunicorn.util import split_request_uri @@ -29,11 +30,13 @@ def uri(data): def load_py(fname): - config = globals().copy() - config["uri"] = uri - config["cfg"] = Config() - execfile_(fname, config) - return config + module_name = '__config__' + mod = types.ModuleType(module_name) + setattr(mod, 'uri', uri) + setattr(mod, 'cfg', Config()) + loader = importlib.machinery.SourceFileLoader(module_name, fname) + loader.exec_module(mod) + return vars(mod) class request(object): From c435341e6626d8ec4492620bc19a595863b1efc9 Mon Sep 17 00:00:00 2001 From: Masashi SHIBATA Date: Tue, 28 May 2019 22:05:51 +0900 Subject: [PATCH 041/263] Remove fallback for types.SimpleNamespace (#2049) --- tests/support.py | 15 --------------- tests/test_logger.py | 3 +-- tests/test_statsd.py | 2 +- 3 files changed, 2 insertions(+), 18 deletions(-) diff --git a/tests/support.py b/tests/support.py index 11782a30..af412348 100644 --- a/tests/support.py +++ b/tests/support.py @@ -48,18 +48,3 @@ def requires_mac_ver(*min_version): wrapper.min_version = min_version return wrapper return decorator - -try: - from types import SimpleNamespace # pylint: disable=unused-import -except ImportError: - class SimpleNamespace(object): - def __init__(self, **kwargs): - vars(self).update(kwargs) - - def __repr__(self): - keys = sorted(vars(self)) - items = ("{}={!r}".format(k, vars(self)[k]) for k in keys) - return "{}({})".format(type(self).__name__, ", ".join(items)) - - def __eq__(self, other): - return vars(self) == vars(other) diff --git a/tests/test_logger.py b/tests/test_logger.py index f2767946..5b8c0d42 100644 --- a/tests/test_logger.py +++ b/tests/test_logger.py @@ -1,10 +1,9 @@ import datetime +from types import SimpleNamespace from gunicorn.config import Config from gunicorn.glogging import 
Logger -from support import SimpleNamespace - def test_atoms_defaults(): response = SimpleNamespace( diff --git a/tests/test_statsd.py b/tests/test_statsd.py index 130bfa6b..06c1d964 100644 --- a/tests/test_statsd.py +++ b/tests/test_statsd.py @@ -5,10 +5,10 @@ import shutil import socket import tempfile from datetime import timedelta +from types import SimpleNamespace from gunicorn.config import Config from gunicorn.instrument.statsd import Statsd -from support import SimpleNamespace class StatsdTestException(Exception): From 679c3727df915ff34b9b9b20923b74cc7ba38332 Mon Sep 17 00:00:00 2001 From: Philippe Ombredanne Date: Mon, 27 May 2019 09:33:00 +0200 Subject: [PATCH 042/263] Fix typo in license text Signed-off-by: Philippe Ombredanne --- NOTICE | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NOTICE b/NOTICE index 736bf32e..a2f4aa20 100644 --- a/NOTICE +++ b/NOTICE @@ -19,7 +19,7 @@ not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. VINAY SAJIP DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, -INCLUDINGALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL +INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL VINAY SAJIP BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT From 6df58a99b5f28f73b31364964e05ed05a51eb814 Mon Sep 17 00:00:00 2001 From: c-bata Date: Mon, 27 May 2019 15:58:29 +0900 Subject: [PATCH 043/263] Remove util.import_module --- gunicorn/app/base.py | 2 +- gunicorn/util.py | 40 ++-------------------------------------- 2 files changed, 3 insertions(+), 39 deletions(-) diff --git a/gunicorn/app/base.py b/gunicorn/app/base.py index a9e18859..470b40ab 100644 --- a/gunicorn/app/base.py +++ b/gunicorn/app/base.py @@ -109,7 +109,7 @@ class Application(BaseApplication): return vars(mod) def get_config_from_module_name(self, module_name): - return vars(util.import_module(module_name)) + return vars(importlib.import_module(module_name)) def load_config_from_module_name_or_filename(self, location): """ diff --git a/gunicorn/util.py b/gunicorn/util.py index cea69794..b857d2d0 100644 --- a/gunicorn/util.py +++ b/gunicorn/util.py @@ -54,43 +54,6 @@ except ImportError: pass -try: - from importlib import import_module -except ImportError: - def _resolve_name(name, package, level): - """Return the absolute name of the module to be imported.""" - if not hasattr(package, 'rindex'): - raise ValueError("'package' not set to a string") - dot = len(package) - for _ in range(level, 1, -1): - try: - dot = package.rindex('.', 0, dot) - except ValueError: - msg = "attempted relative import beyond top-level package" - raise ValueError(msg) - return "%s.%s" % (package[:dot], name) - - def import_module(name, package=None): - """Import a module. - -The 'package' argument is required when performing a relative import. It -specifies the package to use as the anchor point from which to resolve the -relative import to an absolute import. 
- -""" - if name.startswith('.'): - if not package: - raise TypeError("relative imports require the 'package' argument") - level = 0 - for character in name: - if character != '.': - break - level += 1 - name = _resolve_name(name[level:], package, level) - __import__(name) - return sys.modules[name] - - def load_class(uri, default="gunicorn.workers.sync.SyncWorker", section="gunicorn.workers"): if inspect.isclass(uri): @@ -132,7 +95,7 @@ def load_class(uri, default="gunicorn.workers.sync.SyncWorker", klass = components.pop(-1) try: - mod = import_module('.'.join(components)) + mod = importlib.import_module('.'.join(components)) except: exc = traceback.format_exc() msg = "class uri %r invalid or not found: \n\n[%s]" @@ -521,6 +484,7 @@ def to_bytestring(value, encoding="utf8"): return value.encode(encoding) + def has_fileno(obj): if not hasattr(obj, "fileno"): return False From b15712924f39ed4ab54205fa3bcb41059bf8c849 Mon Sep 17 00:00:00 2001 From: Masashi SHIBATA Date: Tue, 28 May 2019 22:09:44 +0900 Subject: [PATCH 044/263] Import unittest.mock unconditionally (#2050) --- tests/test_arbiter.py | 6 +----- tests/test_http.py | 6 +----- tests/test_pidfile.py | 6 +----- tests/test_sock.py | 5 +---- tests/test_systemd.py | 6 +----- 5 files changed, 5 insertions(+), 24 deletions(-) diff --git a/tests/test_arbiter.py b/tests/test_arbiter.py index bfd9fe65..dc059edb 100644 --- a/tests/test_arbiter.py +++ b/tests/test_arbiter.py @@ -4,11 +4,7 @@ # See the NOTICE for more information. 
import os - -try: - import unittest.mock as mock -except ImportError: - import mock +import unittest.mock as mock import gunicorn.app.base import gunicorn.arbiter diff --git a/tests/test_http.py b/tests/test_http.py index 6bcb5f09..33481266 100644 --- a/tests/test_http.py +++ b/tests/test_http.py @@ -3,6 +3,7 @@ import io import t import pytest +import unittest.mock as mock from gunicorn import util from gunicorn.http.body import Body, LengthReader, EOFReader @@ -10,11 +11,6 @@ from gunicorn.http.wsgi import Response from gunicorn.http.unreader import Unreader, IterUnreader, SocketUnreader from gunicorn.http.errors import InvalidHeader, InvalidHeaderName -try: - import unittest.mock as mock -except ImportError: - import mock - def assert_readline(payload, size, expected): body = Body(io.BytesIO(payload)) diff --git a/tests/test_pidfile.py b/tests/test_pidfile.py index e8c07567..fdcd0a24 100644 --- a/tests/test_pidfile.py +++ b/tests/test_pidfile.py @@ -4,11 +4,7 @@ # See the NOTICE for more information. import errno - -try: - import unittest.mock as mock -except ImportError: - import mock +import unittest.mock as mock import gunicorn.pidfile diff --git a/tests/test_sock.py b/tests/test_sock.py index f70ae09e..36205d84 100644 --- a/tests/test_sock.py +++ b/tests/test_sock.py @@ -3,10 +3,7 @@ # This file is part of gunicorn released under the MIT license. # See the NOTICE for more information. 
-try: - import unittest.mock as mock -except ImportError: - import mock +import unittest.mock as mock from gunicorn import sock diff --git a/tests/test_systemd.py b/tests/test_systemd.py index 4a7a0d7b..d2c78aa2 100644 --- a/tests/test_systemd.py +++ b/tests/test_systemd.py @@ -5,11 +5,7 @@ from contextlib import contextmanager import os - -try: - import unittest.mock as mock -except ImportError: - import mock +import unittest.mock as mock import pytest From 13c5d72bd198655b950d124942fbad153f613fb4 Mon Sep 17 00:00:00 2001 From: Robert Coup Date: Fri, 31 May 2019 11:49:55 +0100 Subject: [PATCH 045/263] Add --print-config option to print the resolved settings at startup. --- gunicorn/app/base.py | 5 ++++- gunicorn/config.py | 22 ++++++++++++++++++++++ tests/test_config.py | 34 ++++++++++++++++++++++++++++++++++ 3 files changed, 60 insertions(+), 1 deletion(-) diff --git a/gunicorn/app/base.py b/gunicorn/app/base.py index 470b40ab..d5c99d38 100644 --- a/gunicorn/app/base.py +++ b/gunicorn/app/base.py @@ -191,7 +191,10 @@ class Application(BaseApplication): self.chdir() def run(self): - if self.cfg.check_config: + if self.cfg.print_config: + print(self.cfg) + + if self.cfg.print_config or self.cfg.check_config: try: self.load() except: diff --git a/gunicorn/config.py b/gunicorn/config.py index e8e0f926..02e06d0c 100644 --- a/gunicorn/config.py +++ b/gunicorn/config.py @@ -51,6 +51,16 @@ class Config(object): self.prog = prog or os.path.basename(sys.argv[0]) self.env_orig = os.environ.copy() + def __str__(self): + lines = [] + kmax = max(len(k) for k in self.settings) + for k in sorted(self.settings): + v = self.settings[k].value + if callable(v): + v = "<%s()>" % v.__qualname__ + lines.append(f"{k:{kmax}} = {v}") + return "\n".join(lines) + def __getattr__(self, name): if name not in self.settings: raise AttributeError("No configuration setting for: %s" % name) @@ -936,6 +946,18 @@ class ConfigCheck(Setting): """ +class PrintConfig(Setting): + name = "print_config" + 
section = "Debugging" + cli = ["--print-config"] + validator = validate_bool + action = "store_true" + default = False + desc = """\ + Print the resolved configuration. + """ + + class PreloadApp(Setting): name = "preload_app" section = "Server Mechanics" diff --git a/tests/test_config.py b/tests/test_config.py index 0587c63c..67480af3 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -4,6 +4,7 @@ # See the NOTICE for more information. import os +import re import sys import pytest @@ -435,3 +436,36 @@ def test_bind_fd(): with AltArgs(["prog_name", "-b", "fd://42"]): app = NoConfigApp() assert app.cfg.bind == ["fd://42"] + + +def test_str(): + c = config.Config() + o = str(c) + + # match the first few lines, some different types, but don't go OTT + # to avoid needless test fails with changes + OUTPUT_MATCH = { + 'access_log_format': '%(h)s %(l)s %(u)s %(t)s "%(r)s" %(s)s %(b)s "%(f)s" "%(a)s"', + 'accesslog': 'None', + 'backlog': '2048', + 'bind': "['127.0.0.1:8000']", + 'capture_output': 'False', + 'child_exit': '', + } + for i, line in enumerate(o.splitlines()): + m = re.match(r'^(\w+)\s+= ', line) + assert m, f"Config line {i} didn't match expected format: {line!r}" + + key = m.group(1) + try: + s = OUTPUT_MATCH.pop(key) + except KeyError: + continue + + line_re = fr'^{key}\s+= {re.escape(s)}$' + assert re.match(line_re, line), f'{line_re!r} != {line!r}' + + if not OUTPUT_MATCH: + break + else: + assert False, f'missing expected setting lines? 
{list(OUTPUT_MATCH.keys())}' From 93d2687d2458e69b98c968aa193ae18ae064fff0 Mon Sep 17 00:00:00 2001 From: Robert Coup Date: Fri, 31 May 2019 12:12:39 +0100 Subject: [PATCH 046/263] f-strings only date back to Py3.6 --- gunicorn/config.py | 4 ++-- tests/test_config.py | 10 ++++++---- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/gunicorn/config.py b/gunicorn/config.py index 02e06d0c..553962fc 100644 --- a/gunicorn/config.py +++ b/gunicorn/config.py @@ -57,8 +57,8 @@ class Config(object): for k in sorted(self.settings): v = self.settings[k].value if callable(v): - v = "<%s()>" % v.__qualname__ - lines.append(f"{k:{kmax}} = {v}") + v = "<{}()>".format(v.__qualname__) + lines.append("{k:{kmax}} = {v}".format(k=k, v=v, kmax=kmax)) return "\n".join(lines) def __getattr__(self, name): diff --git a/tests/test_config.py b/tests/test_config.py index 67480af3..71a11ea1 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -454,7 +454,7 @@ def test_str(): } for i, line in enumerate(o.splitlines()): m = re.match(r'^(\w+)\s+= ', line) - assert m, f"Config line {i} didn't match expected format: {line!r}" + assert m, "Line {} didn't match expected format: {!r}".format(i, line) key = m.group(1) try: @@ -462,10 +462,12 @@ def test_str(): except KeyError: continue - line_re = fr'^{key}\s+= {re.escape(s)}$' - assert re.match(line_re, line), f'{line_re!r} != {line!r}' + line_re = r'^{}\s+= {}$'.format(key, re.escape(s)) + assert re.match(line_re, line), '{!r} != {!r}'.format(line_re, line) if not OUTPUT_MATCH: break else: - assert False, f'missing expected setting lines? {list(OUTPUT_MATCH.keys())}' + assert False, 'missing expected setting lines? {}'.format( + OUTPUT_MATCH.keys() + ) From 000236aae204d629fccada3527eebf7954239846 Mon Sep 17 00:00:00 2001 From: Robert Coup Date: Fri, 31 May 2019 12:22:08 +0100 Subject: [PATCH 047/263] Docs. 
--- docs/source/configure.rst | 4 ++-- docs/source/settings.rst | 10 ++++++++++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/docs/source/configure.rst b/docs/source/configure.rst index 9ed1a484..2af39e3b 100644 --- a/docs/source/configure.rst +++ b/docs/source/configure.rst @@ -25,10 +25,10 @@ Once again, in order of least to most authoritative: .. note:: - To check your configuration when using the command line or the + To print your resolved configuration when using the command line or the configuration file you can run the following command:: - $ gunicorn --check-config APP_MODULE + $ gunicorn --print-config APP_MODULE It also allows you to know if your application can be launched. diff --git a/docs/source/settings.rst b/docs/source/settings.rst index 16d8961a..c0c99ef4 100644 --- a/docs/source/settings.rst +++ b/docs/source/settings.rst @@ -123,6 +123,16 @@ check_config Check the configuration. +.. _print-config: + +print_config +~~~~~~~~~~~~ + +* ``--print-config`` +* ``False`` + +Print the configuration settings as fully resolved. Implies :ref:`check-config`. 
+ Logging ------- From dc7b5d5c4876b49f86ea2460698a335d0f5ef7c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marwan=20Rabb=C3=A2a?= <1440729+waghanza@users.noreply.github.com> Date: Tue, 4 Jun 2019 23:17:01 +0200 Subject: [PATCH 048/263] Fix compatility with tornado 6 (#2001) --- gunicorn/workers/gtornado.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/gunicorn/workers/gtornado.py b/gunicorn/workers/gtornado.py index 40e5572a..c98acb85 100644 --- a/gunicorn/workers/gtornado.py +++ b/gunicorn/workers/gtornado.py @@ -105,9 +105,11 @@ class TornadoWorker(Worker): # instance of tornado.web.Application or is an # instance of tornado.wsgi.WSGIApplication app = self.wsgi - if not isinstance(app, tornado.web.Application) or \ - isinstance(app, tornado.wsgi.WSGIApplication): - app = WSGIContainer(app) + + if tornado.version_info[0] < 6: + if not isinstance(app, tornado.web.Application) or \ + isinstance(app, tornado.wsgi.WSGIApplication): + app = WSGIContainer(app) # Monkey-patching HTTPConnection.finish to count the # number of requests being handled by Tornado. This From b014fa78eeb2c05155b7324048739905c6fb526a Mon Sep 17 00:00:00 2001 From: Brett Randall Date: Fri, 22 Feb 2019 16:21:04 +1100 Subject: [PATCH 049/263] Various code improvements contributed by dilyanpalauzov. These were originally based on 19.9.0 code and were rebased with conflicts resolved. Fixed #1690. 
Co-Authored-By: dilyanpalauzov Signed-off-by: Brett Randall --- docs/sitemap_gen.py | 24 ++++++------------------ gunicorn/arbiter.py | 9 ++++----- gunicorn/glogging.py | 5 +---- tests/treq.py | 2 +- 4 files changed, 12 insertions(+), 28 deletions(-) diff --git a/docs/sitemap_gen.py b/docs/sitemap_gen.py index 1cfbbae1..39b2cb47 100755 --- a/docs/sitemap_gen.py +++ b/docs/sitemap_gen.py @@ -457,9 +457,7 @@ class URL(object): return False narrow = encoder.NarrowText(loc, None) (scheme, netloc, path, query, frag) = urlparse.urlsplit(narrow) - if (not scheme) or (not netloc): - return False - return True + return scheme and netloc #end def IsAbsolute IsAbsolute = staticmethod(IsAbsolute) @@ -543,26 +541,16 @@ class URL(object): # Test the lastmod if self.lastmod: - match = False self.lastmod = self.lastmod.upper() - for pattern in LASTMOD_PATTERNS: - match = pattern.match(self.lastmod) - if match: - break - if not match: + if not any(pattern.match(self.lastmod) for pattern in LASTMOD_PATTERNS): output.Warn('Lastmod "%s" does not appear to be in ISO8601 format on ' 'URL: %s' % (self.lastmod, self.loc)) self.lastmod = None # Test the changefreq if self.changefreq: - match = False self.changefreq = self.changefreq.lower() - for pattern in CHANGEFREQ_PATTERNS: - if self.changefreq == pattern: - match = True - break - if not match: + if all(self.changefreq != pattern for pattern in CHANGEFREQ_PATTERNS): output.Warn('Changefreq "%s" is not a valid change frequency on URL ' ': %s' % (self.changefreq, self.loc)) self.changefreq = None @@ -1490,7 +1478,7 @@ class InputSitemap(xml.sax.handler.ContentHandler): # Switch contexts if (self._current < 0) or (self._contexts[self._current].AcceptTag(tag)): - self._current = self._current + 1 + self._current += 1 assert self._current < len(self._contexts) self._contexts[self._current].Open() else: @@ -1663,7 +1651,7 @@ class PerURLStatistics: def Log(self): """ Dump out stats to the output. 
""" - if len(self._extensions): + if self._extensions: output.Log('Count of file extensions on URLs:', 1) set = self._extensions.keys() set.sort() @@ -1758,7 +1746,7 @@ class Sitemap(xml.sax.handler.ContentHandler): input.ProduceURLs(self.ConsumeURL) # Do last flushes - if len(self._set): + if self._set: self.FlushSet() if not self._sitemaps: output.Warn('No URLs were recorded, writing an empty sitemap.') diff --git a/gunicorn/arbiter.py b/gunicorn/arbiter.py index 7eaa2c17..bfbfcb98 100644 --- a/gunicorn/arbiter.py +++ b/gunicorn/arbiter.py @@ -491,20 +491,19 @@ class Arbiter(object): """ if not self.timeout: return - workers = list(self.WORKERS.items()) - for (pid, worker) in workers: + for pid, worker in self.WORKERS.items(): try: if time.time() - worker.tmp.last_update() <= self.timeout: continue except (OSError, ValueError): continue - if not worker.aborted: + if worker.aborted: + self.kill_worker(pid, signal.SIGKILL) + else: self.log.critical("WORKER TIMEOUT (pid:%s)", pid) worker.aborted = True self.kill_worker(pid, signal.SIGABRT) - else: - self.kill_worker(pid, signal.SIGKILL) def reap_workers(self): """\ diff --git a/gunicorn/glogging.py b/gunicorn/glogging.py index 3f7b4ac7..56ebb78e 100644 --- a/gunicorn/glogging.py +++ b/gunicorn/glogging.py @@ -415,10 +415,7 @@ class Logger(object): def _set_syslog_handler(self, log, cfg, fmt, name): # setup format - if not cfg.syslog_prefix: - prefix = cfg.proc_name.replace(":", ".") - else: - prefix = cfg.syslog_prefix + prefix = cfg.syslog_prefix or cfg.proc_name.replace(":", ".") prefix = "gunicorn.%s.%s" % (prefix, name) diff --git a/tests/treq.py b/tests/treq.py index 71466855..9b6cdd1b 100644 --- a/tests/treq.py +++ b/tests/treq.py @@ -124,7 +124,7 @@ class request(object): def szread(self, func, sizes): sz = sizes() data = func(sz) - if sz >= 0 and len(data) > sz: + if 0 <= sz < len(data): raise AssertionError("Read more than %d bytes: %s" % (sz, data)) return data From 7e640f804cacc2189e6f539b7b8861a2ef1a2461 
Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Ba=C5=A1ti?= Date: Thu, 11 Jul 2019 19:01:43 +0200 Subject: [PATCH 050/263] Logging: Handle auth type case insensitively MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According RFC-7617 (inherited from RFC-2978) schema and parameter names are handled case insensitively: ``` Note that both scheme and parameter names are matched case- insensitively. ``` Signed-off-by: Martin Bašti --- gunicorn/glogging.py | 2 +- tests/test_logger.py | 12 ++++++++++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/gunicorn/glogging.py b/gunicorn/glogging.py index 3f7b4ac7..a096f967 100644 --- a/gunicorn/glogging.py +++ b/gunicorn/glogging.py @@ -445,7 +445,7 @@ class Logger(object): def _get_user(self, environ): user = None http_auth = environ.get("HTTP_AUTHORIZATION") - if http_auth and http_auth.startswith('Basic'): + if http_auth and http_auth.lower().startswith('basic'): auth = http_auth.split(" ", 1) if len(auth) == 2: try: diff --git a/tests/test_logger.py b/tests/test_logger.py index 5b8c0d42..54801266 100644 --- a/tests/test_logger.py +++ b/tests/test_logger.py @@ -1,6 +1,8 @@ import datetime from types import SimpleNamespace +import pytest + from gunicorn.config import Config from gunicorn.glogging import Logger @@ -47,7 +49,13 @@ def test_atoms_zero_bytes(): assert atoms['B'] == 0 -def test_get_username_from_basic_auth_header(): +@pytest.mark.parametrize('auth', [ + # auth type is case in-sensitive + 'Basic YnJrMHY6', + 'basic YnJrMHY6', + 'BASIC YnJrMHY6', +]) +def test_get_username_from_basic_auth_header(auth): request = SimpleNamespace(headers=()) response = SimpleNamespace( status='200', response_length=1024, sent=1024, @@ -57,7 +65,7 @@ def test_get_username_from_basic_auth_header(): 'REQUEST_METHOD': 'GET', 'RAW_URI': '/my/path?foo=bar', 'PATH_INFO': '/my/path', 'QUERY_STRING': 'foo=bar', 'SERVER_PROTOCOL': 'HTTP/1.1', - 'HTTP_AUTHORIZATION': 'Basic YnJrMHY6', 
+ 'HTTP_AUTHORIZATION': auth, } logger = Logger(Config()) atoms = logger.atoms(response, request, environ, datetime.timedelta(seconds=1)) From f38f717539b1b7296720805b8ae3969c3509b9c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Ba=C5=A1ti?= Date: Thu, 11 Jul 2019 19:12:16 +0200 Subject: [PATCH 051/263] Fix pytest 5.0.0 compatibility MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pytest.raises() returns exception info not the exception itself. They changed implementation of exception info, so now .value property must be used to get the exception instance and have proper output from str() method. https://github.com/pytest-dev/pytest/issues/5412 Signed-off-by: Martin Bašti --- tests/test_util.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/tests/test_util.py b/tests/test_util.py index 3b8688a2..2494d2c5 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -29,15 +29,15 @@ def test_parse_address(test_input, expected): def test_parse_address_invalid(): - with pytest.raises(RuntimeError) as err: + with pytest.raises(RuntimeError) as exc_info: util.parse_address('127.0.0.1:test') - assert "'test' is not a valid port number." in str(err) + assert "'test' is not a valid port number." in str(exc_info.value) def test_parse_fd_invalid(): - with pytest.raises(RuntimeError) as err: + with pytest.raises(RuntimeError) as exc_info: util.parse_address('fd://asd') - assert "'asd' is not a valid file descriptor." in str(err) + assert "'asd' is not a valid file descriptor." 
in str(exc_info.value) def test_http_date(): @@ -63,24 +63,24 @@ def test_warn(capsys): def test_import_app(): assert util.import_app('support:app') - with pytest.raises(ImportError) as err: + with pytest.raises(ImportError) as exc_info: util.import_app('a:app') - assert 'No module' in str(err) + assert 'No module' in str(exc_info.value) - with pytest.raises(AppImportError) as err: + with pytest.raises(AppImportError) as exc_info: util.import_app('support:wrong_app') msg = "Failed to find application object 'wrong_app' in 'support'" - assert msg in str(err) + assert msg in str(exc_info.value) def test_to_bytestring(): assert util.to_bytestring('test_str', 'ascii') == b'test_str' assert util.to_bytestring('test_str®') == b'test_str\xc2\xae' assert util.to_bytestring(b'byte_test_str') == b'byte_test_str' - with pytest.raises(TypeError) as err: + with pytest.raises(TypeError) as exc_info: util.to_bytestring(100) msg = '100 is not a string' - assert msg in str(err) + assert msg in str(exc_info.value) @pytest.mark.parametrize('test_input, expected', [ From 40802904ebba5a8b1ab6bdec927bacd09fd1b099 Mon Sep 17 00:00:00 2001 From: Randall Leeds Date: Sun, 16 Jun 2019 23:50:45 -0400 Subject: [PATCH 052/263] Avoid unnecessary chown of temporary files When Gunicorn is configured to change the effective user or group of the worker processes, it changes the owner and group fo the the temporary files used for interprocess communication. With this change, Gunicorn does not change the owner or group of the files if the worker processes will run as the current effective user and gorup. This change avoids calling chown when it is not necessary, which may allow Gunicorn to be used in environments that restrict use of the chown syscall. Relates to #2059. 
--- gunicorn/workers/workertmp.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/gunicorn/workers/workertmp.py b/gunicorn/workers/workertmp.py index 22aaef34..a37ed155 100644 --- a/gunicorn/workers/workertmp.py +++ b/gunicorn/workers/workertmp.py @@ -21,11 +21,13 @@ class WorkerTmp(object): if fdir and not os.path.isdir(fdir): raise RuntimeError("%s doesn't exist. Can't create workertmp." % fdir) fd, name = tempfile.mkstemp(prefix="wgunicorn-", dir=fdir) - - # allows the process to write to the file - util.chown(name, cfg.uid, cfg.gid) os.umask(old_umask) + # change the owner and group of the file if the worker will run as + # a different user or group, so that the worker can modify the file + if cfg.uid != os.geteuid() or cfg.gid != os.getegid(): + util.chown(name, cfg.uid, cfg.gid) + # unlink the file so we don't leak tempory files try: if not IS_CYGWIN: From 40d22ae38d577a6d364730ae8fba669693e87706 Mon Sep 17 00:00:00 2001 From: John Whitlock Date: Mon, 19 Aug 2019 19:27:59 -0500 Subject: [PATCH 053/263] Add setproctitle to extras_require (#2094) This allows you to specify that you want setproctitle installed so that gunicorn can set meaningful process names at install time or in a requirements file. --- docs/source/custom.rst | 3 ++- docs/source/install.rst | 22 ++++++++++++++++++++++ setup.py | 1 + 3 files changed, 25 insertions(+), 1 deletion(-) diff --git a/docs/source/custom.rst b/docs/source/custom.rst index 0b8c366c..0fb39250 100644 --- a/docs/source/custom.rst +++ b/docs/source/custom.rst @@ -13,7 +13,8 @@ Here is a small example where we create a very small WSGI app and load it with a custom Application: .. 
literalinclude:: ../../examples/standalone_app.py - :lines: 11-60 + :start-after: # See the NOTICE for more information + :lines: 2- Direct Usage of Existing WSGI Apps ---------------------------------- diff --git a/docs/source/install.rst b/docs/source/install.rst index 3002a611..d6d146d2 100644 --- a/docs/source/install.rst +++ b/docs/source/install.rst @@ -52,6 +52,28 @@ want to consider one of the alternate worker types. installed, this is the most likely reason. +Extra Packages +============== +Some Gunicorn options require additional packages. You can use the ``[extra]`` +syntax to install these at the same time as Gunicorn. + +Most extra packages are needed for alternate worker types. See the +`design docs`_ for more information on when you'll want to consider an +alternate worker type. + +* ``gunicorn[eventlet]`` - Eventlet-based greenlets workers +* ``gunicorn[gevent]`` - Gevent-based greenlets workers +* ``gunicorn[gthread]`` - Threaded workers +* ``gunicorn[tornado]`` - Tornado-based workers, not recommended + +If you are running more than one instance of Gunicorn, the :ref:`proc-name` +setting will help distinguish between them in tools like ``ps`` and ``top``. + +* ``gunicorn[setproctitle]`` - Enables setting the process name + +Multiple extras can be combined, like +``pip install gunicorn[gevent,setproctitle]``. 
+ Debian GNU/Linux ================ diff --git a/setup.py b/setup.py index ee898d88..31d173f6 100644 --- a/setup.py +++ b/setup.py @@ -78,6 +78,7 @@ extras_require = { 'eventlet': ['eventlet>=0.9.7'], 'tornado': ['tornado>=0.2'], 'gthread': [], + 'setproctitle': ['setproctitle'], } setup( From d765f0d123fff5da0f36da8f087a8dd0da778411 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EC=9E=A5=EC=A4=80=EC=98=81?= Date: Tue, 20 Aug 2019 09:34:18 +0900 Subject: [PATCH 054/263] Group exceptions with same body together in Arbiter.run() (#2081) --- gunicorn/arbiter.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/gunicorn/arbiter.py b/gunicorn/arbiter.py index 7eaa2c17..bca671d1 100644 --- a/gunicorn/arbiter.py +++ b/gunicorn/arbiter.py @@ -223,9 +223,7 @@ class Arbiter(object): self.log.info("Handling signal: %s", signame) handler() self.wakeup() - except StopIteration: - self.halt() - except KeyboardInterrupt: + except (StopIteration, KeyboardInterrupt): self.halt() except HaltServer as inst: self.halt(reason=inst.reason, exit_status=inst.exit_status) From 799df751c71c3c4024bfe5d4cb884ca159370a04 Mon Sep 17 00:00:00 2001 From: Leonardo Furtado Date: Mon, 19 Aug 2019 21:46:22 -0300 Subject: [PATCH 055/263] Add link to CONTRIBUTING.md from README.rst (#2069) --- README.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/README.rst b/README.rst index 6b9bcaf1..c9e3ebdf 100644 --- a/README.rst +++ b/README.rst @@ -52,6 +52,12 @@ Example with test app:: $ gunicorn --workers=2 test:app +Contributing +------------ + +See `our complete contributor's guide `_ for more details. 
+ + License ------- From f35ae584b41b4808a23a689d3168e87328d5ebb1 Mon Sep 17 00:00:00 2001 From: johnthagen Date: Sat, 7 Sep 2019 21:55:26 -0400 Subject: [PATCH 056/263] Add pypy3 to list of tested environments (#2105) --- .travis.yml | 3 +++ setup.py | 2 ++ tox.ini | 2 +- 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 97c578aa..1d569a76 100644 --- a/.travis.yml +++ b/.travis.yml @@ -16,6 +16,9 @@ matrix: env: TOXENV=py37 dist: xenial sudo: true + - python: pypy3 + env: TOXENV=pypy3 + dist: xenial - python: 3.8-dev env: TOXENV=py38-dev dist: xenial diff --git a/setup.py b/setup.py index 31d173f6..8d79fb7a 100644 --- a/setup.py +++ b/setup.py @@ -26,6 +26,8 @@ CLASSIFIERS = [ 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7', 'Programming Language :: Python :: 3 :: Only', + 'Programming Language :: Python :: Implementation :: CPython', + 'Programming Language :: Python :: Implementation :: PyPy', 'Topic :: Internet', 'Topic :: Utilities', 'Topic :: Software Development :: Libraries :: Python Modules', diff --git a/tox.ini b/tox.ini index 47249d6e..96388fa7 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = py34, py35, py36, py37, py38-dev, pypy, lint +envlist = py34, py35, py36, py37, py38-dev, pypy3, lint skipsdist = True [testenv] From 49341f1fb35fec7f0606faa1e3129174e12dd28a Mon Sep 17 00:00:00 2001 From: Tyler Lubeck Date: Thu, 26 Sep 2019 14:54:54 -0700 Subject: [PATCH 057/263] Terminology changes --- CONTRIBUTING.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 1a6880bb..ac93950d 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -141,7 +141,7 @@ The relevant maintainer for a pull request is assigned in 3 steps: * Step 2: Find the MAINTAINERS file which affects this directory. If the directory itself does not have a MAINTAINERS file, work your way up the the repo hierarchy until you find one. 
-* Step 3: The first maintainer listed is the primary maintainer. The pull request is assigned to him. He may assign it to other listed maintainers, at his discretion. +* Step 3: The first maintainer listed is the primary maintainer. The pull request is assigned to them. They may assign it to other listed maintainers, at their discretion. ### I'm a maintainer, should I make pull requests too? From ce03c192f4ca312dac93f52e61ccdf50e01b09d8 Mon Sep 17 00:00:00 2001 From: Benoit Chesneau Date: Fri, 27 Sep 2019 01:45:03 +0200 Subject: [PATCH 058/263] fix formatting --- gunicorn/http/body.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/gunicorn/http/body.py b/gunicorn/http/body.py index e75d72de..afde3685 100644 --- a/gunicorn/http/body.py +++ b/gunicorn/http/body.py @@ -7,7 +7,7 @@ import io import sys from gunicorn.http.errors import (NoMoreData, ChunkMissingTerminator, - InvalidChunkSize) + InvalidChunkSize) class ChunkedReader(object): @@ -187,6 +187,7 @@ class Body(object): if not ret: raise StopIteration() return ret + next = __next__ def getsize(self, size): From e6a88dbfcd78052a2f03e741d9e732ecb6c17e22 Mon Sep 17 00:00:00 2001 From: Benoit Chesneau Date: Fri, 27 Sep 2019 01:47:03 +0200 Subject: [PATCH 059/263] bump to 20.0.0 --- gunicorn/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gunicorn/__init__.py b/gunicorn/__init__.py index 78204797..7b38ab04 100644 --- a/gunicorn/__init__.py +++ b/gunicorn/__init__.py @@ -3,6 +3,6 @@ # This file is part of gunicorn released under the MIT license. # See the NOTICE for more information. 
-version_info = (19, 9, 0) +version_info = (20, 0, 0) __version__ = ".".join([str(v) for v in version_info]) SERVER_SOFTWARE = "gunicorn/%s" % __version__ From c6bb90ca827e1566158a00ab26e6cfd86fdedb00 Mon Sep 17 00:00:00 2001 From: Tyler Lubeck Date: Fri, 27 Sep 2019 11:09:45 -0700 Subject: [PATCH 060/263] Update CONTRIBUTING.md --- CONTRIBUTING.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index ac93950d..7bd82abd 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -141,7 +141,7 @@ The relevant maintainer for a pull request is assigned in 3 steps: * Step 2: Find the MAINTAINERS file which affects this directory. If the directory itself does not have a MAINTAINERS file, work your way up the the repo hierarchy until you find one. -* Step 3: The first maintainer listed is the primary maintainer. The pull request is assigned to them. They may assign it to other listed maintainers, at their discretion. +* Step 3: The first maintainer listed is the primary maintainer who is assigned the Pull Request. The primary maintainer can reassign a Pull Request to other listed maintainers. ### I'm a maintainer, should I make pull requests too? 
From e147feaf8b12267ff9bb3c06ad45a2738a4027df Mon Sep 17 00:00:00 2001 From: Benoit Chesneau Date: Fri, 27 Sep 2019 23:15:59 +0200 Subject: [PATCH 061/263] fix echo example on python 3.7 --- examples/echo.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/examples/echo.py b/examples/echo.py index 06f61602..e10332d8 100644 --- a/examples/echo.py +++ b/examples/echo.py @@ -5,12 +5,9 @@ # # Example code from Eventlet sources -from wsgiref.validate import validator - from gunicorn import __version__ -@validator def app(environ, start_response): """Simplest possible application object""" From 54c820feb3f8a7c75d35769504de19a3fdcf04cc Mon Sep 17 00:00:00 2001 From: Jeff Brooks Date: Thu, 10 Oct 2019 10:41:22 -0500 Subject: [PATCH 062/263] Ensure header value is string before conducting regex search on it. --- gunicorn/http/wsgi.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/gunicorn/http/wsgi.py b/gunicorn/http/wsgi.py index 32e7a2ac..b786bc09 100644 --- a/gunicorn/http/wsgi.py +++ b/gunicorn/http/wsgi.py @@ -253,10 +253,12 @@ class Response(object): if HEADER_RE.search(name): raise InvalidHeaderName('%r' % name) + value = str(value) + if HEADER_VALUE_RE.search(value): raise InvalidHeader('%r' % value) - value = str(value).strip() + value = value.strip() lname = name.lower().strip() if lname == "content-length": self.response_length = int(value) From ad6ed3f4c835eb6a86ba61dadfd3896ddcbb48e3 Mon Sep 17 00:00:00 2001 From: Jeff Brooks Date: Tue, 15 Oct 2019 09:03:44 -0500 Subject: [PATCH 063/263] Implement check and exception for str type on value in Response process_headers method. 
--- gunicorn/http/wsgi.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/gunicorn/http/wsgi.py b/gunicorn/http/wsgi.py index b786bc09..3524471f 100644 --- a/gunicorn/http/wsgi.py +++ b/gunicorn/http/wsgi.py @@ -253,7 +253,8 @@ class Response(object): if HEADER_RE.search(name): raise InvalidHeaderName('%r' % name) - value = str(value) + if not isinstance(value, str): + raise TypeError('%r is not a string' % value) if HEADER_VALUE_RE.search(value): raise InvalidHeader('%r' % value) From 5d1d3af7f5ea479a9b2be3b047caea9a40c8b9ab Mon Sep 17 00:00:00 2001 From: Hasan Ramezani Date: Fri, 25 Oct 2019 11:09:20 +0200 Subject: [PATCH 064/263] Add import-outside-toplevel to pylintrc disable list. --- .pylintrc | 1 + 1 file changed, 1 insertion(+) diff --git a/.pylintrc b/.pylintrc index 89b82ab1..383d1159 100644 --- a/.pylintrc +++ b/.pylintrc @@ -21,6 +21,7 @@ disable= eval-used, fixme, import-error, + import-outside-toplevel, import-self, inconsistent-return-statements, invalid-name, From e7890d8d1b195ba3eb90ea3ccf3832bfd788e2d3 Mon Sep 17 00:00:00 2001 From: Hasan Ramezani Date: Fri, 25 Oct 2019 11:09:53 +0200 Subject: [PATCH 065/263] Fix lint errors (#2123) --- gunicorn/http/message.py | 2 +- gunicorn/reloader.py | 1 + gunicorn/util.py | 2 +- gunicorn/workers/gthread.py | 1 + 4 files changed, 4 insertions(+), 2 deletions(-) diff --git a/gunicorn/http/message.py b/gunicorn/http/message.py index 3c080207..4040c7ae 100644 --- a/gunicorn/http/message.py +++ b/gunicorn/http/message.py @@ -242,7 +242,7 @@ class Request(Message): if idx > limit > 0: raise LimitRequestLine(idx, limit) break - elif len(data) - 2 > limit > 0: + if len(data) - 2 > limit > 0: raise LimitRequestLine(len(data), limit) self.get_data(unreader, buf) data = buf.getvalue() diff --git a/gunicorn/reloader.py b/gunicorn/reloader.py index 500b22ed..acfc2f8f 100644 --- a/gunicorn/reloader.py +++ b/gunicorn/reloader.py @@ -2,6 +2,7 @@ # # This file is part of gunicorn released under the MIT 
license. # See the NOTICE for more information. +# pylint: disable=no-else-continue import os import os.path diff --git a/gunicorn/util.py b/gunicorn/util.py index b857d2d0..fbdd5cfc 100644 --- a/gunicorn/util.py +++ b/gunicorn/util.py @@ -323,7 +323,7 @@ def write_error(sock, status_int, reason, mesg): def import_app(module): parts = module.split(":", 1) if len(parts) == 1: - module, obj = module, "application" + obj = "application" else: module, obj = parts[0], parts[1] diff --git a/gunicorn/workers/gthread.py b/gunicorn/workers/gthread.py index 40960f00..5828ee93 100644 --- a/gunicorn/workers/gthread.py +++ b/gunicorn/workers/gthread.py @@ -9,6 +9,7 @@ # Keepalive connections are put back in the loop waiting for an event. # If no event happen after the keep alive timeout, the connection is # closed. +# pylint: disable=no-else-break import concurrent.futures as futures import errno From 7d0c8f97f8ceac8c33c52e74e2f1a64600c662d1 Mon Sep 17 00:00:00 2001 From: Bastien Vallet Date: Thu, 24 Oct 2019 17:19:33 +0200 Subject: [PATCH 066/263] [py38] Python 3.8 is now official, use it by default --- .travis.yml | 10 ++++------ setup.py | 1 + tox.ini | 2 +- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/.travis.yml b/.travis.yml index 1d569a76..227b004c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,7 +2,7 @@ sudo: false language: python matrix: include: - - python: 3.7 + - python: 3.8 env: TOXENV=lint dist: xenial sudo: true @@ -19,16 +19,14 @@ matrix: - python: pypy3 env: TOXENV=pypy3 dist: xenial - - python: 3.8-dev - env: TOXENV=py38-dev + - python: 3.8 + env: TOXENV=py38 dist: xenial sudo: true - - python: 3.7 + - python: 3.8 env: TOXENV=docs-lint dist: xenial sudo: true - allow_failures: - - env: TOXENV=py38-dev install: pip install tox # TODO: https://github.com/tox-dev/tox/issues/149 script: tox --recreate diff --git a/setup.py b/setup.py index 8d79fb7a..2408a53f 100644 --- a/setup.py +++ b/setup.py @@ -25,6 +25,7 @@ CLASSIFIERS = [ 'Programming 
Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: 3.8', 'Programming Language :: Python :: 3 :: Only', 'Programming Language :: Python :: Implementation :: CPython', 'Programming Language :: Python :: Implementation :: PyPy', diff --git a/tox.ini b/tox.ini index 96388fa7..7180e559 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = py34, py35, py36, py37, py38-dev, pypy3, lint +envlist = py34, py35, py36, py37, py38, pypy3, lint skipsdist = True [testenv] From f0a9c6b4ded6166dbdb5548efb47f3efb39909e3 Mon Sep 17 00:00:00 2001 From: Bastien Vallet Date: Thu, 24 Oct 2019 17:21:48 +0200 Subject: [PATCH 067/263] [tests] Remove old coverage required version --- requirements_test.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements_test.txt b/requirements_test.txt index d50e1d9e..cc595b77 100644 --- a/requirements_test.txt +++ b/requirements_test.txt @@ -1,4 +1,4 @@ aiohttp -coverage>=4.0,<4.4 # TODO: https://github.com/benoitc/gunicorn/issues/1548 +coverage pytest -pytest-cov==2.5.1 +pytest-cov From a4b6b1385a5e0ba094b99608a6a117376beb4d31 Mon Sep 17 00:00:00 2001 From: Talha Malik Date: Wed, 16 Oct 2019 10:43:51 -0400 Subject: [PATCH 068/263] Fixes #2133 --- gunicorn/workers/base.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/gunicorn/workers/base.py b/gunicorn/workers/base.py index f95994bc..81c0c616 100644 --- a/gunicorn/workers/base.py +++ b/gunicorn/workers/base.py @@ -116,6 +116,8 @@ class Worker(object): self.init_signals() + self.load_wsgi() + # start the reloader if self.cfg.reload: def changed(fname): @@ -130,7 +132,6 @@ class Worker(object): callback=changed) self.reloader.start() - self.load_wsgi() self.cfg.post_worker_init(self) # Enter main run loop From 51a4afd7ef2c0941047f40d7ec79f6344fa47528 Mon Sep 17 00:00:00 2001 From: Talha Malik Date: Thu, 17 Oct 2019 09:50:17 -0400 Subject: [PATCH 
069/263] Remove locking in polling reloader --- THANKS | 1 + gunicorn/reloader.py | 7 ++----- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/THANKS b/THANKS index 2c2a6de5..04e0f52b 100644 --- a/THANKS +++ b/THANKS @@ -163,6 +163,7 @@ Stephen DiCato Stephen Holsapple Steven Cummings Sébastien Fievet +Talha Malik TedWantsMore Thomas Grainger Thomas Steinacher diff --git a/gunicorn/reloader.py b/gunicorn/reloader.py index acfc2f8f..d00be2bf 100644 --- a/gunicorn/reloader.py +++ b/gunicorn/reloader.py @@ -19,13 +19,11 @@ class Reloader(threading.Thread): super().__init__() self.setDaemon(True) self._extra_files = set(extra_files or ()) - self._extra_files_lock = threading.RLock() self._interval = interval self._callback = callback def add_extra_file(self, filename): - with self._extra_files_lock: - self._extra_files.add(filename) + self._extra_files.add(filename) def get_files(self): fnames = [ @@ -34,8 +32,7 @@ class Reloader(threading.Thread): if getattr(module, '__file__', None) ] - with self._extra_files_lock: - fnames.extend(self._extra_files) + fnames.extend(self._extra_files) return fnames From 21d6287e15fb6a96ce64e1006281d4d9de135ff3 Mon Sep 17 00:00:00 2001 From: Jun Date: Wed, 30 Oct 2019 16:16:07 +0800 Subject: [PATCH 070/263] Fix fdopen RuntimeWarning in 3.8 (#2146) --- gunicorn/workers/workertmp.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/gunicorn/workers/workertmp.py b/gunicorn/workers/workertmp.py index a37ed155..c475a12b 100644 --- a/gunicorn/workers/workertmp.py +++ b/gunicorn/workers/workertmp.py @@ -32,7 +32,9 @@ class WorkerTmp(object): try: if not IS_CYGWIN: util.unlink(name) - self._tmp = os.fdopen(fd, 'w+b', 1) + # In Python 3.8, open() emits RuntimeWarning if buffering=1 for binary mode. + # Because we never write to this file, pass 0 to switch buffering off. 
+ self._tmp = os.fdopen(fd, 'w+b', 0) except: os.close(fd) raise From 9edf8e8159a1f6ca9a7fbdf7b1dc12dbc93ec14b Mon Sep 17 00:00:00 2001 From: Benoit Chesneau Date: Wed, 30 Oct 2019 16:15:48 +0100 Subject: [PATCH 071/263] add 2018 changelog doc --- docs/source/2018-news.rst | 68 +++++++++++++++++++++++++++++++++++++++ docs/source/news.rst | 1 + 2 files changed, 69 insertions(+) create mode 100644 docs/source/2018-news.rst diff --git a/docs/source/2018-news.rst b/docs/source/2018-news.rst new file mode 100644 index 00000000..3b412cf5 --- /dev/null +++ b/docs/source/2018-news.rst @@ -0,0 +1,68 @@ +================ +Changelog - 2018 +================ + +.. note:: + + Please see :doc:`news` for the latest changes + +19.9.0 / 2018/07/03 +=================== + +- fix: address a regression that prevented syslog support from working + (:issue:`1668`, :pr:`1773`) +- fix: correctly set `REMOTE_ADDR` on versions of Python 3 affected by + `Python Issue 30205 <https://bugs.python.org/issue30205>`_ + (:issue:`1755`, :pr:`1796`) +- fix: show zero response length correctly in access log (:pr:`1787`) +- fix: prevent raising :exc:`AttributeError` when ``--reload`` is not passed + in case of a :exc:`SyntaxError` raised from the WSGI application. + (:issue:`1805`, :pr:`1806`) +- The internal module ``gunicorn.workers.async`` was renamed to ``gunicorn.workers.base_async`` + since ``async`` is now a reserved word in Python 3.7.
+ (:pr:`1527`) + +19.8.1 / 2018/04/30 +=================== + +- fix: secure scheme headers when bound to a unix socket + (:issue:`1766`, :pr:`1767`) + +19.8.0 / 2018/04/28 +=================== + +- Eventlet 0.21.0 support (:issue:`1584`) +- Tornado 5 support (:issue:`1728`, :pr:`1752`) +- support watching additional files with ``--reload-extra-file`` + (:pr:`1527`) +- support configuring logging with a dictionary with ``--logging-config-dict`` + (:issue:`1087`, :pr:`1110`, :pr:`1602`) +- add support for the ``--config`` flag in the ``GUNICORN_CMD_ARGS`` environment + variable (:issue:`1576`, :pr:`1581`) +- disable ``SO_REUSEPORT`` by default and add the ``--reuse-port`` setting + (:issue:`1553`, :issue:`1603`, :pr:`1669`) +- fix: installing `inotify` on MacOS no longer breaks the reloader + (:issue:`1540`, :pr:`1541`) +- fix: do not throw ``TypeError`` when ``SO_REUSEPORT`` is not available + (:issue:`1501`, :pr:`1491`) +- fix: properly decode HTTP paths containing certain non-ASCII characters + (:issue:`1577`, :pr:`1578`) +- fix: remove whitespace when logging header values under gevent (:pr:`1607`) +- fix: close unlinked temporary files (:issue:`1327`, :pr:`1428`) +- fix: parse ``--umask=0`` correctly (:issue:`1622`, :pr:`1632`) +- fix: allow loading applications using relative file paths + (:issue:`1349`, :pr:`1481`) +- fix: force blocking mode on the gevent sockets (:issue:`880`, :pr:`1616`) +- fix: preserve leading `/` in request path (:issue:`1512`, :pr:`1511`) +- fix: forbid contradictory secure scheme headers +- fix: handle malformed basic authentication headers in access log + (:issue:`1683`, :pr:`1684`) +- fix: defer handling of ``USR1`` signal to a new greenlet under gevent + (:issue:`1645`, :pr:`1651`) +- fix: the threaded worker would sometimes close the wrong keep-alive + connection under Python 2 (:issue:`1698`, :pr:`1699`) +- fix: re-open log files on ``USR1`` signal using ``handler._open`` to + support subclasses of ``FileHandler`` (:issue:`1739`, 
:pr:`1742`) +- deprecation: the ``gaiohttp`` worker is deprecated, see the + :ref:`worker-class` documentation for more information + (:issue:`1338`, :pr:`1418`, :pr:`1569`) \ No newline at end of file diff --git a/docs/source/news.rst b/docs/source/news.rst index 049e66cf..4705d21f 100644 --- a/docs/source/news.rst +++ b/docs/source/news.rst @@ -75,6 +75,7 @@ History .. toctree:: :titlesonly: + 2018-news 2017-news 2016-news 2015-news From a1bb7a935a077db627570286b269adf3e7fb85b1 Mon Sep 17 00:00:00 2001 From: Benoit Chesneau Date: Wed, 30 Oct 2019 17:15:29 +0100 Subject: [PATCH 072/263] add 20.0 changelog --- docs/source/news.rst | 116 ++++++++++++++++++++----------------------- 1 file changed, 54 insertions(+), 62 deletions(-) diff --git a/docs/source/news.rst b/docs/source/news.rst index 4705d21f..da6c5fd4 100644 --- a/docs/source/news.rst +++ b/docs/source/news.rst @@ -2,71 +2,63 @@ Changelog ========= -20.0 / not released -=================== +20.0 / 2019/10/30 +================= -- fix: Added support for binding to file descriptors (:issue:`1107`, :pr:`1809`) +- Fixed `fdopen` `RuntimeWarning` in Python 3.8 +- Added check and exception for str type on value in Response process_headers method. +- Ensure WSGI header value is string before conducting regex search on it. +- Added pypy3 to list of tested environments +- Grouped `StopIteration` and `KeyboardInterrupt` exceptions with same body together in Arbiter.run() +- Added `setproctitle` module to `extras_require` in setup.py +- Avoid unnecessary chown of temporary files +- Logging: Handle auth type case insensitively +- Removed `util.import_module` +- Removed fallback for `types.SimpleNamespace` in tests utils +- Use `SourceFileLoader` instead instead of `execfile_` +- Use `importlib` instead of `__import__` and eval` +- Fixed eventlet patching +- Added optional `datadog <https://www.datadoghq.com>`_ tags for statsd metrics +- Header values now are encoded using latin-1, not ascii.
+- Rewritten `parse_address` util added test +- Removed redundant super() arguments +- Simplify `futures` import in gthread module +- Fixed worker_connections` setting to also affects the Gthread worker type +- Fixed setting max_requests +- Bump minimum Eventlet and Gevent versions to 0.24 and 1.4 +- Use Python's default SSL cipher list by default +- handle `wsgi.input_terminated` extension +- Simplify Paste Deployment documentation +- Fix root logging: root and logger are same level. +- Fixed typo in ssl_version documentation +- Documented systemd deployement unit examples +- Added systemd sd_notify support +- Fixed typo in gthread.py +- Added `tornado <https://www.tornadoweb.org/>`_ 5 and 6 support +- Declare our setuptools dependency +- Added support to `--bind` to open file descriptors +- Document how to serve WSGI app modules from Gunicorn +- Provide guidance on X-Forwarded-For access log in documentation +- Add support for named constants in the --ssl-version flag +- Clarify log format usage of header & environment in documentation +- Fixed systemd documentation to properly setup gunicorn unix socket +- Prevent removal unix socket for reuse_port +- Fix `ResourceWarning` when reading a Python config module +- Remove unnecessary call to dict keys method +- Support str and bytes for UNIX socket addresses +- fixed InotifyReloadeder: handle `module.__file__ is None +- `/dev/shm` as a convenient alternative to making your own tmpfs mount in fchmod FAQ +- fix examples to work on python3 +- Fix typo in `--max-requests` documentation +- Clear tornado ioloop before os.fork +- Miscellaneous fixes and improvement for linting using Pylint -19.9.0 / 2018/07/03 -=================== +Breaking Change ++++++++++++++++ -- fix: address a regression that prevented syslog support from working - (:issue:`1668`, :pr:`1773`) -- fix: correctly set `REMOTE_ADDR` on versions of Python 3 affected by - `Python Issue 30205 <https://bugs.python.org/issue30205>`_ - (:issue:`1755`, :pr:`1796`) -- fix: show zero response length correctly in access log
(:pr:`1787`) -- fix: prevent raising :exc:`AttributeError` when ``--reload`` is not passed - in case of a :exc:`SyntaxError` raised from the WSGI application. - (:issue:`1805`, :pr:`1806`) -- The internal module ``gunicorn.workers.async`` was renamed to ``gunicorn.workers.base_async`` - since ``async`` is now a reserved word in Python 3.7. - (:pr:`1527`) - -19.8.1 / 2018/04/30 -=================== - -- fix: secure scheme headers when bound to a unix socket - (:issue:`1766`, :pr:`1767`) - -19.8.0 / 2018/04/28 -=================== - -- Eventlet 0.21.0 support (:issue:`1584`) -- Tornado 5 support (:issue:`1728`, :pr:`1752`) -- support watching additional files with ``--reload-extra-file`` - (:pr:`1527`) -- support configuring logging with a dictionary with ``--logging-config-dict`` - (:issue:`1087`, :pr:`1110`, :pr:`1602`) -- add support for the ``--config`` flag in the ``GUNICORN_CMD_ARGS`` environment - variable (:issue:`1576`, :pr:`1581`) -- disable ``SO_REUSEPORT`` by default and add the ``--reuse-port`` setting - (:issue:`1553`, :issue:`1603`, :pr:`1669`) -- fix: installing `inotify` on MacOS no longer breaks the reloader - (:issue:`1540`, :pr:`1541`) -- fix: do not throw ``TypeError`` when ``SO_REUSEPORT`` is not available - (:issue:`1501`, :pr:`1491`) -- fix: properly decode HTTP paths containing certain non-ASCII characters - (:issue:`1577`, :pr:`1578`) -- fix: remove whitespace when logging header values under gevent (:pr:`1607`) -- fix: close unlinked temporary files (:issue:`1327`, :pr:`1428`) -- fix: parse ``--umask=0`` correctly (:issue:`1622`, :pr:`1632`) -- fix: allow loading applications using relative file paths - (:issue:`1349`, :pr:`1481`) -- fix: force blocking mode on the gevent sockets (:issue:`880`, :pr:`1616`) -- fix: preserve leading `/` in request path (:issue:`1512`, :pr:`1511`) -- fix: forbid contradictory secure scheme headers -- fix: handle malformed basic authentication headers in access log - (:issue:`1683`, :pr:`1684`) -- fix: defer 
handling of ``USR1`` signal to a new greenlet under gevent - (:issue:`1645`, :pr:`1651`) -- fix: the threaded worker would sometimes close the wrong keep-alive - connection under Python 2 (:issue:`1698`, :pr:`1699`) -- fix: re-open log files on ``USR1`` signal using ``handler._open`` to - support subclasses of ``FileHandler`` (:issue:`1739`, :pr:`1742`) -- deprecation: the ``gaiohttp`` worker is deprecated, see the - :ref:`worker-class` documentation for more information - (:issue:`1338`, :pr:`1418`, :pr:`1569`) +- Removed gaiohttp worker +- Drop support for Python 2.x +- Drop support for EOL Python 3.2 and 3.3 History From 83e07e5cf3d0e0d0cc8af676c2bf0a84c9cdfc43 Mon Sep 17 00:00:00 2001 From: Benoit Chesneau Date: Wed, 30 Oct 2019 17:40:35 +0100 Subject: [PATCH 073/263] Fixed linting error in changelog --- docs/source/news.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/news.rst b/docs/source/news.rst index da6c5fd4..4cc8d457 100644 --- a/docs/source/news.rst +++ b/docs/source/news.rst @@ -26,7 +26,7 @@ Changelog - Fixed worker_connections` setting to also affects the Gthread worker type - Fixed setting max_requests - Bump minimum Eventlet and Gevent versions to 0.24 and 1.4 -- Use Python's default SSL cipher list by default +- Use Python default SSL cipher list by default - handle `wsgi.input_terminated` extension - Simplify Paste Deployment documentation - Fix root logging: root and logger are same level. From c1f17a084e26674deb2440909fd7dcb8433fb571 Mon Sep 17 00:00:00 2001 From: Benoit Chesneau Date: Wed, 30 Oct 2019 17:52:01 +0100 Subject: [PATCH 074/263] doc enhancements. 
--- docs/source/news.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/news.rst b/docs/source/news.rst index 4cc8d457..dcd71384 100644 --- a/docs/source/news.rst +++ b/docs/source/news.rst @@ -39,14 +39,14 @@ Changelog - Added support to `--bind` to open file descriptors - Document how to serve WSGI app modules from Gunicorn - Provide guidance on X-Forwarded-For access log in documentation -- Add support for named constants in the --ssl-version flag +- Add support for named constants in the `--ssl-version` flag - Clarify log format usage of header & environment in documentation - Fixed systemd documentation to properly setup gunicorn unix socket - Prevent removal unix socket for reuse_port - Fix `ResourceWarning` when reading a Python config module - Remove unnecessary call to dict keys method - Support str and bytes for UNIX socket addresses -- fixed InotifyReloadeder: handle `module.__file__ is None +- fixed `InotifyReloadeder`: handle `module.__file__` is None - `/dev/shm` as a convenient alternative to making your own tmpfs mount in fchmod FAQ - fix examples to work on python3 - Fix typo in `--max-requests` documentation From a05a447e76f811e67fd619e4b5e77ce808f8e917 Mon Sep 17 00:00:00 2001 From: Benoit Chesneau Date: Wed, 30 Oct 2019 18:11:31 +0100 Subject: [PATCH 075/263] Change maintainers (#2147) changes maintainers to shows who is active on the project as of today. --- MAINTAINERS | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 004fa6e5..91339e23 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1,10 +1,22 @@ +Core maintainers +================ + Benoit Chesneau -Paul J. Davis -Randall Leeds Konstantin Kapustin +Randall Leeds +Berker Peksağ +Jason Madden + +Alumni +====== + +This list contains maintainers that are no longer active on the project. +It is thanks to these people that the project has become what it is today. +Thank you! + + +Paul J. 
Davis Kenneth Reitz Nikolay Kim Andrew Svetlov -Stéphane Wirtel -Berker Peksağ -Jason Madden +Stéphane Wirtel \ No newline at end of file From 438371ee90b9676336a44c7abaeb30ee7fc57a5c Mon Sep 17 00:00:00 2001 From: Benoit Chesneau Date: Sat, 9 Nov 2019 10:53:09 +0100 Subject: [PATCH 076/263] fix doc version --- docs/site/index.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/site/index.html b/docs/site/index.html index 0b35be58..198e7947 100644 --- a/docs/site/index.html +++ b/docs/site/index.html @@ -16,7 +16,7 @@
Latest version: 19.9.0 + href="https://docs.gunicorn.org/en/stable/">20.0.0
From 902d9c89abb2cf9bed688590b2276a19fcf4d35a Mon Sep 17 00:00:00 2001 From: Jason Madden Date: Wed, 13 Nov 2019 07:08:26 -0600 Subject: [PATCH 077/263] Capture os.sendfile before patching in gevent and eventlet workers. Fixes #1925 and fixes #2170. --- gunicorn/workers/geventlet.py | 4 ++-- gunicorn/workers/ggevent.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/gunicorn/workers/geventlet.py b/gunicorn/workers/geventlet.py index 968868a3..e4b425cd 100644 --- a/gunicorn/workers/geventlet.py +++ b/gunicorn/workers/geventlet.py @@ -26,10 +26,10 @@ import greenlet from gunicorn.workers.base_async import AsyncWorker -def _eventlet_sendfile(fdout, fdin, offset, nbytes): +def _eventlet_sendfile(fdout, fdin, offset, nbytes, _os_sendfile=os.sendfile): while True: try: - return os.sendfile(fdout, fdin, offset, nbytes) + return _os_sendfile(fdout, fdin, offset, nbytes) except OSError as e: if e.args[0] == errno.EAGAIN: trampoline(fdout, write=True) diff --git a/gunicorn/workers/ggevent.py b/gunicorn/workers/ggevent.py index 5dfec5e1..85418035 100644 --- a/gunicorn/workers/ggevent.py +++ b/gunicorn/workers/ggevent.py @@ -30,10 +30,10 @@ from gunicorn.workers.base_async import AsyncWorker VERSION = "gevent/%s gunicorn/%s" % (gevent.__version__, gunicorn.__version__) -def _gevent_sendfile(fdout, fdin, offset, nbytes): +def _gevent_sendfile(fdout, fdin, offset, nbytes, _os_sendfile=os.sendfile): while True: try: - return os.sendfile(fdout, fdin, offset, nbytes) + return _os_sendfile(fdout, fdin, offset, nbytes) except OSError as e: if e.args[0] == errno.EAGAIN: socket.wait_write(fdout) From 74cf2ce084a38031ea592903176e5374ecd3f38d Mon Sep 17 00:00:00 2001 From: REN Xiaolei Date: Thu, 14 Nov 2019 11:49:58 +0800 Subject: [PATCH 078/263] Update faq.rst and fix technical mistake on ulimit --- docs/source/faq.rst | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/docs/source/faq.rst b/docs/source/faq.rst index bd8f1d18..6531b99b 100644 
--- a/docs/source/faq.rst +++ b/docs/source/faq.rst @@ -128,10 +128,9 @@ How can I increase the maximum number of file descriptors? One of the first settings that usually needs to be bumped is the maximum number of open file descriptors for a given process. For the confused out there, remember that Unices treat sockets as files. - -:: - - $ sudo ulimit -n 2048 +Considering non-privileged users are not able to relax the limit, you should +firstly switch to root user, increase the limit, then run gunicorn. Using ``sudo +ulimit`` would not take effect. How can I increase the maximum socket backlog? ---------------------------------------------- From 9545e01d17e62fb1e959b6b18c7abe74ce99b9f1 Mon Sep 17 00:00:00 2001 From: REN Xiaolei Date: Mon, 18 Nov 2019 20:53:32 +0800 Subject: [PATCH 079/263] Update faq.rst --- docs/source/faq.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/source/faq.rst b/docs/source/faq.rst index 6531b99b..b728084c 100644 --- a/docs/source/faq.rst +++ b/docs/source/faq.rst @@ -128,10 +128,14 @@ How can I increase the maximum number of file descriptors? One of the first settings that usually needs to be bumped is the maximum number of open file descriptors for a given process. For the confused out there, remember that Unices treat sockets as files. + +.. warning:: Considering non-privileged users are not able to relax the limit, you should firstly switch to root user, increase the limit, then run gunicorn. Using ``sudo ulimit`` would not take effect. +Try systemd's service unit file, or an initscript which runs as root. + How can I increase the maximum socket backlog? 
---------------------------------------------- From 5858f81566f812d5f12c107969d70162dc50eae7 Mon Sep 17 00:00:00 2001 From: REN Xiaolei Date: Mon, 18 Nov 2019 20:58:39 +0800 Subject: [PATCH 080/263] Update faq.rst --- docs/source/faq.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/faq.rst b/docs/source/faq.rst index b728084c..55303122 100644 --- a/docs/source/faq.rst +++ b/docs/source/faq.rst @@ -129,7 +129,7 @@ One of the first settings that usually needs to be bumped is the maximum number of open file descriptors for a given process. For the confused out there, remember that Unices treat sockets as files. -.. warning:: +.. warning:: ``sudo ulimit`` may not work Considering non-privileged users are not able to relax the limit, you should firstly switch to root user, increase the limit, then run gunicorn. Using ``sudo ulimit`` would not take effect. From b798412444aeace740c574486e810735688c02c8 Mon Sep 17 00:00:00 2001 From: Emile Fugulin Date: Mon, 18 Nov 2019 19:44:01 -0500 Subject: [PATCH 081/263] Remove default strip of header name --- gunicorn/config.py | 17 +++++++++++++++++ gunicorn/http/message.py | 5 ++++- tests/requests/invalid/020.http | 4 ++++ tests/requests/invalid/020.py | 5 +++++ tests/requests/valid/028.http | 4 ++++ tests/requests/valid/028.py | 14 ++++++++++++++ 6 files changed, 48 insertions(+), 1 deletion(-) create mode 100644 tests/requests/invalid/020.http create mode 100644 tests/requests/invalid/020.py create mode 100644 tests/requests/valid/028.http create mode 100644 tests/requests/valid/028.py diff --git a/gunicorn/config.py b/gunicorn/config.py index e8e0f926..d165f256 100644 --- a/gunicorn/config.py +++ b/gunicorn/config.py @@ -2010,3 +2010,20 @@ class PasteGlobalConf(Setting): .. 
versionadded:: 19.7 """ + + +class StripHeaderSpaces(Setting): + name = "strip_header_spaces" + section = "Server Mechanics" + cli = ["--strip-header-spaces"] + validator = validate_bool + action = "store_true" + default = False + desc = """\ + Strip spaces present between the header name and the the ``:``. + + This is known to induce vulnerabilities and is not compliant with the HTTP/1.1 standard. + See https://portswigger.net/research/http-desync-attacks-request-smuggling-reborn. + + Use with care and only if necessary. + """ diff --git a/gunicorn/http/message.py b/gunicorn/http/message.py index 4040c7ae..5807a464 100644 --- a/gunicorn/http/message.py +++ b/gunicorn/http/message.py @@ -90,7 +90,10 @@ class Message(object): if curr.find(":") < 0: raise InvalidHeader(curr.strip()) name, value = curr.split(":", 1) - name = name.rstrip(" \t").upper() + if self.cfg.strip_header_spaces: + name = name.rstrip(" \t").upper() + else: + name = name.upper() if HEADER_RE.search(name): raise InvalidHeaderName(name) diff --git a/tests/requests/invalid/020.http b/tests/requests/invalid/020.http new file mode 100644 index 00000000..a699e848 --- /dev/null +++ b/tests/requests/invalid/020.http @@ -0,0 +1,4 @@ +GET /stuff/here?foo=bar HTTP/1.1\r\n +Content-Length : 3\r\n +\r\n +xyz diff --git a/tests/requests/invalid/020.py b/tests/requests/invalid/020.py new file mode 100644 index 00000000..d336fbc8 --- /dev/null +++ b/tests/requests/invalid/020.py @@ -0,0 +1,5 @@ +from gunicorn.config import Config +from gunicorn.http.errors import InvalidHeaderName + +cfg = Config() +request = InvalidHeaderName diff --git a/tests/requests/valid/028.http b/tests/requests/valid/028.http new file mode 100644 index 00000000..9db5ecfb --- /dev/null +++ b/tests/requests/valid/028.http @@ -0,0 +1,4 @@ +GET /stuff/here?foo=bar HTTP/1.1\r\n +Content-Length : 3\r\n +\r\n +xyz \ No newline at end of file diff --git a/tests/requests/valid/028.py b/tests/requests/valid/028.py new file mode 100644 index 
00000000..d8254683 --- /dev/null +++ b/tests/requests/valid/028.py @@ -0,0 +1,14 @@ +from gunicorn.config import Config + +cfg = Config() +cfg.set("strip_header_spaces", True) + +request = { + "method": "GET", + "uri": uri("/stuff/here?foo=bar"), + "version": (1, 1), + "headers": [ + ("CONTENT-LENGTH", "3"), + ], + "body": b"xyz" +} \ No newline at end of file From bd8670b4db321727de22d934dee6dc1c2d41f704 Mon Sep 17 00:00:00 2001 From: Emile Fugulin Date: Mon, 18 Nov 2019 20:49:22 -0500 Subject: [PATCH 082/263] Handle duplicate content-length --- gunicorn/http/message.py | 2 ++ tests/requests/invalid/021.http | 5 +++++ tests/requests/invalid/021.py | 5 +++++ 3 files changed, 12 insertions(+) create mode 100644 tests/requests/invalid/021.http create mode 100644 tests/requests/invalid/021.py diff --git a/gunicorn/http/message.py b/gunicorn/http/message.py index 5807a464..cbfbd11c 100644 --- a/gunicorn/http/message.py +++ b/gunicorn/http/message.py @@ -131,6 +131,8 @@ class Message(object): content_length = None for (name, value) in self.headers: if name == "CONTENT-LENGTH": + if content_length is not None: + raise InvalidHeader("CONTENT-LENGTH", req=self) content_length = value elif name == "TRANSFER-ENCODING": chunked = value.lower() == "chunked" diff --git a/tests/requests/invalid/021.http b/tests/requests/invalid/021.http new file mode 100644 index 00000000..90e77dd1 --- /dev/null +++ b/tests/requests/invalid/021.http @@ -0,0 +1,5 @@ +GET /stuff/here?foo=bar HTTP/1.1\r\n +Content-Length: 3\r\n +Content-Length: 2\r\n +\r\n +xyz diff --git a/tests/requests/invalid/021.py b/tests/requests/invalid/021.py new file mode 100644 index 00000000..95b0581a --- /dev/null +++ b/tests/requests/invalid/021.py @@ -0,0 +1,5 @@ +from gunicorn.config import Config +from gunicorn.http.errors import InvalidHeader + +cfg = Config() +request = InvalidHeader From f74324bd750265a3c1f47b4c837f6fd7ce74db54 Mon Sep 17 00:00:00 2001 From: Emile Fugulin Date: Mon, 18 Nov 2019 22:29:02 -0500 
Subject: [PATCH 083/263] Handle multiple transfer-encoding --- gunicorn/http/errors.py | 8 ++++++++ gunicorn/http/message.py | 10 ++++++++-- gunicorn/workers/base.py | 7 ++++++- tests/requests/invalid/022.http | 5 +++++ tests/requests/invalid/022.py | 5 +++++ tests/requests/valid/029.http | 7 +++++++ tests/requests/valid/029.py | 14 ++++++++++++++ tests/requests/valid/030.http | 7 +++++++ tests/requests/valid/030.py | 14 ++++++++++++++ 9 files changed, 74 insertions(+), 3 deletions(-) create mode 100644 tests/requests/invalid/022.http create mode 100644 tests/requests/invalid/022.py create mode 100644 tests/requests/valid/029.http create mode 100644 tests/requests/valid/029.py create mode 100644 tests/requests/valid/030.http create mode 100644 tests/requests/valid/030.py diff --git a/gunicorn/http/errors.py b/gunicorn/http/errors.py index 7839ef05..ea5b4826 100644 --- a/gunicorn/http/errors.py +++ b/gunicorn/http/errors.py @@ -118,3 +118,11 @@ class ForbiddenProxyRequest(ParseException): class InvalidSchemeHeaders(ParseException): def __str__(self): return "Contradictory scheme headers" + + +class UnsupportedTransferEncoding(ParseException): + def __init__(self, te): + self.te = te + + def __str__(self): + return "Unsupported Transfer-Encoding: %s" % self.te diff --git a/gunicorn/http/message.py b/gunicorn/http/message.py index cbfbd11c..59f50d7e 100644 --- a/gunicorn/http/message.py +++ b/gunicorn/http/message.py @@ -12,7 +12,7 @@ from gunicorn.http.unreader import SocketUnreader from gunicorn.http.body import ChunkedReader, LengthReader, EOFReader, Body from gunicorn.http.errors import (InvalidHeader, InvalidHeaderName, NoMoreData, InvalidRequestLine, InvalidRequestMethod, InvalidHTTPVersion, - LimitRequestLine, LimitRequestHeaders) + LimitRequestLine, LimitRequestHeaders, UnsupportedTransferEncoding) from gunicorn.http.errors import InvalidProxyLine, ForbiddenProxyRequest from gunicorn.http.errors import InvalidSchemeHeaders from gunicorn.util import 
bytes_to_str, split_request_uri @@ -135,7 +135,13 @@ class Message(object): raise InvalidHeader("CONTENT-LENGTH", req=self) content_length = value elif name == "TRANSFER-ENCODING": - chunked = value.lower() == "chunked" + normalized_value = value.lower() + if normalized_value == "identity": + pass + elif normalized_value == "chunked": + chunked = True + else: + raise UnsupportedTransferEncoding(normalized_value) elif name == "SEC-WEBSOCKET-KEY1": content_length = 8 diff --git a/gunicorn/workers/base.py b/gunicorn/workers/base.py index f95994bc..7689c61f 100644 --- a/gunicorn/workers/base.py +++ b/gunicorn/workers/base.py @@ -20,6 +20,7 @@ from gunicorn.http.errors import ( InvalidProxyLine, InvalidRequestLine, InvalidRequestMethod, InvalidSchemeHeaders, LimitRequestHeaders, LimitRequestLine, + UnsupportedTransferEncoding ) from gunicorn.http.wsgi import Response, default_environ from gunicorn.reloader import reloader_engines @@ -206,7 +207,7 @@ class Worker(object): LimitRequestLine, LimitRequestHeaders, InvalidProxyLine, ForbiddenProxyRequest, InvalidSchemeHeaders, - SSLError)): + SSLError, UnsupportedTransferEncoding)): status_int = 400 reason = "Bad Request" @@ -237,6 +238,10 @@ class Worker(object): reason = "Forbidden" mesg = "'%s'" % str(exc) status_int = 403 + elif isinstance(exc, UnsupportedTransferEncoding): + reason = "Not implemented" + mesg = "'%s'" % str(exc) + status_int = 501 msg = "Invalid request from ip={ip}: {error}" self.log.debug(msg.format(ip=addr[0], error=str(exc))) diff --git a/tests/requests/invalid/022.http b/tests/requests/invalid/022.http new file mode 100644 index 00000000..784504be --- /dev/null +++ b/tests/requests/invalid/022.http @@ -0,0 +1,5 @@ +GET /stuff/here?foo=bar HTTP/1.1\r\n +Transfer-Encoding: chunked\r\n +Transfer-Encoding: compress\r\n +\r\n +xyz diff --git a/tests/requests/invalid/022.py b/tests/requests/invalid/022.py new file mode 100644 index 00000000..db5c9f38 --- /dev/null +++ b/tests/requests/invalid/022.py @@ 
-0,0 +1,5 @@ +from gunicorn.config import Config +from gunicorn.http.errors import UnsupportedTransferEncoding + +cfg = Config() +request = UnsupportedTransferEncoding diff --git a/tests/requests/valid/029.http b/tests/requests/valid/029.http new file mode 100644 index 00000000..c8611dbd --- /dev/null +++ b/tests/requests/valid/029.http @@ -0,0 +1,7 @@ +GET /stuff/here?foo=bar HTTP/1.1\r\n +Transfer-Encoding: chunked\r\n +Transfer-Encoding: identity\r\n +\r\n +5\r\n +hello\r\n +000\r\n diff --git a/tests/requests/valid/029.py b/tests/requests/valid/029.py new file mode 100644 index 00000000..f25449d1 --- /dev/null +++ b/tests/requests/valid/029.py @@ -0,0 +1,14 @@ +from gunicorn.config import Config + +cfg = Config() + +request = { + "method": "GET", + "uri": uri("/stuff/here?foo=bar"), + "version": (1, 1), + "headers": [ + ('TRANSFER-ENCODING', 'chunked'), + ('TRANSFER-ENCODING', 'identity') + ], + "body": b"hello" +} diff --git a/tests/requests/valid/030.http b/tests/requests/valid/030.http new file mode 100644 index 00000000..5d029dd9 --- /dev/null +++ b/tests/requests/valid/030.http @@ -0,0 +1,7 @@ +GET /stuff/here?foo=bar HTTP/1.1\r\n +Transfer-Encoding: identity\r\n +Transfer-Encoding: chunked\r\n +\r\n +5\r\n +hello\r\n +000\r\n diff --git a/tests/requests/valid/030.py b/tests/requests/valid/030.py new file mode 100644 index 00000000..3e98467b --- /dev/null +++ b/tests/requests/valid/030.py @@ -0,0 +1,14 @@ +from gunicorn.config import Config + +cfg = Config() + +request = { + "method": "GET", + "uri": uri("/stuff/here?foo=bar"), + "version": (1, 1), + "headers": [ + ('TRANSFER-ENCODING', 'identity'), + ('TRANSFER-ENCODING', 'chunked') + ], + "body": b"hello" +} From c5a254ad9d3b46950a86d6b868cd6249c827b354 Mon Sep 17 00:00:00 2001 From: REN Xiaolei Date: Tue, 19 Nov 2019 16:32:43 +0800 Subject: [PATCH 084/263] Update faq.rst --- docs/source/faq.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/faq.rst b/docs/source/faq.rst 
index 55303122..e98e4eb6 100644 --- a/docs/source/faq.rst +++ b/docs/source/faq.rst @@ -130,7 +130,7 @@ of open file descriptors for a given process. For the confused out there, remember that Unices treat sockets as files. .. warning:: ``sudo ulimit`` may not work -Considering non-privileged users are not able to relax the limit, you should + Considering non-privileged users are not able to relax the limit, you should firstly switch to root user, increase the limit, then run gunicorn. Using ``sudo ulimit`` would not take effect. From 00b58979385c98a60c516b1b1963d2a91a7238d8 Mon Sep 17 00:00:00 2001 From: REN Xiaolei Date: Tue, 19 Nov 2019 16:34:40 +0800 Subject: [PATCH 085/263] Update faq.rst --- docs/source/faq.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/source/faq.rst b/docs/source/faq.rst index e98e4eb6..a982f808 100644 --- a/docs/source/faq.rst +++ b/docs/source/faq.rst @@ -130,7 +130,8 @@ of open file descriptors for a given process. For the confused out there, remember that Unices treat sockets as files. .. warning:: ``sudo ulimit`` may not work - Considering non-privileged users are not able to relax the limit, you should + +Considering non-privileged users are not able to relax the limit, you should firstly switch to root user, increase the limit, then run gunicorn. Using ``sudo ulimit`` would not take effect. From 19cb68f4c3b55da22581c008659ee62d8c54ab2b Mon Sep 17 00:00:00 2001 From: David Lord Date: Tue, 19 Nov 2019 13:49:44 -0800 Subject: [PATCH 086/263] load application from factory function (#2178) * load application from factory function Use `ast.parse` to validate that the string passed to the CLI is either an attribute name or a function call. Use `ast.literal_eval` to parse any positional and keyword arguments to the function. Call the function to get the real application. 
Co-authored-by: Connor Brinton * test coverage for util.import_app * document app factory pattern --- docs/source/run.rst | 21 ++++++++++++ gunicorn/util.py | 84 +++++++++++++++++++++++++++++++++++++++++++-- tests/support.py | 35 +++++++++++++------ tests/test_util.py | 49 +++++++++++++++++++++----- 4 files changed, 167 insertions(+), 22 deletions(-) diff --git a/docs/source/run.rst b/docs/source/run.rst index 0da87374..71abc739 100644 --- a/docs/source/run.rst +++ b/docs/source/run.rst @@ -44,8 +44,29 @@ Example with the test app: You can now run the app with the following command:: +.. code-block:: text + $ gunicorn --workers=2 test:app +The variable name can also be a function call. In that case the name +will be imported from the module, then called to get the application +object. This is commonly referred to as the "application factory" +pattern. + +.. code-block:: python + + def create_app(): + app = FrameworkApp() + ... + return app + +.. code-block:: text + + $ gunicorn --workers=2 'test:create_app()' + +Positional and keyword arguments can also be passed, but it is +recommended to load configuration from environment variables rather than +the command line. Commonly Used Arguments ^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/gunicorn/util.py b/gunicorn/util.py index fbdd5cfc..8ccaf9b3 100644 --- a/gunicorn/util.py +++ b/gunicorn/util.py @@ -2,7 +2,7 @@ # # This file is part of gunicorn released under the MIT license. # See the NOTICE for more information. - +import ast import email.utils import errno import fcntl @@ -320,6 +320,32 @@ def write_error(sock, status_int, reason, mesg): write_nonblock(sock, http.encode('latin1')) +def _called_with_wrong_args(f): + """Check whether calling a function raised a ``TypeError`` because + the call failed or because something in the function raised the + error. + + :param f: The function that was called. + :return: ``True`` if the call failed. 
+ """ + tb = sys.exc_info()[2] + + try: + while tb is not None: + if tb.tb_frame.f_code is f.__code__: + # In the function, it was called successfully. + return False + + tb = tb.tb_next + + # Didn't reach the function. + return True + finally: + # Delete tb to break a circular reference in Python 2. + # https://docs.python.org/2/library/sys.html#sys.exc_info + del tb + + def import_app(module): parts = module.split(":", 1) if len(parts) == 1: @@ -335,13 +361,65 @@ def import_app(module): raise ImportError(msg % (module.rsplit(".", 1)[0], obj)) raise + # Parse obj as a single expression to determine if it's a valid + # attribute name or function call. + try: + expression = ast.parse(obj, mode="eval").body + except SyntaxError: + raise AppImportError( + "Failed to parse %r as an attribute name or function call." % obj + ) + + if isinstance(expression, ast.Name): + name = expression.id + args = kwargs = None + elif isinstance(expression, ast.Call): + # Ensure the function name is an attribute name only. + if not isinstance(expression.func, ast.Name): + raise AppImportError("Function reference must be a simple name: %r" % obj) + + name = expression.func.id + + # Parse the positional and keyword arguments as literals. + try: + args = [ast.literal_eval(arg) for arg in expression.args] + kwargs = {kw.arg: ast.literal_eval(kw.value) for kw in expression.keywords} + except ValueError: + # literal_eval gives cryptic error messages, show a generic + # message with the full expression instead. + raise AppImportError( + "Failed to parse arguments as literal values: %r" % obj + ) + else: + raise AppImportError( + "Failed to parse %r as an attribute name or function call." 
% obj + ) + is_debug = logging.root.level == logging.DEBUG try: - app = getattr(mod, obj) + app = getattr(mod, name) except AttributeError: if is_debug: traceback.print_exception(*sys.exc_info()) - raise AppImportError("Failed to find application object %r in %r" % (obj, module)) + raise AppImportError("Failed to find attribute %r in %r." % (name, module)) + + # If the expression was a function call, call the retrieved object + # to get the real application. + if args is not None: + try: + app = app(*args, **kwargs) + except TypeError as e: + # If the TypeError was due to bad arguments to the factory + # function, show Python's nice error message without a + # traceback. + if _called_with_wrong_args(app): + raise AppImportError( + "".join(traceback.format_exception_only(TypeError, e)).strip() + ) + + # Otherwise it was raised from within the function, show the + # full traceback. + raise if app is None: raise AppImportError("Failed to find application object: %r" % obj) diff --git a/tests/support.py b/tests/support.py index af412348..f50c2422 100644 --- a/tests/support.py +++ b/tests/support.py @@ -7,19 +7,32 @@ from wsgiref.validate import validator HOST = "127.0.0.1" -@validator -def app(environ, start_response): - """Simplest possible application object""" +def create_app(name="World", count=1): + message = (('Hello, %s!\n' % name) * count).encode("utf8") + length = str(len(message)) - data = b'Hello, World!\n' - status = '200 OK' + @validator + def app(environ, start_response): + """Simplest possible application object""" - response_headers = [ - ('Content-type', 'text/plain'), - ('Content-Length', str(len(data))), - ] - start_response(status, response_headers) - return iter([data]) + status = '200 OK' + + response_headers = [ + ('Content-type', 'text/plain'), + ('Content-Length', length), + ] + start_response(status, response_headers) + return iter([message]) + + return app + + +app = application = create_app() +none_app = None + + +def error_factory(): + 
raise TypeError("inner") def requires_mac_ver(*min_version): diff --git a/tests/test_util.py b/tests/test_util.py index 2494d2c5..b7ba6e01 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -2,6 +2,7 @@ # # This file is part of gunicorn released under the MIT license. # See the NOTICE for more information. +import os import pytest @@ -60,17 +61,49 @@ def test_warn(capsys): assert '!!! WARNING: test warn' in err -def test_import_app(): - assert util.import_app('support:app') +@pytest.mark.parametrize( + "value", + [ + "support", + "support:app", + "support:create_app()", + "support:create_app('Gunicorn', 3)", + "support:create_app(count=3)", + ], +) +def test_import_app_good(value): + assert util.import_app(value) + + +@pytest.mark.parametrize( + ("value", "exc_type", "msg"), + [ + ("a:app", ImportError, "No module"), + ("support:create_app(", AppImportError, "Failed to parse"), + ("support:create.app()", AppImportError, "Function reference"), + ("support:create_app(Gunicorn)", AppImportError, "literal values"), + ("support:create.app", AppImportError, "attribute name"), + ("support:wrong_app", AppImportError, "find attribute"), + ("support:error_factory(1)", AppImportError, "error_factory() takes"), + ("support:error_factory()", TypeError, "inner"), + ("support:none_app", AppImportError, "find application object"), + ("support:HOST", AppImportError, "callable"), + ], +) +def test_import_app_bad(value, exc_type, msg): + with pytest.raises(exc_type) as exc_info: + util.import_app(value) + + assert msg in str(exc_info.value) + + +def test_import_app_py_ext(monkeypatch): + monkeypatch.chdir(os.path.dirname(__file__)) with pytest.raises(ImportError) as exc_info: - util.import_app('a:app') - assert 'No module' in str(exc_info.value) + util.import_app("support.py") - with pytest.raises(AppImportError) as exc_info: - util.import_app('support:wrong_app') - msg = "Failed to find application object 'wrong_app' in 'support'" - assert msg in str(exc_info.value) + 
assert "did you mean" in str(exc_info.value) def test_to_bytestring(): From 4264e09c6f403dc3ffe3769e7d4b65fce41f0a3c Mon Sep 17 00:00:00 2001 From: Gaige B Paulsen Date: Wed, 20 Nov 2019 01:09:43 -0500 Subject: [PATCH 087/263] fix fromfd on solaris This patches the fromfd code in socketfromfd by grabbing the correct symbol under SmartOS (SunOS). The patch is pretty straightforward, basically switching on the OS to determine what the symbol is likely to be. If need be, I could put a try block around the original libc.getsockopt and then fall back to looking for _so_getsockopt, if that's preferred in this codebase fix #2184 --- gunicorn/socketfromfd.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/gunicorn/socketfromfd.py b/gunicorn/socketfromfd.py index 4c2847b2..69299200 100644 --- a/gunicorn/socketfromfd.py +++ b/gunicorn/socketfromfd.py @@ -10,6 +10,8 @@ import ctypes import os import socket import sys +import platform + from ctypes.util import find_library __all__ = ('fromfd',) @@ -34,8 +36,10 @@ def _errcheck_errno(result, func, arguments): raise OSError(errno, os.strerror(errno)) return arguments - -_libc_getsockopt = libc.getsockopt +if platform.system() == 'SunOS': + _libc_getsockopt = libc._so_getsockopt +else: + _libc_getsockopt = libc.getsockopt _libc_getsockopt.argtypes = [ ctypes.c_int, # int sockfd ctypes.c_int, # int level From ddf5e66ac864c62d5426d96247f9156a18134597 Mon Sep 17 00:00:00 2001 From: Emile Fugulin Date: Wed, 20 Nov 2019 12:24:52 -0500 Subject: [PATCH 088/263] Remove strict check of Transfer-Encoding --- gunicorn/http/errors.py | 8 -------- gunicorn/http/message.py | 9 ++------- gunicorn/workers/base.py | 7 +------ tests/requests/invalid/022.http | 5 ----- tests/requests/invalid/022.py | 5 ----- 5 files changed, 3 insertions(+), 31 deletions(-) delete mode 100644 tests/requests/invalid/022.http delete mode 100644 tests/requests/invalid/022.py diff --git a/gunicorn/http/errors.py b/gunicorn/http/errors.py index 
ea5b4826..7839ef05 100644 --- a/gunicorn/http/errors.py +++ b/gunicorn/http/errors.py @@ -118,11 +118,3 @@ class ForbiddenProxyRequest(ParseException): class InvalidSchemeHeaders(ParseException): def __str__(self): return "Contradictory scheme headers" - - -class UnsupportedTransferEncoding(ParseException): - def __init__(self, te): - self.te = te - - def __str__(self): - return "Unsupported Transfer-Encoding: %s" % self.te diff --git a/gunicorn/http/message.py b/gunicorn/http/message.py index 59f50d7e..e5ce4a68 100644 --- a/gunicorn/http/message.py +++ b/gunicorn/http/message.py @@ -12,7 +12,7 @@ from gunicorn.http.unreader import SocketUnreader from gunicorn.http.body import ChunkedReader, LengthReader, EOFReader, Body from gunicorn.http.errors import (InvalidHeader, InvalidHeaderName, NoMoreData, InvalidRequestLine, InvalidRequestMethod, InvalidHTTPVersion, - LimitRequestLine, LimitRequestHeaders, UnsupportedTransferEncoding) + LimitRequestLine, LimitRequestHeaders) from gunicorn.http.errors import InvalidProxyLine, ForbiddenProxyRequest from gunicorn.http.errors import InvalidSchemeHeaders from gunicorn.util import bytes_to_str, split_request_uri @@ -135,13 +135,8 @@ class Message(object): raise InvalidHeader("CONTENT-LENGTH", req=self) content_length = value elif name == "TRANSFER-ENCODING": - normalized_value = value.lower() - if normalized_value == "identity": - pass - elif normalized_value == "chunked": + if value.lower() == "chunked": chunked = True - else: - raise UnsupportedTransferEncoding(normalized_value) elif name == "SEC-WEBSOCKET-KEY1": content_length = 8 diff --git a/gunicorn/workers/base.py b/gunicorn/workers/base.py index 7689c61f..f95994bc 100644 --- a/gunicorn/workers/base.py +++ b/gunicorn/workers/base.py @@ -20,7 +20,6 @@ from gunicorn.http.errors import ( InvalidProxyLine, InvalidRequestLine, InvalidRequestMethod, InvalidSchemeHeaders, LimitRequestHeaders, LimitRequestLine, - UnsupportedTransferEncoding ) from gunicorn.http.wsgi import 
Response, default_environ from gunicorn.reloader import reloader_engines @@ -207,7 +206,7 @@ class Worker(object): LimitRequestLine, LimitRequestHeaders, InvalidProxyLine, ForbiddenProxyRequest, InvalidSchemeHeaders, - SSLError, UnsupportedTransferEncoding)): + SSLError)): status_int = 400 reason = "Bad Request" @@ -238,10 +237,6 @@ class Worker(object): reason = "Forbidden" mesg = "'%s'" % str(exc) status_int = 403 - elif isinstance(exc, UnsupportedTransferEncoding): - reason = "Not implemented" - mesg = "'%s'" % str(exc) - status_int = 501 msg = "Invalid request from ip={ip}: {error}" self.log.debug(msg.format(ip=addr[0], error=str(exc))) diff --git a/tests/requests/invalid/022.http b/tests/requests/invalid/022.http deleted file mode 100644 index 784504be..00000000 --- a/tests/requests/invalid/022.http +++ /dev/null @@ -1,5 +0,0 @@ -GET /stuff/here?foo=bar HTTP/1.1\r\n -Transfer-Encoding: chunked\r\n -Transfer-Encoding: compress\r\n -\r\n -xyz diff --git a/tests/requests/invalid/022.py b/tests/requests/invalid/022.py deleted file mode 100644 index db5c9f38..00000000 --- a/tests/requests/invalid/022.py +++ /dev/null @@ -1,5 +0,0 @@ -from gunicorn.config import Config -from gunicorn.http.errors import UnsupportedTransferEncoding - -cfg = Config() -request = UnsupportedTransferEncoding From be513237cc4bcabc5b5988504294f8eb1da5f745 Mon Sep 17 00:00:00 2001 From: Emile Fugulin Date: Wed, 20 Nov 2019 12:57:46 -0500 Subject: [PATCH 089/263] Add syttent to THANKS --- THANKS | 1 + 1 file changed, 1 insertion(+) diff --git a/THANKS b/THANKS index 2c2a6de5..09a15fad 100644 --- a/THANKS +++ b/THANKS @@ -178,3 +178,4 @@ WooParadog Xie Shi Yue Du zakdances +Emile Fugulin From b8860ef6150bb6b7f0770180e9471b3c80aa8ffc Mon Sep 17 00:00:00 2001 From: benoitc Date: Wed, 20 Nov 2019 00:19:07 +0100 Subject: [PATCH 090/263] fix gunicorn when used with musl libc find_library('c') doesn't work in Alpine Linux. This happen because musl has a simpler implementation of libc. 
This patch fix it by extending ctypes.util.find_library to search the libs using LD_LIBRARY_PATH. Patch is based on https://github.com/python/cpython/commit/e3f67780aab24401a50af64e688d38c24ee41ad0 See also https://bugs.python.org/issue21622 fix #2160 --- gunicorn/socketfromfd.py | 2 +- gunicorn/util.py | 30 ++++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/gunicorn/socketfromfd.py b/gunicorn/socketfromfd.py index 69299200..74f6503d 100644 --- a/gunicorn/socketfromfd.py +++ b/gunicorn/socketfromfd.py @@ -12,7 +12,7 @@ import socket import sys import platform -from ctypes.util import find_library +from .util import find_library __all__ = ('fromfd',) diff --git a/gunicorn/util.py b/gunicorn/util.py index 8ccaf9b3..233094cc 100644 --- a/gunicorn/util.py +++ b/gunicorn/util.py @@ -3,6 +3,7 @@ # This file is part of gunicorn released under the MIT license. # See the NOTICE for more information. import ast +import ctypes.util import email.utils import errno import fcntl @@ -635,3 +636,32 @@ def bytes_to_str(b): def unquote_to_wsgi_str(string): return urllib.parse.unquote_to_bytes(string).decode('latin-1') + + +def _findWalk_ldpath(name): + def _is_elf(filepath): + try: + with open(filepath, 'rb') as fh: + return fh.read(4) == b'\x7fELF' + except: + return False + from glob import glob + if os.path.isabs(name): + return name + + # search LD_LIBRARY_PATH list + paths = os.environ.get('LD_LIBRARY_PATH', '').split(':') + if paths: + for d in paths: + f = os.path.join(d, name) + if _is_elf(f): + return os.path.basename(f) + prefix = os.path.join(d, 'lib'+name) + for suffix in ['.so', '.so.*']: + for f in glob('{0}{1}'.format(prefix, suffix)): + if _is_elf(f): + return os.path.basename(f) + + +def find_library(name): + return ctypes.util.find_library(name) or _findWalk_ldpath(name) From d55c7cb01505a1c4db946ea76bac4aef04a248cf Mon Sep 17 00:00:00 2001 From: benoitc Date: Wed, 20 Nov 2019 21:36:01 +0100 Subject: [PATCH 091/263] fix lib 
discovery LD_LIBRARY_PATH is sometimes empty, this change fix it. Also test suffix with the "." as it seems to be an issue --- gunicorn/util.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/gunicorn/util.py b/gunicorn/util.py index 233094cc..402123da 100644 --- a/gunicorn/util.py +++ b/gunicorn/util.py @@ -650,17 +650,19 @@ def _findWalk_ldpath(name): return name # search LD_LIBRARY_PATH list - paths = os.environ.get('LD_LIBRARY_PATH', '').split(':') - if paths: - for d in paths: - f = os.path.join(d, name) - if _is_elf(f): - return os.path.basename(f) - prefix = os.path.join(d, 'lib'+name) - for suffix in ['.so', '.so.*']: - for f in glob('{0}{1}'.format(prefix, suffix)): - if _is_elf(f): - return os.path.basename(f) + paths = ['/lib', '/usr/local/lib', '/usr/lib'] + if 'LD_LIBRARY_PATH' in os.environ: + paths = os.environ['LD_LIBRARY_PATH'].split(':') + paths + + for d in paths: + f = os.path.join(d, name) + if _is_elf(f): + return os.path.basename(f) + prefix = os.path.join(d, 'lib'+name) + for suffix in ['so', 'so.*']: + for f in glob('{0}.{1}'.format(prefix, suffix)): + if _is_elf(f): + return os.path.basename(f) def find_library(name): From 265e58f9e82f64e003015fb25823f5a983be8c27 Mon Sep 17 00:00:00 2001 From: benoitc Date: Wed, 20 Nov 2019 22:46:00 +0100 Subject: [PATCH 092/263] fix wsgi.input_terminated Sometimes both TRANSFER-ENCODING=chunked and CONTENT_LENGTH are set. Since gunicorn prioritise (following the HTTP1.1 spec) chunked encoding we should make sure in this case to signal to th application that the input is terminated by the server. Without the change gunicorn were always setting wsg.input_terminated to False when a CONTENT_LENGTH header was present ignoring that Gunicorn was afaik handling the termination. 
--- gunicorn/http/message.py | 2 ++ gunicorn/http/wsgi.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/gunicorn/http/message.py b/gunicorn/http/message.py index e5ce4a68..43ab714e 100644 --- a/gunicorn/http/message.py +++ b/gunicorn/http/message.py @@ -33,6 +33,7 @@ class Message(object): self.version = None self.headers = [] self.trailers = [] + self.terminated = True self.body = None self.scheme = "https" if cfg.is_ssl else "http" @@ -151,6 +152,7 @@ class Message(object): if content_length < 0: raise InvalidHeader("CONTENT-LENGTH", req=self) + self.terminated = False self.body = Body(LengthReader(self.unreader, content_length)) else: self.body = Body(EOFReader(self.unreader)) diff --git a/gunicorn/http/wsgi.py b/gunicorn/http/wsgi.py index 3524471f..615c2deb 100644 --- a/gunicorn/http/wsgi.py +++ b/gunicorn/http/wsgi.py @@ -82,6 +82,7 @@ def default_environ(req, sock, cfg): env = base_environ(cfg) env.update({ "wsgi.input": req.body, + "wsgi.input_terminated": req.terminated, "gunicorn.socket": sock, "REQUEST_METHOD": req.method, "QUERY_STRING": req.query, @@ -131,7 +132,6 @@ def create(req, sock, client, server, cfg): continue elif hdr_name == "CONTENT-LENGTH": environ['CONTENT_LENGTH'] = hdr_value - environ['wsgi.input_terminated'] = False continue key = 'HTTP_' + hdr_name.replace('-', '_') From 499dcd4021587d573256284ac9e557825b881d71 Mon Sep 17 00:00:00 2001 From: benoitc Date: Thu, 21 Nov 2019 10:18:07 +0100 Subject: [PATCH 093/263] add a simple example to test module path --- examples/deep/__init__.py | 0 examples/deep/test.py | 27 +++++++++++++++++++++++++++ 2 files changed, 27 insertions(+) create mode 100644 examples/deep/__init__.py create mode 100644 examples/deep/test.py diff --git a/examples/deep/__init__.py b/examples/deep/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/examples/deep/test.py b/examples/deep/test.py new file mode 100644 index 00000000..09ea06f0 --- /dev/null +++ b/examples/deep/test.py 
@@ -0,0 +1,27 @@ +# -*- coding: utf-8 - +# +# This file is part of gunicorn released under the MIT license. +# See the NOTICE for more information. +# +# Example code from Eventlet sources + +from wsgiref.validate import validator + +from gunicorn import __version__ + + +@validator +def app(environ, start_response): + """Simplest possible application object""" + + data = b'Hello, World!\n' + status = '200 OK' + + response_headers = [ + ('Content-type', 'text/plain'), + ('Content-Length', str(len(data))), + ('X-Gunicorn-Version', __version__), + ('Foo', 'B\u00e5r'), # Foo: Bår + ] + start_response(status, response_headers) + return iter([data]) From e150ca4ff87d52b1c2b3d5376fc71789b65a3fb4 Mon Sep 17 00:00:00 2001 From: Benoit Chesneau Date: Thu, 21 Nov 2019 11:53:13 +0100 Subject: [PATCH 094/263] add "*.so.*" case to util.find_lib The actual libc name in musl is: libc.musl-x86_64.so.1 this case should handle it. --- gunicorn/util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gunicorn/util.py b/gunicorn/util.py index 402123da..0cf5e751 100644 --- a/gunicorn/util.py +++ b/gunicorn/util.py @@ -659,7 +659,7 @@ def _findWalk_ldpath(name): if _is_elf(f): return os.path.basename(f) prefix = os.path.join(d, 'lib'+name) - for suffix in ['so', 'so.*']: + for suffix in ['so', 'so.*', '*.so.*']: for f in glob('{0}.{1}'.format(prefix, suffix)): if _is_elf(f): return os.path.basename(f) From 0d8a02279114587e4cd8a11ebe853a874c5eacaf Mon Sep 17 00:00:00 2001 From: benoitc Date: Thu, 21 Nov 2019 18:11:17 +0100 Subject: [PATCH 095/263] Revert "fix wsgi.input_terminated" This reverts commit 265e58f9e82f64e003015fb25823f5a983be8c27. 
--- gunicorn/http/message.py | 2 -- gunicorn/http/wsgi.py | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/gunicorn/http/message.py b/gunicorn/http/message.py index 43ab714e..e5ce4a68 100644 --- a/gunicorn/http/message.py +++ b/gunicorn/http/message.py @@ -33,7 +33,6 @@ class Message(object): self.version = None self.headers = [] self.trailers = [] - self.terminated = True self.body = None self.scheme = "https" if cfg.is_ssl else "http" @@ -152,7 +151,6 @@ class Message(object): if content_length < 0: raise InvalidHeader("CONTENT-LENGTH", req=self) - self.terminated = False self.body = Body(LengthReader(self.unreader, content_length)) else: self.body = Body(EOFReader(self.unreader)) diff --git a/gunicorn/http/wsgi.py b/gunicorn/http/wsgi.py index 615c2deb..3524471f 100644 --- a/gunicorn/http/wsgi.py +++ b/gunicorn/http/wsgi.py @@ -82,7 +82,6 @@ def default_environ(req, sock, cfg): env = base_environ(cfg) env.update({ "wsgi.input": req.body, - "wsgi.input_terminated": req.terminated, "gunicorn.socket": sock, "REQUEST_METHOD": req.method, "QUERY_STRING": req.query, @@ -132,6 +131,7 @@ def create(req, sock, client, server, cfg): continue elif hdr_name == "CONTENT-LENGTH": environ['CONTENT_LENGTH'] = hdr_value + environ['wsgi.input_terminated'] = False continue key = 'HTTP_' + hdr_name.replace('-', '_') From 8a270c9329ec3cc8054659117b2ffd613a3d8113 Mon Sep 17 00:00:00 2001 From: benoitc Date: Thu, 21 Nov 2019 18:13:55 +0100 Subject: [PATCH 096/263] wsgi.input_terminated is always true As spotted by @tilgovi Gunicorn always ensure the reader terminate. So set wsgi.input_terminated to true to signal it to the application. 
--- gunicorn/http/wsgi.py | 1 - 1 file changed, 1 deletion(-) diff --git a/gunicorn/http/wsgi.py b/gunicorn/http/wsgi.py index 3524471f..414513e0 100644 --- a/gunicorn/http/wsgi.py +++ b/gunicorn/http/wsgi.py @@ -131,7 +131,6 @@ def create(req, sock, client, server, cfg): continue elif hdr_name == "CONTENT-LENGTH": environ['CONTENT_LENGTH'] = hdr_value - environ['wsgi.input_terminated'] = False continue key = 'HTTP_' + hdr_name.replace('-', '_') From f9ade3af34d2ced37cf95f749c414a995e0118aa Mon Sep 17 00:00:00 2001 From: benoitc Date: Fri, 22 Nov 2019 14:09:20 +0100 Subject: [PATCH 097/263] unblock select loop during reload of a sync worker When reloading the worker canbe in the select loop and it will wait until timeout before handling any change. So make sure to wake up the loop before going further. --- gunicorn/workers/base.py | 1 + 1 file changed, 1 insertion(+) diff --git a/gunicorn/workers/base.py b/gunicorn/workers/base.py index 81c0c616..54a6713c 100644 --- a/gunicorn/workers/base.py +++ b/gunicorn/workers/base.py @@ -123,6 +123,7 @@ class Worker(object): def changed(fname): self.log.info("Worker reloading: %s modified", fname) self.alive = False + os.write(self.PIPE[1], b"1") self.cfg.worker_int(self) time.sleep(0.1) sys.exit(0) From b01fe34e5633768052e70d88e7e8db84368f93e3 Mon Sep 17 00:00:00 2001 From: benoitc Date: Fri, 22 Nov 2019 15:34:07 +0100 Subject: [PATCH 098/263] use hight protocol version of openssl by default --- gunicorn/config.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/gunicorn/config.py b/gunicorn/config.py index d165f256..086725bd 100644 --- a/gunicorn/config.py +++ b/gunicorn/config.py @@ -1895,6 +1895,20 @@ class SSLVersion(Setting): section = "SSL" cli = ["--ssl-version"] validator = validate_ssl_version + + if hasattr(ssl, "PROTOCOL_TLS"): + default = ssl.PROTOCOL_TLS + else: + default = ssl.PROTOCOL_SSLv23 + + desc = """\ + SSL version to use (see stdlib ssl module's) + + .. 
versionchanged:: 20.0.1 + The default value has been changed from ``ssl.PROTOCOL_SSLv23`` to + ``ssl.PROTOCOL_TLS`` when Python >= 3.6 . + + """ default = ssl.PROTOCOL_SSLv23 desc = """\ SSL version to use. From c25563fa95245cb5e275d808c6cef5dd8c03f5d3 Mon Sep 17 00:00:00 2001 From: Randall Leeds Date: Fri, 22 Nov 2019 09:58:06 -0800 Subject: [PATCH 099/263] Add Paste Deploy changes to Breaking Change list --- docs/source/news.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/source/news.rst b/docs/source/news.rst index dcd71384..7ddbd9c6 100644 --- a/docs/source/news.rst +++ b/docs/source/news.rst @@ -59,6 +59,7 @@ Breaking Change - Removed gaiohttp worker - Drop support for Python 2.x - Drop support for EOL Python 3.2 and 3.3 +- Drop support for Paste Deploy server blocks History From 1aa9cf0529fe87e4084dea9dadc97848934cceb4 Mon Sep 17 00:00:00 2001 From: benoitc Date: Fri, 22 Nov 2019 21:12:03 +0100 Subject: [PATCH 100/263] fix faq correct the versions when we changed logging to the console. --- docs/source/faq.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/faq.rst b/docs/source/faq.rst index bd8f1d18..98af5a65 100644 --- a/docs/source/faq.rst +++ b/docs/source/faq.rst @@ -106,9 +106,9 @@ threads. However `a work has been started Why I don't see any logs in the console? ---------------------------------------- -In version R19, Gunicorn doesn't log by default in the console. +In version 19.0, Gunicorn doesn't log by default in the console. To watch the logs in the console you need to use the option ``--log-file=-``. -In version R20, Gunicorn logs to the console by default again. +In version 19.2, Gunicorn logs to the console by default again. 
Kernel Parameters ================= From 291483dd39cb79e66cb9006cc033a35b889538e7 Mon Sep 17 00:00:00 2001 From: Aaron Wilson Date: Fri, 22 Nov 2019 20:39:45 +0000 Subject: [PATCH 101/263] Add a __repr__ to config to include its value (#2076) It's sometimes helpful to be able to trivially dump all the config values for debugging purposes. This commit defines a repr for that. --- gunicorn/config.py | 9 +++++++++ tests/test_config.py | 7 +++++++ 2 files changed, 16 insertions(+) diff --git a/gunicorn/config.py b/gunicorn/config.py index 086725bd..f21f74f8 100644 --- a/gunicorn/config.py +++ b/gunicorn/config.py @@ -308,6 +308,15 @@ class Setting(object): self.order < other.order) __cmp__ = __lt__ + def __repr__(self): + return "<%s.%s object at %x with value %r>" % ( + self.__class__.__module__, + self.__class__.__name__, + id(self), + self.value, + ) + + Setting = SettingMeta('Setting', (Setting,), {}) diff --git a/tests/test_config.py b/tests/test_config.py index 0587c63c..8b1922e6 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -435,3 +435,10 @@ def test_bind_fd(): with AltArgs(["prog_name", "-b", "fd://42"]): app = NoConfigApp() assert app.cfg.bind == ["fd://42"] + + +def test_repr(): + c = config.Config() + c.set("workers", 5) + + assert "with value 5" in repr(c.settings['workers']) From 4d3ec28046f4d5d0b9fb8b24c1235c6e369b8837 Mon Sep 17 00:00:00 2001 From: ThePrez Date: Fri, 22 Nov 2019 15:14:35 -0600 Subject: [PATCH 102/263] fix IBM i 'missing libc' failure (#2195) allow gunicorn on IBM i (and AIX) platform with Python 3.6 and earlier --- gunicorn/socketfromfd.py | 13 +++++++++++-- gunicorn/util.py | 4 ++-- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/gunicorn/socketfromfd.py b/gunicorn/socketfromfd.py index 74f6503d..71b40d3c 100644 --- a/gunicorn/socketfromfd.py +++ b/gunicorn/socketfromfd.py @@ -20,10 +20,18 @@ SO_DOMAIN = getattr(socket, 'SO_DOMAIN', 39) SO_TYPE = getattr(socket, 'SO_TYPE', 3) SO_PROTOCOL = 
getattr(socket, 'SO_PROTOCOL', 38) - _libc_name = find_library('c') if _libc_name is not None: - libc = ctypes.CDLL(_libc_name, use_errno=True) + if sys.platform.startswith("aix"): + member = ( + '(shr_64.o)' if ctypes.sizeof(ctypes.c_voidp) == 8 else '(shr.o)') + # 0x00040000 correspondes to RTLD_MEMBER, undefined in Python <= 3.6 + dlopen_mode = (ctypes.DEFAULT_MODE | 0x00040000 | os.RTLD_NOW) + libc = ctypes.CDLL(_libc_name+member, + use_errno=True, + mode=dlopen_mode) + else: + libc = ctypes.CDLL(_libc_name, use_errno=True) else: raise OSError('libc not found') @@ -36,6 +44,7 @@ def _errcheck_errno(result, func, arguments): raise OSError(errno, os.strerror(errno)) return arguments + if platform.system() == 'SunOS': _libc_getsockopt = libc._so_getsockopt else: diff --git a/gunicorn/util.py b/gunicorn/util.py index 0cf5e751..3c72d9fc 100644 --- a/gunicorn/util.py +++ b/gunicorn/util.py @@ -659,9 +659,9 @@ def _findWalk_ldpath(name): if _is_elf(f): return os.path.basename(f) prefix = os.path.join(d, 'lib'+name) - for suffix in ['so', 'so.*', '*.so.*']: + for suffix in ['so', 'so.*', '*.so.*', 'a']: for f in glob('{0}.{1}'.format(prefix, suffix)): - if _is_elf(f): + if _is_elf(f) or suffix == 'a': return os.path.basename(f) From 441977f57c628dffb903ce66cf50dcba92c1872e Mon Sep 17 00:00:00 2001 From: benoitc Date: Fri, 22 Nov 2019 10:57:14 +0100 Subject: [PATCH 103/263] only support python >= 3.5 python 3.4 is unsupported and we should focus on offering stable features than backporting some code to this version --- README.rst | 2 +- docs/source/index.rst | 2 +- gunicorn/__init__.py | 2 +- setup.py | 1 - tox.ini | 2 +- 5 files changed, 4 insertions(+), 5 deletions(-) diff --git a/README.rst b/README.rst index c9e3ebdf..22b46be8 100644 --- a/README.rst +++ b/README.rst @@ -28,7 +28,7 @@ The documentation is hosted at http://docs.gunicorn.org. Installation ------------ -Gunicorn requires **Python 3.x >= 3.4**. +Gunicorn requires **Python 3.x >= 3.5**. 
Install from PyPI:: diff --git a/docs/source/index.rst b/docs/source/index.rst index 074a1117..50bb2abd 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -23,7 +23,7 @@ Features * Simple Python configuration * Multiple worker configurations * Various server hooks for extensibility -* Compatible with Python 3.x >= 3.4 +* Compatible with Python 3.x >= 3.5 Contents diff --git a/gunicorn/__init__.py b/gunicorn/__init__.py index 7b38ab04..1bb73fc0 100644 --- a/gunicorn/__init__.py +++ b/gunicorn/__init__.py @@ -3,6 +3,6 @@ # This file is part of gunicorn released under the MIT license. # See the NOTICE for more information. -version_info = (20, 0, 0) +version_info = (20, 0, 1) __version__ = ".".join([str(v) for v in version_info]) SERVER_SOFTWARE = "gunicorn/%s" % __version__ diff --git a/setup.py b/setup.py index 2408a53f..965546f4 100644 --- a/setup.py +++ b/setup.py @@ -21,7 +21,6 @@ CLASSIFIERS = [ 'Operating System :: POSIX', 'Programming Language :: Python', 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.4', 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7', diff --git a/tox.ini b/tox.ini index 7180e559..41298735 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = py34, py35, py36, py37, py38, pypy3, lint +envlist = py35, py36, py37, py38, pypy3, lint skipsdist = True [testenv] From ad4ff8cb3cfff95e9eda3d58bab97b875b565151 Mon Sep 17 00:00:00 2001 From: benoitc Date: Fri, 22 Nov 2019 11:07:25 +0100 Subject: [PATCH 104/263] modernize the way the config module is loaded This change load the module as suggested in the Python docs : https://docs.python.org/3/library/importlib.html#importing-a-source-file-directly which add the `__file__` attribute back and others possibly missing. 
This change remove the support of python 3.4 --- gunicorn/app/base.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/gunicorn/app/base.py b/gunicorn/app/base.py index 470b40ab..59d07060 100644 --- a/gunicorn/app/base.py +++ b/gunicorn/app/base.py @@ -2,7 +2,7 @@ # # This file is part of gunicorn released under the MIT license. # See the NOTICE for more information. -import importlib.machinery +import importlib.util import os import sys import traceback @@ -97,9 +97,10 @@ class Application(BaseApplication): try: module_name = '__config__' - mod = types.ModuleType(module_name) - loader = importlib.machinery.SourceFileLoader(module_name, filename) - loader.exec_module(mod) + spec = importlib.util.spec_from_file_location(module_name, filename) + mod = importlib.util.module_from_spec(spec) + sys.modules[module_name] = mod + spec.loader.exec_module(mod) except Exception: print("Failed to read config file: %s" % filename, file=sys.stderr) traceback.print_exc() From 0752ccd5dfc9561c15ccbb8e6c1fd4daf4965e61 Mon Sep 17 00:00:00 2001 From: benoitc Date: Fri, 22 Nov 2019 11:13:40 +0100 Subject: [PATCH 105/263] remove Python 3.4 testing in travis --- .travis.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 227b004c..f2d3c41a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,8 +6,6 @@ matrix: env: TOXENV=lint dist: xenial sudo: true - - python: 3.4 - env: TOXENV=py34 - python: 3.5 env: TOXENV=py35 - python: 3.6 From 455767b96af59fe3c723826455f542ded0ecad7f Mon Sep 17 00:00:00 2001 From: benoitc Date: Fri, 22 Nov 2019 11:14:35 +0100 Subject: [PATCH 106/263] remove useless import in app.base --- gunicorn/app/base.py | 1 - 1 file changed, 1 deletion(-) diff --git a/gunicorn/app/base.py b/gunicorn/app/base.py index 59d07060..cb1bd973 100644 --- a/gunicorn/app/base.py +++ b/gunicorn/app/base.py @@ -6,7 +6,6 @@ import importlib.util import os import sys import traceback -import types from gunicorn import util from 
gunicorn.arbiter import Arbiter From ef7d10853d7d7407a60a0384255556357497bc92 Mon Sep 17 00:00:00 2001 From: benoitc Date: Sat, 23 Nov 2019 10:16:21 +0100 Subject: [PATCH 107/263] add 19.9.10 changelog --- docs/source/news.rst | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/docs/source/news.rst b/docs/source/news.rst index 7ddbd9c6..00486e8c 100644 --- a/docs/source/news.rst +++ b/docs/source/news.rst @@ -2,6 +2,23 @@ Changelog ========= + +19.9.10 / 2019/11/23 +==================== + +- unblock select loop during reload of a sync worker +- security fix: http desync attack +- handle `wsgi.input_terminated` +- added support for str and bytes in unix socket addresses +- fixed `max_requests` setting +- headers values are now encoded as LATN1, not ASCII +- fixed `InotifyReloadeder`: handle `module.__file__` is None +- fixed compatibility with tornado 6 +- fixed root logging +- Prevent removalof unix sockets from `reuse_port` +- Clear tornado ioloop before os.fork +- Miscellaneous fixes and improvement for linting using Pylint + 20.0 / 2019/10/30 ================= @@ -25,7 +42,7 @@ Changelog - Simplify `futures` import in gthread module - Fixed worker_connections` setting to also affects the Gthread worker type - Fixed setting max_requests -- Bump minimum Eventlet and Gevent versions to 0.24 and 1.4 +- Bump minimum Eventlet and Gevent versions to 0.24 and 1.4 - Use Python default SSL cipher list by default - handle `wsgi.input_terminated` extension - Simplify Paste Deployment documentation From e3659e8adc610aa2bbbd85d8654e44cd9bc0a09e Mon Sep 17 00:00:00 2001 From: benoitc Date: Sat, 23 Nov 2019 10:39:04 +0100 Subject: [PATCH 108/263] document 20.0.1 change --- docs/site/index.html | 2 +- docs/source/news.rst | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/docs/site/index.html b/docs/site/index.html index 198e7947..3c05803b 100644 --- a/docs/site/index.html +++ b/docs/site/index.html @@ -16,7 
+16,7 @@
Latest version: 20.0.0 + href="https://docs.gunicorn.org/en/stable/">20.0.1
diff --git a/docs/source/news.rst b/docs/source/news.rst index 00486e8c..bea91184 100644 --- a/docs/source/news.rst +++ b/docs/source/news.rst @@ -2,6 +2,25 @@ Changelog ========= +20.0.1 / 2019/11/23 +================= + +- fixed the way the config module is loaded. `__file__` is now available +- fixed `wsgi.input_terminated`. It is always true. +- use the highest protocol version of openssl by default +- only support Python >= 3.5 +- added `__repr__` method to `Config` instance +- fixed support of AIX platform and musl libc in `socketfromfd.fromfd` function +- fixed support of applications loaded from a factory function +- fixed chunked encoding support to prevent any `request smuggling `_ +- Capture os.sendfile before patching in gevent and eventlet workers. + fix `RecursionError`. +- removed locking in reloader when adding new files +- load the WSGI application before the loader to pick up all files + +.. note:: this release add official support for applications loaded from a factory function + as documented in Flask and other places. + 19.9.10 / 2019/11/23 ==================== From 333dc9bb4d77d0bd3183b4a2f77265a9c9d627f6 Mon Sep 17 00:00:00 2001 From: benoitc Date: Sat, 23 Nov 2019 11:17:47 +0100 Subject: [PATCH 109/263] fix doc title --- docs/source/news.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/news.rst b/docs/source/news.rst index bea91184..2ccdf7b2 100644 --- a/docs/source/news.rst +++ b/docs/source/news.rst @@ -3,7 +3,7 @@ Changelog ========= 20.0.1 / 2019/11/23 -================= +=================== - fixed the way the config module is loaded. `__file__` is now available - fixed `wsgi.input_terminated`. It is always true. 
From a4803a48a6cc747ff3ebd82d570f24c75d585e97 Mon Sep 17 00:00:00 2001 From: benoitc Date: Sat, 23 Nov 2019 11:25:44 +0100 Subject: [PATCH 110/263] fix 19.10 version in doc --- docs/source/news.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/news.rst b/docs/source/news.rst index 2ccdf7b2..b9ba2685 100644 --- a/docs/source/news.rst +++ b/docs/source/news.rst @@ -22,7 +22,7 @@ Changelog as documented in Flask and other places. -19.9.10 / 2019/11/23 +19.10.0s:w / 2019/11/23 ==================== - unblock select loop during reload of a sync worker From 83bfe9e737433a1e8067da988dd5ffa1dba96cda Mon Sep 17 00:00:00 2001 From: benoitc Date: Sat, 23 Nov 2019 11:29:23 +0100 Subject: [PATCH 111/263] fix typo --- docs/source/news.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/news.rst b/docs/source/news.rst index b9ba2685..373b6ef0 100644 --- a/docs/source/news.rst +++ b/docs/source/news.rst @@ -22,7 +22,7 @@ Changelog as documented in Flask and other places. -19.10.0s:w / 2019/11/23 +19.10.0 / 2019/11/23 ==================== - unblock select loop during reload of a sync worker From 911f7057eff2678359e327114025fa396e351b36 Mon Sep 17 00:00:00 2001 From: benoitc Date: Sat, 23 Nov 2019 11:40:42 +0100 Subject: [PATCH 112/263] bump to 20.0.2 fix changelog --- docs/source/news.rst | 5 +++++ gunicorn/__init__.py | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/docs/source/news.rst b/docs/source/news.rst index 373b6ef0..801aa74b 100644 --- a/docs/source/news.rst +++ b/docs/source/news.rst @@ -2,6 +2,11 @@ Changelog ========= +20.0.2 / 2019/11/23 +=================== + +- fix changelog + 20.0.1 / 2019/11/23 =================== diff --git a/gunicorn/__init__.py b/gunicorn/__init__.py index 1bb73fc0..d8c977b1 100644 --- a/gunicorn/__init__.py +++ b/gunicorn/__init__.py @@ -3,6 +3,6 @@ # This file is part of gunicorn released under the MIT license. # See the NOTICE for more information. 
-version_info = (20, 0, 1) +version_info = (20, 0, 2) __version__ = ".".join([str(v) for v in version_info]) SERVER_SOFTWARE = "gunicorn/%s" % __version__ From ceca7c849c1aa0aea3e737c06414224ad1e18831 Mon Sep 17 00:00:00 2001 From: benoitc Date: Sat, 23 Nov 2019 11:42:32 +0100 Subject: [PATCH 113/263] bump website version --- docs/site/index.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/site/index.html b/docs/site/index.html index 3c05803b..865a86f0 100644 --- a/docs/site/index.html +++ b/docs/site/index.html @@ -16,7 +16,7 @@
Latest version: 20.0.1 + href="https://docs.gunicorn.org/en/stable/">20.0.2
From 20774b8331dfb76845bf4a3e4c3c3d4a9d6a8efb Mon Sep 17 00:00:00 2001 From: Randall Leeds Date: Sat, 23 Nov 2019 15:36:56 -0800 Subject: [PATCH 114/263] Remove support for bdist_rpm --- docs/source/news.rst | 5 +++++ rpm/install | 16 ---------------- setup.cfg | 6 ------ 3 files changed, 5 insertions(+), 22 deletions(-) delete mode 100644 rpm/install diff --git a/docs/source/news.rst b/docs/source/news.rst index 801aa74b..7e72a4e5 100644 --- a/docs/source/news.rst +++ b/docs/source/news.rst @@ -2,6 +2,11 @@ Changelog ========= +Unreleased +========== + +- remove support for the `bdist_rpm` build + 20.0.2 / 2019/11/23 =================== diff --git a/rpm/install b/rpm/install deleted file mode 100644 index 4a8f9dc7..00000000 --- a/rpm/install +++ /dev/null @@ -1,16 +0,0 @@ -%{__python} setup.py install --skip-build --root=$RPM_BUILD_ROOT - -# Build the HTML documentation using the default theme. -%{__python} setup.py build_sphinx - -%if ! (0%{?fedora} > 12 || 0%{?rhel} > 5) -%{!?python_sitelib: %global python_sitelib %(%{__python} -c "from distutils.sysconfig import get_python_lib; print(get_python_lib())")} -%{!?python_sitearch: %global python_sitearch %(%{__python} -c "from distutils.sysconfig import get_python_lib; print(get_python_lib(1))")} -%endif - -INSTALLED_FILES="\ -%{python_sitelib}/* -%{_bindir}/* -%doc LICENSE NOTICE README.rst THANKS build/sphinx/html examples/example_config.py -" -echo "$INSTALLED_FILES" > INSTALLED_FILES diff --git a/setup.cfg b/setup.cfg index 20017519..64d0abc0 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,9 +1,3 @@ -[bdist_rpm] -build-requires = python2-devel python-setuptools python-sphinx -requires = python-setuptools >= 0.6c6 python-ctypes -install_script = rpm/install -group = System Environment/Daemons - [tool:pytest] norecursedirs = examples lib local src testpaths = tests/ From 404a7120234e2b1119f4e8a3662c542e4d8700c8 Mon Sep 17 00:00:00 2001 From: benoitc Date: Mon, 25 Nov 2019 10:25:44 +0100 Subject: [PATCH 115/263] 
socketfromfd: remove python 2 compatibility --- gunicorn/socketfromfd.py | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/gunicorn/socketfromfd.py b/gunicorn/socketfromfd.py index 71b40d3c..a7f7b899 100644 --- a/gunicorn/socketfromfd.py +++ b/gunicorn/socketfromfd.py @@ -95,15 +95,8 @@ def fromfd(fd, keep_fd=True): family = _raw_getsockopt(fd, socket.SOL_SOCKET, SO_DOMAIN) typ = _raw_getsockopt(fd, socket.SOL_SOCKET, SO_TYPE) proto = _raw_getsockopt(fd, socket.SOL_SOCKET, SO_PROTOCOL) - if sys.version_info.major == 2: - # Python 2 has no fileno argument and always duplicates the fd - sockobj = socket.fromfd(fd, family, typ, proto) - sock = socket.socket(None, None, None, _sock=sockobj) - if not keep_fd: - os.close(fd) - return sock + s + if keep_fd: + return socket.fromfd(fd, family, typ, proto) else: - if keep_fd: - return socket.fromfd(fd, family, typ, proto) - else: - return socket.socket(family, typ, proto, fileno=fd) + return socket.socket(family, typ, proto, fileno=fd) From e69e8020539989d1b5d4846f5329843c6150767d Mon Sep 17 00:00:00 2001 From: benoitc Date: Mon, 25 Nov 2019 11:53:02 +0100 Subject: [PATCH 116/263] socketfromfd: fix cross platform usage on osx and maybe some other systems, SO_PROTOCOL is not set, also the socket family is not detected correctly. This patch removes default values in socketfromfd and does the following: * detect proper family using getsockname instead of getsockopt * if no type is given, default to SOCK_STREAM (we don't have any other type) * if no protocol is given, default to 0 and let the system take care of it.
--- gunicorn/socketfromfd.py | 37 +++++++++++++++++++++++++++++-------- 1 file changed, 29 insertions(+), 8 deletions(-) diff --git a/gunicorn/socketfromfd.py b/gunicorn/socketfromfd.py index a7f7b899..521e0f00 100644 --- a/gunicorn/socketfromfd.py +++ b/gunicorn/socketfromfd.py @@ -16,10 +16,6 @@ from .util import find_library __all__ = ('fromfd',) -SO_DOMAIN = getattr(socket, 'SO_DOMAIN', 39) -SO_TYPE = getattr(socket, 'SO_TYPE', 3) -SO_PROTOCOL = getattr(socket, 'SO_PROTOCOL', 38) - _libc_name = find_library('c') if _libc_name is not None: if sys.platform.startswith("aix"): @@ -47,8 +43,12 @@ def _errcheck_errno(result, func, arguments): if platform.system() == 'SunOS': _libc_getsockopt = libc._so_getsockopt + _libc_getsockname = libc._so_getsockname else: _libc_getsockopt = libc.getsockopt + _libc_getsockname = libc.getsockname + + _libc_getsockopt.argtypes = [ ctypes.c_int, # int sockfd ctypes.c_int, # int level @@ -59,6 +59,17 @@ _libc_getsockopt.argtypes = [ _libc_getsockopt.restype = ctypes.c_int # 0: ok, -1: err _libc_getsockopt.errcheck = _errcheck_errno +class SockAddr(ctypes.Structure): + _fields_ = [ + ('sa_len', ctypes.c_uint8), + ('sa_family', ctypes.c_uint8), + ('sa_data', ctypes.c_char * 14) + ] +_libc_getsockname.argtypes = [ + ctypes.c_int, + ctypes.POINTER(SockAddr), + ctypes.POINTER(ctypes.c_int) +] def _raw_getsockopt(fd, level, optname): """Make raw getsockopt() call for int32 optval @@ -74,6 +85,11 @@ def _raw_getsockopt(fd, level, optname): ctypes.byref(optval), ctypes.byref(optlen)) return optval.value +def _raw_getsockname(fd): + sockaddr = SockAddr() + len = ctypes.c_int(ctypes.sizeof(sockaddr)) + _libc_getsockname(fd, sockaddr, len) + return sockaddr.sa_family def fromfd(fd, keep_fd=True): """Create a socket from a file descriptor @@ -92,10 +108,15 @@ def fromfd(fd, keep_fd=True): :return: socket.socket instance :raises OSError: for invalid socket fd """ - family = _raw_getsockopt(fd, socket.SOL_SOCKET, SO_DOMAIN) - typ = 
_raw_getsockopt(fd, socket.SOL_SOCKET, SO_TYPE) - proto = _raw_getsockopt(fd, socket.SOL_SOCKET, SO_PROTOCOL) - s + family = _raw_getsockname(fd) + if hasattr(socket, 'SO_TYPE'): + typ = _raw_getsockopt(fd, socket.SOL_SOCKET, getattr(socket, 'SO_TYPE')) + else: + typ = socket.SOCK_STREAM + if hasattr(socket, 'SO_PROTOCOL'): + proto = _raw_getsockopt(fd, socket.SOL_SOCKET, getattr(socket, 'SO_PROTOCOL')) + else: + proto = 0 if keep_fd: return socket.fromfd(fd, family, typ, proto) else: From d38f6e47ea03a4c0121036a4300fe2d0155e85c6 Mon Sep 17 00:00:00 2001 From: benoitc Date: Mon, 25 Nov 2019 11:57:59 +0100 Subject: [PATCH 117/263] fix linting on python 3.8 --- gunicorn/socketfromfd.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gunicorn/socketfromfd.py b/gunicorn/socketfromfd.py index 521e0f00..ec9d26f1 100644 --- a/gunicorn/socketfromfd.py +++ b/gunicorn/socketfromfd.py @@ -87,8 +87,8 @@ def _raw_getsockopt(fd, level, optname): def _raw_getsockname(fd): sockaddr = SockAddr() - len = ctypes.c_int(ctypes.sizeof(sockaddr)) - _libc_getsockname(fd, sockaddr, len) + sockaddrlen = ctypes.c_int(ctypes.sizeof(sockaddr)) + _libc_getsockname(fd, sockaddr, sockaddrlen) return sockaddr.sa_family def fromfd(fd, keep_fd=True): From 9419d660960869438c1a19838e09b63e6dbfbac6 Mon Sep 17 00:00:00 2001 From: benoitc Date: Mon, 25 Nov 2019 15:37:02 +0100 Subject: [PATCH 118/263] refactor module use appropriate naming for function, make _getsockname more generic and doc more useful --- NOTICE | 8 ++++++++ gunicorn/socketfromfd.py | 39 ++++++++++++++++++++++++++++----------- 2 files changed, 36 insertions(+), 11 deletions(-) diff --git a/NOTICE b/NOTICE index a2f4aa20..12b21ccb 100644 --- a/NOTICE +++ b/NOTICE @@ -122,3 +122,11 @@ util/unlink.py -------------- backport frop python3 Lib/test/support.py + + +gunicorn.socketfromfd +--------------------- + +Under Apache License 2 + +Copyright (C) 2016 Christian Heimes \ No newline at end of file diff --git 
a/gunicorn/socketfromfd.py b/gunicorn/socketfromfd.py index ec9d26f1..2bca6749 100644 --- a/gunicorn/socketfromfd.py +++ b/gunicorn/socketfromfd.py @@ -1,9 +1,20 @@ -# Copyright (C) 2016 Christian Heimes -"""socketfromfd -- socket.fromd() with auto-discovery +# -*- coding: utf-8 - +# +# This file is part of gunicorn +# See the NOTICE for more information. -ATTENTION: Do not remove this backport till the minimum required version is - Python 3.7. See https://bugs.python.org/issue28134 for details. +# Copyright (C) 2016 Christian Heimes under Apache License 2 + +# source code based on https://github.com/tiran/socketfromfd/blob/master/socketfromfd.py +# and https://github.com/python/cpython/blob/master/Modules/socketmodule.c + +"""socketfromfd -- create a socket from its file descriptor +This module detect the socket properties. + +note: Before python 3.7 auto detecting the socket was not working. +See https://bugs.python.org/issue28134 for details. """ + from __future__ import print_function import ctypes @@ -43,7 +54,7 @@ def _errcheck_errno(result, func, arguments): if platform.system() == 'SunOS': _libc_getsockopt = libc._so_getsockopt - _libc_getsockname = libc._so_getsockname + _lib_getsockname = libc._so_getsockname else: _libc_getsockopt = libc.getsockopt _libc_getsockname = libc.getsockname @@ -65,13 +76,17 @@ class SockAddr(ctypes.Structure): ('sa_family', ctypes.c_uint8), ('sa_data', ctypes.c_char * 14) ] + + _libc_getsockname.argtypes = [ ctypes.c_int, ctypes.POINTER(SockAddr), ctypes.POINTER(ctypes.c_int) ] +_libc_getsockname.restype = ctypes.c_int # 0: ok, -1: err +_libc_getsockname.errcheck = _errcheck_errno -def _raw_getsockopt(fd, level, optname): +def _getsockopt(fd, level, optname): """Make raw getsockopt() call for int32 optval :param fd: socket fd @@ -85,11 +100,11 @@ def _raw_getsockopt(fd, level, optname): ctypes.byref(optval), ctypes.byref(optlen)) return optval.value -def _raw_getsockname(fd): +def _getsockname(fd): sockaddr = SockAddr() 
sockaddrlen = ctypes.c_int(ctypes.sizeof(sockaddr)) _libc_getsockname(fd, sockaddr, sockaddrlen) - return sockaddr.sa_family + return sockaddr def fromfd(fd, keep_fd=True): """Create a socket from a file descriptor @@ -108,13 +123,15 @@ def fromfd(fd, keep_fd=True): :return: socket.socket instance :raises OSError: for invalid socket fd """ - family = _raw_getsockname(fd) + sockaddr = _getsockname(fd) + family = sockaddr.sa_family if hasattr(socket, 'SO_TYPE'): - typ = _raw_getsockopt(fd, socket.SOL_SOCKET, getattr(socket, 'SO_TYPE')) + typ = _getsockopt(fd, socket.SOL_SOCKET, getattr(socket, 'SO_TYPE')) else: typ = socket.SOCK_STREAM + if hasattr(socket, 'SO_PROTOCOL'): - proto = _raw_getsockopt(fd, socket.SOL_SOCKET, getattr(socket, 'SO_PROTOCOL')) + proto = _getsockopt(fd, socket.SOL_SOCKET, getattr(socket, 'SO_PROTOCOL')) else: proto = 0 if keep_fd: From 9538358511c31937529c953e98940c5da5940136 Mon Sep 17 00:00:00 2001 From: benoitc Date: Mon, 25 Nov 2019 20:20:05 +0100 Subject: [PATCH 119/263] config file extension should be a python extension This change make it clear what is configuration file for Gunicorn. Using a filename with an extension different than a python extension was never supported. Also it gives us some room for a proper config file. --- docs/source/configure.rst | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/docs/source/configure.rst b/docs/source/configure.rst index 9ed1a484..a3fbb635 100644 --- a/docs/source/configure.rst +++ b/docs/source/configure.rst @@ -51,10 +51,11 @@ isn't mentioned in the list of :ref:`settings `. Configuration File ================== -The configuration file should be a valid Python source file. It only needs to -be readable from the file system. More specifically, it does not need to be -importable. Any Python is valid. Just consider that this will be run every time -you start Gunicorn (including when you signal Gunicorn to reload). 
+The configuration file should be a valid Python source file with a **python +extension** (e.g. `gunicorn.conf.py`). It only needs to be readable from the +file system. More specifically, it does not need to be importable. Any Python +is valid. Just consider that this will be run every time you start Gunicorn +(including when you signal Gunicorn to reload). To set a parameter, just assign to it. There's no special syntax. The values you provide will be used for the configuration values. From 9a3e008eca1b63b27daed2404f2ff4b0e6308640 Mon Sep 17 00:00:00 2001 From: benoitc Date: Mon, 25 Nov 2019 21:07:35 +0100 Subject: [PATCH 120/263] put back configuration file without py extension and deprecate it this change puts back (and fixes) support of configuration files without a Python extension and warns about its usage. --- gunicorn/app/base.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/gunicorn/app/base.py b/gunicorn/app/base.py index cb1bd973..9b024f15 100644 --- a/gunicorn/app/base.py +++ b/gunicorn/app/base.py @@ -3,6 +3,7 @@ # This file is part of gunicorn released under the MIT license. # See the NOTICE for more information.
import importlib.util +import importlib.machinery import os import sys import traceback @@ -94,9 +95,17 @@ class Application(BaseApplication): if not os.path.exists(filename): raise RuntimeError("%r doesn't exist" % filename) + ext = os.path.splitext(filename)[1] + try: module_name = '__config__' - spec = importlib.util.spec_from_file_location(module_name, filename) + if ext in [".py", ".pyc"]: + spec = importlib.util.spec_from_file_location(module_name, filename) + else: + msg = "configuration file should have a valid Python extension.\n" + util.warn(msg) + loader_ = importlib.machinery.SourceFileLoader(module_name, filename) + spec = importlib.util.spec_from_file_location(module_name, filename, loader=loader_) mod = importlib.util.module_from_spec(spec) sys.modules[module_name] = mod spec.loader.exec_module(mod) From b4e52c181ae87d93ce728fd703c37c6b6864ad2d Mon Sep 17 00:00:00 2001 From: benoitc Date: Mon, 25 Nov 2019 21:22:34 +0100 Subject: [PATCH 121/263] make config doc more explicit explicit what we mean by "not need to be importable". --- docs/source/configure.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/source/configure.rst b/docs/source/configure.rst index a3fbb635..df5cbe2e 100644 --- a/docs/source/configure.rst +++ b/docs/source/configure.rst @@ -53,9 +53,9 @@ Configuration File The configuration file should be a valid Python source file with a **python extension** (e.g. `gunicorn.conf.py`). It only needs to be readable from the -file system. More specifically, it does not need to be importable. Any Python -is valid. Just consider that this will be run every time you start Gunicorn -(including when you signal Gunicorn to reload). +file system. More specifically, it does not have to be on the module path +(sys.path, PYTHONPATH). Any Python is valid. Just consider that this will be +run every time you start Gunicorn (including when you signal Gunicorn to reload). To set a parameter, just assign to it. 
There's no special syntax. The values you provide will be used for the configuration values. From fa32aa77571581688811b9c043780ab3f87b733a Mon Sep 17 00:00:00 2001 From: benoitc Date: Mon, 25 Nov 2019 10:25:44 +0100 Subject: [PATCH 122/263] socketfromfd: remove python 2 compatibility --- gunicorn/socketfromfd.py | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/gunicorn/socketfromfd.py b/gunicorn/socketfromfd.py index 71b40d3c..a7f7b899 100644 --- a/gunicorn/socketfromfd.py +++ b/gunicorn/socketfromfd.py @@ -95,15 +95,8 @@ def fromfd(fd, keep_fd=True): family = _raw_getsockopt(fd, socket.SOL_SOCKET, SO_DOMAIN) typ = _raw_getsockopt(fd, socket.SOL_SOCKET, SO_TYPE) proto = _raw_getsockopt(fd, socket.SOL_SOCKET, SO_PROTOCOL) - if sys.version_info.major == 2: - # Python 2 has no fileno argument and always duplicates the fd - sockobj = socket.fromfd(fd, family, typ, proto) - sock = socket.socket(None, None, None, _sock=sockobj) - if not keep_fd: - os.close(fd) - return sock + s + if keep_fd: + return socket.fromfd(fd, family, typ, proto) else: - if keep_fd: - return socket.fromfd(fd, family, typ, proto) - else: - return socket.socket(family, typ, proto, fileno=fd) + return socket.socket(family, typ, proto, fileno=fd) From 883e62da46a04142e1c2fbd20f0d74f3e38261b8 Mon Sep 17 00:00:00 2001 From: benoitc Date: Mon, 25 Nov 2019 11:53:02 +0100 Subject: [PATCH 123/263] socketfromfd: fix cross platform usage on osx and maybe some other systems, SO_PROTOCOL is not set, also the socket family is not detected correctly. This patch removes default values in socketfromfd and does the following: * detect proper family using getsockname instead of getsockopt * if no type is given, default to SOCK_STREAM (we don't have any other type) * if no protocol is given, default to 0 and let the system take care of it.
--- gunicorn/socketfromfd.py | 37 +++++++++++++++++++++++++++++-------- 1 file changed, 29 insertions(+), 8 deletions(-) diff --git a/gunicorn/socketfromfd.py b/gunicorn/socketfromfd.py index a7f7b899..521e0f00 100644 --- a/gunicorn/socketfromfd.py +++ b/gunicorn/socketfromfd.py @@ -16,10 +16,6 @@ from .util import find_library __all__ = ('fromfd',) -SO_DOMAIN = getattr(socket, 'SO_DOMAIN', 39) -SO_TYPE = getattr(socket, 'SO_TYPE', 3) -SO_PROTOCOL = getattr(socket, 'SO_PROTOCOL', 38) - _libc_name = find_library('c') if _libc_name is not None: if sys.platform.startswith("aix"): @@ -47,8 +43,12 @@ def _errcheck_errno(result, func, arguments): if platform.system() == 'SunOS': _libc_getsockopt = libc._so_getsockopt + _libc_getsockname = libc._so_getsockname else: _libc_getsockopt = libc.getsockopt + _libc_getsockname = libc.getsockname + + _libc_getsockopt.argtypes = [ ctypes.c_int, # int sockfd ctypes.c_int, # int level @@ -59,6 +59,17 @@ _libc_getsockopt.argtypes = [ _libc_getsockopt.restype = ctypes.c_int # 0: ok, -1: err _libc_getsockopt.errcheck = _errcheck_errno +class SockAddr(ctypes.Structure): + _fields_ = [ + ('sa_len', ctypes.c_uint8), + ('sa_family', ctypes.c_uint8), + ('sa_data', ctypes.c_char * 14) + ] +_libc_getsockname.argtypes = [ + ctypes.c_int, + ctypes.POINTER(SockAddr), + ctypes.POINTER(ctypes.c_int) +] def _raw_getsockopt(fd, level, optname): """Make raw getsockopt() call for int32 optval @@ -74,6 +85,11 @@ def _raw_getsockopt(fd, level, optname): ctypes.byref(optval), ctypes.byref(optlen)) return optval.value +def _raw_getsockname(fd): + sockaddr = SockAddr() + len = ctypes.c_int(ctypes.sizeof(sockaddr)) + _libc_getsockname(fd, sockaddr, len) + return sockaddr.sa_family def fromfd(fd, keep_fd=True): """Create a socket from a file descriptor @@ -92,10 +108,15 @@ def fromfd(fd, keep_fd=True): :return: socket.socket instance :raises OSError: for invalid socket fd """ - family = _raw_getsockopt(fd, socket.SOL_SOCKET, SO_DOMAIN) - typ = 
_raw_getsockopt(fd, socket.SOL_SOCKET, SO_TYPE) - proto = _raw_getsockopt(fd, socket.SOL_SOCKET, SO_PROTOCOL) - s + family = _raw_getsockname(fd) + if hasattr(socket, 'SO_TYPE'): + typ = _raw_getsockopt(fd, socket.SOL_SOCKET, getattr(socket, 'SO_TYPE')) + else: + typ = socket.SOCK_STREAM + if hasattr(socket, 'SO_PROTOCOL'): + proto = _raw_getsockopt(fd, socket.SOL_SOCKET, getattr(socket, 'SO_PROTOCOL')) + else: + proto = 0 if keep_fd: return socket.fromfd(fd, family, typ, proto) else: From 3ae3d1cfccbd7fd6eb90766771186f6d66e2de0a Mon Sep 17 00:00:00 2001 From: benoitc Date: Mon, 25 Nov 2019 11:57:59 +0100 Subject: [PATCH 124/263] fix linting on python 3.8 --- gunicorn/socketfromfd.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gunicorn/socketfromfd.py b/gunicorn/socketfromfd.py index 521e0f00..ec9d26f1 100644 --- a/gunicorn/socketfromfd.py +++ b/gunicorn/socketfromfd.py @@ -87,8 +87,8 @@ def _raw_getsockopt(fd, level, optname): def _raw_getsockname(fd): sockaddr = SockAddr() - len = ctypes.c_int(ctypes.sizeof(sockaddr)) - _libc_getsockname(fd, sockaddr, len) + sockaddrlen = ctypes.c_int(ctypes.sizeof(sockaddr)) + _libc_getsockname(fd, sockaddr, sockaddrlen) return sockaddr.sa_family def fromfd(fd, keep_fd=True): From 444b2d82138d63967a4007fc90d1f8665384e732 Mon Sep 17 00:00:00 2001 From: benoitc Date: Mon, 25 Nov 2019 15:37:02 +0100 Subject: [PATCH 125/263] refactor module use appropriate naming for function, make _getsockname more generic and doc more useful --- NOTICE | 8 ++++++++ gunicorn/socketfromfd.py | 39 ++++++++++++++++++++++++++++----------- 2 files changed, 36 insertions(+), 11 deletions(-) diff --git a/NOTICE b/NOTICE index a2f4aa20..12b21ccb 100644 --- a/NOTICE +++ b/NOTICE @@ -122,3 +122,11 @@ util/unlink.py -------------- backport frop python3 Lib/test/support.py + + +gunicorn.socketfromfd +--------------------- + +Under Apache License 2 + +Copyright (C) 2016 Christian Heimes \ No newline at end of file diff --git 
a/gunicorn/socketfromfd.py b/gunicorn/socketfromfd.py index ec9d26f1..2bca6749 100644 --- a/gunicorn/socketfromfd.py +++ b/gunicorn/socketfromfd.py @@ -1,9 +1,20 @@ -# Copyright (C) 2016 Christian Heimes -"""socketfromfd -- socket.fromd() with auto-discovery +# -*- coding: utf-8 - +# +# This file is part of gunicorn +# See the NOTICE for more information. -ATTENTION: Do not remove this backport till the minimum required version is - Python 3.7. See https://bugs.python.org/issue28134 for details. +# Copyright (C) 2016 Christian Heimes under Apache License 2 + +# source code based on https://github.com/tiran/socketfromfd/blob/master/socketfromfd.py +# and https://github.com/python/cpython/blob/master/Modules/socketmodule.c + +"""socketfromfd -- create a socket from its file descriptor +This module detect the socket properties. + +note: Before python 3.7 auto detecting the socket was not working. +See https://bugs.python.org/issue28134 for details. """ + from __future__ import print_function import ctypes @@ -43,7 +54,7 @@ def _errcheck_errno(result, func, arguments): if platform.system() == 'SunOS': _libc_getsockopt = libc._so_getsockopt - _libc_getsockname = libc._so_getsockname + _lib_getsockname = libc._so_getsockname else: _libc_getsockopt = libc.getsockopt _libc_getsockname = libc.getsockname @@ -65,13 +76,17 @@ class SockAddr(ctypes.Structure): ('sa_family', ctypes.c_uint8), ('sa_data', ctypes.c_char * 14) ] + + _libc_getsockname.argtypes = [ ctypes.c_int, ctypes.POINTER(SockAddr), ctypes.POINTER(ctypes.c_int) ] +_libc_getsockname.restype = ctypes.c_int # 0: ok, -1: err +_libc_getsockname.errcheck = _errcheck_errno -def _raw_getsockopt(fd, level, optname): +def _getsockopt(fd, level, optname): """Make raw getsockopt() call for int32 optval :param fd: socket fd @@ -85,11 +100,11 @@ def _raw_getsockopt(fd, level, optname): ctypes.byref(optval), ctypes.byref(optlen)) return optval.value -def _raw_getsockname(fd): +def _getsockname(fd): sockaddr = SockAddr() 
sockaddrlen = ctypes.c_int(ctypes.sizeof(sockaddr)) _libc_getsockname(fd, sockaddr, sockaddrlen) - return sockaddr.sa_family + return sockaddr def fromfd(fd, keep_fd=True): """Create a socket from a file descriptor @@ -108,13 +123,15 @@ def fromfd(fd, keep_fd=True): :return: socket.socket instance :raises OSError: for invalid socket fd """ - family = _raw_getsockname(fd) + sockaddr = _getsockname(fd) + family = sockaddr.sa_family if hasattr(socket, 'SO_TYPE'): - typ = _raw_getsockopt(fd, socket.SOL_SOCKET, getattr(socket, 'SO_TYPE')) + typ = _getsockopt(fd, socket.SOL_SOCKET, getattr(socket, 'SO_TYPE')) else: typ = socket.SOCK_STREAM + if hasattr(socket, 'SO_PROTOCOL'): - proto = _raw_getsockopt(fd, socket.SOL_SOCKET, getattr(socket, 'SO_PROTOCOL')) + proto = _getsockopt(fd, socket.SOL_SOCKET, getattr(socket, 'SO_PROTOCOL')) else: proto = 0 if keep_fd: From d31ac1df83a2d6e3a890559765f4d6229edcca49 Mon Sep 17 00:00:00 2001 From: benoitc Date: Mon, 25 Nov 2019 20:20:05 +0100 Subject: [PATCH 126/263] config file extension should be a python extension This change make it clear what is configuration file for Gunicorn. Using a filename with an extension different than a python extension was never supported. Also it gives us some room for a proper config file. --- docs/source/configure.rst | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/docs/source/configure.rst b/docs/source/configure.rst index 9ed1a484..a3fbb635 100644 --- a/docs/source/configure.rst +++ b/docs/source/configure.rst @@ -51,10 +51,11 @@ isn't mentioned in the list of :ref:`settings `. Configuration File ================== -The configuration file should be a valid Python source file. It only needs to -be readable from the file system. More specifically, it does not need to be -importable. Any Python is valid. Just consider that this will be run every time -you start Gunicorn (including when you signal Gunicorn to reload). 
+The configuration file should be a valid Python source file with a **python +extension** (e.g. `gunicorn.conf.py`). It only needs to be readable from the +file system. More specifically, it does not need to be importable. Any Python +is valid. Just consider that this will be run every time you start Gunicorn +(including when you signal Gunicorn to reload). To set a parameter, just assign to it. There's no special syntax. The values you provide will be used for the configuration values. From 05515873ea5f245d8abc4a14d8bb1e8efeb8c79f Mon Sep 17 00:00:00 2001 From: benoitc Date: Mon, 25 Nov 2019 21:07:35 +0100 Subject: [PATCH 127/263] put back configuration file without py extension and deprecate it this change puts back (and fixes) support of configuration files without a Python extension and warns about its usage. --- gunicorn/app/base.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/gunicorn/app/base.py b/gunicorn/app/base.py index cb1bd973..9b024f15 100644 --- a/gunicorn/app/base.py +++ b/gunicorn/app/base.py @@ -3,6 +3,7 @@ # This file is part of gunicorn released under the MIT license. # See the NOTICE for more information.
import importlib.util +import importlib.machinery import os import sys import traceback @@ -94,9 +95,17 @@ class Application(BaseApplication): if not os.path.exists(filename): raise RuntimeError("%r doesn't exist" % filename) + ext = os.path.splitext(filename)[1] + try: module_name = '__config__' - spec = importlib.util.spec_from_file_location(module_name, filename) + if ext in [".py", ".pyc"]: + spec = importlib.util.spec_from_file_location(module_name, filename) + else: + msg = "configuration file should have a valid Python extension.\n" + util.warn(msg) + loader_ = importlib.machinery.SourceFileLoader(module_name, filename) + spec = importlib.util.spec_from_file_location(module_name, filename, loader=loader_) mod = importlib.util.module_from_spec(spec) sys.modules[module_name] = mod spec.loader.exec_module(mod) From 388fe407973c0425dc967a1d49a7c677ed90284f Mon Sep 17 00:00:00 2001 From: benoitc Date: Mon, 25 Nov 2019 21:22:34 +0100 Subject: [PATCH 128/263] make config doc more explicit explicit what we mean by "not need to be importable". --- docs/source/configure.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/source/configure.rst b/docs/source/configure.rst index a3fbb635..df5cbe2e 100644 --- a/docs/source/configure.rst +++ b/docs/source/configure.rst @@ -53,9 +53,9 @@ Configuration File The configuration file should be a valid Python source file with a **python extension** (e.g. `gunicorn.conf.py`). It only needs to be readable from the -file system. More specifically, it does not need to be importable. Any Python -is valid. Just consider that this will be run every time you start Gunicorn -(including when you signal Gunicorn to reload). +file system. More specifically, it does not have to be on the module path +(sys.path, PYTHONPATH). Any Python is valid. Just consider that this will be +run every time you start Gunicorn (including when you signal Gunicorn to reload). To set a parameter, just assign to it. 
There's no special syntax. The values you provide will be used for the configuration values. From 5257d924b61741c4a7b10384713ae755f07e0e7b Mon Sep 17 00:00:00 2001 From: benoitc Date: Mon, 25 Nov 2019 22:57:00 +0100 Subject: [PATCH 129/263] changelog for the 20.0.3 --- docs/source/news.rst | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/docs/source/news.rst b/docs/source/news.rst index 801aa74b..d43db01b 100644 --- a/docs/source/news.rst +++ b/docs/source/news.rst @@ -2,6 +2,14 @@ Changelog ========= +20.0.3 / 2019/11/24 +=================== + +- fixed load of a config file without a Python extension +- fixed `socketfromfd.fromfd` when defaults are not set + +.. note:: we now warn when we load a config file without Python Extension + 20.0.2 / 2019/11/23 =================== From 50a2cb6b160e7244dfa8989515bd4ca375c9b98c Mon Sep 17 00:00:00 2001 From: benoitc Date: Mon, 25 Nov 2019 22:59:12 +0100 Subject: [PATCH 130/263] bump to 20.0.3 --- gunicorn/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gunicorn/__init__.py b/gunicorn/__init__.py index d8c977b1..4d59629c 100644 --- a/gunicorn/__init__.py +++ b/gunicorn/__init__.py @@ -3,6 +3,6 @@ # This file is part of gunicorn released under the MIT license. # See the NOTICE for more information. 
-version_info = (20, 0, 2) +version_info = (20, 0, 3) __version__ = ".".join([str(v) for v in version_info]) SERVER_SOFTWARE = "gunicorn/%s" % __version__ From 8b64aaedd5b247957e89b1e06df8250922cfd439 Mon Sep 17 00:00:00 2001 From: benoitc Date: Mon, 25 Nov 2019 22:57:00 +0100 Subject: [PATCH 131/263] changelog for the 20.0.3 --- docs/source/news.rst | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/docs/source/news.rst b/docs/source/news.rst index 7e72a4e5..7b9af4c6 100644 --- a/docs/source/news.rst +++ b/docs/source/news.rst @@ -7,6 +7,15 @@ Unreleased - remove support for the `bdist_rpm` build +20.0.3 / 2019/11/24 +=================== + +- fixed load of a config file without a Python extension +- fixed `socketfromfd.fromfd` when defaults are not set + +.. note:: we now warn when we load a config file without Python Extension +>>>>>>> 5257d92... changelog for the 20.0.3 + 20.0.2 / 2019/11/23 =================== From 2dae3ea165b7ea01f74ad96d4e2508a6055c5f50 Mon Sep 17 00:00:00 2001 From: benoitc Date: Mon, 25 Nov 2019 22:59:12 +0100 Subject: [PATCH 132/263] bump to 20.0.3 --- gunicorn/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gunicorn/__init__.py b/gunicorn/__init__.py index d8c977b1..4d59629c 100644 --- a/gunicorn/__init__.py +++ b/gunicorn/__init__.py @@ -3,6 +3,6 @@ # This file is part of gunicorn released under the MIT license. # See the NOTICE for more information. 
-version_info = (20, 0, 2) +version_info = (20, 0, 3) __version__ = ".".join([str(v) for v in version_info]) SERVER_SOFTWARE = "gunicorn/%s" % __version__ From f646bdee39683cb30a6e30466ac1b49be8358ad5 Mon Sep 17 00:00:00 2001 From: benoitc Date: Mon, 25 Nov 2019 23:15:22 +0100 Subject: [PATCH 133/263] fix bad cherry-picking --- docs/source/news.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/source/news.rst b/docs/source/news.rst index 7b9af4c6..0839013d 100644 --- a/docs/source/news.rst +++ b/docs/source/news.rst @@ -14,7 +14,6 @@ Unreleased - fixed `socketfromfd.fromfd` when defaults are not set .. note:: we now warn when we load a config file without Python Extension ->>>>>>> 5257d92... changelog for the 20.0.3 20.0.2 / 2019/11/23 =================== From d95ed4489ef51024cbd7d880b590e3a2684f748e Mon Sep 17 00:00:00 2001 From: benoitc Date: Mon, 25 Nov 2019 23:30:42 +0100 Subject: [PATCH 134/263] point website to last version --- docs/site/index.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/site/index.html b/docs/site/index.html index 865a86f0..7fdd07e7 100644 --- a/docs/site/index.html +++ b/docs/site/index.html @@ -16,7 +16,7 @@
Latest version: 20.0.2 + href="https://docs.gunicorn.org/en/stable/">20.0.3
From d530e673c344b37fa45a3094d8cbde59d9b48253 Mon Sep 17 00:00:00 2001 From: Benoit Chesneau Date: Tue, 26 Nov 2019 09:53:13 +0100 Subject: [PATCH 135/263] Revert "refactor module" This reverts commit 9419d660960869438c1a19838e09b63e6dbfbac6. --- NOTICE | 8 -------- gunicorn/socketfromfd.py | 39 +++++++++++---------------------------- 2 files changed, 11 insertions(+), 36 deletions(-) diff --git a/NOTICE b/NOTICE index 12b21ccb..a2f4aa20 100644 --- a/NOTICE +++ b/NOTICE @@ -122,11 +122,3 @@ util/unlink.py -------------- backport frop python3 Lib/test/support.py - - -gunicorn.socketfromfd ---------------------- - -Under Apache License 2 - -Copyright (C) 2016 Christian Heimes \ No newline at end of file diff --git a/gunicorn/socketfromfd.py b/gunicorn/socketfromfd.py index 2bca6749..ec9d26f1 100644 --- a/gunicorn/socketfromfd.py +++ b/gunicorn/socketfromfd.py @@ -1,20 +1,9 @@ -# -*- coding: utf-8 - -# -# This file is part of gunicorn -# See the NOTICE for more information. +# Copyright (C) 2016 Christian Heimes +"""socketfromfd -- socket.fromd() with auto-discovery -# Copyright (C) 2016 Christian Heimes under Apache License 2 - -# source code based on https://github.com/tiran/socketfromfd/blob/master/socketfromfd.py -# and https://github.com/python/cpython/blob/master/Modules/socketmodule.c - -"""socketfromfd -- create a socket from its file descriptor -This module detect the socket properties. - -note: Before python 3.7 auto detecting the socket was not working. -See https://bugs.python.org/issue28134 for details. +ATTENTION: Do not remove this backport till the minimum required version is + Python 3.7. See https://bugs.python.org/issue28134 for details. 
""" - from __future__ import print_function import ctypes @@ -54,7 +43,7 @@ def _errcheck_errno(result, func, arguments): if platform.system() == 'SunOS': _libc_getsockopt = libc._so_getsockopt - _lib_getsockname = libc._so_getsockname + _libc_getsockname = libc._so_getsockname else: _libc_getsockopt = libc.getsockopt _libc_getsockname = libc.getsockname @@ -76,17 +65,13 @@ class SockAddr(ctypes.Structure): ('sa_family', ctypes.c_uint8), ('sa_data', ctypes.c_char * 14) ] - - _libc_getsockname.argtypes = [ ctypes.c_int, ctypes.POINTER(SockAddr), ctypes.POINTER(ctypes.c_int) ] -_libc_getsockname.restype = ctypes.c_int # 0: ok, -1: err -_libc_getsockname.errcheck = _errcheck_errno -def _getsockopt(fd, level, optname): +def _raw_getsockopt(fd, level, optname): """Make raw getsockopt() call for int32 optval :param fd: socket fd @@ -100,11 +85,11 @@ def _getsockopt(fd, level, optname): ctypes.byref(optval), ctypes.byref(optlen)) return optval.value -def _getsockname(fd): +def _raw_getsockname(fd): sockaddr = SockAddr() sockaddrlen = ctypes.c_int(ctypes.sizeof(sockaddr)) _libc_getsockname(fd, sockaddr, sockaddrlen) - return sockaddr + return sockaddr.sa_family def fromfd(fd, keep_fd=True): """Create a socket from a file descriptor @@ -123,15 +108,13 @@ def fromfd(fd, keep_fd=True): :return: socket.socket instance :raises OSError: for invalid socket fd """ - sockaddr = _getsockname(fd) - family = sockaddr.sa_family + family = _raw_getsockname(fd) if hasattr(socket, 'SO_TYPE'): - typ = _getsockopt(fd, socket.SOL_SOCKET, getattr(socket, 'SO_TYPE')) + typ = _raw_getsockopt(fd, socket.SOL_SOCKET, getattr(socket, 'SO_TYPE')) else: typ = socket.SOCK_STREAM - if hasattr(socket, 'SO_PROTOCOL'): - proto = _getsockopt(fd, socket.SOL_SOCKET, getattr(socket, 'SO_PROTOCOL')) + proto = _raw_getsockopt(fd, socket.SOL_SOCKET, getattr(socket, 'SO_PROTOCOL')) else: proto = 0 if keep_fd: From 8c759dd2e3b7a913c50b4e72ecf66fe106c64fdb Mon Sep 17 00:00:00 2001 From: Benoit Chesneau Date: Tue, 
26 Nov 2019 09:53:13 +0100 Subject: [PATCH 136/263] Revert "fix linting on python 3.8" This reverts commit d38f6e47ea03a4c0121036a4300fe2d0155e85c6. --- gunicorn/socketfromfd.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gunicorn/socketfromfd.py b/gunicorn/socketfromfd.py index ec9d26f1..521e0f00 100644 --- a/gunicorn/socketfromfd.py +++ b/gunicorn/socketfromfd.py @@ -87,8 +87,8 @@ def _raw_getsockopt(fd, level, optname): def _raw_getsockname(fd): sockaddr = SockAddr() - sockaddrlen = ctypes.c_int(ctypes.sizeof(sockaddr)) - _libc_getsockname(fd, sockaddr, sockaddrlen) + len = ctypes.c_int(ctypes.sizeof(sockaddr)) + _libc_getsockname(fd, sockaddr, len) return sockaddr.sa_family def fromfd(fd, keep_fd=True): From ab25bae7ebc6854093eaa8bb55ffd51b7dbbb529 Mon Sep 17 00:00:00 2001 From: Benoit Chesneau Date: Tue, 26 Nov 2019 09:53:13 +0100 Subject: [PATCH 137/263] Revert "socketfromfd: fix cross platform usage" This reverts commit e69e8020539989d1b5d4846f5329843c6150767d. 
--- gunicorn/socketfromfd.py | 37 ++++++++----------------------------- 1 file changed, 8 insertions(+), 29 deletions(-) diff --git a/gunicorn/socketfromfd.py b/gunicorn/socketfromfd.py index 521e0f00..a7f7b899 100644 --- a/gunicorn/socketfromfd.py +++ b/gunicorn/socketfromfd.py @@ -16,6 +16,10 @@ from .util import find_library __all__ = ('fromfd',) +SO_DOMAIN = getattr(socket, 'SO_DOMAIN', 39) +SO_TYPE = getattr(socket, 'SO_TYPE', 3) +SO_PROTOCOL = getattr(socket, 'SO_PROTOCOL', 38) + _libc_name = find_library('c') if _libc_name is not None: if sys.platform.startswith("aix"): @@ -43,12 +47,8 @@ def _errcheck_errno(result, func, arguments): if platform.system() == 'SunOS': _libc_getsockopt = libc._so_getsockopt - _libc_getsockname = libc._so_getsockname else: _libc_getsockopt = libc.getsockopt - _libc_getsockname = libc.getsockname - - _libc_getsockopt.argtypes = [ ctypes.c_int, # int sockfd ctypes.c_int, # int level @@ -59,17 +59,6 @@ _libc_getsockopt.argtypes = [ _libc_getsockopt.restype = ctypes.c_int # 0: ok, -1: err _libc_getsockopt.errcheck = _errcheck_errno -class SockAddr(ctypes.Structure): - _fields_ = [ - ('sa_len', ctypes.c_uint8), - ('sa_family', ctypes.c_uint8), - ('sa_data', ctypes.c_char * 14) - ] -_libc_getsockname.argtypes = [ - ctypes.c_int, - ctypes.POINTER(SockAddr), - ctypes.POINTER(ctypes.c_int) -] def _raw_getsockopt(fd, level, optname): """Make raw getsockopt() call for int32 optval @@ -85,11 +74,6 @@ def _raw_getsockopt(fd, level, optname): ctypes.byref(optval), ctypes.byref(optlen)) return optval.value -def _raw_getsockname(fd): - sockaddr = SockAddr() - len = ctypes.c_int(ctypes.sizeof(sockaddr)) - _libc_getsockname(fd, sockaddr, len) - return sockaddr.sa_family def fromfd(fd, keep_fd=True): """Create a socket from a file descriptor @@ -108,15 +92,10 @@ def fromfd(fd, keep_fd=True): :return: socket.socket instance :raises OSError: for invalid socket fd """ - family = _raw_getsockname(fd) - if hasattr(socket, 'SO_TYPE'): - typ = 
_raw_getsockopt(fd, socket.SOL_SOCKET, getattr(socket, 'SO_TYPE')) - else: - typ = socket.SOCK_STREAM - if hasattr(socket, 'SO_PROTOCOL'): - proto = _raw_getsockopt(fd, socket.SOL_SOCKET, getattr(socket, 'SO_PROTOCOL')) - else: - proto = 0 + family = _raw_getsockopt(fd, socket.SOL_SOCKET, SO_DOMAIN) + typ = _raw_getsockopt(fd, socket.SOL_SOCKET, SO_TYPE) + proto = _raw_getsockopt(fd, socket.SOL_SOCKET, SO_PROTOCOL) + s if keep_fd: return socket.fromfd(fd, family, typ, proto) else: From c58337731af5d5c9c9a3f0e7c268805e1afec4d8 Mon Sep 17 00:00:00 2001 From: Benoit Chesneau Date: Tue, 26 Nov 2019 09:53:13 +0100 Subject: [PATCH 138/263] Revert "socketfromfd: remove python 2 compatibility" This reverts commit 404a7120234e2b1119f4e8a3662c542e4d8700c8. --- gunicorn/socketfromfd.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/gunicorn/socketfromfd.py b/gunicorn/socketfromfd.py index a7f7b899..71b40d3c 100644 --- a/gunicorn/socketfromfd.py +++ b/gunicorn/socketfromfd.py @@ -95,8 +95,15 @@ def fromfd(fd, keep_fd=True): family = _raw_getsockopt(fd, socket.SOL_SOCKET, SO_DOMAIN) typ = _raw_getsockopt(fd, socket.SOL_SOCKET, SO_TYPE) proto = _raw_getsockopt(fd, socket.SOL_SOCKET, SO_PROTOCOL) - s - if keep_fd: - return socket.fromfd(fd, family, typ, proto) + if sys.version_info.major == 2: + # Python 2 has no fileno argument and always duplicates the fd + sockobj = socket.fromfd(fd, family, typ, proto) + sock = socket.socket(None, None, None, _sock=sockobj) + if not keep_fd: + os.close(fd) + return sock else: - return socket.socket(family, typ, proto, fileno=fd) + if keep_fd: + return socket.fromfd(fd, family, typ, proto) + else: + return socket.socket(family, typ, proto, fileno=fd) From 67cb62099c27de00cf80b30e37a7617c2c0ec2ed Mon Sep 17 00:00:00 2001 From: benoitc Date: Tue, 26 Nov 2019 10:14:23 +0100 Subject: [PATCH 139/263] remove socketfromfd module socket.fromfd using socket.AF_UNIX as type should be enough to be cross-platform since 
the address is larger than for others family. This should allow the code to work cross-platform. --- gunicorn/sock.py | 3 +- gunicorn/socketfromfd.py | 109 --------------------------------------- 2 files changed, 1 insertion(+), 111 deletions(-) delete mode 100644 gunicorn/socketfromfd.py diff --git a/gunicorn/sock.py b/gunicorn/sock.py index e53e578e..d4586770 100644 --- a/gunicorn/sock.py +++ b/gunicorn/sock.py @@ -11,7 +11,6 @@ import sys import time from gunicorn import util -from gunicorn.socketfromfd import fromfd class BaseSocket(object): @@ -168,7 +167,7 @@ def create_sockets(conf, log, fds=None): # sockets are already bound if fdaddr: for fd in fdaddr: - sock = fromfd(fd) + sock = socket.fromfd(fd, socket.AF_UNIX, socket.SOCK_STREAM) sock_name = sock.getsockname() sock_type = _sock_type(sock_name) listener = sock_type(sock_name, conf, log, fd=fd) diff --git a/gunicorn/socketfromfd.py b/gunicorn/socketfromfd.py deleted file mode 100644 index 71b40d3c..00000000 --- a/gunicorn/socketfromfd.py +++ /dev/null @@ -1,109 +0,0 @@ -# Copyright (C) 2016 Christian Heimes -"""socketfromfd -- socket.fromd() with auto-discovery - -ATTENTION: Do not remove this backport till the minimum required version is - Python 3.7. See https://bugs.python.org/issue28134 for details. 
-""" -from __future__ import print_function - -import ctypes -import os -import socket -import sys -import platform - -from .util import find_library - -__all__ = ('fromfd',) - -SO_DOMAIN = getattr(socket, 'SO_DOMAIN', 39) -SO_TYPE = getattr(socket, 'SO_TYPE', 3) -SO_PROTOCOL = getattr(socket, 'SO_PROTOCOL', 38) - -_libc_name = find_library('c') -if _libc_name is not None: - if sys.platform.startswith("aix"): - member = ( - '(shr_64.o)' if ctypes.sizeof(ctypes.c_voidp) == 8 else '(shr.o)') - # 0x00040000 correspondes to RTLD_MEMBER, undefined in Python <= 3.6 - dlopen_mode = (ctypes.DEFAULT_MODE | 0x00040000 | os.RTLD_NOW) - libc = ctypes.CDLL(_libc_name+member, - use_errno=True, - mode=dlopen_mode) - else: - libc = ctypes.CDLL(_libc_name, use_errno=True) -else: - raise OSError('libc not found') - - -def _errcheck_errno(result, func, arguments): - """Raise OSError by errno for -1 - """ - if result == -1: - errno = ctypes.get_errno() - raise OSError(errno, os.strerror(errno)) - return arguments - - -if platform.system() == 'SunOS': - _libc_getsockopt = libc._so_getsockopt -else: - _libc_getsockopt = libc.getsockopt -_libc_getsockopt.argtypes = [ - ctypes.c_int, # int sockfd - ctypes.c_int, # int level - ctypes.c_int, # int optname - ctypes.c_void_p, # void *optval - ctypes.POINTER(ctypes.c_uint32) # socklen_t *optlen -] -_libc_getsockopt.restype = ctypes.c_int # 0: ok, -1: err -_libc_getsockopt.errcheck = _errcheck_errno - - -def _raw_getsockopt(fd, level, optname): - """Make raw getsockopt() call for int32 optval - - :param fd: socket fd - :param level: SOL_* - :param optname: SO_* - :return: value as int - """ - optval = ctypes.c_int(0) - optlen = ctypes.c_uint32(4) - _libc_getsockopt(fd, level, optname, - ctypes.byref(optval), ctypes.byref(optlen)) - return optval.value - - -def fromfd(fd, keep_fd=True): - """Create a socket from a file descriptor - - socket domain (family), type and protocol are auto-detected. By default - the socket uses a dup()ed fd. 
The original fd can be closed. - - The parameter `keep_fd` influences fd duplication. Under Python 2 the - fd is still duplicated but the input fd is closed. Under Python 3 and - with `keep_fd=True`, the new socket object uses the same fd. - - :param fd: socket fd - :type fd: int - :param keep_fd: keep input fd - :type keep_fd: bool - :return: socket.socket instance - :raises OSError: for invalid socket fd - """ - family = _raw_getsockopt(fd, socket.SOL_SOCKET, SO_DOMAIN) - typ = _raw_getsockopt(fd, socket.SOL_SOCKET, SO_TYPE) - proto = _raw_getsockopt(fd, socket.SOL_SOCKET, SO_PROTOCOL) - if sys.version_info.major == 2: - # Python 2 has no fileno argument and always duplicates the fd - sockobj = socket.fromfd(fd, family, typ, proto) - sock = socket.socket(None, None, None, _sock=sockobj) - if not keep_fd: - os.close(fd) - return sock - else: - if keep_fd: - return socket.fromfd(fd, family, typ, proto) - else: - return socket.socket(family, typ, proto, fileno=fd) From 5d0c7783008d4df87d579a576d19182c4a95caf7 Mon Sep 17 00:00:00 2001 From: benoitc Date: Tue, 26 Nov 2019 20:38:04 +0100 Subject: [PATCH 140/263] bump to 20.0.4 --- docs/source/news.rst | 5 +++-- gunicorn/__init__.py | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/docs/source/news.rst b/docs/source/news.rst index 0839013d..2f8c7481 100644 --- a/docs/source/news.rst +++ b/docs/source/news.rst @@ -2,9 +2,10 @@ Changelog ========= -Unreleased -========== +20.0.4 / 2019/11/26 +=================== +- fix binding a socket using the file descriptor - remove support for the `bdist_rpm` build 20.0.3 / 2019/11/24 diff --git a/gunicorn/__init__.py b/gunicorn/__init__.py index 4d59629c..467084a2 100644 --- a/gunicorn/__init__.py +++ b/gunicorn/__init__.py @@ -3,6 +3,6 @@ # This file is part of gunicorn released under the MIT license. # See the NOTICE for more information. 
-version_info = (20, 0, 3) +version_info = (20, 0, 4) __version__ = ".".join([str(v) for v in version_info]) SERVER_SOFTWARE = "gunicorn/%s" % __version__ From ff1dbfec0657efac3d5c0812c541767815249960 Mon Sep 17 00:00:00 2001 From: benoitc Date: Wed, 27 Nov 2019 00:02:48 +0100 Subject: [PATCH 141/263] update site with the new version --- docs/site/index.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/site/index.html b/docs/site/index.html index 7fdd07e7..4fc6e170 100644 --- a/docs/site/index.html +++ b/docs/site/index.html @@ -16,7 +16,7 @@
Latest version: 20.0.3 + href="https://docs.gunicorn.org/en/stable/">20.0.4
From ed901637ff054939902ff2b1e7633a8cef4762f2 Mon Sep 17 00:00:00 2001 From: Takuya Noguchi Date: Sun, 17 Sep 2017 16:48:34 +0900 Subject: [PATCH 142/263] Enable pycodestyle --- gunicorn/app/base.py | 4 +-- gunicorn/arbiter.py | 6 ++-- gunicorn/config.py | 25 +++++++++---- gunicorn/glogging.py | 65 ++++++++++++++++++---------------- gunicorn/http/message.py | 8 +++-- gunicorn/http/wsgi.py | 2 +- gunicorn/instrument/statsd.py | 1 + gunicorn/pidfile.py | 2 +- gunicorn/reloader.py | 1 + gunicorn/util.py | 14 ++++---- gunicorn/workers/base.py | 21 ++++++----- gunicorn/workers/base_async.py | 2 +- gunicorn/workers/ggevent.py | 3 +- gunicorn/workers/gthread.py | 18 +++++----- gunicorn/workers/sync.py | 10 +++--- gunicorn/workers/workertmp.py | 2 +- tox.ini | 11 ++++++ 17 files changed, 117 insertions(+), 78 deletions(-) diff --git a/gunicorn/app/base.py b/gunicorn/app/base.py index 9b024f15..240f5eb2 100644 --- a/gunicorn/app/base.py +++ b/gunicorn/app/base.py @@ -142,7 +142,7 @@ class Application(BaseApplication): continue try: self.cfg.set(k.lower(), v) - except: + except Exception: print("Invalid value for %s: %s\n" % (k, v), file=sys.stderr) sys.stderr.flush() raise @@ -203,7 +203,7 @@ class Application(BaseApplication): if self.cfg.check_config: try: self.load() - except: + except Exception: msg = "\nError while loading the application:\n" print(msg, file=sys.stderr) traceback.print_exc() diff --git a/gunicorn/arbiter.py b/gunicorn/arbiter.py index bca671d1..532426d2 100644 --- a/gunicorn/arbiter.py +++ b/gunicorn/arbiter.py @@ -590,7 +590,7 @@ class Arbiter(object): print("%s" % e, file=sys.stderr) sys.stderr.flush() sys.exit(self.APP_LOAD_ERROR) - except: + except Exception: self.log.exception("Exception in worker process") if not worker.booted: sys.exit(self.WORKER_BOOT_ERROR) @@ -600,9 +600,9 @@ class Arbiter(object): try: worker.tmp.close() self.cfg.worker_exit(self, worker) - except: + except Exception: self.log.warning("Exception during worker exit:\n%s", - 
traceback.format_exc()) + traceback.format_exc()) def spawn_workers(self): """\ diff --git a/gunicorn/config.py b/gunicorn/config.py index f21f74f8..6c1d1777 100644 --- a/gunicorn/config.py +++ b/gunicorn/config.py @@ -78,9 +78,9 @@ class Config(object): } parser = argparse.ArgumentParser(**kwargs) parser.add_argument("-v", "--version", - action="version", default=argparse.SUPPRESS, - version="%(prog)s (version " + __version__ + ")\n", - help="show program's version number and exit") + action="version", default=argparse.SUPPRESS, + version="%(prog)s (version " + __version__ + ")\n", + help="show program's version number and exit") parser.add_argument("args", nargs="*", help=argparse.SUPPRESS) keys = sorted(self.settings, key=self.settings.__getitem__) @@ -93,7 +93,7 @@ class Config(object): def worker_class_str(self): uri = self.settings['worker_class'].get() - ## are we using a threaded worker? + # are we using a threaded worker? is_sync = uri.endswith('SyncWorker') or uri == 'sync' if is_sync and self.threads > 1: return "threads" @@ -103,7 +103,7 @@ class Config(object): def worker_class(self): uri = self.settings['worker_class'].get() - ## are we using a threaded worker? + # are we using a threaded worker? is_sync = uri.endswith('SyncWorker') or uri == 'sync' if is_sync and self.threads > 1: uri = "gunicorn.workers.gthread.ThreadWorker" @@ -524,7 +524,7 @@ def validate_reload_engine(val): def get_default_config_file(): config_path = os.path.join(os.path.abspath(os.getcwd()), - 'gunicorn.conf.py') + 'gunicorn.conf.py') if os.path.exists(config_path): return config_path return None @@ -550,6 +550,7 @@ class ConfigFile(Setting): prefix. """ + class Bind(Setting): name = "bind" action = "append" @@ -654,6 +655,7 @@ class WorkerClass(Setting): ``gunicorn.workers.ggevent.GeventWorker``. """ + class WorkerThreads(Setting): name = "threads" section = "Worker Processes" @@ -1025,6 +1027,7 @@ class Daemon(Setting): background. 
""" + class Env(Setting): name = "raw_env" action = "append" @@ -1058,6 +1061,7 @@ class Pidfile(Setting): If not set, no PID file will be written. """ + class WorkerTmpDir(Setting): name = "worker_tmp_dir" section = "Server Mechanics" @@ -1111,6 +1115,7 @@ class Group(Setting): change the worker processes group. """ + class Umask(Setting): name = "umask" section = "Server Mechanics" @@ -1224,6 +1229,7 @@ class AccessLog(Setting): ``'-'`` means log to stdout. """ + class DisableRedirectAccessToSyslog(Setting): name = "disable_redirect_access_to_syslog" section = "Logging" @@ -1676,6 +1682,7 @@ class PostWorkerInit(Setting): Worker. """ + class WorkerInt(Setting): name = "worker_int" section = "Server Hooks" @@ -1819,6 +1826,7 @@ class NumWorkersChanged(Setting): be ``None``. """ + class OnExit(Setting): name = "on_exit" section = "Server Hooks" @@ -1899,6 +1907,7 @@ class CertFile(Setting): SSL certificate file """ + class SSLVersion(Setting): name = "ssl_version" section = "SSL" @@ -1945,6 +1954,7 @@ class SSLVersion(Setting): constants. 
""" + class CertReqs(Setting): name = "cert_reqs" section = "SSL" @@ -1955,6 +1965,7 @@ class CertReqs(Setting): Whether client certificate is required (see stdlib ssl module's) """ + class CACerts(Setting): name = "ca_certs" section = "SSL" @@ -1966,6 +1977,7 @@ class CACerts(Setting): CA certificates file """ + class SuppressRaggedEOFs(Setting): name = "suppress_ragged_eofs" section = "SSL" @@ -1977,6 +1989,7 @@ class SuppressRaggedEOFs(Setting): Suppress ragged EOFs (see stdlib ssl module's) """ + class DoHandshakeOnConnect(Setting): name = "do_handshake_on_connect" section = "SSL" diff --git a/gunicorn/glogging.py b/gunicorn/glogging.py index a096f967..0ee00989 100644 --- a/gunicorn/glogging.py +++ b/gunicorn/glogging.py @@ -7,7 +7,7 @@ import base64 import binascii import time import logging -logging.Logger.manager.emittedNoHandlerWarning = 1 +logging.Logger.manager.emittedNoHandlerWarning = 1 # noqa from logging.config import dictConfig from logging.config import fileConfig import os @@ -21,28 +21,28 @@ from gunicorn import util # syslog facility codes SYSLOG_FACILITIES = { - "auth": 4, - "authpriv": 10, - "cron": 9, - "daemon": 3, - "ftp": 11, - "kern": 0, - "lpr": 6, - "mail": 2, - "news": 7, - "security": 4, # DEPRECATED - "syslog": 5, - "user": 1, - "uucp": 8, - "local0": 16, - "local1": 17, - "local2": 18, - "local3": 19, - "local4": 20, - "local5": 21, - "local6": 22, - "local7": 23 - } + "auth": 4, + "authpriv": 10, + "cron": 9, + "daemon": 3, + "ftp": 11, + "kern": 0, + "lpr": 6, + "mail": 2, + "news": 7, + "security": 4, # DEPRECATED + "syslog": 5, + "user": 1, + "uucp": 8, + "local0": 16, + "local1": 17, + "local2": 18, + "local3": 19, + "local4": 20, + "local5": 21, + "local6": 22, + "local7": 23 +} CONFIG_DEFAULTS = dict( @@ -213,8 +213,10 @@ class Logger(object): # set gunicorn.access handler if cfg.accesslog is not None: - self._set_handler(self.access_log, cfg.accesslog, - fmt=logging.Formatter(self.access_fmt), stream=sys.stdout) + 
self._set_handler( + self.access_log, cfg.accesslog, + fmt=logging.Formatter(self.access_fmt), stream=sys.stdout + ) # set syslog handler if cfg.syslog: @@ -284,7 +286,8 @@ class Logger(object): 'u': self._get_user(environ) or '-', 't': self.now(), 'r': "%s %s %s" % (environ['REQUEST_METHOD'], - environ['RAW_URI'], environ["SERVER_PROTOCOL"]), + environ['RAW_URI'], + environ["SERVER_PROTOCOL"]), 's': status, 'm': environ.get('REQUEST_METHOD'), 'U': environ.get('PATH_INFO'), @@ -295,7 +298,7 @@ class Logger(object): 'f': environ.get('HTTP_REFERER', '-'), 'a': environ.get('HTTP_USER_AGENT', '-'), 'T': request_time.seconds, - 'D': (request_time.seconds*1000000) + request_time.microseconds, + 'D': (request_time.seconds * 1000000) + request_time.microseconds, 'L': "%d.%06d" % (request_time.seconds, request_time.microseconds), 'p': "<%s>" % os.getpid() } @@ -337,12 +340,13 @@ class Logger(object): # wrap atoms: # - make sure atoms will be test case insensitively # - if atom doesn't exist replace it by '-' - safe_atoms = self.atoms_wrapper_class(self.atoms(resp, req, environ, - request_time)) + safe_atoms = self.atoms_wrapper_class( + self.atoms(resp, req, environ, request_time) + ) try: self.access_log.info(self.cfg.access_log_format, safe_atoms) - except: + except Exception: self.error(traceback.format_exc()) def now(self): @@ -361,7 +365,6 @@ class Logger(object): os.dup2(self.logfile.fileno(), sys.stdout.fileno()) os.dup2(self.logfile.fileno(), sys.stderr.fileno()) - for log in loggers(): for handler in log.handlers: if isinstance(handler, logging.FileHandler): diff --git a/gunicorn/http/message.py b/gunicorn/http/message.py index e5ce4a68..93ecf3b3 100644 --- a/gunicorn/http/message.py +++ b/gunicorn/http/message.py @@ -10,9 +10,11 @@ from errno import ENOTCONN from gunicorn.http.unreader import SocketUnreader from gunicorn.http.body import ChunkedReader, LengthReader, EOFReader, Body -from gunicorn.http.errors import (InvalidHeader, InvalidHeaderName, NoMoreData, 
+from gunicorn.http.errors import ( + InvalidHeader, InvalidHeaderName, NoMoreData, InvalidRequestLine, InvalidRequestMethod, InvalidHTTPVersion, - LimitRequestLine, LimitRequestHeaders) + LimitRequestLine, LimitRequestHeaders, +) from gunicorn.http.errors import InvalidProxyLine, ForbiddenProxyRequest from gunicorn.http.errors import InvalidSchemeHeaders from gunicorn.util import bytes_to_str, split_request_uri @@ -105,7 +107,7 @@ class Message(object): header_length += len(curr) if header_length > self.limit_request_field_size > 0: raise LimitRequestHeaders("limit request headers " - + "fields size") + "fields size") value.append(curr) value = ''.join(value).rstrip() diff --git a/gunicorn/http/wsgi.py b/gunicorn/http/wsgi.py index 414513e0..32f6c0b4 100644 --- a/gunicorn/http/wsgi.py +++ b/gunicorn/http/wsgi.py @@ -303,7 +303,7 @@ class Response(object): headers = [ "HTTP/%s.%s %s\r\n" % (self.req.version[0], - self.req.version[1], self.status), + self.req.version[1], self.status), "Server: %s\r\n" % self.version, "Date: %s\r\n" % util.http_date(), "Connection: %s\r\n" % connection diff --git a/gunicorn/instrument/statsd.py b/gunicorn/instrument/statsd.py index 9a537205..afbfd7b4 100644 --- a/gunicorn/instrument/statsd.py +++ b/gunicorn/instrument/statsd.py @@ -19,6 +19,7 @@ GAUGE_TYPE = "gauge" COUNTER_TYPE = "counter" HISTOGRAM_TYPE = "histogram" + class Statsd(Logger): """statsD-based instrumentation, that passes as a logger """ diff --git a/gunicorn/pidfile.py b/gunicorn/pidfile.py index a6e085fd..585b02af 100644 --- a/gunicorn/pidfile.py +++ b/gunicorn/pidfile.py @@ -57,7 +57,7 @@ class Pidfile(object): if pid1 == self.pid: os.unlink(self.fname) - except: + except Exception: pass def validate(self): diff --git a/gunicorn/reloader.py b/gunicorn/reloader.py index d00be2bf..022179c9 100644 --- a/gunicorn/reloader.py +++ b/gunicorn/reloader.py @@ -53,6 +53,7 @@ class Reloader(threading.Thread): self._callback(filename) time.sleep(self._interval) + has_inotify = 
False if sys.platform.startswith('linux'): try: diff --git a/gunicorn/util.py b/gunicorn/util.py index 3c72d9fc..d52df771 100644 --- a/gunicorn/util.py +++ b/gunicorn/util.py @@ -56,7 +56,7 @@ except ImportError: def load_class(uri, default="gunicorn.workers.sync.SyncWorker", - section="gunicorn.workers"): + section="gunicorn.workers"): if inspect.isclass(uri): return uri if uri.startswith("egg:"): @@ -70,7 +70,7 @@ def load_class(uri, default="gunicorn.workers.sync.SyncWorker", try: return pkg_resources.load_entry_point(dist, section, name) - except: + except Exception: exc = traceback.format_exc() msg = "class uri %r invalid or not found: \n\n[%s]" raise RuntimeError(msg % (uri, exc)) @@ -86,9 +86,10 @@ def load_class(uri, default="gunicorn.workers.sync.SyncWorker", break try: - return pkg_resources.load_entry_point("gunicorn", - section, uri) - except: + return pkg_resources.load_entry_point( + "gunicorn", section, uri + ) + except Exception: exc = traceback.format_exc() msg = "class uri %r invalid or not found: \n\n[%s]" raise RuntimeError(msg % (uri, exc)) @@ -260,6 +261,7 @@ def close(sock): except socket.error: pass + try: from os import closerange except ImportError: @@ -439,7 +441,7 @@ def getcwd(): cwd = os.environ['PWD'] else: cwd = os.getcwd() - except: + except Exception: cwd = os.getcwd() return cwd diff --git a/gunicorn/workers/base.py b/gunicorn/workers/base.py index 54a6713c..8e0129cf 100644 --- a/gunicorn/workers/base.py +++ b/gunicorn/workers/base.py @@ -28,8 +28,9 @@ from gunicorn.workers.workertmp import WorkerTmp class Worker(object): - SIGNALS = [getattr(signal, "SIG%s" % x) - for x in "ABRT HUP QUIT INT TERM USR1 USR2 WINCH CHLD".split()] + SIGNALS = [getattr(signal, "SIG%s" % x) for x in ( + "ABRT HUP QUIT INT TERM USR1 USR2 WINCH CHLD".split() + )] PIPE = [] @@ -203,12 +204,14 @@ class Worker(object): def handle_error(self, req, client, addr, exc): request_start = datetime.now() addr = addr or ('', -1) # unix socket case - if 
isinstance(exc, (InvalidRequestLine, InvalidRequestMethod, - InvalidHTTPVersion, InvalidHeader, InvalidHeaderName, - LimitRequestLine, LimitRequestHeaders, - InvalidProxyLine, ForbiddenProxyRequest, - InvalidSchemeHeaders, - SSLError)): + if isinstance(exc, ( + InvalidRequestLine, InvalidRequestMethod, + InvalidHTTPVersion, InvalidHeader, InvalidHeaderName, + LimitRequestLine, LimitRequestHeaders, + InvalidProxyLine, ForbiddenProxyRequest, + InvalidSchemeHeaders, + SSLError, + )): status_int = 400 reason = "Bad Request" @@ -261,7 +264,7 @@ class Worker(object): try: util.write_error(client, status_int, reason, mesg) - except: + except Exception: self.log.debug("Failed to send error message.") def handle_winch(self, sig, fname): diff --git a/gunicorn/workers/base_async.py b/gunicorn/workers/base_async.py index ebd0fc10..7b2daf91 100644 --- a/gunicorn/workers/base_async.py +++ b/gunicorn/workers/base_async.py @@ -92,7 +92,7 @@ class AsyncWorker(base.Worker): try: self.cfg.pre_request(self, req) resp, environ = wsgi.create(req, sock, addr, - listener_name, self.cfg) + listener_name, self.cfg) environ["wsgi.multithread"] = True self.nr += 1 if self.alive and self.nr >= self.max_requests: diff --git a/gunicorn/workers/ggevent.py b/gunicorn/workers/ggevent.py index 85418035..1b964e58 100644 --- a/gunicorn/workers/ggevent.py +++ b/gunicorn/workers/ggevent.py @@ -40,6 +40,7 @@ def _gevent_sendfile(fdout, fdin, offset, nbytes, _os_sendfile=os.sendfile): else: raise + def patch_sendfile(): setattr(os, "sendfile", _gevent_sendfile) @@ -129,7 +130,7 @@ class GeventWorker(AsyncWorker): self.log.warning("Worker graceful timeout (pid:%s)" % self.pid) for server in servers: server.stop(timeout=1) - except: + except Exception: pass def handle(self, listener, client, addr): diff --git a/gunicorn/workers/gthread.py b/gunicorn/workers/gthread.py index 5828ee93..376c3cb7 100644 --- a/gunicorn/workers/gthread.py +++ b/gunicorn/workers/gthread.py @@ -50,7 +50,7 @@ class TConn(object): # 
wrap the socket if needed if self.cfg.is_ssl: self.sock = ssl.wrap_socket(self.sock, server_side=True, - **self.cfg.ssl_options) + **self.cfg.ssl_options) # initialize the parser self.parser = http.RequestParser(self.cfg, self.sock) @@ -83,7 +83,7 @@ class ThreadWorker(base.Worker): if max_keepalived <= 0 and cfg.keepalive: log.warning("No keepalived connections can be handled. " + - "Check the number of worker connections and threads.") + "Check the number of worker connections and threads.") def init_process(self): self.tpool = self.get_thread_pool() @@ -123,8 +123,8 @@ class ThreadWorker(base.Worker): # enqueue the job self.enqueue_req(conn) except EnvironmentError as e: - if e.errno not in (errno.EAGAIN, - errno.ECONNABORTED, errno.EWOULDBLOCK): + if e.errno not in (errno.EAGAIN, errno.ECONNABORTED, + errno.EWOULDBLOCK): raise def reuse_connection(self, conn, client): @@ -204,11 +204,11 @@ class ThreadWorker(base.Worker): # check (but do not wait) for finished requests result = futures.wait(self.futures, timeout=0, - return_when=futures.FIRST_COMPLETED) + return_when=futures.FIRST_COMPLETED) else: # wait for a request to finish result = futures.wait(self.futures, timeout=1.0, - return_when=futures.FIRST_COMPLETED) + return_when=futures.FIRST_COMPLETED) # clean up finished requests for fut in result.done: @@ -249,11 +249,11 @@ class ThreadWorker(base.Worker): # add the socket to the event loop self.poller.register(conn.sock, selectors.EVENT_READ, - partial(self.reuse_connection, conn)) + partial(self.reuse_connection, conn)) else: self.nr_conns -= 1 conn.close() - except: + except Exception: # an exception happened, make sure to close the # socket. 
self.nr_conns -= 1 @@ -304,7 +304,7 @@ class ThreadWorker(base.Worker): self.cfg.pre_request(self, req) request_start = datetime.now() resp, environ = wsgi.create(req, conn.sock, conn.client, - conn.server, self.cfg) + conn.server, self.cfg) environ["wsgi.multithread"] = True self.nr += 1 if self.alive and self.nr >= self.max_requests: diff --git a/gunicorn/workers/sync.py b/gunicorn/workers/sync.py index ab9e0856..fd423bc9 100644 --- a/gunicorn/workers/sync.py +++ b/gunicorn/workers/sync.py @@ -17,9 +17,11 @@ import gunicorn.http.wsgi as wsgi import gunicorn.util as util import gunicorn.workers.base as base + class StopWaiting(Exception): """ exception raised to stop waiting for a connection """ + class SyncWorker(base.Worker): def accept(self, listener): @@ -72,7 +74,7 @@ class SyncWorker(base.Worker): except EnvironmentError as e: if e.errno not in (errno.EAGAIN, errno.ECONNABORTED, - errno.EWOULDBLOCK): + errno.EWOULDBLOCK): raise if not self.is_parent_alive(): @@ -101,7 +103,7 @@ class SyncWorker(base.Worker): self.accept(listener) except EnvironmentError as e: if e.errno not in (errno.EAGAIN, errno.ECONNABORTED, - errno.EWOULDBLOCK): + errno.EWOULDBLOCK): raise if not self.is_parent_alive(): @@ -127,7 +129,7 @@ class SyncWorker(base.Worker): try: if self.cfg.is_ssl: client = ssl.wrap_socket(client, server_side=True, - **self.cfg.ssl_options) + **self.cfg.ssl_options) parser = http.RequestParser(self.cfg, client) req = next(parser) @@ -163,7 +165,7 @@ class SyncWorker(base.Worker): self.cfg.pre_request(self, req) request_start = datetime.now() resp, environ = wsgi.create(req, client, addr, - listener.getsockname(), self.cfg) + listener.getsockname(), self.cfg) # Force the connection closed until someone shows # a buffering proxy that supports Keep-Alive to # the backend. 
diff --git a/gunicorn/workers/workertmp.py b/gunicorn/workers/workertmp.py index c475a12b..65bbe54f 100644 --- a/gunicorn/workers/workertmp.py +++ b/gunicorn/workers/workertmp.py @@ -35,7 +35,7 @@ class WorkerTmp(object): # In Python 3.8, open() emits RuntimeWarning if buffering=1 for binary mode. # Because we never write to this file, pass 0 to switch buffering off. self._tmp = os.fdopen(fd, 'w+b', 0) - except: + except Exception: os.close(fd) raise diff --git a/tox.ini b/tox.ini index 41298735..26e314ab 100644 --- a/tox.ini +++ b/tox.ini @@ -38,3 +38,14 @@ deps = commands = rst-lint README.rst docs/README.rst bash -c "(set -o pipefail; rst-lint --encoding utf-8 docs/source/*.rst | grep -v 'Unknown interpreted text role\|Unknown directive type'); test $? == 1" + +[testenv:pycodestyle] +commands = + pycodestyle gunicorn \ + --exclude=gunicorn/six.py +deps = + pycodestyle + +[pycodestyle] +max-line-length = 120 +ignore = E129,W503,W504,W606 From 9c147f6649a7c83f2ed918617498b7f8483859da Mon Sep 17 00:00:00 2001 From: Brett Randall Date: Sat, 30 Nov 2019 12:25:50 +1100 Subject: [PATCH 143/263] Added Brett Randall to MAINTAINERS. --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 91339e23..3117512a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6,6 +6,7 @@ Konstantin Kapustin Randall Leeds Berker Peksağ Jason Madden +Brett Randall Alumni ====== From 4ef01b1c100e7590ace3e1cac5a83b81974a6161 Mon Sep 17 00:00:00 2001 From: Brett Randall Date: Wed, 4 Dec 2019 09:29:55 +1100 Subject: [PATCH 144/263] Bumped py3 min version in setup.py to 3.5 (was 3.4). 
Signed-off-by: Brett Randall --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 965546f4..11079f86 100644 --- a/setup.py +++ b/setup.py @@ -94,7 +94,7 @@ setup( license='MIT', url='http://gunicorn.org', - python_requires='>=3.4', + python_requires='>=3.5', install_requires=install_requires, classifiers=CLASSIFIERS, zip_safe=False, From 5a57d595c578761eb235635aba645953e99b38a2 Mon Sep 17 00:00:00 2001 From: Wesley Ellis Date: Fri, 13 Dec 2019 15:00:26 -0500 Subject: [PATCH 145/263] Fix sample command syntax highlighting in run.rst Extra `:` was causing the syntax highlighting to mess up, resulting in the docs containing the `..code-block:: text` markup --- docs/source/run.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/run.rst b/docs/source/run.rst index 71abc739..6e4be8ad 100644 --- a/docs/source/run.rst +++ b/docs/source/run.rst @@ -42,7 +42,7 @@ Example with the test app: start_response(status, response_headers) return iter([data]) -You can now run the app with the following command:: +You can now run the app with the following command: .. code-block:: text From cfc93ad70115aa384d4cc2592b858565c2df41c2 Mon Sep 17 00:00:00 2001 From: monobaila Date: Fri, 13 Dec 2019 11:42:09 +0000 Subject: [PATCH 146/263] Update FAQ - Workers Silently Killed. --- docs/source/faq.rst | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/docs/source/faq.rst b/docs/source/faq.rst index 98af5a65..71fb2b84 100644 --- a/docs/source/faq.rst +++ b/docs/source/faq.rst @@ -205,3 +205,30 @@ Check the result:: tmpfs 65536 0 65536 0% /mem Now you can set ``--worker-tmp-dir /mem``. + +Why are Workers Silently Killed? +-------------------------------------------------------------- + +A sometimes subtle problem to debug is when a worker process is killed and there +is little logging information about what happened. 
+ +If you use a reverse proxy like NGINX you might see 502 returned to a client. + +In the gunicorn logs you might simply see ``[35] [INFO] Booting worker with pid: 35`` + +It's completely normal for workers to be killed and restarted, for example due to +the max-requests setting. Ordinarily gunicorn will capture any signals and log something. + +This particular failure case is usually due to a SIGKILL being received. As it's +not possible to catch this signal, silence is a common side effect! A common +cause of SIGKILL is the OOM killer terminating a process due to a low-memory condition. + +This is increasingly common in container deployments where memory limits are enforced +by cgroups. You'll usually see evidence of this from dmesg:: + + dmesg | grep gunicorn + Memory cgroup out of memory: Kill process 24534 (gunicorn) score 1506 or sacrifice child + Killed process 24534 (gunicorn) total-vm:1016648kB, anon-rss:550160kB, file-rss:25824kB, shmem-rss:0kB + +In these instances adjusting the memory limit is usually your best bet. It's also possible +to configure the OOM killer not to send SIGKILL by default.
From e5310d15e9cae4a9f7df645ac5f5966377afb9ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mika=C3=ABl=20Dusenne?= Date: Mon, 30 Dec 2019 08:39:03 -0500 Subject: [PATCH 147/263] add os.path.abspath() in dirname generation of the reloader Otherwise adding a watcher for a file located in the working directory generates an empty dirname, resulting in the following error: inotify.calls.InotifyError: Call failed (should not be -1): (-1) ERRNO=(0) Caused by the fact that we call inotify with an empty path --- gunicorn/reloader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gunicorn/reloader.py b/gunicorn/reloader.py index d00be2bf..66d98c55 100644 --- a/gunicorn/reloader.py +++ b/gunicorn/reloader.py @@ -92,7 +92,7 @@ if has_inotify: def get_dirs(self): fnames = [ - os.path.dirname(COMPILED_EXT_RE.sub('py', module.__file__)) + os.path.dirname(os.path.abspath(COMPILED_EXT_RE.sub('py', module.__file__))) for module in tuple(sys.modules.values()) if getattr(module, '__file__', None) ] From 7cce20e93776491506d8d0d4123e549d15b42bce Mon Sep 17 00:00:00 2001 From: Brett Randall Date: Wed, 1 Jan 2020 11:35:11 +1100 Subject: [PATCH 148/263] Don't build universal wheel, since Python 2 support has been removed. This reverts commit 765b8ab48b6fb991eeb9bcf4f60c0cba6f48359f . --- setup.cfg | 3 --- 1 file changed, 3 deletions(-) diff --git a/setup.cfg b/setup.cfg index 64d0abc0..b880b5d9 100644 --- a/setup.cfg +++ b/setup.cfg @@ -3,8 +3,5 @@ norecursedirs = examples lib local src testpaths = tests/ addopts = --assert=plain --cov=gunicorn --cov-report=xml -[wheel] -universal = 1 - [metadata] license_file = LICENSE From 2ec7493bcbd866aab7238424c7df67f39a41d362 Mon Sep 17 00:00:00 2001 From: Brett Randall Date: Wed, 1 Jan 2020 12:58:30 +1100 Subject: [PATCH 149/263] Updated requires Python to >= 3.5. 
--- docs/source/install.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/install.rst b/docs/source/install.rst index d6d146d2..13bae45c 100644 --- a/docs/source/install.rst +++ b/docs/source/install.rst @@ -4,7 +4,7 @@ Installation .. highlight:: bash -:Requirements: **Python 3.x >= 3.4** +:Requirements: **Python 3.x >= 3.5** To install the latest released version of Gunicorn:: From 2d40e6daceb9735d27bb91d9c32743695de8e01c Mon Sep 17 00:00:00 2001 From: Jason Madden Date: Sat, 4 Jan 2020 06:31:25 -0600 Subject: [PATCH 150/263] Use socket.sendfile() instead of os.sendfile(). Fixes #2223. Unfortunately, eventlet doesn't implement GreenSocket.sendfile, so we have to do it for it. Add gevent and eventlet to tox.ini and add tests to make sure we can at least import the workers. Some tests that this actually functions would be nice... Update the gevent and eventlet setup extras to require the versions that are enforced in their worker modules. --- .travis.yml | 20 +++++------ gunicorn/http/wsgi.py | 13 +------ gunicorn/workers/geventlet.py | 61 ++++++++++++++++++++++++++------- gunicorn/workers/ggevent.py | 19 ---------- requirements_test.txt | 2 ++ setup.py | 4 +-- tests/workers/__init__.py | 0 tests/workers/test_geventlet.py | 7 ++++ tests/workers/test_ggevent.py | 7 ++++ tox.ini | 2 +- 10 files changed, 77 insertions(+), 58 deletions(-) create mode 100644 tests/workers/__init__.py create mode 100644 tests/workers/test_geventlet.py create mode 100644 tests/workers/test_ggevent.py diff --git a/.travis.yml b/.travis.yml index f2d3c41a..2e11c55a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,33 +1,31 @@ -sudo: false language: python matrix: include: - python: 3.8 env: TOXENV=lint - dist: xenial - sudo: true - python: 3.5 env: TOXENV=py35 - python: 3.6 env: TOXENV=py36 - python: 3.7 env: TOXENV=py37 - dist: xenial - sudo: true - python: pypy3 - env: TOXENV=pypy3 + env: + - TOXENV=pypy3 + # Embedded c-ares takes a long time to build and + 
# as-of 2020-01-04 there are no PyPy3 manylinux + # wheels for gevent on PyPI. + - GEVENTSETUP_EMBED_CARES=no dist: xenial - python: 3.8 env: TOXENV=py38 - dist: xenial - sudo: true - python: 3.8 env: TOXENV=docs-lint - dist: xenial - sudo: true -install: pip install tox +install: pip install -U tox coverage # TODO: https://github.com/tox-dev/tox/issues/149 script: tox --recreate +after_success: + - if [ -f .coverage ]; then coverage report ; fi cache: directories: - .tox diff --git a/gunicorn/http/wsgi.py b/gunicorn/http/wsgi.py index 32f6c0b4..17360826 100644 --- a/gunicorn/http/wsgi.py +++ b/gunicorn/http/wsgi.py @@ -360,12 +360,6 @@ class Response(object): offset = os.lseek(fileno, 0, os.SEEK_CUR) if self.response_length is None: filesize = os.fstat(fileno).st_size - - # The file may be special and sendfile will fail. - # It may also be zero-length, but that is okay. - if filesize == 0: - return False - nbytes = filesize - offset else: nbytes = self.response_length @@ -378,12 +372,7 @@ class Response(object): chunk_size = "%X\r\n" % nbytes self.sock.sendall(chunk_size.encode('utf-8')) - sockno = self.sock.fileno() - sent = 0 - - while sent != nbytes: - count = min(nbytes - sent, BLKSIZE) - sent += os.sendfile(sockno, fileno, offset + sent, count) + self.sock.sendfile(respiter.filelike, count=nbytes) if self.is_chunked(): self.sock.sendall(b"\r\n") diff --git a/gunicorn/workers/geventlet.py b/gunicorn/workers/geventlet.py index e4b425cd..ffdb206c 100644 --- a/gunicorn/workers/geventlet.py +++ b/gunicorn/workers/geventlet.py @@ -4,8 +4,6 @@ # See the NOTICE for more information. 
from functools import partial -import errno -import os import sys try: @@ -19,22 +17,49 @@ else: from eventlet import hubs, greenthread from eventlet.greenio import GreenSocket -from eventlet.hubs import trampoline from eventlet.wsgi import ALREADY_HANDLED as EVENTLET_ALREADY_HANDLED import greenlet from gunicorn.workers.base_async import AsyncWorker -def _eventlet_sendfile(fdout, fdin, offset, nbytes, _os_sendfile=os.sendfile): - while True: - try: - return _os_sendfile(fdout, fdin, offset, nbytes) - except OSError as e: - if e.args[0] == errno.EAGAIN: - trampoline(fdout, write=True) - else: - raise +def _eventlet_socket_sendfile(self, file, offset=0, count=None): + # Based on the implementation in gevent which in turn is slightly + # modified from the standard library implementation. + if self.gettimeout() == 0: + raise ValueError("non-blocking sockets are not supported") + if offset: + file.seek(offset) + blocksize = min(count, 8192) if count else 8192 + total_sent = 0 + # localize variable access to minimize overhead + file_read = file.read + sock_send = self.send + try: + while True: + if count: + blocksize = min(count - total_sent, blocksize) + if blocksize <= 0: + break + data = memoryview(file_read(blocksize)) + if not data: + break # EOF + while True: + try: + sent = sock_send(data) + except BlockingIOError: + continue + else: + total_sent += sent + if sent < len(data): + data = data[sent:] + else: + break + return total_sent + finally: + if total_sent > 0 and hasattr(file, 'seek'): + file.seek(offset + total_sent) + def _eventlet_serve(sock, handle, concurrency): @@ -79,7 +104,17 @@ def _eventlet_stop(client, server, conn): def patch_sendfile(): - setattr(os, "sendfile", _eventlet_sendfile) + # As of eventlet 0.25.1, GreenSocket.sendfile doesn't exist, + # meaning the native implementations of socket.sendfile will be used. 
+ # If os.sendfile exists, it will attempt to use that, failing explicitly + # if the socket is in non-blocking mode, which the underlying + # socket object /is/. Even the regular _sendfile_use_send will + # fail in that way; plus, it would use the underlying socket.send which isn't + # properly cooperative. So we have to monkey-patch a working socket.sendfile() + # into GreenSocket; in this method, `self.send` will be the GreenSocket's + # send method which is properly cooperative. + if not hasattr(GreenSocket, 'sendfile'): + GreenSocket.sendfile = _eventlet_socket_sendfile class EventletWorker(AsyncWorker): diff --git a/gunicorn/workers/ggevent.py b/gunicorn/workers/ggevent.py index 1b964e58..57340221 100644 --- a/gunicorn/workers/ggevent.py +++ b/gunicorn/workers/ggevent.py @@ -3,7 +3,6 @@ # This file is part of gunicorn released under the MIT license. # See the NOTICE for more information. -import errno import os import sys from datetime import datetime @@ -30,21 +29,6 @@ from gunicorn.workers.base_async import AsyncWorker VERSION = "gevent/%s gunicorn/%s" % (gevent.__version__, gunicorn.__version__) -def _gevent_sendfile(fdout, fdin, offset, nbytes, _os_sendfile=os.sendfile): - while True: - try: - return _os_sendfile(fdout, fdin, offset, nbytes) - except OSError as e: - if e.args[0] == errno.EAGAIN: - socket.wait_write(fdout) - else: - raise - - -def patch_sendfile(): - setattr(os, "sendfile", _gevent_sendfile) - - class GeventWorker(AsyncWorker): server_class = None @@ -53,9 +37,6 @@ class GeventWorker(AsyncWorker): def patch(self): monkey.patch_all() - # monkey patch sendfile to make it none blocking - patch_sendfile() - # patch sockets sockets = [] for s in self.sockets: diff --git a/requirements_test.txt b/requirements_test.txt index cc595b77..03af4969 100644 --- a/requirements_test.txt +++ b/requirements_test.txt @@ -1,4 +1,6 @@ aiohttp +gevent +eventlet coverage pytest pytest-cov diff --git a/setup.py b/setup.py index 11079f86..9b76ddd3 100644 --- 
a/setup.py +++ b/setup.py @@ -76,8 +76,8 @@ install_requires = [ ] extras_require = { - 'gevent': ['gevent>=0.13'], - 'eventlet': ['eventlet>=0.9.7'], + 'gevent': ['gevent>=1.4.0'], + 'eventlet': ['eventlet>=0.24.1'], 'tornado': ['tornado>=0.2'], 'gthread': [], 'setproctitle': ['setproctitle'], diff --git a/tests/workers/__init__.py b/tests/workers/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/workers/test_geventlet.py b/tests/workers/test_geventlet.py new file mode 100644 index 00000000..815dcec3 --- /dev/null +++ b/tests/workers/test_geventlet.py @@ -0,0 +1,7 @@ +# -*- coding: utf-8 - +# +# This file is part of gunicorn released under the MIT license. +# See the NOTICE for more information. + +def test_import(): + __import__('gunicorn.workers.geventlet') diff --git a/tests/workers/test_ggevent.py b/tests/workers/test_ggevent.py new file mode 100644 index 00000000..261ce40d --- /dev/null +++ b/tests/workers/test_ggevent.py @@ -0,0 +1,7 @@ +# -*- coding: utf-8 - +# +# This file is part of gunicorn released under the MIT license. +# See the NOTICE for more information. + +def test_import(): + __import__('gunicorn.workers.ggevent') diff --git a/tox.ini b/tox.ini index 26e314ab..db4f00f2 100644 --- a/tox.ini +++ b/tox.ini @@ -4,7 +4,7 @@ skipsdist = True [testenv] usedevelop = True -commands = py.test {posargs} +commands = py.test --cov=gunicorn {posargs} deps = -rrequirements_test.txt From 2f944c9bea6238ab3feee4f36352dbeaa8b15859 Mon Sep 17 00:00:00 2001 From: Benoit Chesneau Date: Fri, 10 Jan 2020 11:00:00 +0100 Subject: [PATCH 151/263] remove version from the Server header while we still want to know which server is running to ease operation, the version was giving too much information on the installation, so let's remove it. 
--- gunicorn/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gunicorn/__init__.py b/gunicorn/__init__.py index 467084a2..24f6e7ff 100644 --- a/gunicorn/__init__.py +++ b/gunicorn/__init__.py @@ -5,4 +5,4 @@ version_info = (20, 0, 4) __version__ = ".".join([str(v) for v in version_info]) -SERVER_SOFTWARE = "gunicorn/%s" % __version__ +SERVER_SOFTWARE = "gunicorn" From dcfd0f04e8bed3bbe5e44cf058c489298c22cb30 Mon Sep 17 00:00:00 2001 From: Benoit Chesneau Date: Fri, 10 Jan 2020 13:50:53 +0100 Subject: [PATCH 152/263] fix SERVER_SOFTWARE property WSGI spec requires the SERVER_SOFTWARE property containing the name and version. This change fix it and separate the version header from SERVER_SOFTWARE property. We expose the SERVER variable so custom installations can change it in one place without looking much when needed. --- gunicorn/__init__.py | 4 +++- gunicorn/http/wsgi.py | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/gunicorn/__init__.py b/gunicorn/__init__.py index 24f6e7ff..6c208488 100644 --- a/gunicorn/__init__.py +++ b/gunicorn/__init__.py @@ -5,4 +5,6 @@ version_info = (20, 0, 4) __version__ = ".".join([str(v) for v in version_info]) -SERVER_SOFTWARE = "gunicorn" +SERVER = "gunicorn" +SERVER_SOFTWARE = "%s/%s" % (SERVER, __version__) + diff --git a/gunicorn/http/wsgi.py b/gunicorn/http/wsgi.py index 17360826..478677f4 100644 --- a/gunicorn/http/wsgi.py +++ b/gunicorn/http/wsgi.py @@ -11,7 +11,7 @@ import sys from gunicorn.http.message import HEADER_RE from gunicorn.http.errors import InvalidHeader, InvalidHeaderName -from gunicorn import SERVER_SOFTWARE +from gunicorn import SERVER_SOFTWARE, SERVER import gunicorn.util as util # Send files in at most 1GB blocks as some operating systems can have problems @@ -195,7 +195,7 @@ class Response(object): def __init__(self, req, sock, cfg): self.req = req self.sock = sock - self.version = SERVER_SOFTWARE + self.version = SERVER self.status = None self.chunked = 
False self.must_close = False From f74f926f3673a2cb97ae09be9b4d2af2eb2cc51b Mon Sep 17 00:00:00 2001 From: Benoit Chesneau Date: Fri, 10 Jan 2020 13:58:29 +0100 Subject: [PATCH 153/263] remove trailing new line --- gunicorn/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/gunicorn/__init__.py b/gunicorn/__init__.py index 6c208488..cc376951 100644 --- a/gunicorn/__init__.py +++ b/gunicorn/__init__.py @@ -7,4 +7,3 @@ version_info = (20, 0, 4) __version__ = ".".join([str(v) for v in version_info]) SERVER = "gunicorn" SERVER_SOFTWARE = "%s/%s" % (SERVER, __version__) - From c1bea68fce73ece990b41c34527d98ce50006bf3 Mon Sep 17 00:00:00 2001 From: Brett Randall Date: Tue, 14 Jan 2020 06:08:53 +1100 Subject: [PATCH 154/263] Added a set of pip requirements files to cover the examples in examples/frameworks. Signed-off-by: Brett Randall --- examples/frameworks/requirements.txt | 5 +++++ examples/frameworks/requirements_cherryapp.txt | 1 + examples/frameworks/requirements_flaskapp.txt | 1 + examples/frameworks/requirements_pyramidapp.txt | 1 + examples/frameworks/requirements_tornadoapp.txt | 1 + examples/frameworks/requirements_webpyapp.txt | 1 + 6 files changed, 10 insertions(+) create mode 100644 examples/frameworks/requirements.txt create mode 100644 examples/frameworks/requirements_cherryapp.txt create mode 100644 examples/frameworks/requirements_flaskapp.txt create mode 100644 examples/frameworks/requirements_pyramidapp.txt create mode 100644 examples/frameworks/requirements_tornadoapp.txt create mode 100644 examples/frameworks/requirements_webpyapp.txt diff --git a/examples/frameworks/requirements.txt b/examples/frameworks/requirements.txt new file mode 100644 index 00000000..223999a7 --- /dev/null +++ b/examples/frameworks/requirements.txt @@ -0,0 +1,5 @@ +-r requirements_flaskapp.txt +-r requirements_cherryapp.txt +-r requirements_pyramidapp.txt +-r requirements_tornadoapp.txt +-r requirements_webpyapp.txt diff --git 
a/examples/frameworks/requirements_cherryapp.txt b/examples/frameworks/requirements_cherryapp.txt new file mode 100644 index 00000000..d7187069 --- /dev/null +++ b/examples/frameworks/requirements_cherryapp.txt @@ -0,0 +1 @@ +cherrypy diff --git a/examples/frameworks/requirements_flaskapp.txt b/examples/frameworks/requirements_flaskapp.txt new file mode 100644 index 00000000..7e106024 --- /dev/null +++ b/examples/frameworks/requirements_flaskapp.txt @@ -0,0 +1 @@ +flask diff --git a/examples/frameworks/requirements_pyramidapp.txt b/examples/frameworks/requirements_pyramidapp.txt new file mode 100644 index 00000000..d94f7c98 --- /dev/null +++ b/examples/frameworks/requirements_pyramidapp.txt @@ -0,0 +1 @@ +pyramid diff --git a/examples/frameworks/requirements_tornadoapp.txt b/examples/frameworks/requirements_tornadoapp.txt new file mode 100644 index 00000000..22b60c77 --- /dev/null +++ b/examples/frameworks/requirements_tornadoapp.txt @@ -0,0 +1 @@ +tornado<6 diff --git a/examples/frameworks/requirements_webpyapp.txt b/examples/frameworks/requirements_webpyapp.txt new file mode 100644 index 00000000..8f9f7958 --- /dev/null +++ b/examples/frameworks/requirements_webpyapp.txt @@ -0,0 +1 @@ +web-py From 9219da52cd30b606261732b27f4de70df0d7ec8e Mon Sep 17 00:00:00 2001 From: Brett Randall Date: Mon, 20 Jan 2020 10:08:16 +1100 Subject: [PATCH 155/263] Updated setup Development Status metadata to 5 - Production/Stable (was 4 - Beta). 
Signed-off-by: Brett Randall --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 9b76ddd3..f2675927 100644 --- a/setup.py +++ b/setup.py @@ -13,7 +13,7 @@ from gunicorn import __version__ CLASSIFIERS = [ - 'Development Status :: 4 - Beta', + 'Development Status :: 5 - Production/Stable', 'Environment :: Other Environment', 'Intended Audience :: Developers', 'License :: OSI Approved :: MIT License', From 287916a5d43bf6f5d6536b1acde991e521f68dd8 Mon Sep 17 00:00:00 2001 From: Kevin Michel Date: Wed, 29 Jan 2020 16:14:33 +0100 Subject: [PATCH 156/263] Remove dead code: find_library and _findWalk_ldpath --- gunicorn/util.py | 32 -------------------------------- 1 file changed, 32 deletions(-) diff --git a/gunicorn/util.py b/gunicorn/util.py index d52df771..a821e357 100644 --- a/gunicorn/util.py +++ b/gunicorn/util.py @@ -3,7 +3,6 @@ # This file is part of gunicorn released under the MIT license. # See the NOTICE for more information. import ast -import ctypes.util import email.utils import errno import fcntl @@ -638,34 +637,3 @@ def bytes_to_str(b): def unquote_to_wsgi_str(string): return urllib.parse.unquote_to_bytes(string).decode('latin-1') - - -def _findWalk_ldpath(name): - def _is_elf(filepath): - try: - with open(filepath, 'rb') as fh: - return fh.read(4) == b'\x7fELF' - except: - return False - from glob import glob - if os.path.isabs(name): - return name - - # search LD_LIBRARY_PATH list - paths = ['/lib', '/usr/local/lib', '/usr/lib'] - if 'LD_LIBRARY_PATH' in os.environ: - paths = os.environ['LD_LIBRARY_PATH'].split(':') + paths - - for d in paths: - f = os.path.join(d, name) - if _is_elf(f): - return os.path.basename(f) - prefix = os.path.join(d, 'lib'+name) - for suffix in ['so', 'so.*', '*.so.*', 'a']: - for f in glob('{0}.{1}'.format(prefix, suffix)): - if _is_elf(f) or suffix == 'a': - return os.path.basename(f) - - -def find_library(name): - return ctypes.util.find_library(name) or 
_findWalk_ldpath(name) From d307045984100a7edfd262c94783d9c3c8597888 Mon Sep 17 00:00:00 2001 From: Anmar85 Date: Thu, 30 Jan 2020 09:29:25 -0500 Subject: [PATCH 157/263] Add milliseconds option to request_time in access_log (#2218) Add milliseconds option to request_time in access_log --- docs/source/settings.rst | 1 + gunicorn/config.py | 1 + gunicorn/glogging.py | 1 + 3 files changed, 3 insertions(+) diff --git a/docs/source/settings.rst b/docs/source/settings.rst index 16d8961a..e293307b 100644 --- a/docs/source/settings.rst +++ b/docs/source/settings.rst @@ -178,6 +178,7 @@ b response length or ``'-'`` (CLF format) f referer a user agent T request time in seconds +M request time in milliseconds D request time in microseconds L request time in decimal seconds p process ID diff --git a/gunicorn/config.py b/gunicorn/config.py index 6c1d1777..f220aa32 100644 --- a/gunicorn/config.py +++ b/gunicorn/config.py @@ -1272,6 +1272,7 @@ class AccessLogFormat(Setting): f referer a user agent T request time in seconds + M request time in milliseconds D request time in microseconds L request time in decimal seconds p process ID diff --git a/gunicorn/glogging.py b/gunicorn/glogging.py index 0ee00989..b37e6847 100644 --- a/gunicorn/glogging.py +++ b/gunicorn/glogging.py @@ -299,6 +299,7 @@ class Logger(object): 'a': environ.get('HTTP_USER_AGENT', '-'), 'T': request_time.seconds, 'D': (request_time.seconds * 1000000) + request_time.microseconds, + 'M': (request_time.seconds * 1000) + int(request_time.microseconds/1000), 'L': "%d.%06d" % (request_time.seconds, request_time.microseconds), 'p': "<%s>" % os.getpid() } From c82996f791a496e4c45fd7e9078047e46717acb2 Mon Sep 17 00:00:00 2001 From: Kevin Michel Date: Wed, 29 Jan 2020 17:01:27 +0100 Subject: [PATCH 158/263] Add documentation for the $PORT environment variable --- docs/source/settings.rst | 4 ++++ gunicorn/config.py | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/docs/source/settings.rst b/docs/source/settings.rst 
index 16d8961a..bf67bd25 100644 --- a/docs/source/settings.rst +++ b/docs/source/settings.rst @@ -1164,6 +1164,10 @@ Multiple addresses can be bound. ex.:: will bind the `test:app` application on localhost both on ipv6 and ipv4 interfaces. +If the ``PORT`` environment variable is defined, the default +is ``['0.0.0.0:$PORT']``. If it is not defined, the default +is ``['127.0.0.1:8000']``. + .. _backlog: backlog diff --git a/gunicorn/config.py b/gunicorn/config.py index 6c1d1777..d501db69 100644 --- a/gunicorn/config.py +++ b/gunicorn/config.py @@ -579,6 +579,10 @@ class Bind(Setting): will bind the `test:app` application on localhost both on ipv6 and ipv4 interfaces. + + If the ``PORT`` environment variable is defined, the default + is ``['0.0.0.0:$PORT']``. If it is not defined, the default + is ``['127.0.0.1:8000']``. """ From a648f8a838c1b5b1dd45cbd2b8bc849296ebf48a Mon Sep 17 00:00:00 2001 From: Kevin Michel Date: Wed, 29 Jan 2020 17:03:24 +0100 Subject: [PATCH 159/263] Document how environment variables impact configuration --- docs/source/configure.rst | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/docs/source/configure.rst b/docs/source/configure.rst index df5cbe2e..370f7070 100644 --- a/docs/source/configure.rst +++ b/docs/source/configure.rst @@ -4,23 +4,35 @@ Configuration Overview ====================== -Gunicorn pulls configuration information from three distinct places. +Gunicorn reads configuration information from five places. -The first place that Gunicorn will read configuration from is the framework +Gunicorn first reads environment variables for some +configuration :ref:`settings `. + +Gunicorn will then read configuration from a framework specific configuration file. Currently this only affects Paster applications. -The second source of configuration information is a configuration file that is -optionally specified on the command line. 
Anything specified in the Gunicorn -config file will override any framework specific settings. +The third source of configuration information is a configuration file that is +optionally specified using command line arguments. Anything specified in the +Gunicorn config file will override any framework specific settings. + +The fourth place of configuration information are command line arguments +stored in an environment variable named ``GUNICORN_CMD_ARGS``. Lastly, the command line arguments used to invoke Gunicorn are the final place considered for configuration settings. If an option is specified on the command line, this is the value that will be used. +When a configuration file is specified in the command line arguments and in the +``GUNICORN_CMD_ARGS`` environment variable, only the configuration +file specified on the command line is used. + Once again, in order of least to most authoritative: - 1. Framework Settings - 2. Configuration File - 3. Command Line + 1. Environment Variables + 2. Framework Settings + 3. Configuration File + 4. ``GUNICORN_CMD_ARGS`` + 5. Command Line .. note:: From 83b78e09c10c2f7a07eed5300104cc09a4f4cbf4 Mon Sep 17 00:00:00 2001 From: Kevin Michel Date: Fri, 31 Jan 2020 09:47:06 +0100 Subject: [PATCH 160/263] Document the default name and path for the configuration file --- docs/source/configure.rst | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/docs/source/configure.rst b/docs/source/configure.rst index 370f7070..c340f5ee 100644 --- a/docs/source/configure.rst +++ b/docs/source/configure.rst @@ -6,15 +6,16 @@ Configuration Overview Gunicorn reads configuration information from five places. -Gunicorn first reads environment variables for some -configuration :ref:`settings `. +Gunicorn first reads environment variables for some configuration +:ref:`settings `. -Gunicorn will then read configuration from a framework -specific configuration file. Currently this only affects Paster applications. 
+Gunicorn then reads configuration from a framework specific configuration +file. Currently this only affects Paster applications. -The third source of configuration information is a configuration file that is -optionally specified using command line arguments. Anything specified in the -Gunicorn config file will override any framework specific settings. +The third source of configuration information is an optional configuration file +``gunicorn.conf.py`` searched in the current working directory or specified +using a command line argument. Anything specified in this configuration file +will override any framework specific settings. The fourth place of configuration information are command line arguments stored in an environment variable named ``GUNICORN_CMD_ARGS``. From 93bcf5a41ebd86b8072daef9d29eb5141a4e58fe Mon Sep 17 00:00:00 2001 From: Kevin Michel Date: Fri, 31 Jan 2020 14:15:32 +0100 Subject: [PATCH 161/263] Replace and run the sitemap generator This replaces the very old sitemap generator which was over 2kloc and only compatible with Python 2. According to the stored lastmod, the generator hasn't been used since 2010. The minimal replacement script scans the static site for html files and uses git to deduce the last modification date of each page. The sitemap xmlns version was updated to the latest 0.9 from sitemaps.org. The index page was given a higher priority since the other pages are just redirects to the index with anchors. The output file is pretty printed to help with diffs. Static assets (css, images...) aren't listed in the sitemap anymore. 
--- NOTICE | 35 - docs/site/sitemap.xml | 105 +- docs/sitemap_config.xml | 19 - docs/sitemap_gen.py | 2221 +-------------------------------------- 4 files changed, 70 insertions(+), 2310 deletions(-) delete mode 100644 docs/sitemap_config.xml mode change 100755 => 100644 docs/sitemap_gen.py diff --git a/NOTICE b/NOTICE index a2f4aa20..8506656b 100644 --- a/NOTICE +++ b/NOTICE @@ -82,41 +82,6 @@ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -doc/sitemap_gen.py ------------------- -Under BSD License : - -Copyright (c) 2004, 2005, Google Inc. -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following - disclaimer in the documentation and/or other materials provided - with the distribution. - - * Neither the name of Google Inc. nor the names of its contributors - may be used to endorse or promote products derived from this - software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - util/unlink.py -------------- diff --git a/docs/site/sitemap.xml b/docs/site/sitemap.xml index 6a8e43a0..6411a24e 100644 --- a/docs/site/sitemap.xml +++ b/docs/site/sitemap.xml @@ -1,112 +1,73 @@ - - + + http://gunicorn.org/ - 2010-07-01T05:14:22Z - 0.5000 + 2019-11-27T00:02:48+01:00 + 1.0 + + + http://gunicorn.org/community.html + 2012-10-04T00:43:15+05:45 + 0.5 http://gunicorn.org/configuration.html - 2010-07-01T05:14:22Z - 0.5000 + 2012-10-04T00:43:15+05:45 + 0.5 http://gunicorn.org/configure.html - 2010-07-01T05:14:22Z - 0.5000 - - - http://gunicorn.org/css/ - 2010-07-01T05:14:22Z - 0.5000 - - - http://gunicorn.org/css/index.css - 2010-07-01T05:14:22Z - 0.5000 - - - http://gunicorn.org/css/style.css - 2010-07-01T05:14:22Z - 0.5000 + 2012-10-04T00:43:15+05:45 + 0.5 http://gunicorn.org/deploy.html - 2010-07-01T05:14:22Z - 0.5000 + 2012-10-04T00:43:15+05:45 + 0.5 http://gunicorn.org/deployment.html - 2010-07-01T05:14:22Z - 0.5000 + 2012-10-04T00:43:15+05:45 + 0.5 http://gunicorn.org/design.html - 2010-07-01T05:14:22Z - 0.5000 + 2012-10-04T00:43:15+05:45 + 0.5 http://gunicorn.org/faq.html - 2010-07-01T05:14:22Z - 0.5000 - - - http://gunicorn.org/images/ - 2010-07-01T05:14:22Z - 0.5000 - - - http://gunicorn.org/images/gunicorn.png - 2010-07-01T05:14:22Z - 0.5000 - - - http://gunicorn.org/images/large_gunicorn.png - 2010-07-01T05:14:22Z - 0.5000 - - - http://gunicorn.org/images/logo.png - 2010-07-01T05:14:22Z - 0.5000 - - - http://gunicorn.org/index.html 
- 2010-07-01T05:14:22Z - 0.5000 + 2012-10-04T00:43:15+05:45 + 0.5 http://gunicorn.org/install.html - 2010-07-01T05:14:22Z - 0.5000 + 2012-10-04T00:43:15+05:45 + 0.5 http://gunicorn.org/installation.html - 2010-07-01T05:14:22Z - 0.5000 + 2012-10-04T00:43:15+05:45 + 0.5 http://gunicorn.org/news.html - 2010-07-08T19:57:19Z - 0.5000 + 2012-10-04T00:43:15+05:45 + 0.5 http://gunicorn.org/run.html - 2010-07-01T05:14:22Z - 0.5000 + 2012-10-04T00:43:15+05:45 + 0.5 http://gunicorn.org/tuning.html - 2010-07-01T05:14:22Z - 0.5000 + 2012-10-04T00:43:15+05:45 + 0.5 http://gunicorn.org/usage.html - 2010-07-01T05:14:22Z - 0.5000 + 2012-10-04T00:43:15+05:45 + 0.5 diff --git a/docs/sitemap_config.xml b/docs/sitemap_config.xml deleted file mode 100644 index 513d19bb..00000000 --- a/docs/sitemap_config.xml +++ /dev/null @@ -1,19 +0,0 @@ - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/docs/sitemap_gen.py b/docs/sitemap_gen.py old mode 100755 new mode 100644 index 1cfbbae1..29c7ca02 --- a/docs/sitemap_gen.py +++ b/docs/sitemap_gen.py @@ -1,2188 +1,41 @@ -#!/usr/bin/env python -# -# Copyright (c) 2004, 2005 Google Inc. -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in -# the documentation and/or other materials provided with the -# distribution. -# -# * Neither the name of Google nor the names of its contributors may -# be used to endorse or promote products derived from this software -# without specific prior written permission. 
-# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -# COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN -# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -# POSSIBILITY OF SUCH DAMAGE. -# -# -# The sitemap_gen.py script is written in Python 2.2 and released to -# the open source community for continuous improvements under the BSD -# 2.0 new license, which can be found at: -# -# http://www.opensource.org/licenses/bsd-license.php -# - -__usage__ = \ -"""A simple script to automatically produce sitemaps for a webserver, -in the Google Sitemap Protocol (GSP). 
- -Usage: python sitemap_gen.py --config=config.xml [--help] [--testing] - --config=config.xml, specifies config file location - --help, displays usage message - --testing, specified when user is experimenting -""" - -import fnmatch -import glob -import gzip -import hashlib import os -import re -import stat -import time -import types -import urllib -import urlparse -import xml.sax +import subprocess +from xml.etree import ElementTree + + +def main(): + generate( + site_path=os.path.join(os.path.dirname(__file__), 'site'), + special_priorities={'index.html': 1.0}) + + +def generate(site_path, special_priorities, directory_index='index.html'): + urlset = ElementTree.Element('urlset', xmlns='http://www.sitemaps.org/schemas/sitemap/0.9') + urlset.text = '\n ' + for root, dirs, filenames in os.walk(site_path): + for filename in filenames: + if filename.endswith('.html'): + absolute_filepath = os.path.join(root, filename) + relative_path = os.path.relpath(absolute_filepath, site_path) + relative_url = os.path.dirname(relative_path) if filename == directory_index else relative_path + last_modification = subprocess.check_output( + ['git', 'log', '-1', '--pretty="%cI"', absolute_filepath]).decode('ascii').strip('\n"') + url_element = ElementTree.SubElement(urlset, 'url') + loc_element = ElementTree.SubElement(url_element, 'loc') + loc_element.text = 'http://gunicorn.org/' + relative_url + lastmod_element = ElementTree.SubElement(url_element, 'lastmod') + lastmod_element.text = last_modification + priority_element = ElementTree.SubElement(url_element, 'priority') + priority_element.text = str(special_priorities.get(relative_path, 0.5)) + url_element.tail = priority_element.tail = '\n ' + url_element.text = loc_element.tail = lastmod_element.tail = '\n ' + # We sort the url nodes instead of the filenames because + # filenames might be altered by the directory_index option + urlset[:] = sorted([url for url in urlset], key=lambda url: url[0].text) + urlset.tail = 
urlset[-1].tail = '\n' + with open(os.path.join(site_path, 'sitemap.xml'), 'wb') as sitemap_file: + ElementTree.ElementTree(urlset).write(sitemap_file, encoding='UTF-8', xml_declaration=True) -# Text encodings -ENC_ASCII = 'ASCII' -ENC_UTF8 = 'UTF-8' -ENC_IDNA = 'IDNA' -ENC_ASCII_LIST = ['ASCII', 'US-ASCII', 'US', 'IBM367', 'CP367', 'ISO646-US' - 'ISO_646.IRV:1991', 'ISO-IR-6', 'ANSI_X3.4-1968', - 'ANSI_X3.4-1986', 'CPASCII' ] -ENC_DEFAULT_LIST = ['ISO-8859-1', 'ISO-8859-2', 'ISO-8859-5'] - -# Maximum number of urls in each sitemap, before next Sitemap is created -MAXURLS_PER_SITEMAP = 50000 - -# Suffix on a Sitemap index file -SITEINDEX_SUFFIX = '_index.xml' - -# Regular expressions tried for extracting URLs from access logs. -ACCESSLOG_CLF_PATTERN = re.compile( - r'.+\s+"([^\s]+)\s+([^\s]+)\s+HTTP/\d+\.\d+"\s+200\s+.*' - ) - -# Match patterns for lastmod attributes -LASTMOD_PATTERNS = map(re.compile, [ - r'^\d\d\d\d$', - r'^\d\d\d\d-\d\d$', - r'^\d\d\d\d-\d\d-\d\d$', - r'^\d\d\d\d-\d\d-\d\dT\d\d:\d\dZ$', - r'^\d\d\d\d-\d\d-\d\dT\d\d:\d\d[+-]\d\d:\d\d$', - r'^\d\d\d\d-\d\d-\d\dT\d\d:\d\d:\d\d(\.\d+)?Z$', - r'^\d\d\d\d-\d\d-\d\dT\d\d:\d\d:\d\d(\.\d+)?[+-]\d\d:\d\d$', - ]) - -# Match patterns for changefreq attributes -CHANGEFREQ_PATTERNS = [ - 'always', 'hourly', 'daily', 'weekly', 'monthly', 'yearly', 'never' - ] - -# XML formats -SITEINDEX_HEADER = \ - '\n' \ - '\n' -SITEINDEX_FOOTER = '\n' -SITEINDEX_ENTRY = \ - ' \n' \ - ' %(loc)s\n' \ - ' %(lastmod)s\n' \ - ' \n' -SITEMAP_HEADER = \ - '\n' \ - '\n' -SITEMAP_FOOTER = '\n' -SITEURL_XML_PREFIX = ' \n' -SITEURL_XML_SUFFIX = ' \n' - -# Search engines to notify with the updated sitemaps -# -# This list is very non-obvious in what's going on. Here's the gist: -# Each item in the list is a 6-tuple of items. 
The first 5 are "almost" -# the same as the input arguments to urlparse.urlunsplit(): -# 0 - schema -# 1 - netloc -# 2 - path -# 3 - query <-- EXCEPTION: specify a query map rather than a string -# 4 - fragment -# Additionally, add item 5: -# 5 - query attribute that should be set to the new Sitemap URL -# Clear as mud, I know. -NOTIFICATION_SITES = [ - ('http', 'www.google.com', 'webmasters/sitemaps/ping', {}, '', 'sitemap') - ] - - -class Error(Exception): - """ - Base exception class. In this module we tend not to use our own exception - types for very much, but they come in very handy on XML parsing with SAX. - """ - pass -#end class Error - - -class SchemaError(Error): - """Failure to process an XML file according to the schema we know.""" - pass -#end class SchemeError - - -class Encoder: - """ - Manages wide-character/narrow-character conversions for just about all - text that flows into or out of the script. - - You should always use this class for string coercion, as opposed to - letting Python handle coercions automatically. Reason: Python - usually assumes ASCII (7-bit) as a default narrow character encoding, - which is not the kind of data we generally deal with. - - General high-level methodologies used in sitemap_gen: - - [PATHS] - File system paths may be wide or narrow, depending on platform. - This works fine, just be aware of it and be very careful to not - mix them. That is, if you have to pass several file path arguments - into a library call, make sure they are all narrow or all wide. - This class has MaybeNarrowPath() which should be called on every - file system path you deal with. - - [URLS] - URL locations are stored in Narrow form, already escaped. This has the - benefit of keeping escaping and encoding as close as possible to the format - we read them in. The downside is we may end up with URLs that have - intermingled encodings -- the root path may be encoded in one way - while the filename is encoded in another. 
This is obviously wrong, but - it should hopefully be an issue hit by very few users. The workaround - from the user level (assuming they notice) is to specify a default_encoding - parameter in their config file. - - [OTHER] - Other text, such as attributes of the URL class, configuration options, - etc, are generally stored in Unicode for simplicity. - """ - - def __init__(self): - self._user = None # User-specified default encoding - self._learned = [] # Learned default encodings - self._widefiles = False # File system can be wide - - # Can the file system be Unicode? - try: - self._widefiles = os.path.supports_unicode_filenames - except AttributeError: - try: - self._widefiles = sys.getwindowsversion() == os.VER_PLATFORM_WIN32_NT - except AttributeError: - pass - - # Try to guess a working default - try: - encoding = sys.getfilesystemencoding() - if encoding and not (encoding.upper() in ENC_ASCII_LIST): - self._learned = [ encoding ] - except AttributeError: - pass - - if not self._learned: - encoding = sys.getdefaultencoding() - if encoding and not (encoding.upper() in ENC_ASCII_LIST): - self._learned = [ encoding ] - - # If we had no guesses, start with some European defaults - if not self._learned: - self._learned = ENC_DEFAULT_LIST - #end def __init__ - - def SetUserEncoding(self, encoding): - self._user = encoding - #end def SetUserEncoding - - def NarrowText(self, text, encoding): - """ Narrow a piece of arbitrary text """ - if type(text) != types.UnicodeType: - return text - - # Try the passed in preference - if encoding: - try: - result = text.encode(encoding) - if not encoding in self._learned: - self._learned.append(encoding) - return result - except UnicodeError: - pass - except LookupError: - output.Warn('Unknown encoding: %s' % encoding) - - # Try the user preference - if self._user: - try: - return text.encode(self._user) - except UnicodeError: - pass - except LookupError: - temp = self._user - self._user = None - output.Warn('Unknown 
default_encoding: %s' % temp) - - # Look through learned defaults, knock any failing ones out of the list - while self._learned: - try: - return text.encode(self._learned[0]) - except: - del self._learned[0] - - # When all other defaults are exhausted, use UTF-8 - try: - return text.encode(ENC_UTF8) - except UnicodeError: - pass - - # Something is seriously wrong if we get to here - return text.encode(ENC_ASCII, 'ignore') - #end def NarrowText - - def MaybeNarrowPath(self, text): - """ Paths may be allowed to stay wide """ - if self._widefiles: - return text - return self.NarrowText(text, None) - #end def MaybeNarrowPath - - def WidenText(self, text, encoding): - """ Widen a piece of arbitrary text """ - if type(text) != types.StringType: - return text - - # Try the passed in preference - if encoding: - try: - result = unicode(text, encoding) - if not encoding in self._learned: - self._learned.append(encoding) - return result - except UnicodeError: - pass - except LookupError: - output.Warn('Unknown encoding: %s' % encoding) - - # Try the user preference - if self._user: - try: - return unicode(text, self._user) - except UnicodeError: - pass - except LookupError: - temp = self._user - self._user = None - output.Warn('Unknown default_encoding: %s' % temp) - - # Look through learned defaults, knock any failing ones out of the list - while self._learned: - try: - return unicode(text, self._learned[0]) - except: - del self._learned[0] - - # When all other defaults are exhausted, use UTF-8 - try: - return unicode(text, ENC_UTF8) - except UnicodeError: - pass - - # Getting here means it wasn't UTF-8 and we had no working default. - # We really don't have anything "right" we can do anymore. 
- output.Warn('Unrecognized encoding in text: %s' % text) - if not self._user: - output.Warn('You may need to set a default_encoding in your ' - 'configuration file.') - return text.decode(ENC_ASCII, 'ignore') - #end def WidenText -#end class Encoder -encoder = Encoder() - - -class Output: - """ - Exposes logging functionality, and tracks how many errors - we have thus output. - - Logging levels should be used as thus: - Fatal -- extremely sparingly - Error -- config errors, entire blocks of user 'intention' lost - Warn -- individual URLs lost - Log(,0) -- Un-suppressable text that's not an error - Log(,1) -- touched files, major actions - Log(,2) -- parsing notes, filtered or duplicated URLs - Log(,3) -- each accepted URL - """ - - def __init__(self): - self.num_errors = 0 # Count of errors - self.num_warns = 0 # Count of warnings - - self._errors_shown = {} # Shown errors - self._warns_shown = {} # Shown warnings - self._verbose = 0 # Level of verbosity - #end def __init__ - - def Log(self, text, level): - """ Output a blurb of diagnostic text, if the verbose level allows it """ - if text: - text = encoder.NarrowText(text, None) - if self._verbose >= level: - print text - #end def Log - - def Warn(self, text): - """ Output and count a warning. Suppress duplicate warnings. """ - if text: - text = encoder.NarrowText(text, None) - hash = hashlib.md5(text).hexdigest() - if not self._warns_shown.has_key(hash): - self._warns_shown[hash] = 1 - print '[WARNING] ' + text - else: - self.Log('(suppressed) [WARNING] ' + text, 3) - self.num_warns = self.num_warns + 1 - #end def Warn - - def Error(self, text): - """ Output and count an error. Suppress duplicate errors. 
""" - if text: - text = encoder.NarrowText(text, None) - hash = hashlib.md5(text).hexdigest() - if not self._errors_shown.has_key(hash): - self._errors_shown[hash] = 1 - print '[ERROR] ' + text - else: - self.Log('(suppressed) [ERROR] ' + text, 3) - self.num_errors = self.num_errors + 1 - #end def Error - - def Fatal(self, text): - """ Output an error and terminate the program. """ - if text: - text = encoder.NarrowText(text, None) - print '[FATAL] ' + text - else: - print 'Fatal error.' - sys.exit(1) - #end def Fatal - - def SetVerbose(self, level): - """ Sets the verbose level. """ - try: - if type(level) != types.IntType: - level = int(level) - if (level >= 0) and (level <= 3): - self._verbose = level - return - except ValueError: - pass - self.Error('Verbose level (%s) must be between 0 and 3 inclusive.' % level) - #end def SetVerbose -#end class Output -output = Output() - - -class URL(object): - """ URL is a smart structure grouping together the properties we - care about for a single web reference. """ - __slots__ = 'loc', 'lastmod', 'changefreq', 'priority' - - def __init__(self): - self.loc = None # URL -- in Narrow characters - self.lastmod = None # ISO8601 timestamp of last modify - self.changefreq = None # Text term for update frequency - self.priority = None # Float between 0 and 1 (inc) - #end def __init__ - - def __cmp__(self, other): - if self.loc < other.loc: - return -1 - if self.loc > other.loc: - return 1 - return 0 - #end def __cmp__ - - def TrySetAttribute(self, attribute, value): - """ Attempt to set the attribute to the value, with a pretty try - block around it. 
""" - if attribute == 'loc': - self.loc = self.Canonicalize(value) - else: - try: - setattr(self, attribute, value) - except AttributeError: - output.Warn('Unknown URL attribute: %s' % attribute) - #end def TrySetAttribute - - def IsAbsolute(loc): - """ Decide if the URL is absolute or not """ - if not loc: - return False - narrow = encoder.NarrowText(loc, None) - (scheme, netloc, path, query, frag) = urlparse.urlsplit(narrow) - if (not scheme) or (not netloc): - return False - return True - #end def IsAbsolute - IsAbsolute = staticmethod(IsAbsolute) - - def Canonicalize(loc): - """ Do encoding and canonicalization on a URL string """ - if not loc: - return loc - - # Let the encoder try to narrow it - narrow = encoder.NarrowText(loc, None) - - # Escape components individually - (scheme, netloc, path, query, frag) = urlparse.urlsplit(narrow) - unr = '-._~' - sub = '!$&\'()*+,;=' - netloc = urllib.quote(netloc, unr + sub + '%:@/[]') - path = urllib.quote(path, unr + sub + '%:@/') - query = urllib.quote(query, unr + sub + '%:@/?') - frag = urllib.quote(frag, unr + sub + '%:@/?') - - # Try built-in IDNA encoding on the netloc - try: - (ignore, widenetloc, ignore, ignore, ignore) = urlparse.urlsplit(loc) - for c in widenetloc: - if c >= unichr(128): - netloc = widenetloc.encode(ENC_IDNA) - netloc = urllib.quote(netloc, unr + sub + '%:@/[]') - break - except UnicodeError: - # urlsplit must have failed, based on implementation differences in the - # library. There is not much we can do here, except ignore it. - pass - except LookupError: - output.Warn('An International Domain Name (IDN) is being used, but this ' - 'version of Python does not have support for IDNA encoding. 
' - ' (IDNA support was introduced in Python 2.3) The encoding ' - 'we have used instead is wrong and will probably not yield ' - 'valid URLs.') - bad_netloc = False - if '%' in netloc: - bad_netloc = True - - # Put it all back together - narrow = urlparse.urlunsplit((scheme, netloc, path, query, frag)) - - # I let '%' through. Fix any that aren't pre-existing escapes. - HEXDIG = '0123456789abcdefABCDEF' - list = narrow.split('%') - narrow = list[0] - del list[0] - for item in list: - if (len(item) >= 2) and (item[0] in HEXDIG) and (item[1] in HEXDIG): - narrow = narrow + '%' + item - else: - narrow = narrow + '%25' + item - - # Issue a warning if this is a bad URL - if bad_netloc: - output.Warn('Invalid characters in the host or domain portion of a URL: ' - + narrow) - - return narrow - #end def Canonicalize - Canonicalize = staticmethod(Canonicalize) - - def Validate(self, base_url, allow_fragment): - """ Verify the data in this URL is well-formed, and override if not. """ - assert type(base_url) == types.StringType - - # Test (and normalize) the ref - if not self.loc: - output.Warn('Empty URL') - return False - if allow_fragment: - self.loc = urlparse.urljoin(base_url, self.loc) - if not self.loc.startswith(base_url): - output.Warn('Discarded URL for not starting with the base_url: %s' % - self.loc) - self.loc = None - return False - - # Test the lastmod - if self.lastmod: - match = False - self.lastmod = self.lastmod.upper() - for pattern in LASTMOD_PATTERNS: - match = pattern.match(self.lastmod) - if match: - break - if not match: - output.Warn('Lastmod "%s" does not appear to be in ISO8601 format on ' - 'URL: %s' % (self.lastmod, self.loc)) - self.lastmod = None - - # Test the changefreq - if self.changefreq: - match = False - self.changefreq = self.changefreq.lower() - for pattern in CHANGEFREQ_PATTERNS: - if self.changefreq == pattern: - match = True - break - if not match: - output.Warn('Changefreq "%s" is not a valid change frequency on URL ' - ': %s' % 
(self.changefreq, self.loc)) - self.changefreq = None - - # Test the priority - if self.priority: - priority = -1.0 - try: - priority = float(self.priority) - except ValueError: - pass - if (priority < 0.0) or (priority > 1.0): - output.Warn('Priority "%s" is not a number between 0 and 1 inclusive ' - 'on URL: %s' % (self.priority, self.loc)) - self.priority = None - - return True - #end def Validate - - def MakeHash(self): - """ Provides a uniform way of hashing URLs """ - if not self.loc: - return None - if self.loc.endswith('/'): - return hashlib.md5(self.loc[:-1]).hexdigest() - return hashlib.md5(self.loc).hexdigest() - #end def MakeHash - - def Log(self, prefix='URL', level=3): - """ Dump the contents, empty or not, to the log. """ - out = prefix + ':' - - for attribute in self.__slots__: - value = getattr(self, attribute) - if not value: - value = '' - out = out + (' %s=[%s]' % (attribute, value)) - - output.Log('%s' % encoder.NarrowText(out, None), level) - #end def Log - - def WriteXML(self, file): - """ Dump non-empty contents to the output file, in XML format. """ - if not self.loc: - return - out = SITEURL_XML_PREFIX - - for attribute in self.__slots__: - value = getattr(self, attribute) - if value: - if type(value) == types.UnicodeType: - value = encoder.NarrowText(value, None) - elif type(value) != types.StringType: - value = str(value) - value = xml.sax.saxutils.escape(value) - out = out + (' <%s>%s\n' % (attribute, value, attribute)) - - out = out + SITEURL_XML_SUFFIX - file.write(out) - #end def WriteXML -#end class URL - - -class Filter: - """ - A filter on the stream of URLs we find. A filter is, in essence, - a wildcard applied to the stream. 
You can think of this as an - operator that returns a tri-state when given a URL: - - True -- this URL is to be included in the sitemap - None -- this URL is undecided - False -- this URL is to be dropped from the sitemap - """ - - def __init__(self, attributes): - self._wildcard = None # Pattern for wildcard match - self._regexp = None # Pattern for regexp match - self._pass = False # "Drop" filter vs. "Pass" filter - - if not ValidateAttributes('FILTER', attributes, - ('pattern', 'type', 'action')): - return - - # Check error count on the way in - num_errors = output.num_errors - - # Fetch the attributes - pattern = attributes.get('pattern') - type = attributes.get('type', 'wildcard') - action = attributes.get('action', 'drop') - if type: - type = type.lower() - if action: - action = action.lower() - - # Verify the attributes - if not pattern: - output.Error('On a filter you must specify a "pattern" to match') - elif (not type) or ((type != 'wildcard') and (type != 'regexp')): - output.Error('On a filter you must specify either \'type="wildcard"\' ' - 'or \'type="regexp"\'') - elif (action != 'pass') and (action != 'drop'): - output.Error('If you specify a filter action, it must be either ' - '\'action="pass"\' or \'action="drop"\'') - - # Set the rule - if action == 'drop': - self._pass = False - elif action == 'pass': - self._pass = True - - if type == 'wildcard': - self._wildcard = pattern - elif type == 'regexp': - try: - self._regexp = re.compile(pattern) - except re.error: - output.Error('Bad regular expression: %s' % pattern) - - # Log the final results iff we didn't add any errors - if num_errors == output.num_errors: - output.Log('Filter: %s any URL that matches %s "%s"' % - (action, type, pattern), 2) - #end def __init__ - - def Apply(self, url): - """ Process the URL, as above. 
""" - if (not url) or (not url.loc): - return None - - if self._wildcard: - if fnmatch.fnmatchcase(url.loc, self._wildcard): - return self._pass - return None - - if self._regexp: - if self._regexp.search(url.loc): - return self._pass - return None - - assert False # unreachable - #end def Apply -#end class Filter - - -class InputURL: - """ - Each Input class knows how to yield a set of URLs from a data source. - - This one handles a single URL, manually specified in the config file. - """ - - def __init__(self, attributes): - self._url = None # The lonely URL - - if not ValidateAttributes('URL', attributes, - ('href', 'lastmod', 'changefreq', 'priority')): - return - - url = URL() - for attr in attributes.keys(): - if attr == 'href': - url.TrySetAttribute('loc', attributes[attr]) - else: - url.TrySetAttribute(attr, attributes[attr]) - - if not url.loc: - output.Error('Url entries must have an href attribute.') - return - - self._url = url - output.Log('Input: From URL "%s"' % self._url.loc, 2) - #end def __init__ - - def ProduceURLs(self, consumer): - """ Produces URLs from our data source, hands them in to the consumer. """ - if self._url: - consumer(self._url, True) - #end def ProduceURLs -#end class InputURL - - -class InputURLList: - """ - Each Input class knows how to yield a set of URLs from a data source. 
- - This one handles a text file with a list of URLs - """ - - def __init__(self, attributes): - self._path = None # The file path - self._encoding = None # Encoding of that file - - if not ValidateAttributes('URLLIST', attributes, ('path', 'encoding')): - return - - self._path = attributes.get('path') - self._encoding = attributes.get('encoding', ENC_UTF8) - if self._path: - self._path = encoder.MaybeNarrowPath(self._path) - if os.path.isfile(self._path): - output.Log('Input: From URLLIST "%s"' % self._path, 2) - else: - output.Error('Can not locate file: %s' % self._path) - self._path = None - else: - output.Error('Urllist entries must have a "path" attribute.') - #end def __init__ - - def ProduceURLs(self, consumer): - """ Produces URLs from our data source, hands them in to the consumer. """ - - # Open the file - (frame, file) = OpenFileForRead(self._path, 'URLLIST') - if not file: - return - - # Iterate lines - linenum = 0 - for line in file.readlines(): - linenum = linenum + 1 - - # Strip comments and empty lines - if self._encoding: - line = encoder.WidenText(line, self._encoding) - line = line.strip() - if (not line) or line[0] == '#': - continue - - # Split the line on space - url = URL() - cols = line.split(' ') - for i in range(0,len(cols)): - cols[i] = cols[i].strip() - url.TrySetAttribute('loc', cols[0]) - - # Extract attributes from the other columns - for i in range(1,len(cols)): - if cols[i]: - try: - (attr_name, attr_val) = cols[i].split('=', 1) - url.TrySetAttribute(attr_name, attr_val) - except ValueError: - output.Warn('Line %d: Unable to parse attribute: %s' % - (linenum, cols[i])) - - # Pass it on - consumer(url, False) - - file.close() - if frame: - frame.close() - #end def ProduceURLs -#end class InputURLList - - -class InputDirectory: - """ - Each Input class knows how to yield a set of URLs from a data source. - - This one handles a directory that acts as base for walking the filesystem. 
- """ - - def __init__(self, attributes, base_url): - self._path = None # The directory - self._url = None # The URL equivalent - self._default_file = None - - if not ValidateAttributes('DIRECTORY', attributes, ('path', 'url', - 'default_file')): - return - - # Prep the path -- it MUST end in a sep - path = attributes.get('path') - if not path: - output.Error('Directory entries must have both "path" and "url" ' - 'attributes') - return - path = encoder.MaybeNarrowPath(path) - if not path.endswith(os.sep): - path = path + os.sep - if not os.path.isdir(path): - output.Error('Can not locate directory: %s' % path) - return - - # Prep the URL -- it MUST end in a sep - url = attributes.get('url') - if not url: - output.Error('Directory entries must have both "path" and "url" ' - 'attributes') - return - url = URL.Canonicalize(url) - if not url.endswith('/'): - url = url + '/' - if not url.startswith(base_url): - url = urlparse.urljoin(base_url, url) - if not url.startswith(base_url): - output.Error('The directory URL "%s" is not relative to the ' - 'base_url: %s' % (url, base_url)) - return - - # Prep the default file -- it MUST be just a filename - file = attributes.get('default_file') - if file: - file = encoder.MaybeNarrowPath(file) - if os.sep in file: - output.Error('The default_file "%s" can not include path information.' - % file) - file = None - - self._path = path - self._url = url - self._default_file = file - if file: - output.Log('Input: From DIRECTORY "%s" (%s) with default file "%s"' - % (path, url, file), 2) - else: - output.Log('Input: From DIRECTORY "%s" (%s) with no default file' - % (path, url), 2) - #end def __init__ - - def ProduceURLs(self, consumer): - """ Produces URLs from our data source, hands them in to the consumer. """ - if not self._path: - return - - root_path = self._path - root_URL = self._url - root_file = self._default_file - - def PerFile(dirpath, name): - """ - Called once per file. 
- Note that 'name' will occasionally be None -- for a directory itself - """ - # Pull a timestamp - url = URL() - isdir = False - try: - if name: - path = os.path.join(dirpath, name) - else: - path = dirpath - isdir = os.path.isdir(path) - time = None - if isdir and root_file: - file = os.path.join(path, root_file) - try: - time = os.stat(file)[stat.ST_MTIME]; - except OSError: - pass - if not time: - time = os.stat(path)[stat.ST_MTIME]; - url.lastmod = TimestampISO8601(time) - except OSError: - pass - except ValueError: - pass - - # Build a URL - middle = dirpath[len(root_path):] - if os.sep != '/': - middle = middle.replace(os.sep, '/') - if middle: - middle = middle + '/' - if name: - middle = middle + name - if isdir: - middle = middle + '/' - url.TrySetAttribute('loc', root_URL + encoder.WidenText(middle, None)) - - # Suppress default files. (All the way down here so we can log it.) - if name and (root_file == name): - url.Log(prefix='IGNORED (default file)', level=2) - return - - consumer(url, False) - #end def PerFile - - def PerDirectory(ignore, dirpath, namelist): - """ - Called once per directory with a list of all the contained files/dirs. - """ - ignore = ignore # Avoid warnings of an unused parameter - - if not dirpath.startswith(root_path): - output.Warn('Unable to decide what the root path is for directory: ' - '%s' % dirpath) - return - - for name in namelist: - PerFile(dirpath, name) - #end def PerDirectory - - output.Log('Walking DIRECTORY "%s"' % self._path, 1) - PerFile(self._path, None) - os.path.walk(self._path, PerDirectory, None) - #end def ProduceURLs -#end class InputDirectory - - -class InputAccessLog: - """ - Each Input class knows how to yield a set of URLs from a data source. - - This one handles access logs. It's non-trivial in that we want to - auto-detect log files in the Common Logfile Format (as used by Apache, - for instance) and the Extended Log File Format (as used by IIS, for - instance). 
- """ - - def __init__(self, attributes): - self._path = None # The file path - self._encoding = None # Encoding of that file - self._is_elf = False # Extended Log File Format? - self._is_clf = False # Common Logfile Format? - self._elf_status = -1 # ELF field: '200' - self._elf_method = -1 # ELF field: 'HEAD' - self._elf_uri = -1 # ELF field: '/foo?bar=1' - self._elf_urifrag1 = -1 # ELF field: '/foo' - self._elf_urifrag2 = -1 # ELF field: 'bar=1' - - if not ValidateAttributes('ACCESSLOG', attributes, ('path', 'encoding')): - return - - self._path = attributes.get('path') - self._encoding = attributes.get('encoding', ENC_UTF8) - if self._path: - self._path = encoder.MaybeNarrowPath(self._path) - if os.path.isfile(self._path): - output.Log('Input: From ACCESSLOG "%s"' % self._path, 2) - else: - output.Error('Can not locate file: %s' % self._path) - self._path = None - else: - output.Error('Accesslog entries must have a "path" attribute.') - #end def __init__ - - def RecognizeELFLine(self, line): - """ Recognize the Fields directive that heads an ELF file """ - if not line.startswith('#Fields:'): - return False - fields = line.split(' ') - del fields[0] - for i in range(0, len(fields)): - field = fields[i].strip() - if field == 'sc-status': - self._elf_status = i - elif field == 'cs-method': - self._elf_method = i - elif field == 'cs-uri': - self._elf_uri = i - elif field == 'cs-uri-stem': - self._elf_urifrag1 = i - elif field == 'cs-uri-query': - self._elf_urifrag2 = i - output.Log('Recognized an Extended Log File Format file.', 2) - return True - #end def RecognizeELFLine - - def GetELFLine(self, line): - """ Fetch the requested URL from an ELF line """ - fields = line.split(' ') - count = len(fields) - - # Verify status was Ok - if self._elf_status >= 0: - if self._elf_status >= count: - return None - if not fields[self._elf_status].strip() == '200': - return None - - # Verify method was HEAD or GET - if self._elf_method >= 0: - if self._elf_method >= count: - 
return None - if not fields[self._elf_method].strip() in ('HEAD', 'GET'): - return None - - # Pull the full URL if we can - if self._elf_uri >= 0: - if self._elf_uri >= count: - return None - url = fields[self._elf_uri].strip() - if url != '-': - return url - - # Put together a fragmentary URL - if self._elf_urifrag1 >= 0: - if self._elf_urifrag1 >= count or self._elf_urifrag2 >= count: - return None - urlfrag1 = fields[self._elf_urifrag1].strip() - urlfrag2 = None - if self._elf_urifrag2 >= 0: - urlfrag2 = fields[self._elf_urifrag2] - if urlfrag1 and (urlfrag1 != '-'): - if urlfrag2 and (urlfrag2 != '-'): - urlfrag1 = urlfrag1 + '?' + urlfrag2 - return urlfrag1 - - return None - #end def GetELFLine - - def RecognizeCLFLine(self, line): - """ Try to tokenize a logfile line according to CLF pattern and see if - it works. """ - match = ACCESSLOG_CLF_PATTERN.match(line) - recognize = match and (match.group(1) in ('HEAD', 'GET')) - if recognize: - output.Log('Recognized a Common Logfile Format file.', 2) - return recognize - #end def RecognizeCLFLine - - def GetCLFLine(self, line): - """ Fetch the requested URL from a CLF line """ - match = ACCESSLOG_CLF_PATTERN.match(line) - if match: - request = match.group(1) - if request in ('HEAD', 'GET'): - return match.group(2) - return None - #end def GetCLFLine - - def ProduceURLs(self, consumer): - """ Produces URLs from our data source, hands them in to the consumer. 
""" - - # Open the file - (frame, file) = OpenFileForRead(self._path, 'ACCESSLOG') - if not file: - return - - # Iterate lines - for line in file.readlines(): - if self._encoding: - line = encoder.WidenText(line, self._encoding) - line = line.strip() - - # If we don't know the format yet, try them both - if (not self._is_clf) and (not self._is_elf): - self._is_elf = self.RecognizeELFLine(line) - self._is_clf = self.RecognizeCLFLine(line) - - # Digest the line - match = None - if self._is_elf: - match = self.GetELFLine(line) - elif self._is_clf: - match = self.GetCLFLine(line) - if not match: - continue - - # Pass it on - url = URL() - url.TrySetAttribute('loc', match) - consumer(url, True) - - file.close() - if frame: - frame.close() - #end def ProduceURLs -#end class InputAccessLog - - -class InputSitemap(xml.sax.handler.ContentHandler): - - """ - Each Input class knows how to yield a set of URLs from a data source. - - This one handles Sitemap files and Sitemap index files. For the sake - of simplicity in design (and simplicity in interfacing with the SAX - package), we do not handle these at the same time, recursively. Instead - we read an index file completely and make a list of Sitemap files, then - go back and process each Sitemap. - """ - - class _ContextBase(object): - - """Base class for context handlers in our SAX processing. A context - handler is a class that is responsible for understanding one level of - depth in the XML schema. The class knows what sub-tags are allowed, - and doing any processing specific for the tag we're in. - - This base class is the API filled in by specific context handlers, - all defined below. 
- """ - - def __init__(self, subtags): - """Initialize with a sequence of the sub-tags that would be valid in - this context.""" - self._allowed_tags = subtags # Sequence of sub-tags we can have - self._last_tag = None # Most recent seen sub-tag - #end def __init__ - - def AcceptTag(self, tag): - """Returns True iff opening a sub-tag is valid in this context.""" - valid = tag in self._allowed_tags - if valid: - self._last_tag = tag - else: - self._last_tag = None - return valid - #end def AcceptTag - - def AcceptText(self, text): - """Returns True iff a blurb of text is valid in this context.""" - return False - #end def AcceptText - - def Open(self): - """The context is opening. Do initialization.""" - pass - #end def Open - - def Close(self): - """The context is closing. Return our result, if any.""" - pass - #end def Close - - def Return(self, result): - """We're returning to this context after handling a sub-tag. This - method is called with the result data from the sub-tag that just - closed. 
Here in _ContextBase, if we ever see a result it means - the derived child class forgot to override this method.""" - if result: - raise NotImplementedError - #end def Return - #end class _ContextBase - - class _ContextUrlSet(_ContextBase): - - """Context handler for the document node in a Sitemap.""" - - def __init__(self): - InputSitemap._ContextBase.__init__(self, ('url',)) - #end def __init__ - #end class _ContextUrlSet - - class _ContextUrl(_ContextBase): - - """Context handler for a URL node in a Sitemap.""" - - def __init__(self, consumer): - """Initialize this context handler with the callable consumer that - wants our URLs.""" - InputSitemap._ContextBase.__init__(self, URL.__slots__) - self._url = None # The URL object we're building - self._consumer = consumer # Who wants to consume it - #end def __init__ - - def Open(self): - """Initialize the URL.""" - assert not self._url - self._url = URL() - #end def Open - - def Close(self): - """Pass the URL to the consumer and reset it to None.""" - assert self._url - self._consumer(self._url, False) - self._url = None - #end def Close - - def Return(self, result): - """A value context has closed, absorb the data it gave us.""" - assert self._url - if result: - self._url.TrySetAttribute(self._last_tag, result) - #end def Return - #end class _ContextUrl - - class _ContextSitemapIndex(_ContextBase): - - """Context handler for the document node in an index file.""" - - def __init__(self): - InputSitemap._ContextBase.__init__(self, ('sitemap',)) - self._loclist = [] # List of accumulated Sitemap URLs - #end def __init__ - - def Open(self): - """Just a quick verify of state.""" - assert not self._loclist - #end def Open - - def Close(self): - """Return our list of accumulated URLs.""" - if self._loclist: - temp = self._loclist - self._loclist = [] - return temp - #end def Close - - def Return(self, result): - """Getting a new loc URL, add it to the collection.""" - if result: - self._loclist.append(result) - #end def 
Return - #end class _ContextSitemapIndex - - class _ContextSitemap(_ContextBase): - - """Context handler for a Sitemap entry in an index file.""" - - def __init__(self): - InputSitemap._ContextBase.__init__(self, ('loc', 'lastmod')) - self._loc = None # The URL to the Sitemap - #end def __init__ - - def Open(self): - """Just a quick verify of state.""" - assert not self._loc - #end def Open - - def Close(self): - """Return our URL to our parent.""" - if self._loc: - temp = self._loc - self._loc = None - return temp - output.Warn('In the Sitemap index file, a "sitemap" entry had no "loc".') - #end def Close - - def Return(self, result): - """A value has closed. If it was a 'loc', absorb it.""" - if result and (self._last_tag == 'loc'): - self._loc = result - #end def Return - #end class _ContextSitemap - - class _ContextValue(_ContextBase): - - """Context handler for a single value. We return just the value. The - higher level context has to remember what tag led into us.""" - - def __init__(self): - InputSitemap._ContextBase.__init__(self, ()) - self._text = None - #end def __init__ - - def AcceptText(self, text): - """Allow all text, adding it to our buffer.""" - if self._text: - self._text = self._text + text - else: - self._text = text - return True - #end def AcceptText - - def Open(self): - """Initialize our buffer.""" - self._text = None - #end def Open - - def Close(self): - """Return what's in our buffer.""" - text = self._text - self._text = None - if text: - text = text.strip() - return text - #end def Close - #end class _ContextValue - - def __init__(self, attributes): - """Initialize with a dictionary of attributes from our entry in the - config file.""" - xml.sax.handler.ContentHandler.__init__(self) - self._pathlist = None # A list of files - self._current = -1 # Current context in _contexts - self._contexts = None # The stack of contexts we allow - self._contexts_idx = None # ...contexts for index files - self._contexts_stm = None # ...contexts for 
Sitemap files - - if not ValidateAttributes('SITEMAP', attributes, ['path']): - return - - # Init the first file path - path = attributes.get('path') - if path: - path = encoder.MaybeNarrowPath(path) - if os.path.isfile(path): - output.Log('Input: From SITEMAP "%s"' % path, 2) - self._pathlist = [path] - else: - output.Error('Can not locate file "%s"' % path) - else: - output.Error('Sitemap entries must have a "path" attribute.') - #end def __init__ - - def ProduceURLs(self, consumer): - """In general: Produces URLs from our data source, hand them to the - callable consumer. - - In specific: Iterate over our list of paths and delegate the actual - processing to helper methods. This is a complexity no other data source - needs to suffer. We are unique in that we can have files that tell us - to bring in other files. - - Note the decision to allow an index file or not is made in this method. - If we call our parser with (self._contexts == None) the parser will - grab whichever context stack can handle the file. IE: index is allowed. - If instead we set (self._contexts = ...) before parsing, the parser - will only use the stack we specify. IE: index not allowed. 
- """ - # Set up two stacks of contexts - self._contexts_idx = [InputSitemap._ContextSitemapIndex(), - InputSitemap._ContextSitemap(), - InputSitemap._ContextValue()] - - self._contexts_stm = [InputSitemap._ContextUrlSet(), - InputSitemap._ContextUrl(consumer), - InputSitemap._ContextValue()] - - # Process the first file - assert self._pathlist - path = self._pathlist[0] - self._contexts = None # We allow an index file here - self._ProcessFile(path) - - # Iterate over remaining files - self._contexts = self._contexts_stm # No index files allowed - for path in self._pathlist[1:]: - self._ProcessFile(path) - #end def ProduceURLs - - def _ProcessFile(self, path): - """Do per-file reading/parsing/consuming for the file path passed in.""" - assert path - - # Open our file - (frame, file) = OpenFileForRead(path, 'SITEMAP') - if not file: - return - - # Rev up the SAX engine - try: - self._current = -1 - xml.sax.parse(file, self) - except SchemaError: - output.Error('An error in file "%s" made us abort reading the Sitemap.' - % path) - except IOError: - output.Error('Cannot read from file "%s"' % path) - except xml.sax._exceptions.SAXParseException, e: - output.Error('XML error in the file "%s" (line %d, column %d): %s' % - (path, e._linenum, e._colnum, e.getMessage())) - - # Clean up - file.close() - if frame: - frame.close() - #end def _ProcessFile - - def _MungeLocationListIntoFiles(self, urllist): - """Given a list of URLs, munge them into our self._pathlist property. - We do this by assuming all the files live in the same directory as - the first file in the existing pathlist. That is, we assume a - Sitemap index points to Sitemaps only in the same directory. This - is not true in general, but will be true for any output produced - by this script. 
- """ - assert self._pathlist - path = self._pathlist[0] - path = os.path.normpath(path) - dir = os.path.dirname(path) - wide = False - if type(path) == types.UnicodeType: - wide = True - - for url in urllist: - url = URL.Canonicalize(url) - output.Log('Index points to Sitemap file at: %s' % url, 2) - (scheme, netloc, path, query, frag) = urlparse.urlsplit(url) - file = os.path.basename(path) - file = urllib.unquote(file) - if wide: - file = encoder.WidenText(file) - if dir: - file = dir + os.sep + file - if file: - self._pathlist.append(file) - output.Log('Will attempt to read Sitemap file: %s' % file, 1) - #end def _MungeLocationListIntoFiles - - def startElement(self, tag, attributes): - """SAX processing, called per node in the config stream. - As long as the new tag is legal in our current context, this - becomes an Open call on one context deeper. - """ - # If this is the document node, we may have to look for a context stack - if (self._current < 0) and not self._contexts: - assert self._contexts_idx and self._contexts_stm - if tag == 'urlset': - self._contexts = self._contexts_stm - elif tag == 'sitemapindex': - self._contexts = self._contexts_idx - output.Log('File is a Sitemap index.', 2) - else: - output.Error('The document appears to be neither a Sitemap nor a ' - 'Sitemap index.') - raise SchemaError - - # Display a kinder error on a common mistake - if (self._current < 0) and (self._contexts == self._contexts_stm) and ( - tag == 'sitemapindex'): - output.Error('A Sitemap index can not refer to another Sitemap index.') - raise SchemaError - - # Verify no unexpected attributes - if attributes: - text = '' - for attr in attributes.keys(): - # The document node will probably have namespaces - if self._current < 0: - if attr.find('xmlns') >= 0: - continue - if attr.find('xsi') >= 0: - continue - if text: - text = text + ', ' - text = text + attr - if text: - output.Warn('Did not expect any attributes on any tag, instead tag ' - '"%s" had attributes: %s' % 
(tag, text)) - - # Switch contexts - if (self._current < 0) or (self._contexts[self._current].AcceptTag(tag)): - self._current = self._current + 1 - assert self._current < len(self._contexts) - self._contexts[self._current].Open() - else: - output.Error('Can not accept tag "%s" where it appears.' % tag) - raise SchemaError - #end def startElement - - def endElement(self, tag): - """SAX processing, called per node in the config stream. - This becomes a call to Close on one context followed by a call - to Return on the previous. - """ - tag = tag # Avoid warning on unused argument - assert self._current >= 0 - retval = self._contexts[self._current].Close() - self._current = self._current - 1 - if self._current >= 0: - self._contexts[self._current].Return(retval) - elif retval and (self._contexts == self._contexts_idx): - self._MungeLocationListIntoFiles(retval) - #end def endElement - - def characters(self, text): - """SAX processing, called when text values are read. Important to - note that one single text value may be split across multiple calls - of this method. - """ - if (self._current < 0) or ( - not self._contexts[self._current].AcceptText(text)): - if text.strip(): - output.Error('Can not accept text "%s" where it appears.' % text) - raise SchemaError - #end def characters -#end class InputSitemap - - -class FilePathGenerator: - """ - This class generates filenames in a series, upon request. - You can request any iteration number at any time, you don't - have to go in order. - - Example of iterations for '/path/foo.xml.gz': - 0 --> /path/foo.xml.gz - 1 --> /path/foo1.xml.gz - 2 --> /path/foo2.xml.gz - _index.xml --> /path/foo_index.xml - """ - - def __init__(self): - self.is_gzip = False # Is this a GZIP file? - - self._path = None # '/path/' - self._prefix = None # 'foo' - self._suffix = None # '.xml.gz' - #end def __init__ - - def Preload(self, path): - """ Splits up a path into forms ready for recombination. 
""" - path = encoder.MaybeNarrowPath(path) - - # Get down to a base name - path = os.path.normpath(path) - base = os.path.basename(path).lower() - if not base: - output.Error('Couldn\'t parse the file path: %s' % path) - return False - lenbase = len(base) - - # Recognize extension - lensuffix = 0 - compare_suffix = ['.xml', '.xml.gz', '.gz'] - for suffix in compare_suffix: - if base.endswith(suffix): - lensuffix = len(suffix) - break - if not lensuffix: - output.Error('The path "%s" doesn\'t end in a supported file ' - 'extension.' % path) - return False - self.is_gzip = suffix.endswith('.gz') - - # Split the original path - lenpath = len(path) - self._path = path[:lenpath-lenbase] - self._prefix = path[lenpath-lenbase:lenpath-lensuffix] - self._suffix = path[lenpath-lensuffix:] - - return True - #end def Preload - - def GeneratePath(self, instance): - """ Generates the iterations, as described above. """ - prefix = self._path + self._prefix - if type(instance) == types.IntType: - if instance: - return '%s%d%s' % (prefix, instance, self._suffix) - return prefix + self._suffix - return prefix + instance - #end def GeneratePath - - def GenerateURL(self, instance, root_url): - """ Generates iterations, but as a URL instead of a path. """ - prefix = root_url + self._prefix - retval = None - if type(instance) == types.IntType: - if instance: - retval = '%s%d%s' % (prefix, instance, self._suffix) - else: - retval = prefix + self._suffix - else: - retval = prefix + instance - return URL.Canonicalize(retval) - #end def GenerateURL - - def GenerateWildURL(self, root_url): - """ Generates a wildcard that should match all our iterations """ - prefix = URL.Canonicalize(root_url + self._prefix) - temp = URL.Canonicalize(prefix + self._suffix) - suffix = temp[len(prefix):] - return prefix + '*' + suffix - #end def GenerateURL -#end class FilePathGenerator - - -class PerURLStatistics: - """ Keep track of some simple per-URL statistics, like file extension. 
""" - - def __init__(self): - self._extensions = {} # Count of extension instances - #end def __init__ - - def Consume(self, url): - """ Log some stats for the URL. At the moment, that means extension. """ - if url and url.loc: - (scheme, netloc, path, query, frag) = urlparse.urlsplit(url.loc) - if not path: - return - - # Recognize directories - if path.endswith('/'): - if self._extensions.has_key('/'): - self._extensions['/'] = self._extensions['/'] + 1 - else: - self._extensions['/'] = 1 - return - - # Strip to a filename - i = path.rfind('/') - if i >= 0: - assert i < len(path) - path = path[i:] - - # Find extension - i = path.rfind('.') - if i > 0: - assert i < len(path) - ext = path[i:].lower() - if self._extensions.has_key(ext): - self._extensions[ext] = self._extensions[ext] + 1 - else: - self._extensions[ext] = 1 - else: - if self._extensions.has_key('(no extension)'): - self._extensions['(no extension)'] = self._extensions[ - '(no extension)'] + 1 - else: - self._extensions['(no extension)'] = 1 - #end def Consume - - def Log(self): - """ Dump out stats to the output. """ - if len(self._extensions): - output.Log('Count of file extensions on URLs:', 1) - set = self._extensions.keys() - set.sort() - for ext in set: - output.Log(' %7d %s' % (self._extensions[ext], ext), 1) - #end def Log - -class Sitemap(xml.sax.handler.ContentHandler): - """ - This is the big workhorse class that processes your inputs and spits - out sitemap files. It is built as a SAX handler for set up purposes. - That is, it processes an XML stream to bring itself up. 
- """ - - def __init__(self, suppress_notify): - xml.sax.handler.ContentHandler.__init__(self) - self._filters = [] # Filter objects - self._inputs = [] # Input objects - self._urls = {} # Maps URLs to count of dups - self._set = [] # Current set of URLs - self._filegen = None # Path generator for output files - self._wildurl1 = None # Sitemap URLs to filter out - self._wildurl2 = None # Sitemap URLs to filter out - self._sitemaps = 0 # Number of output files - # We init _dup_max to 2 so the default priority is 0.5 instead of 1.0 - self._dup_max = 2 # Max number of duplicate URLs - self._stat = PerURLStatistics() # Some simple stats - self._in_site = False # SAX: are we in a Site node? - self._in_Site_ever = False # SAX: were we ever in a Site? - - self._default_enc = None # Best encoding to try on URLs - self._base_url = None # Prefix to all valid URLs - self._store_into = None # Output filepath - self._suppress = suppress_notify # Suppress notify of servers - #end def __init__ - - def ValidateBasicConfig(self): - """ Verifies (and cleans up) the basic user-configurable options. 
""" - all_good = True - - if self._default_enc: - encoder.SetUserEncoding(self._default_enc) - - # Canonicalize the base_url - if all_good and not self._base_url: - output.Error('A site needs a "base_url" attribute.') - all_good = False - if all_good and not URL.IsAbsolute(self._base_url): - output.Error('The "base_url" must be absolute, not relative: %s' % - self._base_url) - all_good = False - if all_good: - self._base_url = URL.Canonicalize(self._base_url) - if not self._base_url.endswith('/'): - self._base_url = self._base_url + '/' - output.Log('BaseURL is set to: %s' % self._base_url, 2) - - # Load store_into into a generator - if all_good: - if self._store_into: - self._filegen = FilePathGenerator() - if not self._filegen.Preload(self._store_into): - all_good = False - else: - output.Error('A site needs a "store_into" attribute.') - all_good = False - - # Ask the generator for patterns on what its output will look like - if all_good: - self._wildurl1 = self._filegen.GenerateWildURL(self._base_url) - self._wildurl2 = self._filegen.GenerateURL(SITEINDEX_SUFFIX, - self._base_url) - - # Unify various forms of False - if all_good: - if self._suppress: - if (type(self._suppress) == types.StringType) or (type(self._suppress) - == types.UnicodeType): - if (self._suppress == '0') or (self._suppress.lower() == 'false'): - self._suppress = False - - # Done - if not all_good: - output.Log('See "example_config.xml" for more information.', 0) - return all_good - #end def ValidateBasicConfig - - def Generate(self): - """ Run over all the Inputs and ask them to Produce """ - # Run the inputs - for input in self._inputs: - input.ProduceURLs(self.ConsumeURL) - - # Do last flushes - if len(self._set): - self.FlushSet() - if not self._sitemaps: - output.Warn('No URLs were recorded, writing an empty sitemap.') - self.FlushSet() - - # Write an index as needed - if self._sitemaps > 1: - self.WriteIndex() - - # Notify - self.NotifySearch() - - # Dump stats - self._stat.Log() - #end 
def Generate - - def ConsumeURL(self, url, allow_fragment): - """ - All per-URL processing comes together here, regardless of Input. - Here we run filters, remove duplicates, spill to disk as needed, etc. - """ - if not url: - return - - # Validate - if not url.Validate(self._base_url, allow_fragment): - return - - # Run filters - accept = None - for filter in self._filters: - accept = filter.Apply(url) - if accept != None: - break - if not (accept or (accept == None)): - url.Log(prefix='FILTERED', level=2) - return - - # Ignore our out output URLs - if fnmatch.fnmatchcase(url.loc, self._wildurl1) or fnmatch.fnmatchcase( - url.loc, self._wildurl2): - url.Log(prefix='IGNORED (output file)', level=2) - return - - # Note the sighting - hash = url.MakeHash() - if self._urls.has_key(hash): - dup = self._urls[hash] - if dup > 0: - dup = dup + 1 - self._urls[hash] = dup - if self._dup_max < dup: - self._dup_max = dup - url.Log(prefix='DUPLICATE') - return - - # Acceptance -- add to set - self._urls[hash] = 1 - self._set.append(url) - self._stat.Consume(url) - url.Log() - - # Flush the set if needed - if len(self._set) >= MAXURLS_PER_SITEMAP: - self.FlushSet() - #end def ConsumeURL - - def FlushSet(self): - """ - Flush the current set of URLs to the output. This is a little - slow because we like to sort them all and normalize the priorities - before dumping. 
- """ - - # Sort and normalize - output.Log('Sorting and normalizing collected URLs.', 1) - self._set.sort() - for url in self._set: - hash = url.MakeHash() - dup = self._urls[hash] - if dup > 0: - self._urls[hash] = -1 - if not url.priority: - url.priority = '%.4f' % (float(dup) / float(self._dup_max)) - - # Get the filename we're going to write to - filename = self._filegen.GeneratePath(self._sitemaps) - if not filename: - output.Fatal('Unexpected: Couldn\'t generate output filename.') - self._sitemaps = self._sitemaps + 1 - output.Log('Writing Sitemap file "%s" with %d URLs' % - (filename, len(self._set)), 1) - - # Write to it - frame = None - file = None - - try: - if self._filegen.is_gzip: - basename = os.path.basename(filename); - frame = open(filename, 'wb') - file = gzip.GzipFile(fileobj=frame, filename=basename, mode='wt') - else: - file = open(filename, 'wt') - - file.write(SITEMAP_HEADER) - for url in self._set: - url.WriteXML(file) - file.write(SITEMAP_FOOTER) - - file.close() - if frame: - frame.close() - - frame = None - file = None - except IOError: - output.Fatal('Couldn\'t write out to file: %s' % filename) - os.chmod(filename, 0644) - - # Flush - self._set = [] - #end def FlushSet - - def WriteIndex(self): - """ Write the master index of all Sitemap files """ - # Make a filename - filename = self._filegen.GeneratePath(SITEINDEX_SUFFIX) - if not filename: - output.Fatal('Unexpected: Couldn\'t generate output index filename.') - output.Log('Writing index file "%s" with %d Sitemaps' % - (filename, self._sitemaps), 1) - - # Make a lastmod time - lastmod = TimestampISO8601(time.time()) - - # Write to it - try: - fd = open(filename, 'wt') - fd.write(SITEINDEX_HEADER) - - for mapnumber in range(0,self._sitemaps): - # Write the entry - mapurl = self._filegen.GenerateURL(mapnumber, self._base_url) - mapattributes = { 'loc' : mapurl, 'lastmod' : lastmod } - fd.write(SITEINDEX_ENTRY % mapattributes) - - fd.write(SITEINDEX_FOOTER) - - fd.close() - fd = None - 
except IOError: - output.Fatal('Couldn\'t write out to file: %s' % filename) - os.chmod(filename, 0644) - #end def WriteIndex - - def NotifySearch(self): - """ Send notification of the new Sitemap(s) to the search engines. """ - if self._suppress: - output.Log('Search engine notification is suppressed.', 1) - return - - output.Log('Notifying search engines.', 1) - - # Override the urllib's opener class with one that doesn't ignore 404s - class ExceptionURLopener(urllib.FancyURLopener): - def http_error_default(self, url, fp, errcode, errmsg, headers): - output.Log('HTTP error %d: %s' % (errcode, errmsg), 2) - raise IOError - #end def http_error_default - #end class ExceptionURLOpener - old_opener = urllib._urlopener - urllib._urlopener = ExceptionURLopener() - - # Build the URL we want to send in - if self._sitemaps > 1: - url = self._filegen.GenerateURL(SITEINDEX_SUFFIX, self._base_url) - else: - url = self._filegen.GenerateURL(0, self._base_url) - - # Test if we can hit it ourselves - try: - u = urllib.urlopen(url) - u.close() - except IOError: - output.Error('When attempting to access our generated Sitemap at the ' - 'following URL:\n %s\n we failed to read it. Please ' - 'verify the store_into path you specified in\n' - ' your configuration file is web-accessable. Consult ' - 'the FAQ for more\n information.' 
% url) - output.Warn('Proceeding to notify with an unverifyable URL.') - - # Cycle through notifications - # To understand this, see the comment near the NOTIFICATION_SITES comment - for ping in NOTIFICATION_SITES: - query_map = ping[3] - query_attr = ping[5] - query_map[query_attr] = url - query = urllib.urlencode(query_map) - notify = urlparse.urlunsplit((ping[0], ping[1], ping[2], query, ping[4])) - - # Send the notification - output.Log('Notifying: %s' % ping[1], 1) - output.Log('Notification URL: %s' % notify, 2) - try: - u = urllib.urlopen(notify) - u.read() - u.close() - except IOError: - output.Warn('Cannot contact: %s' % ping[1]) - - if old_opener: - urllib._urlopener = old_opener - #end def NotifySearch - - def startElement(self, tag, attributes): - """ SAX processing, called per node in the config stream. """ - - if tag == 'site': - if self._in_site: - output.Error('Can not nest Site entries in the configuration.') - else: - self._in_site = True - - if not ValidateAttributes('SITE', attributes, - ('verbose', 'default_encoding', 'base_url', 'store_into', - 'suppress_search_engine_notify')): - return - - verbose = attributes.get('verbose', 0) - if verbose: - output.SetVerbose(verbose) - - self._default_enc = attributes.get('default_encoding') - self._base_url = attributes.get('base_url') - self._store_into = attributes.get('store_into') - if not self._suppress: - self._suppress = attributes.get('suppress_search_engine_notify', - False) - self.ValidateBasicConfig() - - elif tag == 'filter': - self._filters.append(Filter(attributes)) - - elif tag == 'url': - self._inputs.append(InputURL(attributes)) - - elif tag == 'urllist': - for attributeset in ExpandPathAttribute(attributes, 'path'): - self._inputs.append(InputURLList(attributeset)) - - elif tag == 'directory': - self._inputs.append(InputDirectory(attributes, self._base_url)) - - elif tag == 'accesslog': - for attributeset in ExpandPathAttribute(attributes, 'path'): - 
self._inputs.append(InputAccessLog(attributeset)) - - elif tag == 'sitemap': - for attributeset in ExpandPathAttribute(attributes, 'path'): - self._inputs.append(InputSitemap(attributeset)) - - else: - output.Error('Unrecognized tag in the configuration: %s' % tag) - #end def startElement - - def endElement(self, tag): - """ SAX processing, called per node in the config stream. """ - if tag == 'site': - assert self._in_site - self._in_site = False - self._in_site_ever = True - #end def endElement - - def endDocument(self): - """ End of SAX, verify we can proceed. """ - if not self._in_site_ever: - output.Error('The configuration must specify a "site" element.') - else: - if not self._inputs: - output.Warn('There were no inputs to generate a sitemap from.') - #end def endDocument -#end class Sitemap - - -def ValidateAttributes(tag, attributes, goodattributes): - """ Makes sure 'attributes' does not contain any attribute not - listed in 'goodattributes' """ - all_good = True - for attr in attributes.keys(): - if not attr in goodattributes: - output.Error('Unknown %s attribute: %s' % (tag, attr)) - all_good = False - return all_good -#end def ValidateAttributes - -def ExpandPathAttribute(src, attrib): - """ Given a dictionary of attributes, return a list of dictionaries - with all the same attributes except for the one named attrib. - That one, we treat as a file path and expand into all its possible - variations. """ - # Do the path expansion. On any error, just return the source dictionary. 
- path = src.get(attrib) - if not path: - return [src] - path = encoder.MaybeNarrowPath(path); - pathlist = glob.glob(path) - if not pathlist: - return [src] - - # If this isn't actually a dictionary, make it one - if type(src) != types.DictionaryType: - tmp = {} - for key in src.keys(): - tmp[key] = src[key] - src = tmp - - # Create N new dictionaries - retval = [] - for path in pathlist: - dst = src.copy() - dst[attrib] = path - retval.append(dst) - - return retval -#end def ExpandPathAttribute - -def OpenFileForRead(path, logtext): - """ Opens a text file, be it GZip or plain """ - - frame = None - file = None - - if not path: - return (frame, file) - - try: - if path.endswith('.gz'): - frame = open(path, 'rb') - file = gzip.GzipFile(fileobj=frame, mode='rt') - else: - file = open(path, 'rt') - - if logtext: - output.Log('Opened %s file: %s' % (logtext, path), 1) - else: - output.Log('Opened file: %s' % path, 1) - except IOError: - output.Error('Can not open file: %s' % path) - - return (frame, file) -#end def OpenFileForRead - -def TimestampISO8601(t): - """Seconds since epoch (1970-01-01) --> ISO 8601 time string.""" - return time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime(t)) -#end def TimestampISO8601 - -def CreateSitemapFromFile(configpath, suppress_notify): - """ Sets up a new Sitemap object from the specified configuration file. 
""" - - # Remember error count on the way in - num_errors = output.num_errors - - # Rev up SAX to parse the config - sitemap = Sitemap(suppress_notify) - try: - output.Log('Reading configuration file: %s' % configpath, 0) - xml.sax.parse(configpath, sitemap) - except IOError: - output.Error('Cannot read configuration file: %s' % configpath) - except xml.sax._exceptions.SAXParseException, e: - output.Error('XML error in the config file (line %d, column %d): %s' % - (e._linenum, e._colnum, e.getMessage())) - except xml.sax._exceptions.SAXReaderNotAvailable: - output.Error('Some installs of Python 2.2 did not include complete support' - ' for XML.\n Please try upgrading your version of Python' - ' and re-running the script.') - - # If we added any errors, return no sitemap - if num_errors == output.num_errors: - return sitemap - return None -#end def CreateSitemapFromFile - -def ProcessCommandFlags(args): - """ - Parse command line flags per specified usage, pick off key, value pairs - All flags of type "--key=value" will be processed as __flags[key] = value, - "--option" will be processed as __flags[option] = option - """ - - flags = {} - rkeyval = '--(?P\S*)[=](?P\S*)' # --key=val - roption = '--(?P