feat: add socket backlog metric (Linux only)

Add --enable-backlog-metric option to emit a gunicorn.backlog histogram
metric showing connections waiting in the socket backlog. This helps
identify worker saturation and concurrency issues.

Also distinguishes between timer (|ms) and histogram (|h) statsd metric
types per the statsd spec.

Note: this only works on Linux, where the backlog is read via getsockopt() with TCP_INFO.

Closes #2407
Partially fixes #2057
This commit is contained in:
Benoit Chesneau 2026-01-23 11:11:55 +01:00
parent e52ac46e29
commit f22cd6558e
4 changed files with 56 additions and 2 deletions

View File

@ -600,6 +600,16 @@ class Arbiter:
"value": active_worker_count, "value": active_worker_count,
"mtype": "gauge"}) "mtype": "gauge"})
if self.cfg.enable_backlog_metric:
    # get_backlog() returns a negative sentinel (-1) when a listener cannot
    # measure its backlog. Sentinels must not be added to real counts:
    # each unsupported listener would otherwise silently subtract one
    # from the reported total.
    readings = [sock.get_backlog() or 0 for sock in self.LISTENERS]
    supported = [count for count in readings if count >= 0]
    # Emit only when at least one listener produced a real measurement.
    if supported:
        backlog = sum(supported)
        self.log.debug("socket backlog: {0}".format(backlog),
                       extra={"metric": "gunicorn.backlog",
                              "value": backlog,
                              "mtype": "histogram"})
def spawn_worker(self): def spawn_worker(self):
self.worker_age += 1 self.worker_age += 1
worker = self.worker_class(self.worker_age, self.pid, self.LISTENERS, worker = self.worker_class(self.worker_age, self.pid, self.LISTENERS,

View File

@ -1710,6 +1710,21 @@ class StatsdPrefix(Setting):
""" """
# Boolean setting, off by default, exposed on the command line as the
# store_true flag --enable-backlog-metric and filed under the "Logging"
# section. Its value is validated with validate_bool.
class BacklogMetric(Setting):
name = "enable_backlog_metric"
section = "Logging"
cli = ["--enable-backlog-metric"]
validator = validate_bool
default = False
action = "store_true"
desc = """\
Enable socket backlog metric (only supported on Linux).
When enabled, gunicorn will emit a ``gunicorn.backlog`` histogram metric
showing the number of connections waiting in the socket backlog.
"""
class Procname(Setting): class Procname(Setting):
name = "proc_name" name = "proc_name"
section = "Process Naming" section = "Process Naming"

View File

@ -17,6 +17,7 @@ MTYPE_VAR = "mtype"
GAUGE_TYPE = "gauge" GAUGE_TYPE = "gauge"
COUNTER_TYPE = "counter" COUNTER_TYPE = "counter"
HISTOGRAM_TYPE = "histogram" HISTOGRAM_TYPE = "histogram"
TIMER_TYPE = "timer"
class Statsd(Logger): class Statsd(Logger):
@ -80,6 +81,8 @@ class Statsd(Logger):
self.increment(metric, value) self.increment(metric, value)
elif typ == HISTOGRAM_TYPE: elif typ == HISTOGRAM_TYPE:
self.histogram(metric, value) self.histogram(metric, value)
elif typ == TIMER_TYPE:
self.timer(metric, value)
else: else:
pass pass
@ -101,7 +104,7 @@ class Statsd(Logger):
status = status.decode('utf-8') status = status.decode('utf-8')
if isinstance(status, str): if isinstance(status, str):
status = int(status.split(None, 1)[0]) status = int(status.split(None, 1)[0])
self.histogram("gunicorn.request.duration", duration_in_ms) self.timer("gunicorn.request.duration", duration_in_ms)
self.increment("gunicorn.requests", 1) self.increment("gunicorn.requests", 1)
self.increment("gunicorn.request.status.%d" % status, 1) self.increment("gunicorn.request.status.%d" % status, 1)
@ -116,9 +119,12 @@ class Statsd(Logger):
def decrement(self, name, value, sampling_rate=1.0): def decrement(self, name, value, sampling_rate=1.0):
self._sock_send("{0}{1}:-{2}|c|@{3}".format(self.prefix, name, value, sampling_rate)) self._sock_send("{0}{1}:-{2}|c|@{3}".format(self.prefix, name, value, sampling_rate))
def histogram(self, name, value): def timer(self, name, value):
self._sock_send("{0}{1}:{2}|ms".format(self.prefix, name, value)) self._sock_send("{0}{1}:{2}|ms".format(self.prefix, name, value))
# Sends `value` for metric `name` (with self.prefix prepended) through
# self._sock_send, tagged with the "|h" type suffix.
def histogram(self, name, value):
self._sock_send("{0}{1}:{2}|h".format(self.prefix, name, value))
def _sock_send(self, msg): def _sock_send(self, msg):
try: try:
if isinstance(msg, str): if isinstance(msg, str):

View File

@ -7,11 +7,14 @@ import os
import socket import socket
import ssl import ssl
import stat import stat
import struct
import sys import sys
import time import time
from gunicorn import util from gunicorn import util
PLATFORM = sys.platform
class BaseSocket: class BaseSocket:
@ -70,6 +73,9 @@ class BaseSocket:
self.sock = None self.sock = None
# Base implementation: returns -1 unconditionally. TCPSocket defines its
# own get_backlog; -1 presumably signals "backlog not measurable" to
# callers -- confirm against the arbiter's use of the value.
def get_backlog(self):
return -1
class TCPSocket(BaseSocket): class TCPSocket(BaseSocket):
@ -88,6 +94,23 @@ class TCPSocket(BaseSocket):
sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1) sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
return super().set_options(sock, bound=bound) return super().set_options(sock, bound=bound)
if PLATFORM == "linux":
    def get_backlog(self):
        """Return the tcpi_unacked counter of the listening socket.

        The value is read from the kernel's ``struct tcp_info`` (see
        include/uapi/linux/tcp.h) through ``getsockopt(TCP_INFO)`` and is
        used as the number of connections waiting in the backlog.

        Returns 0 when there is no socket, when TCP_INFO is unavailable,
        when the getsockopt call fails, or when the kernel returns a
        buffer too short to contain the field.
        """
        if not self.sock:
            return 0
        try:
            tcp_info = self.sock.getsockopt(socket.IPPROTO_TCP,
                                            socket.TCP_INFO, 104)
            # struct tcp_info starts with eight 1-byte fields followed by
            # 32-bit counters; tcpi_unacked is the fifth counter, i.e.
            # byte offset 8 + 4 * 4 = 24. Reading just that field avoids
            # depending on the total size the kernel hands back.
            return struct.unpack_from('I', tcp_info, 24)[0]
        except (AttributeError, OSError, struct.error):
            # Best effort: a failed probe is reported as an empty backlog.
            return 0
else:
    def get_backlog(self):
        """Return -1: the backlog cannot be inspected on this platform."""
        return -1
class TCP6Socket(TCPSocket): class TCP6Socket(TCPSocket):