feat: add socket backlog metric (Linux only)

Add --enable-backlog-metric option to emit a gunicorn.backlog histogram
metric showing connections waiting in the socket backlog. This helps
identify worker saturation and concurrency issues.

Also distinguishes between timer (|ms) and histogram (|h) statsd metric
types per the statsd spec.

Note: Only works on Linux using TCP_INFO from getsockopt.

Closes #2407
Partially fixes #2057
This commit is contained in:
Benoit Chesneau 2026-01-23 11:11:55 +01:00
parent e52ac46e29
commit f22cd6558e
4 changed files with 56 additions and 2 deletions

View File

@ -600,6 +600,16 @@ class Arbiter:
"value": active_worker_count,
"mtype": "gauge"})
if self.cfg.enable_backlog_metric:
    # get_backlog() reports a negative sentinel (-1) for listeners that
    # cannot measure their accept queue.  Those readings must be dropped
    # before summing: -1 is truthy, so ``or 0`` alone does not neutralise
    # it and every unsupported listener would subtract from the total
    # reported by the supported ones.  ``or 0`` is kept so that a falsy
    # reading (None) still counts as an empty queue.
    readings = [sock.get_backlog() or 0 for sock in self.LISTENERS]
    supported = [count for count in readings if count >= 0]

    # Emit the metric only when at least one listener produced a real
    # measurement; otherwise there is nothing meaningful to report.
    if supported:
        backlog = sum(supported)
        self.log.debug("socket backlog: {0}".format(backlog),
                       extra={"metric": "gunicorn.backlog",
                              "value": backlog,
                              "mtype": "histogram"})
def spawn_worker(self):
self.worker_age += 1
worker = self.worker_class(self.worker_age, self.pid, self.LISTENERS,

View File

@ -1710,6 +1710,21 @@ class StatsdPrefix(Setting):
"""
# Declares the ``enable_backlog_metric`` setting, exposed on the command
# line as ``--enable-backlog-metric`` and listed under the "Logging" section.
class BacklogMetric(Setting):
    name = "enable_backlog_metric"
    section = "Logging"
    cli = ["--enable-backlog-metric"]
    validator = validate_bool
    # Disabled unless explicitly requested.
    default = False
    # NOTE(review): presumably forwarded to argparse so the flag takes no
    # argument -- confirm against Setting's option-building code.
    action = "store_true"
    # User-facing help text for the setting.
    desc = """\
Enable socket backlog metric (only supported on Linux).
When enabled, gunicorn will emit a ``gunicorn.backlog`` histogram metric
showing the number of connections waiting in the socket backlog.
"""
class Procname(Setting):
name = "proc_name"
section = "Process Naming"

View File

@ -17,6 +17,7 @@ MTYPE_VAR = "mtype"
GAUGE_TYPE = "gauge"
COUNTER_TYPE = "counter"
HISTOGRAM_TYPE = "histogram"
TIMER_TYPE = "timer"
class Statsd(Logger):
@ -80,6 +81,8 @@ class Statsd(Logger):
self.increment(metric, value)
elif typ == HISTOGRAM_TYPE:
self.histogram(metric, value)
elif typ == TIMER_TYPE:
self.timer(metric, value)
else:
pass
@ -101,7 +104,7 @@ class Statsd(Logger):
status = status.decode('utf-8')
if isinstance(status, str):
status = int(status.split(None, 1)[0])
self.histogram("gunicorn.request.duration", duration_in_ms)
self.timer("gunicorn.request.duration", duration_in_ms)
self.increment("gunicorn.requests", 1)
self.increment("gunicorn.request.status.%d" % status, 1)
@ -116,9 +119,12 @@ class Statsd(Logger):
def decrement(self, name, value, sampling_rate=1.0):
    """Send a counter (``|c``) line that lowers ``name`` by ``value``.

    The metric name is prefixed with ``self.prefix`` and the sampling rate
    is appended as ``|@<rate>``.
    """
    payload = f"{self.prefix}{name}:-{value}|c|@{sampling_rate}"
    self._sock_send(payload)
def histogram(self, name, value):
def timer(self, name, value):
    """Send ``value`` for ``name`` as a statsd timer (``|ms``) line.

    The metric name is prefixed with ``self.prefix``.
    """
    payload = f"{self.prefix}{name}:{value}|ms"
    self._sock_send(payload)
def histogram(self, name, value):
    """Send ``value`` for ``name`` as a statsd histogram (``|h``) line.

    The metric name is prefixed with ``self.prefix``.
    """
    payload = f"{self.prefix}{name}:{value}|h"
    self._sock_send(payload)
def _sock_send(self, msg):
try:
if isinstance(msg, str):

View File

@ -7,11 +7,14 @@ import os
import socket
import ssl
import stat
import struct
import sys
import time
from gunicorn import util
PLATFORM = sys.platform
class BaseSocket:
@ -70,6 +73,9 @@ class BaseSocket:
self.sock = None
def get_backlog(self):
    """Return the number of connections waiting in the socket backlog.

    This base implementation has no way to measure it and always
    returns -1.
    """
    return -1
class TCPSocket(BaseSocket):
@ -88,6 +94,23 @@ class TCPSocket(BaseSocket):
sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
return super().set_options(sock, bound=bound)
if PLATFORM == "linux":
    def get_backlog(self):
        """Return the number of connections queued on this listening socket.

        The value is read from the kernel's ``tcp_info`` structure via
        ``getsockopt(IPPROTO_TCP, TCP_INFO)``.  Returns 0 when the socket
        is not open or the information cannot be read or decoded.
        """
        if not self.sock:
            return 0

        # Size of the tcp_info buffer requested from the kernel (layout
        # from include/uapi/linux/tcp.h: 8 one-byte fields followed by
        # 24 32-bit fields).
        tcp_info_size = 104
        # Only the leading fields up to and including tcpi_unacked are
        # decoded, so a shorter reply from the kernel is still usable.
        fmt = 'B' * 8 + 'I' * 5
        try:
            tcp_info_struct = self.sock.getsockopt(socket.IPPROTO_TCP,
                                                   socket.TCP_INFO,
                                                   tcp_info_size)
            # 12 is tcpi_unacked
            return struct.unpack_from(fmt, tcp_info_struct)[12]
        except (AttributeError, OSError, struct.error):
            # AttributeError: TCP_INFO missing from the socket module.
            # OSError: getsockopt failed.
            # struct.error: reply too short to contain tcpi_unacked.
            # The metric is best-effort, so report an empty backlog
            # instead of letting the error reach the caller.
            return 0
else:
    def get_backlog(self):
        """Return -1: the backlog cannot be measured on this platform."""
        return -1
class TCP6Socket(TCPSocket):