fix: Remove hardcoded paths for slow prediction

This commit is contained in:
Ankush Menat 2026-05-28 16:01:02 +05:30
parent 2471050b3a
commit ec6af68013
5 changed files with 11 additions and 58 deletions

View File

@ -163,9 +163,6 @@ New settings, mirroring `WorkerThreads` (`config.py:697`):
- `slow_threads` — `S`, slow-lane worker count. Default `1`. - `slow_threads` — `S`, slow-lane worker count. Default `1`.
- `slow_queue_maxsize` — bound on `slow_q`; overflow ⇒ `503`. Default e.g. `100` - `slow_queue_maxsize` — bound on `slow_q`; overflow ⇒ `503`. Default e.g. `100`
(`0` = unbounded). (`0` = unbounded).
- `slow_routes` — optional list of regex route patterns (matched with
`re.search` against the route key) operators know are slow, seeded into the
predictor so even the *first* request routes correctly.
- `slow_lane_retry_after` — seconds for the `Retry-After` header on 503. - `slow_lane_retry_after` — seconds for the `Retry-After` header on 503.
A `slow_route_key` hook to customize the route key (e.g. collapse A `slow_route_key` hook to customize the route key (e.g. collapse
@ -198,9 +195,8 @@ A small, self-contained, thread-safe object:
- `update(route_key, duration)`: EWMA with decay so a route that becomes fast - `update(route_key, duration)`: EWMA with decay so a route that becomes fast
again eventually returns to the fast lane (avoids permanent misclassification again eventually returns to the fast lane (avoids permanent misclassification
after a one-off slow spike). Called on every completion. after a one-off slow spike). Called on every completion.
- `is_slow(route_key)`: `True` if the route matches a seeded `slow_routes` - `is_slow(route_key)`: `True` if its `ewma_seconds >= slow_request_threshold`.
pattern, or its `ewma_seconds >= slow_request_threshold`. Unknown routes ⇒ Unknown routes ⇒ `False` (fast) by default.
`False` (fast) by default.
- Optional hysteresis (separate promote/demote thresholds) to avoid flapping - Optional hysteresis (separate promote/demote thresholds) to avoid flapping
around the boundary. around the boundary.
@ -216,19 +212,17 @@ A small, self-contained, thread-safe object:
request — we can't). This shortens the learning window when many requests to a request — we can't). This shortens the learning window when many requests to a
brand-new slow route arrive at once: subsequent ones in the burst route to the brand-new slow route arrive at once: subsequent ones in the burst route to the
slow lane after one threshold interval instead of after a full slow request. slow lane after one threshold interval instead of after a full slow request.
3. **Seeding (eliminates the first-occurrence window for known offenders)**:
`slow_routes` patterns mark routes slow from the very first request.
## 6. Behavior under load (the cases that matter) ## 6. Behavior under load (the cases that matter)
- **Flood of a known/seeded or previously-seen slow route**: every such request - **Flood of a previously-seen slow route**: every such request
is routed to the slow pool. The `F` fast threads are never given this work and is routed to the slow pool. The `F` fast threads are never given this work and
keep serving fast traffic at full capacity. When the slow lane reaches keep serving fast traffic at full capacity. When the slow lane reaches
`S + slow_queue_maxsize`, further slow requests get a fast `503` — backpressure `S + slow_queue_maxsize`, further slow requests get a fast `503` — backpressure
is contained to the slow lane. is contained to the slow lane.
- **Flood of a never-seen slow route**: the first occurrence(s) run in the fast - **Flood of a never-seen slow route**: the first occurrence(s) run in the fast
lane; mid-flight learning (§5.4.2) flips the route to slow after one threshold lane; mid-flight learning (§5.4.2) flips the route to slow after one threshold
interval, so the flood is contained quickly. Seeding avoids even this window. interval, so the flood is contained quickly.
- **Mixed fast traffic, idle slow lane**: the `S` slow threads stay parked (no - **Mixed fast traffic, idle slow lane**: the `S` slow threads stay parked (no
work stealing in this design — see §3), so fast throughput is `F`, not `F + S`. work stealing in this design — see §3), so fast throughput is `F`, not `F + S`.
- **Misprediction (route marked slow but now fast)**: handled gracefully — it - **Misprediction (route marked slow but now fast)**: handled gracefully — it
@ -239,7 +233,7 @@ A small, self-contained, thread-safe object:
Implemented: Implemented:
- `config.py` — `slow_request_threshold`, `slow_threads`, `slow_queue_maxsize`, - `config.py` — `slow_request_threshold`, `slow_threads`, `slow_queue_maxsize`,
`slow_routes`, `slow_lane_retry_after`, plus `validate_pos_float`. `slow_lane_retry_after`, plus `validate_pos_float`.
- `gthread.py` `init_process`/`get_thread_pool` — build `fast_pool` and - `gthread.py` `init_process`/`get_thread_pool` — build `fast_pool` and
`slow_pool` (or the single legacy pool when disabled); `_shutdown_pools`. `slow_pool` (or the single legacy pool when disabled); `_shutdown_pools`.
- `gthread.py` `enqueue_req` — route to the matching pool; `nr_slow` bound + - `gthread.py` `enqueue_req` — route to the matching pool; `nr_slow` bound +

View File

@ -823,30 +823,6 @@ class SlowQueueMaxsize(Setting):
""" """
class SlowRoutes(Setting):
name = "slow_routes"
section = "Worker Processes"
cli = ["--slow-route"]
action = "append"
meta = "PATTERN"
validator = validate_list_string
default = []
desc = """\
Regular expression(s) matching routes that should always be treated as
slow, regardless of observed timings.
Each pattern is matched (using ``re.search``) against the route key,
which is the request method and path joined by a space, e.g.
``"POST /reports/generate"``. Seeding known-slow routes avoids the brief
window where a never-before-seen slow route is learned.
Only used by the ``gthread`` worker when
:ref:`slow-request-threshold` is set.
.. versionadded:: 23.1.0
"""
class SlowLaneRetryAfter(Setting): class SlowLaneRetryAfter(Setting):
name = "slow_lane_retry_after" name = "slow_lane_retry_after"
section = "Worker Processes" section = "Worker Processes"

View File

@ -107,10 +107,7 @@ class ThreadWorker(base.Worker):
def init_process(self): def init_process(self):
self.tpool = self.get_thread_pool() self.tpool = self.get_thread_pool()
if self.routing_enabled: if self.routing_enabled:
self.predictor = SlowRoutePredictor( self.predictor = SlowRoutePredictor(self.slow_threshold)
self.slow_threshold,
seed_patterns=self.cfg.slow_routes,
)
# a dedicated pool for the slow lane: slow requests can never # a dedicated pool for the slow lane: slow requests can never
# occupy the fast pool's (``self.tpool``) threads # occupy the fast pool's (``self.tpool``) threads
self.slow_pool = futures.ThreadPoolExecutor( self.slow_pool = futures.ThreadPoolExecutor(

View File

@ -6,12 +6,11 @@
The :class:`SlowRoutePredictor` decides, before a request is handed to a The :class:`SlowRoutePredictor` decides, before a request is handed to a
worker, whether its route is expected to be slow, based on previously observed worker, whether its route is expected to be slow, based on previously observed
timings of the same route (method + path) plus operator-seeded patterns. The timings of the same route (method + path). The gthread worker uses this to
gthread worker uses this to route slow requests to a dedicated thread pool so route slow requests to a dedicated thread pool so they cannot starve fast
they cannot starve fast requests. requests.
""" """
import re
import threading import threading
from collections import OrderedDict from collections import OrderedDict
@ -22,26 +21,19 @@ class SlowRoutePredictor:
Timings are tracked per route as an exponentially weighted moving average Timings are tracked per route as an exponentially weighted moving average
(EWMA) so that a route which becomes fast again decays back below the (EWMA) so that a route which becomes fast again decays back below the
threshold. The table is bounded (LRU) to cap memory under high route threshold. The table is bounded (LRU) to cap memory under high route
cardinality. Operator-seeded regex patterns always classify as slow. cardinality.
""" """
def __init__(self, threshold, max_entries=1024, alpha=0.3, def __init__(self, threshold, max_entries=1024, alpha=0.3):
seed_patterns=None):
self.threshold = threshold self.threshold = threshold
self.alpha = alpha self.alpha = alpha
self.max_entries = max_entries self.max_entries = max_entries
self._stats = OrderedDict() self._stats = OrderedDict()
self._lock = threading.Lock() self._lock = threading.Lock()
self._seed = [re.compile(p) for p in (seed_patterns or [])]
def _seeded(self, key):
return any(p.search(key) for p in self._seed)
def is_slow(self, key): def is_slow(self, key):
if not key: if not key:
return False return False
if self._seeded(key):
return True
with self._lock: with self._lock:
ewma = self._stats.get(key) ewma = self._stats.get(key)
if ewma is None: if ewma is None:

View File

@ -33,12 +33,6 @@ def test_predictor_observe_slow_marks_immediately():
assert p.is_slow("POST /report") is True assert p.is_slow("POST /report") is True
def test_predictor_seed_patterns():
p = SlowRoutePredictor(threshold=1.0, seed_patterns=[r"^POST /reports/"])
assert p.is_slow("POST /reports/generate") is True
assert p.is_slow("GET /reports/generate") is False
def test_predictor_lru_bound(): def test_predictor_lru_bound():
p = SlowRoutePredictor(threshold=1.0, max_entries=10) p = SlowRoutePredictor(threshold=1.0, max_entries=10)
for i in range(50): for i in range(50):