adler_core/escalation.rs
1//! Escalation: when a cheap transport's `Uncertain` looks fixable by a
2//! heavier transport, retry through it — bounded by a per-scan budget.
3//!
4//! The default routing in [`Client::probe_once`](crate::Client) picks one
5//! transport per site based on its `protection` tags and the `bot-protected`
6//! tag. That works when the registry tags the site correctly, but misses the
7//! long tail of sites we haven't pre-tagged that nevertheless sit behind
8//! Cloudflare or a rate-limit edge. For those, the HTTP / impersonate path
9//! returns `Uncertain(CloudflareChallenge)` or `Uncertain(RateLimited)`; an
10//! automatic retry through the browser backend flips the verdict to a real
11//! `Found` / `NotFound` rather than the operator having to re-run with a
12//! manual override.
13//!
14//! Escalation is bounded by [`EscalationBudget`]: the operator controls how
15//! many extra browser fetches a single scan may consume, on top of the
16//! [`BrowserBudget`](crate::BrowserBudget) cap that gates the pre-tagged
17//! bot-protected subset. Defaults to 30; `--no-escalation` turns it off.
18
19use std::sync::atomic::{AtomicUsize, Ordering};
20
21use serde::{Deserialize, Serialize};
22
23use crate::check::UncertainReason;
24
25/// Which transport actually produced an outcome.
26///
27/// Stamped on every [`CheckOutcome`](crate::CheckOutcome) so downstream
28/// tools (the doctor, the bench harness, the web UI) can tell whether the
29/// HTTP path was enough, whether impersonation was needed, or whether the
30/// scan reached for the browser. `Option<TransportTier>` in the outcome
31/// keeps older persisted JSON parseable.
32#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
33#[serde(rename_all = "snake_case")]
34#[non_exhaustive]
35pub enum TransportTier {
36 /// Plain `reqwest` HTTP path (the default cheap transport).
37 Http,
38 /// `wreq` with Chrome 134 TLS-fingerprint emulation, behind the
39 /// `impersonate` Cargo feature.
40 Impersonate,
41 /// Headless browser via [`BrowserBackend`](crate::BrowserBackend).
42 Browser,
43}
44
45impl TransportTier {
46 /// Short stable identifier for logs / JSON / explain output.
47 #[must_use]
48 pub const fn as_str(self) -> &'static str {
49 match self {
50 Self::Http => "http",
51 Self::Impersonate => "impersonate",
52 Self::Browser => "browser",
53 }
54 }
55}
56
57impl core::fmt::Display for TransportTier {
58 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
59 f.write_str(self.as_str())
60 }
61}
62
/// Per-scan cap on how many automatic escalations may be attempted.
///
/// Shaped like [`BrowserBudget`](crate::BrowserBudget) but deliberately a
/// separate type, so the two ceilings are tracked independently: a
/// `bot-protected` site routed straight to the browser draws on
/// [`crate::BrowserBudget`], while a site that tries HTTP first and then
/// falls back to the browser draws one unit from each.
///
/// The state is just an atomic counter plus a fixed cap, and every method
/// takes `&self`, so one instance can be shared by reference across tasks.
#[derive(Debug)]
pub struct EscalationBudget {
    /// Units reserved so far; only ever incremented, and never past `cap`.
    used: AtomicUsize,
    /// Ceiling on `used`, fixed at construction.
    cap: usize,
}

impl EscalationBudget {
    /// Build a budget that grants at most `cap` reservations.
    ///
    /// A `cap` of `0` rejects every escalation.
    #[must_use]
    pub const fn new(cap: usize) -> Self {
        let used = AtomicUsize::new(0);
        Self { used, cap }
    }

    /// Build a budget whose cap is `usize::MAX`, i.e. effectively no ceiling.
    #[must_use]
    pub const fn unlimited() -> Self {
        Self::new(usize::MAX)
    }

    /// Atomically reserve one unit of budget.
    ///
    /// Returns `true` when the reservation was granted and `false` once
    /// `used` has reached `cap`. The check and the increment happen in a
    /// single atomic read-modify-write (retried on contention), so
    /// concurrent callers can never push `used` above `cap`.
    pub fn try_consume(&self) -> bool {
        let limit = self.cap;
        self.used
            .fetch_update(Ordering::AcqRel, Ordering::Acquire, |taken| {
                // `taken < limit <= usize::MAX`, so the increment cannot overflow.
                if taken < limit {
                    Some(taken + 1)
                } else {
                    None
                }
            })
            .is_ok()
    }

    /// How many escalations have been reserved so far in this scan.
    #[must_use]
    pub fn used(&self) -> usize {
        self.used.load(Ordering::Acquire)
    }

    /// The ceiling this budget was constructed with.
    #[must_use]
    pub const fn cap(&self) -> usize {
        self.cap
    }
}
125
126/// Whether an `Uncertain` outcome from the cheap path is worth retrying
127/// through the browser.
128///
129/// We escalate only on reasons that a real browser plausibly resolves —
130/// Cloudflare interstitials and rate-limit / 429-style responses. Reasons
131/// that no transport change can fix (the operator opted into robots-
132/// disallowed, the username is locally invalid, the deadline elapsed, the
133/// egress pool can't satisfy a geo requirement, a session is missing) stay
134/// as-is so escalation doesn't waste budget on hopeless cases.
135pub(crate) const fn should_escalate(reason: &UncertainReason) -> bool {
136 matches!(
137 reason,
138 UncertainReason::CloudflareChallenge | UncertainReason::RateLimited
139 )
140}
141
#[cfg(test)]
mod tests {
    use super::*;

    /// Only the Cloudflare and rate-limit reasons may trigger a browser
    /// retry; every other reason must be left untouched.
    #[test]
    fn escalates_on_cloudflare_and_rate_limited_only() {
        let escalated = [
            UncertainReason::CloudflareChallenge,
            UncertainReason::RateLimited,
        ];
        for (idx, reason) in escalated.iter().enumerate() {
            assert!(should_escalate(reason), "escalated[{idx}] should escalate");
        }

        let left_alone = [
            UncertainReason::Captcha,
            UncertainReason::RobotsDisallowed,
            UncertainReason::Deadline,
            UncertainReason::SchedulerClosed,
            UncertainReason::Network("refused".into()),
            UncertainReason::BodyRead("eof".into()),
            UncertainReason::BrowserBudget,
            UncertainReason::UsernameNotAllowed,
            UncertainReason::BrowserFailed("timeout".into()),
            UncertainReason::GeoUnavailable,
            UncertainReason::SessionRequired,
            UncertainReason::Other("?".into()),
        ];
        for (idx, reason) in left_alone.iter().enumerate() {
            assert!(
                !should_escalate(reason),
                "left_alone[{idx}] must not escalate"
            );
        }
    }

    /// A cap of two grants exactly two reservations and then refuses.
    #[test]
    fn budget_consumes_up_to_cap() {
        let budget = EscalationBudget::new(2);
        let grants: Vec<bool> = (0..3).map(|_| budget.try_consume()).collect();
        assert_eq!(grants, [true, true, false]);
        assert_eq!(budget.used(), 2);
        assert_eq!(budget.cap(), 2);
    }

    /// A zero cap refuses the very first reservation.
    #[test]
    fn budget_zero_denies_all() {
        let budget = EscalationBudget::new(0);
        assert!(!budget.try_consume());
    }

    /// The unlimited budget keeps granting well past any realistic cap.
    #[test]
    fn budget_unlimited_never_denies() {
        let budget = EscalationBudget::unlimited();
        assert!((0..1024).all(|_| budget.try_consume()));
    }

    /// `as_str` labels and the serde wire format must stay in lockstep.
    #[test]
    fn transport_tier_as_str_matches_serde() {
        let expected = [
            (TransportTier::Http, "http"),
            (TransportTier::Impersonate, "impersonate"),
            (TransportTier::Browser, "browser"),
        ];
        for &(tier, label) in &expected {
            assert_eq!(tier.as_str(), label);
        }

        let encoded = serde_json::to_string(&TransportTier::Impersonate).unwrap();
        assert_eq!(encoded, r#""impersonate""#);
        let decoded: TransportTier = serde_json::from_str(&encoded).unwrap();
        assert_eq!(decoded, TransportTier::Impersonate);
    }
}