Skip to main content

adler_core/
escalation.rs

1//! Escalation: when a cheap transport's `Uncertain` looks fixable by a
2//! heavier transport, retry through it — bounded by a per-scan budget.
3//!
4//! The default routing in [`Client::probe_once`](crate::Client) picks one
5//! transport per site based on its `protection` tags and the `bot-protected`
6//! tag. That works when the registry tags the site correctly, but misses the
7//! long tail of sites we haven't pre-tagged that nevertheless sit behind
8//! Cloudflare or a rate-limit edge. For those, the HTTP / impersonate path
9//! returns `Uncertain(CloudflareChallenge)` or `Uncertain(RateLimited)`; an
10//! automatic retry through the browser backend flips the verdict to a real
11//! `Found` / `NotFound` rather than the operator having to re-run with a
12//! manual override.
13//!
14//! Escalation is bounded by [`EscalationBudget`]: the operator controls how
15//! many extra browser fetches a single scan may consume, on top of the
16//! [`BrowserBudget`](crate::BrowserBudget) cap that gates the pre-tagged
17//! bot-protected subset. Defaults to 30; `--no-escalation` turns it off.
18
19use std::sync::atomic::{AtomicUsize, Ordering};
20
21use serde::{Deserialize, Serialize};
22
23use crate::check::UncertainReason;
24
25/// Which transport actually produced an outcome.
26///
27/// Stamped on every [`CheckOutcome`](crate::CheckOutcome) so downstream
28/// tools (the doctor, the bench harness, the web UI) can tell whether the
29/// HTTP path was enough, whether impersonation was needed, or whether the
30/// scan reached for the browser. `Option<TransportTier>` in the outcome
31/// keeps older persisted JSON parseable.
32#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
33#[serde(rename_all = "snake_case")]
34#[non_exhaustive]
35pub enum TransportTier {
36    /// Plain `reqwest` HTTP path (the default cheap transport).
37    Http,
38    /// `wreq` with Chrome 134 TLS-fingerprint emulation, behind the
39    /// `impersonate` Cargo feature.
40    Impersonate,
41    /// Headless browser via [`BrowserBackend`](crate::BrowserBackend).
42    Browser,
43}
44
45impl TransportTier {
46    /// Short stable identifier for logs / JSON / explain output.
47    #[must_use]
48    pub const fn as_str(self) -> &'static str {
49        match self {
50            Self::Http => "http",
51            Self::Impersonate => "impersonate",
52            Self::Browser => "browser",
53        }
54    }
55}
56
57impl core::fmt::Display for TransportTier {
58    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
59        f.write_str(self.as_str())
60    }
61}
62
/// Per-scan ceiling on automatic escalation attempts.
///
/// Has the same shape as [`BrowserBudget`](crate::BrowserBudget) but is
/// deliberately a distinct type so the two caps stay independent: a
/// `bot-protected` site that goes straight to the browser consumes
/// [`crate::BrowserBudget`] only, while a site that tries HTTP first and
/// then falls back to the browser consumes one unit of each.
///
/// Every method takes `&self` and the counter is atomic, so a single budget
/// can be shared across concurrent tasks.
#[derive(Debug)]
pub struct EscalationBudget {
    /// Units granted so far; `try_consume` never lets this exceed `cap`.
    used: AtomicUsize,
    /// Maximum number of units that will ever be granted.
    cap: usize,
}

impl EscalationBudget {
    /// Creates a budget that grants at most `cap` escalations.
    ///
    /// `cap = 0` denies every escalation.
    #[must_use]
    pub const fn new(cap: usize) -> Self {
        Self {
            used: AtomicUsize::new(0),
            cap,
        }
    }

    /// Creates a budget with no practical ceiling (`cap` is `usize::MAX`).
    #[must_use]
    pub const fn unlimited() -> Self {
        Self::new(usize::MAX)
    }

    /// Atomically reserves one unit of budget.
    ///
    /// Returns `true` if the unit was granted and `false` once the cap has
    /// been reached; a denied call leaves the counter untouched. The
    /// check-and-increment runs as a single atomic read-modify-write, so
    /// `used <= cap` holds even with concurrent callers.
    ///
    /// The result must be inspected: dropping a `true` burns budget without
    /// an escalation, and dropping a `false` bypasses the cap.
    #[must_use = "a `false` result means no budget was reserved"]
    pub fn try_consume(&self) -> bool {
        let cap = self.cap;
        self.used
            .fetch_update(Ordering::AcqRel, Ordering::Acquire, |current| {
                // `current < cap` also guarantees `current + 1` cannot
                // overflow, because `cap <= usize::MAX`.
                if current < cap {
                    Some(current + 1)
                } else {
                    None
                }
            })
            .is_ok()
    }

    /// Number of escalations the scan has consumed so far.
    #[must_use]
    pub fn used(&self) -> usize {
        self.used.load(Ordering::Acquire)
    }

    /// Maximum number of escalations this budget allows.
    #[must_use]
    pub const fn cap(&self) -> usize {
        self.cap
    }
}
125
126/// Whether an `Uncertain` outcome from the cheap path is worth retrying
127/// through the browser.
128///
129/// We escalate only on reasons that a real browser plausibly resolves —
130/// Cloudflare interstitials and rate-limit / 429-style responses. Reasons
131/// that no transport change can fix (the operator opted into robots-
132/// disallowed, the username is locally invalid, the deadline elapsed, the
133/// egress pool can't satisfy a geo requirement, a session is missing) stay
134/// as-is so escalation doesn't waste budget on hopeless cases.
135pub(crate) const fn should_escalate(reason: &UncertainReason) -> bool {
136    matches!(
137        reason,
138        UncertainReason::CloudflareChallenge | UncertainReason::RateLimited
139    )
140}
141
#[cfg(test)]
mod tests {
    use super::*;

    /// Only the two edge-protection reasons may trigger a browser retry;
    /// every other reason must be left as-is.
    #[test]
    fn escalates_on_cloudflare_and_rate_limited_only() {
        let retryable = [
            UncertainReason::CloudflareChallenge,
            UncertainReason::RateLimited,
        ];
        for (idx, reason) in retryable.iter().enumerate() {
            assert!(should_escalate(reason), "retryable case #{idx} must escalate");
        }

        let terminal = [
            UncertainReason::Captcha,
            UncertainReason::RobotsDisallowed,
            UncertainReason::Deadline,
            UncertainReason::SchedulerClosed,
            UncertainReason::Network("refused".into()),
            UncertainReason::BodyRead("eof".into()),
            UncertainReason::BrowserBudget,
            UncertainReason::UsernameNotAllowed,
            UncertainReason::BrowserFailed("timeout".into()),
            UncertainReason::GeoUnavailable,
            UncertainReason::SessionRequired,
            UncertainReason::Other("?".into()),
        ];
        for (idx, reason) in terminal.iter().enumerate() {
            assert!(!should_escalate(reason), "terminal case #{idx} must not escalate");
        }
    }

    #[test]
    fn budget_consumes_up_to_cap() {
        let b = EscalationBudget::new(2);
        assert!(b.try_consume());
        assert!(b.try_consume());
        assert!(!b.try_consume());
        assert_eq!(b.used(), 2);
        assert_eq!(b.cap(), 2);
    }

    #[test]
    fn budget_zero_denies_all() {
        let b = EscalationBudget::new(0);
        assert!(!b.try_consume());
        // A denied request must not be counted.
        assert_eq!(b.used(), 0);
    }

    #[test]
    fn budget_unlimited_never_denies() {
        let b = EscalationBudget::unlimited();
        for _ in 0..1024 {
            assert!(b.try_consume());
        }
        assert_eq!(b.used(), 1024);
    }

    /// Pins the documented guarantee that `used <= cap` holds with
    /// concurrent callers: far more attempts than the cap are raced across
    /// several threads and exactly `CAP` of them may succeed.
    #[test]
    fn budget_never_exceeds_cap_under_contention() {
        const CAP: usize = 64;
        const THREADS: usize = 8;
        const ATTEMPTS_PER_THREAD: usize = 32;

        let budget = std::sync::Arc::new(EscalationBudget::new(CAP));
        // Spawn every worker before joining any so the attempts overlap.
        let workers: Vec<_> = (0..THREADS)
            .map(|_| {
                let budget = std::sync::Arc::clone(&budget);
                std::thread::spawn(move || {
                    (0..ATTEMPTS_PER_THREAD)
                        .filter(|_| budget.try_consume())
                        .count()
                })
            })
            .collect();

        let granted: usize = workers
            .into_iter()
            .map(|worker| worker.join().expect("worker thread panicked"))
            .sum();

        assert_eq!(granted, CAP);
        assert_eq!(budget.used(), CAP);
    }

    /// `as_str`, `Display` and the serde representation must agree for
    /// every tier, not just one of them.
    #[test]
    fn transport_tier_as_str_matches_serde() {
        let tiers = [
            (TransportTier::Http, "http"),
            (TransportTier::Impersonate, "impersonate"),
            (TransportTier::Browser, "browser"),
        ];
        for &(tier, label) in &tiers {
            assert_eq!(tier.as_str(), label);
            assert_eq!(tier.to_string(), label);

            let json = serde_json::to_string(&tier).unwrap();
            assert_eq!(json, format!("\"{label}\""));
            let back: TransportTier = serde_json::from_str(&json).unwrap();
            assert_eq!(back, tier);
        }
    }
}