Skip to main content

adler_core/client/
mod.rs

1//! HTTP client wrapping `reqwest`, plus the per-site probe entry point.
2//!
3//! The wrapper exists to keep `reqwest` out of Adler's public API surface.
4//! All knobs that future modules need (timeouts, redirect policy, user agent)
5//! are configured through [`ClientBuilder`]; per-request transient failures
6//! never bubble up as errors — they become
7//! [`MatchKind::Uncertain`](crate::MatchKind::Uncertain) on the returned
8//! outcome.
9
10use std::fmt;
11use std::sync::Arc;
12use std::time::Duration;
13
14use crate::access::{EgressPool, SessionStore};
15use crate::browser::{BrowserBackend, BrowserBudget};
16use crate::retry::RetryPolicy;
17use crate::robots::RobotsCache;
18use crate::throttle::HostThrottle;
19use crate::transport::HttpFetcher;
20#[cfg(feature = "impersonate")]
21use crate::transport::ImpersonateFetcher;
22
// NOTE(review): none of these defaults is read in this file; they are
// presumably consumed by the `builder` submodule when the matching knob
// is left unset — confirm against `builder.rs`.

/// Default request timeout: 10 seconds.
const DEFAULT_TIMEOUT: Duration = Duration::from_secs(10);
/// Default connect timeout: 5 seconds.
const DEFAULT_CONNECT_TIMEOUT: Duration = Duration::from_secs(5);
/// Default redirect limit: 8.
const DEFAULT_REDIRECT_LIMIT: usize = 8;
/// Default per-host interval: 100 milliseconds.
const DEFAULT_PER_HOST_INTERVAL: Duration = Duration::from_millis(100);
/// The one fixed key used with the global rate limiter, which gates all
/// hosts rather than a single one.
const GLOBAL_THROTTLE_KEY: &str = "*global*";
29
30/// HTTP client used to probe sites.
31///
32/// Cheap to clone — the underlying `reqwest::Client` is reference-counted
33/// internally, and the throttle is `Arc`-backed, so cloning is the
34/// recommended way to share a client between tasks. Cloned clients share
35/// throttle state, which is what you want: a fan-out scan must not
36/// accidentally exceed a per-host budget by spawning more clients.
37#[derive(Clone)]
38pub struct Client {
39    http: Arc<HttpFetcher>,
40    /// Geo / IP-type egress pool for sites whose `access` policy needs a
41    /// specific proxy. Empty by default → every site uses `http`.
42    egress: Arc<EgressPool>,
43    /// Operator-supplied sessions, keyed by the name a site references
44    /// via `access.session`. Empty by default.
45    sessions: Arc<SessionStore>,
46    throttle: HostThrottle,
47    /// Global RPS cap applied across all hosts. `None` → uncapped.
48    global_throttle: Option<HostThrottle>,
49    retry: RetryPolicy,
50    /// Optional rotation pool. Empty → use the client's fixed User-Agent.
51    /// `Arc<[String]>` so cloning a client per task stays cheap.
52    user_agents: Arc<[String]>,
53    /// Extract profile fields from `Found` pages that declare extractors.
54    enrich: bool,
55    /// When set, skip probes disallowed by the host's `robots.txt`.
56    robots: Option<RobotsCache>,
57    /// Browser backend used for `bot-protected` sites. `None` → those sites
58    /// stay on the raw HTTP path and typically end up `Uncertain`.
59    browser: Option<Arc<dyn BrowserBackend>>,
60    /// TLS-fingerprint-impersonating HTTP client (`wreq`). Built when
61    /// the `impersonate` Cargo feature is on; routes sites whose
62    /// `protection` is exactly `TlsFingerprint`.
63    #[cfg(feature = "impersonate")]
64    impersonate: Option<Arc<ImpersonateFetcher>>,
65    /// Per-scan cap on browser fetches. Shared across `Client::check` calls
66    /// for a single scan, so several tasks compete for the same budget.
67    browser_budget: Arc<BrowserBudget>,
68    /// Per-scan cap on *automatic escalations* from a cheap transport to
69    /// the browser when the cheap path returns
70    /// `Uncertain(CloudflareChallenge | RateLimited)`. Independent of
71    /// `browser_budget` so the pre-tagged `bot-protected` subset and the
72    /// long-tail escalation subset don't fight over the same number.
73    escalation_budget: Arc<crate::escalation::EscalationBudget>,
74    /// Whether automatic escalation runs at all. `false` keeps the cheap
75    /// transport's outcome verbatim — useful for benchmarking the raw
76    /// signals without the access-engine lift on top.
77    escalation_enabled: bool,
78}
79
80impl Client {
81    /// Start configuring a new client.
82    pub fn builder() -> ClientBuilder {
83        ClientBuilder::default()
84    }
85
86    /// Read-only view of the configured egress pool — `(country, kind)`
87    /// for every registered proxy, in the order they were declared.
88    /// Proxy URLs are not surfaced (they typically carry credentials),
89    /// so this is safe to serialise to a JSON response.
90    #[must_use]
91    pub fn egress_summary(&self) -> Vec<crate::access::EgressSummary> {
92        self.egress.summary()
93    }
94
95    /// Names of the configured sessions (sorted lexicographically),
96    /// without any header values. Useful for a UI listing which session
97    /// keys an operator can reference via `access.session` on a site.
98    #[must_use]
99    pub fn session_names(&self) -> Vec<String> {
100        self.sessions.names()
101    }
102
103    /// Names of the configured egresses (in registration order, only
104    /// those that supplied a name). Used by the server to validate
105    /// per-scan `egress_names` against the loaded pool.
106    #[must_use]
107    pub fn egress_names(&self) -> Vec<String> {
108        self.egress.names()
109    }
110
111    /// Returns a new client identical to this one except its egress
112    /// pool is restricted to entries whose `name` matches one of
113    /// `names`. An empty `names` slice is treated as "no filter" and
114    /// returns a clone of the full pool.
115    ///
116    /// Cheap to call repeatedly: all shared state (HTTP clients,
117    /// throttle, sessions, budgets, browser backend, …) is
118    /// `Arc`-cloned so the returned client shares the parent's
119    /// per-scan caps (browser budget, escalation budget, throttle
120    /// state) rather than each subset getting a fresh one. This is the
121    /// right behaviour for a single web-server instance handing out
122    /// per-request clients.
123    #[must_use]
124    pub fn with_egress_subset(&self, names: &[String]) -> Self {
125        Self {
126            http: Arc::clone(&self.http),
127            egress: Arc::new(self.egress.subset(names)),
128            sessions: Arc::clone(&self.sessions),
129            throttle: self.throttle.clone(),
130            global_throttle: self.global_throttle.clone(),
131            retry: self.retry.clone(),
132            user_agents: Arc::clone(&self.user_agents),
133            enrich: self.enrich,
134            robots: self.robots.clone(),
135            browser: self.browser.clone(),
136            #[cfg(feature = "impersonate")]
137            impersonate: self.impersonate.clone(),
138            browser_budget: Arc::clone(&self.browser_budget),
139            escalation_budget: Arc::clone(&self.escalation_budget),
140            escalation_enabled: self.escalation_enabled,
141        }
142    }
143}
144
/// Unprocessed response data handed back by [`Client::fetch`], intended
/// for diagnostics.
#[derive(Debug, Clone)]
pub struct RawResponse {
    /// Numeric HTTP status code of the response.
    pub status: u16,
    /// URL the request ended up at once redirects were followed.
    pub final_url: String,
    /// Response body, already decoded to text.
    pub body: String,
}
155
156impl fmt::Debug for Client {
157    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
158        f.debug_struct("Client")
159            .field("throttle", &self.throttle)
160            .field("global_throttle", &self.global_throttle)
161            .field("retry", &self.retry)
162            .field("user_agents", &self.user_agents)
163            .field("enrich", &self.enrich)
164            .field("robots", &self.robots.is_some())
165            .field("browser", &self.browser.is_some())
166            .field("browser_budget", &self.browser_budget)
167            .field("escalation_budget", &self.escalation_budget)
168            .field("escalation_enabled", &self.escalation_enabled)
169            .finish_non_exhaustive()
170    }
171}
172
/// Registry tag that flags a site as bot-protected.
///
/// It is put on sites sitting behind Cloudflare, `PerimeterX`, datadome,
/// `hCaptcha` and similar services. The routing layer reads it as a hint
/// that residential egress is likely required, while the doctor and
/// registry-summary surfaces use it to annotate honest-limit audits.
/// Tag comparison is done with [`str::eq_ignore_ascii_case`].
pub const BOT_PROTECTED_TAG: &str = "bot-protected";
181
182mod builder;
183mod probe;
184mod util;
185pub use builder::{ClientBuilder, DEFAULT_BROWSER_BUDGET, DEFAULT_ESCALATION_BUDGET};
186
187#[cfg(test)]
188mod tests {
189    use super::*;
190    use crate::browser::RenderedPage;
191    use crate::check::{MatchKind, UncertainReason};
192    use crate::error::{Error, Result};
193    use crate::site::{HttpMethod, Signal, Site, UrlTemplate};
194    use crate::username::Username;
195    use std::time::Instant;
196    use wiremock::matchers::{any, method, path};
197    use wiremock::{Mock, MockServer, ResponseTemplate};
198
199    use crate::test_fixtures::{default_site, test_client};
200
201    fn build_client() -> Client {
202        test_client()
203    }
204
205    fn site_with(server: &MockServer, signals: Vec<Signal>) -> Site {
206        let mut s = default_site("Mock", &format!("{}/{{username}}", server.uri()));
207        s.signals = signals;
208        s
209    }
210
211    fn user() -> Username {
212        Username::new("alice").unwrap()
213    }
214
215    #[tokio::test]
216    async fn regex_check_short_circuits_before_any_request() {
217        // Stand up a mock that would 200 on *anything* — if probe_once
218        // failed to short-circuit on regex mismatch, the username
219        // "alice" (5 chars) would resolve to Found here.
220        let server = MockServer::start().await;
221        Mock::given(any())
222            .respond_with(ResponseTemplate::new(200))
223            .mount(&server)
224            .await;
225        let mut site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
226        // The site only accepts usernames of 8+ chars; "alice" is 5.
227        site.regex_check = Some("^[A-Za-z]{8,}$".into());
228        let outcome = build_client().check(&site, &user()).await;
229        assert_eq!(outcome.kind, MatchKind::Uncertain);
230        assert!(
231            matches!(outcome.reason, Some(UncertainReason::UsernameNotAllowed)),
232            "expected UsernameNotAllowed, got {:?}",
233            outcome.reason,
234        );
235        // No request should have hit the mock — assert by counting
236        // received_requests on the wiremock server.
237        let recvd = server.received_requests().await.unwrap_or_default();
238        assert_eq!(
239            recvd.len(),
240            0,
241            "regex_check mismatch must skip the HTTP request entirely"
242        );
243    }
244
245    #[tokio::test]
246    async fn geo_constrained_site_with_no_egress_is_geo_unavailable() {
247        // A mock that would 200 on anything — if the geo gate failed to
248        // short-circuit, "alice" would resolve to Found here.
249        let server = MockServer::start().await;
250        Mock::given(any())
251            .respond_with(ResponseTemplate::new(200))
252            .mount(&server)
253            .await;
254        let mut site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
255        // Require a Polish egress; the default client has no egress pool,
256        // so nothing can satisfy it.
257        site.access = crate::access::AccessPolicy {
258            geo: vec![crate::access::CountryCode::new("pl").unwrap()],
259            ..crate::access::AccessPolicy::default()
260        };
261        let outcome = build_client().check(&site, &user()).await;
262        assert_eq!(outcome.kind, MatchKind::Uncertain);
263        assert!(
264            matches!(outcome.reason, Some(UncertainReason::GeoUnavailable)),
265            "expected GeoUnavailable, got {:?}",
266            outcome.reason,
267        );
268        // The site must NOT have been probed — an unreachable geo is not
269        // evidence of absence, and we don't fetch from the wrong location.
270        let recvd = server.received_requests().await.unwrap_or_default();
271        assert_eq!(
272            recvd.len(),
273            0,
274            "geo-unavailable must skip the HTTP request entirely"
275        );
276    }
277
278    #[tokio::test]
279    async fn session_headers_are_sent_on_probe() {
280        // Only respond 200 when the request carries the session cookie,
281        // so a Found verdict proves the header was actually applied.
282        let server = MockServer::start().await;
283        Mock::given(any())
284            .and(wiremock::matchers::header("cookie", "sessionid=real"))
285            .respond_with(ResponseTemplate::new(200))
286            .mount(&server)
287            .await;
288        let mut headers = std::collections::BTreeMap::new();
289        headers.insert("Cookie".to_string(), "sessionid=real".to_string());
290        let mut store = SessionStore::new();
291        store.insert("acct", crate::access::Session::from_headers(headers));
292        let client = Client::builder()
293            .timeout(Duration::from_secs(2))
294            .min_request_interval(Duration::ZERO)
295            .max_retries(0)
296            .sessions(store)
297            .build()
298            .expect("client builds");
299        let mut site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
300        site.access.session = Some("acct".to_string());
301        let outcome = client.check(&site, &user()).await;
302        assert_eq!(
303            outcome.kind,
304            MatchKind::Found,
305            "session cookie should unlock the 200 (got {:?})",
306            outcome.reason,
307        );
308    }
309
310    #[tokio::test]
311    async fn missing_named_session_is_session_required() {
312        let server = MockServer::start().await;
313        Mock::given(any())
314            .respond_with(ResponseTemplate::new(200))
315            .mount(&server)
316            .await;
317        let mut site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
318        // Names a session the (empty) store doesn't have.
319        site.access.session = Some("not-configured".to_string());
320        let outcome = build_client().check(&site, &user()).await;
321        assert_eq!(outcome.kind, MatchKind::Uncertain);
322        assert!(
323            matches!(outcome.reason, Some(UncertainReason::SessionRequired)),
324            "expected SessionRequired, got {:?}",
325            outcome.reason,
326        );
327        let recvd = server.received_requests().await.unwrap_or_default();
328        assert_eq!(
329            recvd.len(),
330            0,
331            "a missing session must skip the request, not probe unauthenticated"
332        );
333    }
334
335    #[cfg(feature = "impersonate")]
336    #[tokio::test]
337    async fn impersonate_routes_pure_tls_fingerprint_site() {
338        let server = MockServer::start().await;
339        Mock::given(any())
340            .respond_with(ResponseTemplate::new(200))
341            .mount(&server)
342            .await;
343        let client = Client::builder()
344            .timeout(Duration::from_secs(2))
345            .min_request_interval(Duration::ZERO)
346            .max_retries(0)
347            .build()
348            .expect("client builds with impersonate");
349        let mut site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
350        // Pure TLS-fingerprint protection — exactly the shape that
351        // routes to the impersonate fetcher.
352        site.protection = vec![crate::site::ProtectionKind::TlsFingerprint];
353        let outcome = client.check(&site, &user()).await;
354        assert_eq!(
355            outcome.kind,
356            MatchKind::Found,
357            "expected Found (reason {:?})",
358            outcome.reason,
359        );
360        // wreq's Chrome-134 emulation sets a Chrome-shaped User-Agent —
361        // observable proof that the request came from the impersonate
362        // path and not the default `adler/<version>` HTTP fetcher.
363        let recvd = server.received_requests().await.expect("received requests");
364        assert_eq!(recvd.len(), 1, "expected exactly one request");
365        let ua = recvd[0]
366            .headers
367            .get("user-agent")
368            .and_then(|v| v.to_str().ok())
369            .unwrap_or("");
370        assert!(
371            ua.contains("Chrome/"),
372            "expected Chrome-shaped UA from wreq, got {ua:?}"
373        );
374    }
375
376    #[tokio::test]
377    async fn regex_check_pass_proceeds_to_probe() {
378        let server = MockServer::start().await;
379        Mock::given(any())
380            .and(path("/alice"))
381            .respond_with(ResponseTemplate::new(200))
382            .mount(&server)
383            .await;
384        let mut site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
385        // Pattern that matches "alice".
386        site.regex_check = Some("^[a-z]{3,}$".into());
387        let outcome = build_client().check(&site, &user()).await;
388        assert_eq!(outcome.kind, MatchKind::Found);
389    }
390
391    #[tokio::test]
392    async fn status_signal_reports_found_on_match() {
393        let server = MockServer::start().await;
394        Mock::given(any())
395            .and(path("/alice"))
396            .respond_with(ResponseTemplate::new(200))
397            .mount(&server)
398            .await;
399        let site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
400        let outcome = build_client().check(&site, &user()).await;
401        assert_eq!(outcome.kind, MatchKind::Found);
402        assert!(outcome.url.ends_with("/alice"));
403        assert!(outcome.reason.is_none());
404        assert_eq!(outcome.evidence, ["HTTP 200 (status_found)"]);
405    }
406
407    #[tokio::test]
408    async fn status_signal_pair_reports_not_found_on_404() {
409        let server = MockServer::start().await;
410        Mock::given(any())
411            .and(path("/alice"))
412            .respond_with(ResponseTemplate::new(404))
413            .mount(&server)
414            .await;
415        let site = site_with(
416            &server,
417            vec![
418                Signal::StatusFound { codes: vec![200] },
419                Signal::StatusNotFound { codes: vec![404] },
420            ],
421        );
422        let outcome = build_client().check(&site, &user()).await;
423        assert_eq!(outcome.kind, MatchKind::NotFound);
424        // Only the NotFound-voting signal is cited as evidence.
425        assert_eq!(outcome.evidence, ["HTTP 404 (status_not_found)"]);
426    }
427
428    #[tokio::test]
429    async fn body_absent_signal_detects_missing_account() {
430        let server = MockServer::start().await;
431        Mock::given(any())
432            .and(path("/alice"))
433            .respond_with(ResponseTemplate::new(200).set_body_string("<h1>Profile not found</h1>"))
434            .mount(&server)
435            .await;
436        let site = site_with(
437            &server,
438            vec![Signal::BodyAbsent {
439                text: "Profile not found".into(),
440            }],
441        );
442        let outcome = build_client().check(&site, &user()).await;
443        assert_eq!(outcome.kind, MatchKind::NotFound);
444    }
445
446    #[tokio::test]
447    async fn body_absent_alone_yields_uncertain_when_marker_missing() {
448        // Phase 2 semantics: absence of an absence-marker is not evidence
449        // of presence — it just means we have no signal that fired.
450        let server = MockServer::start().await;
451        Mock::given(any())
452            .and(path("/alice"))
453            .respond_with(ResponseTemplate::new(200).set_body_string("<h1>Welcome alice</h1>"))
454            .mount(&server)
455            .await;
456        let site = site_with(
457            &server,
458            vec![Signal::BodyAbsent {
459                text: "Profile not found".into(),
460            }],
461        );
462        let outcome = build_client().check(&site, &user()).await;
463        assert_eq!(outcome.kind, MatchKind::Uncertain);
464    }
465
466    #[tokio::test]
467    async fn body_present_plus_absent_resolve_to_found() {
468        let server = MockServer::start().await;
469        Mock::given(any())
470            .and(path("/alice"))
471            .respond_with(
472                ResponseTemplate::new(200)
473                    .set_body_string(r#"<div class="profile-card">alice</div>"#),
474            )
475            .mount(&server)
476            .await;
477        let site = site_with(
478            &server,
479            vec![
480                Signal::BodyPresent {
481                    text: "profile-card".into(),
482                },
483                Signal::BodyAbsent {
484                    text: "Profile not found".into(),
485                },
486            ],
487        );
488        let outcome = build_client().check(&site, &user()).await;
489        assert_eq!(outcome.kind, MatchKind::Found);
490    }
491
492    #[tokio::test]
493    async fn redirect_absent_signal_detects_missing_account() {
494        let server = MockServer::start().await;
495        Mock::given(any())
496            .and(path("/alice"))
497            .respond_with(
498                ResponseTemplate::new(302).insert_header("location", "/login?next=/alice"),
499            )
500            .mount(&server)
501            .await;
502        Mock::given(any())
503            .and(path("/login"))
504            .respond_with(ResponseTemplate::new(200).set_body_string("login page"))
505            .mount(&server)
506            .await;
507        let site = site_with(
508            &server,
509            vec![Signal::RedirectAbsent {
510                fragment: "/login".into(),
511            }],
512        );
513        let outcome = build_client().check(&site, &user()).await;
514        assert_eq!(outcome.kind, MatchKind::NotFound);
515    }
516
517    #[tokio::test]
518    async fn negative_signal_wins_over_positive() {
519        // StatusFound votes Found (200 matches); BodyAbsent votes NotFound
520        // (error marker appears). Negative-priority aggregation → NotFound.
521        // This is the canonical Sherlock "message" pattern: a site that
522        // returns 200 for everyone and differentiates via an error string.
523        let server = MockServer::start().await;
524        Mock::given(any())
525            .and(path("/alice"))
526            .respond_with(ResponseTemplate::new(200).set_body_string("Profile not found"))
527            .mount(&server)
528            .await;
529        let site = site_with(
530            &server,
531            vec![
532                Signal::StatusFound { codes: vec![200] },
533                Signal::BodyAbsent {
534                    text: "Profile not found".into(),
535                },
536            ],
537        );
538        let outcome = build_client().check(&site, &user()).await;
539        assert_eq!(outcome.kind, MatchKind::NotFound);
540    }
541
542    #[tokio::test]
543    async fn network_failure_yields_uncertain() {
544        let listener = std::net::TcpListener::bind("127.0.0.1:0").unwrap();
545        let port = listener.local_addr().unwrap().port();
546        drop(listener);
547
548        let site = Site {
549            name: "Dead".into(),
550            url: UrlTemplate::new(format!("http://127.0.0.1:{port}/{{username}}")).unwrap(),
551            signals: vec![Signal::StatusFound { codes: vec![200] }],
552            known_present: None,
553            known_absent: None,
554            extract: Vec::new(),
555            tags: Vec::new(),
556            request_headers: std::collections::BTreeMap::new(),
557            regex_check: None,
558            engine: None,
559            strip_bad_char: None,
560            request_method: crate::site::HttpMethod::Get,
561            request_body: None,
562            protection: Vec::new(),
563            disabled: false,
564            disabled_reason: None,
565            source: None,
566            popularity: None,
567            access: crate::AccessPolicy::default(),
568        };
569        let client = Client::builder()
570            .timeout(Duration::from_millis(500))
571            .connect_timeout(Duration::from_millis(500))
572            .max_retries(0)
573            .build()
574            .unwrap();
575        let outcome = client.check(&site, &user()).await;
576        assert_eq!(outcome.kind, MatchKind::Uncertain);
577        assert!(outcome.reason.is_some());
578    }
579
580    #[tokio::test]
581    async fn throttle_spaces_consecutive_calls_to_same_host() {
582        let server = MockServer::start().await;
583        Mock::given(any())
584            .and(path("/alice"))
585            .respond_with(ResponseTemplate::new(200))
586            .mount(&server)
587            .await;
588        let site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
589        // Interval is intentionally much larger than typical wiremock latency
590        // (≤10 ms locally, can spike under heavy parallel test load). Any
591        // value too close to HTTP latency would let the first request burn
592        // through the throttle window and make the assertion flaky.
593        let client = Client::builder()
594            .timeout(Duration::from_secs(2))
595            .min_request_interval(Duration::from_millis(300))
596            .build()
597            .unwrap();
598
599        client.check(&site, &user()).await;
600        let started = Instant::now();
601        client.check(&site, &user()).await;
602        let elapsed = started.elapsed();
603        assert!(
604            elapsed >= Duration::from_millis(200),
605            "second probe to the same host should wait ≥200 ms, got {elapsed:?}",
606        );
607    }
608
609    #[tokio::test]
610    async fn builder_overrides_user_agent() {
611        let server = MockServer::start().await;
612        Mock::given(any())
613            .and(path("/alice"))
614            .and(wiremock::matchers::header("user-agent", "adler-test/1.0"))
615            .respond_with(ResponseTemplate::new(200))
616            .mount(&server)
617            .await;
618        let site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
619        let client = Client::builder()
620            .user_agent("adler-test/1.0")
621            .build()
622            .unwrap();
623        let outcome = client.check(&site, &user()).await;
624        assert_eq!(outcome.kind, MatchKind::Found);
625    }
626
627    #[tokio::test]
628    async fn rate_limit_429_yields_uncertain_with_note() {
629        let server = MockServer::start().await;
630        Mock::given(any())
631            .and(path("/alice"))
632            .respond_with(ResponseTemplate::new(429))
633            .mount(&server)
634            .await;
635        let site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
636        let outcome = build_client().check(&site, &user()).await;
637        assert_eq!(outcome.kind, MatchKind::Uncertain);
638        assert_eq!(outcome.reason, Some(UncertainReason::RateLimited));
639    }
640
641    #[tokio::test]
642    async fn cloudflare_server_header_yields_uncertain() {
643        let server = MockServer::start().await;
644        Mock::given(any())
645            .and(path("/alice"))
646            .respond_with(ResponseTemplate::new(503).insert_header("server", "cloudflare"))
647            .mount(&server)
648            .await;
649        let site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
650        let outcome = build_client().check(&site, &user()).await;
651        assert_eq!(outcome.kind, MatchKind::Uncertain);
652        assert_eq!(outcome.reason, Some(UncertainReason::CloudflareChallenge));
653    }
654
655    #[tokio::test]
656    async fn cloudflare_interstitial_in_body_yields_uncertain() {
657        // Body-based ban detection only runs when a signal already needs
658        // the body — this site uses BodyAbsent so the body is read.
659        let server = MockServer::start().await;
660        Mock::given(any())
661            .and(path("/alice"))
662            .respond_with(
663                ResponseTemplate::new(200)
664                    .set_body_string("<html><head><title>Just a moment...</title></head></html>"),
665            )
666            .mount(&server)
667            .await;
668        let site = site_with(
669            &server,
670            vec![Signal::BodyAbsent {
671                text: "Profile not found".into(),
672            }],
673        );
674        let outcome = build_client().check(&site, &user()).await;
675        assert_eq!(outcome.kind, MatchKind::Uncertain);
676        assert_eq!(outcome.reason, Some(UncertainReason::CloudflareChallenge));
677    }
678
679    #[tokio::test]
680    async fn ban_detection_does_not_fire_on_legitimate_403() {
681        let server = MockServer::start().await;
682        Mock::given(any())
683            .and(path("/alice"))
684            .respond_with(ResponseTemplate::new(403))
685            .mount(&server)
686            .await;
687        let site = site_with(
688            &server,
689            vec![
690                Signal::StatusFound { codes: vec![200] },
691                Signal::StatusNotFound { codes: vec![403] },
692            ],
693        );
694        let outcome = build_client().check(&site, &user()).await;
695        // 403 is ambiguous for bans; site explicitly maps it to NotFound.
696        assert_eq!(outcome.kind, MatchKind::NotFound);
697        assert!(outcome.reason.is_none());
698    }
699
700    #[tokio::test]
701    async fn retry_recovers_after_transient_429() {
702        let server = MockServer::start().await;
703        // First request: 429. Subsequent: 200.
704        Mock::given(any())
705            .and(path("/alice"))
706            .respond_with(ResponseTemplate::new(429))
707            .up_to_n_times(1)
708            .mount(&server)
709            .await;
710        Mock::given(any())
711            .and(path("/alice"))
712            .respond_with(ResponseTemplate::new(200))
713            .mount(&server)
714            .await;
715        let site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
716        let client = Client::builder()
717            .timeout(Duration::from_secs(2))
718            .min_request_interval(Duration::ZERO)
719            .max_retries(2)
720            .base_backoff_delay(Duration::from_millis(20))
721            .max_backoff_delay(Duration::from_millis(100))
722            .build()
723            .unwrap();
724        let outcome = client.check(&site, &user()).await;
725        assert_eq!(outcome.kind, MatchKind::Found);
726        assert!(outcome.reason.is_none());
727    }
728
729    #[tokio::test]
730    async fn retry_exhausts_and_returns_uncertain() {
731        let server = MockServer::start().await;
732        Mock::given(any())
733            .and(path("/alice"))
734            .respond_with(ResponseTemplate::new(429))
735            .mount(&server)
736            .await;
737        let site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
738        let client = Client::builder()
739            .timeout(Duration::from_secs(2))
740            .min_request_interval(Duration::ZERO)
741            .max_retries(2)
742            .base_backoff_delay(Duration::from_millis(10))
743            .max_backoff_delay(Duration::from_millis(50))
744            .build()
745            .unwrap();
746        let outcome = client.check(&site, &user()).await;
747        assert_eq!(outcome.kind, MatchKind::Uncertain);
748        assert_eq!(outcome.reason, Some(UncertainReason::RateLimited));
749    }
750
751    #[tokio::test]
752    async fn retry_does_not_fire_on_network_error() {
753        // Connection refused → Uncertain note starts with "request:", not a
754        // ban marker. We must NOT retry — otherwise a single dead site
755        // burns the full backoff budget before reporting.
756        let listener = std::net::TcpListener::bind("127.0.0.1:0").unwrap();
757        let port = listener.local_addr().unwrap().port();
758        drop(listener);
759        let site = Site {
760            name: "Dead".into(),
761            url: UrlTemplate::new(format!("http://127.0.0.1:{port}/{{username}}")).unwrap(),
762            signals: vec![Signal::StatusFound { codes: vec![200] }],
763            known_present: None,
764            known_absent: None,
765            extract: Vec::new(),
766            tags: Vec::new(),
767            request_headers: std::collections::BTreeMap::new(),
768            regex_check: None,
769            engine: None,
770            strip_bad_char: None,
771            request_method: crate::site::HttpMethod::Get,
772            request_body: None,
773            protection: Vec::new(),
774            disabled: false,
775            disabled_reason: None,
776            source: None,
777            popularity: None,
778            access: crate::AccessPolicy::default(),
779        };
780        let client = Client::builder()
781            .timeout(Duration::from_millis(500))
782            .connect_timeout(Duration::from_millis(500))
783            .min_request_interval(Duration::ZERO)
784            .max_retries(3)
785            .base_backoff_delay(Duration::from_secs(60))
786            .build()
787            .unwrap();
788        let started = Instant::now();
789        let outcome = client.check(&site, &user()).await;
790        // If retry fired, we'd be sleeping minutes; instead this returns
791        // promptly with an Uncertain.
792        assert!(started.elapsed() < Duration::from_secs(5));
793        assert_eq!(outcome.kind, MatchKind::Uncertain);
794        assert!(
795            matches!(outcome.reason, Some(UncertainReason::Network(_))),
796            "got {:?}",
797            outcome.reason,
798        );
799    }
800
801    #[tokio::test]
802    async fn rotates_user_agent_per_request() {
803        // The mock only matches when the request carries one of the pooled
804        // UAs; if rotation weren't applied, the default adler/x.y UA would
805        // miss and the verdict would be NotFound.
806        let server = MockServer::start().await;
807        Mock::given(any())
808            .and(path("/alice"))
809            .and(wiremock::matchers::header("user-agent", "RotatorUA/9.9"))
810            .respond_with(ResponseTemplate::new(200))
811            .mount(&server)
812            .await;
813        let site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
814        let client = Client::builder()
815            .min_request_interval(Duration::ZERO)
816            .max_retries(0)
817            .rotate_user_agents(vec!["RotatorUA/9.9".into()])
818            .build()
819            .unwrap();
820        let outcome = client.check(&site, &user()).await;
821        assert_eq!(outcome.kind, MatchKind::Found);
822    }
823
824    #[test]
825    fn invalid_proxy_url_fails_build() {
826        let err = Client::builder().proxy("not a url").build().unwrap_err();
827        assert!(matches!(err, Error::HttpSetup { .. }));
828    }
829
830    #[test]
831    fn schemeless_proxy_is_rejected_up_front() {
832        // reqwest would silently treat this as a host; we require a scheme.
833        let err = Client::builder().proxy("not-a-url").build().unwrap_err();
834        let Error::HttpSetup { message } = err else {
835            panic!("expected HttpSetup, got {err:?}");
836        };
837        assert!(message.contains("must start with"), "{message}");
838    }
839
840    #[test]
841    fn socks5_proxy_scheme_is_accepted() {
842        // Valid scheme + endpoint builds fine (no connection is attempted).
843        assert!(
844            Client::builder()
845                .proxy("socks5://127.0.0.1:9050")
846                .build()
847                .is_ok()
848        );
849    }
850
851    #[tokio::test]
852    async fn global_rps_cap_spaces_requests_across_hosts() {
853        // Two distinct host paths; per-host throttle is disabled, so any
854        // spacing must come from the global RPS cap. 5 RPS → 200 ms apart.
855        let server = MockServer::start().await;
856        Mock::given(any())
857            .respond_with(ResponseTemplate::new(200))
858            .mount(&server)
859            .await;
860        let site_a = Site {
861            name: "A".into(),
862            url: UrlTemplate::new(format!("{}/a/{{username}}", server.uri())).unwrap(),
863            signals: vec![Signal::StatusFound { codes: vec![200] }],
864            known_present: None,
865            known_absent: None,
866            extract: Vec::new(),
867            tags: Vec::new(),
868            request_headers: std::collections::BTreeMap::new(),
869            regex_check: None,
870            engine: None,
871            strip_bad_char: None,
872            request_method: crate::site::HttpMethod::Get,
873            request_body: None,
874            protection: Vec::new(),
875            disabled: false,
876            disabled_reason: None,
877            source: None,
878            popularity: None,
879            access: crate::AccessPolicy::default(),
880        };
881        let site_b = Site {
882            name: "B".into(),
883            url: UrlTemplate::new(format!("{}/b/{{username}}", server.uri())).unwrap(),
884            signals: vec![Signal::StatusFound { codes: vec![200] }],
885            known_present: None,
886            known_absent: None,
887            extract: Vec::new(),
888            tags: Vec::new(),
889            request_headers: std::collections::BTreeMap::new(),
890            regex_check: None,
891            engine: None,
892            strip_bad_char: None,
893            request_method: crate::site::HttpMethod::Get,
894            request_body: None,
895            protection: Vec::new(),
896            disabled: false,
897            disabled_reason: None,
898            source: None,
899            popularity: None,
900            access: crate::AccessPolicy::default(),
901        };
902        // 2 RPS → ~500 ms between requests. A large interval keeps the
903        // assertion robust even when the first probe's own duration (which
904        // eats into the measured gap) is inflated by test instrumentation
905        // such as coverage tooling.
906        let client = Client::builder()
907            .min_request_interval(Duration::ZERO)
908            .max_retries(0)
909            .max_rps(std::num::NonZeroU32::new(2).unwrap())
910            .build()
911            .unwrap();
912        // First request consumes the slot at t≈0; second waits ~500 ms even
913        // though it targets a different host.
914        client.check(&site_a, &user()).await;
915        let started = Instant::now();
916        client.check(&site_b, &user()).await;
917        assert!(
918            started.elapsed() >= Duration::from_millis(350),
919            "global cap should space cross-host requests, got {:?}",
920            started.elapsed(),
921        );
922    }
923
924    #[tokio::test]
925    async fn respect_robots_skips_disallowed_paths() {
926        let server = MockServer::start().await;
927        Mock::given(any())
928            .and(path("/robots.txt"))
929            .respond_with(
930                ResponseTemplate::new(200).set_body_string("User-agent: *\nDisallow: /no"),
931            )
932            .mount(&server)
933            .await;
934        Mock::given(any())
935            .and(path("/no/alice"))
936            .respond_with(ResponseTemplate::new(200))
937            .mount(&server)
938            .await;
939        Mock::given(any())
940            .and(path("/yes/alice"))
941            .respond_with(ResponseTemplate::new(200))
942            .mount(&server)
943            .await;
944        let client = Client::builder()
945            .min_request_interval(Duration::ZERO)
946            .max_retries(0)
947            .respect_robots(true)
948            .build()
949            .unwrap();
950
951        let disallowed = Site {
952            name: "No".into(),
953            url: UrlTemplate::new(format!("{}/no/{{username}}", server.uri())).unwrap(),
954            signals: vec![Signal::StatusFound { codes: vec![200] }],
955            known_present: None,
956            known_absent: None,
957            extract: Vec::new(),
958            tags: Vec::new(),
959            request_headers: std::collections::BTreeMap::new(),
960            regex_check: None,
961            engine: None,
962            strip_bad_char: None,
963            request_method: crate::site::HttpMethod::Get,
964            request_body: None,
965            protection: Vec::new(),
966            disabled: false,
967            disabled_reason: None,
968            source: None,
969            popularity: None,
970            access: crate::AccessPolicy::default(),
971        };
972        let allowed = Site {
973            name: "Yes".into(),
974            url: UrlTemplate::new(format!("{}/yes/{{username}}", server.uri())).unwrap(),
975            signals: vec![Signal::StatusFound { codes: vec![200] }],
976            known_present: None,
977            known_absent: None,
978            extract: Vec::new(),
979            tags: Vec::new(),
980            request_headers: std::collections::BTreeMap::new(),
981            regex_check: None,
982            engine: None,
983            strip_bad_char: None,
984            request_method: crate::site::HttpMethod::Get,
985            request_body: None,
986            protection: Vec::new(),
987            disabled: false,
988            disabled_reason: None,
989            source: None,
990            popularity: None,
991            access: crate::AccessPolicy::default(),
992        };
993
994        let no = client.check(&disallowed, &user()).await;
995        assert_eq!(no.kind, MatchKind::Uncertain);
996        assert_eq!(no.reason, Some(UncertainReason::RobotsDisallowed));
997
998        let yes = client.check(&allowed, &user()).await;
999        assert_eq!(yes.kind, MatchKind::Found);
1000    }
1001
1002    #[tokio::test]
1003    async fn body_read_skipped_when_no_body_signal_needed() {
1004        // Mock returns body that would fail a body_absent check — but since
1005        // we only have a status signal, body is never read.
1006        let server = MockServer::start().await;
1007        Mock::given(any())
1008            .and(path("/alice"))
1009            .respond_with(ResponseTemplate::new(200).set_body_string("Profile not found"))
1010            .mount(&server)
1011            .await;
1012        let site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
1013        let outcome = build_client().check(&site, &user()).await;
1014        assert_eq!(outcome.kind, MatchKind::Found);
1015    }
1016
1017    // ===== Browser routing =====
1018
1019    /// Test backend that returns a canned page and counts calls. Lets the
1020    /// routing tests assert "Client did/did not invoke the browser" without
1021    /// involving a real Chrome process.
1022    #[derive(Debug)]
1023    struct RecordingBackend {
1024        page: RenderedPage,
1025        calls: std::sync::atomic::AtomicUsize,
1026    }
1027
1028    impl RecordingBackend {
1029        fn with_page(page: RenderedPage) -> Self {
1030            Self {
1031                page,
1032                calls: std::sync::atomic::AtomicUsize::new(0),
1033            }
1034        }
1035        fn call_count(&self) -> usize {
1036            self.calls.load(std::sync::atomic::Ordering::SeqCst)
1037        }
1038    }
1039
1040    #[async_trait::async_trait]
1041    impl BrowserBackend for RecordingBackend {
1042        async fn fetch(
1043            &self,
1044            _url: &url::Url,
1045            _headers: &std::collections::BTreeMap<String, String>,
1046            _timeout: Duration,
1047        ) -> Result<RenderedPage> {
1048            self.calls.fetch_add(1, std::sync::atomic::Ordering::SeqCst);
1049            Ok(self.page.clone())
1050        }
1051    }
1052
1053    fn site_bot_protected(server: &MockServer) -> Site {
1054        let mut s = site_with(server, vec![Signal::StatusFound { codes: vec![200] }]);
1055        s.tags = vec![BOT_PROTECTED_TAG.into()];
1056        s
1057    }
1058
1059    #[tokio::test]
1060    async fn browser_routes_bot_protected_sites() {
1061        // wiremock would *not* fire (raw HTTP path is skipped) — the backend
1062        // returns its canned page directly.
1063        let server = MockServer::start().await;
1064        let backend = Arc::new(RecordingBackend::with_page(RenderedPage {
1065            status: 200,
1066            final_url: url::Url::parse("https://example.com/alice").unwrap(),
1067            body: "<html></html>".into(),
1068            elapsed_ms: 42,
1069        }));
1070        let client = Client::builder()
1071            .min_request_interval(Duration::ZERO)
1072            .max_retries(0)
1073            .browser(backend.clone())
1074            .build()
1075            .unwrap();
1076        let outcome = client.check(&site_bot_protected(&server), &user()).await;
1077        assert_eq!(outcome.kind, MatchKind::Found);
1078        assert_eq!(backend.call_count(), 1, "browser invoked exactly once");
1079    }
1080
1081    #[tokio::test]
1082    async fn non_bot_protected_sites_skip_browser() {
1083        let server = MockServer::start().await;
1084        Mock::given(any())
1085            .and(path("/alice"))
1086            .respond_with(ResponseTemplate::new(200))
1087            .mount(&server)
1088            .await;
1089        let backend = Arc::new(RecordingBackend::with_page(RenderedPage {
1090            status: 500, // would make wiremock case fail if browser was taken
1091            final_url: url::Url::parse("https://x/").unwrap(),
1092            body: String::new(),
1093            elapsed_ms: 0,
1094        }));
1095        let client = Client::builder()
1096            .min_request_interval(Duration::ZERO)
1097            .max_retries(0)
1098            .browser(backend.clone())
1099            .build()
1100            .unwrap();
1101        // site WITHOUT bot-protected tag → must go via raw HTTP (wiremock).
1102        let site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
1103        let outcome = client.check(&site, &user()).await;
1104        assert_eq!(outcome.kind, MatchKind::Found);
1105        assert_eq!(backend.call_count(), 0, "browser must not be touched");
1106    }
1107
1108    #[tokio::test]
1109    async fn browser_budget_exhaust_yields_uncertain() {
1110        let server = MockServer::start().await;
1111        let backend = Arc::new(RecordingBackend::with_page(RenderedPage {
1112            status: 200,
1113            final_url: url::Url::parse("https://x/").unwrap(),
1114            body: String::new(),
1115            elapsed_ms: 0,
1116        }));
1117        let client = Client::builder()
1118            .min_request_interval(Duration::ZERO)
1119            .max_retries(0)
1120            .browser(backend.clone())
1121            .browser_budget(1)
1122            .build()
1123            .unwrap();
1124        let site = site_bot_protected(&server);
1125        // First call consumes the only slot.
1126        let first = client.check(&site, &user()).await;
1127        assert_eq!(first.kind, MatchKind::Found);
1128        // Second call hits the cap → Uncertain(BrowserBudget), backend NOT invoked.
1129        let second = client.check(&site, &user()).await;
1130        assert_eq!(second.kind, MatchKind::Uncertain);
1131        assert!(matches!(
1132            second.reason,
1133            Some(UncertainReason::BrowserBudget)
1134        ));
1135        assert_eq!(
1136            backend.call_count(),
1137            1,
1138            "second call must not invoke backend"
1139        );
1140    }
1141
1142    #[tokio::test]
1143    async fn browser_failure_surfaces_as_uncertain_browser_failed() {
1144        struct FailingBackend;
1145        #[async_trait::async_trait]
1146        impl BrowserBackend for FailingBackend {
1147            async fn fetch(
1148                &self,
1149                _url: &url::Url,
1150                _headers: &std::collections::BTreeMap<String, String>,
1151                _timeout: Duration,
1152            ) -> Result<RenderedPage> {
1153                Err(Error::BrowserSetup {
1154                    message: "simulated crash".into(),
1155                })
1156            }
1157        }
1158        impl std::fmt::Debug for FailingBackend {
1159            fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1160                f.write_str("FailingBackend")
1161            }
1162        }
1163
1164        let server = MockServer::start().await;
1165        let client = Client::builder()
1166            .min_request_interval(Duration::ZERO)
1167            .max_retries(0)
1168            .browser(Arc::new(FailingBackend))
1169            .build()
1170            .unwrap();
1171        let outcome = client.check(&site_bot_protected(&server), &user()).await;
1172        assert_eq!(outcome.kind, MatchKind::Uncertain);
1173        match outcome.reason {
1174            Some(UncertainReason::BrowserFailed(msg)) => {
1175                assert!(msg.contains("simulated crash"), "got: {msg}");
1176            }
1177            other => panic!("expected BrowserFailed, got {other:?}"),
1178        }
1179    }
1180
1181    #[tokio::test]
1182    async fn status_only_site_uses_head_request() {
1183        // Site with only status signals (no body markers, no enrichment)
1184        // should be probed with HEAD — saves the body download on
1185        // ~30% of the registry.
1186        let server = MockServer::start().await;
1187        Mock::given(method("HEAD"))
1188            .and(path("/alice"))
1189            .respond_with(ResponseTemplate::new(200))
1190            .mount(&server)
1191            .await;
1192        let site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
1193        let outcome = build_client().check(&site, &user()).await;
1194        assert_eq!(outcome.kind, MatchKind::Found);
1195        let recvd = server.received_requests().await.unwrap_or_default();
1196        assert_eq!(recvd.len(), 1);
1197        assert_eq!(recvd[0].method.as_str(), "HEAD");
1198    }
1199
1200    #[tokio::test]
1201    async fn body_signal_site_uses_get_request() {
1202        // Same baseline plus a body-marker signal — must still GET so
1203        // the body actually arrives for matching.
1204        let server = MockServer::start().await;
1205        Mock::given(any())
1206            .and(path("/alice"))
1207            .respond_with(ResponseTemplate::new(200).set_body_string("hello alice"))
1208            .mount(&server)
1209            .await;
1210        let site = site_with(
1211            &server,
1212            vec![Signal::BodyPresent {
1213                text: "hello".into(),
1214            }],
1215        );
1216        let outcome = build_client().check(&site, &user()).await;
1217        assert_eq!(outcome.kind, MatchKind::Found);
1218        let recvd = server.received_requests().await.unwrap_or_default();
1219        assert_eq!(recvd[0].method.as_str(), "GET");
1220    }
1221
1222    #[tokio::test]
1223    async fn protection_field_routes_through_browser_like_bot_protected_tag() {
1224        // A site that declares `protection: [Cloudflare]` but doesn't
1225        // carry the legacy `bot-protected` tag should still route
1226        // through the browser backend — the new structured field is
1227        // an additional signal, not a tag replacement.
1228        let server = MockServer::start().await;
1229        Mock::given(any())
1230            .respond_with(ResponseTemplate::new(200))
1231            .mount(&server)
1232            .await;
1233        let mut site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
1234        site.protection = vec![crate::site::ProtectionKind::Cloudflare];
1235        // No bot-protected tag — pure structured-field test.
1236        let backend = Arc::new(RecordingBackend::with_page(RenderedPage {
1237            status: 200,
1238            final_url: url::Url::parse(&format!("{}/alice", server.uri())).unwrap(),
1239            body: String::new(),
1240            elapsed_ms: 0,
1241        }));
1242        let client = Client::builder()
1243            .min_request_interval(Duration::ZERO)
1244            .max_retries(0)
1245            .browser(backend)
1246            .build()
1247            .unwrap();
1248        let outcome = client.check(&site, &user()).await;
1249        // The recording backend always returns a synthetic 200, so
1250        // Found means we went through the browser path.
1251        assert_eq!(outcome.kind, MatchKind::Found);
1252        // No raw HTTP probe should have hit the mock server.
1253        let recvd = server.received_requests().await.unwrap_or_default();
1254        assert_eq!(
1255            recvd.len(),
1256            0,
1257            "structured protection must skip the raw HTTP path"
1258        );
1259    }
1260
1261    #[tokio::test]
1262    async fn post_method_sends_body_with_username_substituted() {
1263        // A POST-probed site (e.g. Anilist GraphQL) — the username
1264        // goes in the body, not the URL. Adler should substitute
1265        // `{username}` and send a POST with the rendered payload.
1266        let server = MockServer::start().await;
1267        Mock::given(method("POST"))
1268            .and(path("/api"))
1269            .respond_with(ResponseTemplate::new(200))
1270            .mount(&server)
1271            .await;
1272        // URL substitution still requires the `{username}` placeholder,
1273        // even for POST sites where the username also lives in the
1274        // body. Most real POST endpoints encode the username in both
1275        // (e.g. query string + body); we mirror that.
1276        let site = Site {
1277            name: "ApiPost".into(),
1278            url: UrlTemplate::new(format!("{}/api?_={{username}}", server.uri())).unwrap(),
1279            signals: vec![Signal::StatusFound { codes: vec![200] }],
1280            known_present: None,
1281            known_absent: None,
1282            extract: Vec::new(),
1283            tags: Vec::new(),
1284            request_headers: std::collections::BTreeMap::new(),
1285            regex_check: None,
1286            engine: None,
1287            strip_bad_char: None,
1288            request_method: HttpMethod::Post,
1289            request_body: Some(r#"{"name":"{username}"}"#.into()),
1290            protection: Vec::new(),
1291            disabled: false,
1292            disabled_reason: None,
1293            source: None,
1294            popularity: None,
1295            access: crate::AccessPolicy::default(),
1296        };
1297        let outcome = build_client().check(&site, &user()).await;
1298        assert_eq!(outcome.kind, MatchKind::Found);
1299        let recvd = server.received_requests().await.unwrap_or_default();
1300        assert_eq!(recvd.len(), 1);
1301        assert_eq!(recvd[0].method.as_str(), "POST");
1302        let body = String::from_utf8_lossy(&recvd[0].body).to_string();
1303        assert!(body.contains("\"name\":\"alice\""), "body was: {body}");
1304    }
1305
1306    #[tokio::test]
1307    async fn head_405_falls_back_to_get() {
1308        // A server that rejects HEAD with 405 — Adler should silently
1309        // retry with GET so the optimisation can never cost accuracy.
1310        let server = MockServer::start().await;
1311        Mock::given(method("HEAD"))
1312            .and(path("/alice"))
1313            .respond_with(ResponseTemplate::new(405))
1314            .mount(&server)
1315            .await;
1316        Mock::given(any())
1317            .and(path("/alice"))
1318            .respond_with(ResponseTemplate::new(200))
1319            .mount(&server)
1320            .await;
1321        let site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
1322        let outcome = build_client().check(&site, &user()).await;
1323        assert_eq!(outcome.kind, MatchKind::Found);
1324        let recvd = server.received_requests().await.unwrap_or_default();
1325        assert_eq!(recvd.len(), 2);
1326        assert_eq!(recvd[0].method.as_str(), "HEAD");
1327        assert_eq!(recvd[1].method.as_str(), "GET");
1328    }
1329
1330    // ------------------------------------------------------------------
1331    // Phase 4 — automatic escalation when the cheap transport hits a
1332    // Cloudflare / rate-limit Uncertain that the browser could resolve.
1333    // ------------------------------------------------------------------
1334
1335    /// Mocked HTTP that always responds with a Cloudflare 503 (server
1336    /// header + 503 status — what the pre-body ban detector turns into
1337    /// `Uncertain(CloudflareChallenge)`).
1338    async fn cloudflare_503_server() -> MockServer {
1339        let server = MockServer::start().await;
1340        Mock::given(any())
1341            .respond_with(ResponseTemplate::new(503).insert_header("server", "cloudflare"))
1342            .mount(&server)
1343            .await;
1344        server
1345    }
1346
1347    #[tokio::test]
1348    async fn http_success_stamps_http_transport_no_escalations() {
1349        let server = MockServer::start().await;
1350        Mock::given(any())
1351            .respond_with(ResponseTemplate::new(200))
1352            .mount(&server)
1353            .await;
1354        let site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
1355        let outcome = build_client().check(&site, &user()).await;
1356        assert_eq!(outcome.kind, MatchKind::Found);
1357        assert_eq!(
1358            outcome.transport,
1359            Some(crate::escalation::TransportTier::Http),
1360            "successful HTTP probe must stamp Http transport"
1361        );
1362        assert_eq!(outcome.escalations, 0, "no escalation on the happy path");
1363    }
1364
1365    #[tokio::test]
1366    async fn escalates_cloudflare_uncertain_to_browser_and_stamps_one() {
1367        let server = cloudflare_503_server().await;
1368        // Browser returns a 200 that the StatusFound signal turns into Found.
1369        let backend = Arc::new(RecordingBackend::with_page(RenderedPage {
1370            status: 200,
1371            final_url: url::Url::parse(&format!("{}/alice", server.uri())).unwrap(),
1372            body: String::new(),
1373            elapsed_ms: 5,
1374        }));
1375        let client = Client::builder()
1376            .min_request_interval(Duration::ZERO)
1377            .max_retries(0)
1378            .browser(Arc::clone(&backend) as Arc<dyn BrowserBackend>)
1379            .build()
1380            .unwrap();
1381        // Non-bot-protected site — HTTP path runs first, hits Cloudflare,
1382        // escalation routes to the browser, browser's 200 → Found.
1383        let site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
1384        let outcome = client.check(&site, &user()).await;
1385        assert_eq!(
1386            outcome.kind,
1387            MatchKind::Found,
1388            "escalation should flip CF challenge to Found via browser (reason {:?})",
1389            outcome.reason
1390        );
1391        assert_eq!(
1392            outcome.transport,
1393            Some(crate::escalation::TransportTier::Browser),
1394            "escalated outcome must be stamped Browser"
1395        );
1396        assert_eq!(
1397            outcome.escalations, 1,
1398            "exactly one escalation should have fired"
1399        );
1400        assert_eq!(backend.call_count(), 1, "browser invoked exactly once");
1401    }
1402
1403    #[tokio::test]
1404    async fn disable_escalation_leaves_cloudflare_uncertain_untouched() {
1405        let server = cloudflare_503_server().await;
1406        let backend = Arc::new(RecordingBackend::with_page(RenderedPage {
1407            status: 200,
1408            final_url: url::Url::parse(&format!("{}/alice", server.uri())).unwrap(),
1409            body: String::new(),
1410            elapsed_ms: 0,
1411        }));
1412        let client = Client::builder()
1413            .min_request_interval(Duration::ZERO)
1414            .max_retries(0)
1415            .browser(Arc::clone(&backend) as Arc<dyn BrowserBackend>)
1416            .disable_escalation()
1417            .build()
1418            .unwrap();
1419        let site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
1420        let outcome = client.check(&site, &user()).await;
1421        assert_eq!(outcome.kind, MatchKind::Uncertain);
1422        assert!(matches!(
1423            outcome.reason,
1424            Some(UncertainReason::CloudflareChallenge)
1425        ));
1426        assert_eq!(
1427            outcome.transport,
1428            Some(crate::escalation::TransportTier::Http),
1429            "primary transport must still be stamped"
1430        );
1431        assert_eq!(outcome.escalations, 0);
1432        assert_eq!(
1433            backend.call_count(),
1434            0,
1435            "browser must not be touched when --no-escalation"
1436        );
1437    }
1438
1439    #[tokio::test]
1440    async fn escalation_budget_zero_keeps_browser_untouched() {
1441        let server = cloudflare_503_server().await;
1442        let backend = Arc::new(RecordingBackend::with_page(RenderedPage {
1443            status: 200,
1444            final_url: url::Url::parse(&format!("{}/alice", server.uri())).unwrap(),
1445            body: String::new(),
1446            elapsed_ms: 0,
1447        }));
1448        let client = Client::builder()
1449            .min_request_interval(Duration::ZERO)
1450            .max_retries(0)
1451            .browser(Arc::clone(&backend) as Arc<dyn BrowserBackend>)
1452            .escalation_budget(0)
1453            .build()
1454            .unwrap();
1455        let site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
1456        let outcome = client.check(&site, &user()).await;
1457        assert_eq!(outcome.kind, MatchKind::Uncertain);
1458        assert!(matches!(
1459            outcome.reason,
1460            Some(UncertainReason::CloudflareChallenge)
1461        ));
1462        assert_eq!(outcome.escalations, 0);
1463        assert_eq!(
1464            backend.call_count(),
1465            0,
1466            "zero budget must deny every escalation"
1467        );
1468    }
1469
1470    #[tokio::test]
1471    async fn escalation_consumes_budget_then_stops() {
1472        let server = cloudflare_503_server().await;
1473        let backend = Arc::new(RecordingBackend::with_page(RenderedPage {
1474            status: 200,
1475            final_url: url::Url::parse(&format!("{}/alice", server.uri())).unwrap(),
1476            body: String::new(),
1477            elapsed_ms: 0,
1478        }));
1479        let client = Client::builder()
1480            .min_request_interval(Duration::ZERO)
1481            .max_retries(0)
1482            .browser(Arc::clone(&backend) as Arc<dyn BrowserBackend>)
1483            .escalation_budget(1)
1484            .build()
1485            .unwrap();
1486        let site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
1487        // First call burns the only escalation slot.
1488        let first = client.check(&site, &user()).await;
1489        assert_eq!(first.kind, MatchKind::Found);
1490        assert_eq!(first.escalations, 1);
1491        // Second call's escalation is denied → cheap-path Uncertain survives.
1492        let second = client.check(&site, &user()).await;
1493        assert_eq!(second.kind, MatchKind::Uncertain);
1494        assert!(matches!(
1495            second.reason,
1496            Some(UncertainReason::CloudflareChallenge)
1497        ));
1498        assert_eq!(second.escalations, 0);
1499        assert_eq!(backend.call_count(), 1, "browser called exactly once total");
1500    }
1501}