use std::fmt;
use std::sync::Arc;
use std::time::Duration;
use crate::access::{EgressPool, SessionStore};
use crate::browser::{BrowserBackend, BrowserBudget};
use crate::retry::RetryPolicy;
use crate::robots::RobotsCache;
use crate::throttle::HostThrottle;
use crate::transport::HttpFetcher;
#[cfg(feature = "impersonate")]
use crate::transport::ImpersonateFetcher;
// NOTE(review): none of these constants is referenced in the visible part of this file;
// presumably the `builder` submodule reads them as defaults — confirm.
/// Default request timeout: 10 seconds.
const DEFAULT_TIMEOUT: Duration = Duration::from_secs(10);
/// Default connect timeout: 5 seconds.
const DEFAULT_CONNECT_TIMEOUT: Duration = Duration::from_secs(5);
/// Default redirect limit: 8.
const DEFAULT_REDIRECT_LIMIT: usize = 8;
/// Default per-host interval: 100 milliseconds.
const DEFAULT_PER_HOST_INTERVAL: Duration = Duration::from_millis(100);
/// Key string `"*global*"`. NOTE(review): presumably the throttle key used for the
/// cross-host (`global_throttle`) limiter — confirm against its use site.
const GLOBAL_THROTTLE_KEY: &str = "*global*";
/// Probe client assembled by [`ClientBuilder`] (see [`Client::builder`]).
///
/// The type derives `Clone`; `with_egress_subset` produces a copy that differs only in
/// its `egress` pool.
#[derive(Clone)]
pub struct Client {
// Shared HTTP fetcher.
http: Arc<HttpFetcher>,
// Egress pool; queried by `egress_summary`, `egress_names` and `with_egress_subset`.
egress: Arc<EgressPool>,
// Session store; queried by `session_names`.
sessions: Arc<SessionStore>,
// NOTE(review): presumably the per-host limiter — confirm in `probe`.
throttle: HostThrottle,
// Optional second throttle. NOTE(review): presumably set when a global rate cap is
// configured — confirm in `builder`.
global_throttle: Option<HostThrottle>,
retry: RetryPolicy,
// Shared, immutable list of user-agent strings.
user_agents: Arc<[String]>,
enrich: bool,
// `None` when no robots cache is configured.
robots: Option<RobotsCache>,
// `None` when no browser backend is configured.
browser: Option<Arc<dyn BrowserBackend>>,
// Only present when the crate is built with the `impersonate` feature.
#[cfg(feature = "impersonate")]
impersonate: Option<Arc<ImpersonateFetcher>>,
browser_budget: Arc<BrowserBudget>,
escalation_budget: Arc<crate::escalation::EscalationBudget>,
escalation_enabled: bool,
}
impl Client {
    /// Returns a [`ClientBuilder`] in its `Default` state.
    pub fn builder() -> ClientBuilder {
        ClientBuilder::default()
    }

    /// Returns the egress pool's `summary()`.
    #[must_use]
    pub fn egress_summary(&self) -> Vec<crate::access::EgressSummary> {
        let pool = &self.egress;
        pool.summary()
    }

    /// Returns the session store's `names()`.
    #[must_use]
    pub fn session_names(&self) -> Vec<String> {
        let store = &self.sessions;
        store.names()
    }

    /// Returns the egress pool's `names()`.
    #[must_use]
    pub fn egress_names(&self) -> Vec<String> {
        let pool = &self.egress;
        pool.names()
    }

    /// Returns a copy of this client whose egress pool is `self.egress.subset(names)`.
    ///
    /// Every other field is cloned from `self`, so `Arc`-held state is shared with the
    /// original client rather than duplicated.
    #[must_use]
    pub fn with_egress_subset(&self, names: &[String]) -> Self {
        let narrowed_pool = self.egress.subset(names);
        let mut derived = self.clone();
        derived.egress = Arc::new(narrowed_pool);
        derived
    }
}
/// Plain response triple: status code, final URL and body text.
///
/// NOTE(review): not constructed anywhere in the visible part of this file; presumably
/// produced by the `probe` or `util` submodules — confirm.
#[derive(Debug, Clone)]
pub struct RawResponse {
/// HTTP status code.
pub status: u16,
/// NOTE(review): presumably the URL after redirects were followed — confirm.
pub final_url: String,
/// Response body as text.
pub body: String,
}
/// Hand-written `Debug`: `robots` and `browser` are printed only as "is configured"
/// booleans, and the fields not listed here (`http`, `egress`, `sessions`, and the
/// feature-gated `impersonate`) are elided via `finish_non_exhaustive`.
impl fmt::Debug for Client {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let has_robots = self.robots.is_some();
        let has_browser = self.browser.is_some();

        let mut out = f.debug_struct("Client");
        out.field("throttle", &self.throttle);
        out.field("global_throttle", &self.global_throttle);
        out.field("retry", &self.retry);
        out.field("user_agents", &self.user_agents);
        out.field("enrich", &self.enrich);
        out.field("robots", &has_robots);
        out.field("browser", &has_browser);
        out.field("browser_budget", &self.browser_budget);
        out.field("escalation_budget", &self.escalation_budget);
        out.field("escalation_enabled", &self.escalation_enabled);
        out.finish_non_exhaustive()
    }
}
/// Tag value `"bot-protected"`. The tests in this file place it in `Site::tags` to mark
/// a site that is expected to be fetched through the configured browser backend.
pub const BOT_PROTECTED_TAG: &str = "bot-protected";
// Submodules of the client. NOTE(review): presumably `builder` holds `ClientBuilder`,
// `probe` the `check` implementation and `util` shared helpers — confirm.
mod builder;
mod probe;
mod util;
// Re-export the builder and its budget defaults next to `Client`.
pub use builder::{ClientBuilder, DEFAULT_BROWSER_BUDGET, DEFAULT_ESCALATION_BUDGET};
#[cfg(test)]
mod tests {
use super::*;
use crate::browser::RenderedPage;
use crate::check::{MatchKind, UncertainReason};
use crate::confidence::ConfidenceReason;
use crate::error::{Error, Result};
use crate::profile::{EvidenceOrigin, ProfileEvidenceKind};
use crate::site::{Extractor, HttpMethod, ProtectionKind, Signal, Site, UrlTemplate};
use crate::username::Username;
use std::time::Instant;
use wiremock::matchers::{any, method, path};
use wiremock::{Mock, MockServer, ResponseTemplate};
use crate::test_fixtures::{default_site, test_client};
/// Returns the shared fixture client from `crate::test_fixtures::test_client`.
fn build_client() -> Client {
test_client()
}
/// Builds a fixture site named "Mock" whose URL template is `<server uri>/{username}`
/// and whose signals are replaced by `signals`.
fn site_with(server: &MockServer, signals: Vec<Signal>) -> Site {
    let url_template = format!("{}/{{username}}", server.uri());
    let mut site = default_site("Mock", &url_template);
    site.signals = signals;
    site
}
/// Returns the username `alice` used by most tests; panics if `Username::new` rejects it.
fn user() -> Username {
Username::new("alice").unwrap()
}
/// A username rejected by the site's `regex_check` must yield
/// `Uncertain(UsernameNotAllowed)` without any HTTP request reaching the server.
#[tokio::test]
async fn regex_check_short_circuits_before_any_request() {
    let server = MockServer::start().await;
    Mock::given(any())
        .respond_with(ResponseTemplate::new(200))
        .mount(&server)
        .await;
    let mut site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
    // `user()` is the five-letter "alice", which the 8+ letter pattern rejects.
    site.regex_check = Some("^[A-Za-z]{8,}$".into());
    let outcome = build_client().check(&site, &user()).await;
    assert_eq!(outcome.kind, MatchKind::Uncertain);
    assert!(
        matches!(outcome.reason, Some(UncertainReason::UsernameNotAllowed)),
        "expected UsernameNotAllowed, got {:?}",
        outcome.reason,
    );
    // `received_requests` returns `None` only when request recording is disabled.
    // Defaulting that to an empty list would let the zero-request assertion pass
    // vacuously, so fail loudly instead.
    let recvd = server
        .received_requests()
        .await
        .expect("request recording must be enabled");
    assert_eq!(
        recvd.len(),
        0,
        "regex_check mismatch must skip the HTTP request entirely"
    );
}
/// A site whose access policy requires a country (`pl`), probed with the fixture client,
/// must yield `Uncertain(GeoUnavailable)` without any HTTP request reaching the server.
#[tokio::test]
async fn geo_constrained_site_with_no_egress_is_geo_unavailable() {
    let server = MockServer::start().await;
    Mock::given(any())
        .respond_with(ResponseTemplate::new(200))
        .mount(&server)
        .await;
    let mut site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
    site.access = crate::access::AccessPolicy {
        geo: vec![crate::access::CountryCode::new("pl").unwrap()],
        ..crate::access::AccessPolicy::default()
    };
    let outcome = build_client().check(&site, &user()).await;
    assert_eq!(outcome.kind, MatchKind::Uncertain);
    assert!(
        matches!(outcome.reason, Some(UncertainReason::GeoUnavailable)),
        "expected GeoUnavailable, got {:?}",
        outcome.reason,
    );
    // `received_requests` returns `None` only when request recording is disabled.
    // Defaulting that to an empty list would let the zero-request assertion pass
    // vacuously, so fail loudly instead.
    let recvd = server
        .received_requests()
        .await
        .expect("request recording must be enabled");
    assert_eq!(
        recvd.len(),
        0,
        "geo-unavailable must skip the HTTP request entirely"
    );
}
/// The mock only answers 200 when the `cookie` header equals `sessionid=real`, so a
/// `Found` outcome shows the named session's headers were attached to the probe.
#[tokio::test]
async fn session_headers_are_sent_on_probe() {
    let upstream = MockServer::start().await;
    let cookie_gate = Mock::given(any())
        .and(wiremock::matchers::header("cookie", "sessionid=real"))
        .respond_with(ResponseTemplate::new(200));
    cookie_gate.mount(&upstream).await;

    let session_headers = std::collections::BTreeMap::from([(
        "Cookie".to_string(),
        "sessionid=real".to_string(),
    )]);
    let mut sessions = SessionStore::new();
    sessions.insert("acct", crate::access::Session::from_headers(session_headers));

    let session_client = Client::builder()
        .timeout(Duration::from_secs(2))
        .min_request_interval(Duration::ZERO)
        .max_retries(0)
        .sessions(sessions)
        .build()
        .expect("client builds");

    let status_ok = Signal::StatusFound { codes: vec![200] };
    let mut target = site_with(&upstream, vec![status_ok]);
    target.access.session = Some("acct".to_string());

    let verdict = session_client.check(&target, &user()).await;
    assert_eq!(
        verdict.kind,
        MatchKind::Found,
        "session cookie should unlock the 200 (got {:?})",
        verdict.reason,
    );
}
/// With enrichment on and no session, the single extracted evidence item must carry an
/// observation timestamp and an access path reporting plain HTTP transport, with the
/// escalated, authenticated and session-required flags all false.
#[tokio::test]
async fn live_enriched_result_stamps_evidence_access_metadata() {
    let upstream = MockServer::start().await;
    let profile_page = ResponseTemplate::new(200)
        .set_body_string(r#"<html><h1 class="name">Alice Example</h1></html>"#);
    Mock::given(any())
        .respond_with(profile_page)
        .mount(&upstream)
        .await;

    let enriching_client = Client::builder()
        .timeout(Duration::from_secs(2))
        .min_request_interval(Duration::ZERO)
        .max_retries(0)
        .enrich(true)
        .build()
        .expect("client builds");

    let name_extractor = Extractor {
        field: "name".to_owned(),
        selector: "h1.name".to_owned(),
        attr: None,
    };
    let mut target = site_with(&upstream, vec![Signal::StatusFound { codes: vec![200] }]);
    target.extract = vec![name_extractor];

    let verdict = enriching_client.check(&target, &user()).await;
    assert_eq!(verdict.kind, MatchKind::Found);
    assert_eq!(verdict.profile_evidence.len(), 1);

    let provenance = &verdict.profile_evidence[0].source;
    assert!(provenance.observed_at_ms.is_some());
    let access = provenance.access_path.as_ref().expect("access metadata");
    assert_eq!(access.transport, crate::escalation::TransportTier::Http);
    assert!(!access.escalated);
    assert!(!access.authenticated);
    assert!(!access.session_required);
}
/// When a named session unlocks the page, the evidence's access path must be flagged as
/// authenticated, and its JSON encoding must contain neither the session name nor the
/// cookie value.
#[tokio::test]
async fn authenticated_enriched_result_marks_authenticated_without_session_name() {
    let upstream = MockServer::start().await;
    let profile_page = ResponseTemplate::new(200)
        .set_body_string(r#"<html><h1 class="name">Alice Example</h1></html>"#);
    Mock::given(any())
        .and(wiremock::matchers::header("cookie", "sessionid=real"))
        .respond_with(profile_page)
        .mount(&upstream)
        .await;

    let session_headers = std::collections::BTreeMap::from([(
        "Cookie".to_string(),
        "sessionid=real".to_string(),
    )]);
    let mut sessions = SessionStore::new();
    sessions.insert("acct", crate::access::Session::from_headers(session_headers));

    let authed_client = Client::builder()
        .timeout(Duration::from_secs(2))
        .min_request_interval(Duration::ZERO)
        .max_retries(0)
        .sessions(sessions)
        .enrich(true)
        .build()
        .expect("client builds");

    let name_extractor = Extractor {
        field: "name".to_owned(),
        selector: "h1.name".to_owned(),
        attr: None,
    };
    let mut target = site_with(&upstream, vec![Signal::StatusFound { codes: vec![200] }]);
    target.access.session = Some("acct".to_owned());
    target.extract = vec![name_extractor];

    let verdict = authed_client.check(&target, &user()).await;
    assert_eq!(verdict.kind, MatchKind::Found);

    let evidence = verdict.profile_evidence.first().expect("profile evidence");
    let provenance = &evidence.source;
    let access = provenance.access_path.as_ref().expect("access metadata");
    assert!(access.authenticated);

    // Neither the session's name nor its secret may appear in the serialised evidence.
    let encoded = serde_json::to_string(evidence).unwrap();
    assert!(!encoded.contains("acct"));
    assert!(!encoded.contains("sessionid=real"));
}
/// A site that names a session the client does not have must yield
/// `Uncertain(SessionRequired)` without any HTTP request reaching the server.
#[tokio::test]
async fn missing_named_session_is_session_required() {
    let server = MockServer::start().await;
    Mock::given(any())
        .respond_with(ResponseTemplate::new(200))
        .mount(&server)
        .await;
    let mut site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
    site.access.session = Some("not-configured".to_string());
    let outcome = build_client().check(&site, &user()).await;
    assert_eq!(outcome.kind, MatchKind::Uncertain);
    assert!(
        matches!(outcome.reason, Some(UncertainReason::SessionRequired)),
        "expected SessionRequired, got {:?}",
        outcome.reason,
    );
    // `received_requests` returns `None` only when request recording is disabled.
    // Defaulting that to an empty list would let the zero-request assertion pass
    // vacuously, so fail loudly instead.
    let recvd = server
        .received_requests()
        .await
        .expect("request recording must be enabled");
    assert_eq!(
        recvd.len(),
        0,
        "a missing session must skip the request, not probe unauthenticated"
    );
}
/// With the `impersonate` feature on, a site protected only by `TlsFingerprint` must be
/// probed exactly once, and that request must carry a user agent containing `Chrome/`.
#[cfg(feature = "impersonate")]
#[tokio::test]
async fn impersonate_routes_pure_tls_fingerprint_site() {
    let upstream = MockServer::start().await;
    Mock::given(any())
        .respond_with(ResponseTemplate::new(200))
        .mount(&upstream)
        .await;

    let impersonating = Client::builder()
        .timeout(Duration::from_secs(2))
        .min_request_interval(Duration::ZERO)
        .max_retries(0)
        .build()
        .expect("client builds with impersonate");

    let mut target = site_with(&upstream, vec![Signal::StatusFound { codes: vec![200] }]);
    target.protection = vec![crate::site::ProtectionKind::TlsFingerprint];

    let verdict = impersonating.check(&target, &user()).await;
    assert_eq!(
        verdict.kind,
        MatchKind::Found,
        "expected Found (reason {:?})",
        verdict.reason,
    );

    let seen = upstream.received_requests().await.expect("received requests");
    assert_eq!(seen.len(), 1, "expected exactly one request");
    // A missing or non-UTF-8 header collapses to "" so the assertion below reports it.
    let ua = match seen[0].headers.get("user-agent") {
        Some(value) => value.to_str().unwrap_or(""),
        None => "",
    };
    assert!(
        ua.contains("Chrome/"),
        "expected Chrome-shaped UA from wreq, got {ua:?}"
    );
}
/// A username accepted by `regex_check` is probed normally and resolves to `Found`.
#[tokio::test]
async fn regex_check_pass_proceeds_to_probe() {
    let upstream = MockServer::start().await;
    let profile_ok = Mock::given(any())
        .and(path("/alice"))
        .respond_with(ResponseTemplate::new(200));
    profile_ok.mount(&upstream).await;

    let mut target = site_with(&upstream, vec![Signal::StatusFound { codes: vec![200] }]);
    target.regex_check = Some("^[a-z]{3,}$".into());

    let verdict = build_client().check(&target, &user()).await;
    assert_eq!(verdict.kind, MatchKind::Found);
}
/// A 200 matching `StatusFound` yields `Found` with no reason, a URL ending in the
/// username, and a single status evidence line.
#[tokio::test]
async fn status_signal_reports_found_on_match() {
    let upstream = MockServer::start().await;
    let profile_ok = Mock::given(any())
        .and(path("/alice"))
        .respond_with(ResponseTemplate::new(200));
    profile_ok.mount(&upstream).await;

    let target = site_with(&upstream, vec![Signal::StatusFound { codes: vec![200] }]);
    let verdict = build_client().check(&target, &user()).await;

    assert_eq!(verdict.kind, MatchKind::Found);
    assert!(verdict.url.ends_with("/alice"));
    assert!(verdict.reason.is_none());
    assert_eq!(verdict.evidence, ["HTTP 200 (status_found)"]);
}
/// A matching `BodyUsername` signal must, without enrichment, produce one
/// signal-origin `Username` evidence item holding the stripped username (`johndoe`)
/// and an `ExactUsernameMatch { count: 1 }` confidence reason.
#[tokio::test]
async fn body_username_signal_creates_exact_username_evidence_without_enrich() {
    let upstream = MockServer::start().await;
    let handle_json = ResponseTemplate::new(200).set_body_string(r#"{"handle":"johndoe"}"#);
    Mock::given(any())
        .and(path("/johndoe"))
        .respond_with(handle_json)
        .mount(&upstream)
        .await;

    let username_signal = Signal::BodyUsername {
        text: r#""handle":"{username}""#.into(),
    };
    let mut target = site_with(&upstream, vec![username_signal]);
    // The mock only serves `/johndoe`, so the dot must be stripped before the probe.
    target.strip_bad_char = Some(".".into());

    let dotted = Username::new("john.doe").unwrap();
    let verdict = build_client().check(&target, &dotted).await;

    assert_eq!(verdict.kind, MatchKind::Found);
    assert!(verdict.enrichment.is_empty());
    assert_eq!(verdict.profile_evidence.len(), 1);

    let evidence = &verdict.profile_evidence[0];
    assert_eq!(evidence.kind, ProfileEvidenceKind::Username);
    assert_eq!(evidence.field, None);
    assert_eq!(evidence.value, "johndoe");
    assert_eq!(evidence.source.origin, EvidenceOrigin::Signal);
    assert!(evidence.source.observed_at_ms.is_some());

    let via_http = evidence
        .source
        .access_path
        .as_ref()
        .is_some_and(|access| access.transport == crate::TransportTier::Http);
    assert!(via_http);

    let has_exact_match = verdict
        .confidence
        .reasons
        .iter()
        .any(|reason| matches!(reason, ConfidenceReason::ExactUsernameMatch { count: 1 }));
    assert!(has_exact_match);
}
/// A plain `BodyPresent` match yields `Found` but must not create profile evidence or
/// an `ExactUsernameMatch` confidence reason.
#[tokio::test]
async fn generic_body_present_does_not_create_username_evidence() {
    let upstream = MockServer::start().await;
    let user_json = ResponseTemplate::new(200).set_body_string(r#"{"username":"alice"}"#);
    Mock::given(any())
        .and(path("/alice"))
        .respond_with(user_json)
        .mount(&upstream)
        .await;

    let generic_marker = Signal::BodyPresent {
        text: "username".into(),
    };
    let target = site_with(&upstream, vec![generic_marker]);
    let verdict = build_client().check(&target, &user()).await;

    assert_eq!(verdict.kind, MatchKind::Found);
    assert!(verdict.profile_evidence.is_empty());

    let has_exact_match = verdict
        .confidence
        .reasons
        .iter()
        .any(|reason| matches!(reason, ConfidenceReason::ExactUsernameMatch { .. }));
    assert!(!has_exact_match);
}
/// With both status signals configured, a 404 yields `NotFound` and a single
/// `status_not_found` evidence line.
#[tokio::test]
async fn status_signal_pair_reports_not_found_on_404() {
    let upstream = MockServer::start().await;
    let missing = Mock::given(any())
        .and(path("/alice"))
        .respond_with(ResponseTemplate::new(404));
    missing.mount(&upstream).await;

    let status_pair = vec![
        Signal::StatusFound { codes: vec![200] },
        Signal::StatusNotFound { codes: vec![404] },
    ];
    let target = site_with(&upstream, status_pair);
    let verdict = build_client().check(&target, &user()).await;

    assert_eq!(verdict.kind, MatchKind::NotFound);
    assert_eq!(verdict.evidence, ["HTTP 404 (status_not_found)"]);
}
/// When the body matches both the `BodyUsername` and the `BodyAbsent` signal, the
/// outcome is `NotFound` and no profile evidence is attached.
#[tokio::test]
async fn conflicting_not_found_does_not_attach_username_evidence() {
    let upstream = MockServer::start().await;
    let conflicting_body = ResponseTemplate::new(200)
        .set_body_string(r#"{"username":"alice","error":"missing"}"#);
    Mock::given(any())
        .and(path("/alice"))
        .respond_with(conflicting_body)
        .mount(&upstream)
        .await;

    let username_signal = Signal::BodyUsername {
        text: r#""username":"{username}""#.into(),
    };
    let missing_signal = Signal::BodyAbsent {
        text: r#""error":"missing""#.into(),
    };
    let target = site_with(&upstream, vec![username_signal, missing_signal]);
    let verdict = build_client().check(&target, &user()).await;

    assert_eq!(verdict.kind, MatchKind::NotFound);
    assert!(verdict.profile_evidence.is_empty());
}
/// A 200 whose body contains the `BodyAbsent` marker resolves to `NotFound`.
#[tokio::test]
async fn body_absent_signal_detects_missing_account() {
    let upstream = MockServer::start().await;
    let not_found_page =
        ResponseTemplate::new(200).set_body_string("<h1>Profile not found</h1>");
    Mock::given(any())
        .and(path("/alice"))
        .respond_with(not_found_page)
        .mount(&upstream)
        .await;

    let missing_marker = Signal::BodyAbsent {
        text: "Profile not found".into(),
    };
    let target = site_with(&upstream, vec![missing_marker]);
    let verdict = build_client().check(&target, &user()).await;

    assert_eq!(verdict.kind, MatchKind::NotFound);
}
/// With `BodyAbsent` as the only signal, a body lacking the marker yields `Uncertain`.
#[tokio::test]
async fn body_absent_alone_yields_uncertain_when_marker_missing() {
    let upstream = MockServer::start().await;
    let welcome_page = ResponseTemplate::new(200).set_body_string("<h1>Welcome alice</h1>");
    Mock::given(any())
        .and(path("/alice"))
        .respond_with(welcome_page)
        .mount(&upstream)
        .await;

    let missing_marker = Signal::BodyAbsent {
        text: "Profile not found".into(),
    };
    let target = site_with(&upstream, vec![missing_marker]);
    let verdict = build_client().check(&target, &user()).await;

    assert_eq!(verdict.kind, MatchKind::Uncertain);
}
/// A body that contains the `BodyPresent` marker and lacks the `BodyAbsent` marker
/// resolves to `Found`.
#[tokio::test]
async fn body_present_plus_absent_resolve_to_found() {
    let upstream = MockServer::start().await;
    let profile_card =
        ResponseTemplate::new(200).set_body_string(r#"<div class="profile-card">alice</div>"#);
    Mock::given(any())
        .and(path("/alice"))
        .respond_with(profile_card)
        .mount(&upstream)
        .await;

    let present_marker = Signal::BodyPresent {
        text: "profile-card".into(),
    };
    let missing_marker = Signal::BodyAbsent {
        text: "Profile not found".into(),
    };
    let target = site_with(&upstream, vec![present_marker, missing_marker]);
    let verdict = build_client().check(&target, &user()).await;

    assert_eq!(verdict.kind, MatchKind::Found);
}
/// The profile path 302-redirects to `/login`; with a `RedirectAbsent` signal on that
/// fragment the outcome is `NotFound`.
#[tokio::test]
async fn redirect_absent_signal_detects_missing_account() {
    let upstream = MockServer::start().await;

    let bounce_to_login =
        ResponseTemplate::new(302).insert_header("location", "/login?next=/alice");
    Mock::given(any())
        .and(path("/alice"))
        .respond_with(bounce_to_login)
        .mount(&upstream)
        .await;

    let login_page = ResponseTemplate::new(200).set_body_string("login page");
    Mock::given(any())
        .and(path("/login"))
        .respond_with(login_page)
        .mount(&upstream)
        .await;

    let login_redirect = Signal::RedirectAbsent {
        fragment: "/login".into(),
    };
    let target = site_with(&upstream, vec![login_redirect]);
    let verdict = build_client().check(&target, &user()).await;

    assert_eq!(verdict.kind, MatchKind::NotFound);
}
/// When a 200 satisfies `StatusFound` but the body also matches `BodyAbsent`, the
/// negative signal decides: the outcome is `NotFound`.
#[tokio::test]
async fn negative_signal_wins_over_positive() {
    let upstream = MockServer::start().await;
    let soft_404 = ResponseTemplate::new(200).set_body_string("Profile not found");
    Mock::given(any())
        .and(path("/alice"))
        .respond_with(soft_404)
        .mount(&upstream)
        .await;

    let positive = Signal::StatusFound { codes: vec![200] };
    let negative = Signal::BodyAbsent {
        text: "Profile not found".into(),
    };
    let target = site_with(&upstream, vec![positive, negative]);
    let verdict = build_client().check(&target, &user()).await;

    assert_eq!(verdict.kind, MatchKind::NotFound);
}
/// Probing a local port that was just released must yield `Uncertain` with a reason set.
#[tokio::test]
async fn network_failure_yields_uncertain() {
    // Bind an ephemeral port and release it at the end of this block, so that nothing
    // is expected to be listening there when the probe runs.
    let port = {
        let listener = std::net::TcpListener::bind("127.0.0.1:0").unwrap();
        listener.local_addr().unwrap().port()
    };
    let dead_url = format!("http://127.0.0.1:{port}/{{username}}");
    let dead_site = Site {
        name: "Dead".into(),
        url: UrlTemplate::new(dead_url).unwrap(),
        signals: vec![Signal::StatusFound { codes: vec![200] }],
        known_present: None,
        known_absent: None,
        extract: Vec::new(),
        tags: Vec::new(),
        request_headers: std::collections::BTreeMap::new(),
        regex_check: None,
        engine: None,
        strip_bad_char: None,
        request_method: crate::site::HttpMethod::Get,
        request_body: None,
        protection: Vec::new(),
        disabled: false,
        disabled_reason: None,
        source: None,
        popularity: None,
        access: crate::AccessPolicy::default(),
    };

    let short_timeout = Duration::from_millis(500);
    let impatient = Client::builder()
        .timeout(short_timeout)
        .connect_timeout(short_timeout)
        .max_retries(0)
        .build()
        .unwrap();

    let verdict = impatient.check(&dead_site, &user()).await;
    assert_eq!(verdict.kind, MatchKind::Uncertain);
    assert!(verdict.reason.is_some());
}
/// With a 300 ms minimum request interval, a second probe to the same host must take
/// at least 200 ms.
#[tokio::test]
async fn throttle_spaces_consecutive_calls_to_same_host() {
let server = MockServer::start().await;
Mock::given(any())
.and(path("/alice"))
.respond_with(ResponseTemplate::new(200))
.mount(&server)
.await;
let site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
let client = Client::builder()
.timeout(Duration::from_secs(2))
.min_request_interval(Duration::from_millis(300))
.build()
.unwrap();
// The first call is untimed; only the second call is measured.
client.check(&site, &user()).await;
let started = Instant::now();
client.check(&site, &user()).await;
let elapsed = started.elapsed();
// The bound is 200 ms rather than the configured 300 ms.
// NOTE(review): presumably slack for the time the first request itself took — confirm.
assert!(
elapsed >= Duration::from_millis(200),
"second probe to the same host should wait ≥200 ms, got {elapsed:?}",
);
}
/// The mock answers only requests whose `user-agent` is `adler-test/1.0`, so `Found`
/// shows that the builder's `user_agent` override reached the wire.
#[tokio::test]
async fn builder_overrides_user_agent() {
    let upstream = MockServer::start().await;
    let ua_gate = Mock::given(any())
        .and(path("/alice"))
        .and(wiremock::matchers::header("user-agent", "adler-test/1.0"))
        .respond_with(ResponseTemplate::new(200));
    ua_gate.mount(&upstream).await;

    let target = site_with(&upstream, vec![Signal::StatusFound { codes: vec![200] }]);
    let custom_ua_client = Client::builder()
        .user_agent("adler-test/1.0")
        .build()
        .unwrap();

    let verdict = custom_ua_client.check(&target, &user()).await;
    assert_eq!(verdict.kind, MatchKind::Found);
}
/// A 429 from the site yields `Uncertain` with reason `RateLimited`.
#[tokio::test]
async fn rate_limit_429_yields_uncertain_with_note() {
    let upstream = MockServer::start().await;
    let too_many = Mock::given(any())
        .and(path("/alice"))
        .respond_with(ResponseTemplate::new(429));
    too_many.mount(&upstream).await;

    let target = site_with(&upstream, vec![Signal::StatusFound { codes: vec![200] }]);
    let verdict = build_client().check(&target, &user()).await;

    assert_eq!(verdict.kind, MatchKind::Uncertain);
    assert_eq!(verdict.reason, Some(UncertainReason::RateLimited));
}
/// A 503 carrying `server: cloudflare` yields `Uncertain` with reason
/// `CloudflareChallenge`.
#[tokio::test]
async fn cloudflare_server_header_yields_uncertain() {
    let upstream = MockServer::start().await;
    let challenge = ResponseTemplate::new(503).insert_header("server", "cloudflare");
    Mock::given(any())
        .and(path("/alice"))
        .respond_with(challenge)
        .mount(&upstream)
        .await;

    let target = site_with(&upstream, vec![Signal::StatusFound { codes: vec![200] }]);
    let verdict = build_client().check(&target, &user()).await;

    assert_eq!(verdict.kind, MatchKind::Uncertain);
    assert_eq!(verdict.reason, Some(UncertainReason::CloudflareChallenge));
}
/// A 200 whose body is the "Just a moment..." page yields `Uncertain` with reason
/// `CloudflareChallenge`.
#[tokio::test]
async fn cloudflare_interstitial_in_body_yields_uncertain() {
    let upstream = MockServer::start().await;
    let interstitial = ResponseTemplate::new(200)
        .set_body_string("<html><head><title>Just a moment...</title></head></html>");
    Mock::given(any())
        .and(path("/alice"))
        .respond_with(interstitial)
        .mount(&upstream)
        .await;

    let missing_marker = Signal::BodyAbsent {
        text: "Profile not found".into(),
    };
    let target = site_with(&upstream, vec![missing_marker]);
    let verdict = build_client().check(&target, &user()).await;

    assert_eq!(verdict.kind, MatchKind::Uncertain);
    assert_eq!(verdict.reason, Some(UncertainReason::CloudflareChallenge));
}
/// When the site lists 403 as a `StatusNotFound` code, a 403 yields a plain `NotFound`
/// with no uncertainty reason.
#[tokio::test]
async fn ban_detection_does_not_fire_on_legitimate_403() {
    let upstream = MockServer::start().await;
    let forbidden = Mock::given(any())
        .and(path("/alice"))
        .respond_with(ResponseTemplate::new(403));
    forbidden.mount(&upstream).await;

    let status_pair = vec![
        Signal::StatusFound { codes: vec![200] },
        Signal::StatusNotFound { codes: vec![403] },
    ];
    let target = site_with(&upstream, status_pair);
    let verdict = build_client().check(&target, &user()).await;

    assert_eq!(verdict.kind, MatchKind::NotFound);
    assert!(verdict.reason.is_none());
}
/// One 429 followed by a 200: with retries enabled the probe must end as `Found` with
/// no uncertainty reason.
#[tokio::test]
async fn retry_recovers_after_transient_429() {
let server = MockServer::start().await;
// The 429 mock is mounted first and limited to a single response; the 200 mock below
// handles every later request. Do not reorder the two mounts.
Mock::given(any())
.and(path("/alice"))
.respond_with(ResponseTemplate::new(429))
.up_to_n_times(1)
.mount(&server)
.await;
Mock::given(any())
.and(path("/alice"))
.respond_with(ResponseTemplate::new(200))
.mount(&server)
.await;
let site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
// Backoff delays are kept in the tens of milliseconds so the test stays fast.
let client = Client::builder()
.timeout(Duration::from_secs(2))
.min_request_interval(Duration::ZERO)
.max_retries(2)
.base_backoff_delay(Duration::from_millis(20))
.max_backoff_delay(Duration::from_millis(100))
.build()
.unwrap();
let outcome = client.check(&site, &user()).await;
assert_eq!(outcome.kind, MatchKind::Found);
assert!(outcome.reason.is_none());
}
/// When every attempt gets a 429, the probe ends as `Uncertain` with reason
/// `RateLimited` once the retries are used up.
#[tokio::test]
async fn retry_exhausts_and_returns_uncertain() {
    let upstream = MockServer::start().await;
    let always_limited = Mock::given(any())
        .and(path("/alice"))
        .respond_with(ResponseTemplate::new(429));
    always_limited.mount(&upstream).await;

    let target = site_with(&upstream, vec![Signal::StatusFound { codes: vec![200] }]);
    let retrying = Client::builder()
        .timeout(Duration::from_secs(2))
        .min_request_interval(Duration::ZERO)
        .max_retries(2)
        .base_backoff_delay(Duration::from_millis(10))
        .max_backoff_delay(Duration::from_millis(50))
        .build()
        .unwrap();

    let verdict = retrying.check(&target, &user()).await;
    assert_eq!(verdict.kind, MatchKind::Uncertain);
    assert_eq!(verdict.reason, Some(UncertainReason::RateLimited));
}
/// A connection failure must not be retried: with a 60 s base backoff configured, the
/// probe still has to finish in under 5 s and report `Uncertain(Network(_))`.
#[tokio::test]
async fn retry_does_not_fire_on_network_error() {
// Bind an ephemeral port and drop the listener, so that nothing is expected to be
// listening there when the probe runs.
let listener = std::net::TcpListener::bind("127.0.0.1:0").unwrap();
let port = listener.local_addr().unwrap().port();
drop(listener);
let site = Site {
name: "Dead".into(),
url: UrlTemplate::new(format!("http://127.0.0.1:{port}/{{username}}")).unwrap(),
signals: vec![Signal::StatusFound { codes: vec![200] }],
known_present: None,
known_absent: None,
extract: Vec::new(),
tags: Vec::new(),
request_headers: std::collections::BTreeMap::new(),
regex_check: None,
engine: None,
strip_bad_char: None,
request_method: crate::site::HttpMethod::Get,
request_body: None,
protection: Vec::new(),
disabled: false,
disabled_reason: None,
source: None,
popularity: None,
access: crate::AccessPolicy::default(),
};
// A single backoff sleep (60 s) would already break the 5 s bound asserted below.
let client = Client::builder()
.timeout(Duration::from_millis(500))
.connect_timeout(Duration::from_millis(500))
.min_request_interval(Duration::ZERO)
.max_retries(3)
.base_backoff_delay(Duration::from_secs(60))
.build()
.unwrap();
let started = Instant::now();
let outcome = client.check(&site, &user()).await;
assert!(started.elapsed() < Duration::from_secs(5));
assert_eq!(outcome.kind, MatchKind::Uncertain);
assert!(
matches!(outcome.reason, Some(UncertainReason::Network(_))),
"got {:?}",
outcome.reason,
);
}
/// With a one-entry rotation list, the request must carry that entry as its
/// `user-agent`; the mock answers nothing else.
#[tokio::test]
async fn rotates_user_agent_per_request() {
    let upstream = MockServer::start().await;
    let ua_gate = Mock::given(any())
        .and(path("/alice"))
        .and(wiremock::matchers::header("user-agent", "RotatorUA/9.9"))
        .respond_with(ResponseTemplate::new(200));
    ua_gate.mount(&upstream).await;

    let target = site_with(&upstream, vec![Signal::StatusFound { codes: vec![200] }]);
    let rotating = Client::builder()
        .min_request_interval(Duration::ZERO)
        .max_retries(0)
        .rotate_user_agents(vec!["RotatorUA/9.9".into()])
        .build()
        .unwrap();

    let verdict = rotating.check(&target, &user()).await;
    assert_eq!(verdict.kind, MatchKind::Found);
}
/// A proxy string that is not a URL makes `build()` fail with `Error::HttpSetup`.
#[test]
fn invalid_proxy_url_fails_build() {
    let built = Client::builder().proxy("not a url").build();
    assert!(matches!(built.unwrap_err(), Error::HttpSetup { .. }));
}
/// A proxy string without a scheme fails with an `HttpSetup` error whose message
/// contains "must start with".
#[test]
fn schemeless_proxy_is_rejected_up_front() {
    let err = Client::builder().proxy("not-a-url").build().unwrap_err();
    match err {
        Error::HttpSetup { message } => {
            assert!(message.contains("must start with"), "{message}");
        }
        err => panic!("expected HttpSetup, got {err:?}"),
    }
}
/// A `socks5://` proxy URL is accepted by the builder.
#[test]
fn socks5_proxy_scheme_is_accepted() {
    let built = Client::builder().proxy("socks5://127.0.0.1:9050").build();
    assert!(built.is_ok());
}
/// With `max_rps(2)` and no per-host interval, a probe of a second site right after a
/// first one must take at least 350 ms.
#[tokio::test]
async fn global_rps_cap_spaces_requests_across_hosts() {
    let server = MockServer::start().await;
    Mock::given(any())
        .respond_with(ResponseTemplate::new(200))
        .mount(&server)
        .await;

    // Both sites live on the same mock server and differ only in name and path prefix.
    let site_under = |name: &'static str, segment: &str| Site {
        name: name.into(),
        url: UrlTemplate::new(format!("{}/{segment}/{{username}}", server.uri())).unwrap(),
        signals: vec![Signal::StatusFound { codes: vec![200] }],
        known_present: None,
        known_absent: None,
        extract: Vec::new(),
        tags: Vec::new(),
        request_headers: std::collections::BTreeMap::new(),
        regex_check: None,
        engine: None,
        strip_bad_char: None,
        request_method: crate::site::HttpMethod::Get,
        request_body: None,
        protection: Vec::new(),
        disabled: false,
        disabled_reason: None,
        source: None,
        popularity: None,
        access: crate::AccessPolicy::default(),
    };
    let site_a = site_under("A", "a");
    let site_b = site_under("B", "b");

    let client = Client::builder()
        .min_request_interval(Duration::ZERO)
        .max_retries(0)
        .max_rps(std::num::NonZeroU32::new(2).unwrap())
        .build()
        .unwrap();

    // The first call is untimed; only the second call is measured.
    client.check(&site_a, &user()).await;
    let started = Instant::now();
    client.check(&site_b, &user()).await;
    assert!(
        started.elapsed() >= Duration::from_millis(350),
        "global cap should space cross-host requests, got {:?}",
        started.elapsed(),
    );
}
/// With `respect_robots(true)` and a robots.txt that disallows `/no`, the `/no/...`
/// site yields `Uncertain(RobotsDisallowed)` while the `/yes/...` site is still `Found`.
#[tokio::test]
async fn respect_robots_skips_disallowed_paths() {
    let server = MockServer::start().await;

    let robots_txt = ResponseTemplate::new(200).set_body_string("User-agent: *\nDisallow: /no");
    Mock::given(any())
        .and(path("/robots.txt"))
        .respond_with(robots_txt)
        .mount(&server)
        .await;
    Mock::given(any())
        .and(path("/no/alice"))
        .respond_with(ResponseTemplate::new(200))
        .mount(&server)
        .await;
    Mock::given(any())
        .and(path("/yes/alice"))
        .respond_with(ResponseTemplate::new(200))
        .mount(&server)
        .await;

    let client = Client::builder()
        .min_request_interval(Duration::ZERO)
        .max_retries(0)
        .respect_robots(true)
        .build()
        .unwrap();

    // The two sites differ only in name and path prefix.
    let site_under = |name: &'static str, segment: &str| Site {
        name: name.into(),
        url: UrlTemplate::new(format!("{}/{segment}/{{username}}", server.uri())).unwrap(),
        signals: vec![Signal::StatusFound { codes: vec![200] }],
        known_present: None,
        known_absent: None,
        extract: Vec::new(),
        tags: Vec::new(),
        request_headers: std::collections::BTreeMap::new(),
        regex_check: None,
        engine: None,
        strip_bad_char: None,
        request_method: crate::site::HttpMethod::Get,
        request_body: None,
        protection: Vec::new(),
        disabled: false,
        disabled_reason: None,
        source: None,
        popularity: None,
        access: crate::AccessPolicy::default(),
    };
    let disallowed = site_under("No", "no");
    let allowed = site_under("Yes", "yes");

    let no = client.check(&disallowed, &user()).await;
    assert_eq!(no.kind, MatchKind::Uncertain);
    assert_eq!(no.reason, Some(UncertainReason::RobotsDisallowed));

    let yes = client.check(&allowed, &user()).await;
    assert_eq!(yes.kind, MatchKind::Found);
}
/// A status-only site resolves to `Found` even though the response body reads
/// "Profile not found": no body signal is configured, so the body text plays no part.
#[tokio::test]
async fn body_read_skipped_when_no_body_signal_needed() {
    let upstream = MockServer::start().await;
    let misleading_body = ResponseTemplate::new(200).set_body_string("Profile not found");
    Mock::given(any())
        .and(path("/alice"))
        .respond_with(misleading_body)
        .mount(&upstream)
        .await;

    let target = site_with(&upstream, vec![Signal::StatusFound { codes: vec![200] }]);
    let verdict = build_client().check(&target, &user()).await;

    assert_eq!(verdict.kind, MatchKind::Found);
}
/// Browser test double: every `fetch` returns a clone of a canned page and bumps a
/// call counter.
#[derive(Debug)]
struct RecordingBackend {
    page: RenderedPage,
    calls: std::sync::atomic::AtomicUsize,
}

impl RecordingBackend {
    /// Creates a backend that serves `page`, with the call counter at zero.
    fn with_page(page: RenderedPage) -> Self {
        let calls = std::sync::atomic::AtomicUsize::default();
        Self { page, calls }
    }

    /// Number of `fetch` calls made so far.
    fn call_count(&self) -> usize {
        use std::sync::atomic::Ordering;
        self.calls.load(Ordering::SeqCst)
    }
}

#[async_trait::async_trait]
impl BrowserBackend for RecordingBackend {
    /// Ignores all arguments, records the call and returns the canned page.
    async fn fetch(
        &self,
        _url: &url::Url,
        _headers: &std::collections::BTreeMap<String, String>,
        _timeout: Duration,
    ) -> Result<RenderedPage> {
        use std::sync::atomic::Ordering;
        self.calls.fetch_add(1, Ordering::SeqCst);
        let canned = self.page.clone();
        Ok(canned)
    }
}
/// Builds a status-only fixture site tagged with [`BOT_PROTECTED_TAG`].
fn site_bot_protected(server: &MockServer) -> Site {
    let status_ok = Signal::StatusFound { codes: vec![200] };
    let mut tagged = site_with(server, vec![status_ok]);
    tagged.tags = vec![BOT_PROTECTED_TAG.into()];
    tagged
}
/// A bot-protected site is served by the browser backend: the outcome is `Found` from
/// the canned page and the backend is called exactly once.
#[tokio::test]
async fn browser_routes_bot_protected_sites() {
    let upstream = MockServer::start().await;
    let canned_page = RenderedPage {
        status: 200,
        final_url: url::Url::parse("https://example.com/alice").unwrap(),
        body: "<html></html>".into(),
        elapsed_ms: 42,
    };
    let backend = Arc::new(RecordingBackend::with_page(canned_page));

    let browser_client = Client::builder()
        .min_request_interval(Duration::ZERO)
        .max_retries(0)
        .browser(backend.clone())
        .build()
        .unwrap();

    let target = site_bot_protected(&upstream);
    let verdict = browser_client.check(&target, &user()).await;

    assert_eq!(verdict.kind, MatchKind::Found);
    assert_eq!(backend.call_count(), 1, "browser invoked exactly once");
}
/// An untagged site goes over plain HTTP even when a browser backend is configured:
/// the outcome is `Found` from the mock's 200 and the backend is never called.
#[tokio::test]
async fn non_bot_protected_sites_skip_browser() {
    let upstream = MockServer::start().await;
    let profile_ok = Mock::given(any())
        .and(path("/alice"))
        .respond_with(ResponseTemplate::new(200));
    profile_ok.mount(&upstream).await;

    // The canned page reports 500, so a `Found` outcome cannot have come from it.
    let canned_page = RenderedPage {
        status: 500,
        final_url: url::Url::parse("https://x/").unwrap(),
        body: String::new(),
        elapsed_ms: 0,
    };
    let backend = Arc::new(RecordingBackend::with_page(canned_page));

    let browser_client = Client::builder()
        .min_request_interval(Duration::ZERO)
        .max_retries(0)
        .browser(backend.clone())
        .build()
        .unwrap();

    let target = site_with(&upstream, vec![Signal::StatusFound { codes: vec![200] }]);
    let verdict = browser_client.check(&target, &user()).await;

    assert_eq!(verdict.kind, MatchKind::Found);
    assert_eq!(backend.call_count(), 0, "browser must not be touched");
}
/// With a browser budget of 1, the first bot-protected probe is `Found` and the second
/// is `Uncertain(BrowserBudget)` without the backend being called again.
#[tokio::test]
async fn browser_budget_exhaust_yields_uncertain() {
    let upstream = MockServer::start().await;
    let canned_page = RenderedPage {
        status: 200,
        final_url: url::Url::parse("https://x/").unwrap(),
        body: String::new(),
        elapsed_ms: 0,
    };
    let backend = Arc::new(RecordingBackend::with_page(canned_page));

    let budgeted = Client::builder()
        .min_request_interval(Duration::ZERO)
        .max_retries(0)
        .browser(backend.clone())
        .browser_budget(1)
        .build()
        .unwrap();
    let target = site_bot_protected(&upstream);

    let first = budgeted.check(&target, &user()).await;
    assert_eq!(first.kind, MatchKind::Found);

    let second = budgeted.check(&target, &user()).await;
    assert_eq!(second.kind, MatchKind::Uncertain);
    assert!(matches!(
        second.reason,
        Some(UncertainReason::BrowserBudget)
    ));

    assert_eq!(
        backend.call_count(),
        1,
        "second call must not invoke backend"
    );
}
/// A backend error on a bot-protected site is reported as
/// `Uncertain` / `BrowserFailed`, carrying the backend's message.
///
/// Uses a local backend whose `fetch` always returns
/// `Error::BrowserSetup` with the message "simulated crash".
#[tokio::test]
async fn browser_failure_surfaces_as_uncertain_browser_failed() {
    /// Backend stub that fails every fetch.
    struct FailingBackend;

    impl std::fmt::Debug for FailingBackend {
        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
            f.write_str("FailingBackend")
        }
    }

    #[async_trait::async_trait]
    impl BrowserBackend for FailingBackend {
        async fn fetch(
            &self,
            _url: &url::Url,
            _headers: &std::collections::BTreeMap<String, String>,
            _timeout: Duration,
        ) -> Result<RenderedPage> {
            Err(Error::BrowserSetup {
                message: "simulated crash".into(),
            })
        }
    }

    let server = MockServer::start().await;
    let site = site_bot_protected(&server);
    let client = Client::builder()
        .min_request_interval(Duration::ZERO)
        .max_retries(0)
        .browser(Arc::new(FailingBackend))
        .build()
        .unwrap();

    let outcome = client.check(&site, &user()).await;
    assert_eq!(outcome.kind, MatchKind::Uncertain);

    // Pull the failure message out first, then check its content.
    let msg = match outcome.reason {
        Some(UncertainReason::BrowserFailed(msg)) => msg,
        other => panic!("expected BrowserFailed, got {other:?}"),
    };
    assert!(msg.contains("simulated crash"), "got: {msg}");
}
/// A site with only a status signal is probed with a single HEAD request.
///
/// Only a HEAD route for `/alice` is mounted; the test asserts `Found`,
/// exactly one recorded request, and that its method is HEAD.
#[tokio::test]
async fn status_only_site_uses_head_request() {
    let server = MockServer::start().await;
    let head_ok = Mock::given(method("HEAD"))
        .and(path("/alice"))
        .respond_with(ResponseTemplate::new(200));
    head_ok.mount(&server).await;

    let status_only = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
    let client = build_client();
    let outcome = client.check(&status_only, &user()).await;
    assert_eq!(outcome.kind, MatchKind::Found);

    let recvd = server.received_requests().await.unwrap_or_default();
    assert_eq!(recvd.len(), 1);
    assert_eq!(recvd[0].method.as_str(), "HEAD");
}
/// A site with a body signal must be fetched with GET so the body is
/// available for matching.
///
/// The mock server answers `/alice` with 200 and the body "hello alice";
/// the site looks for the text "hello". The test asserts `Found` and that
/// the first recorded request used the GET method.
#[tokio::test]
async fn body_signal_site_uses_get_request() {
    let server = MockServer::start().await;
    Mock::given(any())
        .and(path("/alice"))
        .respond_with(ResponseTemplate::new(200).set_body_string("hello alice"))
        .mount(&server)
        .await;
    let site = site_with(
        &server,
        vec![Signal::BodyPresent {
            text: "hello".into(),
        }],
    );
    let outcome = build_client().check(&site, &user()).await;
    assert_eq!(outcome.kind, MatchKind::Found);

    // Fail with an explicit message instead of silently substituting an
    // empty list and then panicking on an out-of-bounds index.
    let recvd = server
        .received_requests()
        .await
        .expect("request recording should be enabled on the mock server");
    let first = recvd
        .first()
        .expect("client should have sent at least one request");
    assert_eq!(first.method.as_str(), "GET");
}
/// A site whose structured `protection` field lists Cloudflare is routed
/// through the browser backend, just like a site carrying the
/// bot-protected tag.
///
/// The mock HTTP server would answer any request with 200, so a `Found`
/// outcome alone does not prove which path was taken. The test therefore
/// asserts that the browser backend was called once and that the HTTP
/// server recorded no requests.
#[tokio::test]
async fn protection_field_routes_through_browser_like_bot_protected_tag() {
    let server = MockServer::start().await;
    Mock::given(any())
        .respond_with(ResponseTemplate::new(200))
        .mount(&server)
        .await;
    let mut site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
    site.protection = vec![crate::site::ProtectionKind::Cloudflare];
    let backend = Arc::new(RecordingBackend::with_page(RenderedPage {
        status: 200,
        final_url: url::Url::parse(&format!("{}/alice", server.uri())).unwrap(),
        body: String::new(),
        elapsed_ms: 0,
    }));
    // Keep a handle to the backend so its call count can be checked below.
    let client = Client::builder()
        .min_request_interval(Duration::ZERO)
        .max_retries(0)
        .browser(backend.clone())
        .build()
        .unwrap();
    let outcome = client.check(&site, &user()).await;
    assert_eq!(outcome.kind, MatchKind::Found);
    assert_eq!(
        backend.call_count(),
        1,
        "structured protection must route through the browser"
    );
    // `expect` rather than `unwrap_or_default`: a missing recording must not
    // make the zero-request assertion pass vacuously.
    let recvd = server
        .received_requests()
        .await
        .expect("request recording should be enabled on the mock server");
    assert_eq!(
        recvd.len(),
        0,
        "structured protection must skip the raw HTTP path"
    );
}
/// A site whose only protection is `UserAuth` stays on the plain HTTP path.
///
/// The mock HTTP server answers `/alice` with 200 and the recording
/// backend is primed with a 500 page. The test asserts `Found`, zero
/// backend calls, and exactly one recorded HTTP request.
#[tokio::test]
async fn user_auth_protection_alone_uses_http_session_path() {
    let server = MockServer::start().await;
    let alice_ok = Mock::given(any())
        .and(path("/alice"))
        .respond_with(ResponseTemplate::new(200));
    alice_ok.mount(&server).await;

    let unused_page = RenderedPage {
        status: 500,
        final_url: url::Url::parse("https://x/").unwrap(),
        body: String::new(),
        elapsed_ms: 0,
    };
    let backend = Arc::new(RecordingBackend::with_page(unused_page));

    let client = Client::builder()
        .min_request_interval(Duration::ZERO)
        .max_retries(0)
        .browser(backend.clone())
        .build()
        .unwrap();

    let mut auth_site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
    auth_site.protection = vec![ProtectionKind::UserAuth];

    let outcome = client.check(&auth_site, &user()).await;
    assert_eq!(outcome.kind, MatchKind::Found);
    assert_eq!(
        backend.call_count(),
        0,
        "user-auth alone must not invoke browser"
    );

    let recvd = server.received_requests().await.unwrap_or_default();
    assert_eq!(recvd.len(), 1, "user-auth alone should use raw HTTP");
}
/// A POST site sends its request body with the `{username}` placeholder
/// filled in.
///
/// The site is spelled out field by field with `HttpMethod::Post` and a
/// JSON body template. The test asserts `Found`, exactly one recorded
/// request, method POST, and a body containing `"name":"alice"`.
#[tokio::test]
async fn post_method_sends_body_with_username_substituted() {
    let server = MockServer::start().await;
    let api_ok = Mock::given(method("POST"))
        .and(path("/api"))
        .respond_with(ResponseTemplate::new(200));
    api_ok.mount(&server).await;

    let url = UrlTemplate::new(format!("{}/api?_={{username}}", server.uri())).unwrap();
    let site = Site {
        name: "ApiPost".into(),
        url,
        signals: vec![Signal::StatusFound { codes: vec![200] }],
        known_present: None,
        known_absent: None,
        extract: Vec::new(),
        tags: Vec::new(),
        request_headers: std::collections::BTreeMap::new(),
        regex_check: None,
        engine: None,
        strip_bad_char: None,
        request_method: HttpMethod::Post,
        request_body: Some(r#"{"name":"{username}"}"#.into()),
        protection: Vec::new(),
        disabled: false,
        disabled_reason: None,
        source: None,
        popularity: None,
        access: crate::AccessPolicy::default(),
    };

    let client = build_client();
    let outcome = client.check(&site, &user()).await;
    assert_eq!(outcome.kind, MatchKind::Found);

    let recvd = server.received_requests().await.unwrap_or_default();
    assert_eq!(recvd.len(), 1);
    assert_eq!(recvd[0].method.as_str(), "POST");

    // Decode the recorded payload leniently so a failure prints readable text.
    let body = String::from_utf8_lossy(&recvd[0].body).into_owned();
    assert!(body.contains("\"name\":\"alice\""), "body was: {body}");
}
/// When HEAD is answered with 405, the client retries the probe with GET.
///
/// Two routes are mounted for `/alice`, in this order: HEAD -> 405, then
/// any method -> 200. The test asserts `Found` and that exactly two
/// requests were recorded, HEAD first and GET second.
#[tokio::test]
async fn head_405_falls_back_to_get() {
    let server = MockServer::start().await;

    // Mount order is kept as-is: the HEAD-specific route goes in first.
    let head_rejected = Mock::given(method("HEAD"))
        .and(path("/alice"))
        .respond_with(ResponseTemplate::new(405));
    head_rejected.mount(&server).await;

    let anything_ok = Mock::given(any())
        .and(path("/alice"))
        .respond_with(ResponseTemplate::new(200));
    anything_ok.mount(&server).await;

    let status_only = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
    let client = build_client();
    let outcome = client.check(&status_only, &user()).await;
    assert_eq!(outcome.kind, MatchKind::Found);

    let recvd = server.received_requests().await.unwrap_or_default();
    assert_eq!(recvd.len(), 2);
    assert_eq!(recvd[0].method.as_str(), "HEAD");
    assert_eq!(recvd[1].method.as_str(), "GET");
}
/// Starts a mock server that answers every request with HTTP 503 and a
/// `server: cloudflare` response header.
async fn cloudflare_503_server() -> MockServer {
    let server = MockServer::start().await;
    let challenge = ResponseTemplate::new(503).insert_header("server", "cloudflare");
    Mock::given(any())
        .respond_with(challenge)
        .mount(&server)
        .await;
    server
}
/// A plain successful HTTP probe is stamped with the `Http` transport tier
/// and reports zero escalations.
#[tokio::test]
async fn http_success_stamps_http_transport_no_escalations() {
    let server = MockServer::start().await;
    let always_ok = Mock::given(any()).respond_with(ResponseTemplate::new(200));
    always_ok.mount(&server).await;

    let status_only = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
    let client = build_client();
    let outcome = client.check(&status_only, &user()).await;

    assert_eq!(outcome.kind, MatchKind::Found);
    assert_eq!(
        outcome.transport,
        Some(crate::escalation::TransportTier::Http),
        "successful HTTP probe must stamp Http transport"
    );
    assert_eq!(outcome.escalations, 0, "no escalation on the happy path");
}
/// A Cloudflare-style 503 over HTTP escalates to the browser backend.
///
/// The HTTP server always returns the Cloudflare 503 and the recording
/// backend returns a 200 page. The test asserts `Found`, a `Browser`
/// transport stamp, one escalation, and one backend call.
#[tokio::test]
async fn escalates_cloudflare_uncertain_to_browser_and_stamps_one() {
    let server = cloudflare_503_server().await;

    let rendered = RenderedPage {
        status: 200,
        final_url: url::Url::parse(&format!("{}/alice", server.uri())).unwrap(),
        body: String::new(),
        elapsed_ms: 5,
    };
    let backend = Arc::new(RecordingBackend::with_page(rendered));

    let client = Client::builder()
        .min_request_interval(Duration::ZERO)
        .max_retries(0)
        .browser(Arc::clone(&backend) as Arc<dyn BrowserBackend>)
        .build()
        .unwrap();

    let status_only = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
    let outcome = client.check(&status_only, &user()).await;

    assert_eq!(
        outcome.kind,
        MatchKind::Found,
        "escalation should flip CF challenge to Found via browser (reason {:?})",
        outcome.reason
    );
    assert_eq!(
        outcome.transport,
        Some(crate::escalation::TransportTier::Browser),
        "escalated outcome must be stamped Browser"
    );
    assert_eq!(
        outcome.escalations, 1,
        "exactly one escalation should have fired"
    );
    assert_eq!(backend.call_count(), 1, "browser invoked exactly once");
}
/// With escalation disabled on the builder, a Cloudflare 503 stays
/// `Uncertain` and the browser backend is never called.
///
/// The test also asserts the `CloudflareChallenge` reason, an `Http`
/// transport stamp, and zero escalations.
#[tokio::test]
async fn disable_escalation_leaves_cloudflare_uncertain_untouched() {
    let server = cloudflare_503_server().await;

    let rendered = RenderedPage {
        status: 200,
        final_url: url::Url::parse(&format!("{}/alice", server.uri())).unwrap(),
        body: String::new(),
        elapsed_ms: 0,
    };
    let backend = Arc::new(RecordingBackend::with_page(rendered));

    let client = Client::builder()
        .min_request_interval(Duration::ZERO)
        .max_retries(0)
        .browser(Arc::clone(&backend) as Arc<dyn BrowserBackend>)
        .disable_escalation()
        .build()
        .unwrap();

    let status_only = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
    let outcome = client.check(&status_only, &user()).await;

    assert_eq!(outcome.kind, MatchKind::Uncertain);
    assert!(matches!(
        outcome.reason,
        Some(UncertainReason::CloudflareChallenge)
    ));
    assert_eq!(
        outcome.transport,
        Some(crate::escalation::TransportTier::Http),
        "primary transport must still be stamped"
    );
    assert_eq!(outcome.escalations, 0);
    assert_eq!(
        backend.call_count(),
        0,
        "browser must not be touched when --no-escalation"
    );
}
/// An escalation budget of zero denies escalation outright.
///
/// Against the Cloudflare 503 server, the test asserts an `Uncertain`
/// outcome with reason `CloudflareChallenge`, zero escalations, and zero
/// backend calls.
#[tokio::test]
async fn escalation_budget_zero_keeps_browser_untouched() {
    let server = cloudflare_503_server().await;

    let rendered = RenderedPage {
        status: 200,
        final_url: url::Url::parse(&format!("{}/alice", server.uri())).unwrap(),
        body: String::new(),
        elapsed_ms: 0,
    };
    let backend = Arc::new(RecordingBackend::with_page(rendered));

    let client = Client::builder()
        .min_request_interval(Duration::ZERO)
        .max_retries(0)
        .browser(Arc::clone(&backend) as Arc<dyn BrowserBackend>)
        .escalation_budget(0)
        .build()
        .unwrap();

    let status_only = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
    let outcome = client.check(&status_only, &user()).await;

    assert_eq!(outcome.kind, MatchKind::Uncertain);
    assert!(matches!(
        outcome.reason,
        Some(UncertainReason::CloudflareChallenge)
    ));
    assert_eq!(outcome.escalations, 0);
    assert_eq!(
        backend.call_count(),
        0,
        "zero budget must deny every escalation"
    );
}
/// With an escalation budget of 1, only the first check may escalate.
///
/// Against the Cloudflare 503 server, the test asserts that the first check
/// is `Found` with one escalation, the second is `Uncertain` /
/// `CloudflareChallenge` with zero escalations, and the backend was called
/// once overall.
#[tokio::test]
async fn escalation_consumes_budget_then_stops() {
    let server = cloudflare_503_server().await;

    let rendered = RenderedPage {
        status: 200,
        final_url: url::Url::parse(&format!("{}/alice", server.uri())).unwrap(),
        body: String::new(),
        elapsed_ms: 0,
    };
    let backend = Arc::new(RecordingBackend::with_page(rendered));

    let client = Client::builder()
        .min_request_interval(Duration::ZERO)
        .max_retries(0)
        .browser(Arc::clone(&backend) as Arc<dyn BrowserBackend>)
        .escalation_budget(1)
        .build()
        .unwrap();

    let status_only = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);

    // First probe: spends the single escalation.
    let first = client.check(&status_only, &user()).await;
    assert_eq!(first.kind, MatchKind::Found);
    assert_eq!(first.escalations, 1);

    // Second probe: same client, budget already spent.
    let second = client.check(&status_only, &user()).await;
    assert_eq!(second.kind, MatchKind::Uncertain);
    assert!(matches!(
        second.reason,
        Some(UncertainReason::CloudflareChallenge)
    ));
    assert_eq!(second.escalations, 0);

    assert_eq!(backend.call_count(), 1, "browser called exactly once total");
}
}