1use std::borrow::Cow;
11use std::collections::BTreeMap;
12use std::fmt;
13use std::num::NonZeroU32;
14use std::sync::Arc;
15use std::time::{Duration, Instant};
16
17use reqwest::redirect;
18
19use crate::access::{EgressChoice, EgressPool, EgressSpec, SessionStore};
20use crate::browser::{BrowserBackend, BrowserBudget};
21use crate::check::{CheckOutcome, MatchKind, UncertainReason};
22use crate::error::{Error, Result};
23use crate::retry::{self, RetryPolicy};
24use crate::robots::RobotsCache;
25use crate::site::{HttpMethod, Probe, Signal, SignalVerdict, Site, aggregate};
26use crate::throttle::HostThrottle;
27#[cfg(feature = "impersonate")]
28use crate::transport::ImpersonateFetcher;
29use crate::transport::{
30 BROWSER_TIMEOUT, BrowserFetcher, FetchError, FetchRequest, Fetcher, HttpFetcher,
31};
32use crate::username::Username;
33
/// Overall request timeout used by [`ClientBuilder::default`].
const DEFAULT_TIMEOUT: Duration = Duration::from_secs(10);
/// Connect timeout used by [`ClientBuilder::default`].
const DEFAULT_CONNECT_TIMEOUT: Duration = Duration::from_secs(5);
/// Redirect limit used by [`ClientBuilder::default`] when redirects are followed.
const DEFAULT_REDIRECT_LIMIT: usize = 8;
/// Interval handed to the per-host throttle by [`ClientBuilder::default`].
const DEFAULT_PER_HOST_INTERVAL: Duration = Duration::from_millis(100);
/// Single key passed to the optional global throttle, so every request
/// waits on the same entry regardless of its host.
const GLOBAL_THROTTLE_KEY: &str = "*global*";
40
/// Probing client: issues requests against sites and classifies the result.
///
/// Built through [`Client::builder`]. Most state is held behind `Arc`, and the
/// type derives `Clone`.
#[derive(Clone)]
pub struct Client {
    /// Default fetcher; used by `fetch` and whenever egress selection yields
    /// `EgressChoice::Default`.
    http: Arc<HttpFetcher>,
    /// Pool consulted with the site's access policy to pick a fetcher.
    egress: Arc<EgressPool>,
    /// Named sessions, looked up by `site.access.session`.
    sessions: Arc<SessionStore>,
    /// Throttle keyed by host, awaited before each raw HTTP request.
    throttle: HostThrottle,
    /// Optional throttle shared by all hosts (present when `max_rps` was set).
    global_throttle: Option<HostThrottle>,
    /// Policy consulted by `check` to decide on retries and back-off delays.
    retry: RetryPolicy,
    /// User-Agent values to choose from per request; empty means no override.
    user_agents: Arc<[String]>,
    /// Whether enrichment data is extracted from `Found` responses.
    enrich: bool,
    /// robots.txt cache; `Some` only when the builder enabled `respect_robots`.
    robots: Option<RobotsCache>,
    /// Optional browser backend used for sites tagged or marked as protected.
    browser: Option<Arc<dyn BrowserBackend>>,
    /// Fetcher used for sites whose only protection is `TlsFingerprint`.
    #[cfg(feature = "impersonate")]
    impersonate: Option<Arc<ImpersonateFetcher>>,
    /// Budget consumed once per browser-backed probe.
    browser_budget: Arc<BrowserBudget>,
}
80
81impl Client {
82 pub fn builder() -> ClientBuilder {
84 ClientBuilder::default()
85 }
86
87 #[tracing::instrument(skip(self), fields(site = %site.name, user = %username))]
101 pub async fn check(&self, site: &Site, username: &Username) -> CheckOutcome {
102 let mut attempt: u32 = 0;
103 loop {
104 let outcome = self.probe_once(site, username).await;
105 if !retry::should_retry(&outcome, attempt, &self.retry) {
106 return outcome;
107 }
108 let delay = retry::backoff_delay(attempt, &self.retry);
109 tracing::info!(
110 site = %site.name,
111 attempt = attempt + 1,
112 reason = outcome.reason.as_ref().map(ToString::to_string).unwrap_or_default(),
113 ?delay,
114 "transient ban, retrying",
115 );
116 tokio::time::sleep(delay).await;
117 attempt += 1;
118 }
119 }
120
121 pub async fn fetch(&self, url: &str) -> Option<RawResponse> {
130 let host = host_of(url);
131 if let Some(global) = &self.global_throttle {
132 global.wait(GLOBAL_THROTTLE_KEY).await;
133 }
134 self.throttle.wait(&host).await;
135 let mut request = self.http.client().get(url);
136 if let Some(ua) = self.pick_user_agent() {
137 request = request.header(reqwest::header::USER_AGENT, ua);
138 }
139 let response = request.send().await.ok()?;
140 let status = response.status().as_u16();
141 let final_url = response.url().to_string();
142 let body = response.text().await.unwrap_or_default();
143 Some(RawResponse {
144 status,
145 final_url,
146 body,
147 })
148 }
149
150 pub async fn fetch_for_doctor(&self, site: &Site, url: &str) -> Option<RawResponse> {
161 if let Some(backend) = self.browser.as_deref() {
162 let has_tag = site
163 .tags
164 .iter()
165 .any(|t| t.eq_ignore_ascii_case(BOT_PROTECTED_TAG));
166 if has_tag || !site.protection.is_empty() {
167 let parsed = url::Url::parse(url).ok()?;
168 match backend
169 .fetch(&parsed, &site.request_headers, BROWSER_TIMEOUT)
170 .await
171 {
172 Ok(page) => {
173 return Some(RawResponse {
174 status: page.status,
175 final_url: page.final_url.to_string(),
176 body: page.body,
177 });
178 }
179 Err(err) => {
180 tracing::warn!(
181 site = %site.name, %url, error = %err,
182 "browser fetch failed in doctor; falling back to raw HTTP",
183 );
184 }
185 }
186 }
187 }
188 self.fetch(url).await
189 }
190
191 fn pick_user_agent(&self) -> Option<&str> {
194 match self.user_agents.len() {
195 0 => None,
196 1 => Some(&self.user_agents[0]),
197 n => Some(&self.user_agents[fastrand::usize(0..n)]),
198 }
199 }
200
    /// Runs one probe attempt for `username` on `site` and classifies it.
    ///
    /// Steps, in order; several return an `Uncertain` outcome without sending
    /// any request:
    ///
    /// 1. `regex_check` pre-filter: `UsernameNotAllowed` on mismatch.
    /// 2. Named-session lookup: `SessionRequired` when the session is unknown.
    /// 3. Browser backend for tagged/protected sites: `BrowserBudget` when the
    ///    budget is exhausted.
    /// 4. With feature `impersonate`: the impersonating fetcher for sites whose
    ///    only protection is `TlsFingerprint`.
    /// 5. Egress selection: `GeoUnavailable` when no egress is available.
    /// 6. robots.txt check (when enabled): `RobotsDisallowed`.
    /// 7. Global throttle, per-host throttle, then the HTTP request.
    ///
    /// Retrying is the caller's job; this function performs exactly one attempt.
    #[allow(clippy::too_many_lines)]
    async fn probe_once(&self, site: &Site, username: &Username) -> CheckOutcome {
        let url = site.url_for(username);

        if let Some(pat) = &site.regex_check {
            // A pattern that fails to compile is ignored and the probe proceeds.
            // NOTE(review): the regex is compiled on every call; consider caching.
            if let Ok(re) = regex::Regex::new(pat) {
                if !re.is_match(username.as_str()) {
                    // `Instant::now()` as the start means the reported elapsed time is ~0.
                    return uncertain(
                        &site.name,
                        url,
                        Instant::now(),
                        UncertainReason::UsernameNotAllowed,
                    );
                }
            }
        }

        // Borrow the site's headers when no session is named; otherwise build an
        // owned map from the session (presumably session headers layered over
        // the site's own; see `Session::apply`).
        let session_headers: Cow<'_, BTreeMap<String, String>> = match &site.access.session {
            None => Cow::Borrowed(&site.request_headers),
            Some(name) => match self.sessions.get(name) {
                Some(session) => Cow::Owned(session.apply(&site.request_headers)),
                None => {
                    // Never probe without the session the site asks for.
                    return uncertain(
                        &site.name,
                        url,
                        Instant::now(),
                        UncertainReason::SessionRequired,
                    );
                }
            },
        };
        let headers: &BTreeMap<String, String> = &session_headers;

        if let Some(backend) = &self.browser {
            let has_tag = site
                .tags
                .iter()
                .any(|t| t.eq_ignore_ascii_case(BOT_PROTECTED_TAG));
            if has_tag || !site.protection.is_empty() {
                if self.browser_budget.try_consume() {
                    let started = Instant::now();
                    // The browser path sends no request body and no User-Agent
                    // override, and always asks for the response body.
                    let req = FetchRequest {
                        method: site.request_method,
                        url: &url,
                        body: None,
                        user_agent: None,
                        headers,
                        want_body: true,
                    };
                    let fetcher = BrowserFetcher::new(Arc::clone(backend));
                    return match fetcher.fetch(&req).await {
                        Ok(resp) => self.finish(site, url, started, &resp),
                        Err(FetchError(reason)) => uncertain(&site.name, url, started, reason),
                    };
                }
                // Budget exhausted: report it rather than falling back to raw HTTP.
                tracing::warn!(site = %site.name, "browser budget exhausted");
                return uncertain(
                    &site.name,
                    url,
                    Instant::now(),
                    UncertainReason::BrowserBudget,
                );
            }
        }

        #[cfg(feature = "impersonate")]
        if let Some(fetcher) = &self.impersonate {
            // Only sites whose sole protection is TLS fingerprinting, and which
            // are not tagged bot-protected, take this path.
            let pure_tls = site.protection.len() == 1
                && site.protection[0] == crate::site::ProtectionKind::TlsFingerprint
                && !site
                    .tags
                    .iter()
                    .any(|t| t.eq_ignore_ascii_case(BOT_PROTECTED_TAG));
            if pure_tls {
                let started = Instant::now();
                // No request body is forwarded on this path either.
                let req = FetchRequest {
                    method: site.request_method,
                    url: &url,
                    body: None,
                    user_agent: self.pick_user_agent(),
                    headers,
                    want_body: true,
                };
                return match fetcher.fetch(&req).await {
                    Ok(resp) => self.finish(site, url, started, &resp),
                    Err(FetchError(reason)) => uncertain(&site.name, url, started, reason),
                };
            }
        }

        // Pick the fetcher for this site's access policy, or give up.
        let egress: Arc<HttpFetcher> = match self.egress.select(&site.access) {
            EgressChoice::Default => Arc::clone(&self.http),
            EgressChoice::Use(fetcher) => fetcher,
            EgressChoice::Unavailable => {
                return uncertain(
                    &site.name,
                    url,
                    Instant::now(),
                    UncertainReason::GeoUnavailable,
                );
            }
        };

        let host = host_of(&url);

        if let Some(robots) = &self.robots {
            // URLs that cannot be split into origin and path skip the robots check.
            if let Some((origin, path)) = origin_and_path(&url) {
                if !robots.allowed(&origin, &path).await {
                    tracing::debug!(%url, "skipped by robots.txt");
                    return uncertain(
                        &site.name,
                        url,
                        Instant::now(),
                        UncertainReason::RobotsDisallowed,
                    );
                }
            }
        }

        if let Some(global) = &self.global_throttle {
            global.wait(GLOBAL_THROTTLE_KEY).await;
        }
        self.throttle.wait(&host).await;
        // Timing starts after throttling, so waits are not counted as elapsed time.
        let started = Instant::now();
        tracing::debug!(%url, %host, "probing");

        // The body is requested only when enrichment or some signal needs it.
        let want_enrich = self.enrich && !site.extract.is_empty();
        let needs_body = want_enrich || site.signals.iter().any(crate::site::Signal::needs_body);

        // POST bodies are templated: every `{username}` is replaced by the username.
        let body_for_post: Option<String> = if matches!(site.request_method, HttpMethod::Post) {
            const USERNAME_PH: &str = "{username}";
            site.request_body
                .as_deref()
                .map(|t| t.replace(USERNAME_PH, username.as_str()))
        } else {
            None
        };

        let req = FetchRequest {
            method: site.request_method,
            url: &url,
            body: body_for_post.as_deref(),
            user_agent: self.pick_user_agent(),
            headers,
            want_body: needs_body,
        };
        match egress.fetch(&req).await {
            Ok(resp) => self.finish(site, url, started, &resp),
            Err(FetchError(reason)) => uncertain(&site.name, url, started, reason),
        }
    }
395
396 fn finish(
400 &self,
401 site: &Site,
402 url: String,
403 started: Instant,
404 resp: &crate::transport::FetchResponse,
405 ) -> CheckOutcome {
406 let probe = Probe {
407 status: resp.status,
408 final_url: &resp.final_url,
409 body: &resp.body,
410 };
411 let votes: Vec<(&Signal, SignalVerdict)> = site
412 .signals
413 .iter()
414 .map(|s| (s, s.evaluate(&probe)))
415 .collect();
416 let kind = aggregate(votes.iter().map(|(_, v)| *v));
417 let mut result = outcome(&site.name, url, started, kind);
418 let winning = match kind {
420 MatchKind::Found => Some(SignalVerdict::Found),
421 MatchKind::NotFound => Some(SignalVerdict::NotFound),
422 MatchKind::Uncertain => None,
423 };
424 if let Some(want) = winning {
425 result.evidence = votes
426 .iter()
427 .filter(|(_, v)| *v == want)
428 .map(|(s, _)| s.describe_match(&probe))
429 .collect();
430 }
431 if self.enrich && kind == MatchKind::Found && !site.extract.is_empty() {
432 result.enrichment = crate::enrich::extract(&resp.body, &site.extract);
433 }
434 result
435 }
436}
437
/// Minimal view of a fetched page, as returned by [`Client::fetch`] and
/// [`Client::fetch_for_doctor`].
#[derive(Debug, Clone)]
pub struct RawResponse {
    /// HTTP status code of the response.
    pub status: u16,
    /// URL the response was ultimately served from, as a string.
    pub final_url: String,
    /// Response body as text; empty when the body could not be read.
    pub body: String,
}
448
/// Configuration for a [`Client`]; consumed by [`ClientBuilder::build`].
#[derive(Clone)]
#[must_use = "ClientBuilder does nothing until `.build()` is called"]
pub struct ClientBuilder {
    /// Overall request timeout for the HTTP clients that get built.
    timeout: Duration,
    /// Connect timeout for the HTTP clients that get built.
    connect_timeout: Duration,
    /// Default User-Agent set on the HTTP clients.
    user_agent: String,
    /// Whether redirects are followed at all.
    follow_redirects: bool,
    /// Maximum number of redirects when `follow_redirects` is true.
    redirect_limit: usize,
    /// Interval handed to the per-host throttle.
    min_request_interval: Duration,
    /// Optional global requests-per-second cap.
    max_rps: Option<NonZeroU32>,
    /// Retry policy copied into the client.
    retry: RetryPolicy,
    /// Optional proxy URL for the default HTTP client.
    proxy: Option<String>,
    /// Pool of User-Agent values to rotate through per request.
    user_agents: Vec<String>,
    /// Whether enrichment extraction is enabled.
    enrich: bool,
    /// Whether a robots.txt cache is created and consulted.
    respect_robots: bool,
    /// Optional browser backend.
    browser: Option<Arc<dyn BrowserBackend>>,
    /// Cap passed to the browser budget.
    browser_budget: usize,
    /// Egress specifications; each becomes its own proxied HTTP client.
    egress: Vec<EgressSpec>,
    /// Named sessions made available to the client.
    sessions: SessionStore,
}
470
/// Defaults: module-level timeout, connect-timeout, redirect-limit and
/// per-host-interval constants; redirects followed; no global rate cap, proxy,
/// User-Agent rotation, enrichment, robots.txt handling, browser backend or
/// egress entries; an empty session store.
impl Default for ClientBuilder {
    fn default() -> Self {
        Self {
            timeout: DEFAULT_TIMEOUT,
            connect_timeout: DEFAULT_CONNECT_TIMEOUT,
            user_agent: default_user_agent(),
            follow_redirects: true,
            redirect_limit: DEFAULT_REDIRECT_LIMIT,
            min_request_interval: DEFAULT_PER_HOST_INTERVAL,
            max_rps: None,
            retry: RetryPolicy::default(),
            proxy: None,
            user_agents: Vec::new(),
            enrich: false,
            respect_robots: false,
            browser: None,
            browser_budget: DEFAULT_BROWSER_BUDGET,
            egress: Vec::new(),
            sessions: SessionStore::new(),
        }
    }
}
493
494impl ClientBuilder {
495 pub fn timeout(mut self, timeout: Duration) -> Self {
497 self.timeout = timeout;
498 self
499 }
500
501 pub fn connect_timeout(mut self, timeout: Duration) -> Self {
503 self.connect_timeout = timeout;
504 self
505 }
506
507 pub fn user_agent(mut self, user_agent: impl Into<String>) -> Self {
509 self.user_agent = user_agent.into();
510 self
511 }
512
513 pub fn follow_redirects(mut self, follow: bool) -> Self {
516 self.follow_redirects = follow;
517 self
518 }
519
520 pub fn min_request_interval(mut self, interval: Duration) -> Self {
526 self.min_request_interval = interval;
527 self
528 }
529
530 pub fn max_rps(mut self, rps: NonZeroU32) -> Self {
535 self.max_rps = Some(rps);
536 self
537 }
538
539 pub fn max_retries(mut self, n: u32) -> Self {
542 self.retry.max_retries = n;
543 self
544 }
545
546 pub fn base_backoff_delay(mut self, d: Duration) -> Self {
549 self.retry.base_delay = d;
550 self
551 }
552
553 pub fn max_backoff_delay(mut self, d: Duration) -> Self {
555 self.retry.max_delay = d;
556 self
557 }
558
559 pub fn proxy(mut self, url: impl Into<String>) -> Self {
562 self.proxy = Some(url.into());
563 self
564 }
565
566 pub fn rotate_user_agents(mut self, agents: Vec<String>) -> Self {
570 self.user_agents = agents;
571 self
572 }
573
574 pub fn enrich(mut self, enrich: bool) -> Self {
577 self.enrich = enrich;
578 self
579 }
580
581 pub fn respect_robots(mut self, respect: bool) -> Self {
585 self.respect_robots = respect;
586 self
587 }
588
589 pub fn browser(mut self, backend: Arc<dyn BrowserBackend>) -> Self {
593 self.browser = Some(backend);
594 self
595 }
596
597 pub const fn browser_budget(mut self, cap: usize) -> Self {
602 self.browser_budget = cap;
603 self
604 }
605
606 pub fn egress_pool(mut self, egress: Vec<EgressSpec>) -> Self {
611 self.egress = egress;
612 self
613 }
614
615 pub fn sessions(mut self, sessions: SessionStore) -> Self {
621 self.sessions = sessions;
622 self
623 }
624
625 pub fn build(self) -> Result<Client> {
627 let inner = build_reqwest(
628 &self.user_agent,
629 self.timeout,
630 self.connect_timeout,
631 self.follow_redirects,
632 self.redirect_limit,
633 self.proxy.as_deref(),
634 )?;
635
636 let mut egress_entries = Vec::with_capacity(self.egress.len());
640 for spec in &self.egress {
641 let client = build_reqwest(
642 &self.user_agent,
643 self.timeout,
644 self.connect_timeout,
645 self.follow_redirects,
646 self.redirect_limit,
647 Some(&spec.url),
648 )?;
649 egress_entries.push((
650 spec.country.clone(),
651 spec.kind,
652 Arc::new(HttpFetcher::new(client)),
653 ));
654 }
655
656 let global_throttle = self.max_rps.map(|rps| {
657 let interval = Duration::from_secs(1) / rps.get();
659 HostThrottle::new(interval)
660 });
661 let robots = self
662 .respect_robots
663 .then(|| RobotsCache::new(inner.clone(), "adler"));
664 #[cfg(feature = "impersonate")]
668 let impersonate = Some(Arc::new(ImpersonateFetcher::new()?));
669 Ok(Client {
670 http: Arc::new(HttpFetcher::new(inner)),
671 egress: Arc::new(EgressPool::new(egress_entries)),
672 sessions: Arc::new(self.sessions),
673 throttle: HostThrottle::new(self.min_request_interval),
674 global_throttle,
675 retry: self.retry,
676 user_agents: Arc::from(self.user_agents),
677 enrich: self.enrich,
678 robots,
679 browser: self.browser,
680 browser_budget: Arc::new(BrowserBudget::new(self.browser_budget)),
681 #[cfg(feature = "impersonate")]
682 impersonate,
683 })
684 }
685}
686
687fn build_reqwest(
691 user_agent: &str,
692 timeout: Duration,
693 connect_timeout: Duration,
694 follow_redirects: bool,
695 redirect_limit: usize,
696 proxy: Option<&str>,
697) -> Result<reqwest::Client> {
698 let redirect_policy = if follow_redirects {
699 redirect::Policy::limited(redirect_limit)
700 } else {
701 redirect::Policy::none()
702 };
703 let mut builder = reqwest::Client::builder()
704 .user_agent(user_agent.to_owned())
705 .timeout(timeout)
706 .connect_timeout(connect_timeout)
707 .redirect(redirect_policy);
708 if let Some(proxy_url) = proxy {
709 const SCHEMES: [&str; 4] = ["http://", "https://", "socks5://", "socks5h://"];
713 if !SCHEMES.iter().any(|s| proxy_url.starts_with(s)) {
714 return Err(Error::HttpSetup {
715 message: format!(
716 "invalid proxy {proxy_url:?}: must start with one of {}",
717 SCHEMES.join(", ")
718 ),
719 });
720 }
721 let proxy = reqwest::Proxy::all(proxy_url).map_err(|e| Error::HttpSetup {
722 message: format!("invalid proxy {proxy_url:?}: {e}"),
723 })?;
724 builder = builder.proxy(proxy);
725 }
726 builder.build().map_err(|e| Error::HttpSetup {
727 message: e.to_string(),
728 })
729}
730
/// Browser budget cap used by [`ClientBuilder::default`].
pub const DEFAULT_BROWSER_BUDGET: usize = 50;
738
739impl fmt::Debug for Client {
740 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
741 f.debug_struct("Client")
742 .field("throttle", &self.throttle)
743 .field("global_throttle", &self.global_throttle)
744 .field("retry", &self.retry)
745 .field("user_agents", &self.user_agents)
746 .field("enrich", &self.enrich)
747 .field("robots", &self.robots.is_some())
748 .field("browser", &self.browser.is_some())
749 .field("browser_budget", &self.browser_budget)
750 .finish_non_exhaustive()
751 }
752}
753
754impl fmt::Debug for ClientBuilder {
755 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
756 f.debug_struct("ClientBuilder")
757 .field("timeout", &self.timeout)
758 .field("connect_timeout", &self.connect_timeout)
759 .field("user_agent", &self.user_agent)
760 .field("follow_redirects", &self.follow_redirects)
761 .field("redirect_limit", &self.redirect_limit)
762 .field("min_request_interval", &self.min_request_interval)
763 .field("max_rps", &self.max_rps)
764 .field("retry", &self.retry)
765 .field("proxy", &self.proxy)
766 .field("user_agents", &self.user_agents)
767 .field("enrich", &self.enrich)
768 .field("respect_robots", &self.respect_robots)
769 .field("browser", &self.browser.is_some())
770 .field("browser_budget", &self.browser_budget)
771 .field("egress", &self.egress)
772 .field("sessions", &self.sessions)
773 .finish()
774 }
775}
776
/// Site tag (compared ASCII-case-insensitively) that routes a site to the
/// browser backend when one is configured.
const BOT_PROTECTED_TAG: &str = "bot-protected";
778
779fn default_user_agent() -> String {
780 format!("adler/{}", env!("CARGO_PKG_VERSION"))
781}
782
783fn host_of(url: &str) -> String {
784 reqwest::Url::parse(url)
785 .ok()
786 .and_then(|u| u.host_str().map(str::to_owned))
787 .unwrap_or_else(|| "unknown".into())
788}
789
790fn origin_and_path(url: &str) -> Option<(String, String)> {
793 let parsed = reqwest::Url::parse(url).ok()?;
794 let host = parsed.host_str()?;
795 let port = parsed.port().map_or_else(String::new, |p| format!(":{p}"));
796 let origin = format!("{}://{host}{port}", parsed.scheme());
797 let path = parsed.query().map_or_else(
798 || parsed.path().to_owned(),
799 |q| format!("{}?{q}", parsed.path()),
800 );
801 Some((origin, path))
802}
803
804fn outcome(site: &str, url: String, started: Instant, kind: MatchKind) -> CheckOutcome {
805 CheckOutcome {
806 site: site.to_owned(),
807 url,
808 kind,
809 reason: None,
810 elapsed_ms: elapsed_ms(started),
811 enrichment: std::collections::BTreeMap::new(),
812 evidence: Vec::new(),
813 }
814}
815
816fn uncertain(site: &str, url: String, started: Instant, reason: UncertainReason) -> CheckOutcome {
817 CheckOutcome {
818 site: site.to_owned(),
819 url,
820 kind: MatchKind::Uncertain,
821 reason: Some(reason),
822 elapsed_ms: elapsed_ms(started),
823 enrichment: std::collections::BTreeMap::new(),
824 evidence: Vec::new(),
825 }
826}
827
/// Milliseconds elapsed since `started`, saturating at `u64::MAX`.
fn elapsed_ms(started: Instant) -> u64 {
    let millis: u128 = started.elapsed().as_millis();
    match u64::try_from(millis) {
        Ok(fits) => fits,
        Err(_) => u64::MAX,
    }
}
831
832#[cfg(test)]
833mod tests {
834 use super::*;
835 use crate::browser::RenderedPage;
836 use crate::site::{Signal, UrlTemplate};
837 use wiremock::matchers::{any, method, path};
838 use wiremock::{Mock, MockServer, ResponseTemplate};
839
840 fn build_client() -> Client {
841 Client::builder()
842 .timeout(Duration::from_secs(2))
843 .min_request_interval(Duration::ZERO)
846 .max_retries(0)
849 .build()
850 .expect("client builds")
851 }
852
853 fn site_with(server: &MockServer, signals: Vec<Signal>) -> Site {
854 Site {
855 name: "Mock".into(),
856 url: UrlTemplate::new(format!("{}/{{username}}", server.uri())).unwrap(),
857 signals,
858 known_present: None,
859 known_absent: None,
860 extract: Vec::new(),
861 tags: Vec::new(),
862 request_headers: std::collections::BTreeMap::new(),
863 regex_check: None,
864 engine: None,
865 strip_bad_char: None,
866 request_method: crate::site::HttpMethod::Get,
867 request_body: None,
868 protection: Vec::new(),
869 disabled: false,
870 source: None,
871 popularity: None,
872 access: crate::AccessPolicy::default(),
873 }
874 }
875
876 fn user() -> Username {
877 Username::new("alice").unwrap()
878 }
879
880 #[tokio::test]
881 async fn regex_check_short_circuits_before_any_request() {
882 let server = MockServer::start().await;
886 Mock::given(any())
887 .respond_with(ResponseTemplate::new(200))
888 .mount(&server)
889 .await;
890 let mut site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
891 site.regex_check = Some("^[A-Za-z]{8,}$".into());
893 let outcome = build_client().check(&site, &user()).await;
894 assert_eq!(outcome.kind, MatchKind::Uncertain);
895 assert!(
896 matches!(outcome.reason, Some(UncertainReason::UsernameNotAllowed)),
897 "expected UsernameNotAllowed, got {:?}",
898 outcome.reason,
899 );
900 let recvd = server.received_requests().await.unwrap_or_default();
903 assert_eq!(
904 recvd.len(),
905 0,
906 "regex_check mismatch must skip the HTTP request entirely"
907 );
908 }
909
910 #[tokio::test]
911 async fn geo_constrained_site_with_no_egress_is_geo_unavailable() {
912 let server = MockServer::start().await;
915 Mock::given(any())
916 .respond_with(ResponseTemplate::new(200))
917 .mount(&server)
918 .await;
919 let mut site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
920 site.access = crate::access::AccessPolicy {
923 geo: vec![crate::access::CountryCode::new("pl").unwrap()],
924 ip_type: None,
925 session: None,
926 };
927 let outcome = build_client().check(&site, &user()).await;
928 assert_eq!(outcome.kind, MatchKind::Uncertain);
929 assert!(
930 matches!(outcome.reason, Some(UncertainReason::GeoUnavailable)),
931 "expected GeoUnavailable, got {:?}",
932 outcome.reason,
933 );
934 let recvd = server.received_requests().await.unwrap_or_default();
937 assert_eq!(
938 recvd.len(),
939 0,
940 "geo-unavailable must skip the HTTP request entirely"
941 );
942 }
943
944 #[tokio::test]
945 async fn session_headers_are_sent_on_probe() {
946 let server = MockServer::start().await;
949 Mock::given(any())
950 .and(wiremock::matchers::header("cookie", "sessionid=real"))
951 .respond_with(ResponseTemplate::new(200))
952 .mount(&server)
953 .await;
954 let mut headers = std::collections::BTreeMap::new();
955 headers.insert("Cookie".to_string(), "sessionid=real".to_string());
956 let mut store = SessionStore::new();
957 store.insert("acct", crate::access::Session::from_headers(headers));
958 let client = Client::builder()
959 .timeout(Duration::from_secs(2))
960 .min_request_interval(Duration::ZERO)
961 .max_retries(0)
962 .sessions(store)
963 .build()
964 .expect("client builds");
965 let mut site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
966 site.access.session = Some("acct".to_string());
967 let outcome = client.check(&site, &user()).await;
968 assert_eq!(
969 outcome.kind,
970 MatchKind::Found,
971 "session cookie should unlock the 200 (got {:?})",
972 outcome.reason,
973 );
974 }
975
976 #[tokio::test]
977 async fn missing_named_session_is_session_required() {
978 let server = MockServer::start().await;
979 Mock::given(any())
980 .respond_with(ResponseTemplate::new(200))
981 .mount(&server)
982 .await;
983 let mut site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
984 site.access.session = Some("not-configured".to_string());
986 let outcome = build_client().check(&site, &user()).await;
987 assert_eq!(outcome.kind, MatchKind::Uncertain);
988 assert!(
989 matches!(outcome.reason, Some(UncertainReason::SessionRequired)),
990 "expected SessionRequired, got {:?}",
991 outcome.reason,
992 );
993 let recvd = server.received_requests().await.unwrap_or_default();
994 assert_eq!(
995 recvd.len(),
996 0,
997 "a missing session must skip the request, not probe unauthenticated"
998 );
999 }
1000
1001 #[cfg(feature = "impersonate")]
1002 #[tokio::test]
1003 async fn impersonate_routes_pure_tls_fingerprint_site() {
1004 let server = MockServer::start().await;
1005 Mock::given(any())
1006 .respond_with(ResponseTemplate::new(200))
1007 .mount(&server)
1008 .await;
1009 let client = Client::builder()
1010 .timeout(Duration::from_secs(2))
1011 .min_request_interval(Duration::ZERO)
1012 .max_retries(0)
1013 .build()
1014 .expect("client builds with impersonate");
1015 let mut site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
1016 site.protection = vec![crate::site::ProtectionKind::TlsFingerprint];
1019 let outcome = client.check(&site, &user()).await;
1020 assert_eq!(
1021 outcome.kind,
1022 MatchKind::Found,
1023 "expected Found (reason {:?})",
1024 outcome.reason,
1025 );
1026 let recvd = server.received_requests().await.expect("received requests");
1030 assert_eq!(recvd.len(), 1, "expected exactly one request");
1031 let ua = recvd[0]
1032 .headers
1033 .get("user-agent")
1034 .and_then(|v| v.to_str().ok())
1035 .unwrap_or("");
1036 assert!(
1037 ua.contains("Chrome/"),
1038 "expected Chrome-shaped UA from wreq, got {ua:?}"
1039 );
1040 }
1041
1042 #[tokio::test]
1043 async fn regex_check_pass_proceeds_to_probe() {
1044 let server = MockServer::start().await;
1045 Mock::given(any())
1046 .and(path("/alice"))
1047 .respond_with(ResponseTemplate::new(200))
1048 .mount(&server)
1049 .await;
1050 let mut site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
1051 site.regex_check = Some("^[a-z]{3,}$".into());
1053 let outcome = build_client().check(&site, &user()).await;
1054 assert_eq!(outcome.kind, MatchKind::Found);
1055 }
1056
1057 #[tokio::test]
1058 async fn status_signal_reports_found_on_match() {
1059 let server = MockServer::start().await;
1060 Mock::given(any())
1061 .and(path("/alice"))
1062 .respond_with(ResponseTemplate::new(200))
1063 .mount(&server)
1064 .await;
1065 let site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
1066 let outcome = build_client().check(&site, &user()).await;
1067 assert_eq!(outcome.kind, MatchKind::Found);
1068 assert!(outcome.url.ends_with("/alice"));
1069 assert!(outcome.reason.is_none());
1070 assert_eq!(outcome.evidence, ["HTTP 200 (status_found)"]);
1071 }
1072
1073 #[tokio::test]
1074 async fn status_signal_pair_reports_not_found_on_404() {
1075 let server = MockServer::start().await;
1076 Mock::given(any())
1077 .and(path("/alice"))
1078 .respond_with(ResponseTemplate::new(404))
1079 .mount(&server)
1080 .await;
1081 let site = site_with(
1082 &server,
1083 vec![
1084 Signal::StatusFound { codes: vec![200] },
1085 Signal::StatusNotFound { codes: vec![404] },
1086 ],
1087 );
1088 let outcome = build_client().check(&site, &user()).await;
1089 assert_eq!(outcome.kind, MatchKind::NotFound);
1090 assert_eq!(outcome.evidence, ["HTTP 404 (status_not_found)"]);
1092 }
1093
1094 #[tokio::test]
1095 async fn body_absent_signal_detects_missing_account() {
1096 let server = MockServer::start().await;
1097 Mock::given(any())
1098 .and(path("/alice"))
1099 .respond_with(ResponseTemplate::new(200).set_body_string("<h1>Profile not found</h1>"))
1100 .mount(&server)
1101 .await;
1102 let site = site_with(
1103 &server,
1104 vec![Signal::BodyAbsent {
1105 text: "Profile not found".into(),
1106 }],
1107 );
1108 let outcome = build_client().check(&site, &user()).await;
1109 assert_eq!(outcome.kind, MatchKind::NotFound);
1110 }
1111
1112 #[tokio::test]
1113 async fn body_absent_alone_yields_uncertain_when_marker_missing() {
1114 let server = MockServer::start().await;
1117 Mock::given(any())
1118 .and(path("/alice"))
1119 .respond_with(ResponseTemplate::new(200).set_body_string("<h1>Welcome alice</h1>"))
1120 .mount(&server)
1121 .await;
1122 let site = site_with(
1123 &server,
1124 vec![Signal::BodyAbsent {
1125 text: "Profile not found".into(),
1126 }],
1127 );
1128 let outcome = build_client().check(&site, &user()).await;
1129 assert_eq!(outcome.kind, MatchKind::Uncertain);
1130 }
1131
1132 #[tokio::test]
1133 async fn body_present_plus_absent_resolve_to_found() {
1134 let server = MockServer::start().await;
1135 Mock::given(any())
1136 .and(path("/alice"))
1137 .respond_with(
1138 ResponseTemplate::new(200)
1139 .set_body_string(r#"<div class="profile-card">alice</div>"#),
1140 )
1141 .mount(&server)
1142 .await;
1143 let site = site_with(
1144 &server,
1145 vec![
1146 Signal::BodyPresent {
1147 text: "profile-card".into(),
1148 },
1149 Signal::BodyAbsent {
1150 text: "Profile not found".into(),
1151 },
1152 ],
1153 );
1154 let outcome = build_client().check(&site, &user()).await;
1155 assert_eq!(outcome.kind, MatchKind::Found);
1156 }
1157
1158 #[tokio::test]
1159 async fn redirect_absent_signal_detects_missing_account() {
1160 let server = MockServer::start().await;
1161 Mock::given(any())
1162 .and(path("/alice"))
1163 .respond_with(
1164 ResponseTemplate::new(302).insert_header("location", "/login?next=/alice"),
1165 )
1166 .mount(&server)
1167 .await;
1168 Mock::given(any())
1169 .and(path("/login"))
1170 .respond_with(ResponseTemplate::new(200).set_body_string("login page"))
1171 .mount(&server)
1172 .await;
1173 let site = site_with(
1174 &server,
1175 vec![Signal::RedirectAbsent {
1176 fragment: "/login".into(),
1177 }],
1178 );
1179 let outcome = build_client().check(&site, &user()).await;
1180 assert_eq!(outcome.kind, MatchKind::NotFound);
1181 }
1182
1183 #[tokio::test]
1184 async fn negative_signal_wins_over_positive() {
1185 let server = MockServer::start().await;
1190 Mock::given(any())
1191 .and(path("/alice"))
1192 .respond_with(ResponseTemplate::new(200).set_body_string("Profile not found"))
1193 .mount(&server)
1194 .await;
1195 let site = site_with(
1196 &server,
1197 vec![
1198 Signal::StatusFound { codes: vec![200] },
1199 Signal::BodyAbsent {
1200 text: "Profile not found".into(),
1201 },
1202 ],
1203 );
1204 let outcome = build_client().check(&site, &user()).await;
1205 assert_eq!(outcome.kind, MatchKind::NotFound);
1206 }
1207
1208 #[tokio::test]
1209 async fn network_failure_yields_uncertain() {
1210 let listener = std::net::TcpListener::bind("127.0.0.1:0").unwrap();
1211 let port = listener.local_addr().unwrap().port();
1212 drop(listener);
1213
1214 let site = Site {
1215 name: "Dead".into(),
1216 url: UrlTemplate::new(format!("http://127.0.0.1:{port}/{{username}}")).unwrap(),
1217 signals: vec![Signal::StatusFound { codes: vec![200] }],
1218 known_present: None,
1219 known_absent: None,
1220 extract: Vec::new(),
1221 tags: Vec::new(),
1222 request_headers: std::collections::BTreeMap::new(),
1223 regex_check: None,
1224 engine: None,
1225 strip_bad_char: None,
1226 request_method: crate::site::HttpMethod::Get,
1227 request_body: None,
1228 protection: Vec::new(),
1229 disabled: false,
1230 source: None,
1231 popularity: None,
1232 access: crate::AccessPolicy::default(),
1233 };
1234 let client = Client::builder()
1235 .timeout(Duration::from_millis(500))
1236 .connect_timeout(Duration::from_millis(500))
1237 .max_retries(0)
1238 .build()
1239 .unwrap();
1240 let outcome = client.check(&site, &user()).await;
1241 assert_eq!(outcome.kind, MatchKind::Uncertain);
1242 assert!(outcome.reason.is_some());
1243 }
1244
1245 #[tokio::test]
1246 async fn throttle_spaces_consecutive_calls_to_same_host() {
1247 let server = MockServer::start().await;
1248 Mock::given(any())
1249 .and(path("/alice"))
1250 .respond_with(ResponseTemplate::new(200))
1251 .mount(&server)
1252 .await;
1253 let site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
1254 let client = Client::builder()
1259 .timeout(Duration::from_secs(2))
1260 .min_request_interval(Duration::from_millis(300))
1261 .build()
1262 .unwrap();
1263
1264 client.check(&site, &user()).await;
1265 let started = Instant::now();
1266 client.check(&site, &user()).await;
1267 let elapsed = started.elapsed();
1268 assert!(
1269 elapsed >= Duration::from_millis(200),
1270 "second probe to the same host should wait ≥200 ms, got {elapsed:?}",
1271 );
1272 }
1273
1274 #[tokio::test]
1275 async fn builder_overrides_user_agent() {
1276 let server = MockServer::start().await;
1277 Mock::given(any())
1278 .and(path("/alice"))
1279 .and(wiremock::matchers::header("user-agent", "adler-test/1.0"))
1280 .respond_with(ResponseTemplate::new(200))
1281 .mount(&server)
1282 .await;
1283 let site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
1284 let client = Client::builder()
1285 .user_agent("adler-test/1.0")
1286 .build()
1287 .unwrap();
1288 let outcome = client.check(&site, &user()).await;
1289 assert_eq!(outcome.kind, MatchKind::Found);
1290 }
1291
1292 #[tokio::test]
1293 async fn rate_limit_429_yields_uncertain_with_note() {
1294 let server = MockServer::start().await;
1295 Mock::given(any())
1296 .and(path("/alice"))
1297 .respond_with(ResponseTemplate::new(429))
1298 .mount(&server)
1299 .await;
1300 let site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
1301 let outcome = build_client().check(&site, &user()).await;
1302 assert_eq!(outcome.kind, MatchKind::Uncertain);
1303 assert_eq!(outcome.reason, Some(UncertainReason::RateLimited));
1304 }
1305
1306 #[tokio::test]
1307 async fn cloudflare_server_header_yields_uncertain() {
1308 let server = MockServer::start().await;
1309 Mock::given(any())
1310 .and(path("/alice"))
1311 .respond_with(ResponseTemplate::new(503).insert_header("server", "cloudflare"))
1312 .mount(&server)
1313 .await;
1314 let site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
1315 let outcome = build_client().check(&site, &user()).await;
1316 assert_eq!(outcome.kind, MatchKind::Uncertain);
1317 assert_eq!(outcome.reason, Some(UncertainReason::CloudflareChallenge));
1318 }
1319
1320 #[tokio::test]
1321 async fn cloudflare_interstitial_in_body_yields_uncertain() {
1322 let server = MockServer::start().await;
1325 Mock::given(any())
1326 .and(path("/alice"))
1327 .respond_with(
1328 ResponseTemplate::new(200)
1329 .set_body_string("<html><head><title>Just a moment...</title></head></html>"),
1330 )
1331 .mount(&server)
1332 .await;
1333 let site = site_with(
1334 &server,
1335 vec![Signal::BodyAbsent {
1336 text: "Profile not found".into(),
1337 }],
1338 );
1339 let outcome = build_client().check(&site, &user()).await;
1340 assert_eq!(outcome.kind, MatchKind::Uncertain);
1341 assert_eq!(outcome.reason, Some(UncertainReason::CloudflareChallenge));
1342 }
1343
1344 #[tokio::test]
1345 async fn ban_detection_does_not_fire_on_legitimate_403() {
1346 let server = MockServer::start().await;
1347 Mock::given(any())
1348 .and(path("/alice"))
1349 .respond_with(ResponseTemplate::new(403))
1350 .mount(&server)
1351 .await;
1352 let site = site_with(
1353 &server,
1354 vec![
1355 Signal::StatusFound { codes: vec![200] },
1356 Signal::StatusNotFound { codes: vec![403] },
1357 ],
1358 );
1359 let outcome = build_client().check(&site, &user()).await;
1360 assert_eq!(outcome.kind, MatchKind::NotFound);
1362 assert!(outcome.reason.is_none());
1363 }
1364
1365 #[tokio::test]
1366 async fn retry_recovers_after_transient_429() {
1367 let server = MockServer::start().await;
1368 Mock::given(any())
1370 .and(path("/alice"))
1371 .respond_with(ResponseTemplate::new(429))
1372 .up_to_n_times(1)
1373 .mount(&server)
1374 .await;
1375 Mock::given(any())
1376 .and(path("/alice"))
1377 .respond_with(ResponseTemplate::new(200))
1378 .mount(&server)
1379 .await;
1380 let site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
1381 let client = Client::builder()
1382 .timeout(Duration::from_secs(2))
1383 .min_request_interval(Duration::ZERO)
1384 .max_retries(2)
1385 .base_backoff_delay(Duration::from_millis(20))
1386 .max_backoff_delay(Duration::from_millis(100))
1387 .build()
1388 .unwrap();
1389 let outcome = client.check(&site, &user()).await;
1390 assert_eq!(outcome.kind, MatchKind::Found);
1391 assert!(outcome.reason.is_none());
1392 }
1393
1394 #[tokio::test]
1395 async fn retry_exhausts_and_returns_uncertain() {
1396 let server = MockServer::start().await;
1397 Mock::given(any())
1398 .and(path("/alice"))
1399 .respond_with(ResponseTemplate::new(429))
1400 .mount(&server)
1401 .await;
1402 let site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
1403 let client = Client::builder()
1404 .timeout(Duration::from_secs(2))
1405 .min_request_interval(Duration::ZERO)
1406 .max_retries(2)
1407 .base_backoff_delay(Duration::from_millis(10))
1408 .max_backoff_delay(Duration::from_millis(50))
1409 .build()
1410 .unwrap();
1411 let outcome = client.check(&site, &user()).await;
1412 assert_eq!(outcome.kind, MatchKind::Uncertain);
1413 assert_eq!(outcome.reason, Some(UncertainReason::RateLimited));
1414 }
1415
1416 #[tokio::test]
1417 async fn retry_does_not_fire_on_network_error() {
1418 let listener = std::net::TcpListener::bind("127.0.0.1:0").unwrap();
1422 let port = listener.local_addr().unwrap().port();
1423 drop(listener);
1424 let site = Site {
1425 name: "Dead".into(),
1426 url: UrlTemplate::new(format!("http://127.0.0.1:{port}/{{username}}")).unwrap(),
1427 signals: vec![Signal::StatusFound { codes: vec![200] }],
1428 known_present: None,
1429 known_absent: None,
1430 extract: Vec::new(),
1431 tags: Vec::new(),
1432 request_headers: std::collections::BTreeMap::new(),
1433 regex_check: None,
1434 engine: None,
1435 strip_bad_char: None,
1436 request_method: crate::site::HttpMethod::Get,
1437 request_body: None,
1438 protection: Vec::new(),
1439 disabled: false,
1440 source: None,
1441 popularity: None,
1442 access: crate::AccessPolicy::default(),
1443 };
1444 let client = Client::builder()
1445 .timeout(Duration::from_millis(500))
1446 .connect_timeout(Duration::from_millis(500))
1447 .min_request_interval(Duration::ZERO)
1448 .max_retries(3)
1449 .base_backoff_delay(Duration::from_secs(60))
1450 .build()
1451 .unwrap();
1452 let started = Instant::now();
1453 let outcome = client.check(&site, &user()).await;
1454 assert!(started.elapsed() < Duration::from_secs(5));
1457 assert_eq!(outcome.kind, MatchKind::Uncertain);
1458 assert!(
1459 matches!(outcome.reason, Some(UncertainReason::Network(_))),
1460 "got {:?}",
1461 outcome.reason,
1462 );
1463 }
1464
1465 #[tokio::test]
1466 async fn rotates_user_agent_per_request() {
1467 let server = MockServer::start().await;
1471 Mock::given(any())
1472 .and(path("/alice"))
1473 .and(wiremock::matchers::header("user-agent", "RotatorUA/9.9"))
1474 .respond_with(ResponseTemplate::new(200))
1475 .mount(&server)
1476 .await;
1477 let site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
1478 let client = Client::builder()
1479 .min_request_interval(Duration::ZERO)
1480 .max_retries(0)
1481 .rotate_user_agents(vec!["RotatorUA/9.9".into()])
1482 .build()
1483 .unwrap();
1484 let outcome = client.check(&site, &user()).await;
1485 assert_eq!(outcome.kind, MatchKind::Found);
1486 }
1487
/// Building a client with the proxy string `"not a url"` must fail with
/// `Error::HttpSetup`.
#[test]
fn invalid_proxy_url_fails_build() {
    let built = Client::builder().proxy("not a url").build();
    let err = built.unwrap_err();
    let is_setup_error = matches!(err, Error::HttpSetup { .. });
    assert!(is_setup_error);
}
1493
/// A proxy string without a scheme must be rejected with an `HttpSetup`
/// error whose message contains "must start with".
#[test]
fn schemeless_proxy_is_rejected_up_front() {
    let message = match Client::builder().proxy("not-a-url").build().unwrap_err() {
        Error::HttpSetup { message } => message,
        err => panic!("expected HttpSetup, got {err:?}"),
    };
    assert!(message.contains("must start with"), "{message}");
}
1503
/// A `socks5://` proxy URL must be accepted by the builder.
#[test]
fn socks5_proxy_scheme_is_accepted() {
    let built = Client::builder().proxy("socks5://127.0.0.1:9050").build();
    assert!(built.is_ok());
}
1514
1515 #[tokio::test]
1516 async fn global_rps_cap_spaces_requests_across_hosts() {
1517 let server = MockServer::start().await;
1520 Mock::given(any())
1521 .respond_with(ResponseTemplate::new(200))
1522 .mount(&server)
1523 .await;
1524 let site_a = Site {
1525 name: "A".into(),
1526 url: UrlTemplate::new(format!("{}/a/{{username}}", server.uri())).unwrap(),
1527 signals: vec![Signal::StatusFound { codes: vec![200] }],
1528 known_present: None,
1529 known_absent: None,
1530 extract: Vec::new(),
1531 tags: Vec::new(),
1532 request_headers: std::collections::BTreeMap::new(),
1533 regex_check: None,
1534 engine: None,
1535 strip_bad_char: None,
1536 request_method: crate::site::HttpMethod::Get,
1537 request_body: None,
1538 protection: Vec::new(),
1539 disabled: false,
1540 source: None,
1541 popularity: None,
1542 access: crate::AccessPolicy::default(),
1543 };
1544 let site_b = Site {
1545 name: "B".into(),
1546 url: UrlTemplate::new(format!("{}/b/{{username}}", server.uri())).unwrap(),
1547 signals: vec![Signal::StatusFound { codes: vec![200] }],
1548 known_present: None,
1549 known_absent: None,
1550 extract: Vec::new(),
1551 tags: Vec::new(),
1552 request_headers: std::collections::BTreeMap::new(),
1553 regex_check: None,
1554 engine: None,
1555 strip_bad_char: None,
1556 request_method: crate::site::HttpMethod::Get,
1557 request_body: None,
1558 protection: Vec::new(),
1559 disabled: false,
1560 source: None,
1561 popularity: None,
1562 access: crate::AccessPolicy::default(),
1563 };
1564 let client = Client::builder()
1569 .min_request_interval(Duration::ZERO)
1570 .max_retries(0)
1571 .max_rps(std::num::NonZeroU32::new(2).unwrap())
1572 .build()
1573 .unwrap();
1574 client.check(&site_a, &user()).await;
1577 let started = Instant::now();
1578 client.check(&site_b, &user()).await;
1579 assert!(
1580 started.elapsed() >= Duration::from_millis(350),
1581 "global cap should space cross-host requests, got {:?}",
1582 started.elapsed(),
1583 );
1584 }
1585
1586 #[tokio::test]
1587 async fn respect_robots_skips_disallowed_paths() {
1588 let server = MockServer::start().await;
1589 Mock::given(any())
1590 .and(path("/robots.txt"))
1591 .respond_with(
1592 ResponseTemplate::new(200).set_body_string("User-agent: *\nDisallow: /no"),
1593 )
1594 .mount(&server)
1595 .await;
1596 Mock::given(any())
1597 .and(path("/no/alice"))
1598 .respond_with(ResponseTemplate::new(200))
1599 .mount(&server)
1600 .await;
1601 Mock::given(any())
1602 .and(path("/yes/alice"))
1603 .respond_with(ResponseTemplate::new(200))
1604 .mount(&server)
1605 .await;
1606 let client = Client::builder()
1607 .min_request_interval(Duration::ZERO)
1608 .max_retries(0)
1609 .respect_robots(true)
1610 .build()
1611 .unwrap();
1612
1613 let disallowed = Site {
1614 name: "No".into(),
1615 url: UrlTemplate::new(format!("{}/no/{{username}}", server.uri())).unwrap(),
1616 signals: vec![Signal::StatusFound { codes: vec![200] }],
1617 known_present: None,
1618 known_absent: None,
1619 extract: Vec::new(),
1620 tags: Vec::new(),
1621 request_headers: std::collections::BTreeMap::new(),
1622 regex_check: None,
1623 engine: None,
1624 strip_bad_char: None,
1625 request_method: crate::site::HttpMethod::Get,
1626 request_body: None,
1627 protection: Vec::new(),
1628 disabled: false,
1629 source: None,
1630 popularity: None,
1631 access: crate::AccessPolicy::default(),
1632 };
1633 let allowed = Site {
1634 name: "Yes".into(),
1635 url: UrlTemplate::new(format!("{}/yes/{{username}}", server.uri())).unwrap(),
1636 signals: vec![Signal::StatusFound { codes: vec![200] }],
1637 known_present: None,
1638 known_absent: None,
1639 extract: Vec::new(),
1640 tags: Vec::new(),
1641 request_headers: std::collections::BTreeMap::new(),
1642 regex_check: None,
1643 engine: None,
1644 strip_bad_char: None,
1645 request_method: crate::site::HttpMethod::Get,
1646 request_body: None,
1647 protection: Vec::new(),
1648 disabled: false,
1649 source: None,
1650 popularity: None,
1651 access: crate::AccessPolicy::default(),
1652 };
1653
1654 let no = client.check(&disallowed, &user()).await;
1655 assert_eq!(no.kind, MatchKind::Uncertain);
1656 assert_eq!(no.reason, Some(UncertainReason::RobotsDisallowed));
1657
1658 let yes = client.check(&allowed, &user()).await;
1659 assert_eq!(yes.kind, MatchKind::Found);
1660 }
1661
1662 #[tokio::test]
1663 async fn body_read_skipped_when_no_body_signal_needed() {
1664 let server = MockServer::start().await;
1667 Mock::given(any())
1668 .and(path("/alice"))
1669 .respond_with(ResponseTemplate::new(200).set_body_string("Profile not found"))
1670 .mount(&server)
1671 .await;
1672 let site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
1673 let outcome = build_client().check(&site, &user()).await;
1674 assert_eq!(outcome.kind, MatchKind::Found);
1675 }
1676
1677 #[derive(Debug)]
1683 struct RecordingBackend {
1684 page: RenderedPage,
1685 calls: std::sync::atomic::AtomicUsize,
1686 }
1687
1688 impl RecordingBackend {
1689 fn with_page(page: RenderedPage) -> Self {
1690 Self {
1691 page,
1692 calls: std::sync::atomic::AtomicUsize::new(0),
1693 }
1694 }
1695 fn call_count(&self) -> usize {
1696 self.calls.load(std::sync::atomic::Ordering::SeqCst)
1697 }
1698 }
1699
1700 #[async_trait::async_trait]
1701 impl BrowserBackend for RecordingBackend {
1702 async fn fetch(
1703 &self,
1704 _url: &url::Url,
1705 _headers: &std::collections::BTreeMap<String, String>,
1706 _timeout: Duration,
1707 ) -> Result<RenderedPage> {
1708 self.calls.fetch_add(1, std::sync::atomic::Ordering::SeqCst);
1709 Ok(self.page.clone())
1710 }
1711 }
1712
1713 fn site_bot_protected(server: &MockServer) -> Site {
1714 let mut s = site_with(server, vec![Signal::StatusFound { codes: vec![200] }]);
1715 s.tags = vec!["bot-protected".into()];
1716 s
1717 }
1718
1719 #[tokio::test]
1720 async fn browser_routes_bot_protected_sites() {
1721 let server = MockServer::start().await;
1724 let backend = Arc::new(RecordingBackend::with_page(RenderedPage {
1725 status: 200,
1726 final_url: url::Url::parse("https://example.com/alice").unwrap(),
1727 body: "<html></html>".into(),
1728 elapsed_ms: 42,
1729 }));
1730 let client = Client::builder()
1731 .min_request_interval(Duration::ZERO)
1732 .max_retries(0)
1733 .browser(backend.clone())
1734 .build()
1735 .unwrap();
1736 let outcome = client.check(&site_bot_protected(&server), &user()).await;
1737 assert_eq!(outcome.kind, MatchKind::Found);
1738 assert_eq!(backend.call_count(), 1, "browser invoked exactly once");
1739 }
1740
1741 #[tokio::test]
1742 async fn non_bot_protected_sites_skip_browser() {
1743 let server = MockServer::start().await;
1744 Mock::given(any())
1745 .and(path("/alice"))
1746 .respond_with(ResponseTemplate::new(200))
1747 .mount(&server)
1748 .await;
1749 let backend = Arc::new(RecordingBackend::with_page(RenderedPage {
1750 status: 500, final_url: url::Url::parse("https://x/").unwrap(),
1752 body: String::new(),
1753 elapsed_ms: 0,
1754 }));
1755 let client = Client::builder()
1756 .min_request_interval(Duration::ZERO)
1757 .max_retries(0)
1758 .browser(backend.clone())
1759 .build()
1760 .unwrap();
1761 let site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
1763 let outcome = client.check(&site, &user()).await;
1764 assert_eq!(outcome.kind, MatchKind::Found);
1765 assert_eq!(backend.call_count(), 0, "browser must not be touched");
1766 }
1767
1768 #[tokio::test]
1769 async fn browser_budget_exhaust_yields_uncertain() {
1770 let server = MockServer::start().await;
1771 let backend = Arc::new(RecordingBackend::with_page(RenderedPage {
1772 status: 200,
1773 final_url: url::Url::parse("https://x/").unwrap(),
1774 body: String::new(),
1775 elapsed_ms: 0,
1776 }));
1777 let client = Client::builder()
1778 .min_request_interval(Duration::ZERO)
1779 .max_retries(0)
1780 .browser(backend.clone())
1781 .browser_budget(1)
1782 .build()
1783 .unwrap();
1784 let site = site_bot_protected(&server);
1785 let first = client.check(&site, &user()).await;
1787 assert_eq!(first.kind, MatchKind::Found);
1788 let second = client.check(&site, &user()).await;
1790 assert_eq!(second.kind, MatchKind::Uncertain);
1791 assert!(matches!(
1792 second.reason,
1793 Some(UncertainReason::BrowserBudget)
1794 ));
1795 assert_eq!(
1796 backend.call_count(),
1797 1,
1798 "second call must not invoke backend"
1799 );
1800 }
1801
1802 #[tokio::test]
1803 async fn browser_failure_surfaces_as_uncertain_browser_failed() {
1804 struct FailingBackend;
1805 #[async_trait::async_trait]
1806 impl BrowserBackend for FailingBackend {
1807 async fn fetch(
1808 &self,
1809 _url: &url::Url,
1810 _headers: &std::collections::BTreeMap<String, String>,
1811 _timeout: Duration,
1812 ) -> Result<RenderedPage> {
1813 Err(Error::BrowserSetup {
1814 message: "simulated crash".into(),
1815 })
1816 }
1817 }
1818 impl std::fmt::Debug for FailingBackend {
1819 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1820 f.write_str("FailingBackend")
1821 }
1822 }
1823
1824 let server = MockServer::start().await;
1825 let client = Client::builder()
1826 .min_request_interval(Duration::ZERO)
1827 .max_retries(0)
1828 .browser(Arc::new(FailingBackend))
1829 .build()
1830 .unwrap();
1831 let outcome = client.check(&site_bot_protected(&server), &user()).await;
1832 assert_eq!(outcome.kind, MatchKind::Uncertain);
1833 match outcome.reason {
1834 Some(UncertainReason::BrowserFailed(msg)) => {
1835 assert!(msg.contains("simulated crash"), "got: {msg}");
1836 }
1837 other => panic!("expected BrowserFailed, got {other:?}"),
1838 }
1839 }
1840
1841 #[tokio::test]
1842 async fn status_only_site_uses_head_request() {
1843 let server = MockServer::start().await;
1847 Mock::given(method("HEAD"))
1848 .and(path("/alice"))
1849 .respond_with(ResponseTemplate::new(200))
1850 .mount(&server)
1851 .await;
1852 let site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
1853 let outcome = build_client().check(&site, &user()).await;
1854 assert_eq!(outcome.kind, MatchKind::Found);
1855 let recvd = server.received_requests().await.unwrap_or_default();
1856 assert_eq!(recvd.len(), 1);
1857 assert_eq!(recvd[0].method.as_str(), "HEAD");
1858 }
1859
1860 #[tokio::test]
1861 async fn body_signal_site_uses_get_request() {
1862 let server = MockServer::start().await;
1865 Mock::given(any())
1866 .and(path("/alice"))
1867 .respond_with(ResponseTemplate::new(200).set_body_string("hello alice"))
1868 .mount(&server)
1869 .await;
1870 let site = site_with(
1871 &server,
1872 vec![Signal::BodyPresent {
1873 text: "hello".into(),
1874 }],
1875 );
1876 let outcome = build_client().check(&site, &user()).await;
1877 assert_eq!(outcome.kind, MatchKind::Found);
1878 let recvd = server.received_requests().await.unwrap_or_default();
1879 assert_eq!(recvd[0].method.as_str(), "GET");
1880 }
1881
1882 #[tokio::test]
1883 async fn protection_field_routes_through_browser_like_bot_protected_tag() {
1884 let server = MockServer::start().await;
1889 Mock::given(any())
1890 .respond_with(ResponseTemplate::new(200))
1891 .mount(&server)
1892 .await;
1893 let mut site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
1894 site.protection = vec![crate::site::ProtectionKind::Cloudflare];
1895 let backend = Arc::new(RecordingBackend::with_page(RenderedPage {
1897 status: 200,
1898 final_url: url::Url::parse(&format!("{}/alice", server.uri())).unwrap(),
1899 body: String::new(),
1900 elapsed_ms: 0,
1901 }));
1902 let client = Client::builder()
1903 .min_request_interval(Duration::ZERO)
1904 .max_retries(0)
1905 .browser(backend)
1906 .build()
1907 .unwrap();
1908 let outcome = client.check(&site, &user()).await;
1909 assert_eq!(outcome.kind, MatchKind::Found);
1912 let recvd = server.received_requests().await.unwrap_or_default();
1914 assert_eq!(
1915 recvd.len(),
1916 0,
1917 "structured protection must skip the raw HTTP path"
1918 );
1919 }
1920
1921 #[tokio::test]
1922 async fn post_method_sends_body_with_username_substituted() {
1923 let server = MockServer::start().await;
1927 Mock::given(method("POST"))
1928 .and(path("/api"))
1929 .respond_with(ResponseTemplate::new(200))
1930 .mount(&server)
1931 .await;
1932 let site = Site {
1937 name: "ApiPost".into(),
1938 url: UrlTemplate::new(format!("{}/api?_={{username}}", server.uri())).unwrap(),
1939 signals: vec![Signal::StatusFound { codes: vec![200] }],
1940 known_present: None,
1941 known_absent: None,
1942 extract: Vec::new(),
1943 tags: Vec::new(),
1944 request_headers: std::collections::BTreeMap::new(),
1945 regex_check: None,
1946 engine: None,
1947 strip_bad_char: None,
1948 request_method: HttpMethod::Post,
1949 request_body: Some(r#"{"name":"{username}"}"#.into()),
1950 protection: Vec::new(),
1951 disabled: false,
1952 source: None,
1953 popularity: None,
1954 access: crate::AccessPolicy::default(),
1955 };
1956 let outcome = build_client().check(&site, &user()).await;
1957 assert_eq!(outcome.kind, MatchKind::Found);
1958 let recvd = server.received_requests().await.unwrap_or_default();
1959 assert_eq!(recvd.len(), 1);
1960 assert_eq!(recvd[0].method.as_str(), "POST");
1961 let body = String::from_utf8_lossy(&recvd[0].body).to_string();
1962 assert!(body.contains("\"name\":\"alice\""), "body was: {body}");
1963 }
1964
1965 #[tokio::test]
1966 async fn head_405_falls_back_to_get() {
1967 let server = MockServer::start().await;
1970 Mock::given(method("HEAD"))
1971 .and(path("/alice"))
1972 .respond_with(ResponseTemplate::new(405))
1973 .mount(&server)
1974 .await;
1975 Mock::given(any())
1976 .and(path("/alice"))
1977 .respond_with(ResponseTemplate::new(200))
1978 .mount(&server)
1979 .await;
1980 let site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
1981 let outcome = build_client().check(&site, &user()).await;
1982 assert_eq!(outcome.kind, MatchKind::Found);
1983 let recvd = server.received_requests().await.unwrap_or_default();
1984 assert_eq!(recvd.len(), 2);
1985 assert_eq!(recvd[0].method.as_str(), "HEAD");
1986 assert_eq!(recvd[1].method.as_str(), "GET");
1987 }
1988}