1use std::fmt;
11use std::num::NonZeroU32;
12use std::sync::Arc;
13use std::time::{Duration, Instant};
14
15use reqwest::redirect;
16
17use crate::access::{EgressChoice, EgressPool, EgressSpec};
18use crate::browser::{BrowserBackend, BrowserBudget};
19use crate::check::{CheckOutcome, MatchKind, UncertainReason};
20use crate::error::{Error, Result};
21use crate::retry::{self, RetryPolicy};
22use crate::robots::RobotsCache;
23use crate::site::{HttpMethod, Probe, Signal, SignalVerdict, Site, aggregate};
24use crate::throttle::HostThrottle;
25use crate::transport::{
26 BROWSER_TIMEOUT, BrowserFetcher, FetchError, FetchRequest, Fetcher, HttpFetcher,
27};
28use crate::username::Username;
29
/// Whole-request timeout a fresh [`ClientBuilder`] starts with.
const DEFAULT_TIMEOUT: Duration = Duration::from_secs(10);

/// Connect timeout a fresh [`ClientBuilder`] starts with.
const DEFAULT_CONNECT_TIMEOUT: Duration = Duration::from_secs(5);

/// Redirect cap handed to `redirect::Policy::limited` when redirects are followed.
const DEFAULT_REDIRECT_LIMIT: usize = 8;

/// Interval the per-host throttle is created with unless the builder overrides it.
const DEFAULT_PER_HOST_INTERVAL: Duration = Duration::from_millis(100);

/// Key every request waits on in the global throttle, whatever host it targets.
const GLOBAL_THROTTLE_KEY: &str = "*global*";
36
37#[derive(Clone)]
45pub struct Client {
46 http: Arc<HttpFetcher>,
47 egress: Arc<EgressPool>,
50 throttle: HostThrottle,
51 global_throttle: Option<HostThrottle>,
53 retry: RetryPolicy,
54 user_agents: Arc<[String]>,
57 enrich: bool,
59 robots: Option<RobotsCache>,
61 browser: Option<Arc<dyn BrowserBackend>>,
64 browser_budget: Arc<BrowserBudget>,
67}
68
69impl Client {
70 pub fn builder() -> ClientBuilder {
72 ClientBuilder::default()
73 }
74
75 #[tracing::instrument(skip(self), fields(site = %site.name, user = %username))]
89 pub async fn check(&self, site: &Site, username: &Username) -> CheckOutcome {
90 let mut attempt: u32 = 0;
91 loop {
92 let outcome = self.probe_once(site, username).await;
93 if !retry::should_retry(&outcome, attempt, &self.retry) {
94 return outcome;
95 }
96 let delay = retry::backoff_delay(attempt, &self.retry);
97 tracing::info!(
98 site = %site.name,
99 attempt = attempt + 1,
100 reason = outcome.reason.as_ref().map(ToString::to_string).unwrap_or_default(),
101 ?delay,
102 "transient ban, retrying",
103 );
104 tokio::time::sleep(delay).await;
105 attempt += 1;
106 }
107 }
108
109 pub async fn fetch(&self, url: &str) -> Option<RawResponse> {
118 let host = host_of(url);
119 if let Some(global) = &self.global_throttle {
120 global.wait(GLOBAL_THROTTLE_KEY).await;
121 }
122 self.throttle.wait(&host).await;
123 let mut request = self.http.client().get(url);
124 if let Some(ua) = self.pick_user_agent() {
125 request = request.header(reqwest::header::USER_AGENT, ua);
126 }
127 let response = request.send().await.ok()?;
128 let status = response.status().as_u16();
129 let final_url = response.url().to_string();
130 let body = response.text().await.unwrap_or_default();
131 Some(RawResponse {
132 status,
133 final_url,
134 body,
135 })
136 }
137
138 pub async fn fetch_for_doctor(&self, site: &Site, url: &str) -> Option<RawResponse> {
149 if let Some(backend) = self.browser.as_deref() {
150 let has_tag = site
151 .tags
152 .iter()
153 .any(|t| t.eq_ignore_ascii_case(BOT_PROTECTED_TAG));
154 if has_tag || !site.protection.is_empty() {
155 let parsed = url::Url::parse(url).ok()?;
156 match backend
157 .fetch(&parsed, &site.request_headers, BROWSER_TIMEOUT)
158 .await
159 {
160 Ok(page) => {
161 return Some(RawResponse {
162 status: page.status,
163 final_url: page.final_url.to_string(),
164 body: page.body,
165 });
166 }
167 Err(err) => {
168 tracing::warn!(
169 site = %site.name, %url, error = %err,
170 "browser fetch failed in doctor; falling back to raw HTTP",
171 );
172 }
173 }
174 }
175 }
176 self.fetch(url).await
177 }
178
179 fn pick_user_agent(&self) -> Option<&str> {
182 match self.user_agents.len() {
183 0 => None,
184 1 => Some(&self.user_agents[0]),
185 n => Some(&self.user_agents[fastrand::usize(0..n)]),
186 }
187 }
188
189 #[allow(clippy::too_many_lines)]
192 async fn probe_once(&self, site: &Site, username: &Username) -> CheckOutcome {
193 let url = site.url_for(username);
194
195 if let Some(pat) = &site.regex_check {
205 if let Ok(re) = regex::Regex::new(pat) {
206 if !re.is_match(username.as_str()) {
207 return uncertain(
208 &site.name,
209 url,
210 Instant::now(),
211 UncertainReason::UsernameNotAllowed,
212 );
213 }
214 }
215 }
216
217 if let Some(backend) = &self.browser {
224 let has_tag = site
225 .tags
226 .iter()
227 .any(|t| t.eq_ignore_ascii_case(BOT_PROTECTED_TAG));
228 if has_tag || !site.protection.is_empty() {
229 if self.browser_budget.try_consume() {
230 let started = Instant::now();
231 let req = FetchRequest {
232 method: site.request_method,
233 url: &url,
234 body: None,
235 user_agent: None,
236 headers: &site.request_headers,
237 want_body: true,
238 };
239 let fetcher = BrowserFetcher::new(Arc::clone(backend));
240 return match fetcher.fetch(&req).await {
241 Ok(resp) => self.finish(site, url, started, &resp),
242 Err(FetchError(reason)) => uncertain(&site.name, url, started, reason),
243 };
244 }
245 tracing::warn!(site = %site.name, "browser budget exhausted");
246 return uncertain(
247 &site.name,
248 url,
249 Instant::now(),
250 UncertainReason::BrowserBudget,
251 );
252 }
253 }
254
255 let egress: Arc<HttpFetcher> = match self.egress.select(&site.access) {
262 EgressChoice::Default => Arc::clone(&self.http),
263 EgressChoice::Use(fetcher) => fetcher,
264 EgressChoice::Unavailable => {
265 return uncertain(
266 &site.name,
267 url,
268 Instant::now(),
269 UncertainReason::GeoUnavailable,
270 );
271 }
272 };
273
274 let host = host_of(&url);
275
276 if let Some(robots) = &self.robots {
278 if let Some((origin, path)) = origin_and_path(&url) {
279 if !robots.allowed(&origin, &path).await {
280 tracing::debug!(%url, "skipped by robots.txt");
281 return uncertain(
282 &site.name,
283 url,
284 Instant::now(),
285 UncertainReason::RobotsDisallowed,
286 );
287 }
288 }
289 }
290
291 if let Some(global) = &self.global_throttle {
293 global.wait(GLOBAL_THROTTLE_KEY).await;
294 }
295 self.throttle.wait(&host).await;
296 let started = Instant::now();
297 tracing::debug!(%url, %host, "probing");
298
299 let want_enrich = self.enrich && !site.extract.is_empty();
302 let needs_body = want_enrich || site.signals.iter().any(crate::site::Signal::needs_body);
303
304 let body_for_post: Option<String> = if matches!(site.request_method, HttpMethod::Post) {
309 const USERNAME_PH: &str = "{username}";
310 site.request_body
311 .as_deref()
312 .map(|t| t.replace(USERNAME_PH, username.as_str()))
313 } else {
314 None
315 };
316
317 let req = FetchRequest {
318 method: site.request_method,
319 url: &url,
320 body: body_for_post.as_deref(),
321 user_agent: self.pick_user_agent(),
322 headers: &site.request_headers,
323 want_body: needs_body,
324 };
325 match egress.fetch(&req).await {
326 Ok(resp) => self.finish(site, url, started, &resp),
327 Err(FetchError(reason)) => uncertain(&site.name, url, started, reason),
328 }
329 }
330
331 fn finish(
335 &self,
336 site: &Site,
337 url: String,
338 started: Instant,
339 resp: &crate::transport::FetchResponse,
340 ) -> CheckOutcome {
341 let probe = Probe {
342 status: resp.status,
343 final_url: &resp.final_url,
344 body: &resp.body,
345 };
346 let votes: Vec<(&Signal, SignalVerdict)> = site
347 .signals
348 .iter()
349 .map(|s| (s, s.evaluate(&probe)))
350 .collect();
351 let kind = aggregate(votes.iter().map(|(_, v)| *v));
352 let mut result = outcome(&site.name, url, started, kind);
353 let winning = match kind {
355 MatchKind::Found => Some(SignalVerdict::Found),
356 MatchKind::NotFound => Some(SignalVerdict::NotFound),
357 MatchKind::Uncertain => None,
358 };
359 if let Some(want) = winning {
360 result.evidence = votes
361 .iter()
362 .filter(|(_, v)| *v == want)
363 .map(|(s, _)| s.describe_match(&probe))
364 .collect();
365 }
366 if self.enrich && kind == MatchKind::Found && !site.extract.is_empty() {
367 result.enrichment = crate::enrich::extract(&resp.body, &site.extract);
368 }
369 result
370 }
371}
372
/// Unprocessed result of a fetch: no signal evaluation has been applied to it.
#[derive(Debug, Clone)]
pub struct RawResponse {
    /// Numeric HTTP status code of the response.
    pub status: u16,
    /// URL the response was finally served from, rendered as a string.
    pub final_url: String,
    /// Response body as text; empty when the body could not be read.
    pub body: String,
}
383
384#[derive(Clone)]
386#[must_use = "ClientBuilder does nothing until `.build()` is called"]
387pub struct ClientBuilder {
388 timeout: Duration,
389 connect_timeout: Duration,
390 user_agent: String,
391 follow_redirects: bool,
392 redirect_limit: usize,
393 min_request_interval: Duration,
394 max_rps: Option<NonZeroU32>,
395 retry: RetryPolicy,
396 proxy: Option<String>,
397 user_agents: Vec<String>,
398 enrich: bool,
399 respect_robots: bool,
400 browser: Option<Arc<dyn BrowserBackend>>,
401 browser_budget: usize,
402 egress: Vec<EgressSpec>,
403}
404
405impl Default for ClientBuilder {
406 fn default() -> Self {
407 Self {
408 timeout: DEFAULT_TIMEOUT,
409 connect_timeout: DEFAULT_CONNECT_TIMEOUT,
410 user_agent: default_user_agent(),
411 follow_redirects: true,
412 redirect_limit: DEFAULT_REDIRECT_LIMIT,
413 min_request_interval: DEFAULT_PER_HOST_INTERVAL,
414 max_rps: None,
415 retry: RetryPolicy::default(),
416 proxy: None,
417 user_agents: Vec::new(),
418 enrich: false,
419 respect_robots: false,
420 browser: None,
421 browser_budget: DEFAULT_BROWSER_BUDGET,
422 egress: Vec::new(),
423 }
424 }
425}
426
427impl ClientBuilder {
428 pub fn timeout(mut self, timeout: Duration) -> Self {
430 self.timeout = timeout;
431 self
432 }
433
434 pub fn connect_timeout(mut self, timeout: Duration) -> Self {
436 self.connect_timeout = timeout;
437 self
438 }
439
440 pub fn user_agent(mut self, user_agent: impl Into<String>) -> Self {
442 self.user_agent = user_agent.into();
443 self
444 }
445
446 pub fn follow_redirects(mut self, follow: bool) -> Self {
449 self.follow_redirects = follow;
450 self
451 }
452
453 pub fn min_request_interval(mut self, interval: Duration) -> Self {
459 self.min_request_interval = interval;
460 self
461 }
462
463 pub fn max_rps(mut self, rps: NonZeroU32) -> Self {
468 self.max_rps = Some(rps);
469 self
470 }
471
472 pub fn max_retries(mut self, n: u32) -> Self {
475 self.retry.max_retries = n;
476 self
477 }
478
479 pub fn base_backoff_delay(mut self, d: Duration) -> Self {
482 self.retry.base_delay = d;
483 self
484 }
485
486 pub fn max_backoff_delay(mut self, d: Duration) -> Self {
488 self.retry.max_delay = d;
489 self
490 }
491
492 pub fn proxy(mut self, url: impl Into<String>) -> Self {
495 self.proxy = Some(url.into());
496 self
497 }
498
499 pub fn rotate_user_agents(mut self, agents: Vec<String>) -> Self {
503 self.user_agents = agents;
504 self
505 }
506
507 pub fn enrich(mut self, enrich: bool) -> Self {
510 self.enrich = enrich;
511 self
512 }
513
514 pub fn respect_robots(mut self, respect: bool) -> Self {
518 self.respect_robots = respect;
519 self
520 }
521
522 pub fn browser(mut self, backend: Arc<dyn BrowserBackend>) -> Self {
526 self.browser = Some(backend);
527 self
528 }
529
530 pub const fn browser_budget(mut self, cap: usize) -> Self {
535 self.browser_budget = cap;
536 self
537 }
538
539 pub fn egress_pool(mut self, egress: Vec<EgressSpec>) -> Self {
544 self.egress = egress;
545 self
546 }
547
548 pub fn build(self) -> Result<Client> {
550 let inner = build_reqwest(
551 &self.user_agent,
552 self.timeout,
553 self.connect_timeout,
554 self.follow_redirects,
555 self.redirect_limit,
556 self.proxy.as_deref(),
557 )?;
558
559 let mut egress_entries = Vec::with_capacity(self.egress.len());
563 for spec in &self.egress {
564 let client = build_reqwest(
565 &self.user_agent,
566 self.timeout,
567 self.connect_timeout,
568 self.follow_redirects,
569 self.redirect_limit,
570 Some(&spec.url),
571 )?;
572 egress_entries.push((
573 spec.country.clone(),
574 spec.kind,
575 Arc::new(HttpFetcher::new(client)),
576 ));
577 }
578
579 let global_throttle = self.max_rps.map(|rps| {
580 let interval = Duration::from_secs(1) / rps.get();
582 HostThrottle::new(interval)
583 });
584 let robots = self
585 .respect_robots
586 .then(|| RobotsCache::new(inner.clone(), "adler"));
587 Ok(Client {
588 http: Arc::new(HttpFetcher::new(inner)),
589 egress: Arc::new(EgressPool::new(egress_entries)),
590 throttle: HostThrottle::new(self.min_request_interval),
591 global_throttle,
592 retry: self.retry,
593 user_agents: Arc::from(self.user_agents),
594 enrich: self.enrich,
595 robots,
596 browser: self.browser,
597 browser_budget: Arc::new(BrowserBudget::new(self.browser_budget)),
598 })
599 }
600}
601
602fn build_reqwest(
606 user_agent: &str,
607 timeout: Duration,
608 connect_timeout: Duration,
609 follow_redirects: bool,
610 redirect_limit: usize,
611 proxy: Option<&str>,
612) -> Result<reqwest::Client> {
613 let redirect_policy = if follow_redirects {
614 redirect::Policy::limited(redirect_limit)
615 } else {
616 redirect::Policy::none()
617 };
618 let mut builder = reqwest::Client::builder()
619 .user_agent(user_agent.to_owned())
620 .timeout(timeout)
621 .connect_timeout(connect_timeout)
622 .redirect(redirect_policy);
623 if let Some(proxy_url) = proxy {
624 const SCHEMES: [&str; 4] = ["http://", "https://", "socks5://", "socks5h://"];
628 if !SCHEMES.iter().any(|s| proxy_url.starts_with(s)) {
629 return Err(Error::HttpSetup {
630 message: format!(
631 "invalid proxy {proxy_url:?}: must start with one of {}",
632 SCHEMES.join(", ")
633 ),
634 });
635 }
636 let proxy = reqwest::Proxy::all(proxy_url).map_err(|e| Error::HttpSetup {
637 message: format!("invalid proxy {proxy_url:?}: {e}"),
638 })?;
639 builder = builder.proxy(proxy);
640 }
641 builder.build().map_err(|e| Error::HttpSetup {
642 message: e.to_string(),
643 })
644}
645
/// Browser-budget capacity a [`ClientBuilder`] starts with; each browser-backed probe
/// consumes one unit.
pub const DEFAULT_BROWSER_BUDGET: usize = 50;
653
654impl fmt::Debug for Client {
655 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
656 f.debug_struct("Client")
657 .field("throttle", &self.throttle)
658 .field("global_throttle", &self.global_throttle)
659 .field("retry", &self.retry)
660 .field("user_agents", &self.user_agents)
661 .field("enrich", &self.enrich)
662 .field("robots", &self.robots.is_some())
663 .field("browser", &self.browser.is_some())
664 .field("browser_budget", &self.browser_budget)
665 .finish_non_exhaustive()
666 }
667}
668
669impl fmt::Debug for ClientBuilder {
670 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
671 f.debug_struct("ClientBuilder")
672 .field("timeout", &self.timeout)
673 .field("connect_timeout", &self.connect_timeout)
674 .field("user_agent", &self.user_agent)
675 .field("follow_redirects", &self.follow_redirects)
676 .field("redirect_limit", &self.redirect_limit)
677 .field("min_request_interval", &self.min_request_interval)
678 .field("max_rps", &self.max_rps)
679 .field("retry", &self.retry)
680 .field("proxy", &self.proxy)
681 .field("user_agents", &self.user_agents)
682 .field("enrich", &self.enrich)
683 .field("respect_robots", &self.respect_robots)
684 .field("browser", &self.browser.is_some())
685 .field("browser_budget", &self.browser_budget)
686 .field("egress", &self.egress)
687 .finish()
688 }
689}
690
/// Site tag (matched ASCII-case-insensitively) that routes a site to the browser
/// backend when one is installed.
const BOT_PROTECTED_TAG: &str = "bot-protected";
692
693fn default_user_agent() -> String {
694 format!("adler/{}", env!("CARGO_PKG_VERSION"))
695}
696
697fn host_of(url: &str) -> String {
698 reqwest::Url::parse(url)
699 .ok()
700 .and_then(|u| u.host_str().map(str::to_owned))
701 .unwrap_or_else(|| "unknown".into())
702}
703
704fn origin_and_path(url: &str) -> Option<(String, String)> {
707 let parsed = reqwest::Url::parse(url).ok()?;
708 let host = parsed.host_str()?;
709 let port = parsed.port().map_or_else(String::new, |p| format!(":{p}"));
710 let origin = format!("{}://{host}{port}", parsed.scheme());
711 let path = parsed.query().map_or_else(
712 || parsed.path().to_owned(),
713 |q| format!("{}?{q}", parsed.path()),
714 );
715 Some((origin, path))
716}
717
718fn outcome(site: &str, url: String, started: Instant, kind: MatchKind) -> CheckOutcome {
719 CheckOutcome {
720 site: site.to_owned(),
721 url,
722 kind,
723 reason: None,
724 elapsed_ms: elapsed_ms(started),
725 enrichment: std::collections::BTreeMap::new(),
726 evidence: Vec::new(),
727 }
728}
729
730fn uncertain(site: &str, url: String, started: Instant, reason: UncertainReason) -> CheckOutcome {
731 CheckOutcome {
732 site: site.to_owned(),
733 url,
734 kind: MatchKind::Uncertain,
735 reason: Some(reason),
736 elapsed_ms: elapsed_ms(started),
737 enrichment: std::collections::BTreeMap::new(),
738 evidence: Vec::new(),
739 }
740}
741
/// Whole milliseconds elapsed since `started`, saturating at `u64::MAX` when the value
/// does not fit in a `u64`.
fn elapsed_ms(started: Instant) -> u64 {
    let millis: u128 = started.elapsed().as_millis();
    match u64::try_from(millis) {
        Ok(ms) => ms,
        Err(_) => u64::MAX,
    }
}
745
746#[cfg(test)]
747mod tests {
748 use super::*;
749 use crate::browser::RenderedPage;
750 use crate::site::{Signal, UrlTemplate};
751 use wiremock::matchers::{any, method, path};
752 use wiremock::{Mock, MockServer, ResponseTemplate};
753
754 fn build_client() -> Client {
755 Client::builder()
756 .timeout(Duration::from_secs(2))
757 .min_request_interval(Duration::ZERO)
760 .max_retries(0)
763 .build()
764 .expect("client builds")
765 }
766
767 fn site_with(server: &MockServer, signals: Vec<Signal>) -> Site {
768 Site {
769 name: "Mock".into(),
770 url: UrlTemplate::new(format!("{}/{{username}}", server.uri())).unwrap(),
771 signals,
772 known_present: None,
773 known_absent: None,
774 extract: Vec::new(),
775 tags: Vec::new(),
776 request_headers: std::collections::BTreeMap::new(),
777 regex_check: None,
778 engine: None,
779 strip_bad_char: None,
780 request_method: crate::site::HttpMethod::Get,
781 request_body: None,
782 protection: Vec::new(),
783 disabled: false,
784 source: None,
785 popularity: None,
786 access: crate::AccessPolicy::default(),
787 }
788 }
789
790 fn user() -> Username {
791 Username::new("alice").unwrap()
792 }
793
794 #[tokio::test]
795 async fn regex_check_short_circuits_before_any_request() {
796 let server = MockServer::start().await;
800 Mock::given(any())
801 .respond_with(ResponseTemplate::new(200))
802 .mount(&server)
803 .await;
804 let mut site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
805 site.regex_check = Some("^[A-Za-z]{8,}$".into());
807 let outcome = build_client().check(&site, &user()).await;
808 assert_eq!(outcome.kind, MatchKind::Uncertain);
809 assert!(
810 matches!(outcome.reason, Some(UncertainReason::UsernameNotAllowed)),
811 "expected UsernameNotAllowed, got {:?}",
812 outcome.reason,
813 );
814 let recvd = server.received_requests().await.unwrap_or_default();
817 assert_eq!(
818 recvd.len(),
819 0,
820 "regex_check mismatch must skip the HTTP request entirely"
821 );
822 }
823
824 #[tokio::test]
825 async fn geo_constrained_site_with_no_egress_is_geo_unavailable() {
826 let server = MockServer::start().await;
829 Mock::given(any())
830 .respond_with(ResponseTemplate::new(200))
831 .mount(&server)
832 .await;
833 let mut site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
834 site.access = crate::access::AccessPolicy {
837 geo: vec![crate::access::CountryCode::new("pl").unwrap()],
838 ip_type: None,
839 };
840 let outcome = build_client().check(&site, &user()).await;
841 assert_eq!(outcome.kind, MatchKind::Uncertain);
842 assert!(
843 matches!(outcome.reason, Some(UncertainReason::GeoUnavailable)),
844 "expected GeoUnavailable, got {:?}",
845 outcome.reason,
846 );
847 let recvd = server.received_requests().await.unwrap_or_default();
850 assert_eq!(
851 recvd.len(),
852 0,
853 "geo-unavailable must skip the HTTP request entirely"
854 );
855 }
856
857 #[tokio::test]
858 async fn regex_check_pass_proceeds_to_probe() {
859 let server = MockServer::start().await;
860 Mock::given(any())
861 .and(path("/alice"))
862 .respond_with(ResponseTemplate::new(200))
863 .mount(&server)
864 .await;
865 let mut site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
866 site.regex_check = Some("^[a-z]{3,}$".into());
868 let outcome = build_client().check(&site, &user()).await;
869 assert_eq!(outcome.kind, MatchKind::Found);
870 }
871
872 #[tokio::test]
873 async fn status_signal_reports_found_on_match() {
874 let server = MockServer::start().await;
875 Mock::given(any())
876 .and(path("/alice"))
877 .respond_with(ResponseTemplate::new(200))
878 .mount(&server)
879 .await;
880 let site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
881 let outcome = build_client().check(&site, &user()).await;
882 assert_eq!(outcome.kind, MatchKind::Found);
883 assert!(outcome.url.ends_with("/alice"));
884 assert!(outcome.reason.is_none());
885 assert_eq!(outcome.evidence, ["HTTP 200 (status_found)"]);
886 }
887
888 #[tokio::test]
889 async fn status_signal_pair_reports_not_found_on_404() {
890 let server = MockServer::start().await;
891 Mock::given(any())
892 .and(path("/alice"))
893 .respond_with(ResponseTemplate::new(404))
894 .mount(&server)
895 .await;
896 let site = site_with(
897 &server,
898 vec![
899 Signal::StatusFound { codes: vec![200] },
900 Signal::StatusNotFound { codes: vec![404] },
901 ],
902 );
903 let outcome = build_client().check(&site, &user()).await;
904 assert_eq!(outcome.kind, MatchKind::NotFound);
905 assert_eq!(outcome.evidence, ["HTTP 404 (status_not_found)"]);
907 }
908
909 #[tokio::test]
910 async fn body_absent_signal_detects_missing_account() {
911 let server = MockServer::start().await;
912 Mock::given(any())
913 .and(path("/alice"))
914 .respond_with(ResponseTemplate::new(200).set_body_string("<h1>Profile not found</h1>"))
915 .mount(&server)
916 .await;
917 let site = site_with(
918 &server,
919 vec![Signal::BodyAbsent {
920 text: "Profile not found".into(),
921 }],
922 );
923 let outcome = build_client().check(&site, &user()).await;
924 assert_eq!(outcome.kind, MatchKind::NotFound);
925 }
926
927 #[tokio::test]
928 async fn body_absent_alone_yields_uncertain_when_marker_missing() {
929 let server = MockServer::start().await;
932 Mock::given(any())
933 .and(path("/alice"))
934 .respond_with(ResponseTemplate::new(200).set_body_string("<h1>Welcome alice</h1>"))
935 .mount(&server)
936 .await;
937 let site = site_with(
938 &server,
939 vec![Signal::BodyAbsent {
940 text: "Profile not found".into(),
941 }],
942 );
943 let outcome = build_client().check(&site, &user()).await;
944 assert_eq!(outcome.kind, MatchKind::Uncertain);
945 }
946
947 #[tokio::test]
948 async fn body_present_plus_absent_resolve_to_found() {
949 let server = MockServer::start().await;
950 Mock::given(any())
951 .and(path("/alice"))
952 .respond_with(
953 ResponseTemplate::new(200)
954 .set_body_string(r#"<div class="profile-card">alice</div>"#),
955 )
956 .mount(&server)
957 .await;
958 let site = site_with(
959 &server,
960 vec![
961 Signal::BodyPresent {
962 text: "profile-card".into(),
963 },
964 Signal::BodyAbsent {
965 text: "Profile not found".into(),
966 },
967 ],
968 );
969 let outcome = build_client().check(&site, &user()).await;
970 assert_eq!(outcome.kind, MatchKind::Found);
971 }
972
973 #[tokio::test]
974 async fn redirect_absent_signal_detects_missing_account() {
975 let server = MockServer::start().await;
976 Mock::given(any())
977 .and(path("/alice"))
978 .respond_with(
979 ResponseTemplate::new(302).insert_header("location", "/login?next=/alice"),
980 )
981 .mount(&server)
982 .await;
983 Mock::given(any())
984 .and(path("/login"))
985 .respond_with(ResponseTemplate::new(200).set_body_string("login page"))
986 .mount(&server)
987 .await;
988 let site = site_with(
989 &server,
990 vec![Signal::RedirectAbsent {
991 fragment: "/login".into(),
992 }],
993 );
994 let outcome = build_client().check(&site, &user()).await;
995 assert_eq!(outcome.kind, MatchKind::NotFound);
996 }
997
998 #[tokio::test]
999 async fn negative_signal_wins_over_positive() {
1000 let server = MockServer::start().await;
1005 Mock::given(any())
1006 .and(path("/alice"))
1007 .respond_with(ResponseTemplate::new(200).set_body_string("Profile not found"))
1008 .mount(&server)
1009 .await;
1010 let site = site_with(
1011 &server,
1012 vec![
1013 Signal::StatusFound { codes: vec![200] },
1014 Signal::BodyAbsent {
1015 text: "Profile not found".into(),
1016 },
1017 ],
1018 );
1019 let outcome = build_client().check(&site, &user()).await;
1020 assert_eq!(outcome.kind, MatchKind::NotFound);
1021 }
1022
1023 #[tokio::test]
1024 async fn network_failure_yields_uncertain() {
1025 let listener = std::net::TcpListener::bind("127.0.0.1:0").unwrap();
1026 let port = listener.local_addr().unwrap().port();
1027 drop(listener);
1028
1029 let site = Site {
1030 name: "Dead".into(),
1031 url: UrlTemplate::new(format!("http://127.0.0.1:{port}/{{username}}")).unwrap(),
1032 signals: vec![Signal::StatusFound { codes: vec![200] }],
1033 known_present: None,
1034 known_absent: None,
1035 extract: Vec::new(),
1036 tags: Vec::new(),
1037 request_headers: std::collections::BTreeMap::new(),
1038 regex_check: None,
1039 engine: None,
1040 strip_bad_char: None,
1041 request_method: crate::site::HttpMethod::Get,
1042 request_body: None,
1043 protection: Vec::new(),
1044 disabled: false,
1045 source: None,
1046 popularity: None,
1047 access: crate::AccessPolicy::default(),
1048 };
1049 let client = Client::builder()
1050 .timeout(Duration::from_millis(500))
1051 .connect_timeout(Duration::from_millis(500))
1052 .max_retries(0)
1053 .build()
1054 .unwrap();
1055 let outcome = client.check(&site, &user()).await;
1056 assert_eq!(outcome.kind, MatchKind::Uncertain);
1057 assert!(outcome.reason.is_some());
1058 }
1059
1060 #[tokio::test]
1061 async fn throttle_spaces_consecutive_calls_to_same_host() {
1062 let server = MockServer::start().await;
1063 Mock::given(any())
1064 .and(path("/alice"))
1065 .respond_with(ResponseTemplate::new(200))
1066 .mount(&server)
1067 .await;
1068 let site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
1069 let client = Client::builder()
1074 .timeout(Duration::from_secs(2))
1075 .min_request_interval(Duration::from_millis(300))
1076 .build()
1077 .unwrap();
1078
1079 client.check(&site, &user()).await;
1080 let started = Instant::now();
1081 client.check(&site, &user()).await;
1082 let elapsed = started.elapsed();
1083 assert!(
1084 elapsed >= Duration::from_millis(200),
1085 "second probe to the same host should wait ≥200 ms, got {elapsed:?}",
1086 );
1087 }
1088
1089 #[tokio::test]
1090 async fn builder_overrides_user_agent() {
1091 let server = MockServer::start().await;
1092 Mock::given(any())
1093 .and(path("/alice"))
1094 .and(wiremock::matchers::header("user-agent", "adler-test/1.0"))
1095 .respond_with(ResponseTemplate::new(200))
1096 .mount(&server)
1097 .await;
1098 let site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
1099 let client = Client::builder()
1100 .user_agent("adler-test/1.0")
1101 .build()
1102 .unwrap();
1103 let outcome = client.check(&site, &user()).await;
1104 assert_eq!(outcome.kind, MatchKind::Found);
1105 }
1106
1107 #[tokio::test]
1108 async fn rate_limit_429_yields_uncertain_with_note() {
1109 let server = MockServer::start().await;
1110 Mock::given(any())
1111 .and(path("/alice"))
1112 .respond_with(ResponseTemplate::new(429))
1113 .mount(&server)
1114 .await;
1115 let site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
1116 let outcome = build_client().check(&site, &user()).await;
1117 assert_eq!(outcome.kind, MatchKind::Uncertain);
1118 assert_eq!(outcome.reason, Some(UncertainReason::RateLimited));
1119 }
1120
1121 #[tokio::test]
1122 async fn cloudflare_server_header_yields_uncertain() {
1123 let server = MockServer::start().await;
1124 Mock::given(any())
1125 .and(path("/alice"))
1126 .respond_with(ResponseTemplate::new(503).insert_header("server", "cloudflare"))
1127 .mount(&server)
1128 .await;
1129 let site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
1130 let outcome = build_client().check(&site, &user()).await;
1131 assert_eq!(outcome.kind, MatchKind::Uncertain);
1132 assert_eq!(outcome.reason, Some(UncertainReason::CloudflareChallenge));
1133 }
1134
1135 #[tokio::test]
1136 async fn cloudflare_interstitial_in_body_yields_uncertain() {
1137 let server = MockServer::start().await;
1140 Mock::given(any())
1141 .and(path("/alice"))
1142 .respond_with(
1143 ResponseTemplate::new(200)
1144 .set_body_string("<html><head><title>Just a moment...</title></head></html>"),
1145 )
1146 .mount(&server)
1147 .await;
1148 let site = site_with(
1149 &server,
1150 vec![Signal::BodyAbsent {
1151 text: "Profile not found".into(),
1152 }],
1153 );
1154 let outcome = build_client().check(&site, &user()).await;
1155 assert_eq!(outcome.kind, MatchKind::Uncertain);
1156 assert_eq!(outcome.reason, Some(UncertainReason::CloudflareChallenge));
1157 }
1158
1159 #[tokio::test]
1160 async fn ban_detection_does_not_fire_on_legitimate_403() {
1161 let server = MockServer::start().await;
1162 Mock::given(any())
1163 .and(path("/alice"))
1164 .respond_with(ResponseTemplate::new(403))
1165 .mount(&server)
1166 .await;
1167 let site = site_with(
1168 &server,
1169 vec![
1170 Signal::StatusFound { codes: vec![200] },
1171 Signal::StatusNotFound { codes: vec![403] },
1172 ],
1173 );
1174 let outcome = build_client().check(&site, &user()).await;
1175 assert_eq!(outcome.kind, MatchKind::NotFound);
1177 assert!(outcome.reason.is_none());
1178 }
1179
1180 #[tokio::test]
1181 async fn retry_recovers_after_transient_429() {
1182 let server = MockServer::start().await;
1183 Mock::given(any())
1185 .and(path("/alice"))
1186 .respond_with(ResponseTemplate::new(429))
1187 .up_to_n_times(1)
1188 .mount(&server)
1189 .await;
1190 Mock::given(any())
1191 .and(path("/alice"))
1192 .respond_with(ResponseTemplate::new(200))
1193 .mount(&server)
1194 .await;
1195 let site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
1196 let client = Client::builder()
1197 .timeout(Duration::from_secs(2))
1198 .min_request_interval(Duration::ZERO)
1199 .max_retries(2)
1200 .base_backoff_delay(Duration::from_millis(20))
1201 .max_backoff_delay(Duration::from_millis(100))
1202 .build()
1203 .unwrap();
1204 let outcome = client.check(&site, &user()).await;
1205 assert_eq!(outcome.kind, MatchKind::Found);
1206 assert!(outcome.reason.is_none());
1207 }
1208
1209 #[tokio::test]
1210 async fn retry_exhausts_and_returns_uncertain() {
1211 let server = MockServer::start().await;
1212 Mock::given(any())
1213 .and(path("/alice"))
1214 .respond_with(ResponseTemplate::new(429))
1215 .mount(&server)
1216 .await;
1217 let site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
1218 let client = Client::builder()
1219 .timeout(Duration::from_secs(2))
1220 .min_request_interval(Duration::ZERO)
1221 .max_retries(2)
1222 .base_backoff_delay(Duration::from_millis(10))
1223 .max_backoff_delay(Duration::from_millis(50))
1224 .build()
1225 .unwrap();
1226 let outcome = client.check(&site, &user()).await;
1227 assert_eq!(outcome.kind, MatchKind::Uncertain);
1228 assert_eq!(outcome.reason, Some(UncertainReason::RateLimited));
1229 }
1230
1231 #[tokio::test]
1232 async fn retry_does_not_fire_on_network_error() {
1233 let listener = std::net::TcpListener::bind("127.0.0.1:0").unwrap();
1237 let port = listener.local_addr().unwrap().port();
1238 drop(listener);
1239 let site = Site {
1240 name: "Dead".into(),
1241 url: UrlTemplate::new(format!("http://127.0.0.1:{port}/{{username}}")).unwrap(),
1242 signals: vec![Signal::StatusFound { codes: vec![200] }],
1243 known_present: None,
1244 known_absent: None,
1245 extract: Vec::new(),
1246 tags: Vec::new(),
1247 request_headers: std::collections::BTreeMap::new(),
1248 regex_check: None,
1249 engine: None,
1250 strip_bad_char: None,
1251 request_method: crate::site::HttpMethod::Get,
1252 request_body: None,
1253 protection: Vec::new(),
1254 disabled: false,
1255 source: None,
1256 popularity: None,
1257 access: crate::AccessPolicy::default(),
1258 };
1259 let client = Client::builder()
1260 .timeout(Duration::from_millis(500))
1261 .connect_timeout(Duration::from_millis(500))
1262 .min_request_interval(Duration::ZERO)
1263 .max_retries(3)
1264 .base_backoff_delay(Duration::from_secs(60))
1265 .build()
1266 .unwrap();
1267 let started = Instant::now();
1268 let outcome = client.check(&site, &user()).await;
1269 assert!(started.elapsed() < Duration::from_secs(5));
1272 assert_eq!(outcome.kind, MatchKind::Uncertain);
1273 assert!(
1274 matches!(outcome.reason, Some(UncertainReason::Network(_))),
1275 "got {:?}",
1276 outcome.reason,
1277 );
1278 }
1279
1280 #[tokio::test]
1281 async fn rotates_user_agent_per_request() {
1282 let server = MockServer::start().await;
1286 Mock::given(any())
1287 .and(path("/alice"))
1288 .and(wiremock::matchers::header("user-agent", "RotatorUA/9.9"))
1289 .respond_with(ResponseTemplate::new(200))
1290 .mount(&server)
1291 .await;
1292 let site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
1293 let client = Client::builder()
1294 .min_request_interval(Duration::ZERO)
1295 .max_retries(0)
1296 .rotate_user_agents(vec!["RotatorUA/9.9".into()])
1297 .build()
1298 .unwrap();
1299 let outcome = client.check(&site, &user()).await;
1300 assert_eq!(outcome.kind, MatchKind::Found);
1301 }
1302
/// A proxy string that is not a URL must make `build()` fail with
/// `Error::HttpSetup`.
#[test]
fn invalid_proxy_url_fails_build() {
    let built = Client::builder().proxy("not a url").build();
    let failure = built.unwrap_err();
    let is_http_setup = matches!(failure, Error::HttpSetup { .. });
    assert!(is_http_setup);
}
1308
/// A proxy string without a scheme must be rejected at build time with an
/// `Error::HttpSetup` whose message contains "must start with".
#[test]
fn schemeless_proxy_is_rejected_up_front() {
    let built = Client::builder().proxy("not-a-url").build();
    let message = match built.unwrap_err() {
        Error::HttpSetup { message } => message,
        err => panic!("expected HttpSetup, got {err:?}"),
    };
    assert!(message.contains("must start with"), "{message}");
}
1318
/// Building a client with a `socks5://` proxy URL must succeed.
#[test]
fn socks5_proxy_scheme_is_accepted() {
    let built = Client::builder().proxy("socks5://127.0.0.1:9050").build();
    assert!(built.is_ok());
}
1329
1330 #[tokio::test]
1331 async fn global_rps_cap_spaces_requests_across_hosts() {
1332 let server = MockServer::start().await;
1335 Mock::given(any())
1336 .respond_with(ResponseTemplate::new(200))
1337 .mount(&server)
1338 .await;
1339 let site_a = Site {
1340 name: "A".into(),
1341 url: UrlTemplate::new(format!("{}/a/{{username}}", server.uri())).unwrap(),
1342 signals: vec![Signal::StatusFound { codes: vec![200] }],
1343 known_present: None,
1344 known_absent: None,
1345 extract: Vec::new(),
1346 tags: Vec::new(),
1347 request_headers: std::collections::BTreeMap::new(),
1348 regex_check: None,
1349 engine: None,
1350 strip_bad_char: None,
1351 request_method: crate::site::HttpMethod::Get,
1352 request_body: None,
1353 protection: Vec::new(),
1354 disabled: false,
1355 source: None,
1356 popularity: None,
1357 access: crate::AccessPolicy::default(),
1358 };
1359 let site_b = Site {
1360 name: "B".into(),
1361 url: UrlTemplate::new(format!("{}/b/{{username}}", server.uri())).unwrap(),
1362 signals: vec![Signal::StatusFound { codes: vec![200] }],
1363 known_present: None,
1364 known_absent: None,
1365 extract: Vec::new(),
1366 tags: Vec::new(),
1367 request_headers: std::collections::BTreeMap::new(),
1368 regex_check: None,
1369 engine: None,
1370 strip_bad_char: None,
1371 request_method: crate::site::HttpMethod::Get,
1372 request_body: None,
1373 protection: Vec::new(),
1374 disabled: false,
1375 source: None,
1376 popularity: None,
1377 access: crate::AccessPolicy::default(),
1378 };
1379 let client = Client::builder()
1384 .min_request_interval(Duration::ZERO)
1385 .max_retries(0)
1386 .max_rps(std::num::NonZeroU32::new(2).unwrap())
1387 .build()
1388 .unwrap();
1389 client.check(&site_a, &user()).await;
1392 let started = Instant::now();
1393 client.check(&site_b, &user()).await;
1394 assert!(
1395 started.elapsed() >= Duration::from_millis(350),
1396 "global cap should space cross-host requests, got {:?}",
1397 started.elapsed(),
1398 );
1399 }
1400
1401 #[tokio::test]
1402 async fn respect_robots_skips_disallowed_paths() {
1403 let server = MockServer::start().await;
1404 Mock::given(any())
1405 .and(path("/robots.txt"))
1406 .respond_with(
1407 ResponseTemplate::new(200).set_body_string("User-agent: *\nDisallow: /no"),
1408 )
1409 .mount(&server)
1410 .await;
1411 Mock::given(any())
1412 .and(path("/no/alice"))
1413 .respond_with(ResponseTemplate::new(200))
1414 .mount(&server)
1415 .await;
1416 Mock::given(any())
1417 .and(path("/yes/alice"))
1418 .respond_with(ResponseTemplate::new(200))
1419 .mount(&server)
1420 .await;
1421 let client = Client::builder()
1422 .min_request_interval(Duration::ZERO)
1423 .max_retries(0)
1424 .respect_robots(true)
1425 .build()
1426 .unwrap();
1427
1428 let disallowed = Site {
1429 name: "No".into(),
1430 url: UrlTemplate::new(format!("{}/no/{{username}}", server.uri())).unwrap(),
1431 signals: vec![Signal::StatusFound { codes: vec![200] }],
1432 known_present: None,
1433 known_absent: None,
1434 extract: Vec::new(),
1435 tags: Vec::new(),
1436 request_headers: std::collections::BTreeMap::new(),
1437 regex_check: None,
1438 engine: None,
1439 strip_bad_char: None,
1440 request_method: crate::site::HttpMethod::Get,
1441 request_body: None,
1442 protection: Vec::new(),
1443 disabled: false,
1444 source: None,
1445 popularity: None,
1446 access: crate::AccessPolicy::default(),
1447 };
1448 let allowed = Site {
1449 name: "Yes".into(),
1450 url: UrlTemplate::new(format!("{}/yes/{{username}}", server.uri())).unwrap(),
1451 signals: vec![Signal::StatusFound { codes: vec![200] }],
1452 known_present: None,
1453 known_absent: None,
1454 extract: Vec::new(),
1455 tags: Vec::new(),
1456 request_headers: std::collections::BTreeMap::new(),
1457 regex_check: None,
1458 engine: None,
1459 strip_bad_char: None,
1460 request_method: crate::site::HttpMethod::Get,
1461 request_body: None,
1462 protection: Vec::new(),
1463 disabled: false,
1464 source: None,
1465 popularity: None,
1466 access: crate::AccessPolicy::default(),
1467 };
1468
1469 let no = client.check(&disallowed, &user()).await;
1470 assert_eq!(no.kind, MatchKind::Uncertain);
1471 assert_eq!(no.reason, Some(UncertainReason::RobotsDisallowed));
1472
1473 let yes = client.check(&allowed, &user()).await;
1474 assert_eq!(yes.kind, MatchKind::Found);
1475 }
1476
1477 #[tokio::test]
1478 async fn body_read_skipped_when_no_body_signal_needed() {
1479 let server = MockServer::start().await;
1482 Mock::given(any())
1483 .and(path("/alice"))
1484 .respond_with(ResponseTemplate::new(200).set_body_string("Profile not found"))
1485 .mount(&server)
1486 .await;
1487 let site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
1488 let outcome = build_client().check(&site, &user()).await;
1489 assert_eq!(outcome.kind, MatchKind::Found);
1490 }
1491
1492 #[derive(Debug)]
1498 struct RecordingBackend {
1499 page: RenderedPage,
1500 calls: std::sync::atomic::AtomicUsize,
1501 }
1502
1503 impl RecordingBackend {
1504 fn with_page(page: RenderedPage) -> Self {
1505 Self {
1506 page,
1507 calls: std::sync::atomic::AtomicUsize::new(0),
1508 }
1509 }
1510 fn call_count(&self) -> usize {
1511 self.calls.load(std::sync::atomic::Ordering::SeqCst)
1512 }
1513 }
1514
1515 #[async_trait::async_trait]
1516 impl BrowserBackend for RecordingBackend {
1517 async fn fetch(
1518 &self,
1519 _url: &url::Url,
1520 _headers: &std::collections::BTreeMap<String, String>,
1521 _timeout: Duration,
1522 ) -> Result<RenderedPage> {
1523 self.calls.fetch_add(1, std::sync::atomic::Ordering::SeqCst);
1524 Ok(self.page.clone())
1525 }
1526 }
1527
1528 fn site_bot_protected(server: &MockServer) -> Site {
1529 let mut s = site_with(server, vec![Signal::StatusFound { codes: vec![200] }]);
1530 s.tags = vec!["bot-protected".into()];
1531 s
1532 }
1533
1534 #[tokio::test]
1535 async fn browser_routes_bot_protected_sites() {
1536 let server = MockServer::start().await;
1539 let backend = Arc::new(RecordingBackend::with_page(RenderedPage {
1540 status: 200,
1541 final_url: url::Url::parse("https://example.com/alice").unwrap(),
1542 body: "<html></html>".into(),
1543 elapsed_ms: 42,
1544 }));
1545 let client = Client::builder()
1546 .min_request_interval(Duration::ZERO)
1547 .max_retries(0)
1548 .browser(backend.clone())
1549 .build()
1550 .unwrap();
1551 let outcome = client.check(&site_bot_protected(&server), &user()).await;
1552 assert_eq!(outcome.kind, MatchKind::Found);
1553 assert_eq!(backend.call_count(), 1, "browser invoked exactly once");
1554 }
1555
1556 #[tokio::test]
1557 async fn non_bot_protected_sites_skip_browser() {
1558 let server = MockServer::start().await;
1559 Mock::given(any())
1560 .and(path("/alice"))
1561 .respond_with(ResponseTemplate::new(200))
1562 .mount(&server)
1563 .await;
1564 let backend = Arc::new(RecordingBackend::with_page(RenderedPage {
1565 status: 500, final_url: url::Url::parse("https://x/").unwrap(),
1567 body: String::new(),
1568 elapsed_ms: 0,
1569 }));
1570 let client = Client::builder()
1571 .min_request_interval(Duration::ZERO)
1572 .max_retries(0)
1573 .browser(backend.clone())
1574 .build()
1575 .unwrap();
1576 let site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
1578 let outcome = client.check(&site, &user()).await;
1579 assert_eq!(outcome.kind, MatchKind::Found);
1580 assert_eq!(backend.call_count(), 0, "browser must not be touched");
1581 }
1582
1583 #[tokio::test]
1584 async fn browser_budget_exhaust_yields_uncertain() {
1585 let server = MockServer::start().await;
1586 let backend = Arc::new(RecordingBackend::with_page(RenderedPage {
1587 status: 200,
1588 final_url: url::Url::parse("https://x/").unwrap(),
1589 body: String::new(),
1590 elapsed_ms: 0,
1591 }));
1592 let client = Client::builder()
1593 .min_request_interval(Duration::ZERO)
1594 .max_retries(0)
1595 .browser(backend.clone())
1596 .browser_budget(1)
1597 .build()
1598 .unwrap();
1599 let site = site_bot_protected(&server);
1600 let first = client.check(&site, &user()).await;
1602 assert_eq!(first.kind, MatchKind::Found);
1603 let second = client.check(&site, &user()).await;
1605 assert_eq!(second.kind, MatchKind::Uncertain);
1606 assert!(matches!(
1607 second.reason,
1608 Some(UncertainReason::BrowserBudget)
1609 ));
1610 assert_eq!(
1611 backend.call_count(),
1612 1,
1613 "second call must not invoke backend"
1614 );
1615 }
1616
1617 #[tokio::test]
1618 async fn browser_failure_surfaces_as_uncertain_browser_failed() {
1619 struct FailingBackend;
1620 #[async_trait::async_trait]
1621 impl BrowserBackend for FailingBackend {
1622 async fn fetch(
1623 &self,
1624 _url: &url::Url,
1625 _headers: &std::collections::BTreeMap<String, String>,
1626 _timeout: Duration,
1627 ) -> Result<RenderedPage> {
1628 Err(Error::BrowserSetup {
1629 message: "simulated crash".into(),
1630 })
1631 }
1632 }
1633 impl std::fmt::Debug for FailingBackend {
1634 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1635 f.write_str("FailingBackend")
1636 }
1637 }
1638
1639 let server = MockServer::start().await;
1640 let client = Client::builder()
1641 .min_request_interval(Duration::ZERO)
1642 .max_retries(0)
1643 .browser(Arc::new(FailingBackend))
1644 .build()
1645 .unwrap();
1646 let outcome = client.check(&site_bot_protected(&server), &user()).await;
1647 assert_eq!(outcome.kind, MatchKind::Uncertain);
1648 match outcome.reason {
1649 Some(UncertainReason::BrowserFailed(msg)) => {
1650 assert!(msg.contains("simulated crash"), "got: {msg}");
1651 }
1652 other => panic!("expected BrowserFailed, got {other:?}"),
1653 }
1654 }
1655
1656 #[tokio::test]
1657 async fn status_only_site_uses_head_request() {
1658 let server = MockServer::start().await;
1662 Mock::given(method("HEAD"))
1663 .and(path("/alice"))
1664 .respond_with(ResponseTemplate::new(200))
1665 .mount(&server)
1666 .await;
1667 let site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
1668 let outcome = build_client().check(&site, &user()).await;
1669 assert_eq!(outcome.kind, MatchKind::Found);
1670 let recvd = server.received_requests().await.unwrap_or_default();
1671 assert_eq!(recvd.len(), 1);
1672 assert_eq!(recvd[0].method.as_str(), "HEAD");
1673 }
1674
1675 #[tokio::test]
1676 async fn body_signal_site_uses_get_request() {
1677 let server = MockServer::start().await;
1680 Mock::given(any())
1681 .and(path("/alice"))
1682 .respond_with(ResponseTemplate::new(200).set_body_string("hello alice"))
1683 .mount(&server)
1684 .await;
1685 let site = site_with(
1686 &server,
1687 vec![Signal::BodyPresent {
1688 text: "hello".into(),
1689 }],
1690 );
1691 let outcome = build_client().check(&site, &user()).await;
1692 assert_eq!(outcome.kind, MatchKind::Found);
1693 let recvd = server.received_requests().await.unwrap_or_default();
1694 assert_eq!(recvd[0].method.as_str(), "GET");
1695 }
1696
1697 #[tokio::test]
1698 async fn protection_field_routes_through_browser_like_bot_protected_tag() {
1699 let server = MockServer::start().await;
1704 Mock::given(any())
1705 .respond_with(ResponseTemplate::new(200))
1706 .mount(&server)
1707 .await;
1708 let mut site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
1709 site.protection = vec![crate::site::ProtectionKind::Cloudflare];
1710 let backend = Arc::new(RecordingBackend::with_page(RenderedPage {
1712 status: 200,
1713 final_url: url::Url::parse(&format!("{}/alice", server.uri())).unwrap(),
1714 body: String::new(),
1715 elapsed_ms: 0,
1716 }));
1717 let client = Client::builder()
1718 .min_request_interval(Duration::ZERO)
1719 .max_retries(0)
1720 .browser(backend)
1721 .build()
1722 .unwrap();
1723 let outcome = client.check(&site, &user()).await;
1724 assert_eq!(outcome.kind, MatchKind::Found);
1727 let recvd = server.received_requests().await.unwrap_or_default();
1729 assert_eq!(
1730 recvd.len(),
1731 0,
1732 "structured protection must skip the raw HTTP path"
1733 );
1734 }
1735
1736 #[tokio::test]
1737 async fn post_method_sends_body_with_username_substituted() {
1738 let server = MockServer::start().await;
1742 Mock::given(method("POST"))
1743 .and(path("/api"))
1744 .respond_with(ResponseTemplate::new(200))
1745 .mount(&server)
1746 .await;
1747 let site = Site {
1752 name: "ApiPost".into(),
1753 url: UrlTemplate::new(format!("{}/api?_={{username}}", server.uri())).unwrap(),
1754 signals: vec![Signal::StatusFound { codes: vec![200] }],
1755 known_present: None,
1756 known_absent: None,
1757 extract: Vec::new(),
1758 tags: Vec::new(),
1759 request_headers: std::collections::BTreeMap::new(),
1760 regex_check: None,
1761 engine: None,
1762 strip_bad_char: None,
1763 request_method: HttpMethod::Post,
1764 request_body: Some(r#"{"name":"{username}"}"#.into()),
1765 protection: Vec::new(),
1766 disabled: false,
1767 source: None,
1768 popularity: None,
1769 access: crate::AccessPolicy::default(),
1770 };
1771 let outcome = build_client().check(&site, &user()).await;
1772 assert_eq!(outcome.kind, MatchKind::Found);
1773 let recvd = server.received_requests().await.unwrap_or_default();
1774 assert_eq!(recvd.len(), 1);
1775 assert_eq!(recvd[0].method.as_str(), "POST");
1776 let body = String::from_utf8_lossy(&recvd[0].body).to_string();
1777 assert!(body.contains("\"name\":\"alice\""), "body was: {body}");
1778 }
1779
1780 #[tokio::test]
1781 async fn head_405_falls_back_to_get() {
1782 let server = MockServer::start().await;
1785 Mock::given(method("HEAD"))
1786 .and(path("/alice"))
1787 .respond_with(ResponseTemplate::new(405))
1788 .mount(&server)
1789 .await;
1790 Mock::given(any())
1791 .and(path("/alice"))
1792 .respond_with(ResponseTemplate::new(200))
1793 .mount(&server)
1794 .await;
1795 let site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
1796 let outcome = build_client().check(&site, &user()).await;
1797 assert_eq!(outcome.kind, MatchKind::Found);
1798 let recvd = server.received_requests().await.unwrap_or_default();
1799 assert_eq!(recvd.len(), 2);
1800 assert_eq!(recvd[0].method.as_str(), "HEAD");
1801 assert_eq!(recvd[1].method.as_str(), "GET");
1802 }
1803}