1use std::fmt;
11use std::num::NonZeroU32;
12use std::sync::Arc;
13use std::time::{Duration, Instant};
14
15use reqwest::redirect;
16
17use crate::ban;
18use crate::browser::{BrowserBackend, BrowserBudget, RenderedPage};
19use crate::check::{CheckOutcome, MatchKind, UncertainReason};
20use crate::error::{Error, Result};
21use crate::retry::{self, RetryPolicy};
22use crate::robots::RobotsCache;
23use crate::site::{HttpMethod, Probe, Signal, SignalVerdict, Site, aggregate};
24use crate::throttle::HostThrottle;
25use crate::username::Username;
26
/// Whole-request timeout used when the builder's `timeout` is not overridden.
const DEFAULT_TIMEOUT: Duration = Duration::from_secs(10);
/// Connect timeout used when the builder's `connect_timeout` is not overridden.
const DEFAULT_CONNECT_TIMEOUT: Duration = Duration::from_secs(5);
/// Redirect cap handed to `redirect::Policy::limited` when redirects are followed.
const DEFAULT_REDIRECT_LIMIT: usize = 8;
/// Interval given to the per-host `HostThrottle` when the builder's
/// `min_request_interval` is not overridden.
const DEFAULT_PER_HOST_INTERVAL: Duration = Duration::from_millis(100);
/// Single key every request waits on when the optional global throttle is set.
const GLOBAL_THROTTLE_KEY: &str = "*global*";
33
/// HTTP probing client assembled by [`ClientBuilder::build`].
#[derive(Clone)]
pub struct Client {
    /// Underlying `reqwest` client used for every raw HTTP request.
    inner: reqwest::Client,
    /// Throttle waited on with the target host as key before each raw request.
    throttle: HostThrottle,
    /// Optional throttle waited on with [`GLOBAL_THROTTLE_KEY`]; present only
    /// when the builder was given `max_rps`.
    global_throttle: Option<HostThrottle>,
    /// Policy consulted by [`Client::check`] to decide whether and when to retry.
    retry: RetryPolicy,
    /// Pool of `User-Agent` values; when non-empty one is attached per request.
    user_agents: Arc<[String]>,
    /// Whether `Found` outcomes are enriched using the site's `extract` rules.
    enrich: bool,
    /// robots.txt cache; present only when the builder enabled `respect_robots`.
    robots: Option<RobotsCache>,
    /// Optional browser backend used for sites tagged `bot-protected` or with a
    /// non-empty `protection` list.
    browser: Option<Arc<dyn BrowserBackend>>,
    /// Budget consulted (`try_consume`) before each browser-backed probe.
    browser_budget: Arc<BrowserBudget>,
}
62
63impl Client {
64 pub fn builder() -> ClientBuilder {
66 ClientBuilder::default()
67 }
68
69 #[tracing::instrument(skip(self), fields(site = %site.name, user = %username))]
83 pub async fn check(&self, site: &Site, username: &Username) -> CheckOutcome {
84 let mut attempt: u32 = 0;
85 loop {
86 let outcome = self.probe_once(site, username).await;
87 if !retry::should_retry(&outcome, attempt, &self.retry) {
88 return outcome;
89 }
90 let delay = retry::backoff_delay(attempt, &self.retry);
91 tracing::info!(
92 site = %site.name,
93 attempt = attempt + 1,
94 reason = outcome.reason.as_ref().map(ToString::to_string).unwrap_or_default(),
95 ?delay,
96 "transient ban, retrying",
97 );
98 tokio::time::sleep(delay).await;
99 attempt += 1;
100 }
101 }
102
103 pub async fn fetch(&self, url: &str) -> Option<RawResponse> {
112 let host = host_of(url);
113 if let Some(global) = &self.global_throttle {
114 global.wait(GLOBAL_THROTTLE_KEY).await;
115 }
116 self.throttle.wait(&host).await;
117 let mut request = self.inner.get(url);
118 if let Some(ua) = self.pick_user_agent() {
119 request = request.header(reqwest::header::USER_AGENT, ua);
120 }
121 let response = request.send().await.ok()?;
122 let status = response.status().as_u16();
123 let final_url = response.url().to_string();
124 let body = response.text().await.unwrap_or_default();
125 Some(RawResponse {
126 status,
127 final_url,
128 body,
129 })
130 }
131
132 pub async fn fetch_for_doctor(&self, site: &Site, url: &str) -> Option<RawResponse> {
143 if let Some(backend) = self.browser.as_deref() {
144 let has_tag = site
145 .tags
146 .iter()
147 .any(|t| t.eq_ignore_ascii_case(BOT_PROTECTED_TAG));
148 if has_tag || !site.protection.is_empty() {
149 let parsed = url::Url::parse(url).ok()?;
150 match backend
151 .fetch(&parsed, &site.request_headers, BROWSER_TIMEOUT)
152 .await
153 {
154 Ok(page) => {
155 return Some(RawResponse {
156 status: page.status,
157 final_url: page.final_url.to_string(),
158 body: page.body,
159 });
160 }
161 Err(err) => {
162 tracing::warn!(
163 site = %site.name, %url, error = %err,
164 "browser fetch failed in doctor; falling back to raw HTTP",
165 );
166 }
167 }
168 }
169 }
170 self.fetch(url).await
171 }
172
173 fn pick_user_agent(&self) -> Option<&str> {
176 match self.user_agents.len() {
177 0 => None,
178 1 => Some(&self.user_agents[0]),
179 n => Some(&self.user_agents[fastrand::usize(0..n)]),
180 }
181 }
182
183 #[allow(clippy::too_many_lines)]
186 async fn probe_once(&self, site: &Site, username: &Username) -> CheckOutcome {
187 let url = site.url_for(username);
188
189 if let Some(pat) = &site.regex_check {
199 if let Ok(re) = regex::Regex::new(pat) {
200 if !re.is_match(username.as_str()) {
201 return uncertain(
202 &site.name,
203 url,
204 Instant::now(),
205 UncertainReason::UsernameNotAllowed,
206 );
207 }
208 }
209 }
210
211 if let Some(backend) = self.browser.as_deref() {
218 let has_tag = site
219 .tags
220 .iter()
221 .any(|t| t.eq_ignore_ascii_case(BOT_PROTECTED_TAG));
222 if has_tag || !site.protection.is_empty() {
223 if self.browser_budget.try_consume() {
224 return self.probe_with_browser(site, &url, backend).await;
225 }
226 tracing::warn!(site = %site.name, "browser budget exhausted");
227 return uncertain(
228 &site.name,
229 url,
230 Instant::now(),
231 UncertainReason::BrowserBudget,
232 );
233 }
234 }
235
236 let host = host_of(&url);
237
238 if let Some(robots) = &self.robots {
240 if let Some((origin, path)) = origin_and_path(&url) {
241 if !robots.allowed(&origin, &path).await {
242 tracing::debug!(%url, "skipped by robots.txt");
243 return uncertain(
244 &site.name,
245 url,
246 Instant::now(),
247 UncertainReason::RobotsDisallowed,
248 );
249 }
250 }
251 }
252
253 if let Some(global) = &self.global_throttle {
255 global.wait(GLOBAL_THROTTLE_KEY).await;
256 }
257 self.throttle.wait(&host).await;
258 let started = Instant::now();
259 tracing::debug!(%url, %host, "probing");
260
261 let want_enrich = self.enrich && !site.extract.is_empty();
264 let needs_body = want_enrich || site.signals.iter().any(crate::site::Signal::needs_body);
265
266 let body_for_post: Option<String> = if matches!(site.request_method, HttpMethod::Post) {
273 const USERNAME_PH: &str = "{username}";
274 site.request_body
275 .as_deref()
276 .map(|t| t.replace(USERNAME_PH, username.as_str()))
277 } else {
278 None
279 };
280
281 let response = match site.request_method {
289 HttpMethod::Post => {
290 send_request_with_body(
291 &self.inner,
292 reqwest::Method::POST,
293 &url,
294 self.pick_user_agent(),
295 body_for_post.as_deref(),
296 )
297 .await
298 }
299 HttpMethod::Get if needs_body => {
300 send_request(
301 &self.inner,
302 reqwest::Method::GET,
303 &url,
304 self.pick_user_agent(),
305 )
306 .await
307 }
308 HttpMethod::Get => {
309 match send_request(
310 &self.inner,
311 reqwest::Method::HEAD,
312 &url,
313 self.pick_user_agent(),
314 )
315 .await
316 {
317 Ok(r) if r.status().as_u16() == 405 => {
318 send_request(
319 &self.inner,
320 reqwest::Method::GET,
321 &url,
322 self.pick_user_agent(),
323 )
324 .await
325 }
326 other => other,
327 }
328 }
329 };
330 let response = match response {
331 Ok(r) => r,
332 Err(err) => {
333 tracing::debug!(error = %err, "request failed");
334 return uncertain(
335 &site.name,
336 url,
337 started,
338 UncertainReason::Network(err.to_string()),
339 );
340 }
341 };
342
343 let status = response.status().as_u16();
344 let final_url = response.url().to_string();
345
346 if let Some(reason) = ban::detect_pre_body(status, response.headers()) {
347 tracing::warn!(%host, status, %reason, "ban-like response");
348 return uncertain(&site.name, url, started, reason);
349 }
350 let body = if needs_body {
351 match response.text().await {
352 Ok(b) => b,
353 Err(err) => {
354 return uncertain(
355 &site.name,
356 url,
357 started,
358 UncertainReason::BodyRead(err.to_string()),
359 );
360 }
361 }
362 } else {
363 String::new()
364 };
365
366 if !body.is_empty() {
367 if let Some(reason) = ban::detect_in_body(&body) {
368 tracing::warn!(%host, %reason, "ban-like body");
369 return uncertain(&site.name, url, started, reason);
370 }
371 }
372
373 let probe = Probe {
374 status,
375 final_url: &final_url,
376 body: &body,
377 };
378 let votes: Vec<(&Signal, SignalVerdict)> = site
379 .signals
380 .iter()
381 .map(|s| (s, s.evaluate(&probe)))
382 .collect();
383 let kind = aggregate(votes.iter().map(|(_, v)| *v));
384 let mut result = outcome(&site.name, url, started, kind);
385 let winning = match kind {
387 MatchKind::Found => Some(SignalVerdict::Found),
388 MatchKind::NotFound => Some(SignalVerdict::NotFound),
389 MatchKind::Uncertain => None,
390 };
391 if let Some(want) = winning {
392 result.evidence = votes
393 .iter()
394 .filter(|(_, v)| *v == want)
395 .map(|(s, _)| s.describe_match(&probe))
396 .collect();
397 }
398 if want_enrich && kind == MatchKind::Found {
399 result.enrichment = crate::enrich::extract(&body, &site.extract);
400 }
401 result
402 }
403
404 async fn probe_with_browser(
409 &self,
410 site: &Site,
411 url: &str,
412 backend: &dyn BrowserBackend,
413 ) -> CheckOutcome {
414 let started = Instant::now();
415 let parsed = match url::Url::parse(url) {
416 Ok(u) => u,
417 Err(err) => {
418 return uncertain(
419 &site.name,
420 url.to_owned(),
421 started,
422 UncertainReason::Other(format!("invalid url: {err}")),
423 );
424 }
425 };
426
427 let page: RenderedPage = match backend
428 .fetch(&parsed, &site.request_headers, BROWSER_TIMEOUT)
429 .await
430 {
431 Ok(p) => p,
432 Err(err) => {
433 tracing::warn!(site = %site.name, %url, error = %err, "browser fetch failed");
434 return uncertain(
435 &site.name,
436 url.to_owned(),
437 started,
438 UncertainReason::BrowserFailed(err.to_string()),
439 );
440 }
441 };
442
443 let final_url_str = page.final_url.as_str().to_owned();
444 let probe = Probe {
445 status: page.status,
446 final_url: &final_url_str,
447 body: &page.body,
448 };
449 let votes: Vec<(&Signal, SignalVerdict)> = site
450 .signals
451 .iter()
452 .map(|s| (s, s.evaluate(&probe)))
453 .collect();
454 let kind = aggregate(votes.iter().map(|(_, v)| *v));
455 let mut result = outcome(&site.name, url.to_owned(), started, kind);
456 let winning = match kind {
457 MatchKind::Found => Some(SignalVerdict::Found),
458 MatchKind::NotFound => Some(SignalVerdict::NotFound),
459 MatchKind::Uncertain => None,
460 };
461 if let Some(want) = winning {
462 result.evidence = votes
463 .iter()
464 .filter(|(_, v)| *v == want)
465 .map(|(s, _)| s.describe_match(&probe))
466 .collect();
467 }
468 if self.enrich && kind == MatchKind::Found && !site.extract.is_empty() {
469 result.enrichment = crate::enrich::extract(&page.body, &site.extract);
470 }
471 result
472 }
473}
474
/// Plain HTTP result returned by [`Client::fetch`] and [`Client::fetch_for_doctor`].
#[derive(Debug, Clone)]
pub struct RawResponse {
    /// Numeric HTTP status code of the response.
    pub status: u16,
    /// URL the response was ultimately served from.
    pub final_url: String,
    /// Response body as text; `fetch` leaves it empty when the body cannot be read.
    pub body: String,
}
485
/// Builder for [`Client`]; obtain one with [`Client::builder`] and finish with
/// [`ClientBuilder::build`].
#[derive(Clone)]
#[must_use = "ClientBuilder does nothing until `.build()` is called"]
pub struct ClientBuilder {
    /// Whole-request timeout passed to the `reqwest` builder.
    timeout: Duration,
    /// Connect timeout passed to the `reqwest` builder.
    connect_timeout: Duration,
    /// Default `User-Agent` installed on the `reqwest` client.
    user_agent: String,
    /// Whether redirects are followed at all.
    follow_redirects: bool,
    /// Redirect cap used when `follow_redirects` is true.
    redirect_limit: usize,
    /// Interval handed to the per-host throttle.
    min_request_interval: Duration,
    /// Optional global requests-per-second cap; enables the global throttle.
    max_rps: Option<NonZeroU32>,
    /// Retry policy copied into the client.
    retry: RetryPolicy,
    /// Optional proxy URL, validated in `build`.
    proxy: Option<String>,
    /// Pool of `User-Agent` values to rotate through per request.
    user_agents: Vec<String>,
    /// Whether enrichment is enabled on the built client.
    enrich: bool,
    /// Whether the built client consults robots.txt before probing.
    respect_robots: bool,
    /// Optional browser backend handed to the client.
    browser: Option<Arc<dyn BrowserBackend>>,
    /// Cap passed to `BrowserBudget::new`.
    browser_budget: usize,
}
505
506impl Default for ClientBuilder {
507 fn default() -> Self {
508 Self {
509 timeout: DEFAULT_TIMEOUT,
510 connect_timeout: DEFAULT_CONNECT_TIMEOUT,
511 user_agent: default_user_agent(),
512 follow_redirects: true,
513 redirect_limit: DEFAULT_REDIRECT_LIMIT,
514 min_request_interval: DEFAULT_PER_HOST_INTERVAL,
515 max_rps: None,
516 retry: RetryPolicy::default(),
517 proxy: None,
518 user_agents: Vec::new(),
519 enrich: false,
520 respect_robots: false,
521 browser: None,
522 browser_budget: DEFAULT_BROWSER_BUDGET,
523 }
524 }
525}
526
527impl ClientBuilder {
528 pub fn timeout(mut self, timeout: Duration) -> Self {
530 self.timeout = timeout;
531 self
532 }
533
534 pub fn connect_timeout(mut self, timeout: Duration) -> Self {
536 self.connect_timeout = timeout;
537 self
538 }
539
540 pub fn user_agent(mut self, user_agent: impl Into<String>) -> Self {
542 self.user_agent = user_agent.into();
543 self
544 }
545
546 pub fn follow_redirects(mut self, follow: bool) -> Self {
549 self.follow_redirects = follow;
550 self
551 }
552
553 pub fn min_request_interval(mut self, interval: Duration) -> Self {
559 self.min_request_interval = interval;
560 self
561 }
562
563 pub fn max_rps(mut self, rps: NonZeroU32) -> Self {
568 self.max_rps = Some(rps);
569 self
570 }
571
572 pub fn max_retries(mut self, n: u32) -> Self {
575 self.retry.max_retries = n;
576 self
577 }
578
579 pub fn base_backoff_delay(mut self, d: Duration) -> Self {
582 self.retry.base_delay = d;
583 self
584 }
585
586 pub fn max_backoff_delay(mut self, d: Duration) -> Self {
588 self.retry.max_delay = d;
589 self
590 }
591
592 pub fn proxy(mut self, url: impl Into<String>) -> Self {
595 self.proxy = Some(url.into());
596 self
597 }
598
599 pub fn rotate_user_agents(mut self, agents: Vec<String>) -> Self {
603 self.user_agents = agents;
604 self
605 }
606
607 pub fn enrich(mut self, enrich: bool) -> Self {
610 self.enrich = enrich;
611 self
612 }
613
614 pub fn respect_robots(mut self, respect: bool) -> Self {
618 self.respect_robots = respect;
619 self
620 }
621
622 pub fn browser(mut self, backend: Arc<dyn BrowserBackend>) -> Self {
626 self.browser = Some(backend);
627 self
628 }
629
    /// Sets the cap passed to `BrowserBudget::new` when the client is built.
    pub const fn browser_budget(mut self, cap: usize) -> Self {
        self.browser_budget = cap;
        self
    }
638
639 pub fn build(self) -> Result<Client> {
641 let redirect_policy = if self.follow_redirects {
642 redirect::Policy::limited(self.redirect_limit)
643 } else {
644 redirect::Policy::none()
645 };
646 let mut builder = reqwest::Client::builder()
647 .user_agent(self.user_agent)
648 .timeout(self.timeout)
649 .connect_timeout(self.connect_timeout)
650 .redirect(redirect_policy);
651 if let Some(proxy_url) = &self.proxy {
652 const SCHEMES: [&str; 4] = ["http://", "https://", "socks5://", "socks5h://"];
656 if !SCHEMES.iter().any(|s| proxy_url.starts_with(s)) {
657 return Err(Error::HttpSetup {
658 message: format!(
659 "invalid proxy {proxy_url:?}: must start with one of {}",
660 SCHEMES.join(", ")
661 ),
662 });
663 }
664 let proxy = reqwest::Proxy::all(proxy_url).map_err(|e| Error::HttpSetup {
665 message: format!("invalid proxy {proxy_url:?}: {e}"),
666 })?;
667 builder = builder.proxy(proxy);
668 }
669 let inner = builder.build().map_err(|e| Error::HttpSetup {
670 message: e.to_string(),
671 })?;
672 let global_throttle = self.max_rps.map(|rps| {
673 let interval = Duration::from_secs(1) / rps.get();
675 HostThrottle::new(interval)
676 });
677 let robots = self
678 .respect_robots
679 .then(|| RobotsCache::new(inner.clone(), "adler"));
680 Ok(Client {
681 inner,
682 throttle: HostThrottle::new(self.min_request_interval),
683 global_throttle,
684 retry: self.retry,
685 user_agents: Arc::from(self.user_agents),
686 enrich: self.enrich,
687 robots,
688 browser: self.browser,
689 browser_budget: Arc::new(BrowserBudget::new(self.browser_budget)),
690 })
691 }
692}
693
/// Browser budget cap used when the builder's `browser_budget` is not overridden.
pub const DEFAULT_BROWSER_BUDGET: usize = 50;
701
702impl fmt::Debug for Client {
703 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
704 f.debug_struct("Client")
705 .field("throttle", &self.throttle)
706 .field("global_throttle", &self.global_throttle)
707 .field("retry", &self.retry)
708 .field("user_agents", &self.user_agents)
709 .field("enrich", &self.enrich)
710 .field("robots", &self.robots.is_some())
711 .field("browser", &self.browser.is_some())
712 .field("browser_budget", &self.browser_budget)
713 .finish_non_exhaustive()
714 }
715}
716
717impl fmt::Debug for ClientBuilder {
718 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
719 f.debug_struct("ClientBuilder")
720 .field("timeout", &self.timeout)
721 .field("connect_timeout", &self.connect_timeout)
722 .field("user_agent", &self.user_agent)
723 .field("follow_redirects", &self.follow_redirects)
724 .field("redirect_limit", &self.redirect_limit)
725 .field("min_request_interval", &self.min_request_interval)
726 .field("max_rps", &self.max_rps)
727 .field("retry", &self.retry)
728 .field("proxy", &self.proxy)
729 .field("user_agents", &self.user_agents)
730 .field("enrich", &self.enrich)
731 .field("respect_robots", &self.respect_robots)
732 .field("browser", &self.browser.is_some())
733 .field("browser_budget", &self.browser_budget)
734 .finish()
735 }
736}
737
/// Timeout passed to every `BrowserBackend::fetch` call made by the client.
const BROWSER_TIMEOUT: Duration = Duration::from_secs(60);

/// Site tag (compared ASCII case-insensitively) that routes a site to the browser backend.
const BOT_PROTECTED_TAG: &str = "bot-protected";
744
745fn default_user_agent() -> String {
746 format!("adler/{}", env!("CARGO_PKG_VERSION"))
747}
748
/// Sends a body-less request with `method` to `url`.
///
/// Delegates to [`send_request_with_body`] with no body, so `ua` (when
/// present) overrides the client's default `User-Agent` header.
async fn send_request(
    client: &reqwest::Client,
    method: reqwest::Method,
    url: &str,
    ua: Option<&str>,
) -> reqwest::Result<reqwest::Response> {
    send_request_with_body(client, method, url, ua, None).await
}
761
762async fn send_request_with_body(
768 client: &reqwest::Client,
769 method: reqwest::Method,
770 url: &str,
771 ua: Option<&str>,
772 body: Option<&str>,
773) -> reqwest::Result<reqwest::Response> {
774 let mut request = client.request(method, url);
775 if let Some(ua) = ua {
776 request = request.header(reqwest::header::USER_AGENT, ua);
777 }
778 if let Some(b) = body {
779 request = request
780 .header(reqwest::header::CONTENT_TYPE, "application/json")
781 .body(b.to_owned());
782 }
783 request.send().await
784}
785
786fn host_of(url: &str) -> String {
787 reqwest::Url::parse(url)
788 .ok()
789 .and_then(|u| u.host_str().map(str::to_owned))
790 .unwrap_or_else(|| "unknown".into())
791}
792
793fn origin_and_path(url: &str) -> Option<(String, String)> {
796 let parsed = reqwest::Url::parse(url).ok()?;
797 let host = parsed.host_str()?;
798 let port = parsed.port().map_or_else(String::new, |p| format!(":{p}"));
799 let origin = format!("{}://{host}{port}", parsed.scheme());
800 let path = parsed.query().map_or_else(
801 || parsed.path().to_owned(),
802 |q| format!("{}?{q}", parsed.path()),
803 );
804 Some((origin, path))
805}
806
807fn outcome(site: &str, url: String, started: Instant, kind: MatchKind) -> CheckOutcome {
808 CheckOutcome {
809 site: site.to_owned(),
810 url,
811 kind,
812 reason: None,
813 elapsed_ms: elapsed_ms(started),
814 enrichment: std::collections::BTreeMap::new(),
815 evidence: Vec::new(),
816 }
817}
818
819fn uncertain(site: &str, url: String, started: Instant, reason: UncertainReason) -> CheckOutcome {
820 CheckOutcome {
821 site: site.to_owned(),
822 url,
823 kind: MatchKind::Uncertain,
824 reason: Some(reason),
825 elapsed_ms: elapsed_ms(started),
826 enrichment: std::collections::BTreeMap::new(),
827 evidence: Vec::new(),
828 }
829}
830
/// Milliseconds elapsed since `started`, saturating at `u64::MAX`.
fn elapsed_ms(started: Instant) -> u64 {
    let millis: u128 = started.elapsed().as_millis();
    u64::try_from(millis).unwrap_or(u64::MAX)
}
834
835#[cfg(test)]
836mod tests {
837 use super::*;
838 use crate::site::{Signal, UrlTemplate};
839 use wiremock::matchers::{any, method, path};
840 use wiremock::{Mock, MockServer, ResponseTemplate};
841
842 fn build_client() -> Client {
843 Client::builder()
844 .timeout(Duration::from_secs(2))
845 .min_request_interval(Duration::ZERO)
848 .max_retries(0)
851 .build()
852 .expect("client builds")
853 }
854
855 fn site_with(server: &MockServer, signals: Vec<Signal>) -> Site {
856 Site {
857 name: "Mock".into(),
858 url: UrlTemplate::new(format!("{}/{{username}}", server.uri())).unwrap(),
859 signals,
860 known_present: None,
861 known_absent: None,
862 extract: Vec::new(),
863 tags: Vec::new(),
864 request_headers: std::collections::BTreeMap::new(),
865 regex_check: None,
866 engine: None,
867 strip_bad_char: None,
868 request_method: crate::site::HttpMethod::Get,
869 request_body: None,
870 protection: Vec::new(),
871 }
872 }
873
    /// The fixed username (`alice`) that every test probes for.
    fn user() -> Username {
        Username::new("alice").unwrap()
    }
877
878 #[tokio::test]
879 async fn regex_check_short_circuits_before_any_request() {
880 let server = MockServer::start().await;
884 Mock::given(any())
885 .respond_with(ResponseTemplate::new(200))
886 .mount(&server)
887 .await;
888 let mut site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
889 site.regex_check = Some("^[A-Za-z]{8,}$".into());
891 let outcome = build_client().check(&site, &user()).await;
892 assert_eq!(outcome.kind, MatchKind::Uncertain);
893 assert!(
894 matches!(outcome.reason, Some(UncertainReason::UsernameNotAllowed)),
895 "expected UsernameNotAllowed, got {:?}",
896 outcome.reason,
897 );
898 let recvd = server.received_requests().await.unwrap_or_default();
901 assert_eq!(
902 recvd.len(),
903 0,
904 "regex_check mismatch must skip the HTTP request entirely"
905 );
906 }
907
908 #[tokio::test]
909 async fn regex_check_pass_proceeds_to_probe() {
910 let server = MockServer::start().await;
911 Mock::given(any())
912 .and(path("/alice"))
913 .respond_with(ResponseTemplate::new(200))
914 .mount(&server)
915 .await;
916 let mut site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
917 site.regex_check = Some("^[a-z]{3,}$".into());
919 let outcome = build_client().check(&site, &user()).await;
920 assert_eq!(outcome.kind, MatchKind::Found);
921 }
922
923 #[tokio::test]
924 async fn status_signal_reports_found_on_match() {
925 let server = MockServer::start().await;
926 Mock::given(any())
927 .and(path("/alice"))
928 .respond_with(ResponseTemplate::new(200))
929 .mount(&server)
930 .await;
931 let site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
932 let outcome = build_client().check(&site, &user()).await;
933 assert_eq!(outcome.kind, MatchKind::Found);
934 assert!(outcome.url.ends_with("/alice"));
935 assert!(outcome.reason.is_none());
936 assert_eq!(outcome.evidence, ["HTTP 200 (status_found)"]);
937 }
938
939 #[tokio::test]
940 async fn status_signal_pair_reports_not_found_on_404() {
941 let server = MockServer::start().await;
942 Mock::given(any())
943 .and(path("/alice"))
944 .respond_with(ResponseTemplate::new(404))
945 .mount(&server)
946 .await;
947 let site = site_with(
948 &server,
949 vec![
950 Signal::StatusFound { codes: vec![200] },
951 Signal::StatusNotFound { codes: vec![404] },
952 ],
953 );
954 let outcome = build_client().check(&site, &user()).await;
955 assert_eq!(outcome.kind, MatchKind::NotFound);
956 assert_eq!(outcome.evidence, ["HTTP 404 (status_not_found)"]);
958 }
959
960 #[tokio::test]
961 async fn body_absent_signal_detects_missing_account() {
962 let server = MockServer::start().await;
963 Mock::given(any())
964 .and(path("/alice"))
965 .respond_with(ResponseTemplate::new(200).set_body_string("<h1>Profile not found</h1>"))
966 .mount(&server)
967 .await;
968 let site = site_with(
969 &server,
970 vec![Signal::BodyAbsent {
971 text: "Profile not found".into(),
972 }],
973 );
974 let outcome = build_client().check(&site, &user()).await;
975 assert_eq!(outcome.kind, MatchKind::NotFound);
976 }
977
978 #[tokio::test]
979 async fn body_absent_alone_yields_uncertain_when_marker_missing() {
980 let server = MockServer::start().await;
983 Mock::given(any())
984 .and(path("/alice"))
985 .respond_with(ResponseTemplate::new(200).set_body_string("<h1>Welcome alice</h1>"))
986 .mount(&server)
987 .await;
988 let site = site_with(
989 &server,
990 vec![Signal::BodyAbsent {
991 text: "Profile not found".into(),
992 }],
993 );
994 let outcome = build_client().check(&site, &user()).await;
995 assert_eq!(outcome.kind, MatchKind::Uncertain);
996 }
997
998 #[tokio::test]
999 async fn body_present_plus_absent_resolve_to_found() {
1000 let server = MockServer::start().await;
1001 Mock::given(any())
1002 .and(path("/alice"))
1003 .respond_with(
1004 ResponseTemplate::new(200)
1005 .set_body_string(r#"<div class="profile-card">alice</div>"#),
1006 )
1007 .mount(&server)
1008 .await;
1009 let site = site_with(
1010 &server,
1011 vec![
1012 Signal::BodyPresent {
1013 text: "profile-card".into(),
1014 },
1015 Signal::BodyAbsent {
1016 text: "Profile not found".into(),
1017 },
1018 ],
1019 );
1020 let outcome = build_client().check(&site, &user()).await;
1021 assert_eq!(outcome.kind, MatchKind::Found);
1022 }
1023
1024 #[tokio::test]
1025 async fn redirect_absent_signal_detects_missing_account() {
1026 let server = MockServer::start().await;
1027 Mock::given(any())
1028 .and(path("/alice"))
1029 .respond_with(
1030 ResponseTemplate::new(302).insert_header("location", "/login?next=/alice"),
1031 )
1032 .mount(&server)
1033 .await;
1034 Mock::given(any())
1035 .and(path("/login"))
1036 .respond_with(ResponseTemplate::new(200).set_body_string("login page"))
1037 .mount(&server)
1038 .await;
1039 let site = site_with(
1040 &server,
1041 vec![Signal::RedirectAbsent {
1042 fragment: "/login".into(),
1043 }],
1044 );
1045 let outcome = build_client().check(&site, &user()).await;
1046 assert_eq!(outcome.kind, MatchKind::NotFound);
1047 }
1048
1049 #[tokio::test]
1050 async fn negative_signal_wins_over_positive() {
1051 let server = MockServer::start().await;
1056 Mock::given(any())
1057 .and(path("/alice"))
1058 .respond_with(ResponseTemplate::new(200).set_body_string("Profile not found"))
1059 .mount(&server)
1060 .await;
1061 let site = site_with(
1062 &server,
1063 vec![
1064 Signal::StatusFound { codes: vec![200] },
1065 Signal::BodyAbsent {
1066 text: "Profile not found".into(),
1067 },
1068 ],
1069 );
1070 let outcome = build_client().check(&site, &user()).await;
1071 assert_eq!(outcome.kind, MatchKind::NotFound);
1072 }
1073
1074 #[tokio::test]
1075 async fn network_failure_yields_uncertain() {
1076 let listener = std::net::TcpListener::bind("127.0.0.1:0").unwrap();
1077 let port = listener.local_addr().unwrap().port();
1078 drop(listener);
1079
1080 let site = Site {
1081 name: "Dead".into(),
1082 url: UrlTemplate::new(format!("http://127.0.0.1:{port}/{{username}}")).unwrap(),
1083 signals: vec![Signal::StatusFound { codes: vec![200] }],
1084 known_present: None,
1085 known_absent: None,
1086 extract: Vec::new(),
1087 tags: Vec::new(),
1088 request_headers: std::collections::BTreeMap::new(),
1089 regex_check: None,
1090 engine: None,
1091 strip_bad_char: None,
1092 request_method: crate::site::HttpMethod::Get,
1093 request_body: None,
1094 protection: Vec::new(),
1095 };
1096 let client = Client::builder()
1097 .timeout(Duration::from_millis(500))
1098 .connect_timeout(Duration::from_millis(500))
1099 .max_retries(0)
1100 .build()
1101 .unwrap();
1102 let outcome = client.check(&site, &user()).await;
1103 assert_eq!(outcome.kind, MatchKind::Uncertain);
1104 assert!(outcome.reason.is_some());
1105 }
1106
1107 #[tokio::test]
1108 async fn throttle_spaces_consecutive_calls_to_same_host() {
1109 let server = MockServer::start().await;
1110 Mock::given(any())
1111 .and(path("/alice"))
1112 .respond_with(ResponseTemplate::new(200))
1113 .mount(&server)
1114 .await;
1115 let site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
1116 let client = Client::builder()
1121 .timeout(Duration::from_secs(2))
1122 .min_request_interval(Duration::from_millis(300))
1123 .build()
1124 .unwrap();
1125
1126 client.check(&site, &user()).await;
1127 let started = Instant::now();
1128 client.check(&site, &user()).await;
1129 let elapsed = started.elapsed();
1130 assert!(
1131 elapsed >= Duration::from_millis(200),
1132 "second probe to the same host should wait ≥200 ms, got {elapsed:?}",
1133 );
1134 }
1135
1136 #[tokio::test]
1137 async fn builder_overrides_user_agent() {
1138 let server = MockServer::start().await;
1139 Mock::given(any())
1140 .and(path("/alice"))
1141 .and(wiremock::matchers::header("user-agent", "adler-test/1.0"))
1142 .respond_with(ResponseTemplate::new(200))
1143 .mount(&server)
1144 .await;
1145 let site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
1146 let client = Client::builder()
1147 .user_agent("adler-test/1.0")
1148 .build()
1149 .unwrap();
1150 let outcome = client.check(&site, &user()).await;
1151 assert_eq!(outcome.kind, MatchKind::Found);
1152 }
1153
1154 #[tokio::test]
1155 async fn rate_limit_429_yields_uncertain_with_note() {
1156 let server = MockServer::start().await;
1157 Mock::given(any())
1158 .and(path("/alice"))
1159 .respond_with(ResponseTemplate::new(429))
1160 .mount(&server)
1161 .await;
1162 let site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
1163 let outcome = build_client().check(&site, &user()).await;
1164 assert_eq!(outcome.kind, MatchKind::Uncertain);
1165 assert_eq!(outcome.reason, Some(UncertainReason::RateLimited));
1166 }
1167
1168 #[tokio::test]
1169 async fn cloudflare_server_header_yields_uncertain() {
1170 let server = MockServer::start().await;
1171 Mock::given(any())
1172 .and(path("/alice"))
1173 .respond_with(ResponseTemplate::new(503).insert_header("server", "cloudflare"))
1174 .mount(&server)
1175 .await;
1176 let site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
1177 let outcome = build_client().check(&site, &user()).await;
1178 assert_eq!(outcome.kind, MatchKind::Uncertain);
1179 assert_eq!(outcome.reason, Some(UncertainReason::CloudflareChallenge));
1180 }
1181
1182 #[tokio::test]
1183 async fn cloudflare_interstitial_in_body_yields_uncertain() {
1184 let server = MockServer::start().await;
1187 Mock::given(any())
1188 .and(path("/alice"))
1189 .respond_with(
1190 ResponseTemplate::new(200)
1191 .set_body_string("<html><head><title>Just a moment...</title></head></html>"),
1192 )
1193 .mount(&server)
1194 .await;
1195 let site = site_with(
1196 &server,
1197 vec![Signal::BodyAbsent {
1198 text: "Profile not found".into(),
1199 }],
1200 );
1201 let outcome = build_client().check(&site, &user()).await;
1202 assert_eq!(outcome.kind, MatchKind::Uncertain);
1203 assert_eq!(outcome.reason, Some(UncertainReason::CloudflareChallenge));
1204 }
1205
1206 #[tokio::test]
1207 async fn ban_detection_does_not_fire_on_legitimate_403() {
1208 let server = MockServer::start().await;
1209 Mock::given(any())
1210 .and(path("/alice"))
1211 .respond_with(ResponseTemplate::new(403))
1212 .mount(&server)
1213 .await;
1214 let site = site_with(
1215 &server,
1216 vec![
1217 Signal::StatusFound { codes: vec![200] },
1218 Signal::StatusNotFound { codes: vec![403] },
1219 ],
1220 );
1221 let outcome = build_client().check(&site, &user()).await;
1222 assert_eq!(outcome.kind, MatchKind::NotFound);
1224 assert!(outcome.reason.is_none());
1225 }
1226
1227 #[tokio::test]
1228 async fn retry_recovers_after_transient_429() {
1229 let server = MockServer::start().await;
1230 Mock::given(any())
1232 .and(path("/alice"))
1233 .respond_with(ResponseTemplate::new(429))
1234 .up_to_n_times(1)
1235 .mount(&server)
1236 .await;
1237 Mock::given(any())
1238 .and(path("/alice"))
1239 .respond_with(ResponseTemplate::new(200))
1240 .mount(&server)
1241 .await;
1242 let site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
1243 let client = Client::builder()
1244 .timeout(Duration::from_secs(2))
1245 .min_request_interval(Duration::ZERO)
1246 .max_retries(2)
1247 .base_backoff_delay(Duration::from_millis(20))
1248 .max_backoff_delay(Duration::from_millis(100))
1249 .build()
1250 .unwrap();
1251 let outcome = client.check(&site, &user()).await;
1252 assert_eq!(outcome.kind, MatchKind::Found);
1253 assert!(outcome.reason.is_none());
1254 }
1255
1256 #[tokio::test]
1257 async fn retry_exhausts_and_returns_uncertain() {
1258 let server = MockServer::start().await;
1259 Mock::given(any())
1260 .and(path("/alice"))
1261 .respond_with(ResponseTemplate::new(429))
1262 .mount(&server)
1263 .await;
1264 let site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
1265 let client = Client::builder()
1266 .timeout(Duration::from_secs(2))
1267 .min_request_interval(Duration::ZERO)
1268 .max_retries(2)
1269 .base_backoff_delay(Duration::from_millis(10))
1270 .max_backoff_delay(Duration::from_millis(50))
1271 .build()
1272 .unwrap();
1273 let outcome = client.check(&site, &user()).await;
1274 assert_eq!(outcome.kind, MatchKind::Uncertain);
1275 assert_eq!(outcome.reason, Some(UncertainReason::RateLimited));
1276 }
1277
1278 #[tokio::test]
1279 async fn retry_does_not_fire_on_network_error() {
1280 let listener = std::net::TcpListener::bind("127.0.0.1:0").unwrap();
1284 let port = listener.local_addr().unwrap().port();
1285 drop(listener);
1286 let site = Site {
1287 name: "Dead".into(),
1288 url: UrlTemplate::new(format!("http://127.0.0.1:{port}/{{username}}")).unwrap(),
1289 signals: vec![Signal::StatusFound { codes: vec![200] }],
1290 known_present: None,
1291 known_absent: None,
1292 extract: Vec::new(),
1293 tags: Vec::new(),
1294 request_headers: std::collections::BTreeMap::new(),
1295 regex_check: None,
1296 engine: None,
1297 strip_bad_char: None,
1298 request_method: crate::site::HttpMethod::Get,
1299 request_body: None,
1300 protection: Vec::new(),
1301 };
1302 let client = Client::builder()
1303 .timeout(Duration::from_millis(500))
1304 .connect_timeout(Duration::from_millis(500))
1305 .min_request_interval(Duration::ZERO)
1306 .max_retries(3)
1307 .base_backoff_delay(Duration::from_secs(60))
1308 .build()
1309 .unwrap();
1310 let started = Instant::now();
1311 let outcome = client.check(&site, &user()).await;
1312 assert!(started.elapsed() < Duration::from_secs(5));
1315 assert_eq!(outcome.kind, MatchKind::Uncertain);
1316 assert!(
1317 matches!(outcome.reason, Some(UncertainReason::Network(_))),
1318 "got {:?}",
1319 outcome.reason,
1320 );
1321 }
1322
1323 #[tokio::test]
1324 async fn rotates_user_agent_per_request() {
1325 let server = MockServer::start().await;
1329 Mock::given(any())
1330 .and(path("/alice"))
1331 .and(wiremock::matchers::header("user-agent", "RotatorUA/9.9"))
1332 .respond_with(ResponseTemplate::new(200))
1333 .mount(&server)
1334 .await;
1335 let site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
1336 let client = Client::builder()
1337 .min_request_interval(Duration::ZERO)
1338 .max_retries(0)
1339 .rotate_user_agents(vec!["RotatorUA/9.9".into()])
1340 .build()
1341 .unwrap();
1342 let outcome = client.check(&site, &user()).await;
1343 assert_eq!(outcome.kind, MatchKind::Found);
1344 }
1345
/// Building a client with a proxy string that is not a URL must fail with
/// `Error::HttpSetup`.
#[test]
fn invalid_proxy_url_fails_build() {
    let built = Client::builder().proxy("not a url").build();
    let failure = built.unwrap_err();
    assert!(matches!(failure, Error::HttpSetup { .. }));
}
1351
/// A proxy string without a scheme must be rejected with an `HttpSetup`
/// error whose message contains "must start with".
#[test]
fn schemeless_proxy_is_rejected_up_front() {
    let built = Client::builder().proxy("not-a-url").build();
    match built.unwrap_err() {
        Error::HttpSetup { message } => {
            assert!(message.contains("must start with"), "{message}");
        }
        err => panic!("expected HttpSetup, got {err:?}"),
    }
}
1361
/// A `socks5://` proxy URL must be accepted by the builder.
#[test]
fn socks5_proxy_scheme_is_accepted() {
    let built = Client::builder().proxy("socks5://127.0.0.1:9050").build();
    assert!(built.is_ok());
}
1372
1373 #[tokio::test]
1374 async fn global_rps_cap_spaces_requests_across_hosts() {
1375 let server = MockServer::start().await;
1378 Mock::given(any())
1379 .respond_with(ResponseTemplate::new(200))
1380 .mount(&server)
1381 .await;
1382 let site_a = Site {
1383 name: "A".into(),
1384 url: UrlTemplate::new(format!("{}/a/{{username}}", server.uri())).unwrap(),
1385 signals: vec![Signal::StatusFound { codes: vec![200] }],
1386 known_present: None,
1387 known_absent: None,
1388 extract: Vec::new(),
1389 tags: Vec::new(),
1390 request_headers: std::collections::BTreeMap::new(),
1391 regex_check: None,
1392 engine: None,
1393 strip_bad_char: None,
1394 request_method: crate::site::HttpMethod::Get,
1395 request_body: None,
1396 protection: Vec::new(),
1397 };
1398 let site_b = Site {
1399 name: "B".into(),
1400 url: UrlTemplate::new(format!("{}/b/{{username}}", server.uri())).unwrap(),
1401 signals: vec![Signal::StatusFound { codes: vec![200] }],
1402 known_present: None,
1403 known_absent: None,
1404 extract: Vec::new(),
1405 tags: Vec::new(),
1406 request_headers: std::collections::BTreeMap::new(),
1407 regex_check: None,
1408 engine: None,
1409 strip_bad_char: None,
1410 request_method: crate::site::HttpMethod::Get,
1411 request_body: None,
1412 protection: Vec::new(),
1413 };
1414 let client = Client::builder()
1419 .min_request_interval(Duration::ZERO)
1420 .max_retries(0)
1421 .max_rps(std::num::NonZeroU32::new(2).unwrap())
1422 .build()
1423 .unwrap();
1424 client.check(&site_a, &user()).await;
1427 let started = Instant::now();
1428 client.check(&site_b, &user()).await;
1429 assert!(
1430 started.elapsed() >= Duration::from_millis(350),
1431 "global cap should space cross-host requests, got {:?}",
1432 started.elapsed(),
1433 );
1434 }
1435
1436 #[tokio::test]
1437 async fn respect_robots_skips_disallowed_paths() {
1438 let server = MockServer::start().await;
1439 Mock::given(any())
1440 .and(path("/robots.txt"))
1441 .respond_with(
1442 ResponseTemplate::new(200).set_body_string("User-agent: *\nDisallow: /no"),
1443 )
1444 .mount(&server)
1445 .await;
1446 Mock::given(any())
1447 .and(path("/no/alice"))
1448 .respond_with(ResponseTemplate::new(200))
1449 .mount(&server)
1450 .await;
1451 Mock::given(any())
1452 .and(path("/yes/alice"))
1453 .respond_with(ResponseTemplate::new(200))
1454 .mount(&server)
1455 .await;
1456 let client = Client::builder()
1457 .min_request_interval(Duration::ZERO)
1458 .max_retries(0)
1459 .respect_robots(true)
1460 .build()
1461 .unwrap();
1462
1463 let disallowed = Site {
1464 name: "No".into(),
1465 url: UrlTemplate::new(format!("{}/no/{{username}}", server.uri())).unwrap(),
1466 signals: vec![Signal::StatusFound { codes: vec![200] }],
1467 known_present: None,
1468 known_absent: None,
1469 extract: Vec::new(),
1470 tags: Vec::new(),
1471 request_headers: std::collections::BTreeMap::new(),
1472 regex_check: None,
1473 engine: None,
1474 strip_bad_char: None,
1475 request_method: crate::site::HttpMethod::Get,
1476 request_body: None,
1477 protection: Vec::new(),
1478 };
1479 let allowed = Site {
1480 name: "Yes".into(),
1481 url: UrlTemplate::new(format!("{}/yes/{{username}}", server.uri())).unwrap(),
1482 signals: vec![Signal::StatusFound { codes: vec![200] }],
1483 known_present: None,
1484 known_absent: None,
1485 extract: Vec::new(),
1486 tags: Vec::new(),
1487 request_headers: std::collections::BTreeMap::new(),
1488 regex_check: None,
1489 engine: None,
1490 strip_bad_char: None,
1491 request_method: crate::site::HttpMethod::Get,
1492 request_body: None,
1493 protection: Vec::new(),
1494 };
1495
1496 let no = client.check(&disallowed, &user()).await;
1497 assert_eq!(no.kind, MatchKind::Uncertain);
1498 assert_eq!(no.reason, Some(UncertainReason::RobotsDisallowed));
1499
1500 let yes = client.check(&allowed, &user()).await;
1501 assert_eq!(yes.kind, MatchKind::Found);
1502 }
1503
1504 #[tokio::test]
1505 async fn body_read_skipped_when_no_body_signal_needed() {
1506 let server = MockServer::start().await;
1509 Mock::given(any())
1510 .and(path("/alice"))
1511 .respond_with(ResponseTemplate::new(200).set_body_string("Profile not found"))
1512 .mount(&server)
1513 .await;
1514 let site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
1515 let outcome = build_client().check(&site, &user()).await;
1516 assert_eq!(outcome.kind, MatchKind::Found);
1517 }
1518
1519 #[derive(Debug)]
1525 struct RecordingBackend {
1526 page: RenderedPage,
1527 calls: std::sync::atomic::AtomicUsize,
1528 }
1529
1530 impl RecordingBackend {
1531 fn with_page(page: RenderedPage) -> Self {
1532 Self {
1533 page,
1534 calls: std::sync::atomic::AtomicUsize::new(0),
1535 }
1536 }
1537 fn call_count(&self) -> usize {
1538 self.calls.load(std::sync::atomic::Ordering::SeqCst)
1539 }
1540 }
1541
1542 #[async_trait::async_trait]
1543 impl BrowserBackend for RecordingBackend {
1544 async fn fetch(
1545 &self,
1546 _url: &url::Url,
1547 _headers: &std::collections::BTreeMap<String, String>,
1548 _timeout: Duration,
1549 ) -> Result<RenderedPage> {
1550 self.calls.fetch_add(1, std::sync::atomic::Ordering::SeqCst);
1551 Ok(self.page.clone())
1552 }
1553 }
1554
1555 fn site_bot_protected(server: &MockServer) -> Site {
1556 let mut s = site_with(server, vec![Signal::StatusFound { codes: vec![200] }]);
1557 s.tags = vec!["bot-protected".into()];
1558 s
1559 }
1560
1561 #[tokio::test]
1562 async fn browser_routes_bot_protected_sites() {
1563 let server = MockServer::start().await;
1566 let backend = Arc::new(RecordingBackend::with_page(RenderedPage {
1567 status: 200,
1568 final_url: url::Url::parse("https://example.com/alice").unwrap(),
1569 body: "<html></html>".into(),
1570 elapsed_ms: 42,
1571 }));
1572 let client = Client::builder()
1573 .min_request_interval(Duration::ZERO)
1574 .max_retries(0)
1575 .browser(backend.clone())
1576 .build()
1577 .unwrap();
1578 let outcome = client.check(&site_bot_protected(&server), &user()).await;
1579 assert_eq!(outcome.kind, MatchKind::Found);
1580 assert_eq!(backend.call_count(), 1, "browser invoked exactly once");
1581 }
1582
1583 #[tokio::test]
1584 async fn non_bot_protected_sites_skip_browser() {
1585 let server = MockServer::start().await;
1586 Mock::given(any())
1587 .and(path("/alice"))
1588 .respond_with(ResponseTemplate::new(200))
1589 .mount(&server)
1590 .await;
1591 let backend = Arc::new(RecordingBackend::with_page(RenderedPage {
1592 status: 500, final_url: url::Url::parse("https://x/").unwrap(),
1594 body: String::new(),
1595 elapsed_ms: 0,
1596 }));
1597 let client = Client::builder()
1598 .min_request_interval(Duration::ZERO)
1599 .max_retries(0)
1600 .browser(backend.clone())
1601 .build()
1602 .unwrap();
1603 let site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
1605 let outcome = client.check(&site, &user()).await;
1606 assert_eq!(outcome.kind, MatchKind::Found);
1607 assert_eq!(backend.call_count(), 0, "browser must not be touched");
1608 }
1609
1610 #[tokio::test]
1611 async fn browser_budget_exhaust_yields_uncertain() {
1612 let server = MockServer::start().await;
1613 let backend = Arc::new(RecordingBackend::with_page(RenderedPage {
1614 status: 200,
1615 final_url: url::Url::parse("https://x/").unwrap(),
1616 body: String::new(),
1617 elapsed_ms: 0,
1618 }));
1619 let client = Client::builder()
1620 .min_request_interval(Duration::ZERO)
1621 .max_retries(0)
1622 .browser(backend.clone())
1623 .browser_budget(1)
1624 .build()
1625 .unwrap();
1626 let site = site_bot_protected(&server);
1627 let first = client.check(&site, &user()).await;
1629 assert_eq!(first.kind, MatchKind::Found);
1630 let second = client.check(&site, &user()).await;
1632 assert_eq!(second.kind, MatchKind::Uncertain);
1633 assert!(matches!(
1634 second.reason,
1635 Some(UncertainReason::BrowserBudget)
1636 ));
1637 assert_eq!(
1638 backend.call_count(),
1639 1,
1640 "second call must not invoke backend"
1641 );
1642 }
1643
1644 #[tokio::test]
1645 async fn browser_failure_surfaces_as_uncertain_browser_failed() {
1646 struct FailingBackend;
1647 #[async_trait::async_trait]
1648 impl BrowserBackend for FailingBackend {
1649 async fn fetch(
1650 &self,
1651 _url: &url::Url,
1652 _headers: &std::collections::BTreeMap<String, String>,
1653 _timeout: Duration,
1654 ) -> Result<RenderedPage> {
1655 Err(Error::BrowserSetup {
1656 message: "simulated crash".into(),
1657 })
1658 }
1659 }
1660 impl std::fmt::Debug for FailingBackend {
1661 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1662 f.write_str("FailingBackend")
1663 }
1664 }
1665
1666 let server = MockServer::start().await;
1667 let client = Client::builder()
1668 .min_request_interval(Duration::ZERO)
1669 .max_retries(0)
1670 .browser(Arc::new(FailingBackend))
1671 .build()
1672 .unwrap();
1673 let outcome = client.check(&site_bot_protected(&server), &user()).await;
1674 assert_eq!(outcome.kind, MatchKind::Uncertain);
1675 match outcome.reason {
1676 Some(UncertainReason::BrowserFailed(msg)) => {
1677 assert!(msg.contains("simulated crash"), "got: {msg}");
1678 }
1679 other => panic!("expected BrowserFailed, got {other:?}"),
1680 }
1681 }
1682
1683 #[tokio::test]
1684 async fn status_only_site_uses_head_request() {
1685 let server = MockServer::start().await;
1689 Mock::given(method("HEAD"))
1690 .and(path("/alice"))
1691 .respond_with(ResponseTemplate::new(200))
1692 .mount(&server)
1693 .await;
1694 let site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
1695 let outcome = build_client().check(&site, &user()).await;
1696 assert_eq!(outcome.kind, MatchKind::Found);
1697 let recvd = server.received_requests().await.unwrap_or_default();
1698 assert_eq!(recvd.len(), 1);
1699 assert_eq!(recvd[0].method.as_str(), "HEAD");
1700 }
1701
1702 #[tokio::test]
1703 async fn body_signal_site_uses_get_request() {
1704 let server = MockServer::start().await;
1707 Mock::given(any())
1708 .and(path("/alice"))
1709 .respond_with(ResponseTemplate::new(200).set_body_string("hello alice"))
1710 .mount(&server)
1711 .await;
1712 let site = site_with(
1713 &server,
1714 vec![Signal::BodyPresent {
1715 text: "hello".into(),
1716 }],
1717 );
1718 let outcome = build_client().check(&site, &user()).await;
1719 assert_eq!(outcome.kind, MatchKind::Found);
1720 let recvd = server.received_requests().await.unwrap_or_default();
1721 assert_eq!(recvd[0].method.as_str(), "GET");
1722 }
1723
1724 #[tokio::test]
1725 async fn protection_field_routes_through_browser_like_bot_protected_tag() {
1726 let server = MockServer::start().await;
1731 Mock::given(any())
1732 .respond_with(ResponseTemplate::new(200))
1733 .mount(&server)
1734 .await;
1735 let mut site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
1736 site.protection = vec![crate::site::ProtectionKind::Cloudflare];
1737 let backend = Arc::new(RecordingBackend::with_page(RenderedPage {
1739 status: 200,
1740 final_url: url::Url::parse(&format!("{}/alice", server.uri())).unwrap(),
1741 body: String::new(),
1742 elapsed_ms: 0,
1743 }));
1744 let client = Client::builder()
1745 .min_request_interval(Duration::ZERO)
1746 .max_retries(0)
1747 .browser(backend)
1748 .build()
1749 .unwrap();
1750 let outcome = client.check(&site, &user()).await;
1751 assert_eq!(outcome.kind, MatchKind::Found);
1754 let recvd = server.received_requests().await.unwrap_or_default();
1756 assert_eq!(
1757 recvd.len(),
1758 0,
1759 "structured protection must skip the raw HTTP path"
1760 );
1761 }
1762
1763 #[tokio::test]
1764 async fn post_method_sends_body_with_username_substituted() {
1765 let server = MockServer::start().await;
1769 Mock::given(method("POST"))
1770 .and(path("/api"))
1771 .respond_with(ResponseTemplate::new(200))
1772 .mount(&server)
1773 .await;
1774 let site = Site {
1779 name: "ApiPost".into(),
1780 url: UrlTemplate::new(format!("{}/api?_={{username}}", server.uri())).unwrap(),
1781 signals: vec![Signal::StatusFound { codes: vec![200] }],
1782 known_present: None,
1783 known_absent: None,
1784 extract: Vec::new(),
1785 tags: Vec::new(),
1786 request_headers: std::collections::BTreeMap::new(),
1787 regex_check: None,
1788 engine: None,
1789 strip_bad_char: None,
1790 request_method: HttpMethod::Post,
1791 request_body: Some(r#"{"name":"{username}"}"#.into()),
1792 protection: Vec::new(),
1793 };
1794 let outcome = build_client().check(&site, &user()).await;
1795 assert_eq!(outcome.kind, MatchKind::Found);
1796 let recvd = server.received_requests().await.unwrap_or_default();
1797 assert_eq!(recvd.len(), 1);
1798 assert_eq!(recvd[0].method.as_str(), "POST");
1799 let body = String::from_utf8_lossy(&recvd[0].body).to_string();
1800 assert!(body.contains("\"name\":\"alice\""), "body was: {body}");
1801 }
1802
1803 #[tokio::test]
1804 async fn head_405_falls_back_to_get() {
1805 let server = MockServer::start().await;
1808 Mock::given(method("HEAD"))
1809 .and(path("/alice"))
1810 .respond_with(ResponseTemplate::new(405))
1811 .mount(&server)
1812 .await;
1813 Mock::given(any())
1814 .and(path("/alice"))
1815 .respond_with(ResponseTemplate::new(200))
1816 .mount(&server)
1817 .await;
1818 let site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
1819 let outcome = build_client().check(&site, &user()).await;
1820 assert_eq!(outcome.kind, MatchKind::Found);
1821 let recvd = server.received_requests().await.unwrap_or_default();
1822 assert_eq!(recvd.len(), 2);
1823 assert_eq!(recvd[0].method.as_str(), "HEAD");
1824 assert_eq!(recvd[1].method.as_str(), "GET");
1825 }
1826}