1use std::fmt;
11use std::num::NonZeroU32;
12use std::sync::Arc;
13use std::time::{Duration, Instant};
14
15use reqwest::redirect;
16
17use crate::ban;
18use crate::browser::{BrowserBackend, BrowserBudget, RenderedPage};
19use crate::check::{CheckOutcome, MatchKind, UncertainReason};
20use crate::error::{Error, Result};
21use crate::retry::{self, RetryPolicy};
22use crate::robots::RobotsCache;
23use crate::site::{HttpMethod, Probe, Signal, SignalVerdict, Site, aggregate};
24use crate::throttle::HostThrottle;
25use crate::username::Username;
26
/// Overall per-request timeout a fresh [`ClientBuilder`] starts with.
const DEFAULT_TIMEOUT: Duration = Duration::from_secs(10);
/// Connection-establishment timeout a fresh [`ClientBuilder`] starts with.
const DEFAULT_CONNECT_TIMEOUT: Duration = Duration::from_secs(5);
/// Redirect cap applied when the builder is configured to follow redirects.
const DEFAULT_REDIRECT_LIMIT: usize = 8;
/// Default interval handed to the per-host throttle.
const DEFAULT_PER_HOST_INTERVAL: Duration = Duration::from_millis(100);
/// Key under which every request waits on the optional global throttle.
const GLOBAL_THROTTLE_KEY: &str = "*global*";
33
/// HTTP probing client assembled by [`ClientBuilder::build`].
#[derive(Clone)]
pub struct Client {
    /// Underlying reqwest client used for every raw HTTP request.
    inner: reqwest::Client,
    /// Throttle waited on with the target host before each raw request.
    throttle: HostThrottle,
    /// Optional throttle waited on with [`GLOBAL_THROTTLE_KEY`]; present only
    /// when the builder was given a `max_rps`.
    global_throttle: Option<HostThrottle>,
    /// Policy consulted by [`Client::check`] to decide on retries and delays.
    retry: RetryPolicy,
    /// Pool of `User-Agent` values; when empty no per-request header is set.
    user_agents: Arc<[String]>,
    /// Whether enrichment extraction runs on `Found` results.
    enrich: bool,
    /// robots.txt cache; `Some` only when the builder had `respect_robots` on.
    robots: Option<RobotsCache>,
    /// Optional browser backend used for bot-protected sites.
    browser: Option<Arc<dyn BrowserBackend>>,
    /// Budget consulted (`try_consume`) before each browser-backed probe.
    browser_budget: Arc<BrowserBudget>,
}
62
63impl Client {
    /// Returns a [`ClientBuilder`] populated with its `Default` settings.
    pub fn builder() -> ClientBuilder {
        ClientBuilder::default()
    }
68
69 #[tracing::instrument(skip(self), fields(site = %site.name, user = %username))]
83 pub async fn check(&self, site: &Site, username: &Username) -> CheckOutcome {
84 let mut attempt: u32 = 0;
85 loop {
86 let outcome = self.probe_once(site, username).await;
87 if !retry::should_retry(&outcome, attempt, &self.retry) {
88 return outcome;
89 }
90 let delay = retry::backoff_delay(attempt, &self.retry);
91 tracing::info!(
92 site = %site.name,
93 attempt = attempt + 1,
94 reason = outcome.reason.as_ref().map(ToString::to_string).unwrap_or_default(),
95 ?delay,
96 "transient ban, retrying",
97 );
98 tokio::time::sleep(delay).await;
99 attempt += 1;
100 }
101 }
102
103 pub async fn fetch(&self, url: &str) -> Option<RawResponse> {
112 let host = host_of(url);
113 if let Some(global) = &self.global_throttle {
114 global.wait(GLOBAL_THROTTLE_KEY).await;
115 }
116 self.throttle.wait(&host).await;
117 let mut request = self.inner.get(url);
118 if let Some(ua) = self.pick_user_agent() {
119 request = request.header(reqwest::header::USER_AGENT, ua);
120 }
121 let response = request.send().await.ok()?;
122 let status = response.status().as_u16();
123 let final_url = response.url().to_string();
124 let body = response.text().await.unwrap_or_default();
125 Some(RawResponse {
126 status,
127 final_url,
128 body,
129 })
130 }
131
132 pub async fn fetch_for_doctor(&self, site: &Site, url: &str) -> Option<RawResponse> {
143 if let Some(backend) = self.browser.as_deref() {
144 let has_tag = site
145 .tags
146 .iter()
147 .any(|t| t.eq_ignore_ascii_case(BOT_PROTECTED_TAG));
148 if has_tag || !site.protection.is_empty() {
149 let parsed = url::Url::parse(url).ok()?;
150 match backend
151 .fetch(&parsed, &site.request_headers, BROWSER_TIMEOUT)
152 .await
153 {
154 Ok(page) => {
155 return Some(RawResponse {
156 status: page.status,
157 final_url: page.final_url.to_string(),
158 body: page.body,
159 });
160 }
161 Err(err) => {
162 tracing::warn!(
163 site = %site.name, %url, error = %err,
164 "browser fetch failed in doctor; falling back to raw HTTP",
165 );
166 }
167 }
168 }
169 }
170 self.fetch(url).await
171 }
172
173 fn pick_user_agent(&self) -> Option<&str> {
176 match self.user_agents.len() {
177 0 => None,
178 1 => Some(&self.user_agents[0]),
179 n => Some(&self.user_agents[fastrand::usize(0..n)]),
180 }
181 }
182
183 #[allow(clippy::too_many_lines)]
186 async fn probe_once(&self, site: &Site, username: &Username) -> CheckOutcome {
187 let url = site.url_for(username);
188
189 if let Some(pat) = &site.regex_check {
199 if let Ok(re) = regex::Regex::new(pat) {
200 if !re.is_match(username.as_str()) {
201 return uncertain(
202 &site.name,
203 url,
204 Instant::now(),
205 UncertainReason::UsernameNotAllowed,
206 );
207 }
208 }
209 }
210
211 if let Some(backend) = self.browser.as_deref() {
218 let has_tag = site
219 .tags
220 .iter()
221 .any(|t| t.eq_ignore_ascii_case(BOT_PROTECTED_TAG));
222 if has_tag || !site.protection.is_empty() {
223 if self.browser_budget.try_consume() {
224 return self.probe_with_browser(site, &url, backend).await;
225 }
226 tracing::warn!(site = %site.name, "browser budget exhausted");
227 return uncertain(
228 &site.name,
229 url,
230 Instant::now(),
231 UncertainReason::BrowserBudget,
232 );
233 }
234 }
235
236 let host = host_of(&url);
237
238 if let Some(robots) = &self.robots {
240 if let Some((origin, path)) = origin_and_path(&url) {
241 if !robots.allowed(&origin, &path).await {
242 tracing::debug!(%url, "skipped by robots.txt");
243 return uncertain(
244 &site.name,
245 url,
246 Instant::now(),
247 UncertainReason::RobotsDisallowed,
248 );
249 }
250 }
251 }
252
253 if let Some(global) = &self.global_throttle {
255 global.wait(GLOBAL_THROTTLE_KEY).await;
256 }
257 self.throttle.wait(&host).await;
258 let started = Instant::now();
259 tracing::debug!(%url, %host, "probing");
260
261 let want_enrich = self.enrich && !site.extract.is_empty();
264 let needs_body = want_enrich || site.signals.iter().any(crate::site::Signal::needs_body);
265
266 let body_for_post: Option<String> = if matches!(site.request_method, HttpMethod::Post) {
273 const USERNAME_PH: &str = "{username}";
274 site.request_body
275 .as_deref()
276 .map(|t| t.replace(USERNAME_PH, username.as_str()))
277 } else {
278 None
279 };
280
281 let response = match site.request_method {
289 HttpMethod::Post => {
290 send_request_with_body(
291 &self.inner,
292 reqwest::Method::POST,
293 &url,
294 self.pick_user_agent(),
295 body_for_post.as_deref(),
296 )
297 .await
298 }
299 HttpMethod::Get if needs_body => {
300 send_request(
301 &self.inner,
302 reqwest::Method::GET,
303 &url,
304 self.pick_user_agent(),
305 )
306 .await
307 }
308 HttpMethod::Get => {
309 match send_request(
310 &self.inner,
311 reqwest::Method::HEAD,
312 &url,
313 self.pick_user_agent(),
314 )
315 .await
316 {
317 Ok(r) if r.status().as_u16() == 405 => {
318 send_request(
319 &self.inner,
320 reqwest::Method::GET,
321 &url,
322 self.pick_user_agent(),
323 )
324 .await
325 }
326 other => other,
327 }
328 }
329 };
330 let response = match response {
331 Ok(r) => r,
332 Err(err) => {
333 tracing::debug!(error = %err, "request failed");
334 return uncertain(
335 &site.name,
336 url,
337 started,
338 UncertainReason::Network(err.to_string()),
339 );
340 }
341 };
342
343 let status = response.status().as_u16();
344 let final_url = response.url().to_string();
345
346 if let Some(reason) = ban::detect_pre_body(status, response.headers()) {
347 tracing::warn!(%host, status, %reason, "ban-like response");
348 return uncertain(&site.name, url, started, reason);
349 }
350 let body = if needs_body {
351 match response.text().await {
352 Ok(b) => b,
353 Err(err) => {
354 return uncertain(
355 &site.name,
356 url,
357 started,
358 UncertainReason::BodyRead(err.to_string()),
359 );
360 }
361 }
362 } else {
363 String::new()
364 };
365
366 if !body.is_empty() {
367 if let Some(reason) = ban::detect_in_body(&body) {
368 tracing::warn!(%host, %reason, "ban-like body");
369 return uncertain(&site.name, url, started, reason);
370 }
371 }
372
373 let probe = Probe {
374 status,
375 final_url: &final_url,
376 body: &body,
377 };
378 let votes: Vec<(&Signal, SignalVerdict)> = site
379 .signals
380 .iter()
381 .map(|s| (s, s.evaluate(&probe)))
382 .collect();
383 let kind = aggregate(votes.iter().map(|(_, v)| *v));
384 let mut result = outcome(&site.name, url, started, kind);
385 let winning = match kind {
387 MatchKind::Found => Some(SignalVerdict::Found),
388 MatchKind::NotFound => Some(SignalVerdict::NotFound),
389 MatchKind::Uncertain => None,
390 };
391 if let Some(want) = winning {
392 result.evidence = votes
393 .iter()
394 .filter(|(_, v)| *v == want)
395 .map(|(s, _)| s.describe_match(&probe))
396 .collect();
397 }
398 if want_enrich && kind == MatchKind::Found {
399 result.enrichment = crate::enrich::extract(&body, &site.extract);
400 }
401 result
402 }
403
404 async fn probe_with_browser(
409 &self,
410 site: &Site,
411 url: &str,
412 backend: &dyn BrowserBackend,
413 ) -> CheckOutcome {
414 let started = Instant::now();
415 let parsed = match url::Url::parse(url) {
416 Ok(u) => u,
417 Err(err) => {
418 return uncertain(
419 &site.name,
420 url.to_owned(),
421 started,
422 UncertainReason::Other(format!("invalid url: {err}")),
423 );
424 }
425 };
426
427 let page: RenderedPage = match backend
428 .fetch(&parsed, &site.request_headers, BROWSER_TIMEOUT)
429 .await
430 {
431 Ok(p) => p,
432 Err(err) => {
433 tracing::warn!(site = %site.name, %url, error = %err, "browser fetch failed");
434 return uncertain(
435 &site.name,
436 url.to_owned(),
437 started,
438 UncertainReason::BrowserFailed(err.to_string()),
439 );
440 }
441 };
442
443 let final_url_str = page.final_url.as_str().to_owned();
444 let probe = Probe {
445 status: page.status,
446 final_url: &final_url_str,
447 body: &page.body,
448 };
449 let votes: Vec<(&Signal, SignalVerdict)> = site
450 .signals
451 .iter()
452 .map(|s| (s, s.evaluate(&probe)))
453 .collect();
454 let kind = aggregate(votes.iter().map(|(_, v)| *v));
455 let mut result = outcome(&site.name, url.to_owned(), started, kind);
456 let winning = match kind {
457 MatchKind::Found => Some(SignalVerdict::Found),
458 MatchKind::NotFound => Some(SignalVerdict::NotFound),
459 MatchKind::Uncertain => None,
460 };
461 if let Some(want) = winning {
462 result.evidence = votes
463 .iter()
464 .filter(|(_, v)| *v == want)
465 .map(|(s, _)| s.describe_match(&probe))
466 .collect();
467 }
468 if self.enrich && kind == MatchKind::Found && !site.extract.is_empty() {
469 result.enrichment = crate::enrich::extract(&page.body, &site.extract);
470 }
471 result
472 }
473}
474
/// Minimal view of a fetched page, as returned by [`Client::fetch`] and
/// [`Client::fetch_for_doctor`].
#[derive(Debug, Clone)]
pub struct RawResponse {
    /// HTTP status code of the final response.
    pub status: u16,
    /// URL the response was ultimately served from.
    pub final_url: String,
    /// Response body as text; empty when the raw HTTP body could not be read.
    pub body: String,
}
485
/// Builder collecting the settings from which [`Client`] is constructed.
#[derive(Clone)]
#[must_use = "ClientBuilder does nothing until `.build()` is called"]
pub struct ClientBuilder {
    /// Overall request timeout for the reqwest client.
    timeout: Duration,
    /// Connection timeout for the reqwest client.
    connect_timeout: Duration,
    /// Default `User-Agent` configured on the reqwest client.
    user_agent: String,
    /// Whether redirects are followed (up to `redirect_limit`) or not at all.
    follow_redirects: bool,
    /// Maximum redirects followed when `follow_redirects` is true.
    redirect_limit: usize,
    /// Interval handed to the per-host throttle.
    min_request_interval: Duration,
    /// Optional global rate; turned into a throttle interval of `1s / rps`.
    max_rps: Option<NonZeroU32>,
    /// Retry policy copied into the built client.
    retry: RetryPolicy,
    /// Optional proxy URL, validated in `build`.
    proxy: Option<String>,
    /// Pool of `User-Agent` values for per-request rotation.
    user_agents: Vec<String>,
    /// Whether enrichment extraction is enabled.
    enrich: bool,
    /// Whether a robots.txt cache is created for the client.
    respect_robots: bool,
    /// Optional browser backend for bot-protected sites.
    browser: Option<Arc<dyn BrowserBackend>>,
    /// Cap passed to `BrowserBudget::new`.
    browser_budget: usize,
}
505
506impl Default for ClientBuilder {
507 fn default() -> Self {
508 Self {
509 timeout: DEFAULT_TIMEOUT,
510 connect_timeout: DEFAULT_CONNECT_TIMEOUT,
511 user_agent: default_user_agent(),
512 follow_redirects: true,
513 redirect_limit: DEFAULT_REDIRECT_LIMIT,
514 min_request_interval: DEFAULT_PER_HOST_INTERVAL,
515 max_rps: None,
516 retry: RetryPolicy::default(),
517 proxy: None,
518 user_agents: Vec::new(),
519 enrich: false,
520 respect_robots: false,
521 browser: None,
522 browser_budget: DEFAULT_BROWSER_BUDGET,
523 }
524 }
525}
526
527impl ClientBuilder {
528 pub fn timeout(mut self, timeout: Duration) -> Self {
530 self.timeout = timeout;
531 self
532 }
533
534 pub fn connect_timeout(mut self, timeout: Duration) -> Self {
536 self.connect_timeout = timeout;
537 self
538 }
539
540 pub fn user_agent(mut self, user_agent: impl Into<String>) -> Self {
542 self.user_agent = user_agent.into();
543 self
544 }
545
546 pub fn follow_redirects(mut self, follow: bool) -> Self {
549 self.follow_redirects = follow;
550 self
551 }
552
553 pub fn min_request_interval(mut self, interval: Duration) -> Self {
559 self.min_request_interval = interval;
560 self
561 }
562
563 pub fn max_rps(mut self, rps: NonZeroU32) -> Self {
568 self.max_rps = Some(rps);
569 self
570 }
571
572 pub fn max_retries(mut self, n: u32) -> Self {
575 self.retry.max_retries = n;
576 self
577 }
578
579 pub fn base_backoff_delay(mut self, d: Duration) -> Self {
582 self.retry.base_delay = d;
583 self
584 }
585
586 pub fn max_backoff_delay(mut self, d: Duration) -> Self {
588 self.retry.max_delay = d;
589 self
590 }
591
592 pub fn proxy(mut self, url: impl Into<String>) -> Self {
595 self.proxy = Some(url.into());
596 self
597 }
598
599 pub fn rotate_user_agents(mut self, agents: Vec<String>) -> Self {
603 self.user_agents = agents;
604 self
605 }
606
607 pub fn enrich(mut self, enrich: bool) -> Self {
610 self.enrich = enrich;
611 self
612 }
613
614 pub fn respect_robots(mut self, respect: bool) -> Self {
618 self.respect_robots = respect;
619 self
620 }
621
622 pub fn browser(mut self, backend: Arc<dyn BrowserBackend>) -> Self {
626 self.browser = Some(backend);
627 self
628 }
629
630 pub const fn browser_budget(mut self, cap: usize) -> Self {
635 self.browser_budget = cap;
636 self
637 }
638
639 pub fn build(self) -> Result<Client> {
641 let redirect_policy = if self.follow_redirects {
642 redirect::Policy::limited(self.redirect_limit)
643 } else {
644 redirect::Policy::none()
645 };
646 let mut builder = reqwest::Client::builder()
647 .user_agent(self.user_agent)
648 .timeout(self.timeout)
649 .connect_timeout(self.connect_timeout)
650 .redirect(redirect_policy);
651 if let Some(proxy_url) = &self.proxy {
652 const SCHEMES: [&str; 4] = ["http://", "https://", "socks5://", "socks5h://"];
656 if !SCHEMES.iter().any(|s| proxy_url.starts_with(s)) {
657 return Err(Error::HttpSetup {
658 message: format!(
659 "invalid proxy {proxy_url:?}: must start with one of {}",
660 SCHEMES.join(", ")
661 ),
662 });
663 }
664 let proxy = reqwest::Proxy::all(proxy_url).map_err(|e| Error::HttpSetup {
665 message: format!("invalid proxy {proxy_url:?}: {e}"),
666 })?;
667 builder = builder.proxy(proxy);
668 }
669 let inner = builder.build().map_err(|e| Error::HttpSetup {
670 message: e.to_string(),
671 })?;
672 let global_throttle = self.max_rps.map(|rps| {
673 let interval = Duration::from_secs(1) / rps.get();
675 HostThrottle::new(interval)
676 });
677 let robots = self
678 .respect_robots
679 .then(|| RobotsCache::new(inner.clone(), "adler"));
680 Ok(Client {
681 inner,
682 throttle: HostThrottle::new(self.min_request_interval),
683 global_throttle,
684 retry: self.retry,
685 user_agents: Arc::from(self.user_agents),
686 enrich: self.enrich,
687 robots,
688 browser: self.browser,
689 browser_budget: Arc::new(BrowserBudget::new(self.browser_budget)),
690 })
691 }
692}
693
/// Browser budget cap a fresh [`ClientBuilder`] starts with.
pub const DEFAULT_BROWSER_BUDGET: usize = 50;
701
702impl fmt::Debug for Client {
703 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
704 f.debug_struct("Client")
705 .field("throttle", &self.throttle)
706 .field("global_throttle", &self.global_throttle)
707 .field("retry", &self.retry)
708 .field("user_agents", &self.user_agents)
709 .field("enrich", &self.enrich)
710 .field("robots", &self.robots.is_some())
711 .field("browser", &self.browser.is_some())
712 .field("browser_budget", &self.browser_budget)
713 .finish_non_exhaustive()
714 }
715}
716
717impl fmt::Debug for ClientBuilder {
718 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
719 f.debug_struct("ClientBuilder")
720 .field("timeout", &self.timeout)
721 .field("connect_timeout", &self.connect_timeout)
722 .field("user_agent", &self.user_agent)
723 .field("follow_redirects", &self.follow_redirects)
724 .field("redirect_limit", &self.redirect_limit)
725 .field("min_request_interval", &self.min_request_interval)
726 .field("max_rps", &self.max_rps)
727 .field("retry", &self.retry)
728 .field("proxy", &self.proxy)
729 .field("user_agents", &self.user_agents)
730 .field("enrich", &self.enrich)
731 .field("respect_robots", &self.respect_robots)
732 .field("browser", &self.browser.is_some())
733 .field("browser_budget", &self.browser_budget)
734 .finish()
735 }
736}
737
/// Timeout passed to the browser backend for every browser fetch.
const BROWSER_TIMEOUT: Duration = Duration::from_secs(60);

/// Site tag (matched ASCII-case-insensitively) that makes a site eligible for
/// the browser backend.
const BOT_PROTECTED_TAG: &str = "bot-protected";
744
745fn default_user_agent() -> String {
746 format!("adler/{}", env!("CARGO_PKG_VERSION"))
747}
748
749async fn send_request(
754 client: &reqwest::Client,
755 method: reqwest::Method,
756 url: &str,
757 ua: Option<&str>,
758) -> reqwest::Result<reqwest::Response> {
759 send_request_with_body(client, method, url, ua, None).await
760}
761
762async fn send_request_with_body(
768 client: &reqwest::Client,
769 method: reqwest::Method,
770 url: &str,
771 ua: Option<&str>,
772 body: Option<&str>,
773) -> reqwest::Result<reqwest::Response> {
774 let mut request = client.request(method, url);
775 if let Some(ua) = ua {
776 request = request.header(reqwest::header::USER_AGENT, ua);
777 }
778 if let Some(b) = body {
779 request = request
780 .header(reqwest::header::CONTENT_TYPE, "application/json")
781 .body(b.to_owned());
782 }
783 request.send().await
784}
785
786fn host_of(url: &str) -> String {
787 reqwest::Url::parse(url)
788 .ok()
789 .and_then(|u| u.host_str().map(str::to_owned))
790 .unwrap_or_else(|| "unknown".into())
791}
792
793fn origin_and_path(url: &str) -> Option<(String, String)> {
796 let parsed = reqwest::Url::parse(url).ok()?;
797 let host = parsed.host_str()?;
798 let port = parsed.port().map_or_else(String::new, |p| format!(":{p}"));
799 let origin = format!("{}://{host}{port}", parsed.scheme());
800 let path = parsed.query().map_or_else(
801 || parsed.path().to_owned(),
802 |q| format!("{}?{q}", parsed.path()),
803 );
804 Some((origin, path))
805}
806
807fn outcome(site: &str, url: String, started: Instant, kind: MatchKind) -> CheckOutcome {
808 CheckOutcome {
809 site: site.to_owned(),
810 url,
811 kind,
812 reason: None,
813 elapsed_ms: elapsed_ms(started),
814 enrichment: std::collections::BTreeMap::new(),
815 evidence: Vec::new(),
816 }
817}
818
819fn uncertain(site: &str, url: String, started: Instant, reason: UncertainReason) -> CheckOutcome {
820 CheckOutcome {
821 site: site.to_owned(),
822 url,
823 kind: MatchKind::Uncertain,
824 reason: Some(reason),
825 elapsed_ms: elapsed_ms(started),
826 enrichment: std::collections::BTreeMap::new(),
827 evidence: Vec::new(),
828 }
829}
830
/// Milliseconds elapsed since `started`, saturating at `u64::MAX` if the
/// value does not fit in a `u64`.
fn elapsed_ms(started: Instant) -> u64 {
    let millis: u128 = started.elapsed().as_millis();
    match u64::try_from(millis) {
        Ok(ms) => ms,
        Err(_) => u64::MAX,
    }
}
834
835#[cfg(test)]
836mod tests {
837 use super::*;
838 use crate::site::{Signal, UrlTemplate};
839 use wiremock::matchers::{any, method, path};
840 use wiremock::{Mock, MockServer, ResponseTemplate};
841
842 fn build_client() -> Client {
843 Client::builder()
844 .timeout(Duration::from_secs(2))
845 .min_request_interval(Duration::ZERO)
848 .max_retries(0)
851 .build()
852 .expect("client builds")
853 }
854
855 fn site_with(server: &MockServer, signals: Vec<Signal>) -> Site {
856 Site {
857 name: "Mock".into(),
858 url: UrlTemplate::new(format!("{}/{{username}}", server.uri())).unwrap(),
859 signals,
860 known_present: None,
861 known_absent: None,
862 extract: Vec::new(),
863 tags: Vec::new(),
864 request_headers: std::collections::BTreeMap::new(),
865 regex_check: None,
866 engine: None,
867 strip_bad_char: None,
868 request_method: crate::site::HttpMethod::Get,
869 request_body: None,
870 protection: Vec::new(),
871 disabled: false,
872 source: None,
873 popularity: None,
874 }
875 }
876
877 fn user() -> Username {
878 Username::new("alice").unwrap()
879 }
880
881 #[tokio::test]
882 async fn regex_check_short_circuits_before_any_request() {
883 let server = MockServer::start().await;
887 Mock::given(any())
888 .respond_with(ResponseTemplate::new(200))
889 .mount(&server)
890 .await;
891 let mut site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
892 site.regex_check = Some("^[A-Za-z]{8,}$".into());
894 let outcome = build_client().check(&site, &user()).await;
895 assert_eq!(outcome.kind, MatchKind::Uncertain);
896 assert!(
897 matches!(outcome.reason, Some(UncertainReason::UsernameNotAllowed)),
898 "expected UsernameNotAllowed, got {:?}",
899 outcome.reason,
900 );
901 let recvd = server.received_requests().await.unwrap_or_default();
904 assert_eq!(
905 recvd.len(),
906 0,
907 "regex_check mismatch must skip the HTTP request entirely"
908 );
909 }
910
911 #[tokio::test]
912 async fn regex_check_pass_proceeds_to_probe() {
913 let server = MockServer::start().await;
914 Mock::given(any())
915 .and(path("/alice"))
916 .respond_with(ResponseTemplate::new(200))
917 .mount(&server)
918 .await;
919 let mut site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
920 site.regex_check = Some("^[a-z]{3,}$".into());
922 let outcome = build_client().check(&site, &user()).await;
923 assert_eq!(outcome.kind, MatchKind::Found);
924 }
925
926 #[tokio::test]
927 async fn status_signal_reports_found_on_match() {
928 let server = MockServer::start().await;
929 Mock::given(any())
930 .and(path("/alice"))
931 .respond_with(ResponseTemplate::new(200))
932 .mount(&server)
933 .await;
934 let site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
935 let outcome = build_client().check(&site, &user()).await;
936 assert_eq!(outcome.kind, MatchKind::Found);
937 assert!(outcome.url.ends_with("/alice"));
938 assert!(outcome.reason.is_none());
939 assert_eq!(outcome.evidence, ["HTTP 200 (status_found)"]);
940 }
941
942 #[tokio::test]
943 async fn status_signal_pair_reports_not_found_on_404() {
944 let server = MockServer::start().await;
945 Mock::given(any())
946 .and(path("/alice"))
947 .respond_with(ResponseTemplate::new(404))
948 .mount(&server)
949 .await;
950 let site = site_with(
951 &server,
952 vec![
953 Signal::StatusFound { codes: vec![200] },
954 Signal::StatusNotFound { codes: vec![404] },
955 ],
956 );
957 let outcome = build_client().check(&site, &user()).await;
958 assert_eq!(outcome.kind, MatchKind::NotFound);
959 assert_eq!(outcome.evidence, ["HTTP 404 (status_not_found)"]);
961 }
962
963 #[tokio::test]
964 async fn body_absent_signal_detects_missing_account() {
965 let server = MockServer::start().await;
966 Mock::given(any())
967 .and(path("/alice"))
968 .respond_with(ResponseTemplate::new(200).set_body_string("<h1>Profile not found</h1>"))
969 .mount(&server)
970 .await;
971 let site = site_with(
972 &server,
973 vec![Signal::BodyAbsent {
974 text: "Profile not found".into(),
975 }],
976 );
977 let outcome = build_client().check(&site, &user()).await;
978 assert_eq!(outcome.kind, MatchKind::NotFound);
979 }
980
981 #[tokio::test]
982 async fn body_absent_alone_yields_uncertain_when_marker_missing() {
983 let server = MockServer::start().await;
986 Mock::given(any())
987 .and(path("/alice"))
988 .respond_with(ResponseTemplate::new(200).set_body_string("<h1>Welcome alice</h1>"))
989 .mount(&server)
990 .await;
991 let site = site_with(
992 &server,
993 vec![Signal::BodyAbsent {
994 text: "Profile not found".into(),
995 }],
996 );
997 let outcome = build_client().check(&site, &user()).await;
998 assert_eq!(outcome.kind, MatchKind::Uncertain);
999 }
1000
1001 #[tokio::test]
1002 async fn body_present_plus_absent_resolve_to_found() {
1003 let server = MockServer::start().await;
1004 Mock::given(any())
1005 .and(path("/alice"))
1006 .respond_with(
1007 ResponseTemplate::new(200)
1008 .set_body_string(r#"<div class="profile-card">alice</div>"#),
1009 )
1010 .mount(&server)
1011 .await;
1012 let site = site_with(
1013 &server,
1014 vec![
1015 Signal::BodyPresent {
1016 text: "profile-card".into(),
1017 },
1018 Signal::BodyAbsent {
1019 text: "Profile not found".into(),
1020 },
1021 ],
1022 );
1023 let outcome = build_client().check(&site, &user()).await;
1024 assert_eq!(outcome.kind, MatchKind::Found);
1025 }
1026
1027 #[tokio::test]
1028 async fn redirect_absent_signal_detects_missing_account() {
1029 let server = MockServer::start().await;
1030 Mock::given(any())
1031 .and(path("/alice"))
1032 .respond_with(
1033 ResponseTemplate::new(302).insert_header("location", "/login?next=/alice"),
1034 )
1035 .mount(&server)
1036 .await;
1037 Mock::given(any())
1038 .and(path("/login"))
1039 .respond_with(ResponseTemplate::new(200).set_body_string("login page"))
1040 .mount(&server)
1041 .await;
1042 let site = site_with(
1043 &server,
1044 vec![Signal::RedirectAbsent {
1045 fragment: "/login".into(),
1046 }],
1047 );
1048 let outcome = build_client().check(&site, &user()).await;
1049 assert_eq!(outcome.kind, MatchKind::NotFound);
1050 }
1051
1052 #[tokio::test]
1053 async fn negative_signal_wins_over_positive() {
1054 let server = MockServer::start().await;
1059 Mock::given(any())
1060 .and(path("/alice"))
1061 .respond_with(ResponseTemplate::new(200).set_body_string("Profile not found"))
1062 .mount(&server)
1063 .await;
1064 let site = site_with(
1065 &server,
1066 vec![
1067 Signal::StatusFound { codes: vec![200] },
1068 Signal::BodyAbsent {
1069 text: "Profile not found".into(),
1070 },
1071 ],
1072 );
1073 let outcome = build_client().check(&site, &user()).await;
1074 assert_eq!(outcome.kind, MatchKind::NotFound);
1075 }
1076
1077 #[tokio::test]
1078 async fn network_failure_yields_uncertain() {
1079 let listener = std::net::TcpListener::bind("127.0.0.1:0").unwrap();
1080 let port = listener.local_addr().unwrap().port();
1081 drop(listener);
1082
1083 let site = Site {
1084 name: "Dead".into(),
1085 url: UrlTemplate::new(format!("http://127.0.0.1:{port}/{{username}}")).unwrap(),
1086 signals: vec![Signal::StatusFound { codes: vec![200] }],
1087 known_present: None,
1088 known_absent: None,
1089 extract: Vec::new(),
1090 tags: Vec::new(),
1091 request_headers: std::collections::BTreeMap::new(),
1092 regex_check: None,
1093 engine: None,
1094 strip_bad_char: None,
1095 request_method: crate::site::HttpMethod::Get,
1096 request_body: None,
1097 protection: Vec::new(),
1098 disabled: false,
1099 source: None,
1100 popularity: None,
1101 };
1102 let client = Client::builder()
1103 .timeout(Duration::from_millis(500))
1104 .connect_timeout(Duration::from_millis(500))
1105 .max_retries(0)
1106 .build()
1107 .unwrap();
1108 let outcome = client.check(&site, &user()).await;
1109 assert_eq!(outcome.kind, MatchKind::Uncertain);
1110 assert!(outcome.reason.is_some());
1111 }
1112
1113 #[tokio::test]
1114 async fn throttle_spaces_consecutive_calls_to_same_host() {
1115 let server = MockServer::start().await;
1116 Mock::given(any())
1117 .and(path("/alice"))
1118 .respond_with(ResponseTemplate::new(200))
1119 .mount(&server)
1120 .await;
1121 let site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
1122 let client = Client::builder()
1127 .timeout(Duration::from_secs(2))
1128 .min_request_interval(Duration::from_millis(300))
1129 .build()
1130 .unwrap();
1131
1132 client.check(&site, &user()).await;
1133 let started = Instant::now();
1134 client.check(&site, &user()).await;
1135 let elapsed = started.elapsed();
1136 assert!(
1137 elapsed >= Duration::from_millis(200),
1138 "second probe to the same host should wait ≥200 ms, got {elapsed:?}",
1139 );
1140 }
1141
1142 #[tokio::test]
1143 async fn builder_overrides_user_agent() {
1144 let server = MockServer::start().await;
1145 Mock::given(any())
1146 .and(path("/alice"))
1147 .and(wiremock::matchers::header("user-agent", "adler-test/1.0"))
1148 .respond_with(ResponseTemplate::new(200))
1149 .mount(&server)
1150 .await;
1151 let site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
1152 let client = Client::builder()
1153 .user_agent("adler-test/1.0")
1154 .build()
1155 .unwrap();
1156 let outcome = client.check(&site, &user()).await;
1157 assert_eq!(outcome.kind, MatchKind::Found);
1158 }
1159
1160 #[tokio::test]
1161 async fn rate_limit_429_yields_uncertain_with_note() {
1162 let server = MockServer::start().await;
1163 Mock::given(any())
1164 .and(path("/alice"))
1165 .respond_with(ResponseTemplate::new(429))
1166 .mount(&server)
1167 .await;
1168 let site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
1169 let outcome = build_client().check(&site, &user()).await;
1170 assert_eq!(outcome.kind, MatchKind::Uncertain);
1171 assert_eq!(outcome.reason, Some(UncertainReason::RateLimited));
1172 }
1173
1174 #[tokio::test]
1175 async fn cloudflare_server_header_yields_uncertain() {
1176 let server = MockServer::start().await;
1177 Mock::given(any())
1178 .and(path("/alice"))
1179 .respond_with(ResponseTemplate::new(503).insert_header("server", "cloudflare"))
1180 .mount(&server)
1181 .await;
1182 let site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
1183 let outcome = build_client().check(&site, &user()).await;
1184 assert_eq!(outcome.kind, MatchKind::Uncertain);
1185 assert_eq!(outcome.reason, Some(UncertainReason::CloudflareChallenge));
1186 }
1187
1188 #[tokio::test]
1189 async fn cloudflare_interstitial_in_body_yields_uncertain() {
1190 let server = MockServer::start().await;
1193 Mock::given(any())
1194 .and(path("/alice"))
1195 .respond_with(
1196 ResponseTemplate::new(200)
1197 .set_body_string("<html><head><title>Just a moment...</title></head></html>"),
1198 )
1199 .mount(&server)
1200 .await;
1201 let site = site_with(
1202 &server,
1203 vec![Signal::BodyAbsent {
1204 text: "Profile not found".into(),
1205 }],
1206 );
1207 let outcome = build_client().check(&site, &user()).await;
1208 assert_eq!(outcome.kind, MatchKind::Uncertain);
1209 assert_eq!(outcome.reason, Some(UncertainReason::CloudflareChallenge));
1210 }
1211
1212 #[tokio::test]
1213 async fn ban_detection_does_not_fire_on_legitimate_403() {
1214 let server = MockServer::start().await;
1215 Mock::given(any())
1216 .and(path("/alice"))
1217 .respond_with(ResponseTemplate::new(403))
1218 .mount(&server)
1219 .await;
1220 let site = site_with(
1221 &server,
1222 vec![
1223 Signal::StatusFound { codes: vec![200] },
1224 Signal::StatusNotFound { codes: vec![403] },
1225 ],
1226 );
1227 let outcome = build_client().check(&site, &user()).await;
1228 assert_eq!(outcome.kind, MatchKind::NotFound);
1230 assert!(outcome.reason.is_none());
1231 }
1232
1233 #[tokio::test]
1234 async fn retry_recovers_after_transient_429() {
1235 let server = MockServer::start().await;
1236 Mock::given(any())
1238 .and(path("/alice"))
1239 .respond_with(ResponseTemplate::new(429))
1240 .up_to_n_times(1)
1241 .mount(&server)
1242 .await;
1243 Mock::given(any())
1244 .and(path("/alice"))
1245 .respond_with(ResponseTemplate::new(200))
1246 .mount(&server)
1247 .await;
1248 let site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
1249 let client = Client::builder()
1250 .timeout(Duration::from_secs(2))
1251 .min_request_interval(Duration::ZERO)
1252 .max_retries(2)
1253 .base_backoff_delay(Duration::from_millis(20))
1254 .max_backoff_delay(Duration::from_millis(100))
1255 .build()
1256 .unwrap();
1257 let outcome = client.check(&site, &user()).await;
1258 assert_eq!(outcome.kind, MatchKind::Found);
1259 assert!(outcome.reason.is_none());
1260 }
1261
1262 #[tokio::test]
1263 async fn retry_exhausts_and_returns_uncertain() {
1264 let server = MockServer::start().await;
1265 Mock::given(any())
1266 .and(path("/alice"))
1267 .respond_with(ResponseTemplate::new(429))
1268 .mount(&server)
1269 .await;
1270 let site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
1271 let client = Client::builder()
1272 .timeout(Duration::from_secs(2))
1273 .min_request_interval(Duration::ZERO)
1274 .max_retries(2)
1275 .base_backoff_delay(Duration::from_millis(10))
1276 .max_backoff_delay(Duration::from_millis(50))
1277 .build()
1278 .unwrap();
1279 let outcome = client.check(&site, &user()).await;
1280 assert_eq!(outcome.kind, MatchKind::Uncertain);
1281 assert_eq!(outcome.reason, Some(UncertainReason::RateLimited));
1282 }
1283
1284 #[tokio::test]
1285 async fn retry_does_not_fire_on_network_error() {
1286 let listener = std::net::TcpListener::bind("127.0.0.1:0").unwrap();
1290 let port = listener.local_addr().unwrap().port();
1291 drop(listener);
1292 let site = Site {
1293 name: "Dead".into(),
1294 url: UrlTemplate::new(format!("http://127.0.0.1:{port}/{{username}}")).unwrap(),
1295 signals: vec![Signal::StatusFound { codes: vec![200] }],
1296 known_present: None,
1297 known_absent: None,
1298 extract: Vec::new(),
1299 tags: Vec::new(),
1300 request_headers: std::collections::BTreeMap::new(),
1301 regex_check: None,
1302 engine: None,
1303 strip_bad_char: None,
1304 request_method: crate::site::HttpMethod::Get,
1305 request_body: None,
1306 protection: Vec::new(),
1307 disabled: false,
1308 source: None,
1309 popularity: None,
1310 };
1311 let client = Client::builder()
1312 .timeout(Duration::from_millis(500))
1313 .connect_timeout(Duration::from_millis(500))
1314 .min_request_interval(Duration::ZERO)
1315 .max_retries(3)
1316 .base_backoff_delay(Duration::from_secs(60))
1317 .build()
1318 .unwrap();
1319 let started = Instant::now();
1320 let outcome = client.check(&site, &user()).await;
1321 assert!(started.elapsed() < Duration::from_secs(5));
1324 assert_eq!(outcome.kind, MatchKind::Uncertain);
1325 assert!(
1326 matches!(outcome.reason, Some(UncertainReason::Network(_))),
1327 "got {:?}",
1328 outcome.reason,
1329 );
1330 }
1331
1332 #[tokio::test]
1333 async fn rotates_user_agent_per_request() {
1334 let server = MockServer::start().await;
1338 Mock::given(any())
1339 .and(path("/alice"))
1340 .and(wiremock::matchers::header("user-agent", "RotatorUA/9.9"))
1341 .respond_with(ResponseTemplate::new(200))
1342 .mount(&server)
1343 .await;
1344 let site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
1345 let client = Client::builder()
1346 .min_request_interval(Duration::ZERO)
1347 .max_retries(0)
1348 .rotate_user_agents(vec!["RotatorUA/9.9".into()])
1349 .build()
1350 .unwrap();
1351 let outcome = client.check(&site, &user()).await;
1352 assert_eq!(outcome.kind, MatchKind::Found);
1353 }
1354
/// A proxy string that is not a URL makes `build` fail with
/// `Error::HttpSetup`.
#[test]
fn invalid_proxy_url_fails_build() {
    let build_result = Client::builder().proxy("not a url").build();
    let err = build_result.unwrap_err();
    assert!(matches!(err, Error::HttpSetup { .. }));
}
1360
/// A proxy without a scheme is rejected by `build` with an `HttpSetup`
/// error whose message contains "must start with".
#[test]
fn schemeless_proxy_is_rejected_up_front() {
    let err = Client::builder().proxy("not-a-url").build().unwrap_err();
    // Pull the message out of the expected variant; anything else is a
    // test failure that reports the error actually received.
    let message = match err {
        Error::HttpSetup { message } => message,
        err => panic!("expected HttpSetup, got {err:?}"),
    };
    assert!(message.contains("must start with"), "{message}");
}
1370
/// Building a client with a `socks5://` proxy URL must succeed.
#[test]
fn socks5_proxy_scheme_is_accepted() {
    let built = Client::builder().proxy("socks5://127.0.0.1:9050").build();
    assert!(built.is_ok());
}
1381
1382 #[tokio::test]
1383 async fn global_rps_cap_spaces_requests_across_hosts() {
1384 let server = MockServer::start().await;
1387 Mock::given(any())
1388 .respond_with(ResponseTemplate::new(200))
1389 .mount(&server)
1390 .await;
1391 let site_a = Site {
1392 name: "A".into(),
1393 url: UrlTemplate::new(format!("{}/a/{{username}}", server.uri())).unwrap(),
1394 signals: vec![Signal::StatusFound { codes: vec![200] }],
1395 known_present: None,
1396 known_absent: None,
1397 extract: Vec::new(),
1398 tags: Vec::new(),
1399 request_headers: std::collections::BTreeMap::new(),
1400 regex_check: None,
1401 engine: None,
1402 strip_bad_char: None,
1403 request_method: crate::site::HttpMethod::Get,
1404 request_body: None,
1405 protection: Vec::new(),
1406 disabled: false,
1407 source: None,
1408 popularity: None,
1409 };
1410 let site_b = Site {
1411 name: "B".into(),
1412 url: UrlTemplate::new(format!("{}/b/{{username}}", server.uri())).unwrap(),
1413 signals: vec![Signal::StatusFound { codes: vec![200] }],
1414 known_present: None,
1415 known_absent: None,
1416 extract: Vec::new(),
1417 tags: Vec::new(),
1418 request_headers: std::collections::BTreeMap::new(),
1419 regex_check: None,
1420 engine: None,
1421 strip_bad_char: None,
1422 request_method: crate::site::HttpMethod::Get,
1423 request_body: None,
1424 protection: Vec::new(),
1425 disabled: false,
1426 source: None,
1427 popularity: None,
1428 };
1429 let client = Client::builder()
1434 .min_request_interval(Duration::ZERO)
1435 .max_retries(0)
1436 .max_rps(std::num::NonZeroU32::new(2).unwrap())
1437 .build()
1438 .unwrap();
1439 client.check(&site_a, &user()).await;
1442 let started = Instant::now();
1443 client.check(&site_b, &user()).await;
1444 assert!(
1445 started.elapsed() >= Duration::from_millis(350),
1446 "global cap should space cross-host requests, got {:?}",
1447 started.elapsed(),
1448 );
1449 }
1450
1451 #[tokio::test]
1452 async fn respect_robots_skips_disallowed_paths() {
1453 let server = MockServer::start().await;
1454 Mock::given(any())
1455 .and(path("/robots.txt"))
1456 .respond_with(
1457 ResponseTemplate::new(200).set_body_string("User-agent: *\nDisallow: /no"),
1458 )
1459 .mount(&server)
1460 .await;
1461 Mock::given(any())
1462 .and(path("/no/alice"))
1463 .respond_with(ResponseTemplate::new(200))
1464 .mount(&server)
1465 .await;
1466 Mock::given(any())
1467 .and(path("/yes/alice"))
1468 .respond_with(ResponseTemplate::new(200))
1469 .mount(&server)
1470 .await;
1471 let client = Client::builder()
1472 .min_request_interval(Duration::ZERO)
1473 .max_retries(0)
1474 .respect_robots(true)
1475 .build()
1476 .unwrap();
1477
1478 let disallowed = Site {
1479 name: "No".into(),
1480 url: UrlTemplate::new(format!("{}/no/{{username}}", server.uri())).unwrap(),
1481 signals: vec![Signal::StatusFound { codes: vec![200] }],
1482 known_present: None,
1483 known_absent: None,
1484 extract: Vec::new(),
1485 tags: Vec::new(),
1486 request_headers: std::collections::BTreeMap::new(),
1487 regex_check: None,
1488 engine: None,
1489 strip_bad_char: None,
1490 request_method: crate::site::HttpMethod::Get,
1491 request_body: None,
1492 protection: Vec::new(),
1493 disabled: false,
1494 source: None,
1495 popularity: None,
1496 };
1497 let allowed = Site {
1498 name: "Yes".into(),
1499 url: UrlTemplate::new(format!("{}/yes/{{username}}", server.uri())).unwrap(),
1500 signals: vec![Signal::StatusFound { codes: vec![200] }],
1501 known_present: None,
1502 known_absent: None,
1503 extract: Vec::new(),
1504 tags: Vec::new(),
1505 request_headers: std::collections::BTreeMap::new(),
1506 regex_check: None,
1507 engine: None,
1508 strip_bad_char: None,
1509 request_method: crate::site::HttpMethod::Get,
1510 request_body: None,
1511 protection: Vec::new(),
1512 disabled: false,
1513 source: None,
1514 popularity: None,
1515 };
1516
1517 let no = client.check(&disallowed, &user()).await;
1518 assert_eq!(no.kind, MatchKind::Uncertain);
1519 assert_eq!(no.reason, Some(UncertainReason::RobotsDisallowed));
1520
1521 let yes = client.check(&allowed, &user()).await;
1522 assert_eq!(yes.kind, MatchKind::Found);
1523 }
1524
1525 #[tokio::test]
1526 async fn body_read_skipped_when_no_body_signal_needed() {
1527 let server = MockServer::start().await;
1530 Mock::given(any())
1531 .and(path("/alice"))
1532 .respond_with(ResponseTemplate::new(200).set_body_string("Profile not found"))
1533 .mount(&server)
1534 .await;
1535 let site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
1536 let outcome = build_client().check(&site, &user()).await;
1537 assert_eq!(outcome.kind, MatchKind::Found);
1538 }
1539
1540 #[derive(Debug)]
1546 struct RecordingBackend {
1547 page: RenderedPage,
1548 calls: std::sync::atomic::AtomicUsize,
1549 }
1550
1551 impl RecordingBackend {
1552 fn with_page(page: RenderedPage) -> Self {
1553 Self {
1554 page,
1555 calls: std::sync::atomic::AtomicUsize::new(0),
1556 }
1557 }
1558 fn call_count(&self) -> usize {
1559 self.calls.load(std::sync::atomic::Ordering::SeqCst)
1560 }
1561 }
1562
1563 #[async_trait::async_trait]
1564 impl BrowserBackend for RecordingBackend {
1565 async fn fetch(
1566 &self,
1567 _url: &url::Url,
1568 _headers: &std::collections::BTreeMap<String, String>,
1569 _timeout: Duration,
1570 ) -> Result<RenderedPage> {
1571 self.calls.fetch_add(1, std::sync::atomic::Ordering::SeqCst);
1572 Ok(self.page.clone())
1573 }
1574 }
1575
1576 fn site_bot_protected(server: &MockServer) -> Site {
1577 let mut s = site_with(server, vec![Signal::StatusFound { codes: vec![200] }]);
1578 s.tags = vec!["bot-protected".into()];
1579 s
1580 }
1581
1582 #[tokio::test]
1583 async fn browser_routes_bot_protected_sites() {
1584 let server = MockServer::start().await;
1587 let backend = Arc::new(RecordingBackend::with_page(RenderedPage {
1588 status: 200,
1589 final_url: url::Url::parse("https://example.com/alice").unwrap(),
1590 body: "<html></html>".into(),
1591 elapsed_ms: 42,
1592 }));
1593 let client = Client::builder()
1594 .min_request_interval(Duration::ZERO)
1595 .max_retries(0)
1596 .browser(backend.clone())
1597 .build()
1598 .unwrap();
1599 let outcome = client.check(&site_bot_protected(&server), &user()).await;
1600 assert_eq!(outcome.kind, MatchKind::Found);
1601 assert_eq!(backend.call_count(), 1, "browser invoked exactly once");
1602 }
1603
1604 #[tokio::test]
1605 async fn non_bot_protected_sites_skip_browser() {
1606 let server = MockServer::start().await;
1607 Mock::given(any())
1608 .and(path("/alice"))
1609 .respond_with(ResponseTemplate::new(200))
1610 .mount(&server)
1611 .await;
1612 let backend = Arc::new(RecordingBackend::with_page(RenderedPage {
1613 status: 500, final_url: url::Url::parse("https://x/").unwrap(),
1615 body: String::new(),
1616 elapsed_ms: 0,
1617 }));
1618 let client = Client::builder()
1619 .min_request_interval(Duration::ZERO)
1620 .max_retries(0)
1621 .browser(backend.clone())
1622 .build()
1623 .unwrap();
1624 let site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
1626 let outcome = client.check(&site, &user()).await;
1627 assert_eq!(outcome.kind, MatchKind::Found);
1628 assert_eq!(backend.call_count(), 0, "browser must not be touched");
1629 }
1630
1631 #[tokio::test]
1632 async fn browser_budget_exhaust_yields_uncertain() {
1633 let server = MockServer::start().await;
1634 let backend = Arc::new(RecordingBackend::with_page(RenderedPage {
1635 status: 200,
1636 final_url: url::Url::parse("https://x/").unwrap(),
1637 body: String::new(),
1638 elapsed_ms: 0,
1639 }));
1640 let client = Client::builder()
1641 .min_request_interval(Duration::ZERO)
1642 .max_retries(0)
1643 .browser(backend.clone())
1644 .browser_budget(1)
1645 .build()
1646 .unwrap();
1647 let site = site_bot_protected(&server);
1648 let first = client.check(&site, &user()).await;
1650 assert_eq!(first.kind, MatchKind::Found);
1651 let second = client.check(&site, &user()).await;
1653 assert_eq!(second.kind, MatchKind::Uncertain);
1654 assert!(matches!(
1655 second.reason,
1656 Some(UncertainReason::BrowserBudget)
1657 ));
1658 assert_eq!(
1659 backend.call_count(),
1660 1,
1661 "second call must not invoke backend"
1662 );
1663 }
1664
1665 #[tokio::test]
1666 async fn browser_failure_surfaces_as_uncertain_browser_failed() {
1667 struct FailingBackend;
1668 #[async_trait::async_trait]
1669 impl BrowserBackend for FailingBackend {
1670 async fn fetch(
1671 &self,
1672 _url: &url::Url,
1673 _headers: &std::collections::BTreeMap<String, String>,
1674 _timeout: Duration,
1675 ) -> Result<RenderedPage> {
1676 Err(Error::BrowserSetup {
1677 message: "simulated crash".into(),
1678 })
1679 }
1680 }
1681 impl std::fmt::Debug for FailingBackend {
1682 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1683 f.write_str("FailingBackend")
1684 }
1685 }
1686
1687 let server = MockServer::start().await;
1688 let client = Client::builder()
1689 .min_request_interval(Duration::ZERO)
1690 .max_retries(0)
1691 .browser(Arc::new(FailingBackend))
1692 .build()
1693 .unwrap();
1694 let outcome = client.check(&site_bot_protected(&server), &user()).await;
1695 assert_eq!(outcome.kind, MatchKind::Uncertain);
1696 match outcome.reason {
1697 Some(UncertainReason::BrowserFailed(msg)) => {
1698 assert!(msg.contains("simulated crash"), "got: {msg}");
1699 }
1700 other => panic!("expected BrowserFailed, got {other:?}"),
1701 }
1702 }
1703
1704 #[tokio::test]
1705 async fn status_only_site_uses_head_request() {
1706 let server = MockServer::start().await;
1710 Mock::given(method("HEAD"))
1711 .and(path("/alice"))
1712 .respond_with(ResponseTemplate::new(200))
1713 .mount(&server)
1714 .await;
1715 let site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
1716 let outcome = build_client().check(&site, &user()).await;
1717 assert_eq!(outcome.kind, MatchKind::Found);
1718 let recvd = server.received_requests().await.unwrap_or_default();
1719 assert_eq!(recvd.len(), 1);
1720 assert_eq!(recvd[0].method.as_str(), "HEAD");
1721 }
1722
1723 #[tokio::test]
1724 async fn body_signal_site_uses_get_request() {
1725 let server = MockServer::start().await;
1728 Mock::given(any())
1729 .and(path("/alice"))
1730 .respond_with(ResponseTemplate::new(200).set_body_string("hello alice"))
1731 .mount(&server)
1732 .await;
1733 let site = site_with(
1734 &server,
1735 vec![Signal::BodyPresent {
1736 text: "hello".into(),
1737 }],
1738 );
1739 let outcome = build_client().check(&site, &user()).await;
1740 assert_eq!(outcome.kind, MatchKind::Found);
1741 let recvd = server.received_requests().await.unwrap_or_default();
1742 assert_eq!(recvd[0].method.as_str(), "GET");
1743 }
1744
1745 #[tokio::test]
1746 async fn protection_field_routes_through_browser_like_bot_protected_tag() {
1747 let server = MockServer::start().await;
1752 Mock::given(any())
1753 .respond_with(ResponseTemplate::new(200))
1754 .mount(&server)
1755 .await;
1756 let mut site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
1757 site.protection = vec![crate::site::ProtectionKind::Cloudflare];
1758 let backend = Arc::new(RecordingBackend::with_page(RenderedPage {
1760 status: 200,
1761 final_url: url::Url::parse(&format!("{}/alice", server.uri())).unwrap(),
1762 body: String::new(),
1763 elapsed_ms: 0,
1764 }));
1765 let client = Client::builder()
1766 .min_request_interval(Duration::ZERO)
1767 .max_retries(0)
1768 .browser(backend)
1769 .build()
1770 .unwrap();
1771 let outcome = client.check(&site, &user()).await;
1772 assert_eq!(outcome.kind, MatchKind::Found);
1775 let recvd = server.received_requests().await.unwrap_or_default();
1777 assert_eq!(
1778 recvd.len(),
1779 0,
1780 "structured protection must skip the raw HTTP path"
1781 );
1782 }
1783
1784 #[tokio::test]
1785 async fn post_method_sends_body_with_username_substituted() {
1786 let server = MockServer::start().await;
1790 Mock::given(method("POST"))
1791 .and(path("/api"))
1792 .respond_with(ResponseTemplate::new(200))
1793 .mount(&server)
1794 .await;
1795 let site = Site {
1800 name: "ApiPost".into(),
1801 url: UrlTemplate::new(format!("{}/api?_={{username}}", server.uri())).unwrap(),
1802 signals: vec![Signal::StatusFound { codes: vec![200] }],
1803 known_present: None,
1804 known_absent: None,
1805 extract: Vec::new(),
1806 tags: Vec::new(),
1807 request_headers: std::collections::BTreeMap::new(),
1808 regex_check: None,
1809 engine: None,
1810 strip_bad_char: None,
1811 request_method: HttpMethod::Post,
1812 request_body: Some(r#"{"name":"{username}"}"#.into()),
1813 protection: Vec::new(),
1814 disabled: false,
1815 source: None,
1816 popularity: None,
1817 };
1818 let outcome = build_client().check(&site, &user()).await;
1819 assert_eq!(outcome.kind, MatchKind::Found);
1820 let recvd = server.received_requests().await.unwrap_or_default();
1821 assert_eq!(recvd.len(), 1);
1822 assert_eq!(recvd[0].method.as_str(), "POST");
1823 let body = String::from_utf8_lossy(&recvd[0].body).to_string();
1824 assert!(body.contains("\"name\":\"alice\""), "body was: {body}");
1825 }
1826
1827 #[tokio::test]
1828 async fn head_405_falls_back_to_get() {
1829 let server = MockServer::start().await;
1832 Mock::given(method("HEAD"))
1833 .and(path("/alice"))
1834 .respond_with(ResponseTemplate::new(405))
1835 .mount(&server)
1836 .await;
1837 Mock::given(any())
1838 .and(path("/alice"))
1839 .respond_with(ResponseTemplate::new(200))
1840 .mount(&server)
1841 .await;
1842 let site = site_with(&server, vec![Signal::StatusFound { codes: vec![200] }]);
1843 let outcome = build_client().check(&site, &user()).await;
1844 assert_eq!(outcome.kind, MatchKind::Found);
1845 let recvd = server.received_requests().await.unwrap_or_default();
1846 assert_eq!(recvd.len(), 2);
1847 assert_eq!(recvd[0].method.as_str(), "HEAD");
1848 assert_eq!(recvd[1].method.as_str(), "GET");
1849 }
1850}