use crate::check::{CheckOutcome, MatchKind};
use crate::client::Client;
use crate::error::Result;
use crate::site::{KnownPresent, Signal, Site, UrlTemplate};
use crate::username::Username;
/// Number of random alphanumeric characters that `random_nonsense_username`
/// appends after its fixed prefix.
const NONSENSE_LEN: usize = 24;
/// Maximum number of characters of a page title that `html_title` returns.
const MAX_TITLE_MARKER: usize = 120;
/// Result of running the doctor probes against one site definition
/// (see `check_site`).
#[derive(Debug, Clone)]
pub enum DoctorReport {
/// No issue was recorded while probing the site.
Healthy {
/// `(name, outcome)` for every known-present username that was probed.
present: Vec<(String, CheckOutcome)>,
/// Outcome of probing the username that is expected not to exist.
absent: CheckOutcome,
},
/// At least one issue was recorded while probing the site.
Unhealthy {
/// Human-readable description of each problem that was found.
issues: Vec<String>,
/// `(name, outcome)` for every known-present username that was probed.
present: Vec<(String, CheckOutcome)>,
/// Outcome of probing the username that is expected not to exist.
absent: CheckOutcome,
},
}
impl DoctorReport {
    /// Returns `true` for the `Healthy` variant and `false` for `Unhealthy`.
    pub fn is_healthy(&self) -> bool {
        match self {
            Self::Healthy { .. } => true,
            Self::Unhealthy { .. } => false,
        }
    }
}
pub async fn check_site(client: &Client, site: &Site) -> DoctorReport {
let mut issues: Vec<String> = Vec::new();
let mut present_outcomes: Vec<(String, CheckOutcome)> = Vec::new();
if let Some(kp) = &site.known_present {
for name in kp.as_slice() {
match Username::new(name.clone()) {
Ok(user) => {
let outcome = client.check(site, &user).await;
present_outcomes.push((name.clone(), outcome));
}
Err(err) => {
issues.push(format!(
"known_present {name:?} is not a valid username: {err}"
));
}
}
}
if !present_outcomes.is_empty()
&& !present_outcomes
.iter()
.any(|(_, o)| o.kind == MatchKind::Found)
{
let summary = present_outcomes
.iter()
.map(|(n, o)| format!("{n}={:?}", o.kind))
.collect::<Vec<_>>()
.join(", ");
issues.push(format!(
"no known-present user yielded Found (tried: {summary})"
));
}
}
let nonsense = site
.known_absent
.clone()
.unwrap_or_else(random_nonsense_username);
let absent_outcome = match Username::new(nonsense.clone()) {
Ok(user) => client.check(site, &user).await,
Err(err) => {
issues.push(format!(
"could not build absent-probe username {nonsense:?}: {err}",
));
dummy_outcome(&site.name, "skipped: bad absent username")
}
};
if absent_outcome.kind == MatchKind::Found {
issues.push(format!(
"nonsense user {nonsense:?} reported Found — detection rule too permissive",
));
}
if issues.is_empty() {
DoctorReport::Healthy {
present: present_outcomes,
absent: absent_outcome,
}
} else {
DoctorReport::Unhealthy {
issues,
present: present_outcomes,
absent: absent_outcome,
}
}
}
/// Generates a throwaway username: the fixed prefix `adlerx` followed by
/// `NONSENSE_LEN` random ASCII alphanumeric characters.
fn random_nonsense_username() -> String {
    let mut name = String::with_capacity(NONSENSE_LEN + 7);
    name.push_str("adlerx");
    name.extend(std::iter::repeat_with(fastrand::alphanumeric).take(NONSENSE_LEN));
    name
}
/// Usernames that `default_candidate_pool` adds to every pool, in this order,
/// after the site's brand name when one can be derived.
const DEFAULT_CANDIDATES: &[&str] = &[
"torvalds", "octocat", "dhh", "tj", "admin", "support", "test",
];
/// Builds the ordered list of usernames to try when looking for an account
/// that exists on `site`.
///
/// The brand label derived from the site's URL (if any) comes first, followed
/// by `DEFAULT_CANDIDATES`. Empty names are dropped and a name appears at most
/// once, at its first position.
#[must_use]
pub fn default_candidate_pool(site: &Site) -> Vec<String> {
    use std::collections::HashSet;

    let mut already_added: HashSet<String> = HashSet::new();
    let mut candidates: Vec<String> = Vec::with_capacity(DEFAULT_CANDIDATES.len() + 1);

    let brand = brand_name_from_site(site);
    let defaults = DEFAULT_CANDIDATES.iter().map(|name| (*name).to_owned());
    for name in brand.into_iter().chain(defaults) {
        if name.is_empty() {
            continue;
        }
        if already_added.insert(name.clone()) {
            candidates.push(name);
        }
    }
    candidates
}
/// Derives a lowercase "brand" label from the host of the site's URL template.
///
/// `{username}` is replaced with `_` so the template can be parsed as a URL.
/// The label immediately left of the last one is then taken
/// (`www.github.com` → `github`); a single-label host such as `localhost` is
/// returned whole.
///
/// Returns `None` when the template does not parse, has no host, the host is
/// an IP address (its octets are not a brand), or the chosen label is empty.
fn brand_name_from_site(site: &Site) -> Option<String> {
    let probe = site.url.as_str().replace("{username}", "_");
    let url = url::Url::parse(&probe).ok()?;

    // Splitting an IP literal on '.' would yield an octet such as "0" (or the
    // bracketed IPv6 text) as the "brand", so only domain hosts are accepted.
    let Some(url::Host::Domain(domain)) = url.host() else {
        return None;
    };

    // A fully-qualified name may end in '.', which would otherwise shift the
    // label selection by one ("github.com." → "com").
    let domain = domain.trim_end_matches('.');
    let label = domain.rsplit('.').nth(1).unwrap_or(domain);

    (!label.is_empty()).then(|| label.to_lowercase())
}
/// Returns the first entry of `candidates` for which `client.check` reports
/// `MatchKind::Found` on `site`, probing them in order.
///
/// Candidates rejected by `Username::new` are skipped without being probed.
/// Returns `None` when no candidate is found.
pub async fn discover_known_present(
    client: &Client,
    site: &Site,
    candidates: &[String],
) -> Option<String> {
    for candidate in candidates {
        if let Ok(user) = Username::new(candidate.clone()) {
            let outcome = client.check(site, &user).await;
            if outcome.kind == MatchKind::Found {
                return Some(candidate.clone());
            }
        }
    }
    None
}
/// Detection signals proposed for a site by `suggest_fix`, together with the
/// reasoning behind them.
#[derive(Debug, Clone)]
pub struct FixSuggestion {
/// Name of the site the suggestion applies to.
pub site: String,
/// Signals proposed for the site.
pub signals: Vec<Signal>,
/// Human-readable explanation of how the signals were derived.
pub rationale: String,
}
/// Tries to derive detection signals for `site` by fetching the page of a
/// known-present user and the page of a random (expected absent) user and
/// comparing the two responses.
///
/// Strategies, in order:
///
/// 1. The status codes differ and the present status is in `200..400`:
///    suggest `StatusFound` / `StatusNotFound` with the observed codes.
/// 2. Both pages have a `<title>`, the titles differ, and the absent title
///    does not occur anywhere in the present page body: suggest `StatusFound`
///    with the present status plus `BodyAbsent` with the absent title.
///
/// Returns `None` when the site has no usable known-present name, a username
/// cannot be built, either fetch yields nothing, or neither strategy applies.
///
/// NOTE(review): strategy 2 is also reached when the statuses differ but the
/// present status is outside `200..400`; its rationale text still says
/// "same status" in that case — confirm whether that is intended.
pub async fn suggest_fix(client: &Client, site: &Site) -> Option<FixSuggestion> {
    let present_name = site
        .known_present
        .as_ref()
        .and_then(|known| known.primary())?;
    let present_user = Username::new(present_name.to_owned()).ok()?;
    let absent_user = Username::new(random_nonsense_username()).ok()?;

    let present = client
        .fetch_for_doctor(site, &site.url_for(&present_user))
        .await?;
    let absent = client
        .fetch_for_doctor(site, &site.url_for(&absent_user))
        .await?;

    // Strategy 1: the status code alone tells the two pages apart.
    let present_ok = (200..400).contains(&present.status);
    if present_ok && present.status != absent.status {
        let rationale = format!(
            "status differs: present={}, absent={}",
            present.status, absent.status
        );
        let signals = vec![
            Signal::StatusFound {
                codes: vec![present.status],
            },
            Signal::StatusNotFound {
                codes: vec![absent.status],
            },
        ];
        return Some(FixSuggestion {
            site: site.name.clone(),
            signals,
            rationale,
        });
    }

    // Strategy 2: use the absent page's title as a "not found" body marker.
    let present_title = html_title(&present.body)?;
    let absent_title = html_title(&absent.body)?;
    let unusable = present_title == absent_title
        || absent_title.is_empty()
        || present.body.contains(&absent_title);
    if unusable {
        return None;
    }

    let rationale = format!(
        "same status {}, distinct page titles; absent title {absent_title:?} \
         does not appear on the present page",
        present.status
    );
    Some(FixSuggestion {
        site: site.name.clone(),
        signals: vec![
            Signal::StatusFound {
                codes: vec![present.status],
            },
            Signal::BodyAbsent { text: absent_title },
        ],
        rationale,
    })
}
/// Builds a new `Site` entry from a name, URL template and one known-present
/// username, with signals derived by `suggest_fix`.
///
/// A provisional site (single `StatusFound` 200 signal, every optional field
/// empty) is probed first. When a suggestion is produced, the provisional
/// site is returned with its signals replaced by the suggested ones, together
/// with the suggestion's rationale.
///
/// # Errors
///
/// Returns an error when `UrlTemplate::new` rejects `url`. Returns `Ok(None)`
/// when no signals could be derived.
pub async fn scaffold_site(
    client: &Client,
    name: &str,
    url: &str,
    known_present: &str,
) -> Result<Option<(Site, String)>> {
    let mut probe = Site {
        name: name.to_owned(),
        url: UrlTemplate::new(url)?,
        signals: vec![Signal::StatusFound { codes: vec![200] }],
        known_present: Some(KnownPresent::Single(known_present.to_owned())),
        known_absent: None,
        extract: Vec::new(),
        tags: Vec::new(),
        request_headers: std::collections::BTreeMap::new(),
        regex_check: None,
        engine: None,
    };

    let Some(fix) = suggest_fix(client, &probe).await else {
        return Ok(None);
    };
    probe.signals = fix.signals;
    Ok(Some((probe, fix.rationale)))
}
/// Extracts the trimmed text of the first `<title>` element in `body`.
///
/// Tag matching is ASCII case-insensitive and the opening tag may carry
/// attributes. Tags whose name merely starts with `title` (for example
/// `<titlebar>`) are skipped rather than mistaken for the title element.
///
/// Returns `None` when no complete title element is present or its text is
/// empty after trimming. The result is cut to at most `MAX_TITLE_MARKER`
/// characters.
fn html_title(body: &str) -> Option<String> {
    const OPEN_TAG: &str = "<title";
    const CLOSE_TAG: &str = "</title>";

    // ASCII lowercasing leaves every byte offset unchanged, so positions found
    // in `lower` are valid for slicing the original `body`.
    let lower = body.to_ascii_lowercase();
    let bytes = lower.as_bytes();

    let mut search_from = 0;
    let content_start = loop {
        let open = lower[search_from..].find(OPEN_TAG)? + search_from;
        let after_name = open + OPEN_TAG.len();
        // The tag name must end right here; otherwise this is a different tag
        // that only shares the prefix.
        let is_title_tag = matches!(
            bytes.get(after_name),
            Some(&b) if b == b'>' || b == b'/' || b.is_ascii_whitespace()
        );
        if is_title_tag {
            break lower[after_name..].find('>')? + after_name + 1;
        }
        search_from = after_name;
    };

    let content_end = lower[content_start..].find(CLOSE_TAG)? + content_start;
    let title = body[content_start..content_end].trim();
    if title.is_empty() {
        return None;
    }
    Some(title.chars().take(MAX_TITLE_MARKER).collect())
}
/// Builds a placeholder `CheckOutcome` for a probe that was never sent.
///
/// The outcome is `MatchKind::Uncertain` with `note` as its reason, an empty
/// URL, zero elapsed time, and no enrichment or evidence.
fn dummy_outcome(site: &str, note: &str) -> CheckOutcome {
    let reason = crate::check::UncertainReason::Other(String::from(note));
    CheckOutcome {
        site: String::from(site),
        url: String::new(),
        kind: MatchKind::Uncertain,
        reason: Some(reason),
        elapsed_ms: 0,
        enrichment: std::collections::BTreeMap::new(),
        evidence: Vec::new(),
    }
}
#[cfg(test)]
mod tests {
use super::*;
use crate::site::{Signal, UrlTemplate};
use wiremock::matchers::{method, path_regex};
use wiremock::{Mock, MockServer, ResponseTemplate};
/// Builds the client used by these tests: 2 s timeout, zero minimum interval
/// between requests, and no retries.
fn build_client() -> Client {
Client::builder()
.timeout(std::time::Duration::from_secs(2))
.min_request_interval(std::time::Duration::ZERO)
.max_retries(0)
.build()
.unwrap()
}
/// Builds a `Site` whose URL template is `<mock server>/{username}`, with a
/// `StatusFound` signal on 200 and a `StatusNotFound` signal on 404.
/// `known_present` is set from the argument; every other optional field is
/// left empty.
fn site(server: &MockServer, name: &str, known_present: Option<&str>) -> Site {
Site {
name: name.into(),
url: UrlTemplate::new(format!("{}/{{username}}", server.uri())).unwrap(),
signals: vec![
Signal::StatusFound { codes: vec![200] },
Signal::StatusNotFound { codes: vec![404] },
],
known_present: known_present.map(KnownPresent::from),
known_absent: None,
extract: Vec::new(),
tags: Vec::new(),
request_headers: std::collections::BTreeMap::new(),
regex_check: None,
engine: None,
}
}
/// Mock server: a 200 responder for `/alice` plus a catch-all 404 responder.
/// With `alice` as the known-present user, `check_site` must report healthy.
#[tokio::test]
async fn healthy_when_present_returns_200_and_random_returns_404() {
let server = MockServer::start().await;
Mock::given(method("GET"))
.and(path_regex("^/alice$"))
.respond_with(ResponseTemplate::new(200))
.mount(&server)
.await;
Mock::given(method("GET"))
.respond_with(ResponseTemplate::new(404))
.mount(&server)
.await;
let site = site(&server, "Mock", Some("alice"));
let report = check_site(&build_client(), &site).await;
assert!(report.is_healthy(), "{report:?}");
}
/// Every GET answers 404, so the known-present user `alice` is not found.
/// The report must be `Unhealthy` with an issue mentioning "known-present".
#[tokio::test]
async fn unhealthy_when_known_present_not_found() {
let server = MockServer::start().await;
Mock::given(method("GET"))
.respond_with(ResponseTemplate::new(404))
.mount(&server)
.await;
let site = site(&server, "Mock", Some("alice"));
let report = check_site(&build_client(), &site).await;
match report {
DoctorReport::Unhealthy { issues, .. } => {
assert!(
issues.iter().any(|i| i.contains("known-present")),
"issues: {issues:?}",
);
}
other @ DoctorReport::Healthy { .. } => {
panic!("expected Unhealthy, got {other:?}")
}
}
}
/// Every GET answers 200, so even the random absent-probe user looks found.
/// The report must be `Unhealthy` with an issue mentioning "too permissive".
#[tokio::test]
async fn unhealthy_when_random_user_reports_found() {
let server = MockServer::start().await;
Mock::given(method("GET"))
.respond_with(ResponseTemplate::new(200))
.mount(&server)
.await;
let site = site(&server, "Mock", None);
let report = check_site(&build_client(), &site).await;
match report {
DoctorReport::Unhealthy { issues, .. } => {
assert!(
issues.iter().any(|i| i.contains("too permissive")),
"issues: {issues:?}",
);
}
other @ DoctorReport::Healthy { .. } => {
panic!("expected Unhealthy, got {other:?}")
}
}
}
/// Two known-present names are configured; the mock server has a 200
/// responder for `/torvalds` plus a catch-all 404 responder. The report must
/// be healthy, carry both outcomes, and show `torvalds` as `Found`.
#[tokio::test]
async fn healthy_when_one_of_several_known_present_yields_found() {
let server = MockServer::start().await;
Mock::given(method("GET"))
.and(path_regex("^/torvalds$"))
.respond_with(ResponseTemplate::new(200))
.mount(&server)
.await;
Mock::given(method("GET"))
.respond_with(ResponseTemplate::new(404))
.mount(&server)
.await;
let mut s = site(&server, "Mock", None);
s.known_present = Some(KnownPresent::Multiple(vec![
"instagram".into(),
"torvalds".into(),
]));
let report = check_site(&build_client(), &s).await;
assert!(report.is_healthy(), "{report:?}");
let DoctorReport::Healthy { present, .. } = &report else {
unreachable!()
};
assert_eq!(present.len(), 2);
assert!(
present
.iter()
.any(|(n, o)| n == "torvalds" && o.kind == MatchKind::Found),
"expected torvalds=Found in {present:?}"
);
}
/// Two known-present names are configured but every GET answers 404. The
/// report must be `Unhealthy`, carry both outcomes, and its "known-present"
/// issue must name both candidates.
#[tokio::test]
async fn unhealthy_when_no_known_present_candidate_is_found() {
let server = MockServer::start().await;
Mock::given(method("GET"))
.respond_with(ResponseTemplate::new(404))
.mount(&server)
.await;
let mut s = site(&server, "Mock", None);
s.known_present = Some(KnownPresent::Multiple(vec!["alpha".into(), "beta".into()]));
let report = check_site(&build_client(), &s).await;
match report {
DoctorReport::Unhealthy {
issues, present, ..
} => {
assert_eq!(present.len(), 2, "both candidates should be reported");
let summary = issues.iter().find(|i| i.contains("known-present"));
let summary = summary.expect("present-check issue should be raised");
assert!(summary.contains("alpha"), "issue lacks alpha: {summary}");
assert!(summary.contains("beta"), "issue lacks beta: {summary}");
}
other @ DoctorReport::Healthy { .. } => {
panic!("expected Unhealthy, got {other:?}")
}
}
}
/// With no known-present user configured and every GET answering 404, the
/// report must be healthy and contain no present outcomes.
#[tokio::test]
async fn skips_present_check_when_known_present_is_none() {
let server = MockServer::start().await;
Mock::given(method("GET"))
.respond_with(ResponseTemplate::new(404))
.mount(&server)
.await;
let site = site(&server, "Mock", None);
let report = check_site(&build_client(), &site).await;
assert!(report.is_healthy(), "{report:?}");
let DoctorReport::Healthy { present, .. } = &report else {
unreachable!()
};
assert!(present.is_empty());
}
/// A generated nonsense username must be accepted by `Username::new` and
/// consist only of ASCII alphanumeric characters.
#[test]
fn random_username_passes_validation() {
let name = random_nonsense_username();
let result = Username::new(&name);
assert!(result.is_ok(), "generated {name:?} should pass validation");
assert!(name.chars().all(|c| c.is_ascii_alphanumeric()));
}
/// `html_title` matches the tag case-insensitively and trims the text, and
/// returns `None` for a missing or empty title.
#[test]
fn html_title_extracts_and_trims() {
assert_eq!(
html_title("<html><head><TITLE> Hello </TITLE></head>").as_deref(),
Some("Hello")
);
assert_eq!(html_title("<html>no title here</html>"), None);
assert_eq!(html_title("<title></title>"), None);
}
/// Mock server: a 200 responder for `/blue` plus a catch-all 410 responder.
/// The suggestion must be `[StatusFound [200], StatusNotFound [410]]` with a
/// rationale containing "status differs".
#[tokio::test]
async fn suggest_fix_derives_status_signals_when_status_differs() {
let server = MockServer::start().await;
Mock::given(method("GET"))
.and(path_regex("^/blue$"))
.respond_with(ResponseTemplate::new(200))
.mount(&server)
.await;
Mock::given(method("GET"))
.respond_with(ResponseTemplate::new(410)) .mount(&server)
.await;
let s = site(&server, "Mock", Some("blue"));
let fix = suggest_fix(&build_client(), &s)
.await
.expect("a suggestion");
assert!(fix.rationale.contains("status differs"));
assert!(matches!(
fix.signals.as_slice(),
[
Signal::StatusFound { codes: f },
Signal::StatusNotFound { codes: nf },
] if f == &[200] && nf == &[410]
));
}
/// Both responders answer 200 but with different page titles. The suggestion
/// must be `StatusFound` plus `BodyAbsent` carrying the catch-all page's
/// title, "Page not found".
#[tokio::test]
async fn suggest_fix_derives_body_marker_from_title() {
let server = MockServer::start().await;
Mock::given(method("GET"))
.and(path_regex("^/blue$"))
.respond_with(
ResponseTemplate::new(200).set_body_string("<title>blue · Profile</title>ok"),
)
.mount(&server)
.await;
Mock::given(method("GET"))
.respond_with(
ResponseTemplate::new(200).set_body_string("<title>Page not found</title>"),
)
.mount(&server)
.await;
let s = site(&server, "Mock", Some("blue"));
let fix = suggest_fix(&build_client(), &s)
.await
.expect("a suggestion");
assert!(matches!(
fix.signals.as_slice(),
[Signal::StatusFound { .. }, Signal::BodyAbsent { text }]
if text == "Page not found"
));
}
/// Runs `suggest_fix` for a site tagged `bot-protected` using a client that
/// was built with a browser backend connected to a mock CDP server.
///
/// The mock CDP handler records the URL of each `Page.navigate`, reports a
/// 200 document response for it, and answers `Runtime.evaluate` with a
/// profile page when the recorded URL contains `/torvalds` and a
/// "Profile not found" page otherwise. The HTTP mock server has no responders
/// mounted. The suggestion must be `[StatusFound [200], BodyAbsent]` with a
/// marker containing "not found" and a rationale that mentions the title.
#[tokio::test]
async fn suggest_fix_routes_bot_protected_sites_through_browser_backend() {
use std::sync::Arc;
use std::sync::Mutex;
use serde_json::json;
use crate::browser::cdp::CdpClient;
use crate::browser::mock_cdp::{FrameOut, MockCdpServer};
use crate::browser::{BrowserBackend, BrowserbaseBackend};
let last_url: Arc<Mutex<String>> = Arc::new(Mutex::new(String::new()));
let last_url_for_handler = Arc::clone(&last_url);
let server = MockCdpServer::start(move |method, params, _sid| match method {
"Target.createTarget" => vec![FrameOut::Response(json!({ "targetId": "T1" }))],
"Target.attachToTarget" => vec![FrameOut::Response(json!({ "sessionId": "S1" }))],
"Page.navigate" => {
let url = params
.get("url")
.and_then(serde_json::Value::as_str)
.unwrap_or("")
.to_owned();
*last_url_for_handler.lock().unwrap() = url.clone();
vec![
FrameOut::Response(json!({ "frameId": "F1" })),
FrameOut::Event {
method: "Network.responseReceived".into(),
params: json!({
"type": "Document",
"response": { "status": 200, "url": url },
}),
session_id: Some("S1".into()),
},
FrameOut::Event {
method: "Page.frameStoppedLoading".into(),
params: json!({ "frameId": "F1" }),
session_id: Some("S1".into()),
},
]
}
"Runtime.evaluate" => {
let url = last_url_for_handler.lock().unwrap().clone();
let body = if url.contains("/torvalds") {
"<html><head><title>torvalds · profile</title></head>\
<body>real content</body></html>"
} else {
"<html><head><title>Profile not found</title></head>\
<body>Profile not found</body></html>"
};
vec![FrameOut::Response(json!({
"result": { "type": "string", "value": body },
}))]
}
_ => vec![FrameOut::Response(json!({}))],
})
.await;
let cdp = CdpClient::connect(&server.ws_url()).await.unwrap();
let backend: std::sync::Arc<dyn BrowserBackend> =
std::sync::Arc::new(BrowserbaseBackend::from_parts(cdp, "test-session".into()));
let http_server = MockServer::start().await;
let url_template = format!("{}/{{username}}", http_server.uri());
let s = Site {
name: "MockBP".into(),
url: UrlTemplate::new(url_template).unwrap(),
signals: vec![Signal::StatusFound { codes: vec![200] }],
known_present: Some(KnownPresent::Single("torvalds".into())),
known_absent: None,
extract: Vec::new(),
tags: vec!["bot-protected".into()],
request_headers: std::collections::BTreeMap::new(),
regex_check: None,
engine: None,
};
let client = Client::builder()
.timeout(std::time::Duration::from_secs(5))
.min_request_interval(std::time::Duration::ZERO)
.max_retries(0)
.browser(backend)
.build()
.unwrap();
let fix = suggest_fix(&client, &s)
.await
.expect("suggest_fix should derive a signature from the browser-rendered diff");
assert!(
matches!(
fix.signals.as_slice(),
[Signal::StatusFound { codes }, Signal::BodyAbsent { text }]
if codes == &[200] && text.contains("not found")
),
"unexpected signals: {:?}",
fix.signals,
);
assert!(
fix.rationale.contains("titles") || fix.rationale.contains("title"),
"rationale should mention titles, got: {}",
fix.rationale,
);
}
/// Every GET answers 200 with the same title, so present and absent pages
/// cannot be told apart and `suggest_fix` must return `None`.
#[tokio::test]
async fn suggest_fix_returns_none_when_indistinguishable() {
let server = MockServer::start().await;
Mock::given(method("GET"))
.respond_with(ResponseTemplate::new(200).set_body_string("<title>Same</title>"))
.mount(&server)
.await;
let s = site(&server, "Mock", Some("blue"));
assert!(suggest_fix(&build_client(), &s).await.is_none());
}
/// Mock server: a 200 responder for `/torvalds` plus a catch-all 404
/// responder. `scaffold_site` must return a site named "Mock" with
/// `torvalds` as its primary known-present user, signals
/// `[StatusFound [200], StatusNotFound [404]]`, and a rationale containing
/// "status differs".
#[tokio::test]
async fn scaffold_site_builds_complete_entry_from_status_diff() {
let server = MockServer::start().await;
Mock::given(method("GET"))
.and(path_regex("^/torvalds$"))
.respond_with(ResponseTemplate::new(200))
.mount(&server)
.await;
Mock::given(method("GET"))
.respond_with(ResponseTemplate::new(404))
.mount(&server)
.await;
let url = format!("{}/{{username}}", server.uri());
let (site, rationale) = scaffold_site(&build_client(), "Mock", &url, "torvalds")
.await
.expect("valid url")
.expect("a derived signature");
assert_eq!(site.name, "Mock");
assert_eq!(
site.known_present.as_ref().and_then(KnownPresent::primary),
Some("torvalds")
);
assert!(rationale.contains("status differs"));
assert!(matches!(
site.signals.as_slice(),
[Signal::StatusFound { codes: f }, Signal::StatusNotFound { codes: nf }]
if f == &[200] && nf == &[404]
));
}
/// Every GET answers 200 with the same title; `scaffold_site` must succeed
/// but yield `None` because no signals can be derived.
#[tokio::test]
async fn scaffold_site_none_when_indistinguishable() {
let server = MockServer::start().await;
Mock::given(method("GET"))
.respond_with(ResponseTemplate::new(200).set_body_string("<title>Same</title>"))
.mount(&server)
.await;
let url = format!("{}/{{username}}", server.uri());
let scaffold = scaffold_site(&build_client(), "Mock", &url, "blue")
.await
.expect("valid url");
assert!(scaffold.is_none());
}
/// A URL string that is not a valid template must make `scaffold_site`
/// return an error.
#[tokio::test]
async fn scaffold_site_rejects_bad_url() {
let err = scaffold_site(&build_client(), "Bad", "not-a-url-no-placeholder", "u").await;
assert!(err.is_err());
}
/// Without a known-present user configured, `suggest_fix` must return `None`.
#[tokio::test]
async fn suggest_fix_none_without_known_present() {
let server = MockServer::start().await;
Mock::given(method("GET"))
.respond_with(ResponseTemplate::new(200))
.mount(&server)
.await;
let s = site(&server, "Mock", None);
assert!(suggest_fix(&build_client(), &s).await.is_none());
}
/// Mock server: a 200 responder for `/dhh` plus a catch-all 404 responder.
/// With `dhh` third in the candidate list, discovery must return `dhh`.
#[tokio::test]
async fn discover_returns_first_candidate_that_yields_found() {
let server = MockServer::start().await;
Mock::given(method("GET"))
.and(path_regex("^/dhh$"))
.respond_with(ResponseTemplate::new(200))
.mount(&server)
.await;
Mock::given(method("GET"))
.respond_with(ResponseTemplate::new(404))
.mount(&server)
.await;
let s = site(&server, "Mock", None);
let candidates = vec![
"torvalds".into(),
"octocat".into(),
"dhh".into(),
"admin".into(),
];
let found = discover_known_present(&build_client(), &s, &candidates).await;
assert_eq!(found.as_deref(), Some("dhh"));
}
/// Every GET answers 404, so discovery over the candidates must return `None`.
#[tokio::test]
async fn discover_returns_none_when_no_candidate_yields_found() {
let server = MockServer::start().await;
Mock::given(method("GET"))
.respond_with(ResponseTemplate::new(404))
.mount(&server)
.await;
let s = site(&server, "Mock", None);
let candidates = vec!["torvalds".into(), "admin".into()];
let found = discover_known_present(&build_client(), &s, &candidates).await;
assert!(found.is_none());
}
/// The candidate list starts with an empty string and a name containing
/// spaces before `dhh`; discovery must still return `dhh`.
#[tokio::test]
async fn discover_skips_invalid_usernames_silently() {
let server = MockServer::start().await;
Mock::given(method("GET"))
.and(path_regex("^/dhh$"))
.respond_with(ResponseTemplate::new(200))
.mount(&server)
.await;
Mock::given(method("GET"))
.respond_with(ResponseTemplate::new(404))
.mount(&server)
.await;
let s = site(&server, "Mock", None);
let candidates = vec![String::new(), "bad user with space".into(), "dhh".into()];
let found = discover_known_present(&build_client(), &s, &candidates).await;
assert_eq!(found.as_deref(), Some("dhh"));
}
/// For `https://www.github.com/{username}` the pool must start with
/// `github`, contain it exactly once, and still include the canned names
/// `torvalds`, `octocat` and `admin`.
#[test]
fn default_pool_puts_brand_first_when_derivable() {
let site = Site {
name: "GitHub".into(),
url: UrlTemplate::new("https://www.github.com/{username}").unwrap(),
signals: vec![Signal::StatusFound { codes: vec![200] }],
known_present: None,
known_absent: None,
extract: Vec::new(),
tags: Vec::new(),
request_headers: std::collections::BTreeMap::new(),
regex_check: None,
engine: None,
};
let pool = default_candidate_pool(&site);
assert_eq!(pool.first().map(String::as_str), Some("github"));
let brand_occurrences = pool.iter().filter(|n| n.as_str() == "github").count();
assert_eq!(brand_occurrences, 1, "brand should be deduplicated");
for expected in ["torvalds", "octocat", "admin"] {
assert!(
pool.iter().any(|n| n == expected),
"pool missing {expected:?}; got {pool:?}"
);
}
}
/// For `http://localhost/{username}` the pool must still contain the canned
/// names `torvalds` and `admin`.
#[test]
fn default_pool_falls_back_to_canned_list_when_brand_underivable() {
let site = Site {
name: "Local".into(),
url: UrlTemplate::new("http://localhost/{username}").unwrap(),
signals: vec![Signal::StatusFound { codes: vec![200] }],
known_present: None,
known_absent: None,
extract: Vec::new(),
tags: Vec::new(),
request_headers: std::collections::BTreeMap::new(),
regex_check: None,
engine: None,
};
let pool = default_candidate_pool(&site);
assert!(pool.contains(&"torvalds".to_owned()));
assert!(pool.contains(&"admin".to_owned()));
}
}