use url::Url;
/// Absolute paths under the `/.well-known/` prefix that [`probe_urls`] resolves
/// against an origin.
///
/// Every entry starts with `/`, so joining it onto a base URL replaces the
/// base's path instead of appending to it. The order of this table is the
/// order in which [`probe_urls`] returns its candidates.
pub const WELL_KNOWN_PATHS: &[&str] = &[
"/.well-known/security.txt",
"/.well-known/openid-configuration",
"/.well-known/oauth-authorization-server",
"/.well-known/apple-app-site-association",
"/.well-known/assetlinks.json",
"/.well-known/host-meta",
"/.well-known/host-meta.json",
"/.well-known/change-password",
"/.well-known/webfinger",
"/.well-known/nodeinfo",
"/.well-known/dnt-policy.txt",
"/.well-known/brand-indicators-for-message-identification",
];
/// Builds the list of well-known URLs to probe for `origin`.
///
/// Each entry of [`WELL_KNOWN_PATHS`] is resolved against `origin` with
/// [`Url::join`]. Entries that cannot be joined are skipped without reporting
/// an error, so the result may hold fewer URLs than the table has paths. The
/// returned URLs keep the order of the table.
pub fn probe_urls(origin: &Url) -> Vec<Url> {
    let mut candidates = Vec::with_capacity(WELL_KNOWN_PATHS.len());
    for path in WELL_KNOWN_PATHS {
        // A failed join is deliberately ignored: probing is best-effort.
        if let Ok(resolved) = origin.join(path) {
            candidates.push(resolved);
        }
    }
    candidates
}
/// Scans `body` for `http://` and `https://` URLs and returns those that parse.
///
/// A candidate starts at a lowercase `http://` or `https://` prefix (the match
/// is byte-exact, so uppercase schemes are not recognised) and runs until the
/// first delimiter byte: space, `\n`, `\r`, `\t`, `"`, `'`, `<`, `>`, a
/// backtick, `,`, `)`, `]`, `}`, or the end of input. Trailing `.,;:!?`
/// characters are stripped before the candidate is handed to [`Url::parse`].
/// Candidates that fail to parse are dropped silently.
///
/// URLs are returned in order of appearance; duplicates are kept.
pub fn extract_urls_from_body(body: &str) -> Vec<Url> {
    /// Returns `true` for bytes that terminate a URL candidate.
    fn is_delimiter(byte: u8) -> bool {
        matches!(
            byte,
            b' ' | b'\n'
                | b'\r'
                | b'\t'
                | b'"'
                | b'\''
                | b'<'
                | b'>'
                | b'`'
                | b','
                | b')'
                | b']'
                | b'}'
        )
    }

    let bytes = body.as_bytes();
    let mut out = Vec::new();
    let mut i = 0;
    while i < bytes.len() {
        let rest = &bytes[i..];
        // Remember which scheme matched so the "has something after the
        // scheme" check below uses the right length for each of them.
        let scheme_len = if rest.starts_with(b"https://") {
            8
        } else if rest.starts_with(b"http://") {
            7
        } else {
            i += 1;
            continue;
        };

        let start = i;
        let end = rest
            .iter()
            .position(|&b| is_delimiter(b))
            .map_or(bytes.len(), |offset| start + offset);

        // Skip bare schemes such as "http://" with nothing after them.
        if end > start + scheme_len {
            // Slicing the `str` is safe: `start` sits on an ASCII `h` and `end`
            // on an ASCII delimiter or the end of input, both char boundaries.
            let candidate = body[start..end].trim_end_matches(|c: char| ".,;:!?".contains(c));
            if let Ok(url) = Url::parse(candidate) {
                out.push(url);
            }
        }

        // Resume after the delimiter; anything inside the candidate has
        // already been consumed as part of it.
        i = end + 1;
    }
    out
}