use std::collections::BTreeSet;
/// Host patterns that `is_trusted_api_host` accepts.
///
/// Each entry is compared against the lowercased host of an endpoint by
/// `matches_host_pattern`:
///
/// * an entry starting with `*.` matches any host that ends with `.` followed
///   by the rest of the entry (a strict subdomain; the bare domain itself does
///   not match, which is why some domains are listed both bare and wildcarded);
/// * any other entry must equal the host exactly.
///
/// Some literal entries (`api.github.com`, `hooks.slack.com`) are already
/// covered by a wildcard entry for the same domain; they are redundant but
/// harmless.
///
/// NOTE(review): several wildcard entries (`*.amazonaws.com`, `*.pages.dev`,
/// `*.workers.dev`, `*.githubusercontent.com`) appear to cover multi-tenant
/// hosting where third parties can obtain their own subdomain — confirm that
/// trusting every subdomain of these is intended.
pub(super) const TRUSTED_API_HOSTS: &[&str] = &[
"*.googleapis.com",
"*.google.com",
"github.com",
"api.github.com",
"*.github.com",
"*.githubusercontent.com",
"api.openai.com",
"api.anthropic.com",
"api.x.ai",
"api.deepseek.com",
"openrouter.ai",
"api.openrouter.ai",
"ollama.com",
"api.ollama.com",
"huggingface.co",
"*.huggingface.co",
"*.hf.co",
"atlassian.com",
"*.atlassian.net",
"*.atlassian.com",
"api.notion.com",
"*.slack.com",
"slack.com",
"hooks.slack.com",
"graph.microsoft.com",
"login.microsoftonline.com",
"*.amazonaws.com",
"*.cloudflare.com",
"*.pages.dev",
"*.workers.dev",
"*.zapier.com",
"*.make.com",
"*.n8n.cloud",
"*.pipedream.com",
"*.pipedream.net",
"*.ifttt.com",
"*.tavily.com",
"iana.org",
"*.iana.org",
"ietf.org",
"*.ietf.org",
"api.stripe.com",
"api.twilio.com",
"api.sendgrid.com",
"api.mailgun.net",
"api.postmarkapp.com",
"api.linear.app",
"api.figma.com",
"api.zoom.us",
"api.dropbox.com",
"api.intercom.io",
"api.hubapi.com",
"api.asana.com",
"api.trello.com",
"api.airtable.com",
"api.basecamp.com",
"api.calendly.com",
"api.discord.com",
"discord.com",
"api.telegram.org",
"api.spotify.com",
"api.youtube.com",
];
/// Host patterns that `is_documentation_or_reserved_host` treats as
/// documentation or reserved names: the `example.{com,org,net}` domains and
/// their subdomains, hosts under the `.example`, `.test` and `.invalid`
/// top-level names, and `localhost` with its subdomains (these are the names
/// set aside by RFC 2606 / RFC 6761).
///
/// Entries use the same syntax as `TRUSTED_API_HOSTS`: a leading `*.` matches
/// strict subdomains only, so e.g. `*.test` matches `bar.test` but not the
/// single-label host `test`. Loopback IPv4 literals are not listed here; the
/// caller checks them separately.
pub(super) const DOCUMENTATION_OR_RESERVED_HOSTS: &[&str] = &[
"example.com",
"example.org",
"example.net",
"*.example.com",
"*.example.org",
"*.example.net",
"*.example",
"*.test",
"*.invalid",
"localhost",
"*.localhost",
];
/// Reports whether `endpoint` points at a documentation/reserved host.
///
/// The host is extracted from `endpoint` (URL or bare `host[:port][/path]`),
/// lowercased, and accepted when it is either a `127.x.y.z` IPv4 literal or
/// matches one of the `DOCUMENTATION_OR_RESERVED_HOSTS` patterns. Endpoints
/// from which no host can be extracted yield `false`.
#[must_use]
pub(super) fn is_documentation_or_reserved_host(endpoint: &str) -> bool {
    let Some(raw_host) = extract_host(endpoint) else {
        return false;
    };
    let host = raw_host.to_ascii_lowercase();
    if host.is_empty() {
        return false;
    }
    // Loopback literals are checked first; everything else goes through the table.
    is_loopback_ipv4(&host)
        || DOCUMENTATION_OR_RESERVED_HOSTS
            .iter()
            .any(|pattern| matches_host_pattern(&host, pattern))
}
/// Returns `true` when `host` is a dotted-quad IPv4 literal whose first octet
/// is written as `127` (i.e. the text starts with `127.`).
fn is_loopback_ipv4(host: &str) -> bool {
    is_ipv4_literal(host) && host.starts_with("127.")
}
/// Reports whether `endpoint` targets a host on the `TRUSTED_API_HOSTS`
/// allowlist.
///
/// The host is extracted from `endpoint` and lowercased before matching, so
/// the check is case-insensitive. IPv4 literals are never trusted, and
/// endpoints from which no host can be extracted yield `false`.
#[must_use]
pub(super) fn is_trusted_api_host(endpoint: &str) -> bool {
    let Some(raw_host) = extract_host(endpoint) else {
        return false;
    };
    let host = raw_host.to_ascii_lowercase();
    // Raw addresses carry no ownership information, so they are rejected outright.
    if host.is_empty() || is_ipv4_literal(&host) {
        return false;
    }
    TRUSTED_API_HOSTS
        .iter()
        .any(|pattern| matches_host_pattern(&host, pattern))
}
/// Generic lowercase words that `secret_identity_tokens` discards when
/// splitting a secret name into tokens, so that only the remaining words
/// (e.g. a vendor name) are compared against a host label.
///
/// `secret_identity_tokens` drops tokens shorter than four characters before
/// it consults this list, so the entries of three characters or fewer
/// (`api`, `key`, `env`, `url`, ...) have no effect on that caller.
const SECRET_NAME_STOPWORDS: &[&str] = &[
"api",
"key",
"keys",
"token",
"tokens",
"secret",
"secrets",
"auth",
"oauth",
"client",
"bearer",
"access",
"refresh",
"env",
"environ",
"config",
"url",
"uri",
"host",
"hostname",
"endpoint",
"bot",
"pat",
"cred",
"creds",
"credential",
"credentials",
"password",
"passwd",
"pwd",
"user",
"username",
"login",
"session",
"cookie",
"prod",
"production",
"dev",
"development",
"stage",
"staging",
"test",
"sandbox",
"live",
"http",
"https",
"www",
"com",
"net",
"org",
"default",
"value",
"string",
"data",
"file",
"path",
"name",
];
/// Second-to-last host labels that `registrable_label` treats as part of a
/// two-label suffix (such as `co.uk`) when the host has at least three labels
/// and the final label is at most three characters long. This is a heuristic
/// list, not a full public-suffix table.
const COMPOUND_TLD_PENULTIMATES: &[&str] = &["com", "net", "org", "co", "gov", "edu", "ac"];
/// Extracts the label that identifies the owner of `endpoint`'s host: the
/// label immediately left of the suffix (`wahooligan` for
/// `cloud-api.wahooligan.com`).
///
/// The suffix is normally the last label; it is the last two labels when the
/// host has at least three labels, the second-to-last one is listed in
/// `COMPOUND_TLD_PENULTIMATES`, and the last one is at most three characters.
///
/// Returns `None` when no host can be extracted, the host is an IPv4 literal,
/// the host has fewer than two non-empty labels, or the owning label is
/// shorter than four characters.
fn registrable_label(endpoint: &str) -> Option<String> {
    let host = extract_host(endpoint)?.to_ascii_lowercase();
    if host.is_empty() || is_ipv4_literal(&host) {
        return None;
    }
    // Empty labels (leading, trailing or doubled dots) are ignored.
    let labels: Vec<&str> = host.split('.').filter(|part| !part.is_empty()).collect();
    let owner = match labels.as_slice() {
        // `<owner>.<second-level>.<short tld>`, e.g. `foo.co.uk`.
        [.., owner, second_level, tld]
            if COMPOUND_TLD_PENULTIMATES.contains(second_level) && tld.len() <= 3 =>
        {
            *owner
        }
        // `<owner>.<tld>`, possibly preceded by subdomains.
        [.., owner, _tld] => *owner,
        // Single-label (or empty) hosts have no registrable label.
        _ => return None,
    };
    // Very short labels are too ambiguous to be used for ownership matching.
    (owner.len() >= 4).then(|| owner.to_string())
}
/// Splits a secret name into its identifying tokens.
///
/// `name` is lowercased and split on every character that is not an ASCII
/// letter or digit. A piece is kept only if it is at least four characters
/// long, is not listed in `SECRET_NAME_STOPWORDS`, and is not made up solely
/// of digits. The result is a sorted, de-duplicated set.
fn secret_identity_tokens(name: &str) -> BTreeSet<String> {
    let lowered = name.to_ascii_lowercase();
    let mut tokens = BTreeSet::new();
    for piece in lowered.split(|c: char| !c.is_ascii_alphanumeric()) {
        let too_short = piece.len() < 4;
        let generic = SECRET_NAME_STOPWORDS.contains(&piece);
        let numeric = piece.bytes().all(|b| b.is_ascii_digit());
        if too_short || generic || numeric {
            continue;
        }
        tokens.insert(piece.to_string());
    }
    tokens
}
/// Reports whether the host of `endpoint` plausibly belongs to the owner of
/// one of the secrets named in `secret_targets`.
///
/// The host's registrable label is compared with every identity token of
/// every target; a match is declared when the shorter of the two strings is
/// at least four characters long and occurs inside the longer one. Returns
/// `false` when the endpoint has no usable registrable label or when no
/// target yields a matching token.
pub(super) fn host_matches_secret_owner(endpoint: &str, secret_targets: &BTreeSet<String>) -> bool {
    match registrable_label(endpoint) {
        None => false,
        Some(label) => secret_targets
            .iter()
            .flat_map(|target| secret_identity_tokens(target))
            .any(|token| {
                // Containment is checked in whichever direction fits.
                let (needle, haystack) = if token.len() <= label.len() {
                    (token.as_str(), label.as_str())
                } else {
                    (label.as_str(), token.as_str())
                };
                needle.len() >= 4 && haystack.contains(needle)
            }),
    }
}
/// Extracts the host component from a URL or a bare `host[:port][/path]`
/// string, borrowing from `endpoint`.
///
/// Parsing follows the URI authority layout
/// `[scheme://][userinfo@]host[:port][/path][?query][#fragment]`:
///
/// 1. Surrounding whitespace is trimmed.
/// 2. A leading `scheme://` is removed only when `scheme` is a syntactically
///    valid URI scheme, so a `://` that appears later (for example inside a
///    query string of a schemeless endpoint) is not mistaken for one.
/// 3. The authority is cut at the first `/`, `?` or `#` *before* userinfo is
///    stripped, so an `@` in the path, query or fragment cannot redirect the
///    result to an attacker-chosen host.
/// 4. Userinfo (everything up to the last `@` in the authority) and a
///    trailing `:port` are removed.
///
/// A bracketed IPv6 literal is returned including its brackets (`[::1]`),
/// with or without a port. Returns `None` for empty input, an empty host, or
/// an unterminated `[` literal.
fn extract_host(endpoint: &str) -> Option<&str> {
    /// RFC 3986 scheme: a letter followed by letters, digits, `+`, `-` or `.`.
    fn is_uri_scheme(candidate: &str) -> bool {
        let mut chars = candidate.chars();
        matches!(chars.next(), Some(first) if first.is_ascii_alphabetic())
            && chars.all(|c| c.is_ascii_alphanumeric() || matches!(c, '+' | '-' | '.'))
    }

    let trimmed = endpoint.trim();
    if trimmed.is_empty() {
        return None;
    }

    let after_scheme = match trimmed.split_once("://") {
        Some((scheme, rest)) if is_uri_scheme(scheme) => rest,
        _ => trimmed,
    };

    // Isolate the authority first: '@' and ':' are only meaningful inside it.
    let authority = after_scheme
        .split(['/', '?', '#'])
        .next()
        .unwrap_or(after_scheme);

    let host_with_port = authority
        .rsplit_once('@')
        .map_or(authority, |(_userinfo, rest)| rest);

    let host = if host_with_port.starts_with('[') {
        // IPv6 literal: the colons inside the brackets are not port separators.
        let end = host_with_port.find(']')?;
        &host_with_port[..=end]
    } else {
        host_with_port
            .rsplit_once(':')
            .map_or(host_with_port, |(name, _port)| name)
    };

    (!host.is_empty()).then_some(host)
}
/// Tests `host` against a single allowlist `pattern`.
///
/// The pattern is lowercased before comparison; `host` is compared as given,
/// so callers are expected to pass an already-lowercased host.
///
/// * `*.suffix` matches when `host` ends with `.suffix`, i.e. the suffix must
///   be preceded by a dot. The bare `suffix` itself and look-alikes such as
///   `evilsuffix` do not match.
/// * Any other pattern matches only a host that is exactly equal to it.
fn matches_host_pattern(host: &str, pattern: &str) -> bool {
    let wanted = pattern.to_ascii_lowercase();
    match wanted.strip_prefix("*.") {
        // Whatever precedes the suffix has to end at a label boundary.
        Some(suffix) => matches!(host.strip_suffix(suffix), Some(rest) if rest.ends_with('.')),
        None => host == wanted,
    }
}
/// Returns `true` when `host` is a dotted-quad IPv4 literal: exactly four
/// dot-separated parts, each one to three ASCII digits with a value that fits
/// in a `u8` (0-255). Leading zeros are accepted; signs are not.
fn is_ipv4_literal(host: &str) -> bool {
    let mut seen = 0usize;
    let well_formed = host.split('.').all(|octet| {
        seen += 1;
        // The explicit digit check matters: `parse::<u8>` alone would accept "+1".
        (1..=3).contains(&octet.len())
            && octet.bytes().all(|b| b.is_ascii_digit())
            && octet.parse::<u8>().is_ok()
    });
    well_formed && seen == 4
}
// Unit tests for the host classification helpers defined in this file.
#[cfg(test)]
mod tests {
use super::*;
// A literal allowlist entry is matched with or without a scheme and path.
#[test]
fn literal_entry_matches_exact_host_only() {
assert!(is_trusted_api_host("https://api.github.com/users/me"));
assert!(is_trusted_api_host("api.github.com"));
}
// `*.suffix` entries match subdomains only: neither a look-alike without the
// dot separator nor the bare suffix domain is accepted.
#[test]
fn wildcard_subdomain_match_requires_dot_separator() {
assert!(is_trusted_api_host("https://sheets.googleapis.com/v4"));
assert!(is_trusted_api_host("storage.googleapis.com"));
assert!(!is_trusted_api_host("evilgoogleapis.com"));
assert!(!is_trusted_api_host("googleapis.com"));
}
// A trusted name appearing in the path, or as a prefix of a longer host, must
// not be mistaken for the host itself.
#[test]
fn substring_attack_does_not_match() {
assert!(!is_trusted_api_host("https://attacker.com/api.github.com"));
assert!(!is_trusted_api_host("https://api.github.com.evil.com/x"));
}
// IPv4 literals are rejected regardless of scheme or port.
#[test]
fn ipv4_literal_never_trusted() {
assert!(!is_trusted_api_host("https://192.168.1.1/api"));
assert!(!is_trusted_api_host("10.0.0.1:8080"));
assert!(!is_trusted_api_host("8.8.8.8"));
}
// The host is lowercased before it is compared with the allowlist.
#[test]
fn matching_is_case_insensitive() {
assert!(is_trusted_api_host("https://API.GITHUB.COM/users"));
assert!(is_trusted_api_host("Sheets.GoogleAPIs.com"));
}
// Endpoints without a scheme, with a port, or with a path all resolve to the
// same host.
#[test]
fn schemeless_and_port_forms_parse() {
assert!(is_trusted_api_host("api.openai.com"));
assert!(is_trusted_api_host("api.openai.com:443"));
assert!(is_trusted_api_host("api.openai.com/v1/chat/completions"));
}
// Empty, whitespace-only, host-less and non-URL inputs are never trusted.
#[test]
fn malformed_input_never_matches() {
assert!(!is_trusted_api_host(""));
assert!(!is_trusted_api_host(" "));
assert!(!is_trusted_api_host("https://"));
assert!(!is_trusted_api_host("not_a_url"));
}
// Example domains, reserved top-level names, localhost and 127.x.y.z
// addresses are all classified as documentation/reserved.
#[test]
fn documentation_hosts_recognised_as_reserved() {
for endpoint in [
"https://example.com/api",
"http://example.org",
"https://api.example.net/v1",
"https://foo.example",
"https://bar.test",
"http://baz.invalid",
"http://localhost:8080/health",
"http://api.localhost",
"http://127.0.0.1:5000",
"http://127.5.5.5",
] {
assert!(
is_documentation_or_reserved_host(endpoint),
"expected {endpoint} to be flagged as documentation/reserved",
);
}
}
// Look-alike domains, reserved names appearing only in the path, and
// non-loopback IPv4 addresses must not be classified as reserved.
#[test]
fn documentation_host_check_does_not_overmatch() {
for endpoint in [
"https://example-corp.com/api",
"https://examplecdn.io",
"https://attacker.com/example.com",
"https://10.0.0.5",
"https://192.168.1.1",
"https://8.8.8.8",
] {
assert!(
!is_documentation_or_reserved_host(endpoint),
"expected {endpoint} NOT to be flagged as documentation/reserved",
);
}
}
// Guards against accidentally dropping these provider hosts from the table.
#[test]
fn allowlist_includes_major_llm_providers() {
for host in [
"https://api.openai.com/v1",
"https://api.anthropic.com/v1/messages",
"https://api.x.ai/v1",
"https://ollama.com/api/chat",
"https://api.deepseek.com/v1",
] {
assert!(
is_trusted_api_host(host),
"expected {host} to be on allowlist",
);
}
}
// Helper: builds the owned set of secret names expected by
// `host_matches_secret_owner`.
fn names(items: &[&str]) -> BTreeSet<String> {
items.iter().map(|s| s.to_string()).collect()
}
// A secret whose name shares a token of four or more characters with the
// host's registrable label is treated as first-party to that host.
#[test]
fn host_matches_secret_owner_accepts_first_party_credential() {
for (target, sink) in [
("WAHOO_ACCESS_TOKEN", "https://api.wahooligan.com/v1/user"),
("ATOLL_API_KEY", "https://atollhq.com/api/feedback"),
("AGENTCALL_API_KEY", "https://api.agentcall.co/llms.txt"),
("SPEAK_API_KEY", "https://mcp.speakai.co"),
("NOTION_TOKEN", "https://notion.so/v1/pages"),
] {
assert!(
host_matches_secret_owner(sink, &names(&[target])),
"{target} must be recognised as first-party to {sink}"
);
}
}
// Secrets sent to hosts whose registrable label shares no identity token with
// the secret name are not first-party; an empty target set never matches.
#[test]
fn host_matches_secret_owner_rejects_cross_party_exfil() {
let cases: &[(&str, &str)] = &[
("AWS_SECRET_ACCESS_KEY", "https://collector.evil.com/up"),
("OPENAI_API_KEY", "https://exfil.example/post"),
(".env", "https://attacker.net/log"),
("~/.ssh/id_rsa", "https://drop.host.io/x"),
("GITHUB_TOKEN", "https://pastebin.com/raw/abc"),
("STRIPE_API_KEY", "https://api.evil.co"),
];
for (target, sink) in cases {
assert!(
!host_matches_secret_owner(sink, &names(&[target])),
"{target} → {sink} must NOT be treated as first-party"
);
}
assert!(!host_matches_secret_owner(
"https://api.wahooligan.com",
&BTreeSet::new()
));
}
// Only the registrable label counts: putting the vendor name in a subdomain
// of an unrelated domain does not produce a match.
#[test]
fn host_matches_secret_owner_ignores_spoofed_subdomain_label() {
assert!(!host_matches_secret_owner(
"https://openai-telemetry.attacker.com/collect",
&names(&["OPENAI_API_KEY"])
));
assert!(!host_matches_secret_owner(
"https://stripe.evilcorp.com/x",
&names(&["STRIPE_API_KEY"])
));
}
// The label left of the suffix is returned; IPv4 literals, single-label hosts
// and labels shorter than four characters yield `None`.
#[test]
fn registrable_label_extracts_owning_label() {
assert_eq!(
registrable_label("https://cloud-api.wahooligan.com/x").as_deref(),
Some("wahooligan")
);
assert_eq!(
registrable_label("https://api.speakai.co").as_deref(),
Some("speakai")
);
assert_eq!(
registrable_label("https://atollhq.com/api").as_deref(),
Some("atollhq")
);
assert_eq!(registrable_label("https://192.168.1.1/x"), None);
assert_eq!(registrable_label("https://localhost:8080"), None);
assert_eq!(registrable_label("https://api.x.io"), None);
}
}