use std::collections::HashMap;
/// Returns the built-in table mapping a detector service name to the domains
/// its verification requests may target.
///
/// The map is built once, on first call, and the same reference is handed out
/// on every later call. The `jwt` and `generic` services are present with an
/// empty domain list.
pub fn builtin_service_domains() -> &'static HashMap<&'static str, &'static [&'static str]> {
    use std::sync::OnceLock;

    // Declarative (service, domains) table; collected into the map on first use.
    const SERVICE_DOMAINS: &[(&str, &[&str])] = &[
        ("aws", &["amazonaws.com", "aws.amazon.com", "on.aws"]),
        (
            "github",
            &["github.com", "githubusercontent.com", "githubapp.com"],
        ),
        ("gitlab", &["gitlab.com"]),
        ("bitbucket", &["bitbucket.org", "atlassian.com"]),
        (
            "gcp",
            &["googleapis.com", "google.com", "googleusercontent.com"],
        ),
        (
            "google",
            &["googleapis.com", "google.com", "googleusercontent.com"],
        ),
        (
            "azure",
            &[
                "azure.com",
                "microsoft.com",
                "microsoftonline.com",
                "azurewebsites.net",
                "windows.net",
                "azure-api.net",
            ],
        ),
        ("slack", &["slack.com"]),
        ("discord", &["discord.com", "discordapp.com"]),
        ("telegram", &["telegram.org", "t.me"]),
        ("twilio", &["twilio.com"]),
        ("sendgrid", &["sendgrid.com", "api.sendgrid.com"]),
        ("mailgun", &["mailgun.net", "mailgun.com"]),
        ("postmark", &["postmarkapp.com"]),
        ("stripe", &["stripe.com"]),
        ("paypal", &["paypal.com", "paypalobjects.com"]),
        ("square", &["squareup.com", "squarecdn.com"]),
        ("braintree", &["braintreegateway.com", "braintree-api.com"]),
        ("plaid", &["plaid.com"]),
        ("twitter", &["twitter.com", "x.com", "twitterapi.com"]),
        ("openai", &["openai.com", "openai.azure.com"]),
        ("anthropic", &["anthropic.com"]),
        ("huggingface", &["huggingface.co", "hf.co"]),
        ("replicate", &["replicate.com", "replicate.delivery"]),
        ("notion", &["notion.so", "notion.com"]),
        ("airtable", &["airtable.com"]),
        ("asana", &["asana.com"]),
        ("trello", &["trello.com", "atlassian.com"]),
        ("jira", &["atlassian.com", "atlassian.net"]),
        ("confluence", &["atlassian.com", "atlassian.net"]),
        ("digitalocean", &["digitalocean.com"]),
        ("heroku", &["heroku.com", "herokuapp.com"]),
        ("netlify", &["netlify.com", "netlify.app"]),
        ("vercel", &["vercel.com", "vercel.app"]),
        ("cloudflare", &["cloudflare.com"]),
        ("fastly", &["fastly.com"]),
        ("akamai", &["akamai.com", "akamaihd.net"]),
        ("datadog", &["datadoghq.com", "datadoghq.eu"]),
        ("pagerduty", &["pagerduty.com"]),
        ("newrelic", &["newrelic.com"]),
        ("sentry", &["sentry.io"]),
        ("rollbar", &["rollbar.com"]),
        ("bugsnag", &["bugsnag.com"]),
        ("npm", &["npmjs.com", "npmjs.org"]),
        ("pypi", &["pypi.org"]),
        ("rubygems", &["rubygems.org"]),
        ("dockerhub", &["docker.com", "docker.io"]),
        ("docker", &["docker.com", "docker.io"]),
        ("crates", &["crates.io"]),
        ("npm_token", &["npmjs.com", "npmjs.org"]),
        ("shopify", &["shopify.com", "myshopify.com"]),
        ("zendesk", &["zendesk.com"]),
        ("freshdesk", &["freshdesk.com"]),
        ("hubspot", &["hubapi.com", "hubspot.com"]),
        ("intercom", &["intercom.io", "intercom.com"]),
        ("linear", &["linear.app"]),
        ("monday", &["monday.com"]),
        ("clickup", &["clickup.com"]),
        ("figma", &["figma.com"]),
        (
            "dropbox",
            &["dropbox.com", "dropboxapi.com", "dropboxusercontent.com"],
        ),
        ("box", &["box.com", "boxcloud.com"]),
        ("zoom", &["zoom.us"]),
        ("okta", &["okta.com", "oktapreview.com"]),
        ("auth0", &["auth0.com"]),
        ("keycloak", &["keycloak.org"]),
        ("upstash", &["upstash.io", "upstash.com"]),
        ("redis", &["redis.com", "redislabs.com"]),
        ("mongodb", &["mongodb.com", "mongodb.net"]),
        ("supabase", &["supabase.co", "supabase.com"]),
        (
            "firebase",
            &["firebaseio.com", "firebaseapp.com", "googleapis.com"],
        ),
        ("snyk", &["snyk.io"]),
        ("sonarqube", &["sonarsource.com", "sonarcloud.io"]),
        ("sonarcloud", &["sonarsource.com", "sonarcloud.io"]),
        ("circleci", &["circleci.com"]),
        ("travisci", &["travis-ci.com", "travis-ci.org"]),
        ("buildkite", &["buildkite.com"]),
        ("jfrog", &["jfrog.io", "jfrog.com"]),
        ("artifactory", &["jfrog.io", "jfrog.com"]),
        ("nexus", &["sonatype.com"]),
        ("paloalto", &["paloaltonetworks.com"]),
        ("fortinet", &["fortinet.com", "fortigate.com"]),
        ("cisco", &["cisco.com"]),
        ("canvas", &["instructure.com"]),
        ("authentik", &["goauthentik.io"]),
        ("ansible", &["ansible.com", "redhat.com"]),
        ("thales", &["thalesgroup.com", "ciphertrust.com"]),
        ("cypress", &["cypress.io"]),
        ("uploadcare", &["uploadcare.com"]),
        ("bigcommerce", &["bigcommerce.com"]),
        ("wechat", &["weixin.qq.com", "wechat.com"]),
        ("huawei", &["huaweicloud.com", "huawei.com"]),
        ("jwt", &[]),
        ("generic", &[]),
    ];

    static MAP: OnceLock<HashMap<&'static str, &'static [&'static str]>> = OnceLock::new();
    MAP.get_or_init(|| SERVICE_DOMAINS.iter().copied().collect())
}
/// Resolves the domain allowlist that applies to `spec`.
///
/// * If `spec.allowed_domains` is non-empty it wins outright: each entry is
///   trimmed, lowercased, stripped of a leading `http://` / `https://` scheme,
///   and dropped if nothing remains. The result may therefore be
///   `Some(vec![])` when every entry was blank.
/// * Otherwise the built-in table is consulted using `spec.service` as the
///   (case-sensitive) key.
///
/// Returns `None` when there is no explicit list and the service name is
/// empty or absent from the built-in table.
pub fn effective_allowlist(spec: &keyhog_core::VerifySpec) -> Option<Vec<String>> {
    if !spec.allowed_domains.is_empty() {
        let explicit = spec
            .allowed_domains
            .iter()
            .map(|entry| {
                // Lowercase BEFORE stripping the scheme: URL schemes are
                // case-insensitive, so `HTTPS://Example.com` must normalise to
                // `example.com` rather than keep a scheme prefix that can
                // never match a host.
                let lowered = entry.trim().to_lowercase();
                lowered
                    .trim_start_matches("https://")
                    .trim_start_matches("http://")
                    .to_owned()
            })
            .filter(|entry| !entry.is_empty())
            .collect();
        return Some(explicit);
    }

    let key = spec.service.as_str();
    if key.is_empty() {
        return None;
    }
    builtin_service_domains()
        .get(key)
        .map(|domains| domains.iter().map(|d| d.to_string()).collect())
}
/// Reports whether `host` is covered by `allowlist`.
///
/// Both the host and every allowlist entry are compared after removing
/// trailing dots and lowercasing. A host is covered when it equals an entry
/// or is a subdomain of it (i.e. ends with `.` followed by the entry), so
/// `api.github.com` matches `github.com` while `evilgithub.com` does not.
///
/// Returns `false` for an empty allowlist, for a host that is empty once
/// normalised (including hosts made only of dots), and never matches an
/// allowlist entry that is empty once normalised.
pub fn host_is_allowed(host: &str, allowlist: &[String]) -> bool {
    // Normalise before the emptiness check: a host such as "." is non-empty
    // as given but collapses to "" here, and must not be allowed to match.
    let host = host.trim_end_matches('.').to_lowercase();
    if host.is_empty() {
        return false;
    }

    allowlist.iter().any(|allowed| {
        let allowed = allowed.trim_end_matches('.').to_lowercase();
        // A blank entry (e.g. "" or ".") authorises nothing.
        if allowed.is_empty() {
            return false;
        }
        // Exact match, or `allowed` preceded by a label separator. Requiring
        // the '.' is what rejects look-alikes such as `evilgithub.com`.
        host == allowed
            || host
                .strip_suffix(allowed.as_str())
                .is_some_and(|prefix| prefix.ends_with('.'))
    })
}
/// Validates that `raw_url` targets a host permitted for `spec`.
///
/// # Errors
///
/// Returns a human-readable `blocked: ...` message when:
/// * `raw_url` cannot be parsed as a URL,
/// * no allowlist can be resolved for the spec (no explicit
///   `allowed_domains` and no built-in entry for the service), or
/// * the URL's host is not covered by the resolved allowlist. A URL without a
///   host is treated as having an empty host and is rejected here too.
pub fn check_url_against_spec(raw_url: &str, spec: &keyhog_core::VerifySpec) -> Result<(), String> {
    let parsed = match reqwest::Url::parse(raw_url) {
        Ok(parsed) => parsed,
        Err(e) => return Err(format!("blocked: invalid verify URL: {e}")),
    };
    let host = parsed.host_str().unwrap_or_default();

    match effective_allowlist(spec) {
        None => Err(format!(
            "blocked: detector service '{}' has no domain allowlist (set verify.allowed_domains in the detector TOML)",
            spec.service
        )),
        Some(allowlist) if host_is_allowed(host, &allowlist) => Ok(()),
        Some(allowlist) => Err(format!(
            "blocked: host '{host}' is not in the allowlist for service '{}' (allowed: {})",
            spec.service,
            allowlist.join(", ")
        )),
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use keyhog_core::VerifySpec;

    /// Builds a spec for `service` with the given explicit allowlist; all
    /// other fields take their defaults.
    fn spec_with(service: &str, allowed: Vec<String>) -> VerifySpec {
        VerifySpec {
            allowed_domains: allowed,
            service: service.to_owned(),
            ..Default::default()
        }
    }

    /// True when `url` passes the allowlist check for `spec`.
    fn permits(url: &str, spec: &VerifySpec) -> bool {
        check_url_against_spec(url, spec).is_ok()
    }

    #[test]
    fn explicit_allowlist_overrides_builtin() {
        let spec = spec_with("github", vec![String::from("only-this.example.com")]);
        assert!(permits("https://only-this.example.com/x", &spec));
        assert!(!permits("https://api.github.com/x", &spec));
    }

    #[test]
    fn builtin_used_when_no_explicit_list() {
        let spec = spec_with("github", Vec::new());
        assert!(permits("https://api.github.com/x", &spec));
        assert!(!permits("https://attacker.com/x", &spec));
    }

    #[test]
    fn unknown_service_with_no_explicit_list_is_refused() {
        let spec = spec_with("attacker-controlled", Vec::new());
        assert!(!permits("https://anything.com/x", &spec));
    }

    #[test]
    fn empty_service_with_no_explicit_list_is_refused() {
        let spec = spec_with("", Vec::new());
        assert!(!permits("https://api.github.com/x", &spec));
    }

    #[test]
    fn subdomain_match_works() {
        let spec = spec_with("aws", Vec::new());
        assert!(permits("https://lambda.us-east-1.amazonaws.com/x", &spec));
    }

    #[test]
    fn lookalike_does_not_match() {
        let spec = spec_with("github", Vec::new());
        assert!(!permits("https://evilgithub.com/x", &spec));
    }

    #[test]
    fn discord_webhook_still_works() {
        let spec = spec_with("discord", Vec::new());
        assert!(permits("https://discord.com/api/webhooks/123/abc", &spec));
        assert!(!permits("https://attacker.example.com/exfil", &spec));
    }

    #[test]
    fn slack_webhook_still_works() {
        let spec = spec_with("slack", Vec::new());
        assert!(permits("https://hooks.slack.com/services/T0/B0/abc", &spec));
        assert!(!permits("https://attacker.example.com/exfil", &spec));
    }
}