use crate::artifact_graph::{ArtifactEdge, EndpointKind};
use crate::detectors::scripts::references_dotenv_file;
/// Heuristically decides whether `target` refers to secret-bearing material.
///
/// Matching is ASCII-case-insensitive. The checks run in this order and the
/// first hit wins:
///
/// 1. `references_dotenv_file` accepts the lowercased target;
/// 2. one of the specific credential markers (for example `.npmrc`,
///    `id_rsa`, `github_token`) appears anywhere in the target;
/// 3. one of the generic keywords (`token`, `secret`, `cookie`, `session`)
///    appears as a standalone word according to `has_word_boundary`.
pub(super) fn looks_like_secret_target(target: &str) -> bool {
    /// Substrings that are specific enough to match without a boundary check.
    const SPECIFIC_MARKERS: &[&str] = &[
        ".npmrc",
        ".ssh",
        "id_rsa",
        "known_hosts",
        "aws_secret_access_key",
        "aws_session_token",
        "openai_api_key",
        "github_token",
        "gh_token",
        "google_application_credentials",
        "slack_bot_token",
    ];
    /// Common words that only count when they are not embedded in a longer
    /// alphanumeric run.
    const GENERIC_KEYWORDS: &[&str] = &["token", "secret", "cookie", "session"];

    let lowered = target.to_ascii_lowercase();

    references_dotenv_file(&lowered)
        || SPECIFIC_MARKERS
            .iter()
            .any(|marker| lowered.contains(*marker))
        // `has_word_boundary` already yields `false` when the keyword is
        // absent, so no separate `contains` pre-check is needed.
        || GENERIC_KEYWORDS
            .iter()
            .any(|word| has_word_boundary(&lowered, word))
}
/// Reports whether `keyword` occurs in `text` as a standalone word.
///
/// An occurrence qualifies when the character immediately before it and the
/// character immediately after it are each either missing (start or end of
/// `text`) or not alphanumeric. Alphanumeric is judged with
/// [`char::is_alphanumeric`], so non-ASCII letters and digits also prevent a
/// boundary. Every occurrence is examined, including overlapping ones.
///
/// Returns `false` when `keyword` does not occur in `text`. An empty
/// `keyword` matches at every character boundary and is therefore accepted
/// only where both neighbours are missing or non-alphanumeric.
///
/// This function never panics: the scan advances by whole characters, so a
/// `keyword` that is empty or begins with a multi-byte character is safe.
pub(super) fn has_word_boundary(text: &str, keyword: &str) -> bool {
    let mut search_from = 0;
    while let Some(offset) = text[search_from..].find(keyword) {
        let match_start = search_from + offset;
        let match_end = match_start + keyword.len();

        let flanked_before = text[..match_start]
            .chars()
            .next_back()
            .is_some_and(char::is_alphanumeric);
        let flanked_after = text[match_end..]
            .chars()
            .next()
            .is_some_and(char::is_alphanumeric);
        if !flanked_before && !flanked_after {
            return true;
        }

        // Resume exactly one character past the start of this match so that
        // overlapping occurrences are still seen. Stepping by the character's
        // UTF-8 width (rather than one byte) keeps the next slice on a char
        // boundary; reaching the end of `text` ends the scan.
        match text[match_start..].chars().next() {
            Some(first) => search_from = match_start + first.len_utf8(),
            None => break,
        }
    }
    false
}
/// Heuristically decides whether `target` refers to identity material.
///
/// Matching is ASCII-case-insensitive. `oauth` and `identity` match anywhere
/// in the target; `token`, `session`, `cookie` and `credential` match only
/// as standalone words according to `has_word_boundary`.
pub(super) fn looks_like_identity_target(target: &str) -> bool {
    /// Markers accepted as plain substrings.
    const SUBSTRING_MARKERS: [&str; 2] = ["oauth", "identity"];
    /// Markers that must not be embedded in a longer alphanumeric run.
    const WORD_MARKERS: [&str; 4] = ["token", "session", "cookie", "credential"];

    let lowered = target.to_ascii_lowercase();

    SUBSTRING_MARKERS
        .iter()
        .any(|marker| lowered.contains(*marker))
        // `has_word_boundary` already yields `false` when the keyword is
        // absent, so no separate `contains` pre-check is needed.
        || WORD_MARKERS
            .iter()
            .any(|word| has_word_boundary(&lowered, word))
}
/// Decides whether `edge` points at a destination treated as an external
/// sink.
///
/// An explicit endpoint kind settles the question: `Remote`, `Transient`
/// and `ControlPlane` are sinks, `Registry` and `Local` are not. For any
/// other value (including `None`) the lowercased `edge.to` is inspected:
///
/// * a known drop-host marker, a path segment that is exactly `webhook`
///   (optionally followed by `?` or `#`), or `webhook.site` makes it a sink;
/// * otherwise it is a sink only when it is an `http://` or `https://` URL
///   that is not a registry URL, not a software-distribution URL and not a
///   local endpoint.
pub(super) fn looks_like_external_sink(edge: &ArtifactEdge) -> bool {
    /// Substrings that mark a destination as external wherever they appear.
    const DROP_HOST_MARKERS: &[&str] = &[
        "discord.com/api/webhooks",
        "api.telegram.org/bot",
        "pastebin.com",
        "ngrok",
        "trycloudflare",
        "raw.githubusercontent.com",
        "sendgrid",
        "mailgun",
    ];

    match edge.endpoint_kind {
        Some(EndpointKind::Remote | EndpointKind::Transient | EndpointKind::ControlPlane) => {
            return true;
        }
        Some(EndpointKind::Registry | EndpointKind::Local) => return false,
        // Unclassified (or any other kind): fall through to URL heuristics.
        _ => {}
    }

    let destination = edge.to.to_ascii_lowercase();

    if DROP_HOST_MARKERS
        .iter()
        .any(|marker| destination.contains(*marker))
    {
        return true;
    }

    // Only a whole `webhook` segment counts; `webhook-setup-guide` and
    // similar longer segments do not.
    let has_webhook_segment = destination.split('/').any(|segment| {
        segment
            .strip_prefix("webhook")
            .is_some_and(|rest| rest.is_empty() || rest.starts_with(['?', '#']))
    });
    if has_webhook_segment || destination.contains("webhook.site") {
        return true;
    }

    let is_http_url = destination.starts_with("http://") || destination.starts_with("https://");
    if !is_http_url {
        return false;
    }

    !(looks_like_registry_url(&edge.to)
        || looks_like_software_distribution_url(&destination)
        || looks_like_local_endpoint(&destination))
}
/// Decides whether an already-lowercased URL looks like a software download.
///
/// The scheme (everything up to and including the first `://`), the query
/// string, the fragment and any trailing `/` are ignored. What remains is
/// treated as a download when it contains `/releases/download/` or `/dist/`,
/// or when it ends with one of the known package or archive extensions.
///
/// `lower` is compared as-is; the caller is expected to have lowercased it.
pub(super) fn looks_like_software_distribution_url(lower: &str) -> bool {
    /// File-name suffixes of installable packages and archives.
    const PACKAGE_SUFFIXES: &[&str] = &[
        ".tar.gz",
        ".tar.bz2",
        ".tar.xz",
        ".tar.zst",
        ".tgz",
        ".tbz2",
        ".txz",
        ".rpm",
        ".deb",
        ".pkg",
        ".dmg",
        ".msi",
        ".apk",
        ".appimage",
        ".whl",
        ".gem",
        ".jar",
        ".nupkg",
        ".crate",
        ".snap",
        ".flatpak",
        ".7z",
        ".zst",
    ];

    let without_scheme = match lower.split_once("://") {
        Some((_, rest)) => rest,
        None => lower,
    };

    // Cut at the first `?` or `#`, whichever comes first.
    let cut = without_scheme
        .find(['?', '#'])
        .unwrap_or(without_scheme.len());
    let location = without_scheme[..cut].trim_end_matches('/');

    if location.contains("/releases/download/") || location.contains("/dist/") {
        return true;
    }

    PACKAGE_SUFFIXES
        .iter()
        .any(|suffix| location.ends_with(*suffix))
}
/// Decides whether an already-lowercased endpoint string looks local.
///
/// The string is local when it contains `localhost`, `127.0.0.1`, `::1`, a
/// standalone `0.0.0.0` (see `looks_like_bind_all_address`), or a `.local` /
/// `.internal` suffix that is followed by `/` or `:` or ends the string.
///
/// `lower` is compared as-is; the caller is expected to have lowercased it.
pub(super) fn looks_like_local_endpoint(lower: &str) -> bool {
    /// Needles accepted anywhere in the string.
    const CONTAINED: [&str; 7] = [
        "localhost",
        "127.0.0.1",
        "::1",
        ".local/",
        ".local:",
        ".internal/",
        ".internal:",
    ];
    /// Needles accepted only at the very end of the string.
    const TRAILING: [&str; 2] = [".local", ".internal"];

    CONTAINED.iter().any(|needle| lower.contains(*needle))
        || TRAILING.iter().any(|suffix| lower.ends_with(*suffix))
        || looks_like_bind_all_address(lower)
}
/// Reports whether `text` contains `0.0.0.0` that is not part of a longer
/// run of digits.
///
/// An occurrence counts when the byte directly before it and the byte
/// directly after it are each either missing or not an ASCII digit, so
/// `10.0.0.0` and `0.0.0.01` do not match on their own. Every occurrence is
/// examined, including overlapping ones.
fn looks_like_bind_all_address(text: &str) -> bool {
    const BIND_ALL: &[u8] = b"0.0.0.0";

    let bytes = text.as_bytes();
    bytes
        .windows(BIND_ALL.len())
        .enumerate()
        .any(|(offset, window)| {
            if window != BIND_ALL {
                return false;
            }
            let digit_before = offset
                .checked_sub(1)
                .is_some_and(|index| bytes[index].is_ascii_digit());
            let digit_after = bytes
                .get(offset + BIND_ALL.len())
                .is_some_and(|byte| byte.is_ascii_digit());
            !digit_before && !digit_after
        })
}
/// Reports whether `url` mentions a known package or container registry.
///
/// The comparison is ASCII-case-insensitive and purely substring-based: the
/// URL matches when any of the registry markers appears anywhere in it.
pub(super) fn looks_like_registry_url(url: &str) -> bool {
    /// Host (or host-plus-path) fragments of recognised registries.
    const REGISTRY_MARKERS: &[&str] = &[
        "registry.npmjs.org",
        "registry.yarnpkg.com",
        "files.pythonhosted.org",
        "pypi.org/packages",
        "crates.io/api",
        "static.crates.io",
        "index.crates.io",
        "registry.hub.docker.com",
        "ghcr.io",
    ];

    let normalized = url.to_ascii_lowercase();
    for marker in REGISTRY_MARKERS {
        if normalized.contains(*marker) {
            return true;
        }
    }
    false
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::artifact_graph::{ArtifactEdge, ArtifactRelation};

    /// Builds a `ConnectsTo` edge from the placeholder node `"a"` to `url`
    /// with no endpoint kind, so classification falls to the URL heuristics.
    fn unclassified_edge(url: &str) -> ArtifactEdge {
        ArtifactEdge {
            from: "a".to_string(),
            to: url.to_string(),
            relation: ArtifactRelation::ConnectsTo,
            endpoint_kind: None,
        }
    }

    /// Shorthand: classify `url` as it would be on an unclassified edge.
    fn is_sink(url: &str) -> bool {
        looks_like_external_sink(&unclassified_edge(url))
    }

    // Non-ASCII letters next to the keyword count as alphanumeric flanks.
    #[test]
    fn has_word_boundary_rejects_adjacent_non_ascii_letter() {
        assert!(
            !has_word_boundary("ñtoken", "token"),
            "non-ASCII letter before keyword must NOT be a word boundary",
        );
        assert!(
            !has_word_boundary("tokenñ", "token"),
            "non-ASCII letter after keyword must NOT be a word boundary",
        );
    }

    // Punctuation, whitespace and string edges all act as boundaries.
    #[test]
    fn has_word_boundary_accepts_ascii_separators_and_bare_keyword() {
        assert!(has_word_boundary("token", "token"));
        assert!(has_word_boundary("/token=", "token"));
        assert!(has_word_boundary("auth_token foo", "token"));
        assert!(has_word_boundary("session.token", "token"));
    }

    // ASCII letters or digits on either side prevent a match.
    #[test]
    fn has_word_boundary_rejects_ascii_alphanumeric_flanks() {
        assert!(!has_word_boundary("tokenizer", "token"));
        assert!(!has_word_boundary("mytoken", "token"));
        assert!(!has_word_boundary("token1", "token"));
    }

    // Only a whole `webhook` path segment (or webhook.site) marks a sink.
    #[test]
    fn looks_like_external_sink_rejects_webhook_documentation_urls() {
        assert!(
            !is_sink("http://localhost:3000/webhook-setup-guide"),
            "localhost URL with '/webhook-setup-guide' must NOT be classified as an external sink"
        );
        assert!(
            is_sink("https://hooks.slack.com/services/webhook"),
            "actual webhook endpoint URL must be classified as an external sink"
        );
        assert!(
            is_sink("https://webhook.site/abc123"),
            "webhook.site URLs must be classified as an external sink"
        );
        assert!(
            is_sink("https://api.example.com/webhook/notify"),
            "/webhook/ as a path segment must be classified as an external sink"
        );
        assert!(
            !is_sink("http://localhost:8080/webhook-setup-guide"),
            "local URL with '/webhook-setup-guide' must NOT match the webhook pattern"
        );
    }

    // Package and release downloads are carved out of the generic HTTP rule.
    #[test]
    fn software_distribution_urls_are_not_exfil_sinks() {
        let downloads = [
            "https://repo.percona.com/yum/percona-release-latest.noarch.rpm",
            "https://github.com/org/tool/releases/download/v1.2.3/tool-linux.tar.gz",
            "https://host.example/pkg/app.whl",
            "https://downloads.vendor.io/cli/cli_amd64.deb",
            "https://get.example.org/installer.pkg?os=mac",
            "https://cdn.vendor.net/dist/bundle.zst",
        ];
        for url in downloads {
            assert!(
                looks_like_software_distribution_url(&url.to_ascii_lowercase()),
                "{url} must be classified as a software-distribution download"
            );
            assert!(!is_sink(url), "{url} must NOT be an external exfil sink");
        }
    }

    // The carve-out must not hide ordinary exfil endpoints or known drop hosts.
    #[test]
    fn distribution_carveout_preserves_exfil_recall() {
        let exfil_endpoints = [
            "https://attacker.example/collect",
            "https://api.evil.net/v1/log?d=secret",
            "https://exfil.example/upload.php",
            "https://hooks.example.com/webhook/abc",
        ];
        for url in exfil_endpoints {
            assert!(is_sink(url), "{url} must remain an external exfil sink");
        }

        assert!(
            is_sink("https://pastebin.com/raw/payload.rpm"),
            "a package extension on a known drop host must still fire"
        );
    }
}