use crate::artifact_graph::{ArtifactEdge, EndpointKind};
use crate::detectors::scripts::references_dotenv_file;
/// Heuristically decides whether `target` names a secret-bearing resource.
///
/// The comparison is ASCII-case-insensitive and proceeds in three stages,
/// stopping at the first hit:
///
/// 1. anything `references_dotenv_file` accepts,
/// 2. a list of specific credential file / variable names, matched as plain
///    substrings,
/// 3. a few generic keywords, which only count when `has_word_boundary`
///    confirms they are not embedded in a longer alphanumeric run.
pub(super) fn looks_like_secret_target(target: &str) -> bool {
    // Distinctive enough that a bare substring hit is accepted.
    const SPECIFIC_NEEDLES: &[&str] = &[
        ".npmrc",
        ".ssh",
        "id_rsa",
        "known_hosts",
        "aws_secret_access_key",
        "aws_session_token",
        "openai_api_key",
        "github_token",
        "gh_token",
        "google_application_credentials",
        "slack_bot_token",
    ];
    // Broad words that would over-match as substrings (e.g. inside "tokenizer").
    const GENERIC_KEYWORDS: &[&str] = &["token", "secret", "cookie", "session"];

    let normalized = target.to_ascii_lowercase();

    references_dotenv_file(&normalized)
        || SPECIFIC_NEEDLES
            .iter()
            .any(|needle| normalized.contains(*needle))
        || GENERIC_KEYWORDS.iter().any(|keyword| {
            normalized.contains(*keyword) && has_word_boundary(&normalized, keyword)
        })
}
/// Returns `true` when `keyword` occurs in `text` at least once as a
/// stand-alone word.
///
/// An occurrence is stand-alone when the character immediately before it and
/// the character immediately after it are each either absent (start / end of
/// `text`) or not alphanumeric according to [`char::is_alphanumeric`]. That
/// check is Unicode-aware, so a non-ASCII letter such as `ñ` next to the
/// keyword prevents a match, while separators such as `_`, `/`, `.`, `=` or a
/// space allow one.
///
/// Matching is case-sensitive; callers that want case-insensitive behaviour
/// must normalise both arguments first.
///
/// Every occurrence is examined, including overlapping ones. The function
/// never panics: the scan always resumes on a character boundary, so keywords
/// that start with a multi-byte character, and the empty keyword, are handled.
pub(super) fn has_word_boundary(text: &str, keyword: &str) -> bool {
    let mut search_from = 0;
    while let Some(offset) = text[search_from..].find(keyword) {
        let match_start = search_from + offset;
        let match_end = match_start + keyword.len();

        let before_ok = text[..match_start]
            .chars()
            .next_back()
            .is_none_or(|c| !c.is_alphanumeric());
        let after_ok = text[match_end..]
            .chars()
            .next()
            .is_none_or(|c| !c.is_alphanumeric());
        if before_ok && after_ok {
            return true;
        }

        // Resume one whole character past the rejected match start. Stepping a
        // single byte would slice inside a multi-byte character (panic), and
        // with an empty keyword it would eventually step past the end of
        // `text` (panic as well).
        match text[match_start..].chars().next() {
            Some(first) => search_from = match_start + first.len_utf8(),
            None => break,
        }
    }
    false
}
/// Heuristically decides whether `target` refers to identity or
/// authentication material.
///
/// The comparison is ASCII-case-insensitive. `oauth` and `identity` match as
/// plain substrings; the broader words `token`, `session`, `cookie` and
/// `credential` match only when `has_word_boundary` confirms they stand alone.
pub(super) fn looks_like_identity_target(target: &str) -> bool {
    let normalized = target.to_ascii_lowercase();

    let has_distinctive_marker = ["oauth", "identity"]
        .iter()
        .any(|marker| normalized.contains(*marker));
    if has_distinctive_marker {
        return true;
    }

    for keyword in ["token", "session", "cookie", "credential"] {
        // Cheap substring test first; the boundary scan only runs on a hit.
        if normalized.contains(keyword) && has_word_boundary(&normalized, keyword) {
            return true;
        }
    }
    false
}
/// Heuristically decides whether `edge` points at a destination outside the
/// local machine / package-registry ecosystem.
///
/// Decision order:
///
/// 1. An explicit `endpoint_kind` wins: `Remote`, `Transient` and
///    `ControlPlane` are external, `Registry` and `Local` are not.
/// 2. Otherwise the lowercased `edge.to` is checked against a list of known
///    service markers, a `webhook` path segment, and `webhook.site`; any hit
///    is external, and this check runs before the local-endpoint exclusion.
/// 3. Otherwise an `http://` or `https://` destination is external unless
///    `looks_like_registry_url` or `looks_like_local_endpoint` claims it.
pub(super) fn looks_like_external_sink(edge: &ArtifactEdge) -> bool {
    const KNOWN_EXTERNAL_MARKERS: &[&str] = &[
        "discord.com/api/webhooks",
        "api.telegram.org/bot",
        "pastebin.com",
        "ngrok",
        "trycloudflare",
        "raw.githubusercontent.com",
        "sendgrid",
        "mailgun",
    ];

    match edge.endpoint_kind {
        Some(EndpointKind::Remote | EndpointKind::Transient | EndpointKind::ControlPlane) => {
            return true;
        }
        Some(EndpointKind::Registry | EndpointKind::Local) => return false,
        // Unclassified (or any other kind): fall through to the text heuristics.
        _ => {}
    }

    let destination = edge.to.to_ascii_lowercase();

    let matches_known_marker = KNOWN_EXTERNAL_MARKERS
        .iter()
        .any(|marker| destination.contains(*marker));
    // Only an exact `webhook` path segment counts (optionally followed by a
    // query or fragment), so paths such as `/webhook-setup-guide` do not match.
    let is_known_external = matches_known_marker
        || destination.split('/').any(|segment| {
            segment == "webhook"
                || segment.starts_with("webhook?")
                || segment.starts_with("webhook#")
        })
        || destination.contains("webhook.site");
    if is_known_external {
        return true;
    }

    let is_http_url = destination.starts_with("http://") || destination.starts_with("https://");
    is_http_url && !looks_like_registry_url(&edge.to) && !looks_like_local_endpoint(&destination)
}
/// Heuristically decides whether `lower` refers to a loopback, wildcard-bind,
/// or private-network endpoint.
///
/// No case folding happens here; the visible caller passes text that is
/// already lowercased. A match is any of:
///
/// * the substrings `localhost`, `127.0.0.1` or `::1`,
/// * a stand-alone `0.0.0.0` (see `looks_like_bind_all_address`),
/// * a `.local` or `.internal` name that ends the string or is directly
///   followed by `/` or `:`.
pub(super) fn looks_like_local_endpoint(lower: &str) -> bool {
    const LOOPBACK_MARKERS: [&str; 3] = ["localhost", "127.0.0.1", "::1"];
    const PRIVATE_NAME_INFIXES: [&str; 4] = [".local/", ".local:", ".internal/", ".internal:"];
    const PRIVATE_NAME_SUFFIXES: [&str; 2] = [".local", ".internal"];

    if LOOPBACK_MARKERS.iter().any(|marker| lower.contains(*marker)) {
        return true;
    }
    if looks_like_bind_all_address(lower) {
        return true;
    }
    if PRIVATE_NAME_INFIXES
        .iter()
        .any(|infix| lower.contains(*infix))
    {
        return true;
    }
    PRIVATE_NAME_SUFFIXES
        .iter()
        .any(|suffix| lower.ends_with(*suffix))
}
/// Reports whether `text` contains the wildcard bind address `0.0.0.0` as a
/// stand-alone token rather than as a fragment of a longer dotted number.
///
/// An occurrence counts only when the byte immediately before it and the byte
/// immediately after it are each absent or not an ASCII digit, so `10.0.0.0`
/// and `0.0.0.01` are ignored.
fn looks_like_bind_all_address(text: &str) -> bool {
    const WILDCARD: &[u8] = b"0.0.0.0";

    let bytes = text.as_bytes();
    // `windows` visits every byte offset, so overlapping candidates are all
    // examined. The needle is pure ASCII, so a byte-level hit is always a
    // genuine substring hit even in non-ASCII text.
    bytes
        .windows(WILDCARD.len())
        .enumerate()
        .any(|(offset, window)| {
            if window != WILDCARD {
                return false;
            }
            let digit_before = bytes[..offset].last().is_some_and(|b| b.is_ascii_digit());
            let digit_after = bytes
                .get(offset + WILDCARD.len())
                .is_some_and(|b| b.is_ascii_digit());
            !digit_before && !digit_after
        })
}
/// Reports whether `url` mentions one of the recognised package or container
/// registry hosts.
///
/// The check is an ASCII-case-insensitive substring search, so a marker
/// anywhere in `url` (not only in the host part) produces a match.
pub(super) fn looks_like_registry_url(url: &str) -> bool {
    const REGISTRY_MARKERS: &[&str] = &[
        "registry.npmjs.org",
        "registry.yarnpkg.com",
        "files.pythonhosted.org",
        "pypi.org/packages",
        "crates.io/api",
        "static.crates.io",
        "index.crates.io",
        "registry.hub.docker.com",
        "ghcr.io",
    ];

    let normalized = url.to_ascii_lowercase();
    for marker in REGISTRY_MARKERS {
        if normalized.contains(*marker) {
            return true;
        }
    }
    false
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::artifact_graph::{ArtifactEdge, ArtifactRelation};

    /// Builds a `ConnectsTo` edge to `to` with no endpoint classification, so
    /// `looks_like_external_sink` has to rely on its text heuristics.
    fn unclassified_edge(to: &str) -> ArtifactEdge {
        ArtifactEdge {
            from: "a".to_string(),
            to: to.to_string(),
            relation: ArtifactRelation::ConnectsTo,
            endpoint_kind: None,
        }
    }

    // The boundary check is Unicode-aware: a non-ASCII letter is still a letter.
    #[test]
    fn has_word_boundary_rejects_adjacent_non_ascii_letter() {
        let leading_letter = has_word_boundary("ñtoken", "token");
        assert!(
            !leading_letter,
            "non-ASCII letter before keyword must NOT be a word boundary",
        );

        let trailing_letter = has_word_boundary("tokenñ", "token");
        assert!(
            !trailing_letter,
            "non-ASCII letter after keyword must NOT be a word boundary",
        );
    }

    #[test]
    fn has_word_boundary_accepts_ascii_separators_and_bare_keyword() {
        for text in ["token", "/token=", "auth_token foo", "session.token"] {
            assert!(
                has_word_boundary(text, "token"),
                "expected a stand-alone `token` in {:?}",
                text
            );
        }
    }

    #[test]
    fn has_word_boundary_rejects_ascii_alphanumeric_flanks() {
        for text in ["tokenizer", "mytoken", "token1"] {
            assert!(
                !has_word_boundary(text, "token"),
                "expected no stand-alone `token` in {:?}",
                text
            );
        }
    }

    #[test]
    fn looks_like_external_sink_rejects_webhook_documentation_urls() {
        // A path that merely starts with "webhook" on a local host is not a sink.
        let local_doc_url = unclassified_edge("http://localhost:3000/webhook-setup-guide");
        assert!(
            !looks_like_external_sink(&local_doc_url),
            "localhost URL with '/webhook-setup-guide' must NOT be classified as an external sink"
        );

        let real_webhook = unclassified_edge("https://hooks.slack.com/services/webhook");
        assert!(
            looks_like_external_sink(&real_webhook),
            "actual webhook endpoint URL must be classified as an external sink"
        );

        let webhook_site = unclassified_edge("https://webhook.site/abc123");
        assert!(
            looks_like_external_sink(&webhook_site),
            "webhook.site URLs must be classified as an external sink"
        );

        let webhook_path = unclassified_edge("https://api.example.com/webhook/notify");
        assert!(
            looks_like_external_sink(&webhook_path),
            "/webhook/ as a path segment must be classified as an external sink"
        );

        let doc_path = unclassified_edge("http://localhost:8080/webhook-setup-guide");
        assert!(
            !looks_like_external_sink(&doc_path),
            "local URL with '/webhook-setup-guide' must NOT match the webhook pattern"
        );
    }
}