use super::decision::should_suppress_inner;
use super::path_filter::{looks_like_secret_scanner_source, looks_like_vendored_minified_path};
use super::shape::{
contains_uuid_v4_substring, looks_like_credential_colliding_punctuation,
looks_like_email_address, looks_like_pure_identifier, looks_like_regex_literal_tail,
looks_like_scheme_prefixed_uri, looks_like_syntactic_punctuation_marker,
looks_like_url_or_path_segment, looks_like_word_separated_identifier,
};
use crate::context;
/// Returns the suppression verdict for `credential` when the caller has no source type.
///
/// This is a convenience entry point: it forwards to
/// `should_suppress_known_example_credential_with_source` with `source_type` set to `None`.
pub fn should_suppress_known_example_credential(
    credential: &str,
    path: Option<&str>,
    context: context::CodeContext,
) -> bool {
    let source_type: Option<&str> = None;
    should_suppress_known_example_credential_with_source(credential, path, context, source_type)
}
/// Returns the suppression verdict for `credential`, optionally taking the scan's
/// `source_type` into account.
///
/// Forwards to `should_suppress_inner` with both of its boolean flags set to `false`
/// and no entropy value.
pub fn should_suppress_known_example_credential_with_source(
    credential: &str,
    path: Option<&str>,
    context: context::CodeContext,
    source_type: Option<&str>,
) -> bool {
    // This entry point never bypasses the shape gates and never supplies an entropy.
    let bypass_shape_gates = false;
    let entropy = None;
    should_suppress_inner(
        credential,
        path,
        context,
        source_type,
        false,
        bypass_shape_gates,
        entropy,
    )
}
/// Crate-internal variant of the known-example check that also passes a precomputed
/// `entropy` value for the credential.
///
/// Forwards to `should_suppress_inner` with both of its boolean flags set to `false`
/// and the entropy wrapped in `Some`.
pub(crate) fn should_suppress_known_example_credential_with_source_and_entropy(
    credential: &str,
    path: Option<&str>,
    context: context::CodeContext,
    source_type: Option<&str>,
    entropy: f64,
) -> bool {
    let bypass_shape_gates = false;
    let known_entropy = Some(entropy);
    should_suppress_inner(
        credential,
        path,
        context,
        source_type,
        false,
        bypass_shape_gates,
        known_entropy,
    )
}
/// Returns the suppression verdict for a finding produced by the detector `detector_id`.
///
/// Equivalent to calling `should_suppress_named_detector_finding_weak` with
/// `weak_anchor` set to `false`.
pub fn should_suppress_named_detector_finding(
    credential: &str,
    path: Option<&str>,
    context: context::CodeContext,
    source_type: Option<&str>,
    detector_id: &str,
) -> bool {
    let weak_anchor = false;
    should_suppress_named_detector_finding_weak(
        credential,
        path,
        context,
        source_type,
        detector_id,
        weak_anchor,
    )
}
/// Returns the suppression verdict for a finding produced by the detector `detector_id`,
/// with the caller stating whether that detector is weakly anchored.
///
/// A fixed sequence of checks runs first, in order. The first check that matches
/// records one telemetry event through `crate::telemetry::record_example_suppression`
/// (first argument `"pipeline"`, last argument the reason label of that check) and the
/// function returns `true`. Checks gated on "tier B" only run when
/// `is_generic_or_entropy(detector_id, weak_anchor)` is true; the remaining checks run
/// for every detector. When no check matches, the verdict is whatever
/// `should_suppress_inner` returns.
pub fn should_suppress_named_detector_finding_weak(
    credential: &str,
    path: Option<&str>,
    context: context::CodeContext,
    source_type: Option<&str>,
    detector_id: &str,
    weak_anchor: bool,
) -> bool {
    let tier_b = is_generic_or_entropy(detector_id, weak_anchor);

    // Path test used by the `raw_base64_file` check: the whole path ends in `.b64` or
    // `.base64`, or its basename starts with `base64_`, contains `base64_string`, or
    // equals `base64.txt` (all compared through the ASCII-case-insensitive helpers).
    let names_raw_base64_file = |candidate: &str| -> bool {
        let raw = candidate.as_bytes();
        if crate::ascii_ci::ends_with_ignore_ascii_case(raw, b".b64") {
            return true;
        }
        if crate::ascii_ci::ends_with_ignore_ascii_case(raw, b".base64") {
            return true;
        }
        // Everything after the last `/` or `\`; the whole path when neither occurs.
        let file_name = raw
            .rsplit(|&byte| byte == b'/' || byte == b'\\')
            .next()
            .unwrap_or(raw);
        let has_base64_prefix = file_name
            .get(..7)
            .is_some_and(|head| head.eq_ignore_ascii_case(b"base64_"));
        has_base64_prefix
            || crate::ascii_ci::ci_find(file_name, b"base64_string")
            || file_name.eq_ignore_ascii_case(b"base64.txt")
    };

    // The order of this chain is significant: it decides which reason label is
    // reported when several checks would match the same finding.
    let reason = if tier_b && looks_like_pure_identifier(credential) {
        Some("pure_identifier_no_digit")
    } else if tier_b && looks_like_word_separated_identifier(credential) {
        Some("word_separated_identifier")
    } else if tier_b && looks_like_scheme_prefixed_uri(credential) {
        Some("scheme_prefixed_uri")
    } else if looks_like_syntactic_punctuation_marker(credential) {
        Some("syntactic_punctuation_marker")
    } else if tier_b && looks_like_credential_colliding_punctuation(credential) {
        Some("credential_colliding_punctuation")
    } else if tier_b && looks_like_url_or_path_segment(credential) {
        Some("url_or_path_segment")
    } else if tier_b && contains_uuid_v4_substring(credential) {
        Some("contains_uuid_v4")
    } else if looks_like_email_address(credential) {
        Some("email_address")
    } else if looks_like_vendored_minified_path(path) {
        Some("vendored_minified_path")
    } else if source_type
        .is_some_and(|kind| kind.contains("binary-strings") || kind.contains("archive-binary"))
    {
        Some("native_binary_strings")
    } else if looks_like_secret_scanner_source(path) {
        Some("secret_scanner_source")
    } else if path.is_some_and(names_raw_base64_file) && source_type == Some("filesystem") {
        Some("raw_base64_file")
    } else if looks_like_regex_literal_tail(credential) {
        Some("regex_literal_tail")
    } else {
        None
    };

    if let Some(reason) = reason {
        crate::telemetry::record_example_suppression("pipeline", path, credential, reason);
        return true;
    }

    // Shape gates are bypassed only for detectors outside tier B, and never for the
    // `private-key` detector.
    let bypass_shape_gates = !tier_b && detector_id != "private-key";
    should_suppress_inner(
        credential,
        path,
        context,
        source_type,
        false,
        bypass_shape_gates,
        None,
    )
}
/// Returns `true` when `weak_anchor` is set or when `detector_id` starts with
/// `generic-` or `entropy-`.
fn is_generic_or_entropy(detector_id: &str, weak_anchor: bool) -> bool {
    weak_anchor
        || ["generic-", "entropy-"]
            .iter()
            .any(|&prefix| detector_id.starts_with(prefix))
}
/// Detector ids that `detector_weak_anchor` reports as weak-anchored by id alone,
/// without inspecting their patterns. The lookup is an exact string match and is only
/// reached for specs that do not set `min_confidence`.
const RESIDUAL_WEAK_ANCHORED: &[&str] = &[
"aerisweather-api-credentials",
"base-api-credentials",
"flickr-api-key",
"census-api-key",
"workato-api-credentials",
"adobe-api-key",
"alchemy-api-key",
"azure-openai-api-key",
"datadog-api-key",
"etherscan-api-key",
"spotify-client-credentials",
"bamboohr-api-key",
"calendly-api-key",
"crowdin-api-token",
"github-oauth-secret",
"sonarcloud-token",
"activecampaign-api-key",
"chef-automate-token",
"foundation-api-key",
"getresponse-api-key",
"rudder-api-token",
];
/// Reports whether the detector described by `spec` counts as weakly anchored.
///
/// Always `false` for ids starting with `generic-` or `entropy-`, for the id
/// `private-key`, and for any spec that sets `min_confidence`. Otherwise `true` when
/// the id is listed in `RESIDUAL_WEAK_ANCHORED` or when at least one of the spec's
/// pattern regexes satisfies `has_broad_identifier_capture`.
pub fn detector_weak_anchor(spec: &keyhog_core::DetectorSpec) -> bool {
    let detector_id = spec.id.as_str();

    let excluded_by_id = detector_id == "private-key"
        || detector_id.starts_with("generic-")
        || detector_id.starts_with("entropy-");
    if excluded_by_id || spec.min_confidence.is_some() {
        return false;
    }

    if RESIDUAL_WEAK_ANCHORED.contains(&detector_id) {
        return true;
    }
    spec.patterns
        .iter()
        .any(|pattern| has_broad_identifier_capture(&pattern.regex))
}
/// Reports whether `regex` contains a capture group that consists of a single broad
/// identifier character class.
///
/// The scan looks for every `([` (a group opening immediately followed by a character
/// class), takes the class body up to the next `]`, and returns `true` as soon as both
/// hold:
/// - the text right after the class is a quantifier that closes the group and has a
///   minimum length of at most 1 (per `group_capture_min_len`), and
/// - the class body passes `is_full_alpha_identifier_class`.
///
/// Returns `false` when no such group exists, including when a `([` has no closing `]`.
fn has_broad_identifier_capture(regex: &str) -> bool {
    let mut search_from = 0;
    while let Some(rel) = regex[search_from..].find("([") {
        // `rel` points at `(`; the class itself opens one byte later, at `[`.
        let class_open = search_from + rel + 1;
        let Some(rel_close) = regex[class_open..].find(']') else {
            break;
        };
        let class_close = class_open + rel_close;

        // All offsets sit on ASCII bytes (`(`, `[`, `]`), so these slices always fall
        // on character boundaries.
        let body = &regex[class_open + 1..class_close];
        let quantifier_and_rest = &regex[class_close + 1..];

        let short_minimum =
            group_capture_min_len(quantifier_and_rest).is_some_and(|min_len| min_len <= 1);
        if short_minimum && is_full_alpha_identifier_class(body) {
            return true;
        }

        // Resume after this class so that later groups in the pattern are examined.
        search_from = class_close + 1;
    }
    false
}
/// Reads the quantifier at the start of `after` and, if it is immediately followed by
/// the group-closing `)`, returns the minimum repetition count it allows.
///
/// - `+)` yields `Some(1)`
/// - `*)` yields `Some(0)`
/// - `{n})` or `{n,...})` yields `Some(n)` when `n` parses as a `usize`
///
/// Anything else yields `None`, including a quantifier not directly followed by `)`.
fn group_capture_min_len(after: &str) -> Option<usize> {
    if after.starts_with("+)") {
        return Some(1);
    }
    if after.starts_with("*)") {
        return Some(0);
    }

    // Counted repetition: split `{bounds}tail` at the first closing brace.
    let counted = after.strip_prefix('{')?;
    let (bounds, tail) = counted.split_once('}')?;
    if !tail.starts_with(')') {
        return None;
    }
    // The lower bound is whatever precedes the first comma (or the whole body).
    let lower_bound = bounds.split(',').next()?;
    lower_bound.parse().ok()
}
/// Reports whether the character-class body `body` is built only from the tokens
/// `a-z`, `A-Z`, `0-9`, `\w`, `\d`, `_` and `-`, and includes at least one of the
/// alphabetic tokens `a-z`, `A-Z` or `\w`.
///
/// Any other content (for example `a-f`, `^`, or `+`) makes the result `false`, as does
/// an empty body.
fn is_full_alpha_identifier_class(body: &str) -> bool {
    // Each recognised token, paired with whether it covers alphabetic characters.
    // No token is a prefix of another, so at most one can match at any position.
    const TOKENS: &[(&str, bool)] = &[
        ("a-z", true),
        ("A-Z", true),
        ("0-9", false),
        ("\\w", true),
        ("\\d", false),
        ("_", false),
        ("-", false),
    ];

    let mut saw_alpha = false;
    let mut remaining = body;
    while !remaining.is_empty() {
        let matched = TOKENS.iter().find_map(|&(token, is_alpha)| {
            remaining.strip_prefix(token).map(|tail| (tail, is_alpha))
        });
        let Some((tail, is_alpha)) = matched else {
            return false;
        };
        saw_alpha |= is_alpha;
        remaining = tail;
    }
    saw_alpha
}