use crate::models::{FactorCategory, ScoreFactor};
use crate::simd;
/// Configuration consumed by `BenignNameFilter`.
///
/// Holds the two scored term lists plus two ignore lists. The `Default`
/// implementation fills the term lists from `default_benign_terms()` and
/// `default_suspicious_terms()` and leaves both ignore lists empty.
#[derive(Debug, Clone)]
pub struct BenignNameConfig {
/// Terms that add a factor carrying the term's `score_mod` when they match
/// (every built-in entry has a negative `score_mod`).
pub benign_terms: Vec<TermMatcher>,
/// Terms that add a factor when they match; `BenignNameFilter::analyze`
/// skips this list once a benign factor has a contribution of -100 or lower.
pub suspicious_terms: Vec<TermMatcher>,
/// Names to ignore outright; compared against the whole name with
/// `eq_ignore_ascii_case`.
pub exact_ignore: Vec<String>,
/// Regex sources compiled by `BenignNameFilter::new`; a name matching any of
/// them is ignored. Matching uses the name as given, not a lowercased copy.
pub patterns_ignore: Vec<String>,
}
impl Default for BenignNameConfig {
    /// Stock configuration: the built-in benign and suspicious term lists,
    /// with nothing on either ignore list.
    fn default() -> Self {
        let benign_terms = default_benign_terms();
        let suspicious_terms = default_suspicious_terms();
        Self {
            benign_terms,
            suspicious_terms,
            exact_ignore: Vec::new(),
            patterns_ignore: Vec::new(),
        }
    }
}
/// One scoring rule: a term, how to match it, and what a match contributes.
#[derive(Debug, Clone)]
pub struct TermMatcher {
/// Text to look for. Both this and the candidate name are passed through
/// `simd::to_ascii_lowercase` before they are compared.
pub term: String,
/// Score handed to `ScoreFactor::new` when the term matches.
/// `BenignNameFilter::is_definitely_benign` only considers benign terms
/// whose value is -100 or lower.
pub score_mod: i32,
/// Human-readable justification handed to `ScoreFactor::new` on a match.
pub reason: &'static str,
/// Where in the name the term has to appear.
pub mode: MatchMode,
}
/// How a `TermMatcher`'s term is compared against a candidate name.
///
/// Derives the comparison and hashing traits so modes can be tested with
/// `==` / `assert_eq!` and used as map or set keys.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum MatchMode {
    /// The term may appear anywhere in the name.
    Contains,
    /// The name must start with the term.
    Prefix,
    /// The name must end with the term.
    Suffix,
    /// The name must equal the term.
    Exact,
}
fn default_benign_terms() -> Vec<TermMatcher> {
vec![
TermMatcher {
term: "version".into(),
score_mod: -100,
reason: "Version identifiers are not secrets",
mode: MatchMode::Contains,
},
TermMatcher {
term: "pkg_name".into(),
score_mod: -100,
reason: "Package names are not secrets",
mode: MatchMode::Contains,
},
TermMatcher {
term: "crate_name".into(),
score_mod: -100,
reason: "Crate names are not secrets",
mode: MatchMode::Contains,
},
TermMatcher {
term: "mime".into(),
score_mod: -100,
reason: "MIME types are not secrets",
mode: MatchMode::Contains,
},
TermMatcher {
term: "content_type".into(),
score_mod: -100,
reason: "Content types are not secrets",
mode: MatchMode::Contains,
},
TermMatcher {
term: "encoding".into(),
score_mod: -100,
reason: "Encoding names are not secrets",
mode: MatchMode::Contains,
},
TermMatcher {
term: "default_".into(),
score_mod: -50,
reason: "Default values are often not secrets",
mode: MatchMode::Prefix,
},
TermMatcher {
term: "_default".into(),
score_mod: -50,
reason: "Default values are often not secrets",
mode: MatchMode::Suffix,
},
TermMatcher {
term: "path".into(),
score_mod: -60,
reason: "Paths are typically not secrets",
mode: MatchMode::Contains,
},
TermMatcher {
term: "endpoint".into(),
score_mod: -40,
reason: "Endpoint names are often not secrets",
mode: MatchMode::Contains,
},
TermMatcher {
term: "route".into(),
score_mod: -60,
reason: "Routes are typically not secrets",
mode: MatchMode::Contains,
},
TermMatcher {
term: "magic".into(),
score_mod: -100,
reason: "Magic numbers/bytes are not secrets",
mode: MatchMode::Contains,
},
TermMatcher {
term: "delimiter".into(),
score_mod: -100,
reason: "Delimiters are not secrets",
mode: MatchMode::Contains,
},
TermMatcher {
term: "separator".into(),
score_mod: -100,
reason: "Separators are not secrets",
mode: MatchMode::Contains,
},
TermMatcher {
term: "color".into(),
score_mod: -100,
reason: "Colors are not secrets",
mode: MatchMode::Contains,
},
TermMatcher {
term: "theme".into(),
score_mod: -100,
reason: "Theme names are not secrets",
mode: MatchMode::Contains,
},
TermMatcher {
term: "font".into(),
score_mod: -100,
reason: "Font names are not secrets",
mode: MatchMode::Contains,
},
TermMatcher {
term: "style".into(),
score_mod: -100,
reason: "Style names are not secrets",
mode: MatchMode::Contains,
},
TermMatcher {
term: "error_".into(),
score_mod: -80,
reason: "Error messages are typically not secrets",
mode: MatchMode::Prefix,
},
TermMatcher {
term: "_error".into(),
score_mod: -80,
reason: "Error messages are typically not secrets",
mode: MatchMode::Suffix,
},
TermMatcher {
term: "status".into(),
score_mod: -60,
reason: "Status names are typically not secrets",
mode: MatchMode::Contains,
},
TermMatcher {
term: "description".into(),
score_mod: -100,
reason: "Descriptions are not secrets",
mode: MatchMode::Contains,
},
TermMatcher {
term: "help".into(),
score_mod: -100,
reason: "Help text is not a secret",
mode: MatchMode::Contains,
},
TermMatcher {
term: "usage".into(),
score_mod: -100,
reason: "Usage text is not a secret",
mode: MatchMode::Contains,
},
TermMatcher {
term: "topic".into(),
score_mod: -100,
reason: "Topic names are not secrets",
mode: MatchMode::Contains,
},
TermMatcher {
term: "checksum".into(),
score_mod: -100,
reason: "Checksum type names are not secrets",
mode: MatchMode::Contains,
},
TermMatcher {
term: "comment".into(),
score_mod: -100,
reason: "Comment field names are not secrets",
mode: MatchMode::Contains,
},
TermMatcher {
term: "broker".into(),
score_mod: -50,
reason: "Broker addresses are typically not secrets",
mode: MatchMode::Contains,
},
TermMatcher {
term: "acl".into(),
score_mod: -50,
reason: "ACL field names are not secrets",
mode: MatchMode::Contains,
},
TermMatcher {
term: "hash".into(),
score_mod: -50,
reason: "Hash type names are not secrets",
mode: MatchMode::Contains,
},
TermMatcher {
term: "header".into(),
score_mod: -80,
reason: "Header names are typically not secrets",
mode: MatchMode::Contains,
},
TermMatcher {
term: "prefix".into(),
score_mod: -100,
reason: "Prefix strings are typically not secrets",
mode: MatchMode::Contains,
},
TermMatcher {
term: "suffix".into(),
score_mod: -100,
reason: "Suffix strings are typically not secrets",
mode: MatchMode::Contains,
},
TermMatcher {
term: "urn".into(),
score_mod: -100,
reason: "URN identifiers are not secrets",
mode: MatchMode::Contains,
},
TermMatcher {
term: "namespace".into(),
score_mod: -100,
reason: "Namespace identifiers are not secrets",
mode: MatchMode::Contains,
},
TermMatcher {
term: "schema".into(),
score_mod: -80,
reason: "Schema names are typically not secrets",
mode: MatchMode::Contains,
},
]
}
/// Built-in list of name fragments that suggest a value is sensitive, in the
/// order they are evaluated.
///
/// Each row is `(term, score_mod, reason)`; every entry is matched with
/// `MatchMode::Contains`.
fn default_suspicious_terms() -> Vec<TermMatcher> {
    const TABLE: &[(&str, i32, &str)] = &[
        ("token", 25, "Tokens are often sensitive"),
        ("secret", 35, "Named 'secret' suggests sensitivity"),
        ("key", 20, "Keys are often sensitive"),
        ("password", 40, "Passwords are sensitive"),
        ("passwd", 40, "Passwords are sensitive"),
        ("auth", 25, "Authentication-related values are sensitive"),
        ("credential", 35, "Credentials are sensitive"),
        ("private", 30, "Private values are sensitive"),
        ("api_key", 35, "API keys are sensitive"),
        ("apikey", 35, "API keys are sensitive"),
        ("access_key", 35, "Access keys are sensitive"),
        ("signing", 25, "Signing keys are sensitive"),
        ("hmac", 25, "HMAC secrets are sensitive"),
        ("db_pass", 40, "Database passwords are sensitive"),
        ("connection_string", 30, "Connection strings may contain credentials"),
        ("admin", 15, "Admin-related constants need scrutiny"),
        ("backdoor", 50, "Backdoor terminology is highly suspicious"),
    ];

    let mut terms = Vec::with_capacity(TABLE.len());
    for &(term, score_mod, reason) in TABLE {
        terms.push(TermMatcher {
            term: String::from(term),
            score_mod,
            reason,
            mode: MatchMode::Contains,
        });
    }
    terms
}
/// Scores identifier names against a `BenignNameConfig`.
///
/// Constructed through `BenignNameFilter::new`, which compiles the
/// configuration's `patterns_ignore` entries up front.
pub struct BenignNameFilter {
/// Term lists and ignore lists this filter applies.
config: BenignNameConfig,
/// Regexes compiled from `config.patterns_ignore`, in the same order.
compiled_patterns: Vec<regex::Regex>,
}
impl BenignNameFilter {
pub fn new(config: BenignNameConfig) -> Result<Self, regex::Error> {
let compiled_patterns = config
.patterns_ignore
.iter()
.map(|p| regex::Regex::new(p))
.collect::<Result<Vec<_>, _>>()?;
Ok(Self {
config,
compiled_patterns,
})
}
pub fn analyze(&self, name: &str) -> Vec<ScoreFactor> {
let mut factors = Vec::new();
let lower_name = simd::to_ascii_lowercase(name);
if self
.config
.exact_ignore
.iter()
.any(|n| n.eq_ignore_ascii_case(name))
{
factors.push(ScoreFactor::kill(
"exact_ignore",
format!("Name '{}' is in the ignore list", name),
));
return factors;
}
for pattern in &self.compiled_patterns {
if pattern.is_match(name) {
factors.push(ScoreFactor::kill(
"pattern_ignore",
format!(
"Name '{}' matches ignore pattern '{}'",
name,
pattern.as_str()
),
));
return factors;
}
}
for term in &self.config.benign_terms {
if self.term_matches(&lower_name, term) {
factors.push(
ScoreFactor::new(
format!("benign_term:{}", term.term),
FactorCategory::Name,
term.score_mod,
term.reason,
)
.with_evidence(format!("Name '{}' contains '{}'", name, term.term)),
);
}
}
let killed = factors.iter().any(|f| f.contribution <= -100);
if !killed {
for term in &self.config.suspicious_terms {
if self.term_matches(&lower_name, term) {
factors.push(
ScoreFactor::new(
format!("suspicious_term:{}", term.term),
FactorCategory::Name,
term.score_mod,
term.reason,
)
.with_evidence(format!("Name '{}' contains '{}'", name, term.term)),
);
}
}
}
factors
}
fn term_matches(&self, name: &str, term: &TermMatcher) -> bool {
let term_lower = simd::to_ascii_lowercase(&term.term);
match term.mode {
MatchMode::Contains => name.contains(&term_lower),
MatchMode::Prefix => name.starts_with(&term_lower),
MatchMode::Suffix => name.ends_with(&term_lower),
MatchMode::Exact => name == term_lower,
}
}
pub fn is_definitely_benign(&self, name: &str) -> bool {
let lower_name = simd::to_ascii_lowercase(name);
self.config
.benign_terms
.iter()
.filter(|t| t.score_mod <= -100)
.any(|t| self.term_matches(&lower_name, t))
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Filter built from the stock configuration; its patterns list is empty,
    /// so construction is not expected to fail.
    fn default_filter() -> BenignNameFilter {
        BenignNameFilter::new(BenignNameConfig::default()).unwrap()
    }

    /// Sum of all factor contributions the stock filter reports for `name`.
    fn total_score(name: &str) -> i32 {
        default_filter()
            .analyze(name)
            .iter()
            .map(|factor| factor.contribution)
            .sum()
    }

    #[test]
    fn test_version_is_benign() {
        let filter = default_filter();
        for name in &["VERSION", "PROTOCOL_VERSION", "api_version"] {
            assert!(filter.is_definitely_benign(name));
        }
    }

    #[test]
    fn test_secret_is_suspicious() {
        let total = total_score("API_SECRET");
        assert!(total > 0, "API_SECRET should have positive score");
    }

    #[test]
    fn test_auth_token_is_very_suspicious() {
        let total = total_score("GRPC_AUTH_TOKEN");
        assert!(total >= 40, "GRPC_AUTH_TOKEN should have high score");
    }
}