use std::collections::HashMap;
use crate::{MatchLocation, RawMatch, Severity};
/// Selects how aggressively [`dedup_matches`] collapses raw matches.
///
/// The enum is `#[non_exhaustive]`, so code outside this crate must keep a
/// wildcard arm when matching on it.
#[non_exhaustive]
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum DedupScope {
    /// Group matches by the pair returned from the match's
    /// `deduplication_key()`, regardless of which file they were found in.
    Credential,
    /// Group matches by the `deduplication_key()` pair and, additionally, by
    /// the file path recorded in the match location.
    File,
    /// Perform no grouping: every raw match yields its own result.
    None,
}
/// One deduplicated finding: a group of raw matches that share a
/// deduplication key, as produced by `dedup_matches`.
///
/// Unless noted otherwise, each field is copied from the first raw match of
/// the group.
#[derive(Clone, Debug)]
pub struct DedupedMatch {
    /// Identifier of the detector that produced the match.
    pub detector_id: String,
    /// Name of the detector that produced the match.
    pub detector_name: String,
    /// Service value carried over from the raw match.
    pub service: String,
    /// Severity carried over from the raw match.
    pub severity: Severity,
    /// The matched credential text.
    pub credential: String,
    /// Companion value, if any. When the first match of a group has none,
    /// the companion of the first later match that has one is used.
    pub companion: Option<String>,
    /// Location of the first raw match in the group.
    pub primary_location: MatchLocation,
    /// Locations of every other raw match merged into this group; empty when
    /// nothing was merged.
    pub additional_locations: Vec<MatchLocation>,
    /// Confidence carried over from the raw match, if it had one.
    pub confidence: Option<f64>,
}
/// Collapses raw detector matches into one [`DedupedMatch`] per group.
///
/// How matches are grouped depends on `scope`:
///
/// * [`DedupScope::None`]: no grouping; every raw match becomes its own
///   `DedupedMatch`.
/// * [`DedupScope::Credential`]: matches sharing the pair returned by
///   `deduplication_key()` are merged.
/// * [`DedupScope::File`]: as `Credential`, but matches are merged only when
///   they also share `location.file_path`. Matches without a path form their
///   own group and are never merged with a file that happens to be named
///   `stdin`.
///
/// Within a group the first match (in input order) supplies every field of
/// the result and its location becomes `primary_location`. Locations of later
/// matches are appended to `additional_locations` in input order. If the
/// first match has no companion, the companion of the first later match that
/// has one is used.
///
/// Groups are returned in the order in which their first match appears in
/// `matches`, so the output is deterministic for a given input.
pub fn dedup_matches(matches: Vec<RawMatch>, scope: &DedupScope) -> Vec<DedupedMatch> {
    // Resolve the scope once, exhaustively, so that adding a variant to
    // `DedupScope` is a compile error here rather than a runtime panic.
    let key_includes_file = match scope {
        DedupScope::None => return matches.into_iter().map(first_occurrence).collect(),
        DedupScope::Credential => false,
        DedupScope::File => true,
    };

    // Maps a deduplication key to the index of its group in `deduped`. The
    // key is a tuple rather than a delimiter-joined string, so a `:` inside a
    // detector id, credential or path cannot make two distinct keys collide.
    let mut slot_by_key: HashMap<(String, String, Option<String>), usize> = HashMap::new();
    // Groups in first-seen order. Returning this vector instead of the map's
    // values keeps the output order stable across runs.
    let mut deduped: Vec<DedupedMatch> = Vec::new();

    for matched in matches {
        let key = {
            let (detector, credential) = matched.deduplication_key();
            let file = if key_includes_file {
                matched.location.file_path.as_deref().map(str::to_owned)
            } else {
                None
            };
            (detector.to_string(), credential.to_string(), file)
        };

        // A single map lookup: either fetch the existing group's index or
        // reserve the next free one for a new group.
        let next_slot = deduped.len();
        let slot = *slot_by_key.entry(key).or_insert(next_slot);

        if slot == next_slot {
            deduped.push(first_occurrence(matched));
        } else {
            // `slot` was stored when its group was pushed, so it is in bounds.
            let existing = &mut deduped[slot];
            existing.additional_locations.push(matched.location);
            if existing.companion.is_none() {
                existing.companion = matched.companion;
            }
        }
    }

    deduped
}

/// Converts a raw match into a `DedupedMatch` that has no additional locations.
fn first_occurrence(raw: RawMatch) -> DedupedMatch {
    DedupedMatch {
        detector_id: raw.detector_id,
        detector_name: raw.detector_name,
        service: raw.service,
        severity: raw.severity,
        credential: raw.credential,
        companion: raw.companion,
        primary_location: raw.location,
        additional_locations: Vec::new(),
        confidence: raw.confidence,
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a high-severity filesystem match for `credential`, reported by
    /// `detector_id` at line 1 of `file`, with no companion.
    fn make_match(detector_id: &str, credential: &str, file: &str) -> RawMatch {
        let location = MatchLocation {
            source: "filesystem".into(),
            file_path: Some(file.into()),
            line: Some(1),
            offset: 0,
            commit: None,
            author: None,
            date: None,
        };
        RawMatch {
            detector_id: detector_id.into(),
            detector_name: format!("{detector_id} detector"),
            service: "test".into(),
            severity: Severity::High,
            credential: credential.into(),
            companion: None,
            location,
            entropy: None,
            confidence: Some(0.9),
        }
    }

    /// One `aws` / `AKIA_SECRET` match per entry in `files`, in the same order.
    fn aws_secret_in(files: &[&str]) -> Vec<RawMatch> {
        files
            .iter()
            .map(|file| make_match("aws", "AKIA_SECRET", file))
            .collect()
    }

    #[test]
    fn credential_scope_merges_across_files() {
        let merged = dedup_matches(aws_secret_in(&["a.py", "b.py"]), &DedupScope::Credential);

        assert_eq!(merged.len(), 1);
        assert_eq!(merged[0].additional_locations.len(), 1);
    }

    #[test]
    fn file_scope_separates_different_files() {
        let per_file = dedup_matches(aws_secret_in(&["a.py", "b.py"]), &DedupScope::File);

        assert_eq!(per_file.len(), 2);
    }

    #[test]
    fn no_scope_keeps_every_match() {
        // Two identical matches must both survive when deduplication is off.
        let untouched = dedup_matches(aws_secret_in(&["a.py", "a.py"]), &DedupScope::None);

        assert_eq!(untouched.len(), 2);
    }

    #[test]
    fn companion_is_preserved_from_later_match() {
        let without_companion = RawMatch {
            companion: None,
            ..make_match("aws", "AKIA_SECRET", "a.py")
        };
        let with_companion = RawMatch {
            companion: Some("secret_key_companion".into()),
            ..make_match("aws", "AKIA_SECRET", "b.py")
        };

        let merged = dedup_matches(
            vec![without_companion, with_companion],
            &DedupScope::Credential,
        );

        assert_eq!(merged.len(), 1);
        assert_eq!(merged[0].companion.as_deref(), Some("secret_key_companion"));
    }

    #[test]
    fn different_detectors_same_credential_stay_separate() {
        let input: Vec<RawMatch> = ["aws", "github"]
            .iter()
            .map(|detector| make_match(detector, "AKIA_SECRET", "a.py"))
            .collect();

        let separate = dedup_matches(input, &DedupScope::Credential);

        assert_eq!(separate.len(), 2);
    }

    #[test]
    fn empty_input_returns_empty() {
        let nothing = dedup_matches(Vec::new(), &DedupScope::Credential);

        assert!(nothing.is_empty());
    }
}