use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::sync::Arc;
use crate::{MatchLocation, RawMatch, Severity};
/// Selects how [`dedup_matches`] groups raw matches into findings.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum DedupScope {
/// No grouping: every raw match is emitted as its own entry.
None,
/// Group matches that share a detector id and credential *and* come from
/// the same source, file path and commit.
File,
/// Group matches that share a detector id and credential, regardless of
/// where they were found.
Credential,
}
/// One entry produced by [`dedup_matches`]: the first raw match of a group
/// plus whatever was merged in from later matches of the same group.
#[derive(Debug, Clone, Serialize)]
pub struct DedupedMatch {
/// Detector identifier, taken from the first raw match of the group.
/// Part of the grouping key.
#[serde(with = "crate::finding::serde_arc_str")]
pub detector_id: Arc<str>,
/// Detector name, taken from the first raw match of the group.
#[serde(with = "crate::finding::serde_arc_str")]
pub detector_name: Arc<str>,
/// Service name, taken from the first raw match of the group.
#[serde(with = "crate::finding::serde_arc_str")]
pub service: Arc<str>,
/// Severity, taken from the first raw match of the group.
pub severity: Severity,
/// Credential string carried over from the first raw match.
/// Part of the grouping key.
#[serde(with = "crate::finding::serde_arc_str")]
pub credential: Arc<str>,
/// Hex-encoded SHA-256 digest of `credential`.
pub credential_hash: String,
/// Companion name -> value map. When merged matches disagree on a value,
/// the distinct values are joined with `" | "`.
pub companions: HashMap<String, String>,
/// Location of the first raw match of the group.
pub primary_location: MatchLocation,
/// Locations of every later raw match merged into this entry; empty when
/// nothing was merged.
pub additional_locations: Vec<MatchLocation>,
/// Largest confidence among the merged matches, or `None` when none of
/// them carried a confidence value.
pub confidence: Option<f64>,
}
/// Collapses raw matches into deduplicated findings according to `scope`.
///
/// * [`DedupScope::None`] - every raw match becomes its own entry.
/// * [`DedupScope::Credential`] - matches sharing a detector id and credential
///   are merged into one entry.
/// * [`DedupScope::File`] - like `Credential`, but matches are only merged when
///   they also share the same source, file path and commit.
///
/// The first match of a group supplies the detector metadata, severity,
/// credential and `primary_location`. Every later match of the same group adds
/// its location to `additional_locations`, has its companions merged in, and
/// raises `confidence` to the maximum seen so far.
///
/// Entries are returned in order of first occurrence in `matches`, so the
/// output is deterministic for a given input regardless of scope.
pub fn dedup_matches(matches: Vec<RawMatch>, scope: &DedupScope) -> Vec<DedupedMatch> {
    use std::collections::hash_map::Entry;

    // Grouping key: (detector id, credential, per-file identity when grouping by file).
    type GroupKey = (Arc<str>, Arc<str>, Option<Arc<str>>);

    // Turns a single raw match into an entry with nothing merged into it yet.
    fn standalone(raw: RawMatch) -> DedupedMatch {
        let credential_hash = sha256_hash(&raw.credential);
        DedupedMatch {
            detector_id: raw.detector_id,
            detector_name: raw.detector_name,
            service: raw.service,
            severity: raw.severity,
            credential: raw.credential,
            credential_hash,
            companions: raw.companions,
            primary_location: raw.location,
            additional_locations: Vec::new(),
            confidence: raw.confidence,
        }
    }

    // Exhaustive on purpose: a new scope variant must be handled explicitly
    // instead of silently dropping matches.
    let group_per_file = match scope {
        DedupScope::None => return matches.into_iter().map(standalone).collect(),
        DedupScope::Credential => false,
        DedupScope::File => true,
    };

    // `slot_of` maps a group key to the index of its entry in `deduped`.
    // Keeping the entries in a Vec preserves first-occurrence order; iterating
    // a HashMap's values would yield an arbitrary, run-dependent order.
    let mut slot_of: HashMap<GroupKey, usize> = HashMap::new();
    let mut deduped: Vec<DedupedMatch> = Vec::new();

    for matched in matches {
        let file = group_per_file.then(|| file_scope_identity(&matched.location));
        let key: GroupKey = (
            Arc::clone(&matched.detector_id),
            Arc::clone(&matched.credential),
            file,
        );

        match slot_of.entry(key) {
            Entry::Occupied(slot) => {
                // Indices stored in `slot_of` always refer to an entry pushed below.
                let existing = &mut deduped[*slot.get()];
                existing.additional_locations.push(matched.location);
                merge_companions(&mut existing.companions, matched.companions);
                existing.confidence = max_confidence(existing.confidence, matched.confidence);
            }
            Entry::Vacant(slot) => {
                slot.insert(deduped.len());
                deduped.push(standalone(matched));
            }
        }
    }

    deduped
}
/// Builds the identity string used to decide whether two locations belong to
/// the same file for deduplication purposes.
///
/// The identity is the location's source, file path and commit joined with NUL
/// separators. A missing file path is represented as `<unknown>` and a missing
/// commit as `<no-commit>`.
fn file_scope_identity(location: &MatchLocation) -> Arc<str> {
    let source: &str = location.source.as_ref();
    let path: &str = location.file_path.as_deref().unwrap_or("<unknown>");
    let commit: &str = location.commit.as_deref().unwrap_or("<no-commit>");
    Arc::from([source, path, commit].join("\0"))
}
/// Folds the `incoming` companion values into `existing`.
///
/// * A name not yet present is inserted as-is.
/// * A name whose stored value equals the incoming value is left untouched.
/// * Otherwise the incoming value is appended after a `" | "` separator,
///   unless it already appears as one of the `" | "`-separated parts.
fn merge_companions(existing: &mut HashMap<String, String>, incoming: HashMap<String, String>) {
    use std::collections::hash_map::Entry;

    for (name, value) in incoming {
        match existing.entry(name) {
            Entry::Vacant(slot) => {
                slot.insert(value);
            }
            Entry::Occupied(mut slot) => {
                let joined = slot.get_mut();
                // The whole-string comparison comes first so a value that itself
                // contains " | " and matches exactly is not appended again.
                let known = joined.as_str() == value.as_str()
                    || joined.split(" | ").any(|part| part == value.as_str());
                if !known {
                    joined.push_str(" | ");
                    joined.push_str(&value);
                }
            }
        }
    }
}
/// Returns the larger of two optional confidence values.
///
/// When only one side is present it is returned unchanged; when neither is
/// present the result is `None`.
fn max_confidence(lhs: Option<f64>, rhs: Option<f64>) -> Option<f64> {
    match lhs.zip(rhs) {
        Some((left, right)) => Some(left.max(right)),
        // At most one side is present here, so `or` picks whichever exists.
        None => lhs.or(rhs),
    }
}
/// Returns the hex-encoded SHA-256 digest of the UTF-8 bytes of `s`.
fn sha256_hash(s: &str) -> String {
    use sha2::{Digest, Sha256};

    // One-shot digest: equivalent to creating a hasher, feeding it the bytes
    // once, and finalizing.
    let digest = Sha256::digest(s.as_bytes());
    hex::encode(digest)
}