1use serde::{Deserialize, Serialize};
8use std::collections::HashMap;
9use std::sync::Arc;
10
11use crate::{MatchLocation, RawMatch, Severity};
12
/// Selects how `dedup_matches` groups raw matches into findings.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum DedupScope {
    /// No grouping: every raw match is emitted as its own finding.
    None,
    /// Group matches that share detector id, credential, and file identity
    /// (source, file path, and commit of the match location).
    File,
    /// Group matches that share detector id and credential, wherever they were found.
    Credential,
}
23
/// A finding produced by `dedup_matches`: one raw match, or several raw matches
/// merged into a single entry.
#[derive(Debug, Clone, Serialize)]
pub struct DedupedMatch {
    /// Identifier of the detector that produced the match (part of the grouping key).
    #[serde(with = "crate::finding::serde_arc_str")]
    pub detector_id: Arc<str>,
    /// Detector name, taken from the first match of the group.
    #[serde(with = "crate::finding::serde_arc_str")]
    pub detector_name: Arc<str>,
    /// Service name, taken from the first match of the group.
    #[serde(with = "crate::finding::serde_arc_str")]
    pub service: Arc<str>,
    /// Severity, taken from the first match of the group.
    pub severity: Severity,
    /// The matched credential text (part of the grouping key).
    #[serde(with = "crate::finding::serde_arc_str")]
    pub credential: Arc<str>,
    /// Hex-encoded SHA-256 digest of `credential`.
    pub credential_hash: String,
    /// Companion values by name; when merged matches disagree on a value,
    /// the distinct values are joined with `" | "`.
    pub companions: HashMap<String, String>,
    /// Location of the first match of the group.
    pub primary_location: MatchLocation,
    /// Locations of every later match merged into this finding; empty when nothing was merged.
    pub additional_locations: Vec<MatchLocation>,
    /// Highest confidence among the merged matches, or `None` if none of them carried one.
    pub confidence: Option<f64>,
}
52
53pub fn dedup_matches(matches: Vec<RawMatch>, scope: &DedupScope) -> Vec<DedupedMatch> {
55 if *scope == DedupScope::None {
56 return matches
57 .into_iter()
58 .map(|m| {
59 let credential_hash = sha256_hash(&m.credential);
60 DedupedMatch {
61 detector_id: m.detector_id,
62 detector_name: m.detector_name,
63 service: m.service,
64 severity: m.severity,
65 credential: m.credential,
66 credential_hash,
67 companions: m.companions,
68 primary_location: m.location,
69 additional_locations: Vec::new(),
70 confidence: m.confidence,
71 }
72 })
73 .collect();
74 }
75
76 #[allow(clippy::type_complexity)]
78 let mut groups: HashMap<(Arc<str>, Arc<str>, Option<Arc<str>>), DedupedMatch> = HashMap::new();
79
80 for matched in matches {
81 let detector_id_arc = Arc::clone(&matched.detector_id);
82 let credential_arc = Arc::clone(&matched.credential);
83
84 let key = match scope {
85 DedupScope::Credential => (detector_id_arc, credential_arc, None),
86 DedupScope::File => {
87 let file = Some(file_scope_identity(&matched.location));
88 (detector_id_arc, credential_arc, file)
89 }
90 DedupScope::None => continue,
91 };
92
93 match groups.get_mut(&key) {
94 Some(existing) => {
95 existing.additional_locations.push(matched.location);
96 merge_companions(&mut existing.companions, matched.companions);
97 existing.confidence = max_confidence(existing.confidence, matched.confidence);
98 }
99 None => {
100 let credential_hash = sha256_hash(&matched.credential);
101 groups.insert(
102 key,
103 DedupedMatch {
104 detector_id: matched.detector_id,
105 detector_name: matched.detector_name,
106 service: matched.service,
107 severity: matched.severity,
108 credential: matched.credential,
109 credential_hash,
110 companions: matched.companions,
111 primary_location: matched.location,
112 additional_locations: Vec::new(),
113 confidence: matched.confidence,
114 },
115 );
116 }
117 }
118 }
119
120 groups.into_values().collect()
121}
122
123fn file_scope_identity(location: &MatchLocation) -> Arc<str> {
124 let mut identity = String::new();
125 identity.push_str(location.source.as_ref());
126 identity.push('\0');
127 identity.push_str(location.file_path.as_deref().unwrap_or("<unknown>"));
128 identity.push('\0');
129 identity.push_str(location.commit.as_deref().unwrap_or("<no-commit>"));
130 Arc::from(identity)
131}
132
/// Folds the companion values in `incoming` into `existing`.
///
/// A name that is not yet present is inserted unchanged. For a name that is already
/// present, a differing value is appended after a `" | "` separator, unless it already
/// appears as one of the `" | "`-separated entries of the stored value.
fn merge_companions(existing: &mut HashMap<String, String>, incoming: HashMap<String, String>) {
    for (name, value) in incoming {
        if let Some(current) = existing.get_mut(&name) {
            // Whole-string equality is checked first; only then are individual entries compared.
            let needs_append = *current != value
                && !current.split(" | ").any(|part| part == value.as_str());
            if needs_append {
                current.push_str(" | ");
                current.push_str(&value);
            }
        } else {
            existing.insert(name, value);
        }
    }
}
152
/// Returns the larger of two optional confidence values.
///
/// When only one side is present that side is returned; when neither is, the result is `None`.
fn max_confidence(lhs: Option<f64>, rhs: Option<f64>) -> Option<f64> {
    match (lhs, rhs) {
        (Some(left), Some(right)) => Some(left.max(right)),
        // At most one side is `Some` here, so `or` picks whichever exists.
        (left, right) => left.or(right),
    }
}
161
162fn sha256_hash(s: &str) -> String {
163 use sha2::{Digest, Sha256};
164 let mut hasher = Sha256::new();
165 hasher.update(s.as_bytes());
166 hex::encode(hasher.finalize())
167}