keyhog_scanner/
resolution.rs1use std::collections::{HashMap, HashSet};
5use std::sync::Arc;
6
7use keyhog_core::RawMatch;
8
/// Largest line offset, in either direction, at which a service-specific
/// match suppresses an entropy match in the same file.
const ADJACENT_LINE_DISTANCE: usize = 2;
/// Number of matches at or below which there is nothing to resolve.
const SINGLE_MATCH_COUNT: usize = 1;
/// Priority scores closer together than this are treated as tied.
const SCORE_EPSILON: f64 = 1e-9;
/// Starting value of every priority score, before any bonus is added.
const ENTROPY_MATCH_SCORE: f64 = 0.0;
/// Bonus added to the score of a match from a service-specific detector.
const NAMED_DETECTOR_SCORE: f64 = 10.0;
/// Multiplier applied to a match's confidence when it has one.
const CONFIDENCE_WEIGHT: f64 = 5.0;
/// Score contribution per unit of detector id length.
const DETECTOR_ID_LENGTH_WEIGHT: f64 = 0.1;
/// Credential length beyond this cap no longer increases the score.
const MAX_CREDENTIAL_SCORE_LENGTH: usize = 200;
/// Score contribution per unit of (capped) credential length.
const CREDENTIAL_LENGTH_WEIGHT: f64 = 0.01;
18
19pub fn resolve_matches(mut matches: Vec<RawMatch>) -> Vec<RawMatch> {
23 if matches.len() <= SINGLE_MATCH_COUNT {
24 return matches;
25 }
26 suppress_entropy_matches_near_named_detectors(&mut matches);
27 resolve_match_groups(matches)
28}
29
30fn suppress_entropy_matches_near_named_detectors(matches: &mut Vec<RawMatch>) {
31 let named_lines: HashSet<(Arc<str>, usize)> = matches
33 .iter()
34 .filter(|m| is_service_specific_detector(m.detector_id.as_ref()))
35 .filter_map(|m| {
36 let path = m
37 .location
38 .file_path
39 .clone()
40 .unwrap_or_else(|| Arc::from(""));
41 m.location.line.map(|line| (path, line))
42 })
43 .collect();
44 matches.retain(|m| {
45 if m.detector_id.as_ref() != "entropy" && !m.detector_id.as_ref().starts_with("entropy-") {
46 return true;
47 }
48 let path = m
49 .location
50 .file_path
51 .clone()
52 .unwrap_or_else(|| Arc::from(""));
53 if let Some(line) = m.location.line {
54 for offset in 0..=ADJACENT_LINE_DISTANCE {
55 if named_lines.contains(&(Arc::clone(&path), line.saturating_sub(offset)))
56 || named_lines.contains(&(Arc::clone(&path), line.saturating_add(offset)))
57 {
58 return false;
59 }
60 }
61 }
62 true
63 });
64}
65
/// Returns `true` when `detector_id` is exactly `"entropy"` or begins with
/// `"entropy-"`.
fn is_entropy_detector(detector_id: &str) -> bool {
    match detector_id.strip_prefix("entropy") {
        // Either nothing follows the prefix, or the remainder starts the
        // `-suffix` part of an `entropy-*` id.
        Some(rest) => rest.is_empty() || rest.starts_with('-'),
        None => false,
    }
}
69
/// Returns `true` when `detector_id` is `"private-key"` or begins with
/// `"generic-"`.
fn is_generic_detector(detector_id: &str) -> bool {
    match detector_id {
        "private-key" => true,
        other => other.starts_with("generic-"),
    }
}
73
74fn is_service_specific_detector(detector_id: &str) -> bool {
75 !is_entropy_detector(detector_id) && !is_generic_detector(detector_id)
76}
77
78fn resolve_match_groups(mut matches: Vec<RawMatch>) -> Vec<RawMatch> {
79 let mut groups: HashMap<(Arc<str>, usize), Vec<RawMatch>> = HashMap::new();
83 for m in matches.drain(..) {
84 let file = m
85 .location
86 .file_path
87 .clone()
88 .unwrap_or_else(|| Arc::from(""));
89 let line = m.location.line.unwrap_or(0);
90 groups.entry((file, line)).or_default().push(m);
91 }
92 let mut resolved = Vec::new();
93 for group in groups.into_values() {
94 if group.len() == SINGLE_MATCH_COUNT {
95 resolved.extend(group);
96 continue;
97 }
98 resolved.extend(best_matches_for_group(group));
99 }
100 resolved
101}
102
103fn best_matches_for_group(group: Vec<RawMatch>) -> Vec<RawMatch> {
104 let mut scored: Vec<(f64, RawMatch)> = group
105 .into_iter()
106 .map(|matched| (match_priority_score(&matched), matched))
107 .collect();
108 scored.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));
109 let top_score = scored[0].0;
110 scored
111 .into_iter()
112 .take_while(|(score, _)| (*score - top_score).abs() < SCORE_EPSILON)
113 .map(|(_, matched)| matched)
114 .collect()
115}
116
117fn match_priority_score(m: &RawMatch) -> f64 {
119 let mut score = ENTROPY_MATCH_SCORE;
120
121 if is_service_specific_detector(m.detector_id.as_ref()) {
127 score += NAMED_DETECTOR_SCORE;
128 }
129
130 if let Some(conf) = m.confidence {
132 score += conf * CONFIDENCE_WEIGHT;
133 }
134
135 score += (m.detector_id.len() as f64) * DETECTOR_ID_LENGTH_WEIGHT;
137
138 score +=
140 (m.credential.len().min(MAX_CREDENTIAL_SCORE_LENGTH) as f64) * CREDENTIAL_LENGTH_WEIGHT;
141
142 if crate::confidence::known_prefix_confidence_floor(&m.credential).is_some()
144 && m.detector_id.as_ref() != "entropy"
145 && !m.detector_id.as_ref().starts_with("entropy-")
146 && !m.detector_id.as_ref().starts_with("generic-")
147 {
148 score += 5.0;
149 }
150
151 score
152}