keyhog_core/finding.rs
1//! Scanner findings: the output type for detected secrets with location,
2//! confidence, detector metadata, and optional verification status.
3
4use serde::Serialize;
5use std::collections::HashMap;
6
7use crate::Severity;
8
/// A credential match found by the scanner, before verification.
///
/// This type carries the raw, unredacted `credential`. The struct derives
/// both `Debug` and `Serialize` and the `credential` field has no skip
/// attribute, so debug output and serialized forms contain the secret
/// verbatim.
///
/// # Examples
///
/// ```rust
/// use keyhog_core::{MatchLocation, RawMatch, Severity};
///
/// let finding = RawMatch {
///     detector_id: "demo-token".into(),
///     detector_name: "Demo Token".into(),
///     service: "demo".into(),
///     severity: Severity::High,
///     credential: "demo_ABC12345".into(),
///     companion: None,
///     location: MatchLocation {
///         source: "filesystem".into(),
///         file_path: Some(".env".into()),
///         line: Some(1),
///         offset: 0,
///         commit: None,
///         author: None,
///         date: None,
///     },
///     entropy: None,
///     confidence: Some(0.9),
/// };
///
/// assert_eq!(finding.detector_id, "demo-token");
/// ```
#[derive(Debug, Clone, Serialize)]
pub struct RawMatch {
    /// Stable detector identifier; forms the first part of
    /// [`RawMatch::deduplication_key`].
    pub detector_id: String,
    /// Human-readable detector name.
    pub detector_name: String,
    /// Service namespace associated with the detector.
    pub service: String,
    /// Detector severity level.
    pub severity: Severity,
    /// Matched credential before redaction (the secret itself).
    pub credential: String,
    /// Companion credential or context value extracted nearby.
    pub companion: Option<String>,
    /// Source location for the match.
    pub location: MatchLocation,
    /// Shannon entropy of the matched credential (0.0 - 8.0).
    /// Omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub entropy: Option<f64>,
    /// Confidence score (0.0 - 1.0) combining entropy, keyword proximity, file type, etc.
    /// Omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub confidence: Option<f64>,
}
61
/// Where a credential was found: source backend, file path, line number,
/// byte offset, and (for history-derived matches) commit details.
///
/// # Examples
///
/// ```rust
/// use keyhog_core::MatchLocation;
///
/// let location = MatchLocation {
///     source: "stdin".into(),
///     file_path: None,
///     line: Some(3),
///     offset: 20,
///     commit: None,
///     author: None,
///     date: None,
/// };
///
/// assert_eq!(location.line, Some(3));
/// ```
#[derive(Debug, Clone, Serialize)]
pub struct MatchLocation {
    /// Logical source backend, such as `filesystem`, `stdin`, or `git-history`.
    ///
    /// The exact value `"git-history"` is significant: it is the value
    /// `RawMatch::deduplication_key` checks to decide whether the commit
    /// becomes part of the key.
    pub source: String,
    /// File path, object key, or logical path when available.
    pub file_path: Option<String>,
    /// One-based line number when known.
    pub line: Option<usize>,
    /// Byte offset from the start of the source chunk.
    pub offset: usize,
    /// Commit identifier for history-derived matches.
    pub commit: Option<String>,
    /// Commit author when available.
    pub author: Option<String>,
    /// Commit timestamp when available.
    // NOTE(review): stored as a free-form string; the expected timestamp
    // format is not visible in this file — confirm with the producer.
    pub date: Option<String>,
}
98
/// A finding after verification — the final output.
///
/// Unlike [`RawMatch`], this type holds only a redacted form of the
/// credential (`credential_redacted`), not the raw secret.
///
/// # Examples
///
/// ```rust
/// use keyhog_core::{MatchLocation, Severity, VerificationResult, VerifiedFinding};
/// use std::collections::HashMap;
///
/// let finding = VerifiedFinding {
///     detector_id: "demo-token".into(),
///     detector_name: "Demo Token".into(),
///     service: "demo".into(),
///     severity: Severity::High,
///     credential_redacted: "demo_...2345".into(),
///     location: MatchLocation {
///         source: "filesystem".into(),
///         file_path: Some(".env".into()),
///         line: Some(1),
///         offset: 0,
///         commit: None,
///         author: None,
///         date: None,
///     },
///     verification: VerificationResult::Skipped,
///     metadata: HashMap::new(),
///     additional_locations: Vec::new(),
///     confidence: Some(0.9),
/// };
///
/// assert_eq!(finding.service, "demo");
/// ```
#[derive(Debug, Clone, Serialize)]
pub struct VerifiedFinding {
    /// Stable detector identifier.
    pub detector_id: String,
    /// Human-readable detector name.
    pub detector_name: String,
    /// Service namespace associated with the detector.
    pub service: String,
    /// Detector severity level.
    pub severity: Severity,
    /// Redacted credential string suitable for output.
    pub credential_redacted: String,
    /// Primary source location for the finding.
    pub location: MatchLocation,
    /// Verification outcome for the credential.
    pub verification: VerificationResult,
    /// Extra metadata extracted from verification responses.
    /// Omitted from serialized output when the map is empty.
    #[serde(skip_serializing_if = "HashMap::is_empty")]
    pub metadata: HashMap<String, String>,
    /// Additional duplicate locations that resolved into the same finding.
    /// Omitted from serialized output when the list is empty.
    #[serde(skip_serializing_if = "Vec::is_empty")]
    pub additional_locations: Vec<MatchLocation>,
    /// Confidence score (0.0 - 1.0) combining entropy, keyword proximity, file type, etc.
    /// Omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub confidence: Option<f64>,
}
156
/// Result of live verification: whether the credential is active, invalid,
/// or was not conclusively tested.
///
/// Variant names are serialized in `snake_case` (for example `RateLimited`
/// becomes `rate_limited`) because of the `rename_all` attribute below.
///
/// # Examples
///
/// ```rust
/// use keyhog_core::VerificationResult;
///
/// let status = VerificationResult::Live;
/// assert!(matches!(status, VerificationResult::Live));
/// ```
#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum VerificationResult {
    /// The credential was verified as active.
    Live,
    /// The credential was checked and appears invalid.
    Dead,
    /// Verification was throttled by the upstream service.
    RateLimited,
    /// Verification failed before a conclusive result was produced.
    /// The payload is a description of the failure.
    Error(String),
    /// The detector has no live verification path.
    Unverifiable,
    /// Verification was disabled for this scan.
    Skipped,
}
183
184impl RawMatch {
185 /// Deduplication key: same detector + same credential = same finding.
186 /// Git history includes commit ID so the same secret in different commits stays distinct.
187 ///
188 /// # Examples
189 ///
190 /// ```rust
191 /// use keyhog_core::{MatchLocation, RawMatch, Severity};
192 ///
193 /// let finding = RawMatch {
194 /// detector_id: "demo".into(),
195 /// detector_name: "Demo".into(),
196 /// service: "demo".into(),
197 /// severity: Severity::High,
198 /// credential: "demo_ABC12345".into(),
199 /// companion: None,
200 /// location: MatchLocation {
201 /// source: "filesystem".into(),
202 /// file_path: Some(".env".into()),
203 /// line: Some(1),
204 /// offset: 0,
205 /// commit: None,
206 /// author: None,
207 /// date: None,
208 /// },
209 /// entropy: None,
210 /// confidence: None,
211 /// };
212 ///
213 /// assert_eq!(finding.deduplication_key().0, "demo");
214 /// ```
215 pub fn deduplication_key(&self) -> (String, String) {
216 if self.location.source == "git-history" {
217 (
218 format!(
219 "{}:{}",
220 self.detector_id,
221 self.location.commit.clone().unwrap_or_default()
222 ),
223 self.credential.clone(),
224 )
225 } else {
226 (self.detector_id.clone(), self.credential.clone())
227 }
228 }
229}
230
/// Redact a credential for display, keeping only a short head and tail.
///
/// The result has one of these shapes:
///
/// - empty input: eight asterisks;
/// - up to 8 characters: the first two and last two characters around `...`;
/// - longer input: a leading run of up to 8 ASCII letters, digits, `-`, `_`
///   or `.` (for example a type prefix such as `sk_live_`) followed by `...`
///   and the last four characters. When there is no such prefix, or the
///   prefix and suffix together would cover the whole credential, the first
///   four and last four characters are used instead.
///
/// Length is measured in characters, not bytes, so a short multi-byte secret
/// takes the same path as a short ASCII one.
///
/// NOTE(review): credentials of four characters or fewer are reproduced in
/// full because the two-character head and tail overlap (`"ab"` becomes
/// `"ab...ab"`). Existing tests pin that output, so it is left unchanged
/// here; decide separately whether such values should be fully masked.
///
/// # Examples
///
/// ```rust
/// use keyhog_core::redact;
///
/// assert_eq!(redact("sk_live_abcdefghijklmnopqrstuvwxyz1234"), "sk_live_...1234");
/// assert_eq!(redact("пароль"), "па...ль");
/// ```
pub fn redact(credential: &str) -> String {
    if credential.is_empty() {
        return EMPTY_SECRET_MASK.to_owned();
    }
    // Compare the character count, not the byte length: with a byte check a
    // 5-8 character multi-byte secret skipped the short path and its
    // four-character head and tail overlapped, printing the whole secret.
    // `nth` stops after at most SHORT_SECRET_MAX_LEN + 1 characters.
    let is_short = credential.chars().nth(SHORT_SECRET_MAX_LEN).is_none();
    if is_short {
        return redact_short_secret(credential);
    }
    redact_with_prefix_preservation(credential)
}

/// Output for an empty credential.
const EMPTY_SECRET_MASK: &str = "********";
/// Credentials with at most this many characters use the short form.
const SHORT_SECRET_MAX_LEN: usize = 8;
/// Characters shown at each end of a short credential.
const SHORT_SECRET_EDGE_CHARS: usize = 2;
/// Characters shown at each end of a long credential (suffix, and head when
/// no prefix is preserved).
const DEFAULT_REDACTION_EDGE_CHARS: usize = 4;
/// Upper bound on the preserved ASCII prefix.
const MAX_VISIBLE_PREFIX_CHARS: usize = 8;
/// Marker placed between the visible head and tail.
const REDACTION_SEPARATOR: &str = "...";

/// Short form: first and last `SHORT_SECRET_EDGE_CHARS` characters around the
/// separator.
fn redact_short_secret(credential: &str) -> String {
    let head = first_chars(credential, SHORT_SECRET_EDGE_CHARS);
    let tail = last_chars(credential, SHORT_SECRET_EDGE_CHARS);
    format!("{head}{REDACTION_SEPARATOR}{tail}")
}

/// Long form: keep the recognizable ASCII prefix plus the last few characters,
/// falling back to the plain head/tail form when no prefix exists or when
/// prefix and suffix would leave nothing hidden between them.
fn redact_with_prefix_preservation(credential: &str) -> String {
    let prefix_len = visible_prefix_len(credential);
    let suffix = last_chars(credential, DEFAULT_REDACTION_EDGE_CHARS);
    // Both lengths are in bytes here, matching the byte slice taken below.
    if prefix_len == 0 || credential.len() <= prefix_len + suffix.len() {
        return redact_without_prefix_preservation(credential);
    }
    // `prefix_len` lies inside a run of ASCII characters, so it is always a
    // valid char boundary.
    let prefix = &credential[..prefix_len];
    format!("{prefix}{REDACTION_SEPARATOR}{suffix}")
}

/// Byte length of the prefix to keep visible: the leading run of ASCII
/// letters, digits, `-`, `_` or `.`, capped at `MAX_VISIBLE_PREFIX_CHARS` and
/// at the credential length minus `DEFAULT_REDACTION_EDGE_CHARS`.
fn visible_prefix_len(credential: &str) -> usize {
    // Every counted character is ASCII, so the count equals the byte length.
    let ascii_run = credential
        .chars()
        .take(MAX_VISIBLE_PREFIX_CHARS)
        .take_while(|ch| ch.is_ascii_alphanumeric() || matches!(ch, '-' | '_' | '.'))
        .count();
    let cap = credential
        .len()
        .saturating_sub(DEFAULT_REDACTION_EDGE_CHARS);
    ascii_run.min(cap)
}

/// Plain head/tail form. When head and tail are the same text only the head
/// is printed, so the repeated value is not shown twice.
fn redact_without_prefix_preservation(credential: &str) -> String {
    let head = first_chars(credential, DEFAULT_REDACTION_EDGE_CHARS);
    let tail = last_chars(credential, DEFAULT_REDACTION_EDGE_CHARS);
    if head == tail {
        format!("{head}{REDACTION_SEPARATOR}")
    } else {
        format!("{head}{REDACTION_SEPARATOR}{tail}")
    }
}

/// First `count` characters of `value` (all of it when shorter).
fn first_chars(value: &str, count: usize) -> String {
    value.chars().take(count).collect()
}

/// Last `count` characters of `value` (all of it when shorter).
fn last_chars(value: &str, count: usize) -> String {
    let total = value.chars().count();
    value.chars().skip(total.saturating_sub(count)).collect()
}
306
#[cfg(test)]
mod tests {
    use super::*;

    /// Long ASCII credentials keep their recognizable prefix and last four
    /// characters; a short one keeps two characters at each end.
    #[test]
    fn redaction() {
        assert_eq!(redact("xoxb-1234567890-abc"), "xoxb-123...-abc");
        assert_eq!(redact("short"), "sh...rt");
        assert_eq!(redact("AKIA1234567890ABCDEF"), "AKIA1234...CDEF");
        assert_eq!(
            redact("sk-proj-abcdefghijklmnopqrstuvwxyz1234"),
            "sk-proj-...1234"
        );
    }

    /// Outside git history the key ignores file path and line, so the same
    /// credential found in two files yields the same key.
    #[test]
    fn deduplication_key_groups_same_credential() {
        let m1 = RawMatch {
            detector_id: "aws".into(),
            detector_name: "AWS".into(),
            service: "aws".into(),
            severity: Severity::Critical,
            credential: "AKIAIOSFODNN7EXAMPLE".into(),
            companion: None,
            location: MatchLocation {
                source: "fs".into(),
                file_path: Some("file1.py".into()),
                line: Some(10),
                offset: 0,
                commit: None,
                author: None,
                date: None,
            },
            entropy: None,
            confidence: None,
        };
        // Same match, different file and line only.
        let m2 = RawMatch {
            location: MatchLocation {
                file_path: Some("file2.py".into()),
                line: Some(20),
                ..m1.location.clone()
            },
            ..m1.clone()
        };
        assert_eq!(m1.deduplication_key(), m2.deduplication_key());
    }

    // Generates one `#[test]` per (name, input, expected) triple asserting
    // `redact(input) == expected`.
    macro_rules! redaction_case {
        ($name:ident, $input:expr, $expected:expr) => {
            #[test]
            fn $name() {
                assert_eq!(redact($input), $expected);
            }
        };
    }

    redaction_case!(redact_empty_secret, "", "********");
    // For one- and two-character inputs the two-character head and tail
    // overlap, so the whole value appears on both sides of the separator.
    redaction_case!(redact_single_char_secret, "a", "a...a");
    redaction_case!(redact_two_char_secret, "ab", "ab...ab");
    // Exactly at the short-secret limit.
    redaction_case!(redact_eight_char_secret, "12345678", "12...78");
    // '@' is not an allowed prefix character, so the plain head/tail form is used.
    redaction_case!(
        redact_prefixless_long_secret,
        "@@@@abcdefgh1234",
        "@@@@...1234"
    );
    // Non-ASCII input has no preservable prefix; head and tail are cut on
    // character boundaries.
    redaction_case!(redact_unicode_secret, "пароль-супер-длинный", "паро...нный");
    redaction_case!(
        redact_secret_with_preserved_ascii_prefix,
        "token_value_1234567890",
        "token_va...7890"
    );
    // Ten ASCII characters: the capped prefix plus the suffix would cover the
    // whole value, so the plain head/tail form is used. Head and tail differ
    // here, so both are shown.
    redaction_case!(
        redact_repeated_edges_compacts_suffix,
        "aaaaabbbbb",
        "aaaa...bbbb"
    );

    /// For the `git-history` source the commit id is appended to the detector
    /// part of the key.
    #[test]
    fn git_history_deduplication_includes_commit_id() {
        let matched = RawMatch {
            detector_id: "aws".into(),
            detector_name: "AWS".into(),
            service: "aws".into(),
            severity: Severity::Critical,
            credential: "AKIAIOSFODNN7EXAMPLE".into(),
            companion: None,
            location: MatchLocation {
                source: "git-history".into(),
                file_path: Some("history.env".into()),
                line: Some(1),
                offset: 0,
                commit: Some("abc123".into()),
                author: None,
                date: None,
            },
            entropy: None,
            confidence: None,
        };

        let (detector, credential) = matched.deduplication_key();
        assert_eq!(detector, "aws:abc123");
        assert_eq!(credential, "AKIAIOSFODNN7EXAMPLE");
    }
}
410}