Skip to main content

keyhog_core/
finding.rs

1//! Scanner findings: the output type for detected secrets with location,
2//! confidence, detector metadata, and optional verification status.
3
4use serde::Serialize;
5use std::collections::HashMap;
6
7use crate::Severity;
8
9/// A credential match found by the scanner, before verification.
10///
11/// # Examples
12///
13/// ```rust
14/// use keyhog_core::{MatchLocation, RawMatch, Severity};
15///
16/// let finding = RawMatch {
17///     detector_id: "demo-token".into(),
18///     detector_name: "Demo Token".into(),
19///     service: "demo".into(),
20///     severity: Severity::High,
21///     credential: "demo_ABC12345".into(),
22///     companion: None,
23///     location: MatchLocation {
24///         source: "filesystem".into(),
25///         file_path: Some(".env".into()),
26///         line: Some(1),
27///         offset: 0,
28///         commit: None,
29///         author: None,
30///         date: None,
31///     },
32///     entropy: None,
33///     confidence: Some(0.9),
34/// };
35///
36/// assert_eq!(finding.detector_id, "demo-token");
37/// ```
38#[derive(Debug, Clone, Serialize)]
39pub struct RawMatch {
40    /// Stable detector identifier.
41    pub detector_id: String,
42    /// Human-readable detector name.
43    pub detector_name: String,
44    /// Service namespace associated with the detector.
45    pub service: String,
46    /// Detector severity level.
47    pub severity: Severity,
48    /// Matched credential bytes before redaction.
49    pub credential: String,
50    /// Companion credential or context value extracted nearby.
51    pub companion: Option<String>,
52    /// Source location for the match.
53    pub location: MatchLocation,
54    /// Shannon entropy of the matched credential (0.0 - 8.0).
55    #[serde(skip_serializing_if = "Option::is_none")]
56    pub entropy: Option<f64>,
57    /// Confidence score (0.0 - 1.0) combining entropy, keyword proximity, file type, etc.
58    #[serde(skip_serializing_if = "Option::is_none")]
59    pub confidence: Option<f64>,
60}
61
62/// Where a credential was found: file path, line number, commit, and author.
63///
64/// # Examples
65///
66/// ```rust
67/// use keyhog_core::MatchLocation;
68///
69/// let location = MatchLocation {
70///     source: "stdin".into(),
71///     file_path: None,
72///     line: Some(3),
73///     offset: 20,
74///     commit: None,
75///     author: None,
76///     date: None,
77/// };
78///
79/// assert_eq!(location.line, Some(3));
80/// ```
81#[derive(Debug, Clone, Serialize)]
82pub struct MatchLocation {
83    /// Logical source backend, such as `filesystem` or `git`.
84    pub source: String,
85    /// File path, object key, or logical path when available.
86    pub file_path: Option<String>,
87    /// One-based line number when known.
88    pub line: Option<usize>,
89    /// Byte offset from the start of the source chunk.
90    pub offset: usize,
91    /// Commit identifier for history-derived matches.
92    pub commit: Option<String>,
93    /// Commit author when available.
94    pub author: Option<String>,
95    /// Commit timestamp when available.
96    pub date: Option<String>,
97}
98
99/// A finding after verification — the final output.
100///
101/// # Examples
102///
103/// ```rust
104/// use keyhog_core::{MatchLocation, Severity, VerificationResult, VerifiedFinding};
105/// use std::collections::HashMap;
106///
107/// let finding = VerifiedFinding {
108///     detector_id: "demo-token".into(),
109///     detector_name: "Demo Token".into(),
110///     service: "demo".into(),
111///     severity: Severity::High,
112///     credential_redacted: "demo_...2345".into(),
113///     location: MatchLocation {
114///         source: "filesystem".into(),
115///         file_path: Some(".env".into()),
116///         line: Some(1),
117///         offset: 0,
118///         commit: None,
119///         author: None,
120///         date: None,
121///     },
122///     verification: VerificationResult::Skipped,
123///     metadata: HashMap::new(),
124///     additional_locations: Vec::new(),
125///     confidence: Some(0.9),
126/// };
127///
128/// assert_eq!(finding.service, "demo");
129/// ```
130#[derive(Debug, Clone, Serialize)]
131pub struct VerifiedFinding {
132    /// Stable detector identifier.
133    pub detector_id: String,
134    /// Human-readable detector name.
135    pub detector_name: String,
136    /// Service namespace associated with the detector.
137    pub service: String,
138    /// Detector severity level.
139    pub severity: Severity,
140    /// Redacted credential string suitable for output.
141    pub credential_redacted: String,
142    /// Primary source location for the finding.
143    pub location: MatchLocation,
144    /// Verification outcome for the credential.
145    pub verification: VerificationResult,
146    /// Extra metadata extracted from verification responses.
147    #[serde(skip_serializing_if = "HashMap::is_empty")]
148    pub metadata: HashMap<String, String>,
149    /// Additional duplicate locations that resolved into the same finding.
150    #[serde(skip_serializing_if = "Vec::is_empty")]
151    pub additional_locations: Vec<MatchLocation>,
152    /// Confidence score (0.0 - 1.0) combining entropy, keyword proximity, file type, etc.
153    #[serde(skip_serializing_if = "Option::is_none")]
154    pub confidence: Option<f64>,
155}
156
157/// Result of live verification: whether the credential is active, revoked, or untested.
158///
159/// # Examples
160///
161/// ```rust
162/// use keyhog_core::VerificationResult;
163///
164/// let status = VerificationResult::Live;
165/// assert!(matches!(status, VerificationResult::Live));
166/// ```
167#[derive(Debug, Clone, Serialize)]
168#[serde(rename_all = "snake_case")]
169pub enum VerificationResult {
170    /// The credential was verified as active.
171    Live,
172    /// The credential was checked and appears invalid.
173    Dead,
174    /// Verification was throttled by the upstream service.
175    RateLimited,
176    /// Verification failed before a conclusive result was produced.
177    Error(String),
178    /// The detector has no live verification path.
179    Unverifiable,
180    /// Verification was disabled for this scan.
181    Skipped,
182}
183
184impl RawMatch {
185    /// Deduplication key: same detector + same credential = same finding.
186    /// Git history includes commit ID so the same secret in different commits stays distinct.
187    ///
188    /// # Examples
189    ///
190    /// ```rust
191    /// use keyhog_core::{MatchLocation, RawMatch, Severity};
192    ///
193    /// let finding = RawMatch {
194    ///     detector_id: "demo".into(),
195    ///     detector_name: "Demo".into(),
196    ///     service: "demo".into(),
197    ///     severity: Severity::High,
198    ///     credential: "demo_ABC12345".into(),
199    ///     companion: None,
200    ///     location: MatchLocation {
201    ///         source: "filesystem".into(),
202    ///         file_path: Some(".env".into()),
203    ///         line: Some(1),
204    ///         offset: 0,
205    ///         commit: None,
206    ///         author: None,
207    ///         date: None,
208    ///     },
209    ///     entropy: None,
210    ///     confidence: None,
211    /// };
212    ///
213    /// assert_eq!(finding.deduplication_key().0, "demo");
214    /// ```
215    pub fn deduplication_key(&self) -> (String, String) {
216        if self.location.source == "git-history" {
217            (
218                format!(
219                    "{}:{}",
220                    self.detector_id,
221                    self.location.commit.clone().unwrap_or_default()
222                ),
223                self.credential.clone(),
224            )
225        } else {
226            (self.detector_id.clone(), self.credential.clone())
227        }
228    }
229}
230
/// Redact a credential for safe display, hiding its middle and exact length.
///
/// The result keeps a short head and tail of the credential joined by `...`:
///
/// - an empty credential becomes `********`;
/// - a credential of at most 8 characters becomes its first 2 and last 2
///   characters;
/// - a longer credential keeps up to 8 leading characters drawn from ASCII
///   letters, digits, `-`, `_` and `.` (so a type prefix such as `sk_live_`
///   stays recognisable) plus its last 4 characters. When it has no such
///   prefix, or the prefix would run into the tail, the first 4 and last 4
///   characters are used instead.
///
/// The short/long decision is made on the number of characters, not bytes,
/// so a short non-ASCII secret is never routed to the longer path, whose
/// 4-character edges would overlap and reveal it completely.
///
/// NOTE(review): a secret of four or fewer characters is still fully visible
/// in the output (for example `"ab"` becomes `"ab...ab"`).
///
/// # Examples
///
/// ```rust
/// use keyhog_core::redact;
///
/// let key = format!("sk_live_{}", "abcdefghijklmnopqrstuvwxyz1234");
/// assert_eq!(redact(&key), "sk_live_...1234");
/// ```
pub fn redact(credential: &str) -> String {
    if credential.is_empty() {
        return "*".repeat(8);
    }
    // Compare character counts: `str::len` is a byte length and would send
    // short multi-byte secrets down the long path.
    if credential.chars().count() <= SHORT_SECRET_MAX_LEN {
        return redact_short_secret(credential);
    }
    redact_with_prefix_preservation(credential)
}

/// Longest credential, in characters, that is treated as "short".
const SHORT_SECRET_MAX_LEN: usize = 8;
/// Characters shown at each end of a short credential.
const SHORT_SECRET_EDGE_CHARS: usize = 2;
/// Characters shown at the tail (and at the head when no prefix is kept).
const DEFAULT_REDACTION_EDGE_CHARS: usize = 4;
/// Upper bound on the number of prefix characters kept visible.
const MAX_VISIBLE_PREFIX_CHARS: usize = 8;
/// Text standing in for the hidden middle of the credential.
const REDACTION_SEPARATOR: &str = "...";

/// Show the first and last `SHORT_SECRET_EDGE_CHARS` characters of a short secret.
fn redact_short_secret(credential: &str) -> String {
    let start = first_chars(credential, SHORT_SECRET_EDGE_CHARS);
    let end = last_chars(credential, SHORT_SECRET_EDGE_CHARS);
    format!("{start}{REDACTION_SEPARATOR}{end}")
}

/// Keep a recognisable ASCII prefix plus the last few characters.
///
/// Falls back to [`redact_without_prefix_preservation`] when there is no
/// usable prefix or when prefix and suffix together would cover the whole
/// credential.
fn redact_with_prefix_preservation(credential: &str) -> String {
    let prefix_len = visible_prefix_len(credential);
    let suffix = last_chars(credential, DEFAULT_REDACTION_EDGE_CHARS);
    if prefix_len == 0 || credential.len() <= prefix_len + suffix.len() {
        return redact_without_prefix_preservation(credential);
    }
    // `prefix_len` never exceeds the run of leading ASCII characters, so it
    // is always a valid char boundary.
    let prefix = &credential[..prefix_len];
    format!("{prefix}{REDACTION_SEPARATOR}{suffix}")
}

/// Byte length of the prefix that may stay visible.
///
/// Counts leading ASCII alphanumerics, `-`, `_` and `.`, stops after
/// `MAX_VISIBLE_PREFIX_CHARS` of them, and caps the result so that at least
/// `DEFAULT_REDACTION_EDGE_CHARS` bytes remain after the prefix.
fn visible_prefix_len(credential: &str) -> usize {
    credential
        .char_indices()
        .take_while(|(_, ch)| ch.is_ascii_alphanumeric() || matches!(ch, '-' | '_' | '.'))
        .take(MAX_VISIBLE_PREFIX_CHARS)
        .last()
        .map(|(idx, ch)| idx + ch.len_utf8())
        .unwrap_or(0)
        .min(
            credential
                .len()
                .saturating_sub(DEFAULT_REDACTION_EDGE_CHARS),
        )
}

/// Show the first and last `DEFAULT_REDACTION_EDGE_CHARS` characters.
///
/// When both edges are the same text, the trailing edge is dropped so the
/// output does not repeat it.
fn redact_without_prefix_preservation(credential: &str) -> String {
    let start = first_chars(credential, DEFAULT_REDACTION_EDGE_CHARS);
    let end = last_chars(credential, DEFAULT_REDACTION_EDGE_CHARS);
    if start == end {
        format!("{start}{REDACTION_SEPARATOR}")
    } else {
        format!("{start}{REDACTION_SEPARATOR}{end}")
    }
}

/// First `count` characters of `value` (all of it when shorter).
fn first_chars(value: &str, count: usize) -> String {
    value.chars().take(count).collect()
}

/// Last `count` characters of `value` (all of it when shorter).
fn last_chars(value: &str, count: usize) -> String {
    let total = value.chars().count();
    value.chars().skip(total.saturating_sub(count)).collect()
}
307
#[cfg(test)]
mod tests {
    use super::*;

    /// Credential shared by the deduplication tests.
    const AWS_EXAMPLE_KEY: &str = "AKIAIOSFODNN7EXAMPLE";

    /// Build an AWS match that differs only in its location details.
    fn aws_match(source: &str, file_path: &str, line: usize, commit: Option<&str>) -> RawMatch {
        RawMatch {
            detector_id: "aws".into(),
            detector_name: "AWS".into(),
            service: "aws".into(),
            severity: Severity::Critical,
            credential: AWS_EXAMPLE_KEY.into(),
            companion: None,
            location: MatchLocation {
                source: source.into(),
                file_path: Some(file_path.into()),
                line: Some(line),
                offset: 0,
                commit: commit.map(String::from),
                author: None,
                date: None,
            },
            entropy: None,
            confidence: None,
        }
    }

    #[test]
    fn redaction() {
        let cases = [
            ("xoxb-1234567890-abc", "xoxb-123...-abc"),
            ("short", "sh...rt"),
            ("AKIA1234567890ABCDEF", "AKIA1234...CDEF"),
            (
                "sk-proj-abcdefghijklmnopqrstuvwxyz1234",
                "sk-proj-...1234",
            ),
        ];
        for (input, expected) in cases {
            assert_eq!(redact(input), expected);
        }
    }

    #[test]
    fn deduplication_key_groups_same_credential() {
        // Same detector and credential, different file and line.
        let first = aws_match("fs", "file1.py", 10, None);
        let second = aws_match("fs", "file2.py", 20, None);
        assert_eq!(first.deduplication_key(), second.deduplication_key());
    }

    /// Generate one `#[test]` per `name: input => expected` entry.
    macro_rules! redaction_cases {
        ($($name:ident: $input:expr => $expected:expr,)+) => {
            $(
                #[test]
                fn $name() {
                    assert_eq!(redact($input), $expected);
                }
            )+
        };
    }

    redaction_cases! {
        redact_empty_secret: "" => "********",
        redact_single_char_secret: "a" => "a...a",
        redact_two_char_secret: "ab" => "ab...ab",
        redact_eight_char_secret: "12345678" => "12...78",
        redact_prefixless_long_secret: "@@@@abcdefgh1234" => "@@@@...1234",
        redact_unicode_secret: "пароль-супер-длинный" => "паро...нный",
        redact_secret_with_preserved_ascii_prefix: "token_value_1234567890" => "token_va...7890",
        redact_repeated_edges_compacts_suffix: "aaaaabbbbb" => "aaaa...bbbb",
    }

    #[test]
    fn git_history_deduplication_includes_commit_id() {
        let matched = aws_match("git-history", "history.env", 1, Some("abc123"));

        let (detector, credential) = matched.deduplication_key();
        assert_eq!(detector, "aws:abc123");
        assert_eq!(credential, AWS_EXAMPLE_KEY);
    }
}