Skip to main content

keyhog_core/
finding.rs

1//! Scanner findings: the output type for detected secrets with location,
2//! confidence, detector metadata, and optional verification status.
3
4use serde::Serialize;
5use std::collections::HashMap;
6
7use crate::Severity;
8
9/// A credential match found by the scanner, before verification.
10///
11/// # Examples
12///
13/// ```rust
14/// use keyhog_core::{MatchLocation, RawMatch, Severity};
15///
16/// let finding = RawMatch {
17///     detector_id: "demo-token".into(),
18///     detector_name: "Demo Token".into(),
19///     service: "demo".into(),
20///     severity: Severity::High,
21///     credential: "demo_ABC12345".into(),
22///     companion: None,
23///     location: MatchLocation {
24///         source: "filesystem".into(),
25///         file_path: Some(".env".into()),
26///         line: Some(1),
27///         offset: 0,
28///         commit: None,
29///         author: None,
30///         date: None,
31///     },
32///     entropy: None,
33///     confidence: Some(0.9),
34/// };
35///
36/// assert_eq!(finding.detector_id, "demo-token");
37/// ```
38#[derive(Debug, Clone, Serialize)]
39pub struct RawMatch {
40    /// Stable detector identifier.
41    pub detector_id: String,
42    /// Human-readable detector name.
43    pub detector_name: String,
44    /// Service namespace associated with the detector.
45    pub service: String,
46    /// Detector severity level.
47    pub severity: Severity,
48    /// Matched credential bytes before redaction.
49    pub credential: String,
50    /// Companion credential or context value extracted nearby.
51    pub companion: Option<String>,
52    /// Source location for the match.
53    pub location: MatchLocation,
54    /// Shannon entropy of the matched credential (0.0 - 8.0).
55    #[serde(skip_serializing_if = "Option::is_none")]
56    pub entropy: Option<f64>,
57    /// Confidence score (0.0 - 1.0) combining entropy, keyword proximity, file type, etc.
58    #[serde(skip_serializing_if = "Option::is_none")]
59    pub confidence: Option<f64>,
60}
61
62/// Where a credential was found: file path, line number, commit, and author.
63///
64/// # Examples
65///
66/// ```rust
67/// use keyhog_core::MatchLocation;
68///
69/// let location = MatchLocation {
70///     source: "stdin".into(),
71///     file_path: None,
72///     line: Some(3),
73///     offset: 20,
74///     commit: None,
75///     author: None,
76///     date: None,
77/// };
78///
79/// assert_eq!(location.line, Some(3));
80/// ```
81#[derive(Debug, Clone, Serialize)]
82pub struct MatchLocation {
83    /// Logical source backend, such as `filesystem` or `git`.
84    pub source: String,
85    /// File path, object key, or logical path when available.
86    pub file_path: Option<String>,
87    /// One-based line number when known.
88    pub line: Option<usize>,
89    /// Byte offset from the start of the source chunk.
90    pub offset: usize,
91    /// Commit identifier for history-derived matches.
92    pub commit: Option<String>,
93    /// Commit author when available.
94    pub author: Option<String>,
95    /// Commit timestamp when available.
96    pub date: Option<String>,
97}
98
99/// A finding after verification — the final output.
100///
101/// # Examples
102///
103/// ```rust
104/// use keyhog_core::{MatchLocation, Severity, VerificationResult, VerifiedFinding};
105/// use std::collections::HashMap;
106///
107/// let finding = VerifiedFinding {
108///     detector_id: "demo-token".into(),
109///     detector_name: "Demo Token".into(),
110///     service: "demo".into(),
111///     severity: Severity::High,
112///     credential_redacted: "demo_...2345".into(),
113///     location: MatchLocation {
114///         source: "filesystem".into(),
115///         file_path: Some(".env".into()),
116///         line: Some(1),
117///         offset: 0,
118///         commit: None,
119///         author: None,
120///         date: None,
121///     },
122///     verification: VerificationResult::Skipped,
123///     metadata: HashMap::new(),
124///     additional_locations: Vec::new(),
125///     confidence: Some(0.9),
126/// };
127///
128/// assert_eq!(finding.service, "demo");
129/// ```
130#[derive(Debug, Clone, Serialize)]
131pub struct VerifiedFinding {
132    /// Stable detector identifier.
133    pub detector_id: String,
134    /// Human-readable detector name.
135    pub detector_name: String,
136    /// Service namespace associated with the detector.
137    pub service: String,
138    /// Detector severity level.
139    pub severity: Severity,
140    /// Redacted credential string suitable for output.
141    pub credential_redacted: String,
142    /// Primary source location for the finding.
143    pub location: MatchLocation,
144    /// Verification outcome for the credential.
145    pub verification: VerificationResult,
146    /// Extra metadata extracted from verification responses.
147    #[serde(skip_serializing_if = "HashMap::is_empty")]
148    pub metadata: HashMap<String, String>,
149    /// Additional duplicate locations that resolved into the same finding.
150    #[serde(skip_serializing_if = "Vec::is_empty")]
151    pub additional_locations: Vec<MatchLocation>,
152    /// Confidence score (0.0 - 1.0) combining entropy, keyword proximity, file type, etc.
153    #[serde(skip_serializing_if = "Option::is_none")]
154    pub confidence: Option<f64>,
155}
156
157/// Result of live verification: whether the credential is active, revoked, or untested.
158///
159/// # Examples
160///
161/// ```rust
162/// use keyhog_core::VerificationResult;
163///
164/// let status = VerificationResult::Live;
165/// assert!(matches!(status, VerificationResult::Live));
166/// ```
167#[derive(Debug, Clone, Serialize)]
168#[serde(rename_all = "snake_case")]
169pub enum VerificationResult {
170    /// The credential was verified as active.
171    Live,
172    /// The credential was checked and appears invalid.
173    Dead,
174    /// Verification was throttled by the upstream service.
175    RateLimited,
176    /// Verification failed before a conclusive result was produced.
177    Error(String),
178    /// The detector has no live verification path.
179    Unverifiable,
180    /// Verification was disabled for this scan.
181    Skipped,
182}
183
184impl RawMatch {
185    /// Deduplication key: same detector + same credential = same finding.
186    /// Git history includes commit ID so the same secret in different commits stays distinct.
187    ///
188    /// # Examples
189    ///
190    /// ```rust
191    /// use keyhog_core::{MatchLocation, RawMatch, Severity};
192    ///
193    /// let finding = RawMatch {
194    ///     detector_id: "demo".into(),
195    ///     detector_name: "Demo".into(),
196    ///     service: "demo".into(),
197    ///     severity: Severity::High,
198    ///     credential: "demo_ABC12345".into(),
199    ///     companion: None,
200    ///     location: MatchLocation {
201    ///         source: "filesystem".into(),
202    ///         file_path: Some(".env".into()),
203    ///         line: Some(1),
204    ///         offset: 0,
205    ///         commit: None,
206    ///         author: None,
207    ///         date: None,
208    ///     },
209    ///     entropy: None,
210    ///     confidence: None,
211    /// };
212    ///
213    /// assert_eq!(finding.deduplication_key().0, "demo");
214    /// ```
215    pub fn deduplication_key(&self) -> (String, String) {
216        if self.location.source == "git-history" {
217            (
218                format!(
219                    "{}:{}",
220                    self.detector_id,
221                    self.location.commit.clone().unwrap_or_default()
222                ),
223                self.credential.clone(),
224            )
225        } else {
226            (self.detector_id.clone(), self.credential.clone())
227        }
228    }
229}
230
231/// Redact a credential for safe display without leaking type prefixes or exact length.
232///
233/// # Examples
234///
235/// ```rust
236/// use keyhog_core::redact;
237///
238/// assert_eq!(redact("sk_live_abcdefghijklmnopqrstuvwxyz1234"), "sk_live_...1234");
239/// ```
240pub fn redact(credential: &str) -> String {
241    if credential.is_empty() {
242        return "*".repeat(8);
243    }
244    if credential.len() <= SHORT_SECRET_MAX_LEN {
245        return redact_short_secret(credential);
246    }
247    redact_with_prefix_preservation(credential)
248}
249
/// Longest input that still takes the short-secret redaction path.
const SHORT_SECRET_MAX_LEN: usize = 8;
/// Number of characters kept at each end of a short secret.
const SHORT_SECRET_EDGE_CHARS: usize = 2;
/// Number of characters kept at each end of a longer secret; also the size of
/// the visible suffix when a prefix is preserved.
const DEFAULT_REDACTION_EDGE_CHARS: usize = 4;
/// Upper bound on how many leading characters may stay visible as a prefix.
const MAX_VISIBLE_PREFIX_CHARS: usize = 8;
/// Text placed between the visible edges of a redacted secret.
const REDACTION_SEPARATOR: &str = "...";
255
256fn redact_short_secret(credential: &str) -> String {
257    let start = first_chars(credential, SHORT_SECRET_EDGE_CHARS);
258    let end = last_chars(credential, SHORT_SECRET_EDGE_CHARS);
259    format!("{start}{REDACTION_SEPARATOR}{end}")
260}
261
262fn redact_with_prefix_preservation(credential: &str) -> String {
263    let prefix_len = visible_prefix_len(credential);
264    let suffix_len = last_chars(credential, DEFAULT_REDACTION_EDGE_CHARS).len();
265    if prefix_len == 0 || credential.len() <= prefix_len + suffix_len {
266        return redact_without_prefix_preservation(credential);
267    }
268    let prefix = &credential[..prefix_len];
269    let suffix = &credential[credential.len() - suffix_len..];
270    format!("{prefix}{REDACTION_SEPARATOR}{suffix}")
271}
272
273fn visible_prefix_len(credential: &str) -> usize {
274    credential
275        .char_indices()
276        .take_while(|(_, ch)| ch.is_ascii_alphanumeric() || matches!(ch, '-' | '_' | '.'))
277        .take(MAX_VISIBLE_PREFIX_CHARS)
278        .last()
279        .map(|(idx, ch)| idx + ch.len_utf8())
280        .unwrap_or(0)
281        .min(
282            credential
283                .len()
284                .saturating_sub(DEFAULT_REDACTION_EDGE_CHARS),
285        )
286}
287
288fn redact_without_prefix_preservation(credential: &str) -> String {
289    let start = first_chars(credential, DEFAULT_REDACTION_EDGE_CHARS);
290    let end = last_chars(credential, DEFAULT_REDACTION_EDGE_CHARS);
291    if start == end {
292        format!("{start}{REDACTION_SEPARATOR}")
293    } else {
294        format!("{start}{REDACTION_SEPARATOR}{end}")
295    }
296}
297
/// Returns the first `count` characters of `value` as an owned string, or all
/// of `value` when it has fewer characters than that.
fn first_chars(value: &str, count: usize) -> String {
    // Byte index where character number `count` starts; if there is no such
    // character the whole string is taken.
    let end = value
        .char_indices()
        .nth(count)
        .map_or(value.len(), |(idx, _)| idx);
    value[..end].to_owned()
}
301
/// Returns the last `count` characters of `value` as an owned string, or all
/// of `value` when it has fewer characters than that.
fn last_chars(value: &str, count: usize) -> String {
    let start = match count.checked_sub(1) {
        // Zero characters requested: start at the end, giving an empty slice.
        None => value.len(),
        // Walk backwards to the `count`-th character from the end; if the
        // string is shorter than that, start from the beginning.
        Some(back) => value
            .char_indices()
            .rev()
            .nth(back)
            .map_or(0, |(idx, _)| idx),
    };
    value[start..].to_owned()
}
306
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an AWS-style match; only the location details vary between tests.
    fn aws_match(source: &str, file_path: &str, line: usize, commit: Option<&str>) -> RawMatch {
        RawMatch {
            detector_id: "aws".into(),
            detector_name: "AWS".into(),
            service: "aws".into(),
            severity: Severity::Critical,
            credential: "AKIAIOSFODNN7EXAMPLE".into(),
            companion: None,
            location: MatchLocation {
                source: source.into(),
                file_path: Some(file_path.into()),
                line: Some(line),
                offset: 0,
                commit: commit.map(String::from),
                author: None,
                date: None,
            },
            entropy: None,
            confidence: None,
        }
    }

    #[test]
    fn redaction() {
        assert_eq!(redact("xoxb-1234567890-abc"), "xoxb-123...-abc");
        assert_eq!(redact("short"), "sh...rt");
        assert_eq!(redact("AKIA1234567890ABCDEF"), "AKIA1234...CDEF");
        assert_eq!(
            redact("sk-proj-abcdefghijklmnopqrstuvwxyz1234"),
            "sk-proj-...1234"
        );
    }

    #[test]
    fn deduplication_key_groups_same_credential() {
        // Same detector and credential, different files and lines.
        let m1 = aws_match("fs", "file1.py", 10, None);
        let m2 = aws_match("fs", "file2.py", 20, None);
        assert_eq!(m1.deduplication_key(), m2.deduplication_key());
    }

    /// Generates one `#[test]` asserting that `redact($input)` equals `$expected`.
    macro_rules! redaction_case {
        ($name:ident, $input:expr, $expected:expr) => {
            #[test]
            fn $name() {
                assert_eq!(redact($input), $expected);
            }
        };
    }

    redaction_case!(redact_empty_secret, "", "********");
    redaction_case!(redact_single_char_secret, "a", "a...a");
    redaction_case!(redact_two_char_secret, "ab", "ab...ab");
    redaction_case!(redact_eight_char_secret, "12345678", "12...78");
    redaction_case!(
        redact_prefixless_long_secret,
        "@@@@abcdefgh1234",
        "@@@@...1234"
    );
    redaction_case!(redact_unicode_secret, "пароль-супер-длинный", "паро...нный");
    redaction_case!(
        redact_secret_with_preserved_ascii_prefix,
        "token_value_1234567890",
        "token_va...7890"
    );
    redaction_case!(
        redact_repeated_edges_compacts_suffix,
        "aaaaabbbbb",
        "aaaa...bbbb"
    );
    // The case above has different edges, so it never reaches the branch that
    // drops a suffix identical to the prefix. This one does.
    redaction_case!(
        redact_identical_edges_drops_suffix,
        "@@@@middle@@@@",
        "@@@@..."
    );

    #[test]
    fn git_history_deduplication_includes_commit_id() {
        let matched = aws_match("git-history", "history.env", 1, Some("abc123"));

        let (detector, credential) = matched.deduplication_key();
        assert_eq!(detector, "aws:abc123");
        assert_eq!(credential, "AKIAIOSFODNN7EXAMPLE");
    }

    #[test]
    fn git_history_deduplication_separates_commits() {
        // The same secret in two commits must not collapse into one finding.
        let first = aws_match("git-history", "history.env", 1, Some("abc123"));
        let second = aws_match("git-history", "history.env", 1, Some("def456"));
        assert_ne!(first.deduplication_key(), second.deduplication_key());
    }
}