Skip to main content

keyhog_core/
finding.rs

1//! Scanner findings: the output type for detected secrets with location,
2//! confidence, detector metadata, and optional verification status.
3
4use serde::{Deserialize, Serialize};
5use std::borrow::Cow;
6use std::collections::HashMap;
7use std::sync::Arc;
8
9use crate::Severity;
10
11/// A raw pattern match before verification or deduplication.
12#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
13pub struct RawMatch {
14    /// Stable detector identifier.
15    #[serde(with = "serde_arc_str")]
16    pub detector_id: Arc<str>,
17    /// Human-readable detector name.
18    #[serde(with = "serde_arc_str")]
19    pub detector_name: Arc<str>,
20    /// Service namespace associated with the detector.
21    #[serde(with = "serde_arc_str")]
22    pub service: Arc<str>,
23    /// Detector severity level.
24    pub severity: Severity,
25    /// Matched credential bytes before redaction.
26    #[serde(with = "serde_arc_str")]
27    pub credential: Arc<str>,
28    /// SHA-256 hash of the credential for allowlisting and deduplication.
29    pub credential_hash: String,
30    /// Companion credential or context value extracted nearby.
31    pub companions: std::collections::HashMap<String, String>,
32    /// Source location for the match.
33    pub location: MatchLocation,
34    /// Shannon entropy of the matched credential (0.0 - 8.0).
35    #[serde(skip_serializing_if = "Option::is_none")]
36    pub entropy: Option<f64>,
37    /// Confidence score (0.0 - 1.0) combining entropy, keyword proximity, file type, etc.
38    #[serde(skip_serializing_if = "Option::is_none")]
39    pub confidence: Option<f64>,
40}
41
42impl Eq for RawMatch {}
43
44impl PartialOrd for RawMatch {
45    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
46        Some(self.cmp(other))
47    }
48}
49
50impl Ord for RawMatch {
51    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
52        // Higher confidence first
53        let self_conf = self.confidence.unwrap_or(0.0);
54        let other_conf = other.confidence.unwrap_or(0.0);
55
56        match other_conf.total_cmp(&self_conf) {
57            std::cmp::Ordering::Equal => {}
58            ord => return ord,
59        }
60
61        // Then higher severity first (Critical > High > Medium > Low > Info)
62        match other.severity.cmp(&self.severity) {
63            std::cmp::Ordering::Equal => {}
64            ord => return ord,
65        }
66
67        // Finally, deterministic sort by detector and credential
68        match self.detector_id.cmp(&other.detector_id) {
69            std::cmp::Ordering::Equal => self.credential.cmp(&other.credential),
70            ord => ord,
71        }
72    }
73}
74
75/// Where a credential was found: file path, line number, commit, and author.
76#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
77pub struct MatchLocation {
78    /// Logical source backend, such as `filesystem` or `git`.
79    #[serde(with = "serde_arc_str")]
80    pub source: Arc<str>,
81    /// File path, object key, or logical path when available.
82    ///
83    /// Paths stored here must be valid UTF-8. Source implementations that see
84    /// non-UTF-8 paths should encode them into a reversible escaped string
85    /// before constructing a [`MatchLocation`].
86    #[serde(with = "serde_arc_str_opt")]
87    pub file_path: Option<Arc<str>>,
88    /// One-based line number when known.
89    pub line: Option<usize>,
90    /// Byte offset from the start of the source chunk.
91    pub offset: usize,
92    /// Commit identifier for history-derived matches.
93    #[serde(with = "serde_arc_str_opt")]
94    pub commit: Option<Arc<str>>,
95    /// Commit author when available.
96    #[serde(with = "serde_arc_str_opt")]
97    pub author: Option<Arc<str>>,
98    /// Commit timestamp when available.
99    #[serde(with = "serde_arc_str_opt")]
100    pub date: Option<Arc<str>>,
101}
102
103/// A finding after verification — the final output.
104#[derive(Debug, Clone, Serialize, Deserialize)]
105pub struct VerifiedFinding {
106    /// Stable detector identifier.
107    #[serde(with = "serde_arc_str")]
108    pub detector_id: Arc<str>,
109    /// Human-readable detector name.
110    #[serde(with = "serde_arc_str")]
111    pub detector_name: Arc<str>,
112    /// Service namespace associated with the detector.
113    #[serde(with = "serde_arc_str")]
114    pub service: Arc<str>,
115    /// Detector severity level.
116    pub severity: Severity,
117    /// Redacted version of the credential for reporting.
118    pub credential_redacted: Cow<'static, str>,
119    /// SHA-256 hash of the original credential for internal correlation.
120    pub credential_hash: String,
121    /// Source location for the match.
122    pub location: MatchLocation,
123    /// Verification result.
124    pub verification: VerificationResult,
125    /// Additional provider-specific metadata (e.g. account ID, scope).
126    pub metadata: HashMap<String, String>,
127    /// Additional duplicate locations found for this credential.
128    pub additional_locations: Vec<MatchLocation>,
129    /// Confidence score (0.0 - 1.0) combining entropy, keyword proximity, file type, etc.
130    #[serde(skip_serializing_if = "Option::is_none")]
131    pub confidence: Option<f64>,
132}
133
134/// Result of live verification: whether the credential is active, revoked, or untested.
135#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
136#[serde(rename_all = "snake_case")]
137pub enum VerificationResult {
138    /// Credential is active and verified by the provider.
139    Live,
140    /// Credential is valid but has been explicitly revoked or disabled.
141    Revoked,
142    /// Credential was rejected by the provider (invalid password/token).
143    Dead,
144    /// Provider returned a rate-limit error (e.g. 429).
145    RateLimited,
146    /// Verification failed due to network error or timeout.
147    Error(String),
148    /// Detector does not support live verification.
149    Unverifiable,
150    /// Verification was not attempted (e.g. disabled via flag).
151    Skipped,
152}
153
154impl RawMatch {
155    /// Get unique key for deduplication.
156    pub fn deduplication_key(&self) -> (&str, &str) {
157        (&self.detector_id, &self.credential)
158    }
159}
160
161pub mod serde_arc_str {
162    use serde::{Deserialize, Deserializer, Serialize, Serializer};
163    use std::sync::Arc;
164
165    pub fn serialize<S>(val: &Arc<str>, serializer: S) -> Result<S::Ok, S::Error>
166    where
167        S: Serializer,
168    {
169        val.as_ref().serialize(serializer)
170    }
171
172    pub fn deserialize<'de, D>(deserializer: D) -> Result<Arc<str>, D::Error>
173    where
174        D: Deserializer<'de>,
175    {
176        String::deserialize(deserializer).map(Arc::from)
177    }
178}
179
180pub mod serde_arc_str_opt {
181    use serde::{Deserialize, Deserializer, Serialize, Serializer};
182    use std::sync::Arc;
183
184    pub fn serialize<S>(val: &Option<Arc<str>>, serializer: S) -> Result<S::Ok, S::Error>
185    where
186        S: Serializer,
187    {
188        val.as_ref().map(|s| s.as_ref()).serialize(serializer)
189    }
190
191    pub fn deserialize<'de, D>(deserializer: D) -> Result<Option<Arc<str>>, D::Error>
192    where
193        D: Deserializer<'de>,
194    {
195        Option::<String>::deserialize(deserializer).map(|opt| opt.map(Arc::from))
196    }
197}