Skip to main content

keyhog_core/
allowlist.rs

//! Allowlist support: `.keyhogignore` file parsing for suppressing known false
//! positives by path glob, detector ID, or credential hash.
//!
//! Allowlist: known false positives and ignored patterns.
//!
//! Users can create a `.keyhogignore` file to suppress known false positives.
//! Format (one entry per line):
//!   - `hash:<sha256>` — ignore a specific credential by hash
//!   - `detector:<id>` — ignore all findings from a detector
//!   - `path:<glob>` — ignore files matching a glob pattern
//!   - `# comment` — comments
//!   - blank lines are skipped

13use std::collections::HashSet;
14use std::path::Component;
15use std::path::Path;
16
17use crate::VerifiedFinding;
18
19/// User-defined suppressions loaded from `.keyhogignore`: credential hashes, detector IDs, and path globs.
20///
21/// # Examples
22///
23/// ```rust
24/// use keyhog_core::allowlist::Allowlist;
25///
26/// let allowlist = Allowlist::parse("detector:demo-token\npath:**/*.md\n");
27/// assert!(allowlist.ignored_detectors.contains("demo-token"));
28/// ```
29#[derive(Debug, Clone, serde::Serialize)]
30pub struct Allowlist {
31    /// SHA-256 hashes of credentials to ignore.
32    pub credential_hashes: HashSet<[u8; 32]>,
33    /// Detector IDs to ignore entirely.
34    pub ignored_detectors: HashSet<String>,
35    /// Glob patterns for paths to ignore.
36    pub ignored_paths: Vec<String>,
37}
38
/// Maximum number of segments a glob pattern or a path may have before the
/// matcher refuses to compare them.
const MAX_GLOB_SEGMENTS: usize = 256;
/// Maximum byte length of a single pattern or path segment accepted by the
/// matcher.
const MAX_GLOB_SEGMENT_LEN: usize = 1024;
41
42impl Allowlist {
43    /// Create an empty allowlist with no suppressed hashes, detectors, or paths.
44    ///
45    /// # Examples
46    ///
47    /// ```rust
48    /// use keyhog_core::allowlist::Allowlist;
49    ///
50    /// let allowlist = Allowlist::empty();
51    /// assert!(allowlist.ignored_paths.is_empty());
52    /// ```
53    pub fn empty() -> Self {
54        Self {
55            credential_hashes: HashSet::new(),
56            ignored_detectors: HashSet::new(),
57            ignored_paths: Vec::new(),
58        }
59    }
60
61    /// Load from a .keyhogignore file.
62    ///
63    /// # Examples
64    ///
65    /// ```rust,no_run
66    /// use keyhog_core::allowlist::Allowlist;
67    /// use std::path::Path;
68    ///
69    /// let _allowlist = Allowlist::load(Path::new(".keyhogignore")).unwrap();
70    /// ```
71    pub fn load(path: &Path) -> Result<Self, std::io::Error> {
72        let contents = std::fs::read_to_string(path)?;
73        Ok(Self::parse(&contents))
74    }
75
76    /// Parse allowlist from string content.
77    ///
78    /// # Examples
79    ///
80    /// ```rust
81    /// use keyhog_core::allowlist::Allowlist;
82    ///
83    /// let allowlist = Allowlist::parse("path:**/.env\ndetector:demo-token\n");
84    /// assert!(allowlist.is_path_ignored("app/.env"));
85    /// ```
86    pub fn parse(content: &str) -> Self {
87        let mut al = Self::empty();
88        for (line_number, line) in content.lines().enumerate() {
89            let line = line.trim();
90            if line.is_empty() || line.starts_with('#') {
91                continue;
92            }
93            if let Some(hash) = line.strip_prefix("hash:") {
94                let trimmed = hash.trim();
95                if let Some(valid_hash) = parse_sha256_hex(trimmed) {
96                    al.credential_hashes.insert(valid_hash);
97                } else {
98                    tracing::warn!(
99                        "invalid hash allowlist entry at line {}: '{}'",
100                        line_number + 1,
101                        trimmed
102                    );
103                }
104            } else if let Some(detector) = line.strip_prefix("detector:") {
105                let detector = detector.trim();
106                if detector.is_empty() {
107                    tracing::warn!(
108                        "invalid detector allowlist entry at line {}: detector id is empty",
109                        line_number + 1
110                    );
111                } else {
112                    al.ignored_detectors.insert(detector.to_string());
113                }
114            } else if let Some(path) = line.strip_prefix("path:") {
115                let path = path.trim();
116                if path.is_empty() {
117                    tracing::warn!(
118                        "invalid path allowlist entry at line {}: glob is empty",
119                        line_number + 1
120                    );
121                } else {
122                    al.ignored_paths.push(path.to_string());
123                }
124            } else {
125                tracing::warn!(
126                    "invalid allowlist entry at line {}: '{}'. Fix: use hash:, detector:, or path:",
127                    line_number + 1,
128                    line
129                );
130            }
131        }
132        al
133    }
134
135    /// Check whether detector or path rules suppress a verified finding.
136    ///
137    /// Hash-based suppression is evaluated earlier on [`crate::RawMatch`] values
138    /// because [`VerifiedFinding`] stores only redacted credentials.
139    ///
140    /// # Examples
141    ///
142    /// ```rust
143    /// use keyhog_core::allowlist::Allowlist;
144    /// use keyhog_core::{MatchLocation, Severity, VerificationResult, VerifiedFinding};
145    /// use std::collections::HashMap;
146    ///
147    /// let allowlist = Allowlist::parse("detector:demo-token\n");
148    /// let finding = VerifiedFinding {
149    ///     detector_id: "demo-token".into(),
150    ///     detector_name: "Demo Token".into(),
151    ///     service: "demo".into(),
152    ///     severity: Severity::High,
153    ///     credential_redacted: "demo_...1234".into(),
154    ///     location: MatchLocation {
155    ///         source: "fs".into(),
156    ///         file_path: Some("src/main.rs".into()),
157    ///         line: Some(1),
158    ///         offset: 0,
159    ///         commit: None,
160    ///         author: None,
161    ///         date: None,
162    ///     },
163    ///     verification: VerificationResult::Unverifiable,
164    ///     metadata: std::collections::HashMap::new(),
165    ///     additional_locations: Vec::new(),
166    ///     confidence: None,
167    ///     credential_hash: "hash".to_string(),
168    /// };
169    /// assert!(allowlist.is_allowed(&finding));
170    /// ```
171    pub fn is_allowed(&self, finding: &VerifiedFinding) -> bool {
172        let detector_ignored = self.ignored_detectors.contains(&*finding.detector_id);
173
174        let path_ignored = finding.location.file_path.as_ref().is_some_and(|path| {
175            let normalized_path = normalize_path(path);
176            self.ignored_paths
177                .iter()
178                .any(|pattern| glob_match_normalized(pattern, &normalized_path))
179        });
180
181        let hash_ignored = self.matches_ignored_hash(&finding.credential_hash);
182
183        detector_ignored || path_ignored || hash_ignored
184    }
185
186    /// Check if a raw credential hash is allowlisted.
187    ///
188    /// # Examples
189    ///
190    /// ```rust
191    /// use keyhog_core::allowlist::Allowlist;
192    ///
193    /// let allowlist = Allowlist::parse("");
194    /// assert!(!allowlist.is_hash_allowed("demo_ABC12345"));
195    /// ```
196    pub fn is_hash_allowed(&self, credential: &str) -> bool {
197        self.matches_ignored_hash(credential)
198    }
199
200    /// Check if a hex-encoded SHA-256 hash is allowlisted.
201    pub fn is_raw_hash_ignored(&self, hash_hex: &str) -> bool {
202        self.matches_ignored_hash(hash_hex)
203    }
204
205    /// Check whether a raw path matches an ignored-path glob.
206    ///
207    /// # Examples
208    ///
209    /// ```rust
210    /// use keyhog_core::allowlist::Allowlist;
211    ///
212    /// let allowlist = Allowlist::parse("path:**/*.md\n");
213    /// assert!(allowlist.is_path_ignored("docs/README.md"));
214    /// ```
215    pub fn is_path_ignored(&self, path: &str) -> bool {
216        let normalized = normalize_path(path);
217        self.ignored_paths
218            .iter()
219            .any(|pattern| glob_match_normalized(pattern, &normalized))
220    }
221
222    fn matches_ignored_hash(&self, input: &str) -> bool {
223        if let Some(hash_bytes) = parse_sha256_hex(input)
224            && self.credential_hashes.contains(&hash_bytes)
225        {
226            return true;
227        }
228
229        let digest = sha256_digest(input);
230        self.credential_hashes.contains(&digest)
231    }
232}
233
234fn glob_match_normalized(pattern: &str, normalized_path: &str) -> bool {
235    let normalized_pattern = normalize_path(pattern);
236    let pattern_segments = split_segments(&normalized_pattern);
237    let path_segments = split_segments(normalized_path);
238
239    if pattern_segments.len() > MAX_GLOB_SEGMENTS
240        || path_segments.len() > MAX_GLOB_SEGMENTS
241        || pattern_segments
242            .iter()
243            .any(|segment| segment.len() > MAX_GLOB_SEGMENT_LEN)
244        || path_segments
245            .iter()
246            .any(|segment| segment.len() > MAX_GLOB_SEGMENT_LEN)
247    {
248        tracing::warn!(
249            "skipping oversized allowlist glob match (pattern segments: {}, path segments: {}). Fix: shorten the glob or path",
250            pattern_segments.len(),
251            path_segments.len()
252        );
253        return false;
254    }
255
256    glob_match_segments(&pattern_segments, &path_segments)
257}
258
/// Split a path into its segments on `/` and `\`.
///
/// An empty path yields no segments at all; consecutive separators yield
/// empty segments.
fn split_segments(path: &str) -> Vec<&str> {
    match path {
        "" => Vec::new(),
        non_empty => non_empty
            .split(|c: char| matches!(c, '/' | '\\'))
            .collect(),
    }
}
266
267fn glob_match_segments(pattern: &[&str], path: &[&str]) -> bool {
268    let mut states = vec![false; path.len() + 1];
269    states[0] = true;
270
271    for segment in pattern {
272        let mut next = vec![false; path.len() + 1];
273        if *segment == "**" {
274            let mut reachable = false;
275            for idx in 0..=path.len() {
276                reachable |= states[idx];
277                next[idx] = reachable;
278            }
279        } else {
280            for idx in 0..path.len() {
281                if states[idx] && segment_match(segment, path[idx]) {
282                    next[idx + 1] = true;
283                }
284            }
285        }
286        states = next;
287    }
288
289    states[path.len()]
290}
291
292fn segment_match(pattern: &str, text: &str) -> bool {
293    if pattern.is_ascii() && text.is_ascii() {
294        return segment_match_ascii(pattern.as_bytes(), text.as_bytes());
295    }
296
297    segment_match_chars(pattern, text)
298}
299
/// Wildcard match over bytes where `*` matches any run of bytes (including
/// none) and every other byte must match literally.
///
/// On a mismatch the matcher returns to the most recent `*` and lets it
/// absorb one more byte of `text`.
fn segment_match_ascii(pattern: &[u8], text: &[u8]) -> bool {
    let mut pat_pos = 0usize;
    let mut txt_pos = 0usize;
    // (pattern index right after the latest `*`, text index that `*` extends to)
    let mut backtrack: Option<(usize, usize)> = None;

    while txt_pos < text.len() {
        match pattern.get(pat_pos) {
            Some(b'*') => {
                backtrack = Some((pat_pos + 1, txt_pos));
                pat_pos += 1;
            }
            Some(&literal) if literal == text[txt_pos] => {
                pat_pos += 1;
                txt_pos += 1;
            }
            _ => match backtrack {
                Some((resume_pat, absorbed)) => {
                    let absorbed = absorbed + 1;
                    backtrack = Some((resume_pat, absorbed));
                    txt_pos = absorbed;
                    pat_pos = resume_pat;
                }
                None => return false,
            },
        }
    }

    // Text is exhausted: only trailing `*`s may remain in the pattern.
    pattern[pat_pos..].iter().all(|&byte| byte == b'*')
}
336
/// Wildcard match over Unicode scalar values where `*` matches any run of
/// characters (including none) and every other character must match literally.
///
/// Both inputs are collected into `char` vectors so positions refer to whole
/// characters rather than bytes.
fn segment_match_chars(pattern: &str, text: &str) -> bool {
    let glob: Vec<char> = pattern.chars().collect();
    let subject: Vec<char> = text.chars().collect();

    let (mut g, mut s) = (0usize, 0usize);
    let mut last_star: Option<usize> = None;
    let mut star_covers_to = 0usize;

    while let Some(&current) = subject.get(s) {
        let expected = glob.get(g).copied();
        if expected == Some('*') {
            // Remember the star so a later mismatch can widen what it absorbs.
            last_star = Some(g);
            star_covers_to = s;
            g += 1;
        } else if expected == Some(current) {
            g += 1;
            s += 1;
        } else if let Some(star) = last_star {
            star_covers_to += 1;
            s = star_covers_to;
            g = star + 1;
        } else {
            return false;
        }
    }

    // Text is exhausted: only trailing `*`s may remain in the pattern.
    glob[g..].iter().all(|&c| c == '*')
}
376
/// Lexically normalize a path into a `/`-joined string.
///
/// Backslashes are turned into `/`, `.` components are dropped, and `..`
/// removes the preceding component when there is one that is not itself `..`
/// (otherwise the `..` is kept). A root component discards everything collected
/// before it, so absolute paths come out without a leading `/`.
fn normalize_path(path: &str) -> String {
    let unified = path.replace('\\', "/");
    let mut stack: Vec<String> = Vec::new();

    for component in Path::new(&unified).components() {
        match component {
            Component::Prefix(prefix) => {
                stack.push(prefix.as_os_str().to_string_lossy().into_owned());
            }
            Component::RootDir => stack.clear(),
            Component::CurDir => {}
            Component::Normal(name) => stack.push(name.to_string_lossy().into_owned()),
            Component::ParentDir => {
                let can_pop = matches!(stack.last(), Some(top) if top.as_str() != "..");
                if can_pop {
                    stack.pop();
                } else {
                    stack.push(String::from(".."));
                }
            }
        }
    }

    stack.join("/")
}
397
398/// SHA-256 digest of a string.
399fn sha256_digest(input: &str) -> [u8; 32] {
400    use sha2::{Digest, Sha256};
401    let mut hasher = Sha256::new();
402    hasher.update(input.as_bytes());
403    hasher.finalize().into()
404}
405
/// Decode a hex-encoded SHA-256 digest into its 32 raw bytes.
///
/// Surrounding whitespace is ignored. Returns `None` unless the remaining
/// input is exactly 64 ASCII hex digits (upper or lower case).
///
/// Decoding works on bytes rather than `&str` slices, so input containing
/// multi-byte characters is rejected instead of panicking on a char boundary,
/// and sign characters such as `+` are not accepted as part of a hex pair.
fn parse_sha256_hex(input: &str) -> Option<[u8; 32]> {
    let hex = input.trim().as_bytes();
    if hex.len() != 64 {
        return None;
    }

    // `to_digit(16)` yields values below 16, so the narrowing cast is lossless;
    // bytes outside ASCII map to non-digit chars and give `None`.
    let nibble = |byte: u8| (byte as char).to_digit(16).map(|value| value as u8);

    let mut digest = [0u8; 32];
    for (out, pair) in digest.iter_mut().zip(hex.chunks_exact(2)) {
        let high = nibble(pair[0])?;
        let low = nibble(pair[1])?;
        *out = (high << 4) | low;
    }
    Some(digest)
}