Skip to main content

keyhog_core/
allowlist.rs

1//! Allowlist support: `.keyhogignore` file parsing for suppressing known false
2//! positives by path glob, detector ID, or credential hash.
3
//!
//! Allowlist: known false positives and ignored patterns.
//!
//! Users can create a `.keyhogignore` file to suppress known false positives.
//! Format (one entry per line):
//!   - `hash:<sha256>` — ignore a specific credential by its hex-encoded SHA-256 hash
//!   - `detector:<id>` — ignore all findings from a detector
//!   - `path:<glob>` — ignore files matching a glob pattern
//!   - `# comment` — comment lines are skipped
//!   - blank lines are skipped
13use std::collections::HashSet;
14use std::path::Component;
15use std::path::Path;
16
17use crate::VerifiedFinding;
18
19/// User-defined suppressions loaded from `.keyhogignore`: credential hashes, detector IDs, and path globs.
20///
21/// # Examples
22///
23/// ```rust
24/// use keyhog_core::allowlist::Allowlist;
25///
26/// let allowlist = Allowlist::parse("detector:demo-token\npath:**/*.md\n");
27/// assert!(allowlist.ignored_detectors.contains("demo-token"));
28/// ```
29#[derive(Debug, Clone, serde::Serialize)]
30pub struct Allowlist {
31    /// SHA-256 hashes of credentials to ignore.
32    pub credential_hashes: HashSet<[u8; 32]>,
33    /// Detector IDs to ignore entirely.
34    pub ignored_detectors: HashSet<String>,
35    /// Glob patterns for paths to ignore.
36    pub ignored_paths: Vec<String>,
37}
38
39impl Allowlist {
40    /// Create an empty allowlist with no suppressed hashes, detectors, or paths.
41    ///
42    /// # Examples
43    ///
44    /// ```rust
45    /// use keyhog_core::allowlist::Allowlist;
46    ///
47    /// let allowlist = Allowlist::empty();
48    /// assert!(allowlist.ignored_paths.is_empty());
49    /// ```
50    pub fn empty() -> Self {
51        Self {
52            credential_hashes: HashSet::new(),
53            ignored_detectors: HashSet::new(),
54            ignored_paths: Vec::new(),
55        }
56    }
57
58    /// Load from a .keyhogignore file.
59    ///
60    /// # Examples
61    ///
62    /// ```rust,no_run
63    /// use keyhog_core::allowlist::Allowlist;
64    /// use std::path::Path;
65    ///
66    /// let _allowlist = Allowlist::load(Path::new(".keyhogignore")).unwrap();
67    /// ```
68    pub fn load(path: &Path) -> Result<Self, std::io::Error> {
69        let contents = std::fs::read_to_string(path)?;
70        Ok(Self::parse(&contents))
71    }
72
73    /// Parse allowlist from string content.
74    ///
75    /// # Examples
76    ///
77    /// ```rust
78    /// use keyhog_core::allowlist::Allowlist;
79    ///
80    /// let allowlist = Allowlist::parse("path:**/.env\ndetector:demo-token\n");
81    /// assert!(allowlist.is_path_ignored("app/.env"));
82    /// ```
83    pub fn parse(content: &str) -> Self {
84        let mut al = Self::empty();
85        for line in content.lines() {
86            let line = line.trim();
87            if line.is_empty() || line.starts_with('#') {
88                continue;
89            }
90            if let Some(hash) = line.strip_prefix("hash:") {
91                if let Some(hash) = parse_sha256_hex(hash.trim()) {
92                    al.credential_hashes.insert(hash);
93                }
94            } else if let Some(detector) = line.strip_prefix("detector:") {
95                al.ignored_detectors.insert(detector.trim().to_string());
96            } else if let Some(path) = line.strip_prefix("path:") {
97                al.ignored_paths.push(path.trim().to_string());
98            }
99        }
100        al
101    }
102
103    /// Check whether detector or path rules suppress a verified finding.
104    ///
105    /// Hash-based suppression is evaluated earlier on [`crate::RawMatch`] values
106    /// because [`VerifiedFinding`] stores only redacted credentials.
107    ///
108    /// # Examples
109    ///
110    /// ```rust
111    /// use keyhog_core::allowlist::Allowlist;
112    /// use keyhog_core::{MatchLocation, Severity, VerificationResult, VerifiedFinding};
113    /// use std::collections::HashMap;
114    ///
115    /// let allowlist = Allowlist::parse("detector:demo-token\n");
116    /// let finding = VerifiedFinding {
117    ///     detector_id: "demo-token".into(),
118    ///     detector_name: "Demo Token".into(),
119    ///     service: "demo".into(),
120    ///     severity: Severity::High,
121    ///     credential_redacted: "demo_...1234".into(),
122    ///     location: MatchLocation {
123    ///         source: "filesystem".into(),
124    ///         file_path: Some(".env".into()),
125    ///         line: Some(1),
126    ///         offset: 0,
127    ///         commit: None,
128    ///         author: None,
129    ///         date: None,
130    ///     },
131    ///     verification: VerificationResult::Skipped,
132    ///     metadata: HashMap::new(),
133    ///     additional_locations: Vec::new(),
134    ///     confidence: None,
135    /// };
136    ///
137    /// assert!(allowlist.is_allowed(&finding));
138    /// ```
139    pub fn is_allowed(&self, finding: &VerifiedFinding) -> bool {
140        let detector_allowed = self.ignored_detectors.contains(&finding.detector_id);
141        let path_allowed = finding.location.file_path.as_ref().is_some_and(|path| {
142            let normalized_path = normalize_path(path);
143            self.ignored_paths
144                .iter()
145                .any(|pattern| glob_match_normalized(pattern, &normalized_path))
146        });
147
148        detector_allowed || path_allowed
149    }
150
151    /// Check if a raw credential hash is allowlisted.
152    ///
153    /// # Examples
154    ///
155    /// ```rust
156    /// use keyhog_core::allowlist::Allowlist;
157    ///
158    /// let allowlist = Allowlist::parse("");
159    /// assert!(!allowlist.is_hash_allowed("demo_ABC12345"));
160    /// ```
161    pub fn is_hash_allowed(&self, credential: &str) -> bool {
162        let hash = sha256_digest(credential);
163        self.credential_hashes.contains(&hash)
164    }
165
166    /// Check whether a raw path matches an ignored-path glob.
167    ///
168    /// # Examples
169    ///
170    /// ```rust
171    /// use keyhog_core::allowlist::Allowlist;
172    ///
173    /// let allowlist = Allowlist::parse("path:**/*.md\n");
174    /// assert!(allowlist.is_path_ignored("docs/README.md"));
175    /// ```
176    pub fn is_path_ignored(&self, path: &str) -> bool {
177        let normalized = normalize_path(path);
178        self.ignored_paths
179            .iter()
180            .any(|pattern| glob_match_normalized(pattern, &normalized))
181    }
182}
183
#[cfg(test)]
/// Test-only convenience wrapper: normalizes `path` and matches it against `pattern`.
///
/// `*` matches within a single path segment; a segment that is exactly `**`
/// matches any number of whole segments.
fn glob_match(pattern: &str, path: &str) -> bool {
    glob_match_normalized(pattern, &normalize_path(path))
}
190
191fn glob_match_normalized(pattern: &str, normalized_path: &str) -> bool {
192    let normalized_pattern = normalize_path(pattern);
193    let pattern_segments = split_segments(&normalized_pattern);
194    let path_segments = split_segments(normalized_path);
195    glob_match_segments(&pattern_segments, &path_segments)
196}
197
/// Split a `/`-separated path into its segments.
///
/// The empty string yields no segments at all (rather than one empty
/// segment, which is what a plain `split` would produce).
fn split_segments(path: &str) -> Vec<&str> {
    match path {
        "" => Vec::new(),
        non_empty => non_empty.split('/').collect(),
    }
}
205
206fn glob_match_segments(pattern: &[&str], path: &[&str]) -> bool {
207    let mut states = vec![false; path.len() + 1];
208    states[0] = true;
209
210    for segment in pattern {
211        let mut next = vec![false; path.len() + 1];
212        if *segment == "**" {
213            let mut reachable = false;
214            for idx in 0..=path.len() {
215                reachable |= states[idx];
216                next[idx] = reachable;
217            }
218        } else {
219            for idx in 0..path.len() {
220                if states[idx] && segment_match(segment, path[idx]) {
221                    next[idx + 1] = true;
222                }
223            }
224        }
225        states = next;
226    }
227
228    states[path.len()]
229}
230
231fn segment_match(pattern: &str, text: &str) -> bool {
232    if pattern.is_ascii() && text.is_ascii() {
233        return segment_match_ascii(pattern.as_bytes(), text.as_bytes());
234    }
235
236    segment_match_chars(pattern, text)
237}
238
/// Byte-wise wildcard match: `*` in `pattern` matches any run of bytes in
/// `text` (including none); every other byte must match literally.
///
/// Walks both inputs with a single backtracking point at the most recent
/// `*`: on a mismatch, that `*` is made to absorb one more byte and matching
/// resumes right after it.
fn segment_match_ascii(pattern: &[u8], text: &[u8]) -> bool {
    let mut p = 0usize; // cursor into `pattern`
    let mut t = 0usize; // cursor into `text`
    // (pattern index just past the last `*`, text index up to which that `*` has absorbed)
    let mut backtrack: Option<(usize, usize)> = None;

    while t < text.len() {
        match pattern.get(p) {
            Some(&b'*') => {
                backtrack = Some((p + 1, t));
                p += 1;
            }
            Some(&byte) if byte == text[t] => {
                p += 1;
                t += 1;
            }
            _ => match backtrack {
                Some((resume_p, absorbed)) => {
                    let absorbed = absorbed + 1;
                    backtrack = Some((resume_p, absorbed));
                    p = resume_p;
                    t = absorbed;
                }
                None => return false,
            },
        }
    }

    // Text is exhausted; whatever is left of the pattern must be only `*`.
    pattern[p..].iter().all(|&b| b == b'*')
}
275
/// Unicode-aware wildcard match: `*` in `pattern` matches any run of
/// characters in `text` (including none); every other character must match
/// literally.
///
/// Both inputs are decoded into `char`s first so multi-byte characters are
/// compared as single units. On a mismatch, the most recent `*` absorbs one
/// more character and matching resumes right after it.
fn segment_match_chars(pattern: &str, text: &str) -> bool {
    let glob: Vec<char> = pattern.chars().collect();
    let name: Vec<char> = text.chars().collect();

    let mut g = 0usize; // cursor into `glob`
    let mut n = 0usize; // cursor into `name`
    // (glob index just past the last `*`, name index up to which that `*` has absorbed)
    let mut retry: Option<(usize, usize)> = None;

    while n < name.len() {
        let current = glob.get(g).copied();
        if current == Some('*') {
            retry = Some((g + 1, n));
            g += 1;
        } else if current == Some(name[n]) {
            g += 1;
            n += 1;
        } else if let Some((after_star, absorbed)) = retry {
            let absorbed = absorbed + 1;
            retry = Some((after_star, absorbed));
            g = after_star;
            n = absorbed;
        } else {
            return false;
        }
    }

    // Text is exhausted; whatever is left of the pattern must be only `*`.
    glob[g..].iter().all(|&c| c == '*')
}
315
/// Canonicalize a path (or glob pattern) into a `/`-joined, lexically
/// resolved form without touching the filesystem.
///
/// - Backslashes are treated as separators.
/// - `.` components are dropped.
/// - `..` removes the preceding component unless there is none or it is
///   itself `..`, in which case the `..` is kept.
/// - A root component discards everything collected so far, so absolute
///   paths come out without a leading separator.
fn normalize_path(path: &str) -> String {
    let unified = path.replace('\\', "/");
    let mut segments: Vec<String> = Vec::new();

    for component in Path::new(&unified).components() {
        match component {
            Component::Prefix(prefix) => {
                segments.push(prefix.as_os_str().to_string_lossy().into_owned());
            }
            Component::RootDir => segments.clear(),
            Component::CurDir => {}
            Component::ParentDir => {
                let can_pop = segments.last().is_some_and(|prev| prev != "..");
                if can_pop {
                    segments.pop();
                } else {
                    segments.push("..".to_string());
                }
            }
            Component::Normal(name) => segments.push(name.to_string_lossy().into_owned()),
        }
    }

    segments.join("/")
}
336
337/// SHA-256 digest of a string.
338fn sha256_digest(input: &str) -> [u8; 32] {
339    use sha2::{Digest, Sha256};
340    let mut hasher = Sha256::new();
341    hasher.update(input.as_bytes());
342    hasher.finalize().into()
343}
344
/// Decode a 64-character hexadecimal string into a 32-byte SHA-256 digest.
///
/// Upper- and lower-case hex digits are both accepted. Returns `None` when
/// the input is not exactly 64 bytes long or contains any non-hex character.
fn parse_sha256_hex(input: &str) -> Option<[u8; 32]> {
    let hex = input.as_bytes();
    if hex.len() != 64 {
        return None;
    }

    let mut digest = [0u8; 32];
    for (slot, pair) in digest.iter_mut().zip(hex.chunks_exact(2)) {
        // `to_digit(16)` is `None` for anything outside 0-9, a-f, A-F, which
        // also rejects every non-ASCII byte.
        let high = char::from(pair[0]).to_digit(16)?;
        let low = char::from(pair[1]).to_digit(16)?;
        *slot = u8::try_from(high * 16 + low).ok()?;
    }
    Some(digest)
}
357
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashMap;

    /// SHA-256 of the string `"test"`, lower-case hex.
    const SHA256_OF_TEST: &str =
        "9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08";

    /// A syntactically valid `hash:` line (64 hex digits, all `a`).
    fn valid_hash_line() -> String {
        format!("hash:{}", "a".repeat(64))
    }

    /// Build a filesystem finding on line 1; only the identifying strings and
    /// the file path vary between tests.
    fn finding(
        detector_id: &'static str,
        detector_name: &'static str,
        service: &'static str,
        file_path: &'static str,
    ) -> VerifiedFinding {
        VerifiedFinding {
            detector_id: detector_id.into(),
            detector_name: detector_name.into(),
            service: service.into(),
            severity: crate::Severity::High,
            credential_redacted: "***".into(),
            location: crate::MatchLocation {
                source: "filesystem".into(),
                file_path: Some(file_path.into()),
                line: Some(1),
                offset: 0,
                commit: None,
                author: None,
                date: None,
            },
            verification: crate::VerificationResult::Unverifiable,
            metadata: HashMap::new(),
            additional_locations: Vec::new(),
            confidence: None,
        }
    }

    #[test]
    fn parse_allowlist() {
        let content = format!(
            "\n# Known false positives\n{}\ndetector:entropy\npath:tests/**\npath:*.example\n",
            valid_hash_line()
        );
        let al = Allowlist::parse(&content);
        assert_eq!(al.credential_hashes.len(), 1);
        assert!(al.ignored_detectors.contains("entropy"));
        assert_eq!(al.ignored_paths.len(), 2);
    }

    #[test]
    fn glob_matching() {
        let matching = [
            ("tests/**", "tests/fixtures/config.env"),
            ("*.example", "config.example"),
            ("**/*.md", "docs/README.md"),
        ];
        for (pattern, path) in matching {
            assert!(glob_match(pattern, path));
        }
        assert!(!glob_match("tests/**", "src/main.rs"));
    }

    #[test]
    fn glob_matching_handles_non_ascii_segments() {
        assert!(glob_match("fixtures/*", "fixtures/caf\u{00e9}.rs"));
        for text in ["cafe", "cafeteria"] {
            assert!(segment_match("caf*", text));
        }
        assert!(!segment_match("caf*", "tea"));
    }

    #[test]
    fn empty_allowlist_allows_nothing() {
        assert!(!Allowlist::empty().is_hash_allowed("anything"));
    }

    #[test]
    fn normalized_paths_still_match_globs() {
        let mut al = Allowlist::empty();
        al.ignored_paths.push("tests/**".into());
        assert!(al.is_path_ignored("./tests/fixtures/../fixtures/config.env"));
    }

    #[test]
    fn unicode_globs_match_unicode_paths() {
        let matching = [
            ("München/**", "München/config.env"),
            ("tësts/*", "tësts/ß.env"),
        ];
        for (pattern, path) in matching {
            assert!(glob_match(pattern, path));
        }
    }

    #[test]
    fn is_allowed_checks_detector_and_path_rules_consistently() {
        let mut al = Allowlist::empty();
        al.ignored_detectors.insert("aws".into());
        al.ignored_paths.push("tests/**".into());

        // Suppressed by detector ID even though the path is not ignored.
        let by_detector = finding("aws", "AWS", "aws", "src/main.rs");
        assert!(al.is_allowed(&by_detector));

        // Suppressed by path even though the detector is not ignored.
        let by_path = finding("other", "AWS", "aws", "tests/fixture.env");
        assert!(al.is_allowed(&by_path));
    }

    // Tests for .gitleaksignore compatibility

    #[test]
    fn gitleaks_format_parse_compatibility() {
        // Gitleaks uses same format with hash:, detector:, path: prefixes
        let content = "hash:deadbeef1234567890abcdef1234567890abcdef1234567890abcdef12345678\ndetector:aws-access-key\npath:**/*.test\n";
        let al = Allowlist::parse(content);
        assert_eq!(al.credential_hashes.len(), 1);
        assert!(al.ignored_detectors.contains("aws-access-key"));
        assert_eq!(al.ignored_paths.len(), 1);
    }

    #[test]
    fn gitleaks_hash_suppression_behavior() {
        // Hash-based suppression works the same as gitleaks
        let al = Allowlist::parse(&format!("hash:{SHA256_OF_TEST}"));
        assert!(al.is_hash_allowed("test"));
        assert!(!al.is_hash_allowed("different"));
    }

    #[test]
    fn gitleaks_path_glob_double_star() {
        // ** matches any number of directory levels (gitleaks compatible)
        for path in [
            "config.env",
            "src/config.env",
            "deep/nested/path/config.env",
        ] {
            assert!(glob_match("**/*.env", path));
        }
        assert!(!glob_match("**/*.env", "config.txt"));
    }

    #[test]
    fn gitleaks_detector_ignore_by_id() {
        // Ignore all findings from a specific detector ID
        let al = Allowlist::parse("detector:generic-api-key");

        let ignored = finding(
            "generic-api-key",
            "Generic API Key",
            "generic",
            "any/path/file.rs",
        );
        assert!(al.is_allowed(&ignored));

        let other_finding = finding(
            "different-detector",
            "Generic API Key",
            "generic",
            "any/path/file.rs",
        );
        assert!(!al.is_allowed(&other_finding));
    }

    #[test]
    fn gitleaks_empty_allowlist_allows_everything() {
        // Empty allowlist should not block anything
        let al = Allowlist::empty();
        assert!(!al.is_hash_allowed("any_credential"));
        assert_eq!(al.ignored_detectors.len(), 0);
        assert_eq!(al.ignored_paths.len(), 0);
    }

    #[test]
    fn gitleaks_comment_lines_ignored() {
        // Lines starting with # should be treated as comments
        let content = format!(
            "\n# This is a comment\n{}\n# Another comment\ndetector:test\n",
            valid_hash_line()
        );
        let al = Allowlist::parse(&content);
        assert_eq!(al.credential_hashes.len(), 1);
        assert!(al.ignored_detectors.contains("test"));
    }

    #[test]
    fn gitleaks_blank_lines_ignored() {
        // Blank lines should be skipped without error
        let content = format!(
            "\n{}\n\ndetector:test\n\npath:**/ignore\n",
            valid_hash_line()
        );
        let al = Allowlist::parse(&content);
        assert_eq!(al.credential_hashes.len(), 1);
        assert!(al.ignored_detectors.contains("test"));
        assert_eq!(al.ignored_paths.len(), 1);
    }

    #[test]
    fn gitleaks_malformed_lines_warning_not_crash() {
        // Malformed lines should be silently ignored (not crash)
        let content = format!(
            "\nhash:invalid_hash\nnot_a_valid_line\nrandom_text_here\ndetector:\nhash:\npath:\n{}\n",
            valid_hash_line()
        );
        let al = Allowlist::parse(&content);
        // Should parse the valid hash and skip malformed lines
        assert_eq!(al.credential_hashes.len(), 1);
    }

    #[test]
    fn gitleaks_windows_backslash_normalized() {
        // Windows paths with backslashes should be normalized
        let mut al = Allowlist::empty();
        al.ignored_paths.push("tests/**".into());
        // Windows paths should match after normalization
        for path in ["tests\\fixtures\\config.env", ".\\tests\\fixtures\\test.txt"] {
            assert!(al.is_path_ignored(path));
        }
        // src\main.rs should NOT match tests/**
        assert!(!al.is_path_ignored("src\\main.rs"));
    }

    #[test]
    fn gitleaks_hash_case_insensitive() {
        // Hashes with different case should still match (SHA-256 is hex)
        let lower = format!("hash:{SHA256_OF_TEST}");
        let upper = format!("hash:{}", SHA256_OF_TEST.to_ascii_uppercase());
        // First eight digits upper-case, the rest lower-case.
        let mixed = format!(
            "hash:{}{}",
            SHA256_OF_TEST[..8].to_ascii_uppercase(),
            &SHA256_OF_TEST[8..]
        );

        // All should match "test"
        for line in [lower, upper, mixed] {
            assert!(Allowlist::parse(&line).is_hash_allowed("test"));
        }
    }
}