Skip to main content

keyhog_core/
allowlist.rs

1//! Allowlist support: `.keyhogignore` file parsing for suppressing known false
2//! positives by path glob, detector ID, or credential hash.
3
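//! Allowlist: known false positives and ignored patterns.
//!
//! Users can create a `.keyhogignore` file to suppress known false positives.
//! Format (one entry per line):
//!   - `hash:<sha256>` - ignore a specific credential by its SHA-256 hex digest
//!   - `detector:<id>` - ignore all findings from a detector
//!   - `path:<glob>` - ignore files matching a glob pattern
//!   - a bare 64-character hex string is treated like `hash:<sha256>`
//!   - any other bare entry is treated like `path:<glob>`
//!   - `# comment` - comments
//!   - blank lines are skipped
//!
//! An entry may be followed by `;`-separated inline metadata, e.g.
//! `path:docs/**; reason="..."; expires=YYYY-MM-DD; approved_by="..."`.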
13use std::collections::HashSet;
14use std::path::Component;
15use std::path::Path;
16
17use crate::VerifiedFinding;
18
19#[path = "allowlist_metadata.rs"]
20mod allowlist_metadata;
21use allowlist_metadata::*;
22
23/// User-defined suppressions loaded from `.keyhogignore`: credential hashes, detector IDs, and path globs.
24///
25/// # Examples
26///
27/// ```rust
28/// use keyhog_core::allowlist::Allowlist;
29///
30/// let allowlist = Allowlist::parse("detector:demo-token\npath:**/*.md\n");
31/// assert!(allowlist.ignored_detectors.contains("demo-token"));
32/// ```
33#[derive(Debug, Clone, serde::Serialize)]
34pub struct Allowlist {
35    /// SHA-256 hashes of credentials to ignore.
36    pub credential_hashes: HashSet<[u8; 32]>,
37    /// Detector IDs to ignore entirely.
38    pub ignored_detectors: HashSet<String>,
39    /// Glob patterns for paths to ignore.
40    pub ignored_paths: Vec<String>,
41}
42
/// Maximum number of `/`-separated segments allowed in either the glob or the
/// path; larger inputs are not matched at all.
const MAX_GLOB_SEGMENTS: usize = 256;
/// Maximum byte length of any single glob or path segment; larger inputs are
/// not matched at all.
const MAX_GLOB_SEGMENT_LEN: usize = 1024;
45
46impl Allowlist {
47    /// Create an empty allowlist with no suppressed hashes, detectors, or paths.
48    ///
49    /// # Examples
50    ///
51    /// ```rust
52    /// use keyhog_core::allowlist::Allowlist;
53    ///
54    /// let allowlist = Allowlist::empty();
55    /// assert!(allowlist.ignored_paths.is_empty());
56    /// ```
57    pub fn empty() -> Self {
58        Self {
59            credential_hashes: HashSet::new(),
60            ignored_detectors: HashSet::new(),
61            ignored_paths: Vec::new(),
62        }
63    }
64
65    /// Load from a .keyhogignore file.
66    ///
67    /// # Examples
68    ///
69    /// ```rust,no_run
70    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
71    /// use keyhog_core::allowlist::Allowlist;
72    /// use std::path::Path;
73    ///
74    /// let _allowlist = Allowlist::load(Path::new(".keyhogignore"))?;
75    /// # Ok(()) }
76    /// ```
77    pub fn load(path: &Path) -> Result<Self, std::io::Error> {
78        let contents = std::fs::read_to_string(path)?;
79        Ok(Self::parse(&contents))
80    }
81
82    /// Parse allowlist from string content.
83    ///
84    /// # Examples
85    ///
86    /// ```rust
87    /// use keyhog_core::allowlist::Allowlist;
88    ///
89    /// let allowlist = Allowlist::parse("path:**/.env\ndetector:demo-token\n");
90    /// assert!(allowlist.is_path_ignored("app/.env"));
91    /// ```
92    pub fn parse(content: &str) -> Self {
93        let mut al = Self::empty();
94        let today = today_yyyy_mm_dd();
95        for (line_number, raw_line) in content.lines().enumerate() {
96            let raw_line = raw_line.trim();
97            if raw_line.is_empty() || raw_line.starts_with('#') {
98                continue;
99            }
100            // Optional inline metadata: `entry; reason="..."; expires=YYYY-MM-DD; approved_by="..."`
101            // Each `;`-separated token after the first is a key=value pair.
102            let mut parts = raw_line.splitn(2, ';');
103            let entry = parts.next().unwrap_or("").trim();
104            let metadata = parts.next().unwrap_or("");
105            let parsed_meta = parse_inline_metadata(metadata);
106
107            // Drop entries whose `expires` is past - keeps `.keyhogignore`
108            // self-cleaning for short-lived approvals (Tier-B #18 governance).
109            if let Some(exp) = parsed_meta.expires.as_deref() {
110                if exp < today.as_str() {
111                    tracing::warn!(
112                        "allowlist entry expired on {} (today is {}): '{}'",
113                        exp,
114                        today,
115                        entry
116                    );
117                    continue;
118                }
119            }
120
121            if let Some(hash) = entry.strip_prefix("hash:") {
122                let trimmed = hash.trim();
123                if let Some(valid_hash) = parse_sha256_hex(trimmed) {
124                    al.credential_hashes.insert(valid_hash);
125                    log_metadata_audit("hash", trimmed, &parsed_meta);
126                } else {
127                    tracing::warn!(
128                        "invalid hash allowlist entry at line {}: '{}'",
129                        line_number + 1,
130                        trimmed
131                    );
132                }
133            } else if let Some(detector) = entry.strip_prefix("detector:") {
134                let detector = detector.trim();
135                if detector.is_empty() {
136                    tracing::warn!(
137                        "invalid detector allowlist entry at line {}: detector id is empty",
138                        line_number + 1
139                    );
140                } else {
141                    al.ignored_detectors.insert(detector.to_string());
142                    log_metadata_audit("detector", detector, &parsed_meta);
143                }
144            } else if let Some(path) = entry.strip_prefix("path:") {
145                let path = path.trim();
146                if path.is_empty() {
147                    tracing::warn!(
148                        "invalid path allowlist entry at line {}: glob is empty",
149                        line_number + 1
150                    );
151                } else {
152                    al.ignored_paths.push(path.to_string());
153                    log_metadata_audit("path", path, &parsed_meta);
154                }
155            } else if let Some(bytes) = parse_sha256_hex(entry) {
156                // Bare 64-char hex hash. Lets the obvious
157                // `keyhog scan ... --format jsonl | jq -r '.credential_hash'
158                // >> .keyhogignore` workflow Just Work without users
159                // learning the `hash:` prefix.
160                al.credential_hashes.insert(bytes);
161                log_metadata_audit("hash", entry, &parsed_meta);
162            } else {
163                // Bare path glob (gitignore-style). Anything that didn't
164                // match an explicit `hash:` / `detector:` / `path:` prefix
165                // and isn't a bare hash is interpreted as a path glob,
166                // matching `.gitignore` UX (`*.log`, `node_modules/`,
167                // `vendor/**/*.json`). kimi-1 dogfood #129 - the prior
168                // behavior emitted a warning and silently dropped the
169                // line, which is the worst of both worlds: every
170                // `.gitignore` users copied over was dead.
171                al.ignored_paths.push(entry.to_string());
172                log_metadata_audit("path", entry, &parsed_meta);
173            }
174        }
175        al
176    }
177
178    /// Check whether detector or path rules suppress a verified finding.
179    ///
180    /// Hash-based suppression is evaluated earlier on [`crate::RawMatch`] values
181    /// because [`VerifiedFinding`] stores only redacted credentials.
182    ///
183    /// # Examples
184    ///
185    /// ```rust
186    /// use keyhog_core::allowlist::Allowlist;
187    /// use keyhog_core::{MatchLocation, Severity, VerificationResult, VerifiedFinding};
188    /// use std::collections::HashMap;
189    ///
190    /// let allowlist = Allowlist::parse("detector:demo-token\n");
191    /// let finding = VerifiedFinding {
192    ///     detector_id: "demo-token".into(),
193    ///     detector_name: "Demo Token".into(),
194    ///     service: "demo".into(),
195    ///     severity: Severity::High,
196    ///     credential_redacted: "demo_...1234".into(),
197    ///     location: MatchLocation {
198    ///         source: "fs".into(),
199    ///         file_path: Some("src/main.rs".into()),
200    ///         line: Some(1),
201    ///         offset: 0,
202    ///         commit: None,
203    ///         author: None,
204    ///         date: None,
205    ///     },
206    ///     verification: VerificationResult::Unverifiable,
207    ///     metadata: std::collections::HashMap::new(),
208    ///     additional_locations: Vec::new(),
209    ///     confidence: None,
210    ///     credential_hash: "hash".to_string(),
211    /// };
212    /// assert!(allowlist.is_allowed(&finding));
213    /// ```
214    pub fn is_allowed(&self, finding: &VerifiedFinding) -> bool {
215        let detector_ignored = self.ignored_detectors.contains(&*finding.detector_id);
216
217        let path_ignored = finding.location.file_path.as_ref().is_some_and(|path| {
218            let normalized_path = normalize_path(path);
219            self.ignored_paths
220                .iter()
221                .any(|pattern| glob_match_normalized(pattern, &normalized_path))
222        });
223
224        let hash_ignored = self.matches_ignored_hash(&finding.credential_hash);
225
226        detector_ignored || path_ignored || hash_ignored
227    }
228
229    /// Check if a raw credential hash is allowlisted.
230    ///
231    /// # Examples
232    ///
233    /// ```rust
234    /// use keyhog_core::allowlist::Allowlist;
235    ///
236    /// let allowlist = Allowlist::parse("");
237    /// assert!(!allowlist.is_hash_allowed("demo_ABC12345"));
238    /// ```
239    pub fn is_hash_allowed(&self, credential: &str) -> bool {
240        self.matches_ignored_hash(credential)
241    }
242
243    /// Check if a hex-encoded SHA-256 hash is allowlisted.
244    pub fn is_raw_hash_ignored(&self, hash_hex: &str) -> bool {
245        self.matches_ignored_hash(hash_hex)
246    }
247
248    /// Check whether a raw path matches an ignored-path glob.
249    ///
250    /// # Examples
251    ///
252    /// ```rust
253    /// use keyhog_core::allowlist::Allowlist;
254    ///
255    /// let allowlist = Allowlist::parse("path:**/*.md\n");
256    /// assert!(allowlist.is_path_ignored("docs/README.md"));
257    /// ```
258    pub fn is_path_ignored(&self, path: &str) -> bool {
259        let normalized = normalize_path(path);
260        self.ignored_paths
261            .iter()
262            .any(|pattern| glob_match_normalized(pattern, &normalized))
263    }
264
265    fn matches_ignored_hash(&self, input: &str) -> bool {
266        // Only compare against the parsed-hex form. Earlier versions also
267        // hashed the raw input as a fallback, which silently encouraged users
268        // to put plaintext credentials in `.keyhogignore` (the file is often
269        // committed by accident - see audit release-2026-04-26). The
270        // `hash:` parser already rejects non-64-hex inputs at load time, so
271        // every legitimate suppressing entry passes through `parse_sha256_hex`
272        // here.
273        if let Some(hash_bytes) = parse_sha256_hex(input) {
274            return self.credential_hashes.contains(&hash_bytes);
275        }
276        false
277    }
278}
279
280fn glob_match_normalized(pattern: &str, normalized_path: &str) -> bool {
281    let normalized_pattern = normalize_path(pattern);
282    let pattern_segments = split_segments(&normalized_pattern);
283    let path_segments = split_segments(normalized_path);
284
285    if pattern_segments.len() > MAX_GLOB_SEGMENTS
286        || path_segments.len() > MAX_GLOB_SEGMENTS
287        || pattern_segments
288            .iter()
289            .any(|segment| segment.len() > MAX_GLOB_SEGMENT_LEN)
290        || path_segments
291            .iter()
292            .any(|segment| segment.len() > MAX_GLOB_SEGMENT_LEN)
293    {
294        tracing::warn!(
295            "skipping oversized allowlist glob match (pattern segments: {}, path segments: {}). Fix: shorten the glob or path",
296            pattern_segments.len(),
297            path_segments.len()
298        );
299        return false;
300    }
301
302    glob_match_segments(&pattern_segments, &path_segments)
303}
304
/// Split a path into its segments on `/` or `\`.
///
/// An empty input yields no segments; otherwise empty segments produced by
/// leading, trailing, or doubled separators are kept.
fn split_segments(path: &str) -> Vec<&str> {
    match path {
        "" => Vec::new(),
        non_empty => non_empty
            .split(|ch: char| matches!(ch, '/' | '\\'))
            .collect(),
    }
}
312
313fn glob_match_segments(pattern: &[&str], path: &[&str]) -> bool {
314    let mut states = vec![false; path.len() + 1];
315    states[0] = true;
316
317    for segment in pattern {
318        let mut next = vec![false; path.len() + 1];
319        if *segment == "**" {
320            let mut reachable = false;
321            for idx in 0..=path.len() {
322                reachable |= states[idx];
323                next[idx] = reachable;
324            }
325        } else {
326            for idx in 0..path.len() {
327                if states[idx] && segment_match(segment, path[idx]) {
328                    next[idx + 1] = true;
329                }
330            }
331        }
332        states = next;
333    }
334
335    states[path.len()]
336}
337
338fn segment_match(pattern: &str, text: &str) -> bool {
339    if pattern.is_ascii() && text.is_ascii() {
340        return segment_match_ascii(pattern.as_bytes(), text.as_bytes());
341    }
342
343    segment_match_chars(pattern, text)
344}
345
/// Byte-wise wildcard match: `*` in `pattern` matches any run of bytes
/// (including none); every other byte must match literally.
///
/// Returns `true` only when the whole of `text` is matched by the whole of
/// `pattern`.
#[allow(clippy::similar_names)] // pattern/text cursors are deliberately named in parallel
fn segment_match_ascii(pattern: &[u8], text: &[u8]) -> bool {
    let mut pat_pos = 0usize;
    let mut txt_pos = 0usize;
    // Most recent `*`: (pattern index just after it, text index it absorbs up to).
    let mut backtrack: Option<(usize, usize)> = None;

    while txt_pos < text.len() {
        match pattern.get(pat_pos) {
            Some(b'*') => {
                backtrack = Some((pat_pos + 1, txt_pos));
                pat_pos += 1;
            }
            Some(&literal) if literal == text[txt_pos] => {
                pat_pos += 1;
                txt_pos += 1;
            }
            _ => match backtrack.as_mut() {
                // Mismatch after a `*`: let the star swallow one more byte
                // and retry the rest of the pattern from there.
                Some((resume_pat, absorbed)) => {
                    *absorbed += 1;
                    txt_pos = *absorbed;
                    pat_pos = *resume_pat;
                }
                None => return false,
            },
        }
    }

    // Text is exhausted; only trailing `*`s may remain in the pattern.
    pattern[pat_pos..].iter().all(|&byte| byte == b'*')
}
383
/// `char`-wise wildcard match for segments containing non-ASCII text: `*` in
/// `pattern` matches any run of characters (including none); every other
/// character must match literally.
///
/// Returns `true` only when the whole of `text` is matched by the whole of
/// `pattern`.
#[allow(clippy::similar_names)] // pattern/text cursors are deliberately named in parallel
fn segment_match_chars(pattern: &str, text: &str) -> bool {
    let glob: Vec<char> = pattern.chars().collect();
    let subject: Vec<char> = text.chars().collect();

    let mut glob_pos = 0usize;
    let mut subject_pos = 0usize;
    // Most recent `*`: (pattern index just after it, text index it absorbs up to).
    let mut backtrack: Option<(usize, usize)> = None;

    while let Some(&current) = subject.get(subject_pos) {
        match glob.get(glob_pos).copied() {
            Some('*') => {
                backtrack = Some((glob_pos + 1, subject_pos));
                glob_pos += 1;
            }
            Some(expected) if expected == current => {
                glob_pos += 1;
                subject_pos += 1;
            }
            _ => {
                // Mismatch: without an earlier `*` there is nothing to retry.
                let Some((resume_glob, absorbed)) = backtrack.as_mut() else {
                    return false;
                };
                *absorbed += 1;
                subject_pos = *absorbed;
                glob_pos = *resume_glob;
            }
        }
    }

    // Text is exhausted; only trailing `*`s may remain in the pattern.
    glob[glob_pos..].iter().all(|&ch| ch == '*')
}
424
/// Normalize a path (or glob) into a `/`-joined string for comparison.
///
/// Backslashes become `/`, `.` components are dropped, and `..` removes the
/// previous component when there is one that is not itself `..` (otherwise
/// the `..` is kept). A root component discards everything collected so far,
/// so a leading `/` does not appear in the result.
fn normalize_path(path: &str) -> String {
    let unified = path.replace('\\', "/");
    let mut stack: Vec<String> = Vec::new();

    for component in Path::new(&unified).components() {
        match component {
            Component::Prefix(prefix) => {
                stack.push(prefix.as_os_str().to_string_lossy().into_owned());
            }
            Component::RootDir => stack.clear(),
            Component::CurDir => {}
            Component::ParentDir => {
                let can_pop = matches!(stack.last(), Some(top) if top.as_str() != "..");
                if can_pop {
                    stack.pop();
                } else {
                    stack.push(String::from(".."));
                }
            }
            Component::Normal(name) => stack.push(name.to_string_lossy().into_owned()),
        }
    }

    stack.join("/")
}
445
/// Decode a 64-character hex string into a 32-byte SHA-256 digest.
///
/// Surrounding whitespace is ignored and both upper- and lowercase hex
/// digits are accepted. Returns `None` when the trimmed input is not exactly
/// 64 bytes long or contains anything other than ASCII hex digits.
fn parse_sha256_hex(input: &str) -> Option<[u8; 32]> {
    // Work on bytes, not `str` slices: slicing a `str` at fixed offsets
    // panics when a multi-byte character straddles the boundary.
    let hex = input.trim().as_bytes();
    if hex.len() != 64 {
        return None;
    }

    let mut digest = [0u8; 32];
    for (byte, pair) in digest.iter_mut().zip(hex.chunks_exact(2)) {
        // `to_digit(16)` accepts only 0-9, a-f, A-F. Unlike
        // `u8::from_str_radix`, it rejects a leading `+` sign.
        let hi = char::from(pair[0]).to_digit(16)?;
        let lo = char::from(pair[1]).to_digit(16)?;
        *byte = u8::try_from((hi << 4) | lo).ok()?;
    }
    Some(digest)
}
459
/// Governance metadata carried in the `;`-separated trailer of a
/// `.keyhogignore` line (`reason`, `expires`, `approved_by`), per
/// audits/legendary-2026-04-26 Tier-B #18.
///
/// Every field is optional; `Default` yields all `None`.
#[derive(Debug, Default)]
struct InlineMetadata {
    /// Value of the `reason` key, if present.
    reason: Option<String>,
    /// Value of the `expires` key, if present (presumably `YYYY-MM-DD` - confirm
    /// against the metadata parser).
    expires: Option<String>,
    /// Value of the `approved_by` key, if present.
    approved_by: Option<String>,
}