Skip to main content

keyhog_core/
allowlist.rs

1//! Allowlist support: `.keyhogignore` file parsing for suppressing known false
2//! positives by path glob, detector ID, or credential hash.
3
//!
//! Allowlist: known false positives and ignored patterns.
//!
//! Users can create a `.keyhogignore` file to suppress known false positives.
//! Format (one entry per line):
//!   - `hash:<sha256>` — ignore a specific credential by its SHA-256 hash
//!   - `detector:<id>` — ignore all findings from a detector
//!   - `path:<glob>` — ignore files matching a glob pattern
//!   - `# comment` — comment lines are skipped
//!   - blank lines are skipped
//!
//! An entry may be followed by `;`-separated governance metadata, for example
//! `detector:demo; reason="..."; expires=YYYY-MM-DD; approved_by="..."`.
13use std::collections::HashSet;
14use std::path::Component;
15use std::path::Path;
16
17use crate::VerifiedFinding;
18
/// User-defined suppressions loaded from `.keyhogignore`: credential hashes, detector IDs, and path globs.
///
/// Every field is public, so callers can inspect the parsed rules directly.
/// The matching helpers on this type (`is_allowed`, `is_path_ignored`,
/// `is_raw_hash_ignored`) read these fields and never modify them.
///
/// # Examples
///
/// ```rust
/// use keyhog_core::allowlist::Allowlist;
///
/// let allowlist = Allowlist::parse("detector:demo-token\npath:**/*.md\n");
/// assert!(allowlist.ignored_detectors.contains("demo-token"));
/// ```
#[derive(Debug, Clone, serde::Serialize)]
pub struct Allowlist {
    /// SHA-256 digests of credentials to ignore, stored as raw 32-byte
    /// arrays decoded from `hash:<sha256>` entries.
    pub credential_hashes: HashSet<[u8; 32]>,
    /// Detector IDs to ignore entirely, taken from `detector:<id>` entries.
    pub ignored_detectors: HashSet<String>,
    /// Glob patterns for paths to ignore, taken from `path:<glob>` entries
    /// and kept in the order they were parsed.
    pub ignored_paths: Vec<String>,
}
38
/// Maximum number of segments allowed in either the glob or the candidate
/// path; `glob_match_normalized` refuses to match anything larger.
const MAX_GLOB_SEGMENTS: usize = 256;
/// Maximum length, in bytes, of a single glob or path segment;
/// `glob_match_normalized` refuses to match anything larger.
const MAX_GLOB_SEGMENT_LEN: usize = 1024;
41
42impl Allowlist {
43    /// Create an empty allowlist with no suppressed hashes, detectors, or paths.
44    ///
45    /// # Examples
46    ///
47    /// ```rust
48    /// use keyhog_core::allowlist::Allowlist;
49    ///
50    /// let allowlist = Allowlist::empty();
51    /// assert!(allowlist.ignored_paths.is_empty());
52    /// ```
53    pub fn empty() -> Self {
54        Self {
55            credential_hashes: HashSet::new(),
56            ignored_detectors: HashSet::new(),
57            ignored_paths: Vec::new(),
58        }
59    }
60
61    /// Load from a .keyhogignore file.
62    ///
63    /// # Examples
64    ///
65    /// ```rust,no_run
66    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
67    /// use keyhog_core::allowlist::Allowlist;
68    /// use std::path::Path;
69    ///
70    /// let _allowlist = Allowlist::load(Path::new(".keyhogignore"))?;
71    /// # Ok(()) }
72    /// ```
73    pub fn load(path: &Path) -> Result<Self, std::io::Error> {
74        let contents = std::fs::read_to_string(path)?;
75        Ok(Self::parse(&contents))
76    }
77
78    /// Parse allowlist from string content.
79    ///
80    /// # Examples
81    ///
82    /// ```rust
83    /// use keyhog_core::allowlist::Allowlist;
84    ///
85    /// let allowlist = Allowlist::parse("path:**/.env\ndetector:demo-token\n");
86    /// assert!(allowlist.is_path_ignored("app/.env"));
87    /// ```
88    pub fn parse(content: &str) -> Self {
89        let mut al = Self::empty();
90        let today = today_yyyy_mm_dd();
91        for (line_number, raw_line) in content.lines().enumerate() {
92            let raw_line = raw_line.trim();
93            if raw_line.is_empty() || raw_line.starts_with('#') {
94                continue;
95            }
96            // Optional inline metadata: `entry; reason="..."; expires=YYYY-MM-DD; approved_by="..."`
97            // Each `;`-separated token after the first is a key=value pair.
98            let mut parts = raw_line.splitn(2, ';');
99            let entry = parts.next().unwrap_or("").trim();
100            let metadata = parts.next().unwrap_or("");
101            let parsed_meta = parse_inline_metadata(metadata);
102
103            // Drop entries whose `expires` is past — keeps `.keyhogignore`
104            // self-cleaning for short-lived approvals (Tier-B #18 governance).
105            if let Some(exp) = parsed_meta.expires.as_deref() {
106                if exp < today.as_str() {
107                    tracing::warn!(
108                        "allowlist entry expired on {} (today is {}): '{}'",
109                        exp,
110                        today,
111                        entry
112                    );
113                    continue;
114                }
115            }
116
117            if let Some(hash) = entry.strip_prefix("hash:") {
118                let trimmed = hash.trim();
119                if let Some(valid_hash) = parse_sha256_hex(trimmed) {
120                    al.credential_hashes.insert(valid_hash);
121                    log_metadata_audit("hash", trimmed, &parsed_meta);
122                } else {
123                    tracing::warn!(
124                        "invalid hash allowlist entry at line {}: '{}'",
125                        line_number + 1,
126                        trimmed
127                    );
128                }
129            } else if let Some(detector) = entry.strip_prefix("detector:") {
130                let detector = detector.trim();
131                if detector.is_empty() {
132                    tracing::warn!(
133                        "invalid detector allowlist entry at line {}: detector id is empty",
134                        line_number + 1
135                    );
136                } else {
137                    al.ignored_detectors.insert(detector.to_string());
138                    log_metadata_audit("detector", detector, &parsed_meta);
139                }
140            } else if let Some(path) = entry.strip_prefix("path:") {
141                let path = path.trim();
142                if path.is_empty() {
143                    tracing::warn!(
144                        "invalid path allowlist entry at line {}: glob is empty",
145                        line_number + 1
146                    );
147                } else {
148                    al.ignored_paths.push(path.to_string());
149                    log_metadata_audit("path", path, &parsed_meta);
150                }
151            } else {
152                tracing::warn!(
153                    "invalid allowlist entry at line {}: '{}'. Fix: use hash:, detector:, or path:",
154                    line_number + 1,
155                    entry
156                );
157            }
158        }
159        al
160    }
161
162    /// Check whether detector or path rules suppress a verified finding.
163    ///
164    /// Hash-based suppression is evaluated earlier on [`crate::RawMatch`] values
165    /// because [`VerifiedFinding`] stores only redacted credentials.
166    ///
167    /// # Examples
168    ///
169    /// ```rust
170    /// use keyhog_core::allowlist::Allowlist;
171    /// use keyhog_core::{MatchLocation, Severity, VerificationResult, VerifiedFinding};
172    /// use std::collections::HashMap;
173    ///
174    /// let allowlist = Allowlist::parse("detector:demo-token\n");
175    /// let finding = VerifiedFinding {
176    ///     detector_id: "demo-token".into(),
177    ///     detector_name: "Demo Token".into(),
178    ///     service: "demo".into(),
179    ///     severity: Severity::High,
180    ///     credential_redacted: "demo_...1234".into(),
181    ///     location: MatchLocation {
182    ///         source: "fs".into(),
183    ///         file_path: Some("src/main.rs".into()),
184    ///         line: Some(1),
185    ///         offset: 0,
186    ///         commit: None,
187    ///         author: None,
188    ///         date: None,
189    ///     },
190    ///     verification: VerificationResult::Unverifiable,
191    ///     metadata: std::collections::HashMap::new(),
192    ///     additional_locations: Vec::new(),
193    ///     confidence: None,
194    ///     credential_hash: "hash".to_string(),
195    /// };
196    /// assert!(allowlist.is_allowed(&finding));
197    /// ```
198    pub fn is_allowed(&self, finding: &VerifiedFinding) -> bool {
199        let detector_ignored = self.ignored_detectors.contains(&*finding.detector_id);
200
201        let path_ignored = finding.location.file_path.as_ref().is_some_and(|path| {
202            let normalized_path = normalize_path(path);
203            self.ignored_paths
204                .iter()
205                .any(|pattern| glob_match_normalized(pattern, &normalized_path))
206        });
207
208        let hash_ignored = self.matches_ignored_hash(&finding.credential_hash);
209
210        detector_ignored || path_ignored || hash_ignored
211    }
212
213    /// Check if a raw credential hash is allowlisted.
214    ///
215    /// # Examples
216    ///
217    /// ```rust
218    /// use keyhog_core::allowlist::Allowlist;
219    ///
220    /// let allowlist = Allowlist::parse("");
221    /// assert!(!allowlist.is_hash_allowed("demo_ABC12345"));
222    /// ```
223    pub fn is_hash_allowed(&self, credential: &str) -> bool {
224        self.matches_ignored_hash(credential)
225    }
226
227    /// Check if a hex-encoded SHA-256 hash is allowlisted.
228    pub fn is_raw_hash_ignored(&self, hash_hex: &str) -> bool {
229        self.matches_ignored_hash(hash_hex)
230    }
231
232    /// Check whether a raw path matches an ignored-path glob.
233    ///
234    /// # Examples
235    ///
236    /// ```rust
237    /// use keyhog_core::allowlist::Allowlist;
238    ///
239    /// let allowlist = Allowlist::parse("path:**/*.md\n");
240    /// assert!(allowlist.is_path_ignored("docs/README.md"));
241    /// ```
242    pub fn is_path_ignored(&self, path: &str) -> bool {
243        let normalized = normalize_path(path);
244        self.ignored_paths
245            .iter()
246            .any(|pattern| glob_match_normalized(pattern, &normalized))
247    }
248
249    fn matches_ignored_hash(&self, input: &str) -> bool {
250        // Only compare against the parsed-hex form. Earlier versions also
251        // hashed the raw input as a fallback, which silently encouraged users
252        // to put plaintext credentials in `.keyhogignore` (the file is often
253        // committed by accident — see audit release-2026-04-26). The
254        // `hash:` parser already rejects non-64-hex inputs at load time, so
255        // every legitimate suppressing entry passes through `parse_sha256_hex`
256        // here.
257        if let Some(hash_bytes) = parse_sha256_hex(input) {
258            return self.credential_hashes.contains(&hash_bytes);
259        }
260        false
261    }
262}
263
264fn glob_match_normalized(pattern: &str, normalized_path: &str) -> bool {
265    let normalized_pattern = normalize_path(pattern);
266    let pattern_segments = split_segments(&normalized_pattern);
267    let path_segments = split_segments(normalized_path);
268
269    if pattern_segments.len() > MAX_GLOB_SEGMENTS
270        || path_segments.len() > MAX_GLOB_SEGMENTS
271        || pattern_segments
272            .iter()
273            .any(|segment| segment.len() > MAX_GLOB_SEGMENT_LEN)
274        || path_segments
275            .iter()
276            .any(|segment| segment.len() > MAX_GLOB_SEGMENT_LEN)
277    {
278        tracing::warn!(
279            "skipping oversized allowlist glob match (pattern segments: {}, path segments: {}). Fix: shorten the glob or path",
280            pattern_segments.len(),
281            path_segments.len()
282        );
283        return false;
284    }
285
286    glob_match_segments(&pattern_segments, &path_segments)
287}
288
/// Split `path` on `/` and `\` into its segments.
///
/// The empty string yields no segments at all (rather than one empty
/// segment); empty segments produced by adjacent separators are kept.
fn split_segments(path: &str) -> Vec<&str> {
    match path {
        "" => Vec::new(),
        _ => path
            .split(|ch: char| matches!(ch, '/' | '\\'))
            .collect(),
    }
}
296
297fn glob_match_segments(pattern: &[&str], path: &[&str]) -> bool {
298    let mut states = vec![false; path.len() + 1];
299    states[0] = true;
300
301    for segment in pattern {
302        let mut next = vec![false; path.len() + 1];
303        if *segment == "**" {
304            let mut reachable = false;
305            for idx in 0..=path.len() {
306                reachable |= states[idx];
307                next[idx] = reachable;
308            }
309        } else {
310            for idx in 0..path.len() {
311                if states[idx] && segment_match(segment, path[idx]) {
312                    next[idx + 1] = true;
313                }
314            }
315        }
316        states = next;
317    }
318
319    states[path.len()]
320}
321
322fn segment_match(pattern: &str, text: &str) -> bool {
323    if pattern.is_ascii() && text.is_ascii() {
324        return segment_match_ascii(pattern.as_bytes(), text.as_bytes());
325    }
326
327    segment_match_chars(pattern, text)
328}
329
/// Byte-wise wildcard match: `*` in `pattern` matches any run of bytes
/// (including none); every other byte must match literally.
///
/// Uses single-star backtracking: when a literal comparison fails, the most
/// recent `*` is made to absorb one more byte of `text` and matching resumes
/// right after that `*`.
fn segment_match_ascii(pattern: &[u8], text: &[u8]) -> bool {
    let mut p = 0usize;
    let mut t = 0usize;
    // (pattern index just past the latest `*`, text index that `*` has absorbed up to)
    let mut backtrack: Option<(usize, usize)> = None;

    while t < text.len() {
        match pattern.get(p) {
            Some(b'*') => {
                backtrack = Some((p + 1, t));
                p += 1;
            }
            Some(&expected) if expected == text[t] => {
                p += 1;
                t += 1;
            }
            _ => match backtrack {
                Some((resume, absorbed)) => {
                    let absorbed = absorbed + 1;
                    backtrack = Some((resume, absorbed));
                    p = resume;
                    t = absorbed;
                }
                None => return false,
            },
        }
    }

    // Text is exhausted; whatever is left of the pattern must be only `*`.
    pattern[p..].iter().all(|&byte| byte == b'*')
}
366
/// Character-wise wildcard match for non-ASCII input: `*` in `pattern`
/// matches any run of `char`s (including none); every other `char` must
/// match literally.
///
/// Both strings are decoded into `char` vectors first so that indexing is
/// per character rather than per byte.
fn segment_match_chars(pattern: &str, text: &str) -> bool {
    let glob: Vec<char> = pattern.chars().collect();
    let subject: Vec<char> = text.chars().collect();

    let mut g = 0usize;
    let mut s = 0usize;
    // (glob index just past the latest `*`, subject index that `*` has absorbed up to)
    let mut retry: Option<(usize, usize)> = None;

    while let Some(&current) = subject.get(s) {
        match glob.get(g).copied() {
            Some('*') => {
                retry = Some((g + 1, s));
                g += 1;
            }
            Some(expected) if expected == current => {
                g += 1;
                s += 1;
            }
            _ => {
                // Literal mismatch: let the latest `*` swallow one more char.
                let Some((resume, absorbed)) = retry else {
                    return false;
                };
                let absorbed = absorbed + 1;
                retry = Some((resume, absorbed));
                g = resume;
                s = absorbed;
            }
        }
    }

    // Subject is exhausted; the rest of the glob may only be `*`.
    glob[g..].iter().all(|&ch| ch == '*')
}
406
/// Normalize a path (or glob) into a `/`-joined string.
///
/// Backslashes are treated as separators, `.` components are dropped, and a
/// `..` component removes the preceding component when there is one that is
/// not itself `..`; otherwise the `..` is kept. A root component discards
/// everything collected before it, so the result never starts with `/`.
fn normalize_path(path: &str) -> String {
    let unified = path.replace('\\', "/");
    let mut stack: Vec<String> = Vec::new();

    for component in Path::new(&unified).components() {
        match component {
            Component::Prefix(prefix) => {
                stack.push(prefix.as_os_str().to_string_lossy().into_owned());
            }
            Component::RootDir => stack.clear(),
            Component::CurDir => {}
            Component::ParentDir => {
                // Only step up when there is a real directory to leave.
                let can_ascend = stack.last().is_some_and(|top| top != "..");
                if can_ascend {
                    stack.pop();
                } else {
                    stack.push(String::from(".."));
                }
            }
            Component::Normal(name) => stack.push(name.to_string_lossy().into_owned()),
        }
    }

    stack.join("/")
}
427
/// Decode a hex-encoded SHA-256 digest into its 32 raw bytes.
///
/// Leading and trailing whitespace is ignored. Returns `None` unless the
/// remaining input is exactly 64 ASCII hex digits (upper or lower case).
///
/// Decoding works on bytes rather than `&str` slices, so input containing
/// multi-byte UTF-8 characters is rejected instead of panicking on a
/// non-character-boundary slice. Sign characters such as `+` are not
/// accepted as part of a hex pair.
fn parse_sha256_hex(input: &str) -> Option<[u8; 32]> {
    /// Value of a single ASCII hex digit, or `None` for any other byte.
    fn nibble(byte: u8) -> Option<u8> {
        match byte {
            b'0'..=b'9' => Some(byte - b'0'),
            b'a'..=b'f' => Some(byte - b'a' + 10),
            b'A'..=b'F' => Some(byte - b'A' + 10),
            _ => None,
        }
    }

    let bytes = input.trim().as_bytes();
    if bytes.len() != 64 {
        return None;
    }

    let mut digest = [0u8; 32];
    for (slot, pair) in digest.iter_mut().zip(bytes.chunks_exact(2)) {
        *slot = (nibble(pair[0])? << 4) | nibble(pair[1])?;
    }
    Some(digest)
}
441
/// Inline metadata parsed from a `.keyhogignore` line trailer. Used to
/// implement enterprise governance fields (`reason`, `expires`,
/// `approved_by`) per audits/legendary-2026-04-26 Tier-B #18.
///
/// Each field is `None` when the corresponding key is absent from the line.
#[derive(Default, Debug)]
struct InlineMetadata {
    /// Free-text justification from `reason=...`, with surrounding quotes removed.
    reason: Option<String>,
    /// Raw value of `expires=...`; `Allowlist::parse` compares it against
    /// today's `YYYY-MM-DD` date.
    expires: Option<String>,
    /// Approver from `approved_by=...`, with surrounding quotes removed.
    approved_by: Option<String>,
}
451
452fn parse_inline_metadata(s: &str) -> InlineMetadata {
453    let mut meta = InlineMetadata::default();
454    for token in s.split(';') {
455        let token = token.trim();
456        if token.is_empty() {
457            continue;
458        }
459        let Some(eq) = token.find('=') else { continue };
460        let key = token[..eq].trim();
461        let value = token[eq + 1..]
462            .trim()
463            .trim_matches(|c: char| c == '"' || c == '\'')
464            .to_string();
465        match key {
466            "reason" => meta.reason = Some(value),
467            "expires" => meta.expires = Some(value),
468            "approved_by" => meta.approved_by = Some(value),
469            _ => {
470                tracing::warn!("unknown allowlist metadata key '{key}' (ignored)");
471            }
472        }
473    }
474    meta
475}
476
477fn log_metadata_audit(kind: &str, entry: &str, meta: &InlineMetadata) {
478    if meta.reason.is_none() && meta.approved_by.is_none() && meta.expires.is_none() {
479        return;
480    }
481    tracing::info!(
482        kind,
483        entry,
484        reason = meta.reason.as_deref().unwrap_or("<unspecified>"),
485        approved_by = meta.approved_by.as_deref().unwrap_or("<unspecified>"),
486        expires = meta.expires.as_deref().unwrap_or("<no expiry>"),
487        "allowlist entry loaded with audit metadata"
488    );
489}
490
/// Returns today's UTC date formatted as `YYYY-MM-DD`.
///
/// The date is derived from `SystemTime::now()` without any date crate, to
/// avoid pulling chrono into core. If the clock reports a time before the
/// Unix epoch, the epoch itself is used.
fn today_yyyy_mm_dd() -> String {
    let unix_secs = match std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_secs() as i64,
        Err(_) => 0,
    };
    let (year, month, day) = civil_from_unix_days(unix_secs.div_euclid(86_400));
    format!("{year:04}-{month:02}-{day:02}")
}

/// Convert a count of days since 1970-01-01 into `(year, month, day)` using
/// the civil-from-days algorithm, after Howard Hinnant.
fn civil_from_unix_days(days: i64) -> (i64, u32, u32) {
    // Shift the origin to 0000-03-01 so leap days fall at the end of a year.
    let shifted = days + 719_468;
    let era = shifted.div_euclid(146_097);
    let day_of_era = shifted.rem_euclid(146_097) as u32;
    let year_of_era =
        (day_of_era - day_of_era / 1460 + day_of_era / 36_524 - day_of_era / 146_096) / 365;
    let day_of_year = day_of_era - (365 * year_of_era + year_of_era / 4 - year_of_era / 100);
    // Month index counted from March (0 = March, ..., 11 = February).
    let march_based_month = (5 * day_of_year + 2) / 153;
    let day = day_of_year - (153 * march_based_month + 2) / 5 + 1;
    let month = if march_based_month < 10 {
        march_based_month + 3
    } else {
        march_based_month - 9
    };
    // January and February belong to the following civil year.
    let year = year_of_era as i64 + era * 400 + i64::from(month <= 2);
    (year, month, day)
}
512
#[cfg(test)]
mod tests {
    use super::*;

    /// All three governance keys are extracted and their quotes stripped.
    #[test]
    fn metadata_fields_parse() {
        let trailer = r#"reason="rotate after release" ; expires=2099-01-01 ; approved_by="alice@example.com""#;
        let InlineMetadata {
            reason,
            expires,
            approved_by,
        } = parse_inline_metadata(trailer);
        assert_eq!(reason.as_deref(), Some("rotate after release"));
        assert_eq!(expires.as_deref(), Some("2099-01-01"));
        assert_eq!(approved_by.as_deref(), Some("alice@example.com"));
    }

    /// An unrecognised key must not panic or disturb the known keys; it is
    /// only warned about.
    #[test]
    fn unknown_metadata_keys_are_warned_not_fatal() {
        let parsed = parse_inline_metadata("foo=bar; reason=ok");
        assert_eq!(parsed.reason.as_deref(), Some("ok"));
        assert!(parsed.expires.is_none());
    }

    /// An entry whose expiry date is in the past is not loaded.
    #[test]
    fn expired_entries_are_dropped() {
        let allowlist = Allowlist::parse("detector:foo ; expires=1970-01-01");
        assert!(
            !allowlist.ignored_detectors.contains("foo"),
            "expired detector entry must not load"
        );
    }

    /// An entry whose expiry date is far in the future loads as usual.
    #[test]
    fn future_dated_entries_load_normally() {
        let allowlist =
            Allowlist::parse("detector:bar ; expires=9999-12-31 ; reason=\"long-lived ack\"");
        assert!(allowlist.ignored_detectors.contains("bar"));
    }

    /// Plain entries with no metadata trailer keep working.
    #[test]
    fn entries_without_metadata_still_load() {
        let allowlist = Allowlist::parse("path:**/*.md\ndetector:demo\n");
        assert!(allowlist
            .ignored_paths
            .iter()
            .any(|glob| glob == "**/*.md"));
        assert!(allowlist.ignored_detectors.contains("demo"));
    }

    /// The date helper produces a ten-character string with dashes in the
    /// ISO positions.
    #[test]
    fn today_is_well_formed() {
        let today = today_yyyy_mm_dd();
        assert_eq!(today.len(), 10);
        let bytes = today.as_bytes();
        assert_eq!(bytes[4], b'-');
        assert_eq!(bytes[7], b'-');
    }
}