keyhog_core/allowlist.rs
1//! Allowlist support: `.keyhogignore` file parsing for suppressing known false
2//! positives by path glob, detector ID, or credential hash.
3
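//!
//! Allowlist: known false positives and ignored patterns.
//!
//! Users can create a `.keyhogignore` file to suppress known false positives.
//! Format (one entry per line):
//! - `hash:<sha256>` - ignore a specific credential by its 64-character hex SHA-256 hash
//! - `detector:<id>` - ignore all findings from a detector
//! - `path:<glob>` - ignore files matching a glob pattern
//! - a bare 64-character hex string is treated like `hash:<sha256>`
//! - any other bare entry is treated like `path:<glob>`
//! - an entry may be followed by `; key=value` metadata
//!   (`reason="..."`, `expires=YYYY-MM-DD`, `approved_by="..."`)
//! - `# comment` - lines starting with `#` are comments
//! - blank lines are skipped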
13use std::collections::HashSet;
14use std::path::Component;
15use std::path::Path;
16
17use crate::VerifiedFinding;
18
19#[path = "allowlist_metadata.rs"]
20mod allowlist_metadata;
21use allowlist_metadata::*;
22
/// User-defined suppressions loaded from `.keyhogignore`: credential hashes, detector IDs, and path globs.
///
/// # Examples
///
/// ```rust
/// use keyhog_core::allowlist::Allowlist;
///
/// let allowlist = Allowlist::parse("detector:demo-token\npath:**/*.md\n");
/// assert!(allowlist.ignored_detectors.contains("demo-token"));
/// ```
#[derive(Debug, Clone, serde::Serialize)]
pub struct Allowlist {
    /// SHA-256 hashes of credentials to ignore, stored as the raw 32-byte
    /// digest decoded from a 64-character hex entry.
    pub credential_hashes: HashSet<[u8; 32]>,
    /// Detector IDs to ignore entirely (looked up by exact string match).
    pub ignored_detectors: HashSet<String>,
    /// Glob patterns for paths to ignore. Patterns are matched segment by
    /// segment: `*` matches any run of characters inside one segment and a
    /// `**` segment matches zero or more whole segments.
    pub ignored_paths: Vec<String>,
}
42
/// Maximum number of segments a glob pattern or a path may have;
/// `glob_match_normalized` refuses to match (and logs a warning) beyond this.
const MAX_GLOB_SEGMENTS: usize = 256;
/// Maximum length in bytes of a single glob or path segment; longer segments
/// make `glob_match_normalized` refuse to match.
const MAX_GLOB_SEGMENT_LEN: usize = 1024;
45
46impl Allowlist {
47 /// Create an empty allowlist with no suppressed hashes, detectors, or paths.
48 ///
49 /// # Examples
50 ///
51 /// ```rust
52 /// use keyhog_core::allowlist::Allowlist;
53 ///
54 /// let allowlist = Allowlist::empty();
55 /// assert!(allowlist.ignored_paths.is_empty());
56 /// ```
57 pub fn empty() -> Self {
58 Self {
59 credential_hashes: HashSet::new(),
60 ignored_detectors: HashSet::new(),
61 ignored_paths: Vec::new(),
62 }
63 }
64
65 /// Load from a .keyhogignore file.
66 ///
67 /// # Examples
68 ///
69 /// ```rust,no_run
70 /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
71 /// use keyhog_core::allowlist::Allowlist;
72 /// use std::path::Path;
73 ///
74 /// let _allowlist = Allowlist::load(Path::new(".keyhogignore"))?;
75 /// # Ok(()) }
76 /// ```
77 pub fn load(path: &Path) -> Result<Self, std::io::Error> {
78 let contents = std::fs::read_to_string(path)?;
79 Ok(Self::parse(&contents))
80 }
81
82 /// Parse allowlist from string content.
83 ///
84 /// # Examples
85 ///
86 /// ```rust
87 /// use keyhog_core::allowlist::Allowlist;
88 ///
89 /// let allowlist = Allowlist::parse("path:**/.env\ndetector:demo-token\n");
90 /// assert!(allowlist.is_path_ignored("app/.env"));
91 /// ```
92 pub fn parse(content: &str) -> Self {
93 let mut al = Self::empty();
94 let today = today_yyyy_mm_dd();
95 for (line_number, raw_line) in content.lines().enumerate() {
96 let raw_line = raw_line.trim();
97 if raw_line.is_empty() || raw_line.starts_with('#') {
98 continue;
99 }
100 // Optional inline metadata: `entry; reason="..."; expires=YYYY-MM-DD; approved_by="..."`
101 // Each `;`-separated token after the first is a key=value pair.
102 let mut parts = raw_line.splitn(2, ';');
103 let entry = parts.next().unwrap_or("").trim();
104 let metadata = parts.next().unwrap_or("");
105 let parsed_meta = parse_inline_metadata(metadata);
106
107 // Drop entries whose `expires` is past - keeps `.keyhogignore`
108 // self-cleaning for short-lived approvals (Tier-B #18 governance).
109 if let Some(exp) = parsed_meta.expires.as_deref() {
110 if exp < today.as_str() {
111 tracing::warn!(
112 "allowlist entry expired on {} (today is {}): '{}'",
113 exp,
114 today,
115 entry
116 );
117 continue;
118 }
119 }
120
121 if let Some(hash) = entry.strip_prefix("hash:") {
122 let trimmed = hash.trim();
123 if let Some(valid_hash) = parse_sha256_hex(trimmed) {
124 al.credential_hashes.insert(valid_hash);
125 log_metadata_audit("hash", trimmed, &parsed_meta);
126 } else {
127 tracing::warn!(
128 "invalid hash allowlist entry at line {}: '{}'",
129 line_number + 1,
130 trimmed
131 );
132 }
133 } else if let Some(detector) = entry.strip_prefix("detector:") {
134 let detector = detector.trim();
135 if detector.is_empty() {
136 tracing::warn!(
137 "invalid detector allowlist entry at line {}: detector id is empty",
138 line_number + 1
139 );
140 } else {
141 al.ignored_detectors.insert(detector.to_string());
142 log_metadata_audit("detector", detector, &parsed_meta);
143 }
144 } else if let Some(path) = entry.strip_prefix("path:") {
145 let path = path.trim();
146 if path.is_empty() {
147 tracing::warn!(
148 "invalid path allowlist entry at line {}: glob is empty",
149 line_number + 1
150 );
151 } else {
152 al.ignored_paths.push(path.to_string());
153 log_metadata_audit("path", path, &parsed_meta);
154 }
155 } else if let Some(bytes) = parse_sha256_hex(entry) {
156 // Bare 64-char hex hash. Lets the obvious
157 // `keyhog scan ... --format jsonl | jq -r '.credential_hash'
158 // >> .keyhogignore` workflow Just Work without users
159 // learning the `hash:` prefix.
160 al.credential_hashes.insert(bytes);
161 log_metadata_audit("hash", entry, &parsed_meta);
162 } else {
163 // Bare path glob (gitignore-style). Anything that didn't
164 // match an explicit `hash:` / `detector:` / `path:` prefix
165 // and isn't a bare hash is interpreted as a path glob,
166 // matching `.gitignore` UX (`*.log`, `node_modules/`,
167 // `vendor/**/*.json`). kimi-1 dogfood #129 - the prior
168 // behavior emitted a warning and silently dropped the
169 // line, which is the worst of both worlds: every
170 // `.gitignore` users copied over was dead.
171 al.ignored_paths.push(entry.to_string());
172 log_metadata_audit("path", entry, &parsed_meta);
173 }
174 }
175 al
176 }
177
178 /// Check whether detector or path rules suppress a verified finding.
179 ///
180 /// Hash-based suppression is evaluated earlier on [`crate::RawMatch`] values
181 /// because [`VerifiedFinding`] stores only redacted credentials.
182 ///
183 /// # Examples
184 ///
185 /// ```rust
186 /// use keyhog_core::allowlist::Allowlist;
187 /// use keyhog_core::{MatchLocation, Severity, VerificationResult, VerifiedFinding};
188 /// use std::collections::HashMap;
189 ///
190 /// let allowlist = Allowlist::parse("detector:demo-token\n");
191 /// let finding = VerifiedFinding {
192 /// detector_id: "demo-token".into(),
193 /// detector_name: "Demo Token".into(),
194 /// service: "demo".into(),
195 /// severity: Severity::High,
196 /// credential_redacted: "demo_...1234".into(),
197 /// location: MatchLocation {
198 /// source: "fs".into(),
199 /// file_path: Some("src/main.rs".into()),
200 /// line: Some(1),
201 /// offset: 0,
202 /// commit: None,
203 /// author: None,
204 /// date: None,
205 /// },
206 /// verification: VerificationResult::Unverifiable,
207 /// metadata: std::collections::HashMap::new(),
208 /// additional_locations: Vec::new(),
209 /// confidence: None,
210 /// credential_hash: "hash".to_string(),
211 /// };
212 /// assert!(allowlist.is_allowed(&finding));
213 /// ```
214 pub fn is_allowed(&self, finding: &VerifiedFinding) -> bool {
215 let detector_ignored = self.ignored_detectors.contains(&*finding.detector_id);
216
217 let path_ignored = finding.location.file_path.as_ref().is_some_and(|path| {
218 let normalized_path = normalize_path(path);
219 self.ignored_paths
220 .iter()
221 .any(|pattern| glob_match_normalized(pattern, &normalized_path))
222 });
223
224 let hash_ignored = self.matches_ignored_hash(&finding.credential_hash);
225
226 detector_ignored || path_ignored || hash_ignored
227 }
228
229 /// Check if a raw credential hash is allowlisted.
230 ///
231 /// # Examples
232 ///
233 /// ```rust
234 /// use keyhog_core::allowlist::Allowlist;
235 ///
236 /// let allowlist = Allowlist::parse("");
237 /// assert!(!allowlist.is_hash_allowed("demo_ABC12345"));
238 /// ```
239 pub fn is_hash_allowed(&self, credential: &str) -> bool {
240 self.matches_ignored_hash(credential)
241 }
242
243 /// Check if a hex-encoded SHA-256 hash is allowlisted.
244 pub fn is_raw_hash_ignored(&self, hash_hex: &str) -> bool {
245 self.matches_ignored_hash(hash_hex)
246 }
247
248 /// Check whether a raw path matches an ignored-path glob.
249 ///
250 /// # Examples
251 ///
252 /// ```rust
253 /// use keyhog_core::allowlist::Allowlist;
254 ///
255 /// let allowlist = Allowlist::parse("path:**/*.md\n");
256 /// assert!(allowlist.is_path_ignored("docs/README.md"));
257 /// ```
258 pub fn is_path_ignored(&self, path: &str) -> bool {
259 let normalized = normalize_path(path);
260 self.ignored_paths
261 .iter()
262 .any(|pattern| glob_match_normalized(pattern, &normalized))
263 }
264
265 fn matches_ignored_hash(&self, input: &str) -> bool {
266 // Only compare against the parsed-hex form. Earlier versions also
267 // hashed the raw input as a fallback, which silently encouraged users
268 // to put plaintext credentials in `.keyhogignore` (the file is often
269 // committed by accident - see audit release-2026-04-26). The
270 // `hash:` parser already rejects non-64-hex inputs at load time, so
271 // every legitimate suppressing entry passes through `parse_sha256_hex`
272 // here.
273 if let Some(hash_bytes) = parse_sha256_hex(input) {
274 return self.credential_hashes.contains(&hash_bytes);
275 }
276 false
277 }
278}
279
280fn glob_match_normalized(pattern: &str, normalized_path: &str) -> bool {
281 let normalized_pattern = normalize_path(pattern);
282 let pattern_segments = split_segments(&normalized_pattern);
283 let path_segments = split_segments(normalized_path);
284
285 if pattern_segments.len() > MAX_GLOB_SEGMENTS
286 || path_segments.len() > MAX_GLOB_SEGMENTS
287 || pattern_segments
288 .iter()
289 .any(|segment| segment.len() > MAX_GLOB_SEGMENT_LEN)
290 || path_segments
291 .iter()
292 .any(|segment| segment.len() > MAX_GLOB_SEGMENT_LEN)
293 {
294 tracing::warn!(
295 "skipping oversized allowlist glob match (pattern segments: {}, path segments: {}). Fix: shorten the glob or path",
296 pattern_segments.len(),
297 path_segments.len()
298 );
299 return false;
300 }
301
302 glob_match_segments(&pattern_segments, &path_segments)
303}
304
/// Split a path into its segments on `/` or `\`.
///
/// An empty path has no segments. Empty segments produced by leading,
/// trailing, or doubled separators are kept.
fn split_segments(path: &str) -> Vec<&str> {
    match path {
        "" => Vec::new(),
        non_empty => non_empty
            .split(|ch: char| ch == '/' || ch == '\\')
            .collect(),
    }
}
312
313fn glob_match_segments(pattern: &[&str], path: &[&str]) -> bool {
314 let mut states = vec![false; path.len() + 1];
315 states[0] = true;
316
317 for segment in pattern {
318 let mut next = vec![false; path.len() + 1];
319 if *segment == "**" {
320 let mut reachable = false;
321 for idx in 0..=path.len() {
322 reachable |= states[idx];
323 next[idx] = reachable;
324 }
325 } else {
326 for idx in 0..path.len() {
327 if states[idx] && segment_match(segment, path[idx]) {
328 next[idx + 1] = true;
329 }
330 }
331 }
332 states = next;
333 }
334
335 states[path.len()]
336}
337
/// Match one path segment against one glob segment in which `*` matches any
/// run of characters (including none). No other character is special.
///
/// Pure-ASCII inputs are compared bytewise; anything else is compared
/// `char` by `char`.
fn segment_match(pattern: &str, text: &str) -> bool {
    if pattern.is_ascii() && text.is_ascii() {
        segment_match_ascii(pattern.as_bytes(), text.as_bytes())
    } else {
        segment_match_chars(pattern, text)
    }
}

/// Bytewise wildcard match; `b'*'` is the wildcard.
fn segment_match_ascii(pattern: &[u8], text: &[u8]) -> bool {
    wildcard_match(pattern, text, &b'*')
}

/// `char`-wise wildcard match for segments containing non-ASCII text; `'*'`
/// is the wildcard.
fn segment_match_chars(pattern: &str, text: &str) -> bool {
    let pattern_chars: Vec<char> = pattern.chars().collect();
    let text_chars: Vec<char> = text.chars().collect();
    wildcard_match(&pattern_chars, &text_chars, &'*')
}

/// Shared matcher behind `segment_match_ascii` and `segment_match_chars`.
///
/// Walks `text` once, remembering the most recent wildcard. On a mismatch the
/// wildcard is made to absorb one more element of `text` and matching resumes
/// right after it; without a wildcard to fall back on the match fails.
fn wildcard_match<T: PartialEq>(pattern: &[T], text: &[T], star: &T) -> bool {
    let mut pi = 0usize;
    let mut ti = 0usize;
    // (pattern index just past the last wildcard, text index that wildcard
    // currently extends up to).
    let mut fallback: Option<(usize, usize)> = None;

    while ti < text.len() {
        match pattern.get(pi) {
            Some(token) if token == star => {
                pi += 1;
                fallback = Some((pi, ti));
            }
            Some(token) if *token == text[ti] => {
                pi += 1;
                ti += 1;
            }
            _ => {
                let Some((after_star, absorbed_to)) = fallback else {
                    return false;
                };
                ti = absorbed_to + 1;
                pi = after_star;
                fallback = Some((after_star, ti));
            }
        }
    }

    // Text is exhausted: only trailing wildcards may remain in the pattern.
    pattern[pi..].iter().all(|token| token == star)
}
424
/// Produce a canonical `/`-separated form of a path or glob for matching.
///
/// Backslashes become `/`, `.` components are dropped, and `..` removes the
/// preceding component when there is one that is not itself `..` (otherwise
/// the `..` is kept). A root component discards everything collected so far,
/// so absolute paths come out without a leading `/`.
fn normalize_path(path: &str) -> String {
    let unified = path.replace('\\', "/");
    let mut stack: Vec<String> = Vec::new();

    for component in Path::new(&unified).components() {
        match component {
            Component::Prefix(prefix) => {
                stack.push(prefix.as_os_str().to_string_lossy().into_owned());
            }
            Component::RootDir => stack.clear(),
            Component::CurDir => {}
            Component::ParentDir => {
                let can_step_up = stack.last().is_some_and(|previous| previous != "..");
                if can_step_up {
                    stack.pop();
                } else {
                    stack.push("..".to_string());
                }
            }
            Component::Normal(name) => stack.push(name.to_string_lossy().into_owned()),
        }
    }

    stack.join("/")
}
445
/// Decode a hex-encoded SHA-256 digest into its 32 raw bytes.
///
/// Surrounding whitespace is ignored. Returns `None` unless the remaining
/// text is exactly 64 ASCII hex digits (upper or lower case).
fn parse_sha256_hex(input: &str) -> Option<[u8; 32]> {
    let hex = input.trim().as_bytes();
    // Validate every byte before decoding. A length check alone is not
    // enough: a 64-byte string containing multi-byte UTF-8 would make
    // fixed-offset string slicing panic, and `u8::from_str_radix` accepts a
    // leading `+`, so pairs like "+a" would otherwise be taken as hex.
    if hex.len() != 64 || !hex.iter().all(u8::is_ascii_hexdigit) {
        return None;
    }

    let mut digest = [0u8; 32];
    for (byte, pair) in digest.iter_mut().zip(hex.chunks_exact(2)) {
        let pair = std::str::from_utf8(pair).ok()?;
        *byte = u8::from_str_radix(pair, 16).ok()?;
    }
    Some(digest)
}
459
/// Inline metadata parsed from a `.keyhogignore` line trailer. Used to
/// implement enterprise governance fields (`reason`, `expires`,
/// `approved_by`) per audits/legendary-2026-04-26 Tier-B #18.
///
/// Every field is optional; `Default` yields a value with all fields `None`.
#[derive(Default, Debug)]
struct InlineMetadata {
    /// Value of the `reason="..."` key, when present.
    reason: Option<String>,
    /// Value of the `expires=YYYY-MM-DD` key, when present.
    /// NOTE(review): presumably this is the value `Allowlist::parse` compares
    /// as a plain string against today's date to drop expired entries -
    /// confirm against `parse_inline_metadata`.
    expires: Option<String>,
    /// Value of the `approved_by="..."` key, when present.
    approved_by: Option<String>,
}