1use std::collections::HashSet;
14use std::path::Component;
15use std::path::Path;
16
17use crate::VerifiedFinding;
18
/// Suppression rules loaded from an allowlist file.
///
/// The three collections are filled by [`Allowlist::parse`] from `hash:`, `detector:` and
/// `path:` entries respectively. A finding is suppressed when it matches any one of them.
#[derive(Debug, Clone, serde::Serialize)]
pub struct Allowlist {
    /// Decoded 32-byte digests taken from `hash:` entries (written as 64 hex characters).
    pub credential_hashes: HashSet<[u8; 32]>,
    /// Detector ids taken from `detector:` entries; compared for exact equality.
    pub ignored_detectors: HashSet<String>,
    /// Glob patterns taken from `path:` entries, kept in file order exactly as written (trimmed).
    pub ignored_paths: Vec<String>,
}
38
/// Maximum number of segments allowed in either the glob pattern or the path.
/// `glob_match_normalized` refuses to match (returns `false`) when either side exceeds it.
const MAX_GLOB_SEGMENTS: usize = 256;
/// Maximum byte length allowed for any single pattern or path segment; enforced by the same
/// guard in `glob_match_normalized`.
const MAX_GLOB_SEGMENT_LEN: usize = 1024;
41
42impl Allowlist {
43 pub fn empty() -> Self {
54 Self {
55 credential_hashes: HashSet::new(),
56 ignored_detectors: HashSet::new(),
57 ignored_paths: Vec::new(),
58 }
59 }
60
61 pub fn load(path: &Path) -> Result<Self, std::io::Error> {
74 let contents = std::fs::read_to_string(path)?;
75 Ok(Self::parse(&contents))
76 }
77
78 pub fn parse(content: &str) -> Self {
89 let mut al = Self::empty();
90 let today = today_yyyy_mm_dd();
91 for (line_number, raw_line) in content.lines().enumerate() {
92 let raw_line = raw_line.trim();
93 if raw_line.is_empty() || raw_line.starts_with('#') {
94 continue;
95 }
96 let mut parts = raw_line.splitn(2, ';');
99 let entry = parts.next().unwrap_or("").trim();
100 let metadata = parts.next().unwrap_or("");
101 let parsed_meta = parse_inline_metadata(metadata);
102
103 if let Some(exp) = parsed_meta.expires.as_deref() {
106 if exp < today.as_str() {
107 tracing::warn!(
108 "allowlist entry expired on {} (today is {}): '{}'",
109 exp,
110 today,
111 entry
112 );
113 continue;
114 }
115 }
116
117 if let Some(hash) = entry.strip_prefix("hash:") {
118 let trimmed = hash.trim();
119 if let Some(valid_hash) = parse_sha256_hex(trimmed) {
120 al.credential_hashes.insert(valid_hash);
121 log_metadata_audit("hash", trimmed, &parsed_meta);
122 } else {
123 tracing::warn!(
124 "invalid hash allowlist entry at line {}: '{}'",
125 line_number + 1,
126 trimmed
127 );
128 }
129 } else if let Some(detector) = entry.strip_prefix("detector:") {
130 let detector = detector.trim();
131 if detector.is_empty() {
132 tracing::warn!(
133 "invalid detector allowlist entry at line {}: detector id is empty",
134 line_number + 1
135 );
136 } else {
137 al.ignored_detectors.insert(detector.to_string());
138 log_metadata_audit("detector", detector, &parsed_meta);
139 }
140 } else if let Some(path) = entry.strip_prefix("path:") {
141 let path = path.trim();
142 if path.is_empty() {
143 tracing::warn!(
144 "invalid path allowlist entry at line {}: glob is empty",
145 line_number + 1
146 );
147 } else {
148 al.ignored_paths.push(path.to_string());
149 log_metadata_audit("path", path, &parsed_meta);
150 }
151 } else {
152 tracing::warn!(
153 "invalid allowlist entry at line {}: '{}'. Fix: use hash:, detector:, or path:",
154 line_number + 1,
155 entry
156 );
157 }
158 }
159 al
160 }
161
162 pub fn is_allowed(&self, finding: &VerifiedFinding) -> bool {
199 let detector_ignored = self.ignored_detectors.contains(&*finding.detector_id);
200
201 let path_ignored = finding.location.file_path.as_ref().is_some_and(|path| {
202 let normalized_path = normalize_path(path);
203 self.ignored_paths
204 .iter()
205 .any(|pattern| glob_match_normalized(pattern, &normalized_path))
206 });
207
208 let hash_ignored = self.matches_ignored_hash(&finding.credential_hash);
209
210 detector_ignored || path_ignored || hash_ignored
211 }
212
213 pub fn is_hash_allowed(&self, credential: &str) -> bool {
224 self.matches_ignored_hash(credential)
225 }
226
227 pub fn is_raw_hash_ignored(&self, hash_hex: &str) -> bool {
229 self.matches_ignored_hash(hash_hex)
230 }
231
232 pub fn is_path_ignored(&self, path: &str) -> bool {
243 let normalized = normalize_path(path);
244 self.ignored_paths
245 .iter()
246 .any(|pattern| glob_match_normalized(pattern, &normalized))
247 }
248
249 fn matches_ignored_hash(&self, input: &str) -> bool {
250 if let Some(hash_bytes) = parse_sha256_hex(input) {
258 return self.credential_hashes.contains(&hash_bytes);
259 }
260 false
261 }
262}
263
264fn glob_match_normalized(pattern: &str, normalized_path: &str) -> bool {
265 let normalized_pattern = normalize_path(pattern);
266 let pattern_segments = split_segments(&normalized_pattern);
267 let path_segments = split_segments(normalized_path);
268
269 if pattern_segments.len() > MAX_GLOB_SEGMENTS
270 || path_segments.len() > MAX_GLOB_SEGMENTS
271 || pattern_segments
272 .iter()
273 .any(|segment| segment.len() > MAX_GLOB_SEGMENT_LEN)
274 || path_segments
275 .iter()
276 .any(|segment| segment.len() > MAX_GLOB_SEGMENT_LEN)
277 {
278 tracing::warn!(
279 "skipping oversized allowlist glob match (pattern segments: {}, path segments: {}). Fix: shorten the glob or path",
280 pattern_segments.len(),
281 path_segments.len()
282 );
283 return false;
284 }
285
286 glob_match_segments(&pattern_segments, &path_segments)
287}
288
/// Splits `path` on `/` and `\` into borrowed segments.
///
/// An empty string yields no segments at all; consecutive separators yield empty segments.
fn split_segments(path: &str) -> Vec<&str> {
    match path {
        "" => Vec::new(),
        non_empty => non_empty
            .split(|c: char| matches!(c, '/' | '\\'))
            .collect(),
    }
}
296
297fn glob_match_segments(pattern: &[&str], path: &[&str]) -> bool {
298 let mut states = vec![false; path.len() + 1];
299 states[0] = true;
300
301 for segment in pattern {
302 let mut next = vec![false; path.len() + 1];
303 if *segment == "**" {
304 let mut reachable = false;
305 for idx in 0..=path.len() {
306 reachable |= states[idx];
307 next[idx] = reachable;
308 }
309 } else {
310 for idx in 0..path.len() {
311 if states[idx] && segment_match(segment, path[idx]) {
312 next[idx + 1] = true;
313 }
314 }
315 }
316 states = next;
317 }
318
319 states[path.len()]
320}
321
322fn segment_match(pattern: &str, text: &str) -> bool {
323 if pattern.is_ascii() && text.is_ascii() {
324 return segment_match_ascii(pattern.as_bytes(), text.as_bytes());
325 }
326
327 segment_match_chars(pattern, text)
328}
329
/// Wildcard match over bytes where `*` matches any run of bytes (possibly empty) and every
/// other byte must match literally. There is no `?` or character-class support.
///
/// Implemented as a single pass that remembers the most recent `*` and, on a mismatch, lets
/// that star absorb one more byte of `text` before retrying.
fn segment_match_ascii(pattern: &[u8], text: &[u8]) -> bool {
    let mut p = 0usize;
    let mut t = 0usize;
    // (pattern index right after the last `*`, text index where that star's capture ends)
    let mut backtrack: Option<(usize, usize)> = None;

    while t < text.len() {
        match pattern.get(p) {
            Some(&b'*') => {
                backtrack = Some((p + 1, t));
                p += 1;
            }
            Some(&byte) if byte == text[t] => {
                p += 1;
                t += 1;
            }
            _ => match backtrack {
                Some((resume_p, captured_to)) => {
                    let retry_from = captured_to + 1;
                    backtrack = Some((resume_p, retry_from));
                    p = resume_p;
                    t = retry_from;
                }
                None => return false,
            },
        }
    }

    // Text is exhausted: only trailing stars may remain in the pattern.
    pattern[p..].iter().all(|&byte| byte == b'*')
}
366
/// Wildcard match over Unicode scalar values where `*` matches any run of characters
/// (possibly empty) and every other character must match literally.
///
/// Same single-pass, last-star-backtracking scheme as the byte matcher, but comparing
/// `char`s so multi-byte characters are treated as single units.
fn segment_match_chars(pattern: &str, text: &str) -> bool {
    let glob: Vec<char> = pattern.chars().collect();
    let subject: Vec<char> = text.chars().collect();

    let mut g = 0usize;
    let mut s = 0usize;
    // (glob index right after the last `*`, subject index where that star's capture ends)
    let mut backtrack: Option<(usize, usize)> = None;

    while s < subject.len() {
        match glob.get(g) {
            Some(&'*') => {
                backtrack = Some((g + 1, s));
                g += 1;
            }
            Some(&ch) if ch == subject[s] => {
                g += 1;
                s += 1;
            }
            _ => match backtrack {
                Some((resume_g, captured_to)) => {
                    let retry_from = captured_to + 1;
                    backtrack = Some((resume_g, retry_from));
                    g = resume_g;
                    s = retry_from;
                }
                None => return false,
            },
        }
    }

    // Subject is exhausted: only trailing stars may remain in the glob.
    glob[g..].iter().all(|&ch| ch == '*')
}
406
/// Produces a `/`-separated, lexically cleaned form of `path`.
///
/// Backslashes are turned into `/`, `.` components are dropped, `..` removes the preceding
/// component when there is one that is not itself `..` (otherwise it is kept), and a root
/// component discards everything collected so far, so an absolute path loses its leading `/`.
/// No filesystem access is performed.
fn normalize_path(path: &str) -> String {
    let unified = path.replace('\\', "/");
    let mut stack: Vec<String> = Vec::new();

    for component in Path::new(&unified).components() {
        match component {
            Component::Prefix(prefix) => {
                stack.push(prefix.as_os_str().to_string_lossy().into_owned());
            }
            Component::RootDir => stack.clear(),
            Component::CurDir => {}
            Component::Normal(name) => stack.push(name.to_string_lossy().into_owned()),
            Component::ParentDir => {
                let can_step_up = stack.last().is_some_and(|part| part != "..");
                if can_step_up {
                    stack.pop();
                } else {
                    stack.push(String::from(".."));
                }
            }
        }
    }

    stack.join("/")
}
427
/// Decodes a 64-character hexadecimal string into a 32-byte digest.
///
/// Surrounding whitespace is ignored and both upper- and lower-case hex digits are accepted.
/// Returns `None` when the trimmed input is not exactly 64 bytes long or contains anything
/// other than hex digits.
///
/// Decoding works on bytes, one nibble at a time, rather than slicing the `&str` and calling
/// `u8::from_str_radix`. That avoids two problems: slicing at a fixed byte offset panics when
/// a multi-byte character straddles the boundary, and `from_str_radix` accepts a leading `+`
/// sign, so a pair such as `+f` would be taken as a valid byte.
fn parse_sha256_hex(input: &str) -> Option<[u8; 32]> {
    let bytes = input.trim().as_bytes();
    if bytes.len() != 64 {
        return None;
    }

    let mut digest = [0u8; 32];
    for (slot, pair) in digest.iter_mut().zip(bytes.chunks_exact(2)) {
        // `to_digit(16)` is `Some` only for 0-9, a-f and A-F; bytes >= 0x80 map to
        // non-digit characters and are rejected.
        let high = char::from(pair[0]).to_digit(16)?;
        let low = char::from(pair[1]).to_digit(16)?;
        *slot = u8::try_from((high << 4) | low).ok()?;
    }
    Some(digest)
}
441
/// Optional audit fields parsed from the `key=value` tokens that follow an entry's first `;`.
/// A field is `None` when its key did not appear on the line.
#[derive(Default, Debug)]
struct InlineMetadata {
    /// Value of the `reason` key.
    reason: Option<String>,
    /// Value of the `expires` key; `Allowlist::parse` compares it as a string against
    /// today's `YYYY-MM-DD` date to decide whether the entry has expired.
    expires: Option<String>,
    /// Value of the `approved_by` key.
    approved_by: Option<String>,
}
451
452fn parse_inline_metadata(s: &str) -> InlineMetadata {
453 let mut meta = InlineMetadata::default();
454 for token in s.split(';') {
455 let token = token.trim();
456 if token.is_empty() {
457 continue;
458 }
459 let Some(eq) = token.find('=') else { continue };
460 let key = token[..eq].trim();
461 let value = token[eq + 1..]
462 .trim()
463 .trim_matches(|c: char| c == '"' || c == '\'')
464 .to_string();
465 match key {
466 "reason" => meta.reason = Some(value),
467 "expires" => meta.expires = Some(value),
468 "approved_by" => meta.approved_by = Some(value),
469 _ => {
470 tracing::warn!("unknown allowlist metadata key '{key}' (ignored)");
471 }
472 }
473 }
474 meta
475}
476
477fn log_metadata_audit(kind: &str, entry: &str, meta: &InlineMetadata) {
478 if meta.reason.is_none() && meta.approved_by.is_none() && meta.expires.is_none() {
479 return;
480 }
481 tracing::info!(
482 kind,
483 entry,
484 reason = meta.reason.as_deref().unwrap_or("<unspecified>"),
485 approved_by = meta.approved_by.as_deref().unwrap_or("<unspecified>"),
486 expires = meta.expires.as_deref().unwrap_or("<no expiry>"),
487 "allowlist entry loaded with audit metadata"
488 );
489}
490
/// Returns the current UTC date formatted as `YYYY-MM-DD`, using only the standard library.
///
/// The system clock is read as seconds since the Unix epoch (falling back to `0`, i.e.
/// `1970-01-01`, if the clock reports a time before the epoch). The day count is then
/// converted to a proleptic Gregorian date by working in 400-year eras of 146 097 days,
/// with the year internally starting on 1 March so the leap day falls at the end.
fn today_yyyy_mm_dd() -> String {
    let epoch_secs = match std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_secs() as i64,
        Err(_) => 0,
    };

    // Shift the origin from 1970-01-01 to 0000-03-01.
    let shifted_days = epoch_secs.div_euclid(86_400) + 719_468;
    let era = shifted_days.div_euclid(146_097);
    let day_of_era = (shifted_days - era * 146_097) as u32;
    let year_of_era =
        (day_of_era - day_of_era / 1460 + day_of_era / 36_524 - day_of_era / 146_096) / 365;
    let day_of_year = day_of_era - (365 * year_of_era + year_of_era / 4 - year_of_era / 100);
    // Month index counted from March (0 = March, ..., 11 = February).
    let march_based_month = (5 * day_of_year + 2) / 153;

    let d = day_of_year - (153 * march_based_month + 2) / 5 + 1;
    let m = if march_based_month < 10 {
        march_based_month + 3
    } else {
        march_based_month - 9
    };
    // January and February belong to the following calendar year in the March-based scheme.
    let year = i64::from(year_of_era) + era * 400 + i64::from(m <= 2);

    format!("{year:04}-{m:02}-{d:02}")
}
512
#[cfg(test)]
mod tests {
    use super::*;

    /// All three recognised keys are extracted, with padding and quotes stripped.
    #[test]
    fn metadata_fields_parse() {
        let InlineMetadata {
            reason,
            expires,
            approved_by,
        } = parse_inline_metadata(
            r#"reason="rotate after release" ; expires=2099-01-01 ; approved_by="alice@example.com""#,
        );
        assert_eq!(reason.as_deref(), Some("rotate after release"));
        assert_eq!(expires.as_deref(), Some("2099-01-01"));
        assert_eq!(approved_by.as_deref(), Some("alice@example.com"));
    }

    /// An unrecognised key is skipped without disturbing the keys around it.
    #[test]
    fn unknown_metadata_keys_are_warned_not_fatal() {
        let parsed = parse_inline_metadata("foo=bar; reason=ok");
        assert_eq!(parsed.reason.as_deref(), Some("ok"));
        assert!(parsed.expires.is_none());
    }

    /// An entry whose expiry date is in the past is not loaded.
    #[test]
    fn expired_entries_are_dropped() {
        let allowlist = Allowlist::parse("detector:foo ; expires=1970-01-01");
        let loaded = allowlist.ignored_detectors.contains("foo");
        assert!(!loaded, "expired detector entry must not load");
    }

    /// An entry whose expiry date is far in the future loads like any other.
    #[test]
    fn future_dated_entries_load_normally() {
        let allowlist =
            Allowlist::parse("detector:bar ; expires=9999-12-31 ; reason=\"long-lived ack\"");
        assert!(allowlist.ignored_detectors.contains("bar"));
    }

    /// Entries with no `;` metadata section are accepted.
    #[test]
    fn entries_without_metadata_still_load() {
        let allowlist = Allowlist::parse("path:**/*.md\ndetector:demo\n");
        assert!(allowlist.ignored_paths.iter().any(|glob| glob == "**/*.md"));
        assert!(allowlist.ignored_detectors.contains("demo"));
    }

    /// The generated date has the `YYYY-MM-DD` shape.
    #[test]
    fn today_is_well_formed() {
        let today = today_yyyy_mm_dd();
        let bytes = today.as_bytes();
        assert_eq!(today.len(), 10);
        assert_eq!(bytes[4], b'-');
        assert_eq!(bytes[7], b'-');
    }
}