use std::collections::HashMap;
use std::collections::HashSet;
use std::path::Component;
use std::path::Path;
use crate::merkle_spec_hash::hex_nibble;
use crate::VerifiedFinding;
#[path = "allowlist_metadata.rs"]
mod allowlist_metadata;
use allowlist_metadata::*;
/// Suppression rules applied to findings: a finding is allowed when its
/// detector id, its file path, or its credential hash matches an entry here.
#[derive(Debug, Clone, serde::Serialize)]
pub struct Allowlist {
/// 32-byte credential digests that are suppressed (membership is tested with
/// `HashSet::contains`).
pub credential_hashes: HashSet<[u8; 32]>,
/// Detector ids whose findings are suppressed.
pub ignored_detectors: HashSet<String>,
/// Path globs exactly as written in the allowlist source, in file order.
pub ignored_paths: Vec<String>,
/// Pre-compiled form of `ignored_paths`; excluded from serialization.
#[serde(skip)]
path_index: PathGlobIndex,
}
/// Maximum number of `/`-separated segments a glob pattern or a candidate path
/// may have; anything larger is never matched.
const MAX_GLOB_SEGMENTS: usize = 256;
/// Maximum length, in bytes, of a single segment of a glob pattern or a
/// candidate path; anything larger is never matched.
const MAX_GLOB_SEGMENT_LEN: usize = 1024;
/// One allowlist path pattern after normalization and segment splitting.
#[derive(Debug, Clone)]
struct CompiledGlob {
/// The normalized pattern split into path segments (`**`, `*.rs`, `src`, ...).
segments: Vec<String>,
/// True when the pattern exceeds `MAX_GLOB_SEGMENTS` or `MAX_GLOB_SEGMENT_LEN`;
/// such a glob is kept in the index but never matches.
oversize: bool,
}
/// Compiled path globs, bucketed by their first segment so that a lookup only
/// has to test the globs that could possibly match a given path.
#[derive(Debug, Clone, Default)]
struct PathGlobIndex {
/// Globs whose first segment contains no `*`, keyed by that first segment.
literal_first: HashMap<String, Vec<CompiledGlob>>,
/// Globs whose first segment contains `*` (including `**`); tested for every path.
wild_first: Vec<CompiledGlob>,
/// Globs that normalized to zero segments; only consulted for an empty path.
empty_pattern: Vec<CompiledGlob>,
/// Number of source patterns this index was built from; compared against the
/// current pattern count to notice that the index is stale.
source_len: usize,
}
impl PathGlobIndex {
fn build(patterns: &[String]) -> Self {
let mut index = PathGlobIndex::default();
index.source_len = patterns.len();
for pattern in patterns {
let normalized_pattern = normalize_path(pattern);
let segments: Vec<String> = split_segments(&normalized_pattern)
.into_iter()
.map(str::to_string)
.collect();
let oversize = segments.len() > MAX_GLOB_SEGMENTS
|| segments.iter().any(|s| s.len() > MAX_GLOB_SEGMENT_LEN);
let glob = CompiledGlob { segments, oversize };
match glob.segments.first() {
None => index.empty_pattern.push(glob),
Some(first) if first == "**" || first.contains('*') => {
index.wild_first.push(glob);
}
Some(first) => {
index
.literal_first
.entry(first.clone())
.or_default()
.push(glob);
}
}
}
index
}
fn matches(&self, normalized_path: &str) -> bool {
let path_segments = split_segments(normalized_path);
let path_oversize = path_segments.len() > MAX_GLOB_SEGMENTS
|| path_segments.iter().any(|s| s.len() > MAX_GLOB_SEGMENT_LEN);
if path_oversize {
tracing::warn!(
"skipping oversized allowlist path match ({} segments). Fix: shorten the path",
path_segments.len()
);
return false;
}
let test = |glob: &CompiledGlob| -> bool {
!glob.oversize && glob_match_segments(&glob.segments, &path_segments)
};
if path_segments.is_empty() {
return self.empty_pattern.iter().any(test) || self.wild_first.iter().any(test);
}
let first = path_segments[0];
if let Some(bucket) = self.literal_first.get(first) {
if bucket.iter().any(test) {
return true;
}
}
self.wild_first.iter().any(test)
}
}
impl Allowlist {
    /// Returns an allowlist with no entries, i.e. one that suppresses nothing.
    pub fn empty() -> Self {
        Self {
            credential_hashes: HashSet::new(),
            ignored_detectors: HashSet::new(),
            ignored_paths: Vec::new(),
            path_index: PathGlobIndex::default(),
        }
    }

    /// Reads the file at `path` as text and parses it with [`Allowlist::parse`].
    ///
    /// # Errors
    /// Returns the `std::io::Error` produced by `std::fs::read_to_string` when
    /// the file cannot be read.
    pub fn load(path: &Path) -> Result<Self, std::io::Error> {
        let contents = std::fs::read_to_string(path)?;
        Ok(Self::parse(&contents))
    }

    /// Parses allowlist text, one entry per line.
    ///
    /// Blank lines and lines starting with `#` are skipped. Everything after the
    /// first `;` on a line is handed to `parse_inline_metadata`; an entry whose
    /// `expires` value sorts before today's date string is skipped with a warning.
    /// The part before the `;` is interpreted as:
    ///
    /// * `hash:<hex>` — a credential digest accepted by `parse_sha256_hex`
    /// * `detector:<id>` — a detector id to ignore
    /// * `path:<glob>` — a path glob to ignore
    /// * a bare value accepted by `parse_sha256_hex` — a credential digest
    /// * any other non-empty bare value — a path glob
    ///
    /// Malformed or empty entries are reported with `tracing::warn!` and skipped;
    /// parsing itself never fails.
    pub fn parse(content: &str) -> Self {
        let mut al = Self::empty();
        let today = today_yyyy_mm_dd();
        for (line_number, raw_line) in content.lines().enumerate() {
            let raw_line = raw_line.trim();
            if raw_line.is_empty() || raw_line.starts_with('#') {
                continue;
            }
            // Split "<entry> ; <metadata>" at the first ';' only, so metadata may
            // itself contain further ';' characters.
            let (entry, metadata) = raw_line.split_once(';').unwrap_or((raw_line, ""));
            let entry = entry.trim();
            let parsed_meta = parse_inline_metadata(metadata);
            if let Some(exp) = parsed_meta.expires.as_deref() {
                // Plain string comparison: this is only a date comparison if both
                // sides use the same lexicographically sortable format.
                if exp < today.as_str() {
                    tracing::warn!(
                        "allowlist entry expired on {} (today is {}): '{}'",
                        exp,
                        today,
                        entry
                    );
                    continue;
                }
            }
            if let Some(hash) = entry.strip_prefix("hash:") {
                let trimmed = hash.trim();
                if let Some(valid_hash) = parse_sha256_hex(trimmed) {
                    al.credential_hashes.insert(valid_hash);
                    log_metadata_audit("hash", trimmed, &parsed_meta);
                } else {
                    tracing::warn!(
                        "invalid hash allowlist entry at line {}: '{}'",
                        line_number + 1,
                        trimmed
                    );
                }
            } else if let Some(detector) = entry.strip_prefix("detector:") {
                let detector = detector.trim();
                if detector.is_empty() {
                    tracing::warn!(
                        "invalid detector allowlist entry at line {}: detector id is empty",
                        line_number + 1
                    );
                } else {
                    al.ignored_detectors.insert(detector.to_string());
                    log_metadata_audit("detector", detector, &parsed_meta);
                }
            } else if let Some(path) = entry.strip_prefix("path:") {
                let path = path.trim();
                if path.is_empty() {
                    tracing::warn!(
                        "invalid path allowlist entry at line {}: glob is empty",
                        line_number + 1
                    );
                } else {
                    al.ignored_paths.push(path.to_string());
                    log_metadata_audit("path", path, &parsed_meta);
                }
            } else if entry.is_empty() {
                // A metadata-only line such as "; reason=..." has no entry. Storing
                // it would create an empty glob that matches the empty path, so it
                // is rejected the same way an empty `path:` entry is.
                tracing::warn!(
                    "invalid allowlist entry at line {}: entry is empty",
                    line_number + 1
                );
            } else if let Some(bytes) = parse_sha256_hex(entry) {
                al.credential_hashes.insert(bytes);
                log_metadata_audit("hash", entry, &parsed_meta);
            } else {
                al.ignored_paths.push(entry.to_string());
                log_metadata_audit("path", entry, &parsed_meta);
            }
        }
        al.path_index = PathGlobIndex::build(&al.ignored_paths);
        al
    }

    /// Returns `true` when `finding` is suppressed by its detector id, its
    /// credential hash, or its (normalized) file path.
    ///
    /// The two set lookups run first so the path is only normalized and
    /// glob-matched when neither of them already allows the finding.
    pub fn is_allowed(&self, finding: &VerifiedFinding) -> bool {
        self.ignored_detectors.contains(&*finding.detector_id)
            || self.matches_ignored_hash(&finding.credential_hash)
            || finding.location.file_path.as_ref().is_some_and(|path| {
                let normalized_path = normalize_path(path);
                self.path_matches(&normalized_path)
            })
    }

    /// Returns `true` when `credential` parses via `parse_sha256_hex` and the
    /// resulting digest is allowlisted. The input is parsed as a hex digest; it
    /// is not hashed here.
    pub fn is_hash_allowed(&self, credential: &str) -> bool {
        parse_sha256_hex(credential).is_some_and(|bytes| self.matches_ignored_hash(&bytes))
    }

    /// Returns `true` when `hash_hex` parses via `parse_sha256_hex` and the
    /// resulting digest is allowlisted. Behaves identically to
    /// [`Allowlist::is_hash_allowed`].
    pub fn is_raw_hash_ignored(&self, hash_hex: &str) -> bool {
        parse_sha256_hex(hash_hex).is_some_and(|bytes| self.matches_ignored_hash(&bytes))
    }

    /// Returns `true` when the 32-byte digest `hash` is allowlisted.
    pub fn is_hash_ignored(&self, hash: &[u8; 32]) -> bool {
        self.matches_ignored_hash(hash)
    }

    /// Returns `true` when `path`, after normalization, matches an ignored path glob.
    pub fn is_path_ignored(&self, path: &str) -> bool {
        let normalized = normalize_path(path);
        self.path_matches(&normalized)
    }

    /// Matches an already-normalized path against the ignored path globs.
    ///
    /// `ignored_paths` is a public field, so it can be changed after the index
    /// was built. When its length no longer equals the length the index was
    /// built from, a fresh index is compiled for this call. Only the length is
    /// compared: an edit that keeps the number of patterns unchanged is not
    /// detected.
    fn path_matches(&self, normalized_path: &str) -> bool {
        if self.path_index.source_len == self.ignored_paths.len() {
            self.path_index.matches(normalized_path)
        } else {
            PathGlobIndex::build(&self.ignored_paths).matches(normalized_path)
        }
    }

    /// Set-membership test shared by all hash lookups.
    fn matches_ignored_hash(&self, hash: &[u8; 32]) -> bool {
        self.credential_hashes.contains(hash)
    }
}
/// Splits `path` into segments on `/` and `\`.
///
/// An empty string yields no segments; otherwise every separator produces a
/// boundary, so adjacent or trailing separators yield empty segments.
fn split_segments(path: &str) -> Vec<&str> {
    match path {
        "" => Vec::new(),
        non_empty => non_empty
            .split(|ch: char| ch == '/' || ch == '\\')
            .collect(),
    }
}
/// Matches a segment-wise glob `pattern` against the segments of `path`.
///
/// A `**` segment matches zero or more whole path segments; any other pattern
/// segment must match exactly one path segment according to `segment_match`.
/// The whole path must be consumed for the match to succeed.
fn glob_match_segments<S: AsRef<str>>(pattern: &[S], path: &[&str]) -> bool {
    let width = path.len() + 1;
    // frontier[i] is true when the pattern segments processed so far can match
    // exactly the first `i` path segments.
    let mut frontier = vec![false; width];
    frontier[0] = true;

    for raw_segment in pattern {
        let glob_segment: &str = raw_segment.as_ref();
        let advanced: Vec<bool> = if glob_segment == "**" {
            // `**` may swallow any number of segments: every position at or
            // after a reachable one becomes reachable.
            let mut seen = false;
            frontier
                .iter()
                .map(|&live| {
                    seen |= live;
                    seen
                })
                .collect()
        } else {
            let mut row = vec![false; width];
            for (idx, &component) in path.iter().enumerate() {
                if frontier[idx] && segment_match(glob_segment, component) {
                    row[idx + 1] = true;
                }
            }
            row
        };
        frontier = advanced;
    }

    frontier[path.len()]
}
/// Matches one path segment `text` against one glob segment `pattern`.
///
/// Dispatches to the byte-wise matcher when both inputs are pure ASCII and to
/// the `char`-wise matcher otherwise.
fn segment_match(pattern: &str, text: &str) -> bool {
    let both_ascii = pattern.is_ascii() && text.is_ascii();
    if both_ascii {
        segment_match_ascii(pattern.as_bytes(), text.as_bytes())
    } else {
        segment_match_chars(pattern, text)
    }
}
/// Byte-wise wildcard match of `text` against `pattern`, where `*` in the
/// pattern matches any run of bytes (including none) and every other byte
/// must match literally.
///
/// Uses a single backtrack point: on a mismatch the most recent `*` is made to
/// absorb one more byte of `text` and matching resumes right after that `*`.
// The cursor names below are deliberately parallel; keep the lint quiet.
#[allow(clippy::similar_names)]
fn segment_match_ascii(pattern: &[u8], text: &[u8]) -> bool {
    let mut pat_pos = 0usize;
    let mut txt_pos = 0usize;
    // (pattern index just past the latest `*`, text index where that `*` currently stops)
    let mut backtrack: Option<(usize, usize)> = None;

    while txt_pos < text.len() {
        match pattern.get(pat_pos).copied() {
            Some(b'*') => {
                backtrack = Some((pat_pos + 1, txt_pos));
                pat_pos += 1;
            }
            Some(byte) if byte == text[txt_pos] => {
                pat_pos += 1;
                txt_pos += 1;
            }
            _ => match backtrack {
                Some((resume_pat, absorbed_until)) => {
                    let retry_from = absorbed_until + 1;
                    backtrack = Some((resume_pat, retry_from));
                    txt_pos = retry_from;
                    pat_pos = resume_pat;
                }
                None => return false,
            },
        }
    }

    // Text is exhausted: only trailing `*`s may remain in the pattern.
    pattern[pat_pos..].iter().all(|&byte| byte == b'*')
}
/// `char`-wise wildcard match of `text` against `pattern`, where `*` in the
/// pattern matches any run of characters (including none) and every other
/// character must match literally.
///
/// Both strings are decoded into `char` vectors first so that multi-byte
/// characters are compared as units. Uses a single backtrack point at the most
/// recent `*`.
// The cursor names below are deliberately parallel; keep the lint quiet.
#[allow(clippy::similar_names)]
fn segment_match_chars(pattern: &str, text: &str) -> bool {
    let pat: Vec<char> = pattern.chars().collect();
    let txt: Vec<char> = text.chars().collect();
    let mut pat_pos = 0usize;
    let mut txt_pos = 0usize;
    // (pattern index just past the latest `*`, text index where that `*` currently stops)
    let mut backtrack: Option<(usize, usize)> = None;

    while txt_pos < txt.len() {
        match pat.get(pat_pos).copied() {
            Some('*') => {
                backtrack = Some((pat_pos + 1, txt_pos));
                pat_pos += 1;
            }
            Some(ch) if ch == txt[txt_pos] => {
                pat_pos += 1;
                txt_pos += 1;
            }
            _ => match backtrack {
                Some((resume_pat, absorbed_until)) => {
                    let retry_from = absorbed_until + 1;
                    backtrack = Some((resume_pat, retry_from));
                    txt_pos = retry_from;
                    pat_pos = resume_pat;
                }
                None => return false,
            },
        }
    }

    // Text is exhausted: only trailing `*`s may remain in the pattern.
    pat[pat_pos..].iter().all(|&ch| ch == '*')
}
/// Produces a `/`-joined, lexically normalized form of `path`.
///
/// Backslashes are first rewritten to `/`. Then, component by component:
/// `.` is dropped; `..` removes the previous component unless there is none or
/// it is itself `..`, in which case `..` is kept; a root component discards
/// everything collected so far (so absolute paths come out without a leading
/// `/`); a platform prefix is kept as a component. No filesystem access is made.
fn normalize_path(path: &str) -> String {
    let unified = path.replace('\\', "/");
    let mut stack: Vec<String> = Vec::new();

    for component in Path::new(&unified).components() {
        match component {
            Component::CurDir => {}
            Component::RootDir => stack.clear(),
            Component::Prefix(prefix) => {
                stack.push(prefix.as_os_str().to_string_lossy().into_owned());
            }
            Component::Normal(name) => stack.push(name.to_string_lossy().into_owned()),
            Component::ParentDir => {
                // Only a real (non-`..`) component can be cancelled out.
                let can_ascend = matches!(stack.last(), Some(top) if top != "..");
                if can_ascend {
                    stack.pop();
                } else {
                    stack.push("..".to_string());
                }
            }
        }
    }

    stack.join("/")
}
/// Decodes a 64-character hex string into a 32-byte digest.
///
/// Surrounding whitespace is ignored. Returns `None` when the trimmed input is
/// not exactly 64 bytes long or when `hex_nibble` rejects any of its bytes.
fn parse_sha256_hex(input: &str) -> Option<[u8; 32]> {
    let hex = input.trim().as_bytes();
    if hex.len() != 64 {
        return None;
    }

    let mut digest = [0u8; 32];
    // Each output byte is built from one pair of input characters: high nibble first.
    for (slot, pair) in digest.iter_mut().zip(hex.chunks_exact(2)) {
        let high = hex_nibble(pair[0])?;
        let low = hex_nibble(pair[1])?;
        *slot = (high << 4) | low;
    }
    Some(digest)
}
/// Optional annotations attached to an allowlist entry.
// NOTE(review): presumably the parsed form of the text after `;` on an allowlist
// line — the producer is not visible here; confirm against `allowlist_metadata`.
#[derive(Default, Debug)]
struct InlineMetadata {
/// Free-form justification for the entry, if one was given.
reason: Option<String>,
/// Expiry marker, if one was given.
// NOTE(review): looks like a `YYYY-MM-DD` date string — TODO confirm the format.
expires: Option<String>,
/// Identity of whoever approved the entry, if one was given.
approved_by: Option<String>,
}