use anyhow::Result;
use serde::{Deserialize, Serialize};
use std::fs;
use std::path::Path;
/// Scanner configuration, (de)serialized with serde from TOML or YAML files.
///
/// The container-level `#[serde(default)]` fills any key that is missing from a
/// config file with the corresponding value from `Config::default()`. Fields
/// that carry their own `default = "..."` attribute use the named function
/// instead.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(default)]
pub struct Config {
/// Entropy threshold; `Config::default()` sets 3.5.
/// NOTE(review): presumably the minimum entropy for a string to be reported --
/// the consumer is not in this file, confirm there.
pub entropy_threshold: f64,
/// Minimum confidence; `Config::default()` sets 0.6.
/// NOTE(review): presumably findings below this score are dropped -- confirm.
pub min_confidence: f64,
/// Validation switch; off in `Config::default()`.
pub enable_validation: bool,
/// Git-history scanning switch; on in `Config::default()`.
pub scan_git_history: bool,
/// Optional git depth limit; `None` by default and omitted from the TOML
/// written by `to_toml_file` when unset.
pub max_git_depth: Option<usize>,
/// `.gitignore` switch; on in `Config::default()`.
pub respect_gitignore: bool,
/// File size limit; `Config::default()` sets `1024 * 1024`.
/// NOTE(review): unit is presumably bytes -- confirm against the scanner.
pub max_file_size: u64,
/// Test-exclusion switch; off in `Config::default()`.
pub exclude_tests: bool,
/// Docs-exclusion switch; off in `Config::default()`.
pub exclude_docs: bool,
/// User-defined detection patterns; empty when the key is absent.
#[serde(default)]
pub custom_patterns: Vec<CustomPattern>,
/// Allowlist rules; empty when the key is absent.
#[serde(default)]
pub allowlist: Vec<AllowlistRule>,
/// Severity names; falls back to `default_severities()`
/// (CRITICAL, HIGH, MEDIUM, LOW) when the key is absent.
#[serde(default = "default_severities")]
pub report_severities: Vec<String>,
/// Falls back to `default_max_concurrent_validations()` (10) when absent.
#[serde(default = "default_max_concurrent_validations")]
pub max_concurrent_validations: usize,
/// Falls back to `default_validation_delay_ms()` (100) when absent.
#[serde(default = "default_validation_delay_ms")]
pub validation_delay_ms: u64,
/// Falls back to `default_validation_max_retries()` (3) when absent.
#[serde(default = "default_validation_max_retries")]
pub validation_max_retries: u32,
}
/// Serde fallback for `Config::max_concurrent_validations` when the key is
/// absent from a config file.
fn default_max_concurrent_validations() -> usize {
    const FALLBACK: usize = 10;
    FALLBACK
}
/// Serde fallback for `Config::validation_delay_ms` when the key is absent
/// from a config file.
fn default_validation_delay_ms() -> u64 {
    const FALLBACK: u64 = 100;
    FALLBACK
}
/// Serde fallback for `Config::validation_max_retries` when the key is absent
/// from a config file.
fn default_validation_max_retries() -> u32 {
    const FALLBACK: u32 = 3;
    FALLBACK
}
/// A user-supplied detection pattern, stored under `[[custom_patterns]]` in
/// the configuration file.
///
/// All fields except `description` are required when deserializing.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CustomPattern {
/// Name of the pattern.
pub name: String,
/// Regular expression source text. It is kept as a plain string here; this
/// file does not compile or validate it.
pub regex: String,
/// Severity name as a free-form string (the template written by
/// `to_toml_file` uses "HIGH").
pub severity: String,
/// Confidence score (the template written by `to_toml_file` uses 0.85).
/// NOTE(review): valid range is not enforced in this file -- presumably 0.0..=1.0.
pub confidence: f64,
/// Optional free-text description; `None` when the key is absent.
#[serde(default)]
pub description: Option<String>,
}
/// One `[[allowlist]]` entry. `AllowlistRule::matches` requires every
/// criterion that is set to match; unset criteria are ignored.
///
/// `deny_unknown_fields` makes deserialization fail on misspelled keys
/// (e.g. `secret_type` instead of `secret_types`) instead of silently
/// producing a rule with no criteria.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct AllowlistRule {
/// Optional free-text label; shown in the "no criteria" warning.
#[serde(default)]
pub description: Option<String>,
/// Secret type names compared for exact (case-sensitive) equality.
/// Empty means "any type".
#[serde(default)]
pub secret_types: Vec<String>,
/// Glob patterns checked with `glob_match`; one matching pattern is enough.
/// Empty means "any path".
#[serde(default)]
pub paths: Vec<String>,
/// Regular expression source tested against the secret value.
/// `None` means "any value".
#[serde(default)]
pub value_regex: Option<String>,
/// Severity names compared ASCII-case-insensitively.
/// Empty means "any severity".
#[serde(default)]
pub severities: Vec<String>,
}
impl AllowlistRule {
    /// Returns `true` when the rule specifies no criterion at all
    /// (no secret types, no paths, no value regex and no severities).
    pub fn has_no_criteria(&self) -> bool {
        let has_any_criterion = !self.secret_types.is_empty()
            || !self.paths.is_empty()
            || self.value_regex.is_some()
            || !self.severities.is_empty();
        !has_any_criterion
    }

    /// Decides whether a finding is covered by this rule.
    ///
    /// Every criterion that is set must match; criteria left empty are
    /// skipped. A rule without any criteria never matches, and a
    /// `value_regex` that fails to compile is treated as a non-match.
    ///
    /// * `secret_type_name` - compared for exact equality with `secret_types`.
    /// * `file_path` - matched against each `paths` glob via `glob_match`.
    /// * `secret_value` - tested against `value_regex`.
    /// * `severity_name` - compared ASCII-case-insensitively with `severities`.
    pub fn matches(
        &self,
        secret_type_name: &str,
        file_path: &str,
        secret_value: &str,
        severity_name: &str,
    ) -> bool {
        // An empty rule would otherwise suppress every finding.
        if self.has_no_criteria() {
            return false;
        }

        let type_ok = self.secret_types.is_empty()
            || self.secret_types.iter().any(|t| t == secret_type_name);
        if !type_ok {
            return false;
        }

        let path_ok =
            self.paths.is_empty() || self.paths.iter().any(|p| glob_match(file_path, p));
        if !path_ok {
            return false;
        }

        let value_ok = match self.value_regex.as_deref() {
            None => true,
            // An invalid pattern counts as "does not match" rather than an error.
            Some(source) => regex::Regex::new(source)
                .map(|re| re.is_match(secret_value))
                .unwrap_or(false),
        };
        if !value_ok {
            return false;
        }

        self.severities.is_empty()
            || self
                .severities
                .iter()
                .any(|s| s.eq_ignore_ascii_case(severity_name))
    }
}
/// Matches a `/`-separated path against a glob pattern.
///
/// * A leading `!` negates the result of the remaining pattern.
/// * A leading `**/` is stripped and the remainder is tried against the whole
///   path and against every suffix that starts right after a `/`.
/// * A pattern without any `/` is matched against the last path component only.
/// * Any other pattern is tried against the whole path and against every
///   suffix that starts right after a `/`.
///
/// The per-candidate matching is delegated to `glob_match_inner`.
pub fn glob_match(text: &str, pattern: &str) -> bool {
    if let Some(negated) = pattern.strip_prefix('!') {
        return !glob_match(text, negated);
    }

    // Try the whole text first, then each suffix beginning after a slash.
    let matches_at_any_component = |pat: &str| {
        glob_match_inner(text, pat)
            || text
                .match_indices('/')
                .any(|(slash, _)| glob_match_inner(&text[slash + 1..], pat))
    };

    match pattern.strip_prefix("**/") {
        Some(rest) => matches_at_any_component(rest),
        None if !pattern.contains('/') => {
            let basename = text.rsplit('/').next().unwrap_or(text);
            glob_match_inner(basename, pattern)
        }
        None => matches_at_any_component(pattern),
    }
}
/// Anchored glob matcher: returns `true` when `pattern` matches all of `text`.
///
/// Supported syntax:
/// * `?` matches exactly one character other than `/`.
/// * `*` matches any run of characters that does not contain `/`.
/// * `**` (two or more consecutive stars) matches any run of characters,
///   including `/`; one `/` directly following the stars is consumed with them.
/// * `**/` used as a whole path component (at the start of the pattern or right
///   after a `/`) matches zero or more complete directories, so `a/**/e.txt`
///   matches `a/e.txt` and `a/b/c/e.txt` but not `a/xe.txt`.
/// * Every other byte must match literally.
///
/// The matcher is iterative: it remembers the most recent `*` and the most
/// recent `**` and, on a mismatch, lets the `*` absorb one more byte or the
/// `**` resume at a later position before retrying.
pub(crate) fn glob_match_inner(text: &str, pattern: &str) -> bool {
    let text_bytes = text.as_bytes();
    let pat_bytes = pattern.as_bytes();
    let (tlen, plen) = (text_bytes.len(), pat_bytes.len());

    let mut ti = 0usize;
    let mut pi = 0usize;

    // Backtracking state for the most recent single `*`.
    let mut star_pi: Option<usize> = None;
    let mut star_ti: usize = 0;

    // Backtracking state for the most recent `**`.
    let mut dstar_pi: Option<usize> = None;
    let mut dstar_ti: usize = 0;
    // True when the current `**` is a `**/` path component and therefore may
    // only resume at positions that start a new path segment.
    let mut dstar_by_segment = false;

    while ti < tlen || pi < plen {
        if pi < plen {
            if pi + 1 < plen && pat_bytes[pi] == b'*' && pat_bytes[pi + 1] == b'*' {
                // The stars start a path component when they open the pattern or
                // follow a `/`. If that `/` was swallowed by the previous `**`
                // (then `dstar_pi == Some(pi)`), inherit that token's mode so the
                // text position is still known to be a segment boundary.
                let starts_component = pi == 0
                    || (pat_bytes[pi - 1] == b'/'
                        && (dstar_pi != Some(pi) || dstar_by_segment));

                let mut pp = pi;
                while pp < plen && pat_bytes[pp] == b'*' {
                    pp += 1;
                }
                let ate_slash = pp < plen && pat_bytes[pp] == b'/';
                if ate_slash {
                    pp += 1;
                }

                dstar_pi = Some(pp);
                dstar_ti = ti;
                dstar_by_segment = starts_component && ate_slash;
                pi = pp;
                star_pi = None;
                continue;
            }
            if pat_bytes[pi] == b'*' {
                star_pi = Some(pi + 1);
                star_ti = ti;
                pi += 1;
                continue;
            }
            if ti < tlen {
                // `?` consumes one whole character. Backtracking advances byte
                // by byte, so refuse to start in the middle of a UTF-8 sequence.
                if pat_bytes[pi] == b'?'
                    && text_bytes[ti] != b'/'
                    && text.is_char_boundary(ti)
                {
                    ti += 1;
                    while !text.is_char_boundary(ti) {
                        ti += 1;
                    }
                    pi += 1;
                    continue;
                }
                if pat_bytes[pi] == text_bytes[ti] {
                    ti += 1;
                    pi += 1;
                    continue;
                }
            }
        }

        // Mismatch: let the last `*` swallow one more byte, but never a `/`.
        if let Some(sp) = star_pi {
            if star_ti < tlen && text_bytes[star_ti] != b'/' {
                star_ti += 1;
                ti = star_ti;
                pi = sp;
                continue;
            }
        }

        // Otherwise let the last `**` resume further along the text.
        if let Some(dp) = dstar_pi {
            let resume = if dstar_by_segment {
                // Skip one whole directory: restart right after the next `/`.
                text_bytes[dstar_ti..]
                    .iter()
                    .position(|&b| b == b'/')
                    .map(|offset| dstar_ti + offset + 1)
            } else if dstar_ti < tlen {
                Some(dstar_ti + 1)
            } else {
                None
            };
            if let Some(next_ti) = resume {
                dstar_ti = next_ti;
                ti = next_ti;
                pi = dp;
                star_pi = None;
                continue;
            }
        }

        return false;
    }
    true
}
/// Serde fallback for `Config::report_severities`: the four severity names
/// CRITICAL, HIGH, MEDIUM and LOW, in that order.
fn default_severities() -> Vec<String> {
    ["CRITICAL", "HIGH", "MEDIUM", "LOW"]
        .iter()
        .map(|name| name.to_string())
        .collect()
}
impl Default for Config {
fn default() -> Self {
Self {
entropy_threshold: 3.5,
min_confidence: 0.6,
enable_validation: false,
scan_git_history: true,
max_git_depth: None,
respect_gitignore: true,
max_file_size: 1024 * 1024, exclude_tests: false,
exclude_docs: false,
custom_patterns: Vec::new(),
allowlist: Vec::new(),
report_severities: default_severities(),
max_concurrent_validations: default_max_concurrent_validations(),
validation_delay_ms: default_validation_delay_ms(),
validation_max_retries: default_validation_max_retries(),
}
}
}
impl Config {
    /// Loads a configuration from the TOML file at `path`.
    ///
    /// After parsing, a warning is printed to stderr for every allowlist rule
    /// that has no criteria.
    ///
    /// # Errors
    /// Fails when the file cannot be read or does not deserialize into a
    /// [`Config`].
    pub fn from_toml_file(path: &Path) -> Result<Self> {
        let content = fs::read_to_string(path)?;
        let config: Config = toml::from_str(&content)?;
        config.warn_empty_allowlist_rules();
        Ok(config)
    }

    /// Loads a configuration from the YAML file at `path`.
    ///
    /// After parsing, a warning is printed to stderr for every allowlist rule
    /// that has no criteria.
    ///
    /// # Errors
    /// Fails when the file cannot be read or does not deserialize into a
    /// [`Config`].
    pub fn from_yaml_file(path: &Path) -> Result<Self> {
        let content = fs::read_to_string(path)?;
        let config: Config = serde_yaml::from_str(&content)?;
        config.warn_empty_allowlist_rules();
        Ok(config)
    }

    /// Prints one stderr warning per allowlist rule that specifies no
    /// criteria; such rules never match (see `AllowlistRule::matches`).
    fn warn_empty_allowlist_rules(&self) {
        for (i, rule) in self.allowlist.iter().enumerate() {
            if rule.has_no_criteria() {
                let desc = rule
                    .description
                    .as_deref()
                    .unwrap_or("<no description>");
                eprintln!(
                    "warning: allowlist rule #{} ({}) has no criteria (secret_types, paths, \
                     value_regex, severities are all empty) -- this rule will be ignored. \
                     Specify at least one criterion.",
                    i + 1,
                    desc,
                );
            }
        }
    }

    /// Renders `value` as a quoted TOML basic string.
    ///
    /// Quotes, backslashes and control characters are escaped so that values
    /// such as the regex `\d+` or a description containing `"` produce a file
    /// that parses back to the same text.
    fn toml_string(value: &str) -> String {
        let mut out = String::with_capacity(value.len() + 2);
        out.push('"');
        for ch in value.chars() {
            match ch {
                '"' => out.push_str("\\\""),
                '\\' => out.push_str("\\\\"),
                '\n' => out.push_str("\\n"),
                '\r' => out.push_str("\\r"),
                '\t' => out.push_str("\\t"),
                '\u{08}' => out.push_str("\\b"),
                '\u{0C}' => out.push_str("\\f"),
                // Remaining control characters are not allowed raw in TOML.
                c if c.is_control() => out.push_str(&format!("\\u{:04X}", c as u32)),
                c => out.push(c),
            }
        }
        out.push('"');
        out
    }

    /// Renders `values` as a single-line TOML array of basic strings.
    fn toml_string_array(values: &[String]) -> String {
        let quoted: Vec<String> = values.iter().map(|v| Self::toml_string(v)).collect();
        format!("[{}]", quoted.join(", "))
    }

    /// Writes the configuration to `path` as hand-formatted TOML.
    ///
    /// The file starts with an explanatory comment header, followed by all
    /// top-level keys, then the `[[custom_patterns]]` and `[[allowlist]]`
    /// tables. When either list is empty, a commented-out example is written
    /// in its place. `max_git_depth` is omitted when it is `None`.
    ///
    /// # Errors
    /// Fails when the file cannot be written.
    pub fn to_toml_file(&self, path: &Path) -> Result<()> {
        let mut content = String::new();
        content.push_str("# Leaktor configuration\n");
        content.push_str("# https://github.com/reschjonas/leaktor\n");
        content.push_str("#\n");
        content.push_str(
            "# IMPORTANT: All top-level keys (like entropy_threshold, report_severities)\n",
        );
        content.push_str(
            "# must appear BEFORE any [[custom_patterns]] or [[allowlist]] sections.\n",
        );
        content.push_str(
            "# TOML treats keys after [[section]] headers as part of that section.\n\n",
        );

        // Top-level scalar keys.
        content.push_str(&format!("entropy_threshold = {}\n", self.entropy_threshold));
        content.push_str(&format!("min_confidence = {}\n", self.min_confidence));
        content.push_str(&format!(
            "enable_validation = {}\n",
            self.enable_validation
        ));
        content.push_str(&format!("scan_git_history = {}\n", self.scan_git_history));
        if let Some(depth) = self.max_git_depth {
            content.push_str(&format!("max_git_depth = {}\n", depth));
        }
        content.push_str(&format!("respect_gitignore = {}\n", self.respect_gitignore));
        content.push_str(&format!("max_file_size = {}\n", self.max_file_size));
        content.push_str(&format!("exclude_tests = {}\n", self.exclude_tests));
        content.push_str(&format!("exclude_docs = {}\n", self.exclude_docs));
        content.push_str(&format!(
            "report_severities = {}\n",
            Self::toml_string_array(&self.report_severities)
        ));
        content.push_str(&format!(
            "max_concurrent_validations = {}\n",
            self.max_concurrent_validations
        ));
        content.push_str(&format!(
            "validation_delay_ms = {}\n",
            self.validation_delay_ms
        ));
        content.push_str(&format!(
            "validation_max_retries = {}\n",
            self.validation_max_retries
        ));
        content.push('\n');

        // Custom patterns, or a commented-out example when there are none.
        if self.custom_patterns.is_empty() {
            content.push_str("# [[custom_patterns]]\n");
            content.push_str("# name = \"Internal API Key\"\n");
            content.push_str("# regex = \"int_key_[0-9a-f]{32}\"\n");
            content.push_str("# severity = \"HIGH\"\n");
            content.push_str("# confidence = 0.85\n");
            content.push_str("# description = \"Internal API key\"\n\n");
        } else {
            for cp in &self.custom_patterns {
                content.push_str("[[custom_patterns]]\n");
                content.push_str(&format!("name = {}\n", Self::toml_string(&cp.name)));
                content.push_str(&format!("regex = {}\n", Self::toml_string(&cp.regex)));
                content.push_str(&format!(
                    "severity = {}\n",
                    Self::toml_string(&cp.severity)
                ));
                content.push_str(&format!("confidence = {}\n", cp.confidence));
                if let Some(ref desc) = cp.description {
                    content.push_str(&format!("description = {}\n", Self::toml_string(desc)));
                }
                content.push('\n');
            }
        }

        // Allowlist rules, or a commented-out example when there are none.
        if self.allowlist.is_empty() {
            content.push_str("# [[allowlist]]\n");
            content.push_str("# description = \"Suppress Sentry DSNs\"\n");
            content.push_str("# secret_types = [\"Sentry DSN\"]\n");
        } else {
            for rule in &self.allowlist {
                content.push_str("[[allowlist]]\n");
                if let Some(ref desc) = rule.description {
                    content.push_str(&format!("description = {}\n", Self::toml_string(desc)));
                }
                if !rule.secret_types.is_empty() {
                    content.push_str(&format!(
                        "secret_types = {}\n",
                        Self::toml_string_array(&rule.secret_types)
                    ));
                }
                if !rule.paths.is_empty() {
                    content.push_str(&format!(
                        "paths = {}\n",
                        Self::toml_string_array(&rule.paths)
                    ));
                }
                if let Some(ref re) = rule.value_regex {
                    content.push_str(&format!("value_regex = {}\n", Self::toml_string(re)));
                }
                if !rule.severities.is_empty() {
                    content.push_str(&format!(
                        "severities = {}\n",
                        Self::toml_string_array(&rule.severities)
                    ));
                }
                content.push('\n');
            }
        }

        fs::write(path, content)?;
        Ok(())
    }

    /// Serializes the configuration with `serde_yaml` and writes it to `path`.
    ///
    /// # Errors
    /// Fails when serialization fails or the file cannot be written.
    pub fn to_yaml_file(&self, path: &Path) -> Result<()> {
        let content = serde_yaml::to_string(self)?;
        fs::write(path, content)?;
        Ok(())
    }

    /// Loads the first config file that exists in the current directory,
    /// checking `.leaktor.toml`, `.leaktor.yaml` and `.leaktor.yml` in that
    /// order. Falls back to [`Config::default`] when none exists.
    ///
    /// # Errors
    /// Fails when the file that was found cannot be read or parsed.
    pub fn load_from_current_dir() -> Result<Self> {
        let config_names = [".leaktor.toml", ".leaktor.yaml", ".leaktor.yml"];
        for name in &config_names {
            let path = Path::new(name);
            if path.exists() {
                if name.ends_with(".toml") {
                    return Self::from_toml_file(path);
                } else {
                    return Self::from_yaml_file(path);
                }
            }
        }
        Ok(Self::default())
    }

    /// Returns the allowlist rules as a slice.
    ///
    /// Despite the name, nothing is compiled here: the rules are returned
    /// exactly as stored in `self.allowlist`.
    pub fn compiled_allowlist(&self) -> &[AllowlistRule] {
        &self.allowlist
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Rule with every criterion unset; individual tests override the fields
    /// they care about with struct-update syntax.
    fn blank_rule() -> AllowlistRule {
        AllowlistRule {
            description: None,
            secret_types: Vec::new(),
            paths: Vec::new(),
            value_regex: None,
            severities: Vec::new(),
        }
    }

    /// Asserts `glob_match(text, pattern) == expected` for each table row.
    fn check_globs(cases: &[(&str, &str, bool)]) {
        for &(text, pattern, expected) in cases {
            assert_eq!(
                glob_match(text, pattern),
                expected,
                "glob_match({:?}, {:?})",
                text,
                pattern
            );
        }
    }

    #[test]
    fn test_default_config() {
        let defaults = Config::default();
        assert_eq!(defaults.entropy_threshold, 3.5);
        assert_eq!(defaults.min_confidence, 0.6);
        assert!(defaults.scan_git_history);
        assert!(defaults.allowlist.is_empty());
        assert!(defaults.custom_patterns.is_empty());
    }

    #[test]
    fn test_config_serialization() -> Result<()> {
        let rendered = toml::to_string(&Config::default())?;
        assert!(rendered.contains("entropy_threshold"));
        Ok(())
    }

    #[test]
    fn test_config_save_and_load() -> Result<()> {
        let dir = TempDir::new()?;
        let file = dir.path().join("test.toml");
        let original = Config::default();

        original.to_toml_file(&file)?;
        let reloaded = Config::from_toml_file(&file)?;

        assert_eq!(reloaded.entropy_threshold, original.entropy_threshold);
        Ok(())
    }

    #[test]
    fn test_custom_patterns_round_trip() -> Result<()> {
        let dir = TempDir::new()?;
        let file = dir.path().join("test.toml");

        let mut original = Config::default();
        original.custom_patterns.push(CustomPattern {
            name: "Internal Key".to_string(),
            regex: "int_key_[a-f0-9]{32}".to_string(),
            severity: "HIGH".to_string(),
            confidence: 0.85,
            description: Some("Company internal key".to_string()),
        });

        original.to_toml_file(&file)?;
        let reloaded = Config::from_toml_file(&file)?;

        assert_eq!(reloaded.custom_patterns.len(), 1);
        let pattern = &reloaded.custom_patterns[0];
        assert_eq!(pattern.name, "Internal Key");
        assert_eq!(pattern.confidence, 0.85);
        Ok(())
    }

    #[test]
    fn test_allowlist_round_trip() -> Result<()> {
        let dir = TempDir::new()?;
        let file = dir.path().join("test.toml");

        let mut original = Config::default();
        original.allowlist.push(AllowlistRule {
            description: Some("Skip Sentry DSN".to_string()),
            secret_types: vec!["Sentry DSN".to_string()],
            ..blank_rule()
        });

        original.to_toml_file(&file)?;
        let reloaded = Config::from_toml_file(&file)?;

        assert_eq!(reloaded.allowlist.len(), 1);
        assert_eq!(reloaded.allowlist[0].secret_types, vec!["Sentry DSN"]);
        Ok(())
    }

    #[test]
    fn test_allowlist_rule_matches_type() {
        let rule = AllowlistRule {
            secret_types: vec!["Sentry DSN".to_string()],
            ..blank_rule()
        };
        assert!(rule.matches("Sentry DSN", "any/path", "any_value", "MEDIUM"));
        assert!(!rule.matches("GitHub PAT", "any/path", "any_value", "CRITICAL"));
    }

    #[test]
    fn test_allowlist_rule_matches_path() {
        let rule = AllowlistRule {
            paths: vec!["tests/fixtures/*".to_string()],
            ..blank_rule()
        };
        assert!(rule.matches("Any", "tests/fixtures/secrets.env", "val", "HIGH"));
        assert!(!rule.matches("Any", "src/main.rs", "val", "HIGH"));
    }

    #[test]
    fn test_allowlist_rule_matches_value_regex() {
        let rule = AllowlistRule {
            value_regex: Some("AKIAIOSFODNN7EXAMPLE".to_string()),
            ..blank_rule()
        };
        assert!(rule.matches("AWS", "file.env", "AKIAIOSFODNN7EXAMPLE", "CRITICAL"));
        assert!(!rule.matches("AWS", "file.env", "AKIAREALKEY12345678", "CRITICAL"));
    }

    #[test]
    fn test_allowlist_rule_matches_severity() {
        let rule = AllowlistRule {
            severities: vec!["LOW".to_string(), "MEDIUM".to_string()],
            ..blank_rule()
        };
        assert!(rule.matches("Any", "any", "val", "LOW"));
        assert!(rule.matches("Any", "any", "val", "MEDIUM"));
        assert!(!rule.matches("Any", "any", "val", "CRITICAL"));
    }

    #[test]
    fn test_allowlist_rule_multi_criteria() {
        let rule = AllowlistRule {
            secret_types: vec!["Sentry DSN".to_string()],
            paths: vec!["tests/**/*".to_string()],
            ..blank_rule()
        };
        assert!(rule.matches("Sentry DSN", "tests/fixtures/env", "val", "MEDIUM"));
        assert!(!rule.matches("Sentry DSN", "src/main.rs", "val", "MEDIUM"));
        assert!(!rule.matches("GitHub PAT", "tests/foo", "val", "CRITICAL"));
    }

    #[test]
    fn test_glob_match_single_star() {
        check_globs(&[
            ("tests/fixtures/secret.env", "tests/fixtures/*.env", true),
            ("secret.env", "*.env", true),
            ("tests/fixtures/secret.env", "tests/*.env", false),
            ("src/main.rs", "*.py", false),
        ]);
    }

    #[test]
    fn test_glob_match_double_star() {
        check_globs(&[
            ("foo/bar/baz.js", "**/baz.js", true),
            ("baz.js", "**/baz.js", true),
            ("a/b/c/d/e.txt", "a/**/e.txt", true),
            ("a/e.txt", "a/**/e.txt", true),
            ("tests/fixtures/secret.env", "tests/**/*.env", true),
            ("tests/deep/nested/secret.env", "tests/**/*.env", true),
        ]);
    }

    #[test]
    fn test_glob_match_question_mark() {
        check_globs(&[
            ("test.rs", "test.?s", true),
            ("test.js", "test.?s", true),
            ("test.rs", "test.??s", false),
        ]);
    }

    #[test]
    fn test_glob_match_negation() {
        check_globs(&[
            ("secret.env", "!*.env", false),
            ("secret.txt", "!*.env", true),
        ]);
    }

    #[test]
    fn test_glob_match_exact() {
        check_globs(&[
            ("src/main.rs", "src/main.rs", true),
            ("src/main.rs", "main.rs", true),
            ("src/main.rs", "lib/main.rs", false),
        ]);
    }

    #[test]
    fn test_allowlist_empty_rule_never_matches() {
        let rule = blank_rule();
        assert!(rule.has_no_criteria());
        assert!(!rule.matches("AWS Access Key", "src/config.rs", "AKIAIOSFODNN7REAL", "CRITICAL"));
        assert!(!rule.matches("GitHub PAT", "any/path", "any_value", "HIGH"));
    }

    #[test]
    fn test_deny_unknown_fields_rejects_typos() {
        // Both keys are misspelled (`secret_types` / `paths` are the real names).
        let bad_toml = r#"
[[allowlist]]
secret_type = "Generic High Entropy"
file_path = "*.lock"
"#;
        let parsed: std::result::Result<Config, _> = toml::from_str(bad_toml);
        assert!(
            parsed.is_err(),
            "Config with unknown fields should fail to parse"
        );
    }
}