use crate::core::error;
use regex::Regex;
use std::path::{Path, PathBuf};
/// Settings that decide which gatekeeper checks run and how strict they are.
#[derive(Debug, Clone)]
pub struct GatekeeperConfig {
    /// Largest accepted diff size in bytes; a bigger diff is reported as
    /// `DiffTooLarge`.
    pub max_diff_bytes: u64,
    /// Glob patterns naming the paths that are permitted.
    pub allow_paths: Vec<String>,
    /// Glob patterns naming the paths that must not be touched.
    pub block_paths: Vec<String>,
    /// Enables the scan of file contents for secret-looking text.
    pub scan_secrets: bool,
    /// Enables the scan of code files for dangerous constructs.
    pub scan_dangerous_patterns: bool,
}
/// Default policy: a 10 MiB diff limit, every path allowed, a handful of
/// credential-style locations blocked, and both content scans switched on.
impl Default for GatekeeperConfig {
    fn default() -> Self {
        // Glob patterns blocked out of the box.
        let blocked = [".env", ".env.*", "**/secrets/**", "**/.credentials"];

        Self {
            max_diff_bytes: 10 * 1024 * 1024,
            allow_paths: vec![String::from("*")],
            block_paths: blocked.iter().map(|pattern| String::from(*pattern)).collect(),
            scan_secrets: true,
            scan_dangerous_patterns: true,
        }
    }
}
/// Outcome of a gatekeeper run.
#[derive(Debug)]
pub struct GateResult {
    /// `true` when the run recorded no violations.
    pub passed: bool,
    /// Every violation collected during the run.
    pub violations: Vec<Violation>,
}
/// A single finding reported by one of the gatekeeper checks.
#[derive(Debug)]
pub struct Violation {
    /// Which check produced the finding.
    pub kind: ViolationKind,
    /// Path the finding refers to (`.` is used for the whole-diff size check).
    pub path: PathBuf,
    /// 1-based line number when the finding is tied to a specific line.
    pub line: Option<usize>,
    /// Human-readable explanation of the finding.
    pub message: String,
}
/// Category of a gatekeeper finding.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ViolationKind {
    /// A path was rejected by the configured path rules.
    PathBlocked,
    /// The diff is larger than the configured byte limit.
    DiffTooLarge,
    /// A line matched one of the secret-detection expressions.
    SecretDetected,
    /// A line matched one of the dangerous-construct expressions.
    DangerousPattern,
}
impl std::fmt::Display for ViolationKind {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::PathBlocked => write!(f, "Path blocked"),
Self::DiffTooLarge => write!(f, "Diff too large"),
Self::SecretDetected => write!(f, "Secret detected"),
Self::DangerousPattern => write!(f, "Dangerous pattern"),
}
}
}
/// Runs every configured gate check against a set of changed paths.
///
/// Checks, in order:
/// 1. the diff size against `config.max_diff_bytes`;
/// 2. each path against `config.allow_paths` (it must match at least one
///    pattern) and `config.block_paths` (one violation per matching pattern);
/// 3. the secret scan, when `config.scan_secrets` is set;
/// 4. the dangerous-pattern scan, when `config.scan_dangerous_patterns` is set.
///
/// # Arguments
/// * `repo_root` - directory handed to the content scanners as the base for `paths`.
/// * `paths` - changed paths; glob patterns are matched against them as written.
/// * `diff_bytes` - size of the diff in bytes.
/// * `config` - limits, path rules and scan toggles.
///
/// # Returns
/// A [`GateResult`] whose `passed` flag is `true` only when no violation was
/// recorded.
///
/// # Errors
/// Propagates any error returned by the secret or dangerous-pattern scanners.
pub fn run_gatekeeper(
    repo_root: &Path,
    paths: &[PathBuf],
    diff_bytes: u64,
    config: &GatekeeperConfig,
) -> Result<GateResult, error::DecapodError> {
    let mut violations = Vec::new();

    if diff_bytes > config.max_diff_bytes {
        violations.push(Violation {
            kind: ViolationKind::DiffTooLarge,
            path: PathBuf::from("."),
            line: None,
            message: format!(
                "Diff size {} bytes exceeds limit of {} bytes",
                diff_bytes, config.max_diff_bytes
            ),
        });
    }

    for path in paths {
        let path_str = path.to_string_lossy();

        // The allow-list used to be ignored entirely. An empty list is treated
        // as "no restriction" so configurations built without one keep passing.
        let allowed = config.allow_paths.is_empty()
            || config
                .allow_paths
                .iter()
                .any(|pattern| glob_match(pattern, &path_str));
        if !allowed {
            violations.push(Violation {
                kind: ViolationKind::PathBlocked,
                path: path.clone(),
                line: None,
                message: format!("Path does not match any allowed pattern: {}", path_str),
            });
        }

        // Every matching block pattern is reported separately.
        for pattern in &config.block_paths {
            if glob_match(pattern, &path_str) {
                violations.push(Violation {
                    kind: ViolationKind::PathBlocked,
                    path: path.clone(),
                    line: None,
                    message: format!("Path matches blocked pattern: {}", pattern),
                });
            }
        }
    }

    if config.scan_secrets {
        violations.extend(scan_for_secrets(repo_root, paths)?);
    }
    if config.scan_dangerous_patterns {
        violations.extend(scan_for_dangerous_patterns(repo_root, paths)?);
    }

    let passed = violations.is_empty();
    Ok(GateResult { passed, violations })
}
/// Scans the given files line by line for text matching a secret pattern.
///
/// Each entry of `paths` is joined onto `repo_root`. Entries that are not
/// regular files, or that cannot be read, are skipped. Every (line, pattern)
/// match yields one `SecretDetected` violation carrying the 1-based line
/// number and the expression that matched.
///
/// # Errors
/// Currently never returns `Err`; the `Result` is kept for the caller's `?`.
fn scan_for_secrets(
    repo_root: &Path,
    paths: &[PathBuf],
) -> Result<Vec<Violation>, error::DecapodError> {
    /// Loads a file as text, or `None` when it is unreadable or looks binary.
    ///
    /// Valid UTF-8 is returned unchanged. Content with invalid UTF-8 but no
    /// NUL byte is decoded lossily, so one stray byte cannot hide a whole file
    /// from the scan. Invalid content containing NUL is treated as binary.
    fn read_text(path: &Path) -> Option<String> {
        let bytes = std::fs::read(path).ok()?;
        match String::from_utf8(bytes) {
            Ok(text) => Some(text),
            Err(err) => {
                let raw = err.into_bytes();
                if raw.contains(&0) {
                    None
                } else {
                    Some(String::from_utf8_lossy(&raw).into_owned())
                }
            }
        }
    }

    let patterns = secret_patterns();
    let mut violations = Vec::new();

    for path in paths {
        let full_path = repo_root.join(path);
        // `is_file` is already false for paths that do not exist.
        if !full_path.is_file() {
            continue;
        }
        let content = match read_text(&full_path) {
            Some(text) => text,
            None => continue,
        };

        for (index, line) in content.lines().enumerate() {
            for pattern in &patterns {
                if pattern.is_match(line) {
                    violations.push(Violation {
                        kind: ViolationKind::SecretDetected,
                        path: path.clone(),
                        line: Some(index + 1),
                        message: format!("Potential secret detected: {}", pattern),
                    });
                }
            }
        }
    }

    Ok(violations)
}
/// Scans code files line by line for text matching a dangerous-construct pattern.
///
/// Only paths whose extension is one of `rs`, `py`, `js`, `ts`, `sh`, `bash`
/// or `zsh` (compared case-insensitively) are considered. Each is joined onto
/// `repo_root`; entries that are not regular files, or that cannot be read,
/// are skipped. Every (line, pattern) match yields one `DangerousPattern`
/// violation carrying the 1-based line number and the expression that matched.
///
/// # Errors
/// Currently never returns `Err`; the `Result` is kept for the caller's `?`.
fn scan_for_dangerous_patterns(
    repo_root: &Path,
    paths: &[PathBuf],
) -> Result<Vec<Violation>, error::DecapodError> {
    /// Extensions of the files that are treated as code.
    const CODE_EXTENSIONS: [&str; 7] = ["rs", "py", "js", "ts", "sh", "bash", "zsh"];

    /// Loads a file as text, or `None` when it is unreadable or looks binary.
    ///
    /// Valid UTF-8 is returned unchanged. Content with invalid UTF-8 but no
    /// NUL byte is decoded lossily, so one stray byte cannot hide a whole file
    /// from the scan. Invalid content containing NUL is treated as binary.
    fn read_text(path: &Path) -> Option<String> {
        let bytes = std::fs::read(path).ok()?;
        match String::from_utf8(bytes) {
            Ok(text) => Some(text),
            Err(err) => {
                let raw = err.into_bytes();
                if raw.contains(&0) {
                    None
                } else {
                    Some(String::from_utf8_lossy(&raw).into_owned())
                }
            }
        }
    }

    let patterns = dangerous_patterns();
    let mut violations = Vec::new();

    for path in paths {
        // Case-insensitive so that e.g. `DEPLOY.SH` is scanned like `deploy.sh`.
        let is_code = path
            .extension()
            .and_then(|ext| ext.to_str())
            .map_or(false, |ext| {
                CODE_EXTENSIONS
                    .iter()
                    .any(|known| known.eq_ignore_ascii_case(ext))
            });
        if !is_code {
            continue;
        }

        let full_path = repo_root.join(path);
        // `is_file` is already false for paths that do not exist.
        if !full_path.is_file() {
            continue;
        }
        let content = match read_text(&full_path) {
            Some(text) => text,
            None => continue,
        };

        for (index, line) in content.lines().enumerate() {
            for pattern in &patterns {
                if pattern.is_match(line) {
                    violations.push(Violation {
                        kind: ViolationKind::DangerousPattern,
                        path: path.clone(),
                        line: Some(index + 1),
                        message: format!("Dangerous pattern detected: {}", pattern),
                    });
                }
            }
        }
    }

    Ok(violations)
}
/// Compiles the expressions used to spot secret-looking text.
///
/// The returned vector keeps the order of the table below.
///
/// # Panics
/// Panics if one of the hard-coded expressions fails to compile.
fn secret_patterns() -> Vec<Regex> {
    const SOURCES: [&str; 8] = [
        // Key-id prefixes (AKIA, ASIA, ...) followed by 16 alphanumerics.
        r#"(?i)(A3T[A-Z0-9]|AKIA|AGPA|AIDA|AROA|AIPA|ANPA|ANVA|ASIA)[0-9A-Z]{16}"#,
        // "aws", up to 20 characters, then a quoted 40-character value.
        r#"(?i)aws(.{0,20})?['"][0-9a-zA-Z/+=]{40}['"]"#,
        // api-key / secret-key style assignments with a value of 20+ characters.
        r#"(?i)(api[_-]?key|apikey|api_secret|secret[_-]?key)['"]?\s*[:=]\s*['"]?[a-zA-Z0-9_\-]{20,}['"]?"#,
        // "bearer" followed by a token.
        r#"(?i)bearer\s+[a-zA-Z0-9_\-\.]+"#,
        // Tokens prefixed with ghp_/gho_/ghu_/ghs_/ghr_.
        r#"(ghp|gho|ghu|ghs|ghr)_[a-zA-Z0-9_]{36,255}"#,
        // password-style assignments with a value of 8+ characters.
        r#"(?i)(password|passwd|pwd)['"]?\s*[:=]\s*['"]?[^\s'"]{8,}['"]?"#,
        // PEM private-key header line.
        r#"-----BEGIN (RSA |DSA |EC |OPENSSH )?PRIVATE KEY-----"#,
        // Database URLs that embed `user:password@`.
        r#"(?i)(postgres|mysql|mongodb|redis)://[^\s'"]+:[^\s'"]+@[^\s'"]+"#,
    ];

    SOURCES
        .iter()
        .map(|source| Regex::new(source).unwrap())
        .collect()
}
/// Compiles the expressions used to flag dangerous constructs in code files.
///
/// The returned vector keeps the order of the table below.
///
/// # Panics
/// Panics if one of the hard-coded expressions fails to compile.
fn dangerous_patterns() -> Vec<Regex> {
    // NOTE(review): the last two expressions match any `${...}`, `$(...)` or
    // `$name` expansion, which looks broad for ordinary shell scripts —
    // confirm this is intended.
    const SOURCES: [&str; 5] = [
        // `eval` followed by whitespace and a `$`.
        r#"\beval\s+\$"#,
        // `exec(` call.
        r#"\bexec\s*\("#,
        // `subprocess.<name>(... shell=True`.
        r#"subprocess\.[a-z]+\([^)]*shell\s*=\s*True"#,
        // `${...}` or `$(...)`.
        r#"\$\{[^}]+\}|\$\([^)]+\)"#,
        // `$name` followed by a character that is neither whitespace nor a quote.
        r#"\$\w+[^\s"']"#,
    ];

    SOURCES
        .iter()
        .map(|source| Regex::new(source).unwrap())
        .collect()
}
/// Reports whether `text` matches the glob `pattern`.
///
/// Supported syntax:
/// * `*` and `**` both stand for any run of characters, including `/` and
///   the empty string; a pattern may contain any number of them.
/// * A leading `**/` additionally matches zero directories, so
///   `**/.credentials` matches both `a/b/.credentials` and `.credentials`.
/// * A pattern without wildcards must equal `text` exactly.
///
/// Earlier behaviour only understood patterns with exactly one wildcard;
/// anything else (for example `**/secrets/**`) silently degraded to a literal
/// comparison and could never match.
fn glob_match(pattern: &str, text: &str) -> bool {
    /// Wildcard matcher in which every `*` matches any run of characters.
    fn wildcard_match(pattern: &str, text: &str) -> bool {
        let mut pieces = pattern.split('*');
        let first = pieces.next().unwrap_or("");
        let mut middle: Vec<&str> = pieces.collect();
        let last = match middle.pop() {
            Some(piece) => piece,
            // No `*` in the pattern: plain literal comparison.
            None => return first == text,
        };

        // Anchor the literal head and tail first; stripping them one after the
        // other guarantees they cannot overlap inside `text`.
        let rest = match text.strip_prefix(first) {
            Some(rest) => rest,
            None => return false,
        };
        let mut rest = match rest.strip_suffix(last) {
            Some(rest) => rest,
            None => return false,
        };

        // The remaining literals must appear in order; taking the leftmost
        // occurrence each time leaves the most room for the ones that follow.
        for piece in middle.iter().filter(|piece| !piece.is_empty()) {
            match rest.find(*piece) {
                Some(at) => rest = &rest[at + piece.len()..],
                None => return false,
            }
        }
        true
    }

    if wildcard_match(pattern, text) {
        return true;
    }
    // Let a leading `**/` also match files located directly at the root.
    match pattern.strip_prefix("**/") {
        Some(rootless) => glob_match(rootless, text),
        None => false,
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Each (pattern, text) pair below is expected to match.
    #[test]
    fn test_glob_match() {
        let matching = [
            ("*", "foo"),
            ("*.rs", "main.rs"),
            ("**/.credentials", "foo/bar/.credentials"),
            ("src/**", "src/lib.rs"),
            (".env*", ".env.local"),
        ];
        for (pattern, text) in matching.iter() {
            assert!(glob_match(pattern, text));
        }
    }

    /// Every sample line must be caught by at least one secret expression.
    #[test]
    fn test_secret_patterns() {
        let patterns = secret_patterns();
        let samples = [
            "AWS_KEY=AKIAIOSFODNN7EXAMPLE",
            "token=ghp_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
            "-----BEGIN PRIVATE KEY-----",
        ];
        for line in samples.iter() {
            assert!(patterns.iter().any(|p| p.is_match(line)));
        }
    }

    /// Every sample line must be caught by at least one dangerous-construct expression.
    #[test]
    fn test_dangerous_patterns() {
        let patterns = dangerous_patterns();
        let samples = ["eval $CMD", "subprocess.run(cmd, shell=True)"];
        for line in samples.iter() {
            assert!(patterns.iter().any(|p| p.is_match(line)));
        }
    }

    /// The default configuration enables both scans and blocks some paths.
    #[test]
    fn test_gatekeeper_default_config() {
        let GatekeeperConfig {
            scan_secrets,
            scan_dangerous_patterns,
            block_paths,
            ..
        } = GatekeeperConfig::default();
        assert!(scan_secrets);
        assert!(scan_dangerous_patterns);
        assert!(!block_paths.is_empty());
    }
}