1use std::fs;
7use std::path::{Path, PathBuf};
8
9use regex::Regex;
10
11use crate::cli::output;
12use crate::errors::Result;
13
/// A single potential secret located by the scanner.
#[derive(Debug)]
pub struct Finding {
    /// Path of the file in which the match occurred.
    pub file: PathBuf,
    /// 1-based number of the matching line within `file`.
    pub line: usize,
    /// Name of the pattern that matched the line.
    pub pattern_name: String,
}
21
/// Directory names the walker never descends into.
///
/// Matching is by exact directory name, at any depth of the scanned tree.
const SKIP_DIRS: &[&str] = &[
    ".git",
    "node_modules",
    "target",
    ".envvault",
    "__pycache__",
    ".venv",
    "vendor",
    "dist",
    ".next",
];
34
/// File extensions (without the leading dot) that are treated as binary and
/// therefore never scanned.
#[rustfmt::skip]
const BINARY_EXTENSIONS: &[&str] = &[
    // Images
    "png", "jpg", "jpeg", "gif", "bmp", "ico", "svg",
    // Fonts
    "woff", "woff2", "ttf", "eot", "otf",
    // Audio / video
    "mp3", "mp4", "avi", "mov",
    // Archives
    "zip", "tar", "gz", "bz2", "xz", "7z", "rar",
    // Documents
    "pdf", "doc", "docx", "xls", "xlsx", "ppt", "pptx",
    // Native executables, libraries and object files
    "exe", "dll", "so", "dylib", "o", "a",
    // Bytecode and packaged code
    "pyc", "class", "jar", "war", "wasm",
    // Databases
    "db", "sqlite", "sqlite3",
];
42
43pub fn execute(ci: bool, dir: Option<&str>, gitleaks_config: Option<&str>) -> Result<()> {
45 let scan_dir = match dir {
46 Some(d) => PathBuf::from(d),
47 None => std::env::current_dir()?,
48 };
49
50 if !scan_dir.is_dir() {
51 return Err(crate::errors::EnvVaultError::CommandFailed(format!(
52 "not a directory: {}",
53 scan_dir.display()
54 )));
55 }
56
57 let mut patterns: Vec<(String, Regex)> = Vec::new();
59
60 for (name, pat) in crate::git::SECRET_PATTERNS {
61 match Regex::new(pat) {
62 Ok(re) => patterns.push((name.to_string(), re)),
63 Err(_) => continue,
64 }
65 }
66
67 let gitleaks_config_from_settings;
69 if let Ok(cwd) = std::env::current_dir() {
70 if let Ok(settings) = crate::config::Settings::load(&cwd) {
71 for custom in &settings.secret_scanning.custom_patterns {
72 match Regex::new(&custom.regex) {
73 Ok(re) => patterns.push((custom.name.clone(), re)),
74 Err(e) => {
75 output::warning(&format!("Invalid custom pattern '{}': {e}", custom.name));
76 }
77 }
78 }
79 gitleaks_config_from_settings = settings.secret_scanning.gitleaks_config.clone();
80 } else {
81 gitleaks_config_from_settings = None;
82 }
83 } else {
84 gitleaks_config_from_settings = None;
85 }
86
87 let gitleaks_path = gitleaks_config.or(gitleaks_config_from_settings.as_deref());
89 if let Some(path) = gitleaks_path {
90 match load_gitleaks_rules(Path::new(path)) {
91 Ok(rules) => {
92 let count = rules.len();
93 patterns.extend(rules);
94 if count > 0 {
95 output::info(&format!("Loaded {count} gitleaks rules from {path}"));
96 }
97 }
98 Err(e) => {
99 output::warning(&format!("Failed to load gitleaks config '{path}': {e}"));
100 }
101 }
102 }
103
104 let mut findings = Vec::new();
106 walk_and_scan(&scan_dir, &patterns, &mut findings);
107
108 if findings.is_empty() {
109 output::success("No secrets detected.");
110 return Ok(());
111 }
112
113 output::warning(&format!("{} potential secret(s) found:", findings.len()));
115 println!();
116
117 for f in &findings {
118 let rel_path = f.file.strip_prefix(&scan_dir).unwrap_or(&f.file).display();
119 println!(" {}:{} — {}", rel_path, f.line, f.pattern_name);
120 }
121
122 if ci {
123 std::process::exit(1);
124 }
125
126 Ok(())
127}
128
129#[derive(serde::Deserialize)]
135struct GitleaksConfig {
136 #[serde(default)]
137 rules: Vec<GitleaksRule>,
138}
139
140#[derive(serde::Deserialize)]
142struct GitleaksRule {
143 #[serde(default)]
144 id: String,
145 #[serde(default)]
146 description: String,
147 #[serde(default)]
148 regex: String,
149}
150
151pub fn load_gitleaks_rules(path: &Path) -> Result<Vec<(String, Regex)>> {
157 let content = fs::read_to_string(path)?;
158 let config: GitleaksConfig = toml::from_str(&content).map_err(|e| {
159 crate::errors::EnvVaultError::ConfigError(format!("failed to parse gitleaks config: {e}"))
160 })?;
161
162 let mut rules = Vec::new();
163 for rule in &config.rules {
164 if rule.regex.is_empty() {
165 continue;
166 }
167 let name = if !rule.description.is_empty() {
168 rule.description.clone()
169 } else if !rule.id.is_empty() {
170 rule.id.clone()
171 } else {
172 "unnamed gitleaks rule".to_string()
173 };
174
175 match Regex::new(&rule.regex) {
176 Ok(re) => rules.push((name, re)),
177 Err(_) => {
178 }
180 }
181 }
182
183 Ok(rules)
184}
185
186fn walk_and_scan(dir: &Path, patterns: &[(String, Regex)], findings: &mut Vec<Finding>) {
188 let entries = match fs::read_dir(dir) {
189 Ok(e) => e,
190 Err(_) => return,
191 };
192
193 for entry in entries.flatten() {
194 let path = entry.path();
195
196 if path.is_dir() {
197 let dir_name = entry.file_name();
198 let name = dir_name.to_string_lossy();
199 if SKIP_DIRS.iter().any(|&s| s == name.as_ref()) {
200 continue;
201 }
202 walk_and_scan(&path, patterns, findings);
203 } else if path.is_file() {
204 if is_binary(&path) {
206 continue;
207 }
208 if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
210 if ext == "vault" {
211 continue;
212 }
213 }
214 scan_file(&path, patterns, findings);
215 }
216 }
217}
218
219fn is_binary(path: &Path) -> bool {
221 match path.extension().and_then(|e| e.to_str()) {
222 Some(ext) => BINARY_EXTENSIONS.contains(&ext),
223 None => false,
224 }
225}
226
227fn scan_file(path: &Path, patterns: &[(String, Regex)], findings: &mut Vec<Finding>) {
229 let content = match fs::read_to_string(path) {
230 Ok(c) => c,
231 Err(_) => return, };
233
234 for (line_num, line) in content.lines().enumerate() {
235 for (name, re) in patterns {
236 if re.is_match(line) {
237 findings.push(Finding {
238 file: path.to_path_buf(),
239 line: line_num + 1,
240 pattern_name: name.clone(),
241 });
242 break; }
244 }
245 }
246}
247
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::TempDir;

    /// Regex source for an AWS access key ID, shared by several tests.
    const AWS_KEY_REGEX: &str = r"AKIA[0-9A-Z]{16}";

    /// A pattern list containing only the AWS access key pattern.
    fn aws_patterns() -> Vec<(String, Regex)> {
        vec![(
            "AWS Access Key".to_string(),
            Regex::new(AWS_KEY_REGEX).unwrap(),
        )]
    }

    /// Writes `toml` to `.gitleaks.toml` in a fresh temp dir and loads it.
    fn load_rules_from(toml: &str) -> Vec<(String, Regex)> {
        let dir = TempDir::new().unwrap();
        let config_path = dir.path().join(".gitleaks.toml");
        fs::write(&config_path, toml).unwrap();
        load_gitleaks_rules(&config_path).unwrap()
    }

    #[test]
    fn detects_aws_access_key() {
        let re = Regex::new(AWS_KEY_REGEX).unwrap();
        assert!(re.is_match("aws_key = AKIAIOSFODNN7EXAMPLE"));
        assert!(!re.is_match("not_a_key = hello"));
    }

    #[test]
    fn scan_file_finds_secrets() {
        let dir = TempDir::new().unwrap();
        let file_path = dir.path().join("config.py");
        let mut file = fs::File::create(&file_path).unwrap();
        writeln!(file, "# Config file").unwrap();
        writeln!(file, "aws_key = \"AKIAIOSFODNN7EXAMPLE1\"").unwrap();
        writeln!(file, "safe_value = \"hello\"").unwrap();

        let mut findings = Vec::new();
        scan_file(&file_path, &aws_patterns(), &mut findings);

        assert_eq!(findings.len(), 1);
        assert_eq!(findings[0].line, 2);
        assert_eq!(findings[0].pattern_name, "AWS Access Key");
    }

    #[test]
    fn walk_skips_git_directory() {
        let dir = TempDir::new().unwrap();

        // A secret inside `.git` must be ignored.
        let git_dir = dir.path().join(".git");
        fs::create_dir(&git_dir).unwrap();
        fs::write(git_dir.join("config"), "AKIAIOSFODNN7EXAMPLE1\n").unwrap();

        // A harmless file outside `.git` is scanned but yields nothing.
        fs::write(dir.path().join("safe.txt"), "nothing here\n").unwrap();

        let mut findings = Vec::new();
        walk_and_scan(dir.path(), &aws_patterns(), &mut findings);

        assert!(findings.is_empty(), "should not scan .git directory");
    }

    #[test]
    fn is_binary_detects_common_types() {
        for binary in ["image.png", "data.zip", "lib.so"] {
            assert!(is_binary(Path::new(binary)));
        }
        for text in ["config.py", "README.md", "noext"] {
            assert!(!is_binary(Path::new(text)));
        }
    }

    #[test]
    fn load_gitleaks_rules_parses_valid_toml() {
        let rules = load_rules_from(
            r#"
[[rules]]
id = "aws-access-key"
description = "AWS Access Key ID"
regex = "AKIA[0-9A-Z]{16}"

[[rules]]
id = "generic-secret"
description = "Generic Secret"
regex = "secret[_-]?key\\s*=\\s*[\"'][^\"']{8,}"
"#,
        );

        assert_eq!(rules.len(), 2);
        assert_eq!(rules[0].0, "AWS Access Key ID");
        assert_eq!(rules[1].0, "Generic Secret");
    }

    #[test]
    fn load_gitleaks_rules_skips_invalid_regex() {
        // The second rule uses look-around, which the `regex` crate rejects.
        let rules = load_rules_from(
            r#"
[[rules]]
id = "valid-rule"
description = "Valid Rule"
regex = "AKIA[0-9A-Z]{16}"

[[rules]]
id = "invalid-rule"
description = "Uses Lookahead"
regex = "(?<=password=).+"
"#,
        );

        assert_eq!(rules.len(), 1);
        assert_eq!(rules[0].0, "Valid Rule");
    }

    #[test]
    fn load_gitleaks_rules_uses_id_as_fallback_name() {
        let rules = load_rules_from(
            r#"
[[rules]]
id = "my-rule-id"
regex = "SECRET_[A-Z]+"
"#,
        );

        assert_eq!(rules.len(), 1);
        assert_eq!(rules[0].0, "my-rule-id");
    }

    #[test]
    fn load_gitleaks_rules_handles_empty_rules() {
        let rules = load_rules_from("# empty config\n");
        assert!(rules.is_empty());
    }
}