// envvault/cli/commands/scan.rs
1//! `envvault scan` — scan files for leaked secrets.
2//!
3//! Walks the directory tree and checks each text file against built-in
4//! and custom secret patterns. Reports findings with file path and line number.
5
6use std::fs;
7use std::path::{Path, PathBuf};
8
9use regex::Regex;
10
11use crate::cli::output;
12use crate::errors::Result;
13
/// A single finding from a secret scan.
///
/// Findings are plain data, so the usual value-type derives are provided
/// (`Clone` for collecting/reporting, `PartialEq`/`Eq` for tests and dedup).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Finding {
    /// File in which the match occurred.
    pub file: PathBuf,
    /// 1-based line number of the matching line.
    pub line: usize,
    /// Human-readable name of the pattern that matched.
    pub pattern_name: String,
}
21
/// Directories to skip during scanning.
///
/// Matched by exact directory name (not path), at any depth of the walk.
/// These are VCS metadata, dependency/vendor trees, build outputs, and
/// envvault's own state directory — all high-noise, low-signal locations.
const SKIP_DIRS: &[&str] = &[
    ".git",
    "node_modules",
    "target",
    ".envvault",
    "__pycache__",
    ".venv",
    "vendor",
    "dist",
    ".next",
];
34
/// File extensions to treat as binary (skip).
///
/// Purely a heuristic on the extension — the file content is never sniffed.
/// Covers images/fonts/media, archives, office documents, compiled
/// artifacts, and database files (the audit DB ships as sqlite).
const BINARY_EXTENSIONS: &[&str] = &[
    "png", "jpg", "jpeg", "gif", "bmp", "ico", "svg", "woff", "woff2", "ttf", "eot", "otf", "mp3",
    "mp4", "avi", "mov", "zip", "tar", "gz", "bz2", "xz", "7z", "rar", "pdf", "doc", "docx", "xls",
    "xlsx", "ppt", "pptx", "exe", "dll", "so", "dylib", "o", "a", "pyc", "class", "jar", "war",
    "wasm", "db", "sqlite", "sqlite3",
];
42
43/// Execute the `scan` command.
44pub fn execute(ci: bool, dir: Option<&str>, gitleaks_config: Option<&str>) -> Result<()> {
45    let scan_dir = match dir {
46        Some(d) => PathBuf::from(d),
47        None => std::env::current_dir()?,
48    };
49
50    if !scan_dir.is_dir() {
51        return Err(crate::errors::EnvVaultError::CommandFailed(format!(
52            "not a directory: {}",
53            scan_dir.display()
54        )));
55    }
56
57    // Build patterns: built-in + custom from config.
58    let mut patterns: Vec<(String, Regex)> = Vec::new();
59
60    for (name, pat) in crate::git::SECRET_PATTERNS {
61        match Regex::new(pat) {
62            Ok(re) => patterns.push((name.to_string(), re)),
63            Err(_) => continue,
64        }
65    }
66
67    // Load custom patterns from config if available.
68    let gitleaks_config_from_settings;
69    if let Ok(cwd) = std::env::current_dir() {
70        if let Ok(settings) = crate::config::Settings::load(&cwd) {
71            for custom in &settings.secret_scanning.custom_patterns {
72                match Regex::new(&custom.regex) {
73                    Ok(re) => patterns.push((custom.name.clone(), re)),
74                    Err(e) => {
75                        output::warning(&format!("Invalid custom pattern '{}': {e}", custom.name));
76                    }
77                }
78            }
79            gitleaks_config_from_settings = settings.secret_scanning.gitleaks_config.clone();
80        } else {
81            gitleaks_config_from_settings = None;
82        }
83    } else {
84        gitleaks_config_from_settings = None;
85    }
86
87    // Load gitleaks rules from CLI flag or config.
88    let gitleaks_path = gitleaks_config.or(gitleaks_config_from_settings.as_deref());
89    if let Some(path) = gitleaks_path {
90        match load_gitleaks_rules(Path::new(path)) {
91            Ok(rules) => {
92                let count = rules.len();
93                patterns.extend(rules);
94                if count > 0 {
95                    output::info(&format!("Loaded {count} gitleaks rules from {path}"));
96                }
97            }
98            Err(e) => {
99                output::warning(&format!("Failed to load gitleaks config '{path}': {e}"));
100            }
101        }
102    }
103
104    // Walk directory and scan files.
105    let mut findings = Vec::new();
106    walk_and_scan(&scan_dir, &patterns, &mut findings);
107
108    if findings.is_empty() {
109        output::success("No secrets detected.");
110        return Ok(());
111    }
112
113    // Report findings.
114    output::warning(&format!("{} potential secret(s) found:", findings.len()));
115    println!();
116
117    for f in &findings {
118        let rel_path = f.file.strip_prefix(&scan_dir).unwrap_or(&f.file).display();
119        println!("  {}:{} — {}", rel_path, f.line, f.pattern_name);
120    }
121
122    if ci {
123        std::process::exit(1);
124    }
125
126    Ok(())
127}
128
129// ---------------------------------------------------------------------------
130// Gitleaks rule loading
131// ---------------------------------------------------------------------------
132
/// A gitleaks TOML config file structure.
///
/// Only the `[[rules]]` array is deserialized; every other key in a real
/// gitleaks config (allowlists, title, etc.) is ignored. A file with no
/// `rules` deserializes to an empty list via `#[serde(default)]`.
#[derive(serde::Deserialize)]
struct GitleaksConfig {
    #[serde(default)]
    rules: Vec<GitleaksRule>,
}
139
/// A single gitleaks rule with an id, description, and regex.
///
/// All fields default to the empty string, so partially-specified rules
/// still deserialize; `load_gitleaks_rules` filters out those with no
/// `regex` and falls back from `description` to `id` for the name.
#[derive(serde::Deserialize)]
struct GitleaksRule {
    #[serde(default)]
    id: String,
    #[serde(default)]
    description: String,
    #[serde(default)]
    regex: String,
}
150
151/// Load rules from a gitleaks-format TOML file.
152///
153/// Each rule has `id`, `description`, and `regex` fields. Rules whose regex
154/// fails to compile (e.g. uses PCRE-only features like lookarounds) are
155/// silently skipped.
156pub fn load_gitleaks_rules(path: &Path) -> Result<Vec<(String, Regex)>> {
157    let content = fs::read_to_string(path)?;
158    let config: GitleaksConfig = toml::from_str(&content).map_err(|e| {
159        crate::errors::EnvVaultError::ConfigError(format!("failed to parse gitleaks config: {e}"))
160    })?;
161
162    let mut rules = Vec::new();
163    for rule in &config.rules {
164        if rule.regex.is_empty() {
165            continue;
166        }
167        let name = if !rule.description.is_empty() {
168            rule.description.clone()
169        } else if !rule.id.is_empty() {
170            rule.id.clone()
171        } else {
172            "unnamed gitleaks rule".to_string()
173        };
174
175        match Regex::new(&rule.regex) {
176            Ok(re) => rules.push((name, re)),
177            Err(_) => {
178                // Silently skip rules with incompatible regex (PCRE lookarounds, etc.)
179            }
180        }
181    }
182
183    Ok(rules)
184}
185
186/// Recursively walk the directory, scanning each text file.
187fn walk_and_scan(dir: &Path, patterns: &[(String, Regex)], findings: &mut Vec<Finding>) {
188    let entries = match fs::read_dir(dir) {
189        Ok(e) => e,
190        Err(_) => return,
191    };
192
193    for entry in entries.flatten() {
194        let path = entry.path();
195
196        if path.is_dir() {
197            let dir_name = entry.file_name();
198            let name = dir_name.to_string_lossy();
199            if SKIP_DIRS.iter().any(|&s| s == name.as_ref()) {
200                continue;
201            }
202            walk_and_scan(&path, patterns, findings);
203        } else if path.is_file() {
204            // Skip binary files.
205            if is_binary(&path) {
206                continue;
207            }
208            // Skip vault files and the audit database.
209            if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
210                if ext == "vault" {
211                    continue;
212                }
213            }
214            scan_file(&path, patterns, findings);
215        }
216    }
217}
218
219/// Check if a file is likely binary based on extension.
220fn is_binary(path: &Path) -> bool {
221    match path.extension().and_then(|e| e.to_str()) {
222        Some(ext) => BINARY_EXTENSIONS.contains(&ext),
223        None => false,
224    }
225}
226
227/// Scan a single file for secret patterns.
228fn scan_file(path: &Path, patterns: &[(String, Regex)], findings: &mut Vec<Finding>) {
229    let content = match fs::read_to_string(path) {
230        Ok(c) => c,
231        Err(_) => return, // Skip files that can't be read as UTF-8.
232    };
233
234    for (line_num, line) in content.lines().enumerate() {
235        for (name, re) in patterns {
236            if re.is_match(line) {
237                findings.push(Finding {
238                    file: path.to_path_buf(),
239                    line: line_num + 1,
240                    pattern_name: name.clone(),
241                });
242                break; // One finding per line is enough.
243            }
244        }
245    }
246}
247
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::TempDir;

    // Sanity-check the shape of the built-in AWS access key pattern.
    #[test]
    fn detects_aws_access_key() {
        let re = Regex::new(r"AKIA[0-9A-Z]{16}").unwrap();
        assert!(re.is_match("aws_key = AKIAIOSFODNN7EXAMPLE"));
        assert!(!re.is_match("not_a_key = hello"));
    }

    // scan_file should report the 1-based line of the match and the pattern
    // name, and produce exactly one finding for one secret line.
    #[test]
    fn scan_file_finds_secrets() {
        let dir = TempDir::new().unwrap();
        let file_path = dir.path().join("config.py");
        let mut file = fs::File::create(&file_path).unwrap();
        writeln!(file, "# Config file").unwrap();
        writeln!(file, "aws_key = \"AKIAIOSFODNN7EXAMPLE1\"").unwrap();
        writeln!(file, "safe_value = \"hello\"").unwrap();

        let patterns = vec![(
            "AWS Access Key".to_string(),
            Regex::new(r"AKIA[0-9A-Z]{16}").unwrap(),
        )];

        let mut findings = Vec::new();
        scan_file(&file_path, &patterns, &mut findings);

        assert_eq!(findings.len(), 1);
        assert_eq!(findings[0].line, 2);
        assert_eq!(findings[0].pattern_name, "AWS Access Key");
    }

    // Directories listed in SKIP_DIRS (here: .git) must not be descended into,
    // even when they contain matching content.
    #[test]
    fn walk_skips_git_directory() {
        let dir = TempDir::new().unwrap();

        // Create .git directory with a "secret".
        let git_dir = dir.path().join(".git");
        fs::create_dir(&git_dir).unwrap();
        let secret_file = git_dir.join("config");
        fs::write(&secret_file, "AKIAIOSFODNN7EXAMPLE1\n").unwrap();

        // Create a normal file.
        fs::write(dir.path().join("safe.txt"), "nothing here\n").unwrap();

        let patterns = vec![(
            "AWS Access Key".to_string(),
            Regex::new(r"AKIA[0-9A-Z]{16}").unwrap(),
        )];

        let mut findings = Vec::new();
        walk_and_scan(dir.path(), &patterns, &mut findings);

        assert!(findings.is_empty(), "should not scan .git directory");
    }

    // Extension-based binary detection: known binary extensions are flagged,
    // text extensions and extensionless names are not.
    #[test]
    fn is_binary_detects_common_types() {
        assert!(is_binary(Path::new("image.png")));
        assert!(is_binary(Path::new("data.zip")));
        assert!(is_binary(Path::new("lib.so")));
        assert!(!is_binary(Path::new("config.py")));
        assert!(!is_binary(Path::new("README.md")));
        assert!(!is_binary(Path::new("noext")));
    }

    // --- Gitleaks rule loading tests ---

    // A well-formed gitleaks TOML yields one (name, regex) pair per rule,
    // named by the rule's description.
    #[test]
    fn load_gitleaks_rules_parses_valid_toml() {
        let dir = TempDir::new().unwrap();
        let config_path = dir.path().join(".gitleaks.toml");
        let config = r#"
[[rules]]
id = "aws-access-key"
description = "AWS Access Key ID"
regex = "AKIA[0-9A-Z]{16}"

[[rules]]
id = "generic-secret"
description = "Generic Secret"
regex = "secret[_-]?key\\s*=\\s*[\"'][^\"']{8,}"
"#;
        fs::write(&config_path, config).unwrap();

        let rules = load_gitleaks_rules(&config_path).unwrap();
        assert_eq!(rules.len(), 2);
        assert_eq!(rules[0].0, "AWS Access Key ID");
        assert_eq!(rules[1].0, "Generic Secret");
    }

    // Rules whose regex the Rust regex crate rejects must be dropped without
    // failing the whole load.
    #[test]
    fn load_gitleaks_rules_skips_invalid_regex() {
        let dir = TempDir::new().unwrap();
        let config_path = dir.path().join(".gitleaks.toml");
        // Use a PCRE lookahead which Rust's regex crate does not support.
        let config = r#"
[[rules]]
id = "valid-rule"
description = "Valid Rule"
regex = "AKIA[0-9A-Z]{16}"

[[rules]]
id = "invalid-rule"
description = "Uses Lookahead"
regex = "(?<=password=).+"
"#;
        fs::write(&config_path, config).unwrap();

        let rules = load_gitleaks_rules(&config_path).unwrap();
        // Only the valid rule should be loaded.
        assert_eq!(rules.len(), 1);
        assert_eq!(rules[0].0, "Valid Rule");
    }

    // When a rule has no description, its id is used as the display name.
    #[test]
    fn load_gitleaks_rules_uses_id_as_fallback_name() {
        let dir = TempDir::new().unwrap();
        let config_path = dir.path().join(".gitleaks.toml");
        let config = r#"
[[rules]]
id = "my-rule-id"
regex = "SECRET_[A-Z]+"
"#;
        fs::write(&config_path, config).unwrap();

        let rules = load_gitleaks_rules(&config_path).unwrap();
        assert_eq!(rules.len(), 1);
        assert_eq!(rules[0].0, "my-rule-id");
    }

    // A TOML file with no [[rules]] tables produces an empty rule set, not an error.
    #[test]
    fn load_gitleaks_rules_handles_empty_rules() {
        let dir = TempDir::new().unwrap();
        let config_path = dir.path().join(".gitleaks.toml");
        let config = "# empty config\n";
        fs::write(&config_path, config).unwrap();

        let rules = load_gitleaks_rules(&config_path).unwrap();
        assert!(rules.is_empty());
    }
}