use crate::detectors::{ContextAnalyzer, PatternDetector};
use crate::models::{Finding, Location};
use anyhow::{Context as AnyhowContext, Result};
use ignore::WalkBuilder;
use rayon::prelude::*;
use std::fs;
use std::path::{Path, PathBuf};
/// Walks a directory tree and scans the text files it finds for secrets.
///
/// Construct with [`FilesystemScanner::new`] and adjust the defaults through the
/// `with_*` builder methods.
#[derive(Debug, Clone)]
pub struct FilesystemScanner {
    /// Directory (or file) the walk starts from.
    root_path: PathBuf,
    /// Threshold handed to the pattern detector for every scanned line.
    entropy_threshold: f64,
    /// Files whose size in bytes exceeds this value are skipped.
    max_file_size: u64,
    /// Whether `.gitignore`, global git ignore and `.git/info/exclude` rules
    /// are honoured while walking.
    respect_gitignore: bool,
}
impl FilesystemScanner {
    /// Entropy threshold used until `with_entropy_threshold` overrides it.
    const DEFAULT_ENTROPY_THRESHOLD: f64 = 3.5;
    /// Size cap in bytes (1 MiB) used until `with_max_file_size` overrides it.
    const DEFAULT_MAX_FILE_SIZE: u64 = 1024 * 1024;

    /// Creates a scanner rooted at `root_path` with the default entropy
    /// threshold, the default size cap, and gitignore rules respected.
    pub fn new(root_path: PathBuf) -> Self {
        Self {
            root_path,
            entropy_threshold: Self::DEFAULT_ENTROPY_THRESHOLD,
            max_file_size: Self::DEFAULT_MAX_FILE_SIZE,
            respect_gitignore: true,
        }
    }

    /// Replaces the entropy threshold passed to the pattern detector.
    #[must_use]
    pub fn with_entropy_threshold(mut self, threshold: f64) -> Self {
        self.entropy_threshold = threshold;
        self
    }

    /// Replaces the maximum file size in bytes; larger files are skipped.
    #[must_use]
    pub fn with_max_file_size(mut self, size: u64) -> Self {
        self.max_file_size = size;
        self
    }

    /// Chooses whether git ignore rules are honoured while walking.
    #[must_use]
    pub fn with_gitignore(mut self, respect: bool) -> Self {
        self.respect_gitignore = respect;
        self
    }

    /// Scans every eligible file under the root in parallel and returns all
    /// findings.
    ///
    /// Files that cannot be scanned (for example because they are not valid
    /// UTF-8 or cannot be read) contribute no findings; their errors are
    /// deliberately dropped so one bad file does not abort the scan.
    ///
    /// # Errors
    ///
    /// Returns an error if walking the directory tree fails.
    pub fn scan(&self) -> Result<Vec<Finding>> {
        let files = self.collect_files()?;
        let findings: Vec<Finding> = files
            .par_iter()
            .flat_map(|file_path| self.scan_file(file_path).unwrap_or_default())
            .collect();
        Ok(findings)
    }

    /// Walks the root and returns the paths of all files worth scanning.
    ///
    /// A path is kept when it is a regular file no larger than
    /// `max_file_size`, its extension is not a known binary extension, and the
    /// context analyzer does not flag it as vendor code.
    ///
    /// # Errors
    ///
    /// Returns the first error reported by the directory walker.
    fn collect_files(&self) -> Result<Vec<PathBuf>> {
        let walker = WalkBuilder::new(&self.root_path)
            .git_ignore(self.respect_gitignore)
            .git_global(self.respect_gitignore)
            .git_exclude(self.respect_gitignore)
            // Hidden entries are not filtered out by the walker.
            .hidden(false)
            .build();

        let mut files = Vec::new();
        for entry in walker {
            let entry = entry?;
            let path = entry.path();

            // A single stat answers both "is this a regular file?" and
            // "how big is it?"; entries that cannot be stat'ed are skipped.
            let metadata = match fs::metadata(path) {
                Ok(metadata) => metadata,
                Err(_) => continue,
            };
            if !metadata.is_file() || metadata.len() > self.max_file_size {
                continue;
            }
            if self.is_likely_binary(path) {
                continue;
            }
            if ContextAnalyzer::analyze_file(path).is_vendor {
                continue;
            }
            files.push(path.to_path_buf());
        }
        Ok(files)
    }

    /// Scans a single file line by line and returns one finding per detected
    /// secret.
    ///
    /// Lines recognised as placeholders are skipped. Secrets found on comment
    /// lines have their confidence multiplied by `0.75`. Severity is adjusted
    /// by the context analyzer using the surrounding lines and file traits.
    /// Reported line numbers are 1-based and the column range spans the whole
    /// line (`0..line.len()` in bytes).
    ///
    /// # Errors
    ///
    /// Returns an error if the file cannot be read as UTF-8 text.
    fn scan_file(&self, file_path: &Path) -> Result<Vec<Finding>> {
        let content = fs::read_to_string(file_path)
            .with_context(|| format!("Failed to read file: {}", file_path.display()))?;
        let lines: Vec<&str> = content.lines().collect();
        let detector = PatternDetector::new();
        let file_context = ContextAnalyzer::analyze_file(file_path);

        let mut findings = Vec::new();
        for (line_num, line) in lines.iter().enumerate() {
            if ContextAnalyzer::is_placeholder(line) {
                continue;
            }
            let is_comment = ContextAnalyzer::is_comment(line);

            // Neighbouring lines, borrowed once per line; `None` at file edges.
            let line_before: Option<&str> = line_num
                .checked_sub(1)
                .and_then(|idx| lines.get(idx))
                .copied();
            let line_after: Option<&str> = lines.get(line_num + 1).copied();

            for mut secret in detector.scan_line(line, self.entropy_threshold) {
                if is_comment {
                    secret.confidence *= 0.75;
                }

                let severity_context = Context {
                    line_before: line_before.map(String::from),
                    line_content: line.to_string(),
                    line_after: line_after.map(String::from),
                    is_test_file: file_context.is_test_file,
                    is_config_file: file_context.is_config_file,
                    is_documentation: file_context.is_documentation,
                    file_extension: file_context.file_extension.clone(),
                };
                secret.severity =
                    ContextAnalyzer::adjust_severity(secret.severity, &severity_context);

                let location = Location {
                    file_path: file_path.to_path_buf(),
                    line_number: line_num + 1,
                    column_start: 0,
                    column_end: line.len(),
                    commit_hash: None,
                    commit_author: None,
                    commit_date: None,
                };
                let context = ContextAnalyzer::build_context(
                    line.to_string(),
                    line_before.map(String::from),
                    line_after.map(String::from),
                    &file_context,
                );
                findings.push(Finding::new(secret, location, context));
            }
        }
        Ok(findings)
    }

    /// Returns `true` when the path's extension (compared case-insensitively)
    /// is one of a fixed list of binary file extensions.
    ///
    /// Only the extension is inspected; file contents are never read. Paths
    /// without an extension, or with a non-UTF-8 extension, yield `false`.
    fn is_likely_binary(&self, path: &Path) -> bool {
        const BINARY_EXTENSIONS: &[&str] = &[
            "exe", "dll", "so", "dylib", "bin", "dat", "db", "sqlite", "jpg", "jpeg", "png",
            "gif", "bmp", "ico", "pdf", "zip", "tar", "gz", "bz2", "xz", "7z", "rar", "mp3",
            "mp4", "avi", "mov", "woff", "woff2", "ttf", "eot", "otf", "class", "pyc", "o", "a",
            "lib", "obj",
        ];

        path.extension()
            .and_then(|ext| ext.to_str())
            .map_or(false, |ext| {
                // Every known extension is ASCII, so an ASCII case-insensitive
                // comparison suffices and avoids allocating a lowercase copy.
                BINARY_EXTENSIONS
                    .iter()
                    .any(|known| known.eq_ignore_ascii_case(ext))
            })
    }

    /// Reports how many files a scan would cover and their combined size in
    /// bytes.
    ///
    /// Files whose metadata cannot be read count towards `total_files` but add
    /// nothing to `total_size`.
    ///
    /// # Errors
    ///
    /// Returns an error if walking the directory tree fails.
    pub fn get_stats(&self) -> Result<ScanStats> {
        let files = self.collect_files()?;
        let total_size: u64 = files
            .iter()
            .filter_map(|file| fs::metadata(file).ok())
            .map(|metadata| metadata.len())
            .sum();
        Ok(ScanStats {
            total_files: files.len(),
            total_size,
        })
    }
}
use crate::models::Context;
/// Summary of the files a scan would cover.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct ScanStats {
    /// Number of files selected for scanning.
    pub total_files: usize,
    /// Combined size of those files in bytes.
    pub total_size: u64,
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// A freshly constructed scanner carries the default threshold and
    /// honours gitignore rules.
    #[test]
    fn test_filesystem_scanner_creation() {
        let default_scanner = FilesystemScanner::new(PathBuf::from("."));
        assert_eq!(default_scanner.entropy_threshold, 3.5);
        assert!(default_scanner.respect_gitignore);
    }

    /// Scanning a directory containing an AWS-style access key yields at
    /// least one finding.
    #[test]
    fn test_scan_file_with_secret() -> Result<()> {
        let workspace = TempDir::new()?;
        let secret_file = workspace.path().join("test.txt");
        fs::write(&secret_file, "AWS_ACCESS_KEY=AKIAIOSFODNN7EXAMPLE")?;

        let findings = FilesystemScanner::new(workspace.path().to_path_buf()).scan()?;
        assert!(!findings.is_empty());
        Ok(())
    }

    /// Binary extensions are recognised; source-code extensions are not.
    #[test]
    fn test_skip_binary_files() {
        let scanner = FilesystemScanner::new(PathBuf::from("."));
        let expectations = [("test.exe", true), ("image.png", true), ("code.rs", false)];
        for &(file_name, expected) in &expectations {
            assert_eq!(scanner.is_likely_binary(Path::new(file_name)), expected);
        }
    }
}