use crate::detectors::{ContextAnalyzer, PatternDetector};
use crate::models::{Finding, Location};
use anyhow::{Context, Result};
use git2::{Commit, Diff, DiffOptions, Repository};
use std::path::{Path, PathBuf};
/// Configuration for scanning a git repository for secrets.
///
/// Built with [`GitScanner::new`] and adjusted through the `with_*` builder
/// methods; [`GitScanner::scan`] performs the actual scan.
#[derive(Debug, Clone)]
pub struct GitScanner {
    /// Path passed to `Repository::open` when a scan starts.
    repo_path: PathBuf,
    /// When `true`, commit history is scanned in addition to the working directory.
    scan_history: bool,
    /// Upper bound on the number of commits walked from `HEAD`; `None` means no limit.
    max_depth: Option<usize>,
    /// Entropy threshold forwarded to the detectors.
    entropy_threshold: f64,
}
impl GitScanner {
pub fn new(repo_path: PathBuf) -> Self {
Self {
repo_path,
scan_history: true,
max_depth: None,
entropy_threshold: 3.5,
}
}
pub fn with_history(mut self, scan_history: bool) -> Self {
self.scan_history = scan_history;
self
}
pub fn with_max_depth(mut self, depth: usize) -> Self {
self.max_depth = Some(depth);
self
}
pub fn with_entropy_threshold(mut self, threshold: f64) -> Self {
self.entropy_threshold = threshold;
self
}
pub fn scan(&self) -> Result<Vec<Finding>> {
let repo = Repository::open(&self.repo_path).context("Failed to open git repository")?;
let mut findings = Vec::new();
findings.extend(self.scan_working_directory(&repo)?);
if self.scan_history {
let history_findings = self.scan_git_history(&repo)?;
for hf in history_findings {
let dominated = findings.iter().any(|f: &Finding| {
f.location.file_path == hf.location.file_path
&& f.location.line_number == hf.location.line_number
&& f.secret.value == hf.secret.value
});
if !dominated {
findings.push(hf);
}
}
}
Ok(findings)
}
fn scan_git_history(&self, repo: &Repository) -> Result<Vec<Finding>> {
let mut findings = Vec::new();
let mut revwalk = repo.revwalk()?;
revwalk.push_head()?;
let max_commits = self.max_depth.unwrap_or(usize::MAX);
for (commit_count, oid) in revwalk.enumerate() {
if commit_count >= max_commits {
break;
}
let oid = oid?;
let commit = repo.find_commit(oid)?;
findings.extend(self.scan_commit(repo, &commit)?);
}
Ok(findings)
}
fn scan_commit(&self, repo: &Repository, commit: &Commit) -> Result<Vec<Finding>> {
let mut findings = Vec::new();
let tree = commit.tree()?;
let parent_tree = if commit.parent_count() > 0 {
Some(commit.parent(0)?.tree()?)
} else {
None
};
let mut diff_opts = DiffOptions::new();
let diff = if let Some(parent_tree) = parent_tree {
repo.diff_tree_to_tree(Some(&parent_tree), Some(&tree), Some(&mut diff_opts))?
} else {
repo.diff_tree_to_tree(None, Some(&tree), Some(&mut diff_opts))?
};
findings.extend(self.scan_diff(repo, &diff, commit)?);
Ok(findings)
}
fn scan_diff(&self, _repo: &Repository, diff: &Diff, commit: &Commit) -> Result<Vec<Finding>> {
let mut findings = Vec::new();
let detector = PatternDetector::new();
diff.foreach(
&mut |delta, _progress| {
let file_path = delta.new_file().path().unwrap_or(Path::new("unknown"));
let file_context = ContextAnalyzer::analyze_file(file_path);
if file_context.is_vendor {
return true;
}
true
},
None,
None,
Some(&mut |_delta, _hunk, line| {
let content = String::from_utf8_lossy(line.content());
if line.origin() == '+' {
let secrets = detector.scan_line(&content, self.entropy_threshold);
for secret in secrets {
if let Some(path) = _delta.new_file().path() {
let file_context = ContextAnalyzer::analyze_file(path);
let location = Location {
file_path: path.to_path_buf(),
line_number: line.new_lineno().unwrap_or(0) as usize,
column_start: 0,
column_end: content.len(),
commit_hash: Some(commit.id().to_string()),
commit_author: commit.author().name().map(|s| s.to_string()),
commit_date: Some(
chrono::DateTime::from_timestamp(commit.time().seconds(), 0)
.unwrap_or_default(),
),
};
let context = ContextAnalyzer::build_context(
content.to_string(),
None,
None,
&file_context,
);
let finding = Finding::new(secret, location, context);
findings.push(finding);
}
}
}
true
}),
)?;
Ok(findings)
}
fn scan_working_directory(&self, repo: &Repository) -> Result<Vec<Finding>> {
let workdir = repo
.workdir()
.context("Repository doesn't have a working directory")?;
let filesystem_scanner = crate::scanners::FilesystemScanner::new(workdir.to_path_buf())
.with_entropy_threshold(self.entropy_threshold);
filesystem_scanner.scan()
}
pub fn get_commits(&self) -> Result<Vec<CommitInfo>> {
let repo = Repository::open(&self.repo_path)?;
let mut revwalk = repo.revwalk()?;
revwalk.push_head()?;
let mut commits = Vec::new();
for oid in revwalk {
let oid = oid?;
let commit = repo.find_commit(oid)?;
commits.push(CommitInfo {
hash: commit.id().to_string(),
author: commit.author().name().unwrap_or("Unknown").to_string(),
message: commit.message().unwrap_or("").to_string(),
timestamp: chrono::DateTime::from_timestamp(commit.time().seconds(), 0)
.unwrap_or_default(),
});
}
Ok(commits)
}
}
/// Summary of a single commit, as produced by `GitScanner::get_commits`.
#[derive(Debug, Clone)]
pub struct CommitInfo {
/// Commit id rendered as a string.
pub hash: String,
/// Author name; `get_commits` stores `"Unknown"` when the name is unavailable.
pub author: String,
/// Commit message; `get_commits` stores an empty string when it is unavailable.
pub message: String,
/// Commit time in UTC, built from the commit's timestamp in seconds; `get_commits`
/// falls back to the type's `Default` value when the conversion fails.
pub timestamp: chrono::DateTime<chrono::Utc>,
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Creates a temporary repository whose single commit adds `test.txt`
    /// containing an AWS-style key.
    ///
    /// The `TempDir` is returned so the directory outlives the caller's scan.
    fn create_test_repo() -> Result<(TempDir, Repository)> {
        let temp_dir = TempDir::new()?;
        let repo = Repository::init(temp_dir.path())?;
        fs::write(
            temp_dir.path().join("test.txt"),
            "AWS_KEY=AKIAZ52HGXYRN4WBTEST",
        )?;

        // Stage the file and turn the index into a tree.
        let tree_id = {
            let mut index = repo.index()?;
            index.add_path(Path::new("test.txt"))?;
            index.write()?;
            index.write_tree()?
        };

        let signature = git2::Signature::now("Test", "test@example.com")?;
        {
            // Scoped so the tree's borrow of `repo` ends before `repo` is moved out.
            let tree = repo.find_tree(tree_id)?;
            repo.commit(
                Some("HEAD"),
                &signature,
                &signature,
                "Initial commit",
                &tree,
                &[],
            )?;
        }
        Ok((temp_dir, repo))
    }

    /// Scans a fresh test repository with the given history setting and an
    /// entropy threshold of `3.0`.
    fn scan_test_repo(scan_history: bool) -> Result<Vec<Finding>> {
        let (temp_dir, _repo) = create_test_repo()?;
        let scanner = GitScanner::new(temp_dir.path().to_path_buf())
            .with_history(scan_history)
            .with_entropy_threshold(3.0);
        scanner.scan()
    }

    #[test]
    fn test_git_scanner_creation() {
        assert!(GitScanner::new(PathBuf::from(".")).scan_history);
    }

    #[test]
    fn test_git_scanner_finds_secrets_in_repo() -> Result<()> {
        let findings = scan_test_repo(true)?;
        assert!(!findings.is_empty(), "Should find secrets in git repo");
        Ok(())
    }

    #[test]
    fn test_git_scanner_without_history() -> Result<()> {
        let findings = scan_test_repo(false)?;
        assert!(!findings.is_empty(), "Should find secrets in working directory even without history scanning");
        Ok(())
    }

    #[test]
    fn test_git_scanner_with_max_depth() {
        let configured = GitScanner::new(PathBuf::from(".")).with_max_depth(10);
        assert_eq!(configured.max_depth, Some(10));
    }
}