use crate::core::report::SanitizeReport;
use crate::core::SanitizeConfig;
use crate::git::dangerous::DANGEROUS_COMPONENTS;
use std::fs;
use std::path::Path;
use thiserror::Error;
/// Errors that can be returned while sanitizing a git directory.
#[derive(Error, Debug)]
pub enum SanitizeError {
/// An underlying file-system operation failed; converted automatically
/// from `std::io::Error` (via `#[from]`), so `?` can be used on I/O calls.
#[error("IO error: {0}")]
Io(#[from] std::io::Error),
/// The path handed to `GitSanitizer::sanitize` does not exist; the payload
/// is the display form of that path.
#[error("Git directory not found: {0}")]
NotGitDirectory(String),
/// A permission problem described by the payload string.
/// NOTE(review): not constructed anywhere in this file — presumably used by
/// callers elsewhere; confirm before removing.
#[error("Permission denied: {0}")]
PermissionDenied(String),
}
/// Applies the sanitization steps enabled in a `SanitizeConfig` to a git
/// directory and records what was removed in a `SanitizeReport`.
pub struct GitSanitizer {
// Flags selecting which steps `sanitize` runs, plus the allow-list of
// configuration keys read by `is_dangerous_key`.
config: SanitizeConfig,
}
impl GitSanitizer {
    /// Creates a sanitizer that runs the steps enabled in `config`.
    pub fn new(config: SanitizeConfig) -> Self {
        Self { config }
    }

    /// Runs every enabled sanitization step against `git_dir`.
    ///
    /// Steps run in a fixed order: hooks, config file, attributes, LFS,
    /// submodules. The first failing step aborts the run.
    ///
    /// # Errors
    ///
    /// * [`SanitizeError::NotGitDirectory`] if `git_dir` does not exist.
    /// * [`SanitizeError::Io`] if any file-system operation fails.
    pub fn sanitize(&self, git_dir: &Path) -> Result<SanitizeReport, SanitizeError> {
        if !git_dir.exists() {
            return Err(SanitizeError::NotGitDirectory(
                git_dir.display().to_string(),
            ));
        }

        let mut report = SanitizeReport::default();
        if self.config.remove_hooks {
            self.remove_hooks(git_dir, &mut report)?;
        }
        if self.config.sanitize_config {
            self.sanitize_config_file(git_dir, &mut report)?;
        }
        if self.config.sanitize_attributes {
            self.sanitize_attributes(git_dir, &mut report)?;
        }
        if self.config.disable_lfs {
            self.disable_lfs(git_dir, &mut report)?;
        }
        if self.config.remove_submodules {
            self.remove_submodules(git_dir, &mut report)?;
        }
        Ok(report)
    }

    /// Deletes `<git_dir>/hooks`, recreates it empty and drops a README
    /// explaining why the hooks are gone.
    ///
    /// The file name of every regular file found directly inside the hooks
    /// directory is appended to `report.removed_hooks`.
    fn remove_hooks(
        &self,
        git_dir: &Path,
        report: &mut SanitizeReport,
    ) -> Result<(), SanitizeError> {
        let hooks_dir = git_dir.join("hooks");
        if hooks_dir.exists() {
            // Record what is about to be deleted before wiping the directory.
            for entry in fs::read_dir(&hooks_dir)? {
                let path = entry?.path();
                if path.is_file() {
                    let name = path
                        .file_name()
                        .and_then(|n| n.to_str())
                        .unwrap_or("unknown")
                        .to_string();
                    report.removed_hooks.push(name);
                }
            }
            fs::remove_dir_all(&hooks_dir)?;
        }

        fs::create_dir_all(&hooks_dir)?;
        fs::write(
            hooks_dir.join("README.securegit"),
            "# Hooks removed by securegit\n\n\
             All git hooks were removed during secure acquisition.\n\
             This is a security measure to prevent arbitrary code execution.\n\n\
             If you need hooks, add them manually after reviewing the repository.\n",
        )?;
        Ok(())
    }

    /// Rewrites `<git_dir>/config` with dangerous sections and keys removed.
    ///
    /// Does nothing when the file does not exist. The file is always written
    /// back, with `\n` line endings, even when nothing was removed.
    fn sanitize_config_file(
        &self,
        git_dir: &Path,
        report: &mut SanitizeReport,
    ) -> Result<(), SanitizeError> {
        let config_path = git_dir.join("config");
        if !config_path.exists() {
            return Ok(());
        }
        let content = fs::read_to_string(&config_path)?;
        let sanitized = self.filter_config(&content, report);
        fs::write(&config_path, sanitized)?;
        Ok(())
    }

    /// Returns `content` with dangerous sections and keys stripped.
    ///
    /// The text is processed line by line, following git's config syntax
    /// closely enough that the sanitizer and git agree on which key each line
    /// belongs to:
    ///
    /// * Section headers may carry a subsection (`[filter "lfs"]`, or the
    ///   deprecated `[filter.lfs]`). Keys are checked under their dotted name
    ///   (`filter.lfs.clean`).
    /// * A key may follow the header on the same line (`[core] editor = vi`).
    ///   If that key is dangerous only the header is kept.
    /// * A value ending in a backslash continues on the next physical line.
    ///   Continuation lines are kept or dropped together with their key and
    ///   are never interpreted as headers or keys themselves.
    /// * Keys without `=` (implicit `true`) are checked like any other key.
    ///
    /// Removed sections are recorded in `report.removed_config_sections` using
    /// the header text between the brackets; removed keys are recorded in
    /// `report.removed_config_keys` using their dotted name.
    fn filter_config(&self, content: &str, report: &mut SanitizeReport) -> String {
        let mut output = String::with_capacity(content.len());
        // Dotted prefix of the section the current line belongs to.
        let mut current_section = String::new();
        let mut skip_section = false;
        // `Some(in_quotes)` while the previous physical line ended in a
        // backslash continuation; the flag carries the open-quote state.
        let mut continuation: Option<bool> = None;
        // Whether the logical entry currently being read is being removed.
        let mut dropping_entry = false;

        for line in content.lines() {
            if let Some(in_quotes) = continuation {
                // Part of the previous value: share its fate, never parse it.
                continuation = Self::scan_value(line.trim_end(), in_quotes);
                if !dropping_entry {
                    output.push_str(line);
                    output.push('\n');
                }
                continue;
            }

            let trimmed = line.trim();
            // Portion of the line that may hold `key = value`.
            let mut entry = trimmed;
            // Literal `[...]` text when this line opens a section.
            let mut header: Option<&str> = None;

            if let Some((inner, rest)) = Self::split_section_header(trimmed) {
                current_section = Self::section_key_prefix(inner);
                skip_section = self.is_dangerous_section(inner);
                if skip_section {
                    report.removed_config_sections.push(inner.to_string());
                }
                header = Some(&trimmed[..trimmed.len() - rest.len()]);
                entry = rest.trim_start();
            }

            let key = self.extract_config_key(entry);
            // Track continuations even for lines that end up dropped, so the
            // following physical line is attributed correctly.
            continuation = match (key, entry.split_once('=')) {
                (Some(_), Some((_, value))) => Self::scan_value(value, false),
                _ => None,
            };

            if skip_section {
                dropping_entry = true;
                continue;
            }

            if let Some(key) = key {
                let full_key = if current_section.is_empty() {
                    key.to_string()
                } else {
                    format!("{}.{}", current_section, key)
                };
                if self.is_dangerous_key(&full_key) {
                    report.removed_config_keys.push(full_key);
                    dropping_entry = true;
                    // Keep the section open for the keys that follow.
                    if let Some(header) = header {
                        output.push_str(header);
                        output.push('\n');
                    }
                    continue;
                }
            }

            dropping_entry = false;
            output.push_str(line);
            output.push('\n');
        }
        output
    }

    /// Splits a trimmed line that opens a section into the text between the
    /// brackets and whatever follows the closing bracket.
    ///
    /// A `]` inside a quoted subsection name does not end the header.
    /// Returns `None` when the line does not start with `[` or the header is
    /// never closed.
    fn split_section_header(line: &str) -> Option<(&str, &str)> {
        let body = line.strip_prefix('[')?;
        let mut in_quotes = false;
        let mut escaped = false;
        for (idx, ch) in body.char_indices() {
            if escaped {
                escaped = false;
                continue;
            }
            match ch {
                '\\' if in_quotes => escaped = true,
                '"' => in_quotes = !in_quotes,
                // `]` is a single byte, so `idx + 1` is a char boundary.
                ']' if !in_quotes => return Some((&body[..idx], &body[idx + 1..])),
                _ => {}
            }
        }
        None
    }

    /// Converts header text (`filter "lfs"`) into the dotted prefix used for
    /// key names (`filter.lfs`). Headers without a quoted subsection are
    /// returned trimmed but otherwise unchanged.
    fn section_key_prefix(header: &str) -> String {
        let header = header.trim();
        let quote_at = match header.find('"') {
            Some(pos) => pos,
            None => return header.to_string(),
        };
        let name = header[..quote_at].trim_end();
        let quoted = &header[quote_at + 1..];
        let quoted = quoted.strip_suffix('"').unwrap_or(quoted);

        let mut prefix = String::with_capacity(name.len() + 1 + quoted.len());
        prefix.push_str(name);
        prefix.push('.');
        // Inside the quotes a backslash escapes the character that follows.
        let mut chars = quoted.chars();
        while let Some(ch) = chars.next() {
            if ch == '\\' {
                if let Some(escaped) = chars.next() {
                    prefix.push(escaped);
                }
            } else {
                prefix.push(ch);
            }
        }
        prefix
    }

    /// Scans value text and reports whether it continues on the next line.
    ///
    /// Returns `Some(in_quotes)` when the text ends with an unescaped
    /// backslash outside a comment (the flag is the quote state to resume
    /// with), and `None` otherwise. A `#` or `;` outside quotes starts a
    /// comment, in which a trailing backslash has no effect.
    ///
    /// Callers pass text with trailing whitespace removed, so a backslash
    /// followed only by blanks is treated as a continuation.
    fn scan_value(text: &str, mut in_quotes: bool) -> Option<bool> {
        let mut chars = text.chars();
        while let Some(ch) = chars.next() {
            match ch {
                '\\' => {
                    // A backslash either escapes the next character or, at
                    // the end of the line, joins the next line to this one.
                    if chars.next().is_none() {
                        return Some(in_quotes);
                    }
                }
                '"' => in_quotes = !in_quotes,
                '#' | ';' if !in_quotes => return None,
                _ => {}
            }
        }
        None
    }

    /// Reports whether a section is removed wholesale.
    ///
    /// `section` may be a bare name (`filter`) or full header text including
    /// a subsection (`filter "lfs"`, `filter.lfs`); only the section name is
    /// compared, case-insensitively.
    fn is_dangerous_section(&self, section: &str) -> bool {
        let name = section
            .trim_start()
            .split(|c: char| c.is_whitespace() || c == '.' || c == '"')
            .next()
            .unwrap_or("");
        matches!(
            name.to_lowercase().as_str(),
            "filter" | "credential" | "lfs" | "include" | "includeif"
        )
    }

    /// Reports whether the dotted configuration key `key` must be removed.
    ///
    /// The key is lower-cased and compared against every pattern in
    /// `DANGEROUS_COMPONENTS.dangerous_config_keys`; patterns may contain `*`
    /// wildcards. Patterns are compared as written, so they are expected to
    /// be lower-case.
    fn is_dangerous_key(&self, key: &str) -> bool {
        let key_lower = key.to_lowercase();
        for pattern in DANGEROUS_COMPONENTS.dangerous_config_keys {
            if Self::wildcard_match(pattern, &key_lower) {
                return true;
            }
        }

        // NOTE(review): the allow-list is only consulted after the deny-list
        // and every path below yields `false`, so `allowed_config_keys`
        // cannot change the verdict today. That behaviour is preserved here;
        // decide whether keys on neither list should be rejected before
        // relying on the allow-list.
        for allowed in &self.config.allowed_config_keys {
            if Self::wildcard_match(&allowed.to_lowercase(), &key_lower) {
                return false;
            }
        }
        false
    }

    /// Matches `text` against `pattern`, where each `*` in the pattern stands
    /// for any run of characters (including none). Without a `*` the two
    /// strings must be equal. The pieces around a wildcard may not overlap,
    /// so `filter.*.clean` does not match `filter.clean`.
    fn wildcard_match(pattern: &str, text: &str) -> bool {
        let mut pieces = pattern.split('*');
        let first = pieces.next().unwrap_or("");
        let mut rest = match text.strip_prefix(first) {
            Some(rest) => rest,
            None => return false,
        };
        let mut pending = match pieces.next() {
            Some(piece) => piece,
            // No wildcard at all: require an exact match.
            None => return rest.is_empty(),
        };
        for next in pieces {
            // `pending` sits between two wildcards: take its first occurrence.
            match rest.find(pending) {
                Some(pos) => rest = &rest[pos + pending.len()..],
                None => return false,
            }
            pending = next;
        }
        rest.ends_with(pending)
    }

    /// Extracts the key name from a trimmed config line.
    ///
    /// Returns `None` for empty lines and comments (`#` or `;`). Otherwise
    /// returns the trimmed text before the first `=`, or the whole line when
    /// there is no `=` (a valueless key).
    fn extract_config_key<'a>(&self, line: &'a str) -> Option<&'a str> {
        if line.is_empty() || line.starts_with('#') || line.starts_with(';') {
            return None;
        }
        let name = match line.split_once('=') {
            Some((name, _value)) => name,
            None => line,
        };
        Some(name.trim())
    }

    /// Removes `<git_dir>/info/attributes` if present and records it in
    /// `report.removed_files`.
    fn sanitize_attributes(
        &self,
        git_dir: &Path,
        report: &mut SanitizeReport,
    ) -> Result<(), SanitizeError> {
        let info_attrs = git_dir.join("info/attributes");
        if info_attrs.exists() {
            fs::remove_file(&info_attrs)?;
            report.removed_files.push("info/attributes".to_string());
        }
        Ok(())
    }

    /// Removes `<git_dir>/lfs` recursively if present and records it in
    /// `report.removed_files`.
    fn disable_lfs(
        &self,
        git_dir: &Path,
        report: &mut SanitizeReport,
    ) -> Result<(), SanitizeError> {
        let lfs_dir = git_dir.join("lfs");
        if lfs_dir.exists() {
            fs::remove_dir_all(&lfs_dir)?;
            report.removed_files.push("lfs/".to_string());
        }
        Ok(())
    }

    /// Removes `<git_dir>/modules` recursively if present and records it in
    /// `report.removed_files`.
    ///
    /// A warning about `.gitmodules` is always added to `report.warnings`,
    /// whether or not anything was removed; that file is not touched here.
    fn remove_submodules(
        &self,
        git_dir: &Path,
        report: &mut SanitizeReport,
    ) -> Result<(), SanititzeErrorAlias> {
        let modules_dir = git_dir.join("modules");
        if modules_dir.exists() {
            fs::remove_dir_all(&modules_dir)?;
            report.removed_files.push("modules/".to_string());
        }
        report
            .warnings
            .push("Submodule configuration may exist in .gitmodules - review manually".to_string());
        Ok(())
    }
}

// Local alias so the long signature above stays on one line.
type SanititzeErrorAlias = SanitizeError;
#[cfg(test)]
mod tests {
    use super::*;

    /// Every name in the section deny-list is flagged, regardless of case,
    /// while ordinary sections are left alone.
    #[test]
    fn test_dangerous_section_detection() {
        let sanitizer = GitSanitizer::new(SanitizeConfig::default());

        for section in ["filter", "credential", "lfs", "include", "includeif"] {
            assert!(
                sanitizer.is_dangerous_section(section),
                "expected section `{}` to be dangerous",
                section
            );
        }
        // Git section names are case-insensitive.
        for section in ["Filter", "CREDENTIAL", "includeIf"] {
            assert!(
                sanitizer.is_dangerous_section(section),
                "expected section `{}` to be dangerous",
                section
            );
        }
        for section in ["core", "remote"] {
            assert!(
                !sanitizer.is_dangerous_section(section),
                "expected section `{}` to be safe",
                section
            );
        }
    }

    /// Keys on the deny-list are flagged, regardless of case, and a routine
    /// repository key is not.
    #[test]
    fn test_dangerous_key_detection() {
        let sanitizer = GitSanitizer::new(SanitizeConfig::default());

        for key in ["core.fsmonitor", "credential.helper", "filter.lfs.clean"] {
            assert!(
                sanitizer.is_dangerous_key(key),
                "expected key `{}` to be dangerous",
                key
            );
        }
        // Key lookups lower-case their input before matching.
        assert!(sanitizer.is_dangerous_key("Core.FSMonitor"));
        assert!(!sanitizer.is_dangerous_key("core.repositoryformatversion"));
    }
}