use std::collections::HashSet;
use std::path::Path;
/// Placeholder text written to the sanitized output in place of a diff line
/// that `line_contains_secret` flags.
const REDACTION_MARKER: &str = "[REDACTED SECRET LINE]";
/// Outcome of running a diff through `sanitize_diff`.
#[derive(Debug)]
pub struct SanitizedDiff {
    /// The diff text after sensitive sections were omitted and secret-looking
    /// lines were replaced by the redaction marker.
    pub diff: String,
    /// How many individual lines were replaced by the redaction marker.
    pub redacted_lines: usize,
    /// Sorted paths of the files that were classified as sensitive.
    pub omitted_files: Vec<String>,
}
/// Removes likely secrets from a git diff before it is shown or forwarded.
///
/// The diff is split into per-file sections at every `diff --git ` header.
/// A section whose file is classified as sensitive is replaced by a short
/// placeholder; in every other section, lines that look like they carry a
/// secret are replaced by the redaction marker.
///
/// A file counts as sensitive when `is_sensitive_file` accepts its path,
/// whether the path comes from `changed_files` or from a `diff --git` header
/// inside `diff`. Checking the headers as well means a sensitive file is still
/// omitted when the caller's `changed_files` list is empty or incomplete.
///
/// # Parameters
/// - `diff`: the raw diff text. It is processed line by line, so the output
///   always uses `\n` line endings.
/// - `changed_files`: paths of the files known to have changed.
///
/// # Returns
/// A [`SanitizedDiff`] holding the sanitized text, the number of redacted
/// lines, and the sorted list of sensitive paths. When that list is not empty,
/// the sanitized text starts with one notice line per sensitive file followed
/// by a blank line.
pub fn sanitize_diff(diff: &str, changed_files: &[String]) -> SanitizedDiff {
    let mut sensitive_files: HashSet<String> = changed_files
        .iter()
        .filter(|path| is_sensitive_file(path))
        .cloned()
        .collect();

    let mut output = String::new();
    let mut section: Vec<String> = Vec::new();
    let mut current_file: Option<String> = None;
    let mut redacted_lines = 0usize;

    for line in diff.lines() {
        if line.starts_with("diff --git ") {
            // A new file header ends the previous section; emit it first.
            flush_diff_section(
                &mut output,
                &section,
                current_file.as_deref(),
                &sensitive_files,
                &mut redacted_lines,
            );
            section.clear();
            current_file = parse_diff_file_path(line);

            // Classify the path found in the diff itself so that a sensitive
            // file missing from `changed_files` is not emitted verbatim.
            if let Some(path) = current_file.as_deref() {
                if is_sensitive_file(path) && !sensitive_files.contains(path) {
                    sensitive_files.insert(path.to_string());
                }
            }
        }
        section.push(line.to_string());
    }

    // The last section has no following header to trigger its flush.
    flush_diff_section(
        &mut output,
        &section,
        current_file.as_deref(),
        &sensitive_files,
        &mut redacted_lines,
    );

    // Sort so the notice block and the returned list are deterministic
    // regardless of hash-set iteration order.
    let mut omitted_files: Vec<String> = sensitive_files.into_iter().collect();
    omitted_files.sort_unstable();

    if !omitted_files.is_empty() {
        let notice = omitted_files
            .iter()
            .map(|f| format!("Sensitive file changed (content omitted): {f}"))
            .collect::<Vec<_>>()
            .join("\n");
        output = format!("{notice}\n\n{output}");
    }

    SanitizedDiff {
        diff: output,
        redacted_lines,
        omitted_files,
    }
}
/// Appends one buffered diff section to `out`, hiding sensitive content.
///
/// - An empty `section` writes nothing.
/// - When `file` is present in `sensitive_files`, the whole section is
///   replaced by a two-line placeholder (a synthetic `diff --git` header and
///   an "omitted" hunk marker) and `redacted_lines` is left untouched.
/// - Otherwise each line is copied through, except that lines flagged by
///   `line_contains_secret` are replaced by `REDACTION_MARKER` and counted in
///   `redacted_lines`.
///
/// Every emitted line is terminated with `\n`.
fn flush_diff_section(
    out: &mut String,
    section: &[String],
    file: Option<&str>,
    sensitive_files: &HashSet<String>,
    redacted_lines: &mut usize,
) {
    if section.is_empty() {
        return;
    }

    match file {
        Some(path) if sensitive_files.contains(path) => {
            let placeholder = format!(
                "diff --git a/{path} b/{path}\n@@ content omitted for sensitive file @@\n"
            );
            out.push_str(&placeholder);
            return;
        }
        _ => {}
    }

    for entry in section.iter().map(String::as_str) {
        let flagged = line_contains_secret(entry);
        if flagged {
            *redacted_lines += 1;
        }
        out.push_str(if flagged { REDACTION_MARKER } else { entry });
        out.push('\n');
    }
}
/// Extracts the destination path from a `diff --git <old> <new>` header line.
///
/// Returns the second path with a leading `b/` removed, or `None` when the
/// line does not carry two paths.
///
/// Paths may contain spaces, so the header is not simply split on whitespace.
/// Three strategies are tried in order:
/// 1. If the text after `diff --git ` splits at its exact midpoint into two
///    halves that are equal once the `a/` / `b/` prefixes are removed, the
///    second half is the path (the case where the file was not renamed).
/// 2. Otherwise the text after the last ` b/` is used (renames whose names
///    contain spaces).
/// 3. Otherwise the fourth whitespace-separated token of the line is used,
///    which covers headers without `a/` / `b/` prefixes.
///
/// Quoted header paths are not unquoted here; they are returned as written.
fn parse_diff_file_path(line: &str) -> Option<String> {
    if let Some(rest) = line.strip_prefix("diff --git ") {
        let rest = rest.trim();
        let bytes = rest.as_bytes();

        // An odd length with a space in the exact middle is the only layout in
        // which "<old> <new>" can consist of two equally long halves. The
        // middle byte being an ASCII space also guarantees that both slice
        // boundaries fall on character boundaries.
        if bytes.len() % 2 == 1 {
            let mid = bytes.len() / 2;
            if bytes[mid] == b' ' {
                let old = &rest[..mid];
                let new = &rest[mid + 1..];
                if old.strip_prefix("a/").unwrap_or(old) == new.strip_prefix("b/").unwrap_or(new) {
                    return Some(normalize_diff_path(new));
                }
            }
        }

        if let Some(idx) = rest.rfind(" b/") {
            return Some(normalize_diff_path(&rest[idx + 1..]));
        }
    }

    // Fallback: skip `diff` and `--git`, require the old path, take the new one.
    let mut parts = line.split_whitespace().skip(2);
    let _a_path = parts.next()?;
    parts.next().map(normalize_diff_path)
}

/// Removes a leading `b/` from `path`, returning the path unchanged when the
/// prefix is absent.
fn normalize_diff_path(path: &str) -> String {
    path.strip_prefix("b/").unwrap_or(path).to_string()
}
/// Decides whether a changed file is likely to hold secrets, judging by its
/// path alone. The comparison is ASCII case-insensitive.
///
/// A path is sensitive when any of the following holds:
/// - the path contains `.env` or `credentials` anywhere;
/// - the file name ends with a key or certificate-store extension
///   (`.pem`, `.p12`, `.pfx`, `.key`);
/// - the file name is exactly a default OpenSSH private-key name
///   (`id_rsa`, `id_dsa`, `id_ecdsa`, `id_ed25519`, and the `_sk` variants);
///   the matching `.pub` files are not flagged by this rule;
/// - the file name contains `secret`.
fn is_sensitive_file(path: &str) -> bool {
    const KEY_EXTENSIONS: [&str; 4] = [".pem", ".p12", ".pfx", ".key"];
    const SSH_PRIVATE_KEY_NAMES: [&str; 6] = [
        "id_rsa",
        "id_dsa",
        "id_ecdsa",
        "id_ecdsa_sk",
        "id_ed25519",
        "id_ed25519_sk",
    ];

    let path_lower = path.to_ascii_lowercase();
    // ASCII lowercasing leaves path separators untouched, so the file name can
    // be taken from the already-lowercased path.
    let file_name = Path::new(&path_lower)
        .file_name()
        .and_then(|n| n.to_str())
        .unwrap_or("");

    path_lower.contains(".env")
        || path_lower.contains("credentials")
        || KEY_EXTENSIONS.iter().any(|ext| file_name.ends_with(ext))
        || SSH_PRIVATE_KEY_NAMES.iter().any(|name| file_name == *name)
        || file_name.contains("secret")
}
/// Reports whether a single diff line looks like it carries a secret.
///
/// The line is upper-cased (ASCII only) and searched for a fixed set of
/// substrings, so the match is case-insensitive and purely textual.
fn line_contains_secret(line: &str) -> bool {
    const MARKERS: [&str; 6] = [
        "API_KEY=",
        "SECRET",
        "PRIVATE KEY",
        "TOKEN=",
        "PASSWORD=",
        "ACCESS_KEY",
    ];

    let normalized = line.to_ascii_uppercase();
    for marker in MARKERS.iter() {
        if normalized.contains(*marker) {
            return true;
        }
    }
    false
}
#[cfg(test)]
mod tests {
    use super::sanitize_diff;

    /// A section belonging to a sensitive file is reported and its content
    /// replaced by the "content omitted" placeholder.
    #[test]
    fn sensitive_files_are_omitted() {
        let changed = vec![".env".to_string()];
        let raw = "diff --git a/.env b/.env\n+API_KEY=abc\n";

        let result = sanitize_diff(raw, &changed);

        assert_eq!(result.omitted_files, vec![".env".to_string()]);
        assert!(result.diff.contains("content omitted"));
    }

    /// In a non-sensitive file only the secret-looking line is redacted.
    #[test]
    fn lines_with_secret_patterns_are_redacted() {
        let changed = vec!["src/app.rs".to_string()];
        let raw = "diff --git a/src/app.rs b/src/app.rs\n+const TOKEN=\"abc\";\n+let x = 1;\n";

        let result = sanitize_diff(raw, &changed);

        assert_eq!(result.redacted_lines, 1);
        assert!(result.diff.contains("[REDACTED SECRET LINE]"));
    }
}