use anyhow::{Context, Result, anyhow, bail};
use indicatif::{ProgressBar, ProgressStyle};
use rayon::ThreadPool;
use rayon::ThreadPoolBuilder;
use rayon::prelude::*;
use std::fs;
use std::path::{Path, PathBuf};
use std::sync::{
Arc,
atomic::{AtomicBool, Ordering},
};
use std::time::Duration;
use tempfile::NamedTempFile;
use tracing::{info, warn};
use walkdir::WalkDir;
use crate::audit;
use crate::cli::Cli;
use crate::hashing;
use crate::models::{DiscoveryResult, FileKind, FileReport, FileStatus, FileTask, RunSummary};
use crate::sanitizers;
pub fn run(cli: Cli) -> Result<()> {
let input = canonicalize_existing(&cli.input)
.with_context(|| format!("failed to resolve {}", cli.input.display()))?;
let input_is_directory = input.is_dir();
let output_root = match cli.output {
Some(ref output) => Some(canonicalize_or_create_dir(output)?),
None => None,
};
if input_is_directory
&& output_root
.as_ref()
.is_some_and(|output_root| output_root == &input)
{
bail!("output directory must not be the same as the input directory");
}
let log_root = audit_root(&input, input_is_directory, output_root.as_ref())?;
let _audit_guard = audit::init(&log_root)?;
let stop_flag = Arc::new(AtomicBool::new(false));
install_ctrlc_handler(stop_flag.clone())?;
let pool = build_thread_pool(cli.jobs)?;
let discovery = discover_tasks(&input, input_is_directory, output_root.as_ref())?;
if discovery.tasks.is_empty() {
info!(input = %input.display(), "no supported files found");
return Ok(());
}
let progress = configured_progress(discovery.tasks.len() as u64)?;
let reports: Vec<FileReport> = run_parallel(
pool.as_ref(),
discovery.tasks,
cli.dry_run,
stop_flag.clone(),
progress.clone(),
);
if stop_flag.load(Ordering::SeqCst) {
progress.finish_with_message("interrupted");
} else {
progress.finish_with_message("complete");
}
render_reports(&reports, cli.dry_run);
let summary = RunSummary::from_reports(&reports, discovery.skipped_unsupported);
log_summary(&summary);
if summary.interrupted {
bail!("processing was interrupted");
}
if summary.failed > 0 {
bail!("one or more files failed to sanitize");
}
Ok(())
}
/// Processes every task with rayon's parallel iterator and collects the reports.
///
/// When `pool` is `Some`, the work is executed inside that pool via `install`; otherwise
/// the parallel iterator is driven directly from the calling context.
fn run_parallel(
    pool: Option<&ThreadPool>,
    tasks: Vec<FileTask>,
    dry_run: bool,
    stop_flag: Arc<AtomicBool>,
    progress: ProgressBar,
) -> Vec<FileReport> {
    // One pipeline shared by both branches; each task receives its own handle to the
    // stop flag and the progress bar.
    let process_all = move || -> Vec<FileReport> {
        tasks
            .into_par_iter()
            .map(|task| process_task(task, dry_run, Arc::clone(&stop_flag), progress.clone()))
            .collect()
    };

    match pool {
        Some(dedicated) => dedicated.install(process_all),
        None => process_all(),
    }
}
fn process_task(
task: FileTask,
dry_run: bool,
stop_flag: Arc<AtomicBool>,
progress: ProgressBar,
) -> FileReport {
let source = task.source.clone();
let destination = task.destination.clone();
let kind = task.kind;
let report = process_task_inner(task, dry_run, stop_flag);
progress.inc(1);
match report {
Ok(report) => report,
Err(error) => FileReport::failed(source, destination, kind, None, error.to_string()),
}
}
fn process_task_inner(
task: FileTask,
dry_run: bool,
stop_flag: Arc<AtomicBool>,
) -> Result<FileReport> {
if stop_flag.load(Ordering::SeqCst) {
return Ok(FileReport::interrupted(
task.source,
task.destination,
task.kind,
None,
));
}
let sanitizer = sanitizers::sanitizer_for(task.kind);
debug_assert_eq!(sanitizer.kind(), task.kind);
let original_hash = hashing::hash_file(&task.source)?;
if dry_run {
let plan = sanitizer.plan(&task.source)?;
let report = FileReport::dry_run(
task.source.clone(),
task.destination.clone(),
task.kind,
original_hash,
plan.removed_items.clone(),
);
log_file_report(&report);
return Ok(report);
}
if let Some(parent) = task.destination.parent() {
fs::create_dir_all(parent)
.with_context(|| format!("failed to create {}", parent.display()))?;
}
let temp_dir = task
.destination
.parent()
.map(Path::to_path_buf)
.unwrap_or_else(|| PathBuf::from("."));
let mut temp_file = NamedTempFile::new_in(&temp_dir)
.with_context(|| format!("failed to create temp file in {}", temp_dir.display()))?;
let plan = sanitizer.sanitize(&task.source, temp_file.as_file_mut())?;
temp_file.as_file_mut().sync_all()?;
let validation = sanitizer.plan(temp_file.path())?;
if !validation.is_empty() {
return Err(anyhow!(
"sanitized file still contains metadata: {}",
validation.removed_items.join(", ")
));
}
let sanitized_hash = hashing::hash_file(temp_file.path())?;
if stop_flag.load(Ordering::SeqCst) {
return Ok(FileReport::interrupted(
task.source,
task.destination,
task.kind,
Some(original_hash),
));
}
temp_file.persist(&task.destination).with_context(|| {
format!(
"failed to persist sanitized file to {}",
task.destination.display()
)
})?;
let persisted_hash = hashing::hash_file(&task.destination)?;
if persisted_hash != sanitized_hash {
if task.destination != task.source {
let _ = fs::remove_file(&task.destination);
}
return Err(anyhow!(
"post-persist hash mismatch for {}: expected {}, got {}",
task.destination.display(),
sanitized_hash,
persisted_hash
));
}
let report = FileReport::sanitized(
task.source,
task.destination,
task.kind,
original_hash,
sanitized_hash,
plan.removed_items,
);
log_file_report(&report);
Ok(report)
}
fn discover_tasks(
input: &Path,
input_is_directory: bool,
output_root: Option<&PathBuf>,
) -> Result<DiscoveryResult> {
if !input_is_directory {
let kind = FileKind::from_path(input).ok_or_else(|| {
anyhow!(
"unsupported file type for {}. Supported extensions: {}",
input.display(),
FileKind::supported_extensions().join(", ")
)
})?;
let destination = output_root
.and_then(|output_root| {
input
.file_name()
.map(|file_name| output_root.join(file_name))
})
.unwrap_or_else(|| input.to_path_buf());
return Ok(DiscoveryResult {
tasks: vec![FileTask {
source: input.to_path_buf(),
destination,
kind,
}],
skipped_unsupported: 0,
});
}
let mut tasks = Vec::new();
let mut skipped_unsupported = 0_usize;
let skip_output_root = output_root.and_then(|candidate| {
if candidate.starts_with(input) {
Some(candidate.clone())
} else {
None
}
});
for entry in WalkDir::new(input).follow_links(false).sort_by_file_name() {
let entry = match entry {
Ok(entry) => entry,
Err(error) => {
warn!(error = %error, "skipping unreadable entry");
skipped_unsupported += 1;
continue;
}
};
if !entry.file_type().is_file() {
continue;
}
let source = entry.path();
if source.file_name().and_then(|name| name.to_str()) == Some(".eclipse_audit.log") {
continue;
}
if skip_output_root
.as_ref()
.is_some_and(|skip_root| source.starts_with(skip_root))
{
continue;
}
let kind = match FileKind::from_path(source) {
Some(kind) => kind,
None => {
skipped_unsupported += 1;
continue;
}
};
let destination = if let Some(output_root) = output_root {
output_root.join(source.strip_prefix(input).with_context(|| {
format!("failed to compute relative path for {}", source.display())
})?)
} else {
source.to_path_buf()
};
tasks.push(FileTask {
source: source.to_path_buf(),
destination,
kind,
});
}
tasks.sort_by(|left, right| left.source.cmp(&right.source));
Ok(DiscoveryResult {
tasks,
skipped_unsupported,
})
}
/// Creates a progress bar of length `total` with the tool's template, `=>-` progress
/// characters, and a 100 ms steady tick.
///
/// # Errors
///
/// Fails when the progress template cannot be parsed.
fn configured_progress(total: u64) -> Result<ProgressBar> {
    const TEMPLATE: &str =
        "{spinner:.green} [{elapsed_precise}] [{bar:40.cyan/blue}] {pos}/{len} {msg}";
    const TICK_INTERVAL: Duration = Duration::from_millis(100);

    let bar = ProgressBar::new(total);
    bar.set_style(ProgressStyle::with_template(TEMPLATE)?.progress_chars("=>-"));
    bar.enable_steady_tick(TICK_INTERVAL);
    Ok(bar)
}
/// Prints one formatted line per report to standard output, in slice order.
fn render_reports(reports: &[FileReport], dry_run: bool) {
    reports
        .iter()
        .map(|report| format_report(report, dry_run))
        .for_each(|line| println!("{}", line));
}
/// Renders a single report as a one-line, human-readable string.
///
/// The line starts with a status tag (`[dry-run]`, `[ok]`, `[error]` or `[interrupted]`),
/// followed by the source path and the kind label. Dry-run reports list the items that
/// would be removed, sanitized reports give the number of removed markers, and failed
/// reports carry the error text (or `unknown error` when none is recorded). A sanitized
/// report is tagged `[dry-run]` instead of `[ok]` when `dry_run` is set.
fn format_report(report: &FileReport, dry_run: bool) -> String {
    let path = report.source.display();
    let label = report.kind.label();
    let nothing_removed = report.removed_items.is_empty();

    match report.status {
        FileStatus::DryRun => {
            let removed = match nothing_removed {
                true => "no metadata markers found".to_string(),
                false => report.removed_items.join(", "),
            };
            format!("[dry-run] {} ({}) -> {}", path, label, removed)
        }
        FileStatus::Sanitized => {
            let removed = match nothing_removed {
                true => "no metadata markers found".to_string(),
                false => format!("{} metadata markers removed", report.removed_items.len()),
            };
            if !dry_run {
                return format!("[ok] {} ({}) -> {}", path, label, removed);
            }
            format!("[dry-run] {} ({}) -> {}", path, label, removed)
        }
        FileStatus::Failed => {
            let reason = report.error.as_deref().unwrap_or("unknown error");
            format!("[error] {} ({}) -> {}", path, label, reason)
        }
        FileStatus::Interrupted => format!("[interrupted] {} ({})", path, label),
    }
}
/// Emits one info-level `file processed` event on the `eclipse::audit` target describing
/// the outcome for a single file.
///
/// Missing hashes and a missing error are logged as empty strings.
fn log_file_report(report: &FileReport) {
    let status_label = match report.status {
        FileStatus::DryRun => "dry_run",
        FileStatus::Sanitized => "sanitized",
        FileStatus::Failed => "failed",
        FileStatus::Interrupted => "interrupted",
    };
    let original_hash_text = report.original_hash.as_deref().unwrap_or("");
    let sanitized_hash_text = report.sanitized_hash.as_deref().unwrap_or("");
    let error_text = report.error.as_deref().unwrap_or("");

    info!(
        target: "eclipse::audit",
        timestamp = %chrono::Utc::now().to_rfc3339(),
        status = status_label,
        kind = report.kind.label(),
        source = %report.source.display(),
        destination = %report.destination.display(),
        original_hash = original_hash_text,
        sanitized_hash = sanitized_hash_text,
        removed_items = ?report.removed_items,
        error = error_text,
        "file processed"
    );
}
/// Emits a single info-level `run complete` event on the `eclipse::audit` target that
/// carries the counters and the interrupted flag read from `summary`.
fn log_summary(summary: &RunSummary) {
info!(
target: "eclipse::audit",
discovered = summary.discovered,
skipped_unsupported = summary.skipped_unsupported,
dry_run = summary.dry_run,
sanitized = summary.sanitized,
failed = summary.failed,
interrupted = summary.interrupted,
"run complete"
);
}
/// Registers a CTRL+C handler that sets `stop_flag` to `true`.
///
/// # Errors
///
/// Fails when the handler cannot be installed.
fn install_ctrlc_handler(stop_flag: Arc<AtomicBool>) -> Result<()> {
    let request_stop = move || stop_flag.store(true, Ordering::SeqCst);
    ctrlc::set_handler(request_stop).context("failed to install CTRL+C handler")
}
/// Builds a dedicated rayon thread pool when an explicit job count is given.
///
/// Returns `Ok(None)` when `jobs` is `None`, leaving the choice of pool to the caller.
///
/// # Errors
///
/// Fails when `jobs` is `Some(0)` or when the pool cannot be built.
fn build_thread_pool(jobs: Option<usize>) -> Result<Option<ThreadPool>> {
    let Some(threads) = jobs else {
        return Ok(None);
    };
    if threads == 0 {
        bail!("jobs must be at least 1");
    }
    let pool = ThreadPoolBuilder::new()
        .num_threads(threads)
        .build()
        .context("failed to build rayon thread pool")?;
    Ok(Some(pool))
}
fn canonicalize_existing(path: &Path) -> Result<PathBuf> {
fs::canonicalize(path).with_context(|| format!("{} does not exist", path.display()))
}
fn canonicalize_or_create_dir(path: &Path) -> Result<PathBuf> {
if path.exists() {
if !path.is_dir() {
bail!("{} exists but is not a directory", path.display());
}
} else {
fs::create_dir_all(path).with_context(|| format!("failed to create {}", path.display()))?;
}
fs::canonicalize(path).with_context(|| format!("failed to resolve {}", path.display()))
}
/// Chooses the directory passed to audit initialisation.
///
/// Preference order: the output root when one is given, the input itself when it is a
/// directory, the input's parent directory, and finally the current working directory.
///
/// # Errors
///
/// Fails only when the current working directory is needed and cannot be resolved.
fn audit_root(
    input: &Path,
    input_is_directory: bool,
    output_root: Option<&PathBuf>,
) -> Result<PathBuf> {
    if let Some(root) = output_root {
        return Ok(root.clone());
    }
    if input_is_directory {
        return Ok(input.to_path_buf());
    }
    match input.parent() {
        Some(parent) => Ok(parent.to_path_buf()),
        None => std::env::current_dir().context("failed to resolve current directory"),
    }
}