use serde::Serialize;
use std::collections::HashMap;
use std::sync::Mutex;
use std::time::Instant;
use crate::scanner::ScanStats;
/// Top-level JSON-serializable report for a complete sanitize run.
#[derive(Debug, Clone, Serialize)]
pub struct SanitizeReport {
/// Run configuration echoed back into the report (version, flags, ...).
pub metadata: ReportMetadata,
/// Totals aggregated across all recorded files.
pub summary: ReportSummary,
/// One entry per processed file, in the order they were recorded.
pub files: Vec<FileReport>,
}
impl SanitizeReport {
/// Serializes the report to a compact (single-line) JSON string.
///
/// # Errors
///
/// Returns a `serde_json` error if serialization fails.
pub fn to_json(&self) -> serde_json::Result<String> {
serde_json::to_string(self)
}
/// Serializes the report to an indented, human-readable JSON string.
///
/// # Errors
///
/// Returns a `serde_json` error if serialization fails.
pub fn to_json_pretty(&self) -> serde_json::Result<String> {
serde_json::to_string_pretty(self)
}
}
/// Run configuration captured in the report header.
#[derive(Debug, Clone, Serialize)]
pub struct ReportMetadata {
/// Tool version string.
pub version: String,
/// Caller-supplied run timestamp (not generated by this module).
pub timestamp: String,
/// Deterministic-mode flag as configured for the run.
pub deterministic: bool,
/// Dry-run flag as configured for the run.
pub dry_run: bool,
/// Strict-mode flag as configured for the run.
pub strict: bool,
/// Configured chunk size, in bytes.
pub chunk_size: usize,
/// Explicitly configured worker-thread count, if any.
pub threads: Option<usize>,
/// Path of the secrets file, if one was supplied.
pub secrets_file: Option<String>,
}
/// Aggregate totals computed by `ReportBuilder::finish`.
#[derive(Debug, Clone, Serialize)]
pub struct ReportSummary {
/// Number of file entries in the report.
pub total_files: u64,
/// Sum of per-file match counts.
pub total_matches: u64,
/// Sum of per-file replacement counts.
pub total_replacements: u64,
/// Sum of per-file input byte counts.
pub total_bytes_processed: u64,
/// Sum of per-file output byte counts.
pub total_bytes_output: u64,
/// Wall-clock duration from builder creation to `finish`, in ms.
pub duration_ms: u64,
/// Per-pattern match counts merged across all files.
pub pattern_counts: HashMap<String, u64>,
}
/// Per-file sanitization statistics.
#[derive(Debug, Clone, Serialize)]
pub struct FileReport {
/// File path, as recorded by the caller.
pub path: String,
/// Number of matches found in this file.
pub matches: u64,
/// Number of replacements applied in this file.
pub replacements: u64,
/// Input bytes read for this file.
pub bytes_processed: u64,
/// Output bytes produced for this file.
pub bytes_output: u64,
/// Match counts keyed by pattern name.
pub pattern_counts: HashMap<String, u64>,
/// Processing-method label (e.g. "scanner", "structured:yaml").
pub method: String,
}
impl FileReport {
    /// Converts scanner statistics into a per-file report entry.
    ///
    /// `path` and `method` accept anything convertible into a `String`;
    /// counters are copied from `stats` and the pattern map is cloned so
    /// the report owns its data.
    #[must_use]
    pub fn from_scan_stats(
        path: impl Into<String>,
        stats: &ScanStats,
        method: impl Into<String>,
    ) -> Self {
        let pattern_counts = stats.pattern_counts.clone();
        Self {
            method: method.into(),
            pattern_counts,
            bytes_output: stats.bytes_output,
            bytes_processed: stats.bytes_processed,
            replacements: stats.replacements_applied,
            matches: stats.matches_found,
            path: path.into(),
        }
    }
}
/// Thread-safe accumulator of per-file reports; consumed by `finish`
/// to produce the final `SanitizeReport`.
#[derive(Debug)]
pub struct ReportBuilder {
// Metadata carried verbatim into the final report.
metadata: ReportMetadata,
// Mutex allows recording from multiple worker threads through `&self`.
files: Mutex<Vec<FileReport>>,
// Captured at construction; used to compute `duration_ms` in `finish`.
start: Instant,
}
// Compile-time assertion that `ReportBuilder` is `Send + Sync`, so it can
// be shared across worker threads. The closure is never called; the
// program only type-checks if both bounds hold.
const _: fn() = || {
fn assert_send<T: Send>() {}
fn assert_sync<T: Sync>() {}
assert_send::<ReportBuilder>();
assert_sync::<ReportBuilder>();
};
impl ReportBuilder {
    /// Creates a builder carrying `metadata` and starts the wall-clock
    /// timer used for the summary's `duration_ms`.
    #[must_use]
    pub fn new(metadata: ReportMetadata) -> Self {
        Self {
            start: Instant::now(),
            files: Mutex::new(Vec::new()),
            metadata,
        }
    }

    /// Appends one per-file entry; callable concurrently through `&self`.
    ///
    /// # Panics
    ///
    /// Panics if the internal mutex was poisoned by a panicking recorder.
    pub fn record_file(&self, file_report: FileReport) {
        self.files
            .lock()
            .expect("report mutex poisoned")
            .push(file_report);
    }

    /// Appends a batch of entries under a single lock acquisition.
    ///
    /// # Panics
    ///
    /// Panics if the internal mutex was poisoned by a panicking recorder.
    pub fn record_files(&self, reports: impl IntoIterator<Item = FileReport>) {
        let mut guard = self.files.lock().expect("report mutex poisoned");
        guard.extend(reports);
    }

    /// Consumes the builder, aggregates every recorded file into a
    /// `ReportSummary`, and returns the finished report.
    ///
    /// # Panics
    ///
    /// Panics if the internal mutex was poisoned by a panicking recorder.
    pub fn finish(self) -> SanitizeReport {
        // u128 -> u64 truncation of elapsed milliseconds cannot occur for
        // any realistic run length.
        #[allow(clippy::cast_possible_truncation)]
        let duration_ms = self.start.elapsed().as_millis() as u64;
        let files = self.files.into_inner().expect("report mutex poisoned");

        // Start from zeroed totals and fold each file's counters in,
        // merging per-pattern counts across files as we go.
        let mut summary = ReportSummary {
            total_files: files.len() as u64,
            total_matches: 0,
            total_replacements: 0,
            total_bytes_processed: 0,
            total_bytes_output: 0,
            duration_ms,
            pattern_counts: HashMap::new(),
        };
        for entry in &files {
            summary.total_matches += entry.matches;
            summary.total_replacements += entry.replacements;
            summary.total_bytes_processed += entry.bytes_processed;
            summary.total_bytes_output += entry.bytes_output;
            for (pattern, n) in &entry.pattern_counts {
                *summary.pattern_counts.entry(pattern.clone()).or_default() += n;
            }
        }

        SanitizeReport {
            metadata: self.metadata,
            summary,
            files,
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Metadata fixture shared by most tests.
    fn sample_metadata() -> ReportMetadata {
        ReportMetadata {
            version: "0.2.0".into(),
            timestamp: "2026-03-01T00:00:00Z".into(),
            deterministic: false,
            dry_run: false,
            strict: false,
            chunk_size: 1_048_576,
            threads: None,
            secrets_file: None,
        }
    }

    /// File-report fixture: `replacements == matches`, byte counts derived
    /// as 100 in / 110 out per match, a single-pattern count map.
    fn sample_file_report(path: &str, matches: u64, pattern: &str) -> FileReport {
        FileReport {
            path: path.into(),
            matches,
            replacements: matches,
            bytes_processed: matches * 100,
            bytes_output: matches * 110,
            pattern_counts: HashMap::from([(pattern.into(), matches)]),
            method: "scanner".into(),
        }
    }

    #[test]
    fn empty_report() {
        let builder = ReportBuilder::new(sample_metadata());
        let report = builder.finish();
        assert_eq!(report.summary.total_files, 0);
        assert_eq!(report.summary.total_matches, 0);
        assert!(report.files.is_empty());
    }

    #[test]
    fn single_file_report() {
        let builder = ReportBuilder::new(sample_metadata());
        builder.record_file(sample_file_report("data.log", 10, "email"));
        let report = builder.finish();
        assert_eq!(report.summary.total_files, 1);
        assert_eq!(report.summary.total_matches, 10);
        assert_eq!(report.summary.total_replacements, 10);
        assert_eq!(report.summary.total_bytes_processed, 1000);
        assert_eq!(report.summary.total_bytes_output, 1100);
        assert_eq!(*report.summary.pattern_counts.get("email").unwrap(), 10);
        assert_eq!(report.files[0].path, "data.log");
    }

    #[test]
    fn multiple_files_aggregated() {
        let builder = ReportBuilder::new(sample_metadata());
        builder.record_file(sample_file_report("a.log", 5, "email"));
        builder.record_file(sample_file_report("b.log", 3, "ipv4"));
        builder.record_file(sample_file_report("c.log", 7, "email"));
        let report = builder.finish();
        assert_eq!(report.summary.total_files, 3);
        assert_eq!(report.summary.total_matches, 15);
        // Pattern counts merge across files: 5 + 7 emails, 3 ipv4.
        assert_eq!(*report.summary.pattern_counts.get("email").unwrap(), 12);
        assert_eq!(*report.summary.pattern_counts.get("ipv4").unwrap(), 3);
    }

    #[test]
    fn json_serialization_no_secrets() {
        let builder = ReportBuilder::new(sample_metadata());
        builder.record_file(FileReport {
            path: "config.yaml".into(),
            matches: 2,
            replacements: 2,
            bytes_processed: 500,
            bytes_output: 520,
            pattern_counts: HashMap::from([("hostname".into(), 2)]),
            method: "structured:yaml".into(),
        });
        let report = builder.finish();
        let json = report.to_json_pretty().unwrap();
        assert!(json.contains("\"total_matches\": 2"));
        assert!(json.contains("\"version\": \"0.2.0\""));
        assert!(json.contains("\"hostname\": 2"));
        assert!(json.contains("\"method\": \"structured:yaml\""));
        assert!(json.contains("\"duration_ms\""));
        // Round-trip through a generic Value to confirm structural validity.
        let parsed: serde_json::Value = serde_json::from_str(&json).unwrap();
        assert_eq!(parsed["files"][0]["path"].as_str(), Some("config.yaml"));
        // The report must never carry matched/original values.
        let flat = json.to_lowercase();
        assert!(!flat.contains("\"original\""));
        assert!(!flat.contains("\"secret_value\""));
    }

    #[test]
    fn compact_json() {
        let builder = ReportBuilder::new(sample_metadata());
        let report = builder.finish();
        let json = report.to_json().unwrap();
        // No fixture string contains a space, so compact output has none.
        assert!(!json.contains(' '));
    }

    #[test]
    fn metadata_flags_preserved() {
        let meta = ReportMetadata {
            version: "1.0.0".into(),
            timestamp: "2026-06-15T12:00:00Z".into(),
            deterministic: true,
            dry_run: true,
            strict: true,
            chunk_size: 262_144,
            threads: Some(8),
            secrets_file: Some("secrets.enc".into()),
        };
        let builder = ReportBuilder::new(meta);
        let report = builder.finish();
        assert!(report.metadata.deterministic);
        assert!(report.metadata.dry_run);
        assert!(report.metadata.strict);
        assert_eq!(report.metadata.chunk_size, 262_144);
        assert_eq!(report.metadata.threads, Some(8));
        assert_eq!(report.metadata.secrets_file.as_deref(), Some("secrets.enc"));
    }

    #[test]
    fn duration_within_sanity_bound() {
        let builder = ReportBuilder::new(sample_metadata());
        builder.record_file(sample_file_report("x.txt", 1, "email"));
        let report = builder.finish();
        // duration_ms is u64 (never negative) and may legitimately be 0 for
        // a fast run, so only a loose upper bound can be asserted.
        assert!(report.summary.duration_ms < 5_000);
    }

    #[test]
    fn concurrent_recording() {
        use std::sync::Arc;
        use std::thread;
        let builder = Arc::new(ReportBuilder::new(sample_metadata()));
        let mut handles = Vec::new();
        for i in 0_u64..16 {
            let b = Arc::clone(&builder);
            handles.push(thread::spawn(move || {
                b.record_file(sample_file_report(&format!("file_{i}.log"), i + 1, "email"));
            }));
        }
        for h in handles {
            h.join().unwrap();
        }
        let builder = Arc::try_unwrap(builder).expect("other refs still held");
        let report = builder.finish();
        assert_eq!(report.summary.total_files, 16);
        // Sum of match counts 1..=16.
        assert_eq!(report.summary.total_matches, 136);
    }

    #[test]
    fn file_report_from_scan_stats() {
        let stats = ScanStats {
            bytes_processed: 2048,
            bytes_output: 2100,
            matches_found: 5,
            replacements_applied: 5,
            pattern_counts: HashMap::from([("email".into(), 3), ("ipv4".into(), 2)]),
        };
        let fr = FileReport::from_scan_stats("test.log", &stats, "scanner");
        assert_eq!(fr.path, "test.log");
        assert_eq!(fr.matches, 5);
        assert_eq!(fr.bytes_processed, 2048);
        assert_eq!(*fr.pattern_counts.get("email").unwrap(), 3);
        assert_eq!(fr.method, "scanner");
    }

    #[test]
    fn large_file_report() {
        // 10 GiB processed: counters beyond u32 range must survive
        // aggregation and JSON serialization intact.
        let builder = ReportBuilder::new(sample_metadata());
        builder.record_file(FileReport {
            path: "huge.log".into(),
            matches: 1_000_000,
            replacements: 1_000_000,
            bytes_processed: 10_737_418_240,
            bytes_output: 10_900_000_000,
            pattern_counts: HashMap::from([("email".into(), 600_000), ("ipv4".into(), 400_000)]),
            method: "scanner".into(),
        });
        let report = builder.finish();
        assert_eq!(report.summary.total_matches, 1_000_000);
        assert_eq!(report.summary.total_bytes_processed, 10_737_418_240);
        let json = report.to_json().unwrap();
        assert!(json.contains("10737418240"));
    }

    #[test]
    fn record_files_bulk() {
        let builder = ReportBuilder::new(sample_metadata());
        let files: Vec<FileReport> = (0..5)
            .map(|i| sample_file_report(&format!("entry_{i}.txt"), 2, "ssn"))
            .collect();
        builder.record_files(files);
        let report = builder.finish();
        assert_eq!(report.summary.total_files, 5);
        assert_eq!(report.summary.total_matches, 10);
    }
}