1use serde::Serialize;
53use std::collections::HashMap;
54use std::sync::Mutex;
55use std::time::Instant;
56
57use crate::scanner::ScanStats;
58
59#[derive(Debug, Clone, Serialize)]
68pub struct SanitizeReport {
69 pub metadata: ReportMetadata,
71 pub summary: ReportSummary,
73 pub files: Vec<FileReport>,
75}
76
77impl SanitizeReport {
78 pub fn to_json(&self) -> serde_json::Result<String> {
84 serde_json::to_string(self)
85 }
86
87 pub fn to_json_pretty(&self) -> serde_json::Result<String> {
93 serde_json::to_string_pretty(self)
94 }
95}
96
97#[derive(Debug, Clone, Serialize)]
99pub struct ReportMetadata {
100 pub version: String,
102 pub timestamp: String,
104 pub deterministic: bool,
106 pub dry_run: bool,
108 pub strict: bool,
110 pub chunk_size: usize,
112 pub threads: Option<usize>,
114 pub secrets_file: Option<String>,
116}
117
118#[derive(Debug, Clone, Serialize)]
120pub struct ReportSummary {
121 pub total_files: u64,
123 pub total_matches: u64,
125 pub total_replacements: u64,
127 pub total_bytes_processed: u64,
129 pub total_bytes_output: u64,
131 pub duration_ms: u64,
133 pub pattern_counts: HashMap<String, u64>,
135}
136
137#[derive(Debug, Clone, Serialize)]
142pub struct FileReport {
143 pub path: String,
145 pub matches: u64,
147 pub replacements: u64,
149 pub bytes_processed: u64,
151 pub bytes_output: u64,
153 pub pattern_counts: HashMap<String, u64>,
155 pub method: String,
157}
158
159impl FileReport {
160 #[must_use]
162 pub fn from_scan_stats(
163 path: impl Into<String>,
164 stats: &ScanStats,
165 method: impl Into<String>,
166 ) -> Self {
167 Self {
168 path: path.into(),
169 matches: stats.matches_found,
170 replacements: stats.replacements_applied,
171 bytes_processed: stats.bytes_processed,
172 bytes_output: stats.bytes_output,
173 pattern_counts: stats.pattern_counts.clone(),
174 method: method.into(),
175 }
176 }
177}
178
179#[derive(Debug)]
190pub struct ReportBuilder {
191 metadata: ReportMetadata,
192 files: Mutex<Vec<FileReport>>,
193 start: Instant,
194}
195
196const _: fn() = || {
199 fn assert_send<T: Send>() {}
200 fn assert_sync<T: Sync>() {}
201 assert_send::<ReportBuilder>();
202 assert_sync::<ReportBuilder>();
203};
204
205impl ReportBuilder {
206 #[must_use]
210 pub fn new(metadata: ReportMetadata) -> Self {
211 Self {
212 metadata,
213 files: Mutex::new(Vec::new()),
214 start: Instant::now(),
215 }
216 }
217
218 pub fn record_file(&self, file_report: FileReport) {
220 let mut files = self.files.lock().expect("report mutex poisoned");
221 files.push(file_report);
222 }
223
224 pub fn record_files(&self, reports: impl IntoIterator<Item = FileReport>) {
226 let mut files = self.files.lock().expect("report mutex poisoned");
227 files.extend(reports);
228 }
229
230 pub fn finish(self) -> SanitizeReport {
234 #[allow(clippy::cast_possible_truncation)] let duration_ms = self.start.elapsed().as_millis() as u64;
236 let files = self.files.into_inner().expect("report mutex poisoned");
237
238 let mut total_matches: u64 = 0;
240 let mut total_replacements: u64 = 0;
241 let mut total_bytes_processed: u64 = 0;
242 let mut total_bytes_output: u64 = 0;
243 let mut pattern_counts: HashMap<String, u64> = HashMap::new();
244
245 for f in &files {
246 total_matches += f.matches;
247 total_replacements += f.replacements;
248 total_bytes_processed += f.bytes_processed;
249 total_bytes_output += f.bytes_output;
250 for (pat, count) in &f.pattern_counts {
251 *pattern_counts.entry(pat.clone()).or_insert(0) += count;
252 }
253 }
254
255 let summary = ReportSummary {
256 total_files: files.len() as u64,
257 total_matches,
258 total_replacements,
259 total_bytes_processed,
260 total_bytes_output,
261 duration_ms,
262 pattern_counts,
263 };
264
265 SanitizeReport {
266 metadata: self.metadata,
267 summary,
268 files,
269 }
270 }
271}
272
#[cfg(test)]
mod tests {
    use super::*;

    /// Baseline metadata for tests that do not care about specific flags.
    fn sample_metadata() -> ReportMetadata {
        ReportMetadata {
            version: String::from("0.2.0"),
            timestamp: String::from("2026-03-01T00:00:00Z"),
            deterministic: false,
            dry_run: false,
            strict: false,
            chunk_size: 1_048_576,
            threads: None,
            secrets_file: None,
        }
    }

    /// Builds an entry whose byte counts derive from `matches` (100 bytes in
    /// and 110 bytes out per match), with every match attributed to `pattern`.
    fn sample_file_report(path: &str, matches: u64, pattern: &str) -> FileReport {
        let mut pattern_counts = HashMap::new();
        pattern_counts.insert(pattern.to_owned(), matches);

        FileReport {
            path: path.to_owned(),
            matches,
            replacements: matches,
            bytes_processed: matches * 100,
            bytes_output: matches * 110,
            pattern_counts,
            method: String::from("scanner"),
        }
    }

    // A builder with nothing recorded yields an all-zero, file-less report.
    #[test]
    fn empty_report() {
        let report = ReportBuilder::new(sample_metadata()).finish();

        assert_eq!(report.summary.total_files, 0);
        assert_eq!(report.summary.total_matches, 0);
        assert!(report.files.is_empty());
    }

    // One recorded file: the summary mirrors that file's numbers.
    #[test]
    fn single_file_report() {
        let builder = ReportBuilder::new(sample_metadata());
        builder.record_file(sample_file_report("data.log", 10, "email"));
        let report = builder.finish();
        let summary = &report.summary;

        assert_eq!(summary.total_files, 1);
        assert_eq!(summary.total_matches, 10);
        assert_eq!(summary.total_replacements, 10);
        assert_eq!(summary.total_bytes_processed, 1000);
        assert_eq!(summary.total_bytes_output, 1100);
        assert_eq!(summary.pattern_counts.get("email"), Some(&10));
        assert_eq!(report.files[0].path, "data.log");
    }

    // Several files: totals add up and pattern counts merge by label.
    #[test]
    fn multiple_files_aggregated() {
        let builder = ReportBuilder::new(sample_metadata());
        let inputs = [("a.log", 5, "email"), ("b.log", 3, "ipv4"), ("c.log", 7, "email")];
        for &(path, matches, pattern) in inputs.iter() {
            builder.record_file(sample_file_report(path, matches, pattern));
        }
        let report = builder.finish();
        let summary = &report.summary;

        assert_eq!(summary.total_files, 3);
        assert_eq!(summary.total_matches, 15);
        assert_eq!(summary.pattern_counts.get("email"), Some(&12));
        assert_eq!(summary.pattern_counts.get("ipv4"), Some(&3));
    }

    // Pretty JSON exposes the expected keys and no secret-bearing keys.
    #[test]
    fn json_serialization_no_secrets() {
        let builder = ReportBuilder::new(sample_metadata());
        let entry = FileReport {
            path: "config.yaml".into(),
            matches: 2,
            replacements: 2,
            bytes_processed: 500,
            bytes_output: 520,
            pattern_counts: HashMap::from([("hostname".into(), 2)]),
            method: "structured:yaml".into(),
        };
        builder.record_file(entry);
        let json = builder.finish().to_json_pretty().unwrap();

        let expected_fragments = [
            "\"total_matches\": 2",
            "\"version\": \"0.2.0\"",
            "\"hostname\": 2",
            "\"method\": \"structured:yaml\"",
            "\"duration_ms\"",
        ];
        for fragment in expected_fragments.iter() {
            assert!(json.contains(*fragment));
        }

        let parsed: serde_json::Value = serde_json::from_str(&json).unwrap();
        assert_eq!(parsed["files"][0]["path"].as_str(), Some("config.yaml"));

        let flat = json.to_lowercase();
        for forbidden in ["\"original\"", "\"secret_value\""].iter() {
            assert!(!flat.contains(*forbidden));
        }
    }

    // Compact JSON of an empty report contains no space characters.
    #[test]
    fn compact_json() {
        let report = ReportBuilder::new(sample_metadata()).finish();
        let json = report.to_json().unwrap();

        assert!(!json.contains(" "));
    }

    // Metadata handed to the builder comes back unchanged in the report.
    #[test]
    fn metadata_flags_preserved() {
        let builder = ReportBuilder::new(ReportMetadata {
            version: "1.0.0".into(),
            timestamp: "2026-06-15T12:00:00Z".into(),
            deterministic: true,
            dry_run: true,
            strict: true,
            chunk_size: 262_144,
            threads: Some(8),
            secrets_file: Some("secrets.enc".into()),
        });
        let metadata = builder.finish().metadata;

        assert!(metadata.deterministic);
        assert!(metadata.dry_run);
        assert!(metadata.strict);
        assert_eq!(metadata.chunk_size, 262_144);
        assert_eq!(metadata.threads, Some(8));
        assert_eq!(metadata.secrets_file.as_deref(), Some("secrets.enc"));
    }

    // Sanity bound only: a trivial run must finish in well under five seconds.
    #[test]
    fn duration_is_positive() {
        let builder = ReportBuilder::new(sample_metadata());
        builder.record_file(sample_file_report("x.txt", 1, "email"));
        let elapsed_ms = builder.finish().summary.duration_ms;

        assert!(elapsed_ms < 5_000);
    }

    // Sixteen threads record through a shared builder; nothing is lost.
    #[test]
    fn concurrent_recording() {
        use std::sync::Arc;
        use std::thread;

        let builder = Arc::new(ReportBuilder::new(sample_metadata()));

        let workers: Vec<_> = (0_u64..16)
            .map(|i| {
                let shared = Arc::clone(&builder);
                thread::spawn(move || {
                    shared.record_file(sample_file_report(&format!("file_{i}.log"), i + 1, "email"));
                })
            })
            .collect();

        for worker in workers {
            worker.join().unwrap();
        }

        // All worker clones are gone after the joins, so this unwrap succeeds.
        let builder = Arc::try_unwrap(builder).expect("other refs still held");
        let report = builder.finish();

        assert_eq!(report.summary.total_files, 16);
        // 1 + 2 + ... + 16
        assert_eq!(report.summary.total_matches, 136);
    }

    // `from_scan_stats` copies each counter into the matching field.
    #[test]
    fn file_report_from_scan_stats() {
        let stats = ScanStats {
            bytes_processed: 2048,
            bytes_output: 2100,
            matches_found: 5,
            replacements_applied: 5,
            pattern_counts: HashMap::from([("email".into(), 3), ("ipv4".into(), 2)]),
        };
        let entry = FileReport::from_scan_stats("test.log", &stats, "scanner");

        assert_eq!(entry.path, "test.log");
        assert_eq!(entry.matches, 5);
        assert_eq!(entry.bytes_processed, 2048);
        assert_eq!(entry.pattern_counts.get("email"), Some(&3));
        assert_eq!(entry.method, "scanner");
    }

    // Byte counts above `u32::MAX` survive aggregation and serialization.
    #[test]
    fn large_file_report() {
        let builder = ReportBuilder::new(sample_metadata());
        let huge = FileReport {
            path: "huge.log".into(),
            matches: 1_000_000,
            replacements: 1_000_000,
            bytes_processed: 10_737_418_240,
            bytes_output: 10_900_000_000,
            pattern_counts: HashMap::from([("email".into(), 600_000), ("ipv4".into(), 400_000)]),
            method: "scanner".into(),
        };
        builder.record_file(huge);
        let report = builder.finish();

        assert_eq!(report.summary.total_matches, 1_000_000);
        assert_eq!(report.summary.total_bytes_processed, 10_737_418_240);

        let json = report.to_json().unwrap();
        assert!(json.contains("10737418240"));
    }

    // `record_files` accepts a whole batch in one call.
    #[test]
    fn record_files_bulk() {
        let builder = ReportBuilder::new(sample_metadata());
        let batch: Vec<FileReport> = (0..5)
            .map(|i| sample_file_report(&format!("entry_{i}.txt"), 2, "ssn"))
            .collect();
        builder.record_files(batch);
        let summary = builder.finish().summary;

        assert_eq!(summary.total_files, 5);
        assert_eq!(summary.total_matches, 10);
    }
}