1use serde::Serialize;
65use std::collections::HashMap;
66use std::sync::Mutex;
67use std::time::Instant;
68
69use crate::log_context::LogContextResult;
70use crate::scanner::{MatchLocation, ScanStats};
71
/// Complete result of a sanitization run.
///
/// Bundles run-level [`ReportMetadata`], an aggregated [`ReportSummary`], and one
/// [`FileReport`] per recorded file. The report can be rendered as JSON, SARIF,
/// or a standalone HTML page via the `to_*` methods.
#[derive(Debug, Clone, Serialize)]
pub struct SanitizeReport {
    /// Run-level identification and settings (tool version, timestamp, flags).
    pub metadata: ReportMetadata,
    /// Totals across all files; filled in by [`ReportBuilder::finish`].
    pub summary: ReportSummary,
    /// Per-file results, in the order they were pushed into the builder.
    pub files: Vec<FileReport>,
}
90
91impl SanitizeReport {
92 pub fn to_json(&self) -> serde_json::Result<String> {
98 serde_json::to_string(self)
99 }
100
101 pub fn to_json_pretty(&self) -> serde_json::Result<String> {
107 serde_json::to_string_pretty(self)
108 }
109
110 #[allow(clippy::too_many_lines)]
121 pub fn to_sarif(&self) -> serde_json::Result<String> {
122 use serde_json::json;
123
124 let needs_generic = self
128 .files
129 .iter()
130 .any(|f| f.matches > 0 && f.pattern_counts.is_empty());
131
132 let mut rule_ids: Vec<&str> = self
133 .summary
134 .pattern_counts
135 .keys()
136 .map(String::as_str)
137 .collect();
138 rule_ids.sort_unstable();
139 if needs_generic {
140 rule_ids.push("sensitive_value");
141 }
142
143 let rules: Vec<serde_json::Value> = rule_ids
144 .iter()
145 .map(|&id| {
146 let (short, full) = if id == "sensitive_value" {
147 (
148 "Sensitive value detected".to_owned(),
149 "One or more sensitive values were detected during sanitization and \
150 replaced with safe substitutes. No original values are stored. \
151 Run with a secrets file for per-pattern breakdown."
152 .to_owned(),
153 )
154 } else {
155 (
156 format!("Sensitive value of type '{}' detected", id),
157 format!(
158 "A sensitive value of type '{}' was detected during sanitization \
159 and replaced with a safe substitute. No original value is stored.",
160 id
161 ),
162 )
163 };
164 json!({
165 "id": id,
166 "name": sarif_rule_name(id),
167 "shortDescription": { "text": short },
168 "fullDescription": { "text": full },
169 "defaultConfiguration": { "level": sarif_level(id) },
170 "properties": { "tags": ["security"] }
171 })
172 })
173 .collect();
174
175 let mut results: Vec<serde_json::Value> = Vec::new();
178 for f in &self.files {
179 let uri = path_to_sarif_uri(&f.path);
180 let location = json!([{
181 "physicalLocation": {
182 "artifactLocation": { "uri": uri, "uriBaseId": "%SRCROOT%" }
183 }
184 }]);
185 if f.matches > 0 && f.pattern_counts.is_empty() {
186 results.push(json!({
187 "ruleId": "sensitive_value",
188 "level": "warning",
189 "message": {
190 "text": format!(
191 "{} sensitive value(s) detected and sanitized.",
192 f.matches
193 )
194 },
195 "locations": location
196 }));
197 } else {
198 for (pattern, &count) in &f.pattern_counts {
199 if count == 0 {
200 continue;
201 }
202 let first_line = f.match_locations.as_ref().and_then(|ml| {
204 ml.locations
205 .iter()
206 .find(|loc| loc.pattern == *pattern)
207 .map(|loc| loc.line)
208 });
209 let loc = if let Some(line) = first_line {
210 json!([{
211 "physicalLocation": {
212 "artifactLocation": { "uri": &uri, "uriBaseId": "%SRCROOT%" },
213 "region": { "startLine": line }
214 }
215 }])
216 } else {
217 location.clone()
218 };
219 results.push(json!({
220 "ruleId": pattern,
221 "level": sarif_level(pattern),
222 "message": {
223 "text": format!(
224 "{} sensitive value(s) of type '{}' detected and sanitized.",
225 count, pattern
226 )
227 },
228 "locations": loc
229 }));
230 }
231 }
232 }
233
234 let artifacts: Vec<serde_json::Value> = self
235 .files
236 .iter()
237 .map(|f| {
238 let uri = path_to_sarif_uri(&f.path);
239 json!({ "location": { "uri": uri, "uriBaseId": "%SRCROOT%" } })
240 })
241 .collect();
242
243 let sarif = json!({
244 "$schema": "https://json.schemastore.org/sarif-2.1.0.json",
245 "version": "2.1.0",
246 "runs": [{
247 "tool": {
248 "driver": {
249 "name": "rust-sanitize",
250 "version": self.metadata.version,
251 "informationUri": "https://github.com/kayelohbyte/rust-sanitize",
252 "rules": rules
253 }
254 },
255 "invocations": [{
256 "executionSuccessful": true,
257 "endTimeUtc": self.metadata.timestamp
258 }],
259 "results": results,
260 "artifacts": artifacts
261 }]
262 });
263
264 serde_json::to_string_pretty(&sarif)
265 }
266
    /// Renders the report as a single HTML page with inline CSS.
    ///
    /// The page contains, in order: summary cards (files, matches, replacements,
    /// input size, duration), a per-pattern totals table (emitted only when
    /// `total_matches > 0`), and a per-file table. The per-file table gains a
    /// "First match" column only when at least one file carries match locations.
    ///
    /// File paths, method names, pattern names, the version, and the timestamp are
    /// passed through `html_escape` before being embedded in the markup.
    #[must_use]
    #[allow(clippy::too_many_lines, clippy::format_collect)]
    pub fn to_html(&self) -> String {
        let s = &self.summary;
        let m = &self.metadata;

        // Headline numbers shown as a row of cards at the top of the page.
        let cards = format!(
            r#"<div class="cards">
 <div class="card"><div class="card-label">Files</div><div class="card-value">{}</div></div>
 <div class="card"><div class="card-label">Matches</div><div class="card-value">{}</div></div>
 <div class="card"><div class="card-label">Replacements</div><div class="card-value">{}</div></div>
 <div class="card"><div class="card-label">Input</div><div class="card-value">{}</div></div>
 <div class="card"><div class="card-label">Duration</div><div class="card-value">{} ms</div></div>
</div>"#,
            s.total_files,
            s.total_matches,
            s.total_replacements,
            fmt_bytes(s.total_bytes_processed),
            s.duration_ms,
        );

        // Per-pattern totals table; omitted entirely when nothing matched.
        let patterns_section = if s.total_matches > 0 {
            // Highest count first; ties broken alphabetically for a stable order.
            let mut sorted_patterns: Vec<(&String, &u64)> = s.pattern_counts.iter().collect();
            sorted_patterns.sort_by(|a, b| b.1.cmp(a.1).then(a.0.cmp(b.0)));
            let rows: String = sorted_patterns
                .iter()
                .map(|(pat, count)| {
                    format!("<tr><td>{}</td><td>{}</td></tr>\n", html_escape(pat), count)
                })
                .collect();
            format!(
                r#"<div class="section">
<h2>Patterns detected</h2>
<div class="table-wrap"><table>
<thead><tr><th>Pattern</th><th>Total matches</th></tr></thead>
<tbody>{}</tbody>
</table></div></div>"#,
                rows
            )
        } else {
            String::new()
        };

        // The "First match" column is rendered for every row or for none, so the
        // decision is made once for the whole table.
        let has_locations = self.files.iter().any(|f| f.match_locations.is_some());
        let file_rows: String = self
            .files
            .iter()
            .map(|f| {
                // One badge per pattern with a non-zero count, ordered like the
                // patterns table (count descending, then name).
                let badges: String = {
                    let mut pairs: Vec<(&String, &u64)> = f.pattern_counts.iter().collect();
                    pairs.sort_by(|a, b| b.1.cmp(a.1).then(a.0.cmp(b.0)));
                    pairs
                        .iter()
                        .filter(|(_, &c)| c > 0)
                        .map(|(pat, count)| {
                            format!(
                                r#"<span class="badge {}">{}: {}</span>"#,
                                sarif_badge_class(pat),
                                html_escape(pat),
                                count
                            )
                        })
                        .collect()
                };
                let match_class = if f.matches > 0 { "count-positive" } else { "count-zero" };
                // Line of the first recorded location, with a marker when the
                // location list was truncated; a dash when the file has none.
                let first_line_cell = if has_locations {
                    match f.match_locations.as_ref().and_then(|ml| ml.locations.first()) {
                        Some(loc) => {
                            let truncated_marker = if f
                                .match_locations
                                .as_ref()
                                .is_some_and(|ml| ml.truncated)
                            {
                                "<span title=\"more matches not shown\">…</span>"
                            } else {
                                ""
                            };
                            format!(
                                "<td class=\"count-positive\">L{}{}</td>",
                                loc.line, truncated_marker
                            )
                        }
                        None => "<td class=\"count-zero\">—</td>".to_owned(),
                    }
                } else {
                    String::new()
                };
                format!(
                    "<tr><td><code>{}</code></td><td class=\"{}\">{}</td><td>{}</td>{}<td>{}</td></tr>\n",
                    html_escape(&f.path),
                    match_class,
                    f.matches,
                    html_escape(&f.method),
                    first_line_cell,
                    badges,
                )
            })
            .collect();

        // Header cell matching `first_line_cell` above.
        let first_line_header = if has_locations {
            "<th>First match</th>"
        } else {
            ""
        };

        // Page template. CSS braces are doubled because this is a `format!` string.
        format!(
            r#"<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width,initial-scale=1">
<title>rust-sanitize report</title>
<style>
:root{{--bg:#f8f9fa;--surface:#fff;--border:#dee2e6;--text:#212529;--muted:#6c757d;--accent:#0d6efd;--danger:#dc3545;--warn-col:#fd7e14;--success:#198754;--badge:#e9ecef;--code-bg:#f1f3f4}}
@media(prefers-color-scheme:dark){{:root{{--bg:#0d1117;--surface:#161b22;--border:#30363d;--text:#e6edf3;--muted:#8b949e;--accent:#58a6ff;--danger:#f85149;--warn-col:#d29922;--success:#3fb950;--badge:#21262d;--code-bg:#1c2128}}}}
*,*::before,*::after{{box-sizing:border-box;margin:0;padding:0}}
body{{font-family:-apple-system,BlinkMacSystemFont,"Segoe UI",Helvetica,Arial,sans-serif;background:var(--bg);color:var(--text);line-height:1.5;font-size:14px}}
.container{{max-width:1100px;margin:0 auto;padding:24px 16px}}
header{{margin-bottom:24px;padding-bottom:16px;border-bottom:1px solid var(--border)}}
h1{{font-size:1.4rem;font-weight:600}}
.meta{{font-size:.8rem;color:var(--muted);margin-top:4px}}
.section{{margin-bottom:28px}}
h2{{font-size:.95rem;font-weight:600;margin-bottom:10px}}
.cards{{display:grid;grid-template-columns:repeat(auto-fit,minmax(140px,1fr));gap:12px;margin-bottom:24px}}
.card{{background:var(--surface);border:1px solid var(--border);border-radius:6px;padding:14px}}
.card-label{{font-size:.7rem;text-transform:uppercase;letter-spacing:.05em;color:var(--muted)}}
.card-value{{font-size:1.4rem;font-weight:600;margin-top:2px}}
.table-wrap{{overflow-x:auto}}
table{{width:100%;border-collapse:collapse;background:var(--surface);border:1px solid var(--border);border-radius:6px;font-size:.85rem}}
th{{text-align:left;padding:9px 12px;border-bottom:1px solid var(--border);font-weight:600;color:var(--muted);white-space:nowrap}}
td{{padding:9px 12px;border-bottom:1px solid var(--border);vertical-align:top}}
tr:last-child td{{border-bottom:none}}
tr:hover td{{background:var(--badge)}}
code{{background:var(--code-bg);border-radius:3px;padding:1px 4px;font-size:.8rem;word-break:break-all}}
.badge{{display:inline-block;padding:1px 7px;border-radius:12px;font-size:.72rem;font-weight:500;background:var(--badge);margin:1px}}
.badge-pii{{background:rgba(220,53,69,.12);color:var(--danger)}}
.badge-warn{{background:rgba(253,126,20,.12);color:var(--warn-col)}}
.count-zero{{color:var(--muted)}}
.count-positive{{font-weight:600}}
footer{{margin-top:40px;padding-top:16px;border-top:1px solid var(--border);font-size:.75rem;color:var(--muted)}}
</style>
</head>
<body>
<div class="container">
<header>
<h1>rust-sanitize report</h1>
<div class="meta">version {version} · {timestamp} · {duration_ms} ms total</div>
</header>
{cards}
{patterns_section}
<div class="section">
<h2>Files</h2>
<div class="table-wrap"><table>
<thead><tr><th>Path</th><th>Matches</th><th>Method</th>{first_line_header}<th>Patterns</th></tr></thead>
<tbody>{file_rows}</tbody>
</table></div></div>
<footer>Generated by <strong>rust-sanitize {version}</strong> on {timestamp}</footer>
</div>
</body>
</html>"#,
            version = html_escape(&m.version),
            timestamp = html_escape(&m.timestamp),
            duration_ms = s.duration_ms,
            cards = cards,
            patterns_section = patterns_section,
            first_line_header = first_line_header,
            file_rows = file_rows,
        )
    }
444}
445
/// Returns the SARIF severity level for a pattern name.
///
/// A fixed set of pattern names is reported as `"error"`; every other name,
/// including unknown ones, is reported as `"warning"`.
fn sarif_level(pattern: &str) -> &'static str {
    const ERROR_PATTERNS: [&str; 7] = [
        "email",
        "name",
        "phone",
        "credit_card",
        "ssn",
        "auth_token",
        "jwt",
    ];
    if ERROR_PATTERNS.contains(&pattern) {
        "error"
    } else {
        "warning"
    }
}
458
/// Converts a pattern id into an `UpperCamelCase` rule name.
///
/// The id is split on `_`, `:` and `-`. The first character of each piece is
/// upper-cased, the rest is kept as-is, and the pieces are concatenated without
/// separators (`"custom:password"` becomes `"CustomPassword"`).
fn sarif_rule_name(pattern: &str) -> String {
    let mut name = String::with_capacity(pattern.len());
    // True at the start of the input and right after any separator.
    let mut at_word_start = true;
    for ch in pattern.chars() {
        if matches!(ch, '_' | ':' | '-') {
            at_word_start = true;
        } else if at_word_start {
            // `to_uppercase` may expand to several characters (e.g. 'ß').
            name.extend(ch.to_uppercase());
            at_word_start = false;
        } else {
            name.push(ch);
        }
    }
    name
}
473
/// Converts a file-system path into a relative URI reference for SARIF output.
///
/// Backslashes become forward slashes so Windows paths read as URI paths. Every
/// byte that RFC 3986 does not allow verbatim in a path (spaces, `#`, `?`, `%`,
/// control characters, non-ASCII bytes, ...) is percent-encoded, so a file
/// named `my report#1.log` yields a valid `artifactLocation.uri`. Unreserved
/// characters, sub-delimiters, `:`, `@` and `/` pass through unchanged, which
/// keeps ordinary paths identical to their previous output.
fn path_to_sarif_uri(path: &str) -> String {
    const HEX: &[u8; 16] = b"0123456789ABCDEF";

    let mut uri = String::with_capacity(path.len());
    for byte in path.bytes() {
        match byte {
            b'\\' => uri.push('/'),
            // unreserved / sub-delims / ":" / "@" plus the segment separator
            b'A'..=b'Z'
            | b'a'..=b'z'
            | b'0'..=b'9'
            | b'-'
            | b'.'
            | b'_'
            | b'~'
            | b'/'
            | b'!'
            | b'$'
            | b'&'
            | b'\''
            | b'('
            | b')'
            | b'*'
            | b'+'
            | b','
            | b';'
            | b'='
            | b':'
            | b'@' => uri.push(char::from(byte)),
            // Everything else is emitted as %XX, one escape per UTF-8 byte.
            _ => {
                uri.push('%');
                uri.push(char::from(HEX[usize::from(byte >> 4)]));
                uri.push(char::from(HEX[usize::from(byte & 0x0F)]));
            }
        }
    }
    uri
}
478
/// Picks the CSS class for a pattern badge in the HTML report.
///
/// A fixed set of pattern names gets `"badge-pii"`; any other name gets
/// `"badge-warn"`.
fn sarif_badge_class(pattern: &str) -> &'static str {
    let is_pii = matches!(
        pattern,
        "email" | "name" | "phone" | "credit_card" | "ssn" | "auth_token" | "jwt"
    );
    if is_pii {
        "badge-pii"
    } else {
        "badge-warn"
    }
}
486
/// Formats a byte count for display using binary units.
///
/// Values below 1 KiB are printed as an exact integer with a `B` suffix. Larger
/// values use the biggest of KiB / MiB / GiB that does not exceed the value,
/// with one decimal place.
// Precision loss in the `u64 -> f64` cast is acceptable: the result is rounded
// to one decimal for display only.
#[allow(clippy::cast_precision_loss)]
fn fmt_bytes(bytes: u64) -> String {
    // Largest unit first so the first hit is the right one.
    const UNITS: [(u64, &str); 3] = [(1 << 30, "GiB"), (1 << 20, "MiB"), (1 << 10, "KiB")];

    UNITS
        .iter()
        .find(|(threshold, _)| bytes >= *threshold)
        .map_or_else(
            || format!("{bytes} B"),
            |(threshold, suffix)| format!("{:.1} {suffix}", bytes as f64 / *threshold as f64),
        )
}
503
/// Escapes text for safe embedding in HTML element content or a quoted attribute.
///
/// Replaces `&`, `<`, `>`, `"` and `'` with `&amp;`, `&lt;`, `&gt;`, `&quot;`
/// and `&#39;` respectively; all other characters are copied unchanged. The
/// input is scanned once, so an ampersand that is part of an inserted entity is
/// never escaped a second time.
fn html_escape(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '"' => escaped.push_str("&quot;"),
            '\'' => escaped.push_str("&#39;"),
            other => escaped.push(other),
        }
    }
    escaped
}
511
/// Run-level information attached to a [`SanitizeReport`].
///
/// The builder stores this value and hands it back unchanged in the finished
/// report. Only `version` and `timestamp` are read by the renderers in this
/// file; the remaining fields are carried through for serialization.
#[derive(Debug, Clone, Serialize)]
pub struct ReportMetadata {
    /// Tool version; used as the SARIF driver version and shown in the HTML report.
    pub version: String,
    /// Timestamp string; used as SARIF `endTimeUtc` and shown in the HTML report.
    // NOTE(review): presumably RFC 3339 / UTC, as in the tests — confirm with the producer.
    pub timestamp: String,
    // NOTE(review): presumably whether deterministic replacement was enabled — confirm.
    pub deterministic: bool,
    // NOTE(review): presumably whether the run was a dry run (no output written) — confirm.
    pub dry_run: bool,
    // NOTE(review): presumably whether strict mode was enabled — confirm.
    pub strict: bool,
    // NOTE(review): presumably the scanner chunk size in bytes — confirm.
    pub chunk_size: usize,
    // NOTE(review): presumably the configured worker-thread count, `None` for the default — confirm.
    pub threads: Option<usize>,
    // NOTE(review): presumably the path of the secrets file in use, if any — confirm.
    pub secrets_file: Option<String>,
}
532
/// Totals aggregated over every file in a report.
///
/// [`ReportBuilder::finish`] computes these by summing the matching fields of
/// each recorded [`FileReport`].
#[derive(Debug, Clone, Serialize)]
pub struct ReportSummary {
    /// Number of recorded files.
    pub total_files: u64,
    /// Sum of `matches` over all files.
    pub total_matches: u64,
    /// Sum of `replacements` over all files.
    pub total_replacements: u64,
    /// Sum of `bytes_processed` over all files.
    pub total_bytes_processed: u64,
    /// Sum of `bytes_output` over all files.
    pub total_bytes_output: u64,
    /// Milliseconds elapsed between builder creation and `finish`.
    pub duration_ms: u64,
    /// Per-pattern match counts, summed over all files.
    pub pattern_counts: HashMap<String, u64>,
}
551
/// Match locations collected for one file.
#[derive(Debug, Clone, Serialize)]
pub struct MatchLocationsResult {
    /// Recorded locations; the renderers read each entry's `pattern` and `line`.
    pub locations: Vec<MatchLocation>,
    /// When `true`, the HTML report marks the file as having more matches than shown.
    pub truncated: bool,
}
562
/// Result of sanitizing a single file.
///
/// Holds counts, pattern names, and optional context only.
#[derive(Debug, Clone, Serialize)]
pub struct FileReport {
    /// Path of the file as supplied by the caller; used as the key for
    /// [`ReportBuilder::set_file_log_context`].
    pub path: String,
    /// Number of matches found in the file.
    pub matches: u64,
    /// Number of replacements applied to the file.
    pub replacements: u64,
    /// Bytes read from the file.
    pub bytes_processed: u64,
    /// Bytes written for the file.
    pub bytes_output: u64,
    /// Match count per pattern name. May be empty even when `matches > 0`, in
    /// which case the SARIF output falls back to a generic rule.
    pub pattern_counts: HashMap<String, u64>,
    /// Free-form label for how the file was processed (the tests use values
    /// such as `"scanner"` and `"structured:yaml"`).
    pub method: String,
    /// Optional log-context result; omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub log_context: Option<LogContextResult>,
    /// Optional match locations; omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub match_locations: Option<MatchLocationsResult>,
}
593
594impl FileReport {
595 #[must_use]
597 pub fn from_scan_stats(
598 path: impl Into<String>,
599 stats: &ScanStats,
600 method: impl Into<String>,
601 ) -> Self {
602 Self {
603 path: path.into(),
604 matches: stats.matches_found,
605 replacements: stats.replacements_applied,
606 bytes_processed: stats.bytes_processed,
607 bytes_output: stats.bytes_output,
608 pattern_counts: stats.pattern_counts.clone(),
609 method: method.into(),
610 log_context: None,
611 match_locations: None,
612 }
613 }
614
615 #[must_use]
621 pub fn with_match_locations(mut self, locations: Vec<MatchLocation>, truncated: bool) -> Self {
622 if !locations.is_empty() || truncated {
623 self.match_locations = Some(MatchLocationsResult {
624 locations,
625 truncated,
626 });
627 }
628 self
629 }
630}
631
/// Accumulates [`FileReport`]s and produces a [`SanitizeReport`].
///
/// All recording methods take `&self` and guard the file list with a mutex, so
/// one builder can be shared by reference between worker threads.
#[derive(Debug)]
pub struct ReportBuilder {
    /// Run-level metadata, moved into the finished report unchanged.
    metadata: ReportMetadata,
    /// File reports recorded so far.
    files: Mutex<Vec<FileReport>>,
    /// Creation time of the builder; used to compute `duration_ms`.
    start: Instant,
}
648
649const _: fn() = || {
652 fn assert_send<T: Send>() {}
653 fn assert_sync<T: Sync>() {}
654 assert_send::<ReportBuilder>();
655 assert_sync::<ReportBuilder>();
656};
657
658impl ReportBuilder {
659 #[must_use]
663 pub fn new(metadata: ReportMetadata) -> Self {
664 Self {
665 metadata,
666 files: Mutex::new(Vec::new()),
667 start: Instant::now(),
668 }
669 }
670
671 pub fn set_file_log_context(&self, path: &str, result: LogContextResult) {
675 let mut files = self.files.lock().expect("report mutex poisoned");
676 if let Some(file) = files.iter_mut().find(|f| f.path == path) {
677 file.log_context = Some(result);
678 }
679 }
680
681 pub fn record_file(&self, file_report: FileReport) {
683 let mut files = self.files.lock().expect("report mutex poisoned");
684 files.push(file_report);
685 }
686
687 pub fn record_files(&self, reports: impl IntoIterator<Item = FileReport>) {
689 let mut files = self.files.lock().expect("report mutex poisoned");
690 files.extend(reports);
691 }
692
693 pub fn finish(self) -> SanitizeReport {
697 #[allow(clippy::cast_possible_truncation)] let duration_ms = self.start.elapsed().as_millis() as u64;
699 let files = self.files.into_inner().expect("report mutex poisoned");
700
701 let mut total_matches: u64 = 0;
703 let mut total_replacements: u64 = 0;
704 let mut total_bytes_processed: u64 = 0;
705 let mut total_bytes_output: u64 = 0;
706 let mut pattern_counts: HashMap<String, u64> = HashMap::new();
707
708 for f in &files {
709 total_matches += f.matches;
710 total_replacements += f.replacements;
711 total_bytes_processed += f.bytes_processed;
712 total_bytes_output += f.bytes_output;
713 for (pat, count) in &f.pattern_counts {
714 *pattern_counts.entry(pat.clone()).or_insert(0) += count;
715 }
716 }
717
718 let summary = ReportSummary {
719 total_files: files.len() as u64,
720 total_matches,
721 total_replacements,
722 total_bytes_processed,
723 total_bytes_output,
724 duration_ms,
725 pattern_counts,
726 };
727
728 SanitizeReport {
729 metadata: self.metadata,
730 summary,
731 files,
732 }
733 }
734}
735
/// Unit tests for report aggregation and the JSON / SARIF / HTML renderers.
#[cfg(test)]
mod tests {
    use super::*;

    /// Metadata with every optional setting left at its "off" value.
    fn sample_metadata() -> ReportMetadata {
        ReportMetadata {
            version: "0.2.0".into(),
            timestamp: "2026-03-01T00:00:00Z".into(),
            deterministic: false,
            dry_run: false,
            strict: false,
            chunk_size: 1_048_576,
            threads: None,
            secrets_file: None,
        }
    }

    /// File report with `matches` hits of a single `pattern` and byte counts
    /// derived from the match count.
    fn sample_file_report(path: &str, matches: u64, pattern: &str) -> FileReport {
        FileReport {
            path: path.into(),
            matches,
            replacements: matches,
            bytes_processed: matches * 100,
            bytes_output: matches * 110,
            pattern_counts: HashMap::from([(pattern.into(), matches)]),
            method: "scanner".into(),
            log_context: None,
            match_locations: None,
        }
    }

    // ---- aggregation ----

    #[test]
    fn empty_report() {
        let builder = ReportBuilder::new(sample_metadata());
        let report = builder.finish();
        assert_eq!(report.summary.total_files, 0);
        assert_eq!(report.summary.total_matches, 0);
        assert!(report.files.is_empty());
    }

    #[test]
    fn single_file_report() {
        let builder = ReportBuilder::new(sample_metadata());
        builder.record_file(sample_file_report("data.log", 10, "email"));
        let report = builder.finish();

        assert_eq!(report.summary.total_files, 1);
        assert_eq!(report.summary.total_matches, 10);
        assert_eq!(report.summary.total_replacements, 10);
        assert_eq!(report.summary.total_bytes_processed, 1000);
        assert_eq!(report.summary.total_bytes_output, 1100);
        assert_eq!(*report.summary.pattern_counts.get("email").unwrap(), 10);
        assert_eq!(report.files[0].path, "data.log");
    }

    #[test]
    fn multiple_files_aggregated() {
        let builder = ReportBuilder::new(sample_metadata());
        builder.record_file(sample_file_report("a.log", 5, "email"));
        builder.record_file(sample_file_report("b.log", 3, "ipv4"));
        builder.record_file(sample_file_report("c.log", 7, "email"));
        let report = builder.finish();

        assert_eq!(report.summary.total_files, 3);
        assert_eq!(report.summary.total_matches, 15);
        assert_eq!(*report.summary.pattern_counts.get("email").unwrap(), 12);
        assert_eq!(*report.summary.pattern_counts.get("ipv4").unwrap(), 3);
    }

    // ---- JSON ----

    #[test]
    fn json_serialization_no_secrets() {
        let builder = ReportBuilder::new(sample_metadata());
        builder.record_file(FileReport {
            path: "config.yaml".into(),
            matches: 2,
            replacements: 2,
            bytes_processed: 500,
            bytes_output: 520,
            pattern_counts: HashMap::from([("hostname".into(), 2)]),
            method: "structured:yaml".into(),
            log_context: None,
            match_locations: None,
        });
        let report = builder.finish();
        let json = report.to_json_pretty().unwrap();

        assert!(json.contains("\"total_matches\": 2"));
        assert!(json.contains("\"version\": \"0.2.0\""));
        assert!(json.contains("\"hostname\": 2"));
        assert!(json.contains("\"method\": \"structured:yaml\""));
        assert!(json.contains("\"duration_ms\""));

        let parsed: serde_json::Value = serde_json::from_str(&json).unwrap();
        assert!(parsed["files"][0]["path"].as_str() == Some("config.yaml"));
        // The report must not grow fields that could carry original values.
        let flat = json.to_lowercase();
        assert!(!flat.contains("\"original\""));
        assert!(!flat.contains("\"secret_value\""));
    }

    #[test]
    fn compact_json() {
        let builder = ReportBuilder::new(sample_metadata());
        let report = builder.finish();
        let json = report.to_json().unwrap();
        assert!(!json.contains(" "));
    }

    #[test]
    fn metadata_flags_preserved() {
        let meta = ReportMetadata {
            version: "0.8.0".into(),
            timestamp: "2026-06-15T12:00:00Z".into(),
            deterministic: true,
            dry_run: true,
            strict: true,
            chunk_size: 262_144,
            threads: Some(8),
            secrets_file: Some("secrets.enc".into()),
        };
        let builder = ReportBuilder::new(meta);
        let report = builder.finish();
        assert!(report.metadata.deterministic);
        assert!(report.metadata.dry_run);
        assert!(report.metadata.strict);
        assert_eq!(report.metadata.chunk_size, 262_144);
        assert_eq!(report.metadata.threads, Some(8));
        assert_eq!(report.metadata.secrets_file.as_deref(), Some("secrets.enc"));
    }

    #[test]
    fn duration_is_positive() {
        let builder = ReportBuilder::new(sample_metadata());
        builder.record_file(sample_file_report("x.txt", 1, "email"));
        let report = builder.finish();
        // Sanity bound only: a trivial run must not report seconds of work.
        assert!(report.summary.duration_ms < 5_000);
    }

    // ---- concurrency ----

    #[test]
    fn concurrent_recording() {
        use std::sync::Arc;
        use std::thread;

        let builder = Arc::new(ReportBuilder::new(sample_metadata()));
        let mut handles = Vec::new();

        for i in 0_u64..16 {
            let b = Arc::clone(&builder);
            handles.push(thread::spawn(move || {
                b.record_file(sample_file_report(&format!("file_{i}.log"), i + 1, "email"));
            }));
        }

        for h in handles {
            h.join().unwrap();
        }

        let builder = Arc::try_unwrap(builder).expect("other refs still held");
        let report = builder.finish();

        assert_eq!(report.summary.total_files, 16);
        // 1 + 2 + ... + 16
        assert_eq!(report.summary.total_matches, 136);
    }

    #[test]
    fn file_report_from_scan_stats() {
        let stats = ScanStats {
            bytes_processed: 2048,
            bytes_output: 2100,
            matches_found: 5,
            replacements_applied: 5,
            pattern_counts: HashMap::from([("email".into(), 3), ("ipv4".into(), 2)]),
        };
        let fr = FileReport::from_scan_stats("test.log", &stats, "scanner");
        assert_eq!(fr.path, "test.log");
        assert_eq!(fr.matches, 5);
        assert_eq!(fr.bytes_processed, 2048);
        assert_eq!(*fr.pattern_counts.get("email").unwrap(), 3);
        assert_eq!(fr.method, "scanner");
    }

    #[test]
    fn large_file_report() {
        let builder = ReportBuilder::new(sample_metadata());
        builder.record_file(FileReport {
            path: "huge.log".into(),
            matches: 1_000_000,
            replacements: 1_000_000,
            bytes_processed: 10_737_418_240,
            bytes_output: 10_900_000_000,
            pattern_counts: HashMap::from([("email".into(), 600_000), ("ipv4".into(), 400_000)]),
            method: "scanner".into(),
            log_context: None,
            match_locations: None,
        });
        let report = builder.finish();
        assert_eq!(report.summary.total_matches, 1_000_000);
        assert_eq!(report.summary.total_bytes_processed, 10_737_418_240);

        // Values above u32::MAX must survive serialization intact.
        let json = report.to_json().unwrap();
        assert!(json.contains("10737418240"));
    }

    #[test]
    fn record_files_bulk() {
        let builder = ReportBuilder::new(sample_metadata());
        let files: Vec<FileReport> = (0..5)
            .map(|i| sample_file_report(&format!("entry_{i}.txt"), 2, "ssn"))
            .collect();
        builder.record_files(files);
        let report = builder.finish();
        assert_eq!(report.summary.total_files, 5);
        assert_eq!(report.summary.total_matches, 10);
    }

    // ---- SARIF ----

    /// Two-file report: one file with two patterns, one clean file.
    fn rich_report() -> SanitizeReport {
        let builder = ReportBuilder::new(sample_metadata());
        builder.record_file(FileReport {
            path: "config.yaml".into(),
            matches: 3,
            replacements: 3,
            bytes_processed: 1024,
            bytes_output: 1100,
            pattern_counts: HashMap::from([("auth_token".into(), 2u64), ("email".into(), 1u64)]),
            method: "structured:yaml".into(),
            log_context: None,
            match_locations: None,
        });
        builder.record_file(FileReport {
            path: "logs/app.log".into(),
            matches: 0,
            replacements: 0,
            bytes_processed: 512,
            bytes_output: 512,
            pattern_counts: HashMap::new(),
            method: "scanner".into(),
            log_context: None,
            match_locations: None,
        });
        builder.finish()
    }

    #[test]
    fn sarif_is_valid_json() {
        let sarif = rich_report().to_sarif().unwrap();
        let v: serde_json::Value = serde_json::from_str(&sarif).unwrap();
        assert_eq!(v["version"], "2.1.0");
        assert_eq!(
            v["$schema"],
            "https://json.schemastore.org/sarif-2.1.0.json"
        );
    }

    #[test]
    fn sarif_contains_one_run() {
        let v: serde_json::Value =
            serde_json::from_str(&rich_report().to_sarif().unwrap()).unwrap();
        assert_eq!(v["runs"].as_array().unwrap().len(), 1);
    }

    #[test]
    fn sarif_driver_name_and_version() {
        let v: serde_json::Value =
            serde_json::from_str(&rich_report().to_sarif().unwrap()).unwrap();
        let driver = &v["runs"][0]["tool"]["driver"];
        assert_eq!(driver["name"], "rust-sanitize");
        assert_eq!(driver["version"], "0.2.0");
    }

    #[test]
    fn sarif_rules_one_per_pattern() {
        let v: serde_json::Value =
            serde_json::from_str(&rich_report().to_sarif().unwrap()).unwrap();
        let rules = v["runs"][0]["tool"]["driver"]["rules"].as_array().unwrap();
        assert_eq!(rules.len(), 2);
        let ids: Vec<&str> = rules.iter().map(|r| r["id"].as_str().unwrap()).collect();
        assert!(ids.contains(&"auth_token"));
        assert!(ids.contains(&"email"));
    }

    #[test]
    fn sarif_results_only_for_nonzero_counts() {
        let v: serde_json::Value =
            serde_json::from_str(&rich_report().to_sarif().unwrap()).unwrap();
        let results = v["runs"][0]["results"].as_array().unwrap();
        // Two patterns in config.yaml; the clean file contributes nothing.
        assert_eq!(results.len(), 2);
    }

    #[test]
    fn sarif_result_level_pii_is_error() {
        let v: serde_json::Value =
            serde_json::from_str(&rich_report().to_sarif().unwrap()).unwrap();
        let results = v["runs"][0]["results"].as_array().unwrap();
        let email_result = results
            .iter()
            .find(|r| r["ruleId"] == "email")
            .expect("email result missing");
        assert_eq!(email_result["level"], "error");
    }

    #[test]
    fn sarif_result_has_file_uri() {
        let v: serde_json::Value =
            serde_json::from_str(&rich_report().to_sarif().unwrap()).unwrap();
        let results = v["runs"][0]["results"].as_array().unwrap();
        for result in results {
            let uri = result["locations"][0]["physicalLocation"]["artifactLocation"]["uri"]
                .as_str()
                .unwrap();
            assert_eq!(uri, "config.yaml");
        }
    }

    #[test]
    fn sarif_artifacts_all_files() {
        let v: serde_json::Value =
            serde_json::from_str(&rich_report().to_sarif().unwrap()).unwrap();
        let artifacts = v["runs"][0]["artifacts"].as_array().unwrap();
        assert_eq!(artifacts.len(), 2);
        let uris: Vec<&str> = artifacts
            .iter()
            .map(|a| a["location"]["uri"].as_str().unwrap())
            .collect();
        assert!(uris.contains(&"config.yaml"));
        assert!(uris.contains(&"logs/app.log"));
    }

    #[test]
    fn sarif_windows_paths_use_forward_slash() {
        let builder = ReportBuilder::new(sample_metadata());
        builder.record_file(FileReport {
            path: r"src\secrets\config.json".into(),
            matches: 1,
            replacements: 1,
            bytes_processed: 100,
            bytes_output: 110,
            pattern_counts: HashMap::from([("auth_token".into(), 1u64)]),
            method: "structured:json".into(),
            log_context: None,
            match_locations: None,
        });
        let report = builder.finish();
        let v: serde_json::Value = serde_json::from_str(&report.to_sarif().unwrap()).unwrap();
        let uri = v["runs"][0]["results"][0]["locations"][0]["physicalLocation"]
            ["artifactLocation"]["uri"]
            .as_str()
            .unwrap();
        assert_eq!(uri, "src/secrets/config.json");
    }

    // ---- HTML ----

    #[test]
    fn html_is_valid_document() {
        let html = rich_report().to_html();
        assert!(html.starts_with("<!DOCTYPE html>"));
        assert!(html.contains("</html>"));
        assert!(html.contains("<title>rust-sanitize report</title>"));
    }

    #[test]
    fn html_contains_summary_stats() {
        let html = rich_report().to_html();
        assert!(html.contains(">2<"), "file count missing");
        assert!(html.contains(">3<"), "match count missing");
    }

    #[test]
    fn html_contains_file_paths() {
        let html = rich_report().to_html();
        assert!(html.contains("config.yaml"));
        assert!(html.contains("logs/app.log"));
    }

    #[test]
    fn html_escapes_special_chars() {
        let builder = ReportBuilder::new(sample_metadata());
        builder.record_file(FileReport {
            path: "<script>alert(1)</script>".into(),
            matches: 0,
            replacements: 0,
            bytes_processed: 0,
            bytes_output: 0,
            pattern_counts: HashMap::new(),
            method: "scanner".into(),
            log_context: None,
            match_locations: None,
        });
        let html = builder.finish().to_html();
        // The raw tag must not survive; its escaped form must be present.
        assert!(!html.contains("<script>alert(1)</script>"));
        assert!(html.contains("&lt;script&gt;"));
    }

    #[test]
    fn html_no_external_resources() {
        let html = rich_report().to_html();
        // The page must be fully self-contained: no absolute URLs of any kind.
        assert!(!html.contains("http://"));
        assert!(!html.contains("https://"));
        assert!(!html.contains("cdn."));
        assert!(!html.contains("src=\"http"));
        assert!(!html.contains("href=\"http"));
    }

    // ---- helpers ----

    #[test]
    fn sarif_rule_name_camel_case() {
        assert_eq!(sarif_rule_name("auth_token"), "AuthToken");
        assert_eq!(sarif_rule_name("email"), "Email");
        assert_eq!(sarif_rule_name("custom:password"), "CustomPassword");
        assert_eq!(sarif_rule_name("aws_arn"), "AwsArn");
    }

    #[test]
    fn fmt_bytes_human_readable() {
        assert_eq!(fmt_bytes(512), "512 B");
        assert_eq!(fmt_bytes(1024), "1.0 KiB");
        assert_eq!(fmt_bytes(1536), "1.5 KiB");
        assert_eq!(fmt_bytes(1024 * 1024), "1.0 MiB");
        assert_eq!(fmt_bytes(1024 * 1024 * 1024), "1.0 GiB");
    }

    #[test]
    fn html_escape_special_chars() {
        assert_eq!(html_escape("a&b"), "a&amp;b");
        assert_eq!(html_escape("<tag>"), "&lt;tag&gt;");
        assert_eq!(html_escape("\"quote\""), "&quot;quote&quot;");
        assert_eq!(html_escape("normal"), "normal");
    }
}