Skip to main content

api_scanner/
reports.rs

1//! Reporting layer.
2//!
3//! Responsibilities:
4//!   1. Define the canonical [`Finding`] and [`Severity`] types used project-wide.
//!   2. Serialise a completed [`RunResult`] to pretty JSON, NDJSON, or SARIF 2.1.0.
//!   3. Write reports to stdout, a file, or both.
//!   4. Print a human-readable summary table to stdout; log I/O and
//!      serialisation failures via `tracing`.
8//!   5. Optionally flush partial results periodically (streaming mode).
9
10use std::{
11    fmt,
12    fs::{File, OpenOptions},
13    io::{BufWriter, Write},
14    path::PathBuf,
15    sync::{Arc, Mutex},
16    time::Duration,
17};
18
19use anyhow::{Context, Result};
20use chrono::{DateTime, Utc};
21use serde::{Deserialize, Serialize};
22use tracing::{error, info};
23
24use crate::{
25    error::CapturedError,
26    runner::{RunResult, RuntimeMetrics},
27};
28
29// ── SARIF output ──────────────────────────────────────────────────────────────
30
31#[derive(Debug, Serialize)]
32struct SarifReport {
33    version: String,
34    #[serde(rename = "$schema")]
35    schema: String,
36    runs: Vec<SarifRun>,
37}
38
39#[derive(Debug, Serialize)]
40struct SarifRun {
41    tool: SarifTool,
42    results: Vec<SarifResult>,
43}
44
45#[derive(Debug, Serialize)]
46struct SarifTool {
47    driver: SarifDriver,
48}
49
50#[derive(Debug, Serialize)]
51struct SarifDriver {
52    name: String,
53    version: String,
54    rules: Vec<SarifRule>,
55}
56
57#[derive(Debug, Serialize)]
58struct SarifRule {
59    id: String,
60    name: String,
61    #[serde(rename = "shortDescription")]
62    short_description: SarifText,
63    #[serde(rename = "fullDescription")]
64    full_description: SarifText,
65    #[serde(skip_serializing_if = "Option::is_none")]
66    help: Option<SarifText>,
67}
68
69#[derive(Debug, Serialize)]
70struct SarifResult {
71    #[serde(rename = "ruleId")]
72    rule_id: String,
73    level: String,
74    message: SarifText,
75    locations: Vec<SarifLocation>,
76}
77
78#[derive(Debug, Serialize)]
79struct SarifLocation {
80    #[serde(rename = "physicalLocation")]
81    physical_location: SarifPhysicalLocation,
82}
83
84#[derive(Debug, Serialize)]
85struct SarifPhysicalLocation {
86    #[serde(rename = "artifactLocation")]
87    artifact_location: SarifArtifactLocation,
88}
89
90#[derive(Debug, Serialize)]
91struct SarifArtifactLocation {
92    uri: String,
93}
94
95#[derive(Debug, Serialize)]
96struct SarifText {
97    text: String,
98}
99
100// ── Severity ───────────────────────────────────────────────────────────────────
101
102/// Unified severity scale shared by every scanner.
103#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
104#[serde(rename_all = "UPPERCASE")]
105pub enum Severity {
106    Critical,
107    High,
108    Medium,
109    Low,
110    #[default]
111    Info,
112}
113
114impl PartialOrd for Severity {
115    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
116        Some(self.cmp(other))
117    }
118}
119
120impl Ord for Severity {
121    /// Severity ordering: Critical > High > Medium > Low > Info.
122    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
123        self.rank().cmp(&other.rank())
124    }
125}
126
127impl Severity {
128    /// Numeric rank used for sorting (higher = more severe).
129    #[inline]
130    pub fn rank(&self) -> u8 {
131        match self {
132            Severity::Critical => 4,
133            Severity::High => 3,
134            Severity::Medium => 2,
135            Severity::Low => 1,
136            Severity::Info => 0,
137        }
138    }
139
140    /// CSS / ANSI colour label for terminal output.
141    pub fn label(&self) -> &'static str {
142        match self {
143            Severity::Critical => "CRITICAL",
144            Severity::High => "HIGH    ",
145            Severity::Medium => "MEDIUM  ",
146            Severity::Low => "LOW     ",
147            Severity::Info => "INFO    ",
148        }
149    }
150}
151
152impl fmt::Display for Severity {
153    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
154        f.write_str(self.label().trim())
155    }
156}
157
158// ── Finding ────────────────────────────────────────────────────────────────────
159
160/// A single security or informational observation produced by a scanner.
161#[derive(Debug, Clone, Serialize, Deserialize, Default)]
162pub struct Finding {
163    /// Canonical URL that was scanned.
164    pub url: String,
165
166    /// Machine-readable slug identifying the check (e.g. `"cors.wildcard"`).
167    pub check: String,
168
169    /// Short, human-readable title.
170    pub title: String,
171
172    /// Severity classification.
173    pub severity: Severity,
174
175    /// Full description — what was found and why it matters.
176    pub detail: String,
177
178    /// The raw evidence: header value, JSON snippet, etc.
179    /// `None` when no meaningful snippet is available.
180    #[serde(skip_serializing_if = "Option::is_none")]
181    pub evidence: Option<String>,
182
183    /// Concrete remediation advice.
184    #[serde(skip_serializing_if = "Option::is_none")]
185    pub remediation: Option<String>,
186
187    /// Which scanner produced this finding (e.g. `"cors"`, `"csp"`).
188    pub scanner: String,
189
190    /// Wall-clock time when the finding was recorded (UTC).
191    pub timestamp: DateTime<Utc>,
192
193    /// Optional extra fields scanners may attach (request IDs, CVE refs, etc.).
194    #[serde(skip_serializing_if = "Option::is_none")]
195    pub metadata: Option<serde_json::Value>,
196}
197
198impl Finding {
199    /// Convenience constructor — `timestamp` is set to `Utc::now()`.
200    pub fn new(
201        url: impl Into<String>,
202        check: impl Into<String>,
203        title: impl Into<String>,
204        severity: Severity,
205        detail: impl Into<String>,
206        scanner: impl Into<String>,
207    ) -> Self {
208        Self {
209            url: url.into(),
210            check: check.into(),
211            title: title.into(),
212            severity,
213            detail: detail.into(),
214            scanner: scanner.into(),
215            timestamp: Utc::now(),
216            ..Default::default()
217        }
218    }
219
220    /// Builder: attach raw evidence.
221    #[must_use]
222    pub fn with_evidence(mut self, evidence: impl Into<String>) -> Self {
223        self.evidence = Some(evidence.into());
224        self
225    }
226
227    /// Builder: attach remediation advice.
228    #[must_use]
229    #[allow(dead_code)]
230    pub fn with_remediation(mut self, rem: impl Into<String>) -> Self {
231        self.remediation = Some(rem.into());
232        self
233    }
234
235    /// Builder: attach arbitrary JSON metadata.
236    #[must_use]
237    #[allow(dead_code)]
238    pub fn with_metadata(mut self, meta: serde_json::Value) -> Self {
239        self.metadata = Some(meta);
240        self
241    }
242}
243
244// ── ReportConfig ──────────────────────────────────────────────────────────────
245
246/// Controls how the report is written.
247#[derive(Debug, Clone)]
248pub struct ReportConfig {
249    /// Write pretty-printed JSON (default) or one object per line (NDJSON).
250    pub format: ReportFormat,
251
252    /// If `Some`, write the full report to this path in addition to stdout.
253    pub output_path: Option<PathBuf>,
254
255    /// If `true`, also print a human-readable summary table to stdout.
256    pub print_summary: bool,
257
258    /// If `true`, suppress the findings list from stdout (file only).
259    pub quiet: bool,
260
261    /// Stream findings as NDJSON while scanning (NDJSON only).
262    pub stream: bool,
263}
264
265impl Default for ReportConfig {
266    fn default() -> Self {
267        Self {
268            format: ReportFormat::Pretty,
269            output_path: None,
270            print_summary: true,
271            quiet: false,
272            stream: false,
273        }
274    }
275}
276
/// Serialisation format of the final report.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub enum ReportFormat {
    /// One pretty-printed JSON document (the default).
    #[default]
    Pretty,
    /// Newline-delimited JSON: one JSON object per line.
    Ndjson,
    /// SARIF 2.1.0 document.
    Sarif,
}
287
288// ── Full report document ───────────────────────────────────────────────────────
289
290/// The complete, serialisable report document written to disk / stdout.
291#[derive(Debug, Serialize)]
292pub struct ReportDocument {
293    pub meta: ReportMeta,
294    pub summary: ReportSummary,
295    pub findings: Vec<Finding>,
296    pub errors: Vec<CapturedErrorRecord>,
297}
298
299/// Top-level metadata about the run.
300#[derive(Debug, Serialize)]
301pub struct ReportMeta {
302    pub generated_at: DateTime<Utc>,
303    pub elapsed_ms: u128,
304    pub scanned: usize,
305    pub skipped: usize,
306    pub scanner_ver: &'static str,
307    pub runtime_metrics: RuntimeMetrics,
308}
309
310/// Counts by severity — useful at a glance without reading all findings.
311#[derive(Debug, Serialize, Default)]
312pub struct ReportSummary {
313    pub total: usize,
314    pub critical: usize,
315    pub high: usize,
316    pub medium: usize,
317    pub low: usize,
318    pub info: usize,
319    pub errors: usize,
320}
321
322/// A serialisable wrapper around [`CapturedError`].
323#[derive(Debug, Serialize)]
324pub struct CapturedErrorRecord {
325    pub url: Option<String>,
326    pub kind: String,
327    pub message: String,
328}
329
330impl From<&CapturedError> for CapturedErrorRecord {
331    fn from(e: &CapturedError) -> Self {
332        Self {
333            url: e.url.clone(),
334            kind: e.error_type.clone(),
335            message: e.message.clone(),
336        }
337    }
338}
339
340// ── Reporter ──────────────────────────────────────────────────────────────────
341
342/// Stateful reporter that can also act as a streaming sink for partial flushes.
343pub struct Reporter {
344    cfg: ReportConfig,
345    /// Buffered writer for the output file (if configured).
346    file_writer: Option<Arc<Mutex<BufWriter<File>>>>,
347}
348
349impl Reporter {
350    /// Create a new reporter, opening the output file (truncating) if needed.
351    pub fn new(cfg: ReportConfig) -> std::io::Result<Self> {
352        let file_writer = if let Some(ref path) = cfg.output_path {
353            let file = OpenOptions::new()
354                .write(true)
355                .create(true)
356                .truncate(true)
357                .open(path)?;
358            Some(Arc::new(Mutex::new(BufWriter::new(file))))
359        } else {
360            None
361        };
362
363        Ok(Self { cfg, file_writer })
364    }
365
366    pub fn stream_enabled(&self) -> bool {
367        self.cfg.stream && self.cfg.format == ReportFormat::Ndjson
368    }
369
370    // ── Main entry point ─────────────────────────────────────────────────────
371
372    /// Serialise and write a completed run.  Always returns `Ok` — errors are
373    /// logged via `tracing` rather than propagated (non-fatal for the scan).
374    pub fn write_run_result(&self, result: &RunResult) {
375        let doc = build_document(result);
376
377        if self.cfg.print_summary {
378            print_summary_table(&doc.summary, result.elapsed);
379        }
380
381        match self.cfg.format {
382            ReportFormat::Pretty => self.write_pretty(&doc),
383            ReportFormat::Ndjson => {
384                if self.cfg.stream {
385                    self.write_ndjson_stream_final(&doc);
386                } else {
387                    self.write_ndjson(&doc);
388                }
389            }
390            ReportFormat::Sarif => self.write_sarif(&doc),
391        }
392    }
393
394    // ── Streaming / partial flush ────────────────────────────────────────────
395
396    /// Append a single [`Finding`] immediately (NDJSON only).
397    ///
398    /// Useful when scanners emit findings progressively rather than waiting for
399    /// the full run to complete.  In `Pretty` mode this is a no-op (the full
400    /// document must be written atomically).
401    #[allow(dead_code)]
402    pub fn flush_finding(&self, finding: &Finding) {
403        if self.cfg.format != ReportFormat::Ndjson || !self.cfg.stream {
404            return;
405        }
406
407        match serde_json::to_string(finding) {
408            Ok(line) => {
409                self.write_line_to_file(&line);
410                if !self.cfg.quiet {
411                    println!("{line}");
412                }
413            }
414            Err(e) => {
415                error!("Failed to serialise finding for streaming flush: {e}");
416            }
417        }
418    }
419
420    /// Emit a streaming header (NDJSON only).
421    pub fn start_stream(&self, meta: &ReportMeta) {
422        if self.cfg.format != ReportFormat::Ndjson || !self.cfg.stream {
423            return;
424        }
425
426        let header = serde_json::json!({
427            "type": "meta",
428            "meta": meta,
429            "stream": true,
430        });
431
432        if let Ok(line) = serde_json::to_string(&header) {
433            self.write_line_to_file(&line);
434            if !self.cfg.quiet {
435                println!("{line}");
436            }
437        }
438    }
439
440    // ── Internal helpers ─────────────────────────────────────────────────────
441
442    fn write_pretty(&self, doc: &ReportDocument) {
443        match serde_json::to_string_pretty(doc) {
444            Ok(json) => {
445                // Write to file first (more important), then stdout
446                self.write_line_to_file(&json);
447
448                if !self.cfg.quiet {
449                    println!("{json}");
450                }
451            }
452            Err(e) => error!("Failed to serialise report: {e}"),
453        }
454    }
455
456    fn write_sarif(&self, doc: &ReportDocument) {
457        let mut rules_map: std::collections::BTreeMap<String, SarifRule> =
458            std::collections::BTreeMap::new();
459        let mut results = Vec::new();
460
461        for f in &doc.findings {
462            rules_map
463                .entry(f.check.clone())
464                .or_insert_with(|| SarifRule {
465                    id: f.check.clone(),
466                    name: f.title.clone(),
467                    short_description: SarifText {
468                        text: f.title.clone(),
469                    },
470                    full_description: SarifText {
471                        text: f.detail.clone(),
472                    },
473                    help: f
474                        .remediation
475                        .as_ref()
476                        .map(|r| SarifText { text: r.clone() }),
477                });
478
479            let level = match f.severity {
480                Severity::Critical | Severity::High => "error",
481                Severity::Medium => "warning",
482                Severity::Low | Severity::Info => "note",
483            };
484
485            let message = if let Some(evidence) = &f.evidence {
486                format!("{} — {}", f.detail, evidence)
487            } else {
488                f.detail.clone()
489            };
490
491            results.push(SarifResult {
492                rule_id: f.check.clone(),
493                level: level.to_string(),
494                message: SarifText { text: message },
495                locations: vec![SarifLocation {
496                    physical_location: SarifPhysicalLocation {
497                        artifact_location: SarifArtifactLocation { uri: f.url.clone() },
498                    },
499                }],
500            });
501        }
502
503        let report = SarifReport {
504            version: "2.1.0".to_string(),
505            schema: "https://json.schemastore.org/sarif-2.1.0.json".to_string(),
506            runs: vec![SarifRun {
507                tool: SarifTool {
508                    driver: SarifDriver {
509                        name: env!("CARGO_PKG_NAME").to_string(),
510                        version: doc.meta.scanner_ver.to_string(),
511                        rules: rules_map.into_values().collect(),
512                    },
513                },
514                results,
515            }],
516        };
517
518        match serde_json::to_string_pretty(&report) {
519            Ok(json) => {
520                self.write_line_to_file(&json);
521                if !self.cfg.quiet {
522                    println!("{json}");
523                }
524            }
525            Err(e) => error!("Failed to serialise SARIF report: {e}"),
526        }
527    }
528
529    fn write_ndjson(&self, doc: &ReportDocument) {
530        // Emit meta + summary as the first line so consumers can detect format
531        let header = serde_json::json!({
532            "type":    "meta",
533            "meta":    &doc.meta,
534            "summary": &doc.summary,
535        });
536
537        if let Ok(line) = serde_json::to_string(&header) {
538            self.write_line_to_file(&line);
539            if !self.cfg.quiet {
540                println!("{line}");
541            }
542        }
543
544        for finding in &doc.findings {
545            match serde_json::to_string(finding) {
546                Ok(line) => {
547                    self.write_line_to_file(&line);
548                    if !self.cfg.quiet {
549                        println!("{line}");
550                    }
551                }
552                Err(e) => error!("Failed to serialise finding: {e}"),
553            }
554        }
555
556        for err in &doc.errors {
557            match serde_json::to_string(err) {
558                Ok(line) => {
559                    self.write_line_to_file(&line);
560                    if !self.cfg.quiet {
561                        println!("{line}");
562                    }
563                }
564                Err(e) => error!("Failed to serialise error record: {e}"),
565            }
566        }
567    }
568
569    fn write_ndjson_stream_final(&self, doc: &ReportDocument) {
570        let summary = serde_json::json!({
571            "type": "summary",
572            "summary": &doc.summary,
573            "meta": &doc.meta,
574        });
575
576        if let Ok(line) = serde_json::to_string(&summary) {
577            self.write_line_to_file(&line);
578            if !self.cfg.quiet {
579                println!("{line}");
580            }
581        }
582
583        for err in &doc.errors {
584            match serde_json::to_string(err) {
585                Ok(line) => {
586                    self.write_line_to_file(&line);
587                    if !self.cfg.quiet {
588                        println!("{line}");
589                    }
590                }
591                Err(e) => error!("Failed to serialise error record: {e}"),
592            }
593        }
594    }
595
596    fn write_line_to_file(&self, content: &str) {
597        let Some(ref writer) = self.file_writer else {
598            return;
599        };
600
601        match writer.lock() {
602            Ok(mut w) => {
603                if let Err(e) = writeln!(w, "{content}") {
604                    error!("Failed to write to report file: {e}");
605                }
606            }
607            Err(e) => error!("Report file writer lock poisoned: {e}"),
608        }
609    }
610
611    /// Flush and sync the file writer.  Call once after the run completes.
612    pub fn finalize(&self) {
613        let Some(ref writer) = self.file_writer else {
614            return;
615        };
616
617        match writer.lock() {
618            Ok(mut w) => {
619                if let Err(e) = w.flush() {
620                    error!("Failed to flush report file: {e}");
621                } else if let Some(ref path) = self.cfg.output_path {
622                    info!(path = %path.display(), "Report written");
623                }
624            }
625            Err(e) => error!("Report file writer lock poisoned on finalize: {e}"),
626        }
627    }
628}
629
630// ── Document builders ─────────────────────────────────────────────────────────
631
632pub fn build_document(result: &RunResult) -> ReportDocument {
633    let summary = build_summary(result);
634    let errors: Vec<CapturedErrorRecord> = result
635        .errors
636        .iter()
637        .map(CapturedErrorRecord::from)
638        .collect();
639
640    ReportDocument {
641        meta: ReportMeta {
642            generated_at: Utc::now(),
643            elapsed_ms: result.elapsed.as_millis(),
644            scanned: result.scanned,
645            skipped: result.skipped,
646            scanner_ver: env!("CARGO_PKG_VERSION"),
647            runtime_metrics: result.metrics.clone(),
648        },
649        summary,
650        findings: result.findings.clone(),
651        errors,
652    }
653}
654
655pub fn build_summary(result: &RunResult) -> ReportSummary {
656    let mut s = ReportSummary {
657        total: result.findings.len(),
658        errors: result.errors.len(),
659        ..Default::default()
660    };
661
662    for f in &result.findings {
663        match f.severity {
664            Severity::Critical => s.critical += 1,
665            Severity::High => s.high += 1,
666            Severity::Medium => s.medium += 1,
667            Severity::Low => s.low += 1,
668            Severity::Info => s.info += 1,
669        }
670    }
671
672    s
673}
674
675// ── Human-readable summary ────────────────────────────────────────────────────
676
677/// Print a compact, aligned summary table to stdout via `tracing::info!`.
678/// Uses plain `println!` so it always reaches the user regardless of log level.
679fn print_summary_table(summary: &ReportSummary, elapsed: Duration) {
680    println!();
681    println!("╔═══════════════════════════════╗");
682    println!("║         SCAN SUMMARY          ║");
683    println!("╠═══════════════════════════════╣");
684    println!("║  Findings      {:>5}          ║", summary.total);
685    println!("║  ├─ Critical   {:>5}          ║", summary.critical);
686    println!("║  ├─ High       {:>5}          ║", summary.high);
687    println!("║  ├─ Medium     {:>5}          ║", summary.medium);
688    println!("║  ├─ Low        {:>5}          ║", summary.low);
689    println!("║  └─ Info       {:>5}          ║", summary.info);
690    println!("╠═══════════════════════════════╣");
691    println!("║  Errors        {:>5}          ║", summary.errors);
692    println!("╠═══════════════════════════════╣");
693    println!("║  Elapsed    {:>8}ms          ║", elapsed.as_millis());
694    println!("╚═══════════════════════════════╝");
695    println!();
696}
697
698// ── Exit-code helper ──────────────────────────────────────────────────────────
699
700/// Returns a UNIX-style exit code appropriate for CI pipelines.
701///
702/// | Condition                                    | Code |
703/// |----------------------------------------------|------|
704/// | No findings at or above `threshold`          |  0   |
705/// | At least one finding at or above `threshold` |  1   |
706/// | Scanner errors occurred (regardless of finds)|  2   |
707///
708/// Callers may OR the codes together; e.g. code `3` = findings + errors.
709pub fn exit_code(summary: &ReportSummary, threshold: &Severity) -> i32 {
710    let mut code = 0i32;
711
712    let has_findings = match *threshold {
713        Severity::Critical => summary.critical > 0,
714        Severity::High => summary.critical + summary.high > 0,
715        Severity::Medium => summary.critical + summary.high + summary.medium > 0,
716        Severity::Low => summary.critical + summary.high + summary.medium + summary.low > 0,
717        Severity::Info => summary.total > 0,
718    };
719
720    if has_findings {
721        code |= 1;
722    }
723    if summary.errors > 0 {
724        code |= 2;
725    }
726
727    code
728}
729
730// ── Filtering helpers ─────────────────────────────────────────────────────────
731
732/// Return only findings whose severity is **at or above** `min_severity`.
733pub fn filter_findings<'a>(findings: &'a [Finding], min_severity: &Severity) -> Vec<&'a Finding> {
734    findings
735        .iter()
736        .filter(|f| f.severity.rank() >= min_severity.rank())
737        .collect()
738}
739
740/// Load a baseline NDJSON file and return a set of `(url, check)` keys.
741pub fn load_baseline_keys(
742    path: &std::path::Path,
743) -> Result<std::collections::HashSet<(String, String)>> {
744    let content = std::fs::read_to_string(path)
745        .with_context(|| format!("Failed to read baseline file: {}", path.display()))?;
746
747    let mut keys = std::collections::HashSet::new();
748
749    for (idx, line) in content.lines().enumerate() {
750        let trimmed = line.trim();
751        if trimmed.is_empty() {
752            continue;
753        }
754
755        let value: serde_json::Value = serde_json::from_str(trimmed)
756            .with_context(|| format!("Invalid JSON on baseline line {}", idx + 1))?;
757
758        let url = value.get("url").and_then(|v| v.as_str());
759        let check = value.get("check").and_then(|v| v.as_str());
760
761        if let (Some(u), Some(c)) = (url, check) {
762            keys.insert((u.to_string(), c.to_string()));
763        }
764    }
765
766    Ok(keys)
767}
768
769/// Filter out findings that already exist in the baseline set.
770pub fn filter_new_findings(
771    findings: Vec<Finding>,
772    baseline: &std::collections::HashSet<(String, String)>,
773) -> Vec<Finding> {
774    findings
775        .into_iter()
776        .filter(|f| !baseline.contains(&(f.url.clone(), f.check.clone())))
777        .collect()
778}
779
780/// Deduplicate findings by `(url, check, evidence)` key, keeping the highest-severity
781/// instance for identical evidence payloads while preserving distinct evidence
782/// variants for the same check.
783pub fn dedup_findings(mut findings: Vec<Finding>) -> Vec<Finding> {
784    // Sort descending so the first occurrence of each key is the most severe.
785    findings.sort_by(|a, b| b.severity.rank().cmp(&a.severity.rank()));
786
787    let mut seen = std::collections::HashSet::new();
788    findings.retain(|f| {
789        seen.insert((
790            f.url.clone(),
791            f.check.clone(),
792            f.evidence.clone().unwrap_or_default(),
793        ))
794    });
795    findings
796}