Skip to main content

keyhog_core/report/
sarif.rs

1//! SARIF reporter for code-scanning platforms such as GitHub code scanning,
2//! Azure DevOps, and IDE integrations.
3
4use std::collections::HashMap;
5use std::io::Write;
6
7use crate::{MatchLocation, Severity, VerifiedFinding};
8
9use super::{ReportError, Reporter, WriterBackedReporter};
10
11#[path = "sarif_taxonomies.rs"]
12mod sarif_taxonomies;
13use sarif_taxonomies::sarif_taxonomies_json;
14
15/// SARIF v2.1.0 reporter - STREAMING.
16///
17/// Writes the SARIF document skeleton on construction and emits each
18/// `runs[0].results[]` entry directly to the writer as `report()` is called.
19/// Rules accumulate in a small `HashMap` (one entry per unique detector_id,
20/// at most a few hundred), and are flushed in `finish()`. Peak memory is
21/// O(rules × ~500B) regardless of finding count, replacing the previous
22/// O(N findings × ~500B) buffer that audited as the SARIF OOM wall at 1M+
23/// findings.
24///
25/// SARIF spec is order-agnostic on object keys; we emit `runs[0].results`
26/// before `runs[0].tool` so the streaming write order is legal.
27pub struct SarifReporter<W: Write + Send> {
28    writer: W,
29    rules: HashMap<String, SarifRule>,
30    /// Tracks whether the prefix has been emitted; lazy so the writer can
31    /// fail before we touch it.
32    prefix_written: bool,
33    /// Tracks whether at least one result has been emitted (for comma logic).
34    any_result: bool,
35}
36
37#[path = "sarif_types.rs"]
38mod sarif_types;
39use sarif_types::*;
40
41impl<W: Write + Send> SarifReporter<W> {
42    /// Construct a streaming SARIF reporter that writes its document to
43    /// `writer`. The SARIF prefix is emitted lazily on the first finding.
44    pub fn new(writer: W) -> Self {
45        Self {
46            writer,
47            rules: HashMap::new(),
48            prefix_written: false,
49            any_result: false,
50        }
51    }
52
53    /// Lazily emit the SARIF document skeleton up to the start of the
54    /// `results` array. Idempotent.
55    fn ensure_prefix(&mut self) -> Result<(), ReportError> {
56        if self.prefix_written {
57            return Ok(());
58        }
59        // Manual JSON: serde won't help us here because we want to write
60        // results streamed BEFORE we know the rule set. We use
61        // `serde_json::to_string` for value escaping.
62        let version = env!("CARGO_PKG_VERSION");
63        write!(
64            self.writer,
65            r#"{{"version":"2.1.0","$schema":"https://raw.githubusercontent.com/oasis-tcs/sarif-spec/main/sarif-2.1.0/sarif-schema-2.1.0.json","runs":[{{"results":["#
66        )?;
67        let _ = version;
68        self.prefix_written = true;
69        Ok(())
70    }
71
72    fn build_sarif_result(finding: &VerifiedFinding) -> SarifResult {
73        let locations = vec![Self::location_to_sarif(&finding.location)];
74        // GitHub Code Scanning rejects SARIF whose `relatedLocations`
75        // contains duplicate items. Some detector pipelines emit the
76        // same location twice (e.g. a credential found via two rules
77        // pointing at the same span). Dedup by the canonical
78        // (file_path, line, offset) tuple - that's what makes two
79        // locations "the same finding" for UI purposes.
80        let mut seen_related: std::collections::HashSet<(String, Option<usize>, usize)> =
81            std::collections::HashSet::new();
82        let related_locations: Vec<SarifLocation> = finding
83            .additional_locations
84            .iter()
85            .filter(|loc| {
86                let key = (
87                    loc.file_path.clone().unwrap_or_default().to_string(),
88                    loc.line,
89                    loc.offset,
90                );
91                seen_related.insert(key)
92            })
93            .map(Self::location_to_sarif)
94            .collect();
95
96        let mut properties = serde_json::Map::new();
97        properties.insert(
98            "verification".to_string(),
99            serde_json::Value::String(format!("{:?}", finding.verification).to_lowercase()),
100        );
101        if let Some(confidence) = finding.confidence {
102            properties.insert(
103                "confidence".to_string(),
104                serde_json::Value::Number(
105                    serde_json::Number::from_f64(confidence).unwrap_or_else(|| 0.into()),
106                ),
107            );
108        }
109        // CWE / OWASP taxonomy. CWE-798 ("Use of Hard-coded Credentials") and
110        // OWASP A07:2021 ("Identification and Authentication Failures") apply
111        // to every secret-scanning finding by definition. Compliance dashboards
112        // consume `properties.cwe` + `properties.owasp` directly. Tier-B #16.
113        properties.insert(
114            "cwe".to_string(),
115            serde_json::Value::String("CWE-798".to_string()),
116        );
117        properties.insert(
118            "owasp".to_string(),
119            serde_json::Value::String("A07:2021".to_string()),
120        );
121        for (key, value) in &finding.metadata {
122            properties.insert(
123                format!("metadata.{}", key),
124                serde_json::Value::String(value.to_string()),
125            );
126        }
127
128        // Auto-fix suggestion: replace the leaked credential with a
129        // ${ENV_VAR_NAME} reference at the same physical location. We emit
130        // this only when we have a file_path (no fix possible for stdin /
131        // git-history-only findings) AND a line number.
132        let fixes = if let (Some(_), Some(line)) =
133            (finding.location.file_path.as_ref(), finding.location.line)
134        {
135            let replacement = crate::auto_fix::fix_replacement_text(&finding.service);
136            let env_name = crate::auto_fix::env_var_name_for_service(&finding.service);
137            Some(vec![SarifFix {
138                description: SarifMessage {
139                    text: format!(
140                        "Replace the leaked credential with `{replacement}` and load `{env_name}` from your secret manager."
141                    ),
142                },
143                artifact_changes: vec![SarifArtifactChange {
144                    artifact_location: SarifArtifactLocation {
145                        uri: finding
146                            .location
147                            .file_path
148                            .as_deref()
149                            .map(super::sarif_uri::file_path_to_sarif_uri)
150                            .unwrap_or_default(),
151                        uri_base_id: None,
152                    },
153                    replacements: vec![SarifReplacement {
154                        deleted_region: SarifRegion {
155                            start_line: Some(line),
156                            start_column: None,
157                            end_line: None,
158                            end_column: None,
159                            char_offset: None,
160                            snippet: None,
161                        },
162                        inserted_content: SarifSnippet { text: replacement },
163                    }],
164                }],
165            }])
166        } else {
167            None
168        };
169
170        SarifResult {
171            rule_id: finding.detector_id.to_string(),
172            level: Self::severity_to_level(finding.severity).to_string(),
173            message: SarifMessage {
174                text: format!(
175                    "{} secret detected: {}",
176                    finding.service, finding.credential_redacted
177                ),
178            },
179            locations,
180            properties: Some(properties),
181            related_locations: if related_locations.is_empty() {
182                None
183            } else {
184                Some(related_locations)
185            },
186            fixes,
187            partial_fingerprints: super::sarif_uri::credential_fingerprints(
188                &finding.credential_hash,
189            ),
190        }
191    }
192
193    fn severity_to_level(severity: Severity) -> &'static str {
194        match severity {
195            Severity::Critical => "error",
196            Severity::High => "error",
197            Severity::Medium => "warning",
198            Severity::Low => "note",
199            Severity::ClientSafe => "note",
200            Severity::Info => "note",
201        }
202    }
203
204    fn build_rule(finding: &VerifiedFinding) -> SarifRule {
205        SarifRule {
206            id: finding.detector_id.to_string(),
207            name: finding.detector_name.to_string(),
208            short_description: Some(SarifMessage {
209                text: format!("{} secret detected", finding.service),
210            }),
211            full_description: Some(SarifMessage {
212                text: format!(
213                    "A {} secret was detected by the {} detector",
214                    finding.service, finding.detector_name
215                ),
216            }),
217            help: Some(SarifMessage {
218                text: format!(
219                    "Review and rotate the exposed {} credential.",
220                    finding.service
221                ),
222            }),
223            properties: Some({
224                let mut props = serde_json::Map::new();
225                props.insert(
226                    "service".to_string(),
227                    serde_json::Value::String(finding.service.to_string()),
228                );
229                props.insert(
230                    "severity".to_string(),
231                    serde_json::Value::String(format!("{:?}", finding.severity).to_lowercase()),
232                );
233                super::sarif_uri::apply_code_scanning_props(&mut props, finding.severity);
234                props
235            }),
236        }
237    }
238
239    fn location_to_sarif(loc: &MatchLocation) -> SarifLocation {
240        let uri = loc
241            .file_path
242            .as_ref()
243            .map(|p| super::sarif_uri::file_path_to_sarif_uri(p.as_ref()))
244            .unwrap_or_else(|| "stdin".to_string());
245
246        let artifact_location = Some(SarifArtifactLocation {
247            uri,
248            uri_base_id: None,
249        });
250
251        let region = if loc.line.is_some() || loc.offset != 0 {
252            Some(SarifRegion {
253                start_line: loc.line,
254                start_column: None,
255                end_line: None,
256                end_column: None,
257                char_offset: if loc.offset != 0 {
258                    Some(loc.offset)
259                } else {
260                    None
261                },
262                snippet: None,
263            })
264        } else {
265            None
266        };
267
268        let mut logical_locations = Vec::new();
269
270        if let Some(commit) = &loc.commit {
271            logical_locations.push(SarifLogicalLocation {
272                name: commit.to_string(),
273                kind: "commit".to_string(),
274            });
275        }
276
277        if let Some(author) = &loc.author {
278            logical_locations.push(SarifLogicalLocation {
279                name: author.to_string(),
280                kind: "author".to_string(),
281            });
282        }
283
284        if let Some(date) = &loc.date {
285            logical_locations.push(SarifLogicalLocation {
286                name: date.to_string(),
287                kind: "date".to_string(),
288            });
289        }
290
291        SarifLocation {
292            physical_location: SarifPhysicalLocation {
293                artifact_location,
294                region,
295            },
296            logical_locations: if logical_locations.is_empty() {
297                None
298            } else {
299                Some(logical_locations)
300            },
301        }
302    }
303}
304
305impl<W: Write + Send> Reporter for SarifReporter<W> {
306    fn report(&mut self, finding: &VerifiedFinding) -> Result<(), ReportError> {
307        self.ensure_prefix()?;
308
309        let detector_id = finding.detector_id.as_ref();
310        if !self.rules.contains_key(detector_id) {
311            let rule = Self::build_rule(finding);
312            self.rules.insert(detector_id.to_string(), rule);
313        }
314
315        // Stream this result directly to the writer. No per-finding buffer.
316        if self.any_result {
317            self.writer.write_all(b",")?;
318        }
319        let result = Self::build_sarif_result(finding);
320        serde_json::to_writer(&mut self.writer, &result)?;
321        self.any_result = true;
322        Ok(())
323    }
324
325    fn finish(&mut self) -> Result<(), ReportError> {
326        // If `report()` was never called we still need a valid SARIF doc.
327        self.ensure_prefix()?;
328
329        // Close the results array; emit tool.driver with the accumulated
330        // rules; emit taxonomies (CWE + OWASP) so consumers can resolve
331        // `properties.cwe` references; close runs[0], runs[], and the doc.
332        write!(self.writer, "],\"tool\":")?;
333
334        let mut rules: Vec<SarifRule> = self.rules.values().cloned().collect();
335        rules.sort_by(|a, b| a.id.cmp(&b.id));
336        let tool = SarifTool {
337            driver: SarifToolDriver {
338                name: "keyhog".to_string(),
339                version: Some(env!("CARGO_PKG_VERSION").to_string()),
340                // Sourced from the crate's `repository` field (Cargo sets
341                // CARGO_PKG_REPOSITORY) so the SARIF `informationUri` always
342                // points at the canonical repo and can never drift from the
343                // published manifest. Previously hardcoded to the wrong
344                // `github.com/keyhog/keyhog` org.
345                information_uri: Some(env!("CARGO_PKG_REPOSITORY").to_string()),
346                rules,
347            },
348        };
349        serde_json::to_writer(&mut self.writer, &tool)?;
350
351        // SARIF taxonomies block - each entry references a canonical entry in
352        // CWE / OWASP. Compliance dashboards (e.g. SonarQube, GitHub Code
353        // Scanning, Splunk) resolve `result.properties.cwe = "CWE-798"`
354        // against this block. Tier-B #16 from audits/legendary-2026-04-26.
355        write!(self.writer, ",\"taxonomies\":")?;
356        serde_json::to_writer(&mut self.writer, &sarif_taxonomies_json())?;
357
358        write!(self.writer, "}}]}}")?;
359        writeln!(self.writer)?;
360        self.flush_writer()
361    }
362}
363
364impl<W: Write + Send> WriterBackedReporter for SarifReporter<W> {
365    type Writer = W;
366
367    fn writer_mut(&mut self) -> &mut Self::Writer {
368        &mut self.writer
369    }
370}