// keyhog_core/report/sarif.rs

//! SARIF reporter for code-scanning platforms such as GitHub code scanning,
//! Azure DevOps, and IDE integrations.
3
4use std::collections::HashMap;
5use std::io::Write;
6
7use crate::{MatchLocation, Severity, VerifiedFinding};
8
9use super::{ReportError, Reporter, WriterBackedReporter};
10
11/// SARIF v2.1.0 reporter — STREAMING.
12///
13/// Writes the SARIF document skeleton on construction and emits each
14/// `runs[0].results[]` entry directly to the writer as `report()` is called.
15/// Rules accumulate in a small `HashMap` (one entry per unique detector_id,
16/// at most a few hundred), and are flushed in `finish()`. Peak memory is
17/// O(rules × ~500B) regardless of finding count, replacing the previous
18/// O(N findings × ~500B) buffer that audited as the SARIF OOM wall at 1M+
19/// findings.
20///
21/// SARIF spec is order-agnostic on object keys; we emit `runs[0].results`
22/// before `runs[0].tool` so the streaming write order is legal.
23pub struct SarifReporter<W: Write + Send> {
24    writer: W,
25    rules: HashMap<String, SarifRule>,
26    /// Tracks whether the prefix has been emitted; lazy so the writer can
27    /// fail before we touch it.
28    prefix_written: bool,
29    /// Tracks whether at least one result has been emitted (for comma logic).
30    any_result: bool,
31}
32
33/// A SARIF rule (tool component rule).
34#[derive(Debug, Clone, serde::Serialize)]
35#[serde(rename_all = "camelCase")]
36struct SarifRule {
37    id: String,
38    name: String,
39    #[serde(skip_serializing_if = "Option::is_none")]
40    short_description: Option<SarifMessage>,
41    #[serde(skip_serializing_if = "Option::is_none")]
42    full_description: Option<SarifMessage>,
43    #[serde(skip_serializing_if = "Option::is_none")]
44    help: Option<SarifMessage>,
45    #[serde(skip_serializing_if = "Option::is_none")]
46    properties: Option<serde_json::Map<String, serde_json::Value>>,
47}
48
49#[derive(Debug, Clone, serde::Serialize)]
50#[serde(rename_all = "camelCase")]
51struct SarifMessage {
52    text: String,
53}
54
55// Note: `SarifRun` and `SarifLog` are no longer constructed since the
56// streaming reporter writes the document skeleton manually. They remain as
57// schema documentation for readers; mark `#[allow(dead_code)]` so the
58// compiler warns us if a non-streaming consumer reuses them.
59#[allow(dead_code)]
60#[derive(Debug, Clone, serde::Serialize)]
61#[serde(rename_all = "camelCase")]
62struct SarifRun {
63    tool: SarifTool,
64    results: Vec<SarifResult>,
65}
66
67#[derive(Debug, Clone, serde::Serialize)]
68#[serde(rename_all = "camelCase")]
69struct SarifTool {
70    driver: SarifToolDriver,
71}
72
73#[derive(Debug, Clone, serde::Serialize)]
74#[serde(rename_all = "camelCase")]
75struct SarifToolDriver {
76    name: String,
77    #[serde(skip_serializing_if = "Option::is_none")]
78    version: Option<String>,
79    #[serde(skip_serializing_if = "Option::is_none")]
80    information_uri: Option<String>,
81    rules: Vec<SarifRule>,
82}
83
84#[derive(Debug, Clone, serde::Serialize)]
85#[serde(rename_all = "camelCase")]
86struct SarifResult {
87    rule_id: String,
88    level: String,
89    message: SarifMessage,
90    locations: Vec<SarifLocation>,
91    #[serde(skip_serializing_if = "Option::is_none")]
92    properties: Option<serde_json::Map<String, serde_json::Value>>,
93    #[serde(skip_serializing_if = "Option::is_none")]
94    related_locations: Option<Vec<SarifLocation>>,
95    /// SARIF v2.2.0 `fixes[]` — auto-rotation suggestions. Each entry
96    /// proposes replacing the leaked credential with a `${ENV_VAR_NAME}`
97    /// shell-interpolation reference. Tier-B #15 + #17.
98    #[serde(skip_serializing_if = "Option::is_none")]
99    fixes: Option<Vec<SarifFix>>,
100}
101
102#[derive(Debug, Clone, serde::Serialize)]
103#[serde(rename_all = "camelCase")]
104struct SarifFix {
105    description: SarifMessage,
106    artifact_changes: Vec<SarifArtifactChange>,
107}
108
109#[derive(Debug, Clone, serde::Serialize)]
110#[serde(rename_all = "camelCase")]
111struct SarifArtifactChange {
112    artifact_location: SarifArtifactLocation,
113    replacements: Vec<SarifReplacement>,
114}
115
116#[derive(Debug, Clone, serde::Serialize)]
117#[serde(rename_all = "camelCase")]
118struct SarifReplacement {
119    deleted_region: SarifRegion,
120    inserted_content: SarifSnippet,
121}
122
123#[derive(Debug, Clone, serde::Serialize)]
124#[serde(rename_all = "camelCase")]
125struct SarifLocation {
126    physical_location: SarifPhysicalLocation,
127    #[serde(skip_serializing_if = "Option::is_none")]
128    logical_locations: Option<Vec<SarifLogicalLocation>>,
129}
130
131#[derive(Debug, Clone, serde::Serialize)]
132#[serde(rename_all = "camelCase")]
133struct SarifPhysicalLocation {
134    #[serde(skip_serializing_if = "Option::is_none")]
135    artifact_location: Option<SarifArtifactLocation>,
136    #[serde(skip_serializing_if = "Option::is_none")]
137    region: Option<SarifRegion>,
138}
139
140#[derive(Debug, Clone, serde::Serialize)]
141#[serde(rename_all = "camelCase")]
142struct SarifArtifactLocation {
143    uri: String,
144    #[serde(skip_serializing_if = "Option::is_none")]
145    uri_base_id: Option<String>,
146}
147
148#[derive(Debug, Clone, serde::Serialize)]
149#[serde(rename_all = "camelCase")]
150struct SarifRegion {
151    #[serde(skip_serializing_if = "Option::is_none")]
152    start_line: Option<usize>,
153    #[serde(skip_serializing_if = "Option::is_none")]
154    start_column: Option<usize>,
155    #[serde(skip_serializing_if = "Option::is_none")]
156    end_line: Option<usize>,
157    #[serde(skip_serializing_if = "Option::is_none")]
158    end_column: Option<usize>,
159    #[serde(skip_serializing_if = "Option::is_none")]
160    snippet: Option<SarifSnippet>,
161}
162
163#[derive(Debug, Clone, serde::Serialize)]
164#[serde(rename_all = "camelCase")]
165struct SarifSnippet {
166    text: String,
167}
168
169#[derive(Debug, Clone, serde::Serialize)]
170#[serde(rename_all = "camelCase")]
171struct SarifLogicalLocation {
172    name: String,
173    kind: String,
174}
175
176#[allow(dead_code)]
177#[derive(Debug, Clone, serde::Serialize)]
178#[serde(rename_all = "camelCase")]
179struct SarifLog {
180    version: String,
181    #[serde(rename = "$schema")]
182    schema: String,
183    runs: Vec<SarifRun>,
184}
185
186impl<W: Write + Send> SarifReporter<W> {
187    pub fn new(writer: W) -> Self {
188        Self {
189            writer,
190            rules: HashMap::new(),
191            prefix_written: false,
192            any_result: false,
193        }
194    }
195
196    /// Lazily emit the SARIF document skeleton up to the start of the
197    /// `results` array. Idempotent.
198    fn ensure_prefix(&mut self) -> Result<(), ReportError> {
199        if self.prefix_written {
200            return Ok(());
201        }
202        // Manual JSON: serde won't help us here because we want to write
203        // results streamed BEFORE we know the rule set. We use
204        // `serde_json::to_string` for value escaping.
205        let version = env!("CARGO_PKG_VERSION");
206        write!(
207            self.writer,
208            r#"{{"version":"2.1.0","$schema":"https://raw.githubusercontent.com/oasis-tcs/sarif-spec/main/sarif-2.1.0/sarif-schema-2.1.0.json","runs":[{{"results":["#
209        )?;
210        let _ = version;
211        self.prefix_written = true;
212        Ok(())
213    }
214
215    fn build_sarif_result(finding: &VerifiedFinding) -> SarifResult {
216        let locations = vec![Self::location_to_sarif(&finding.location)];
217        let related_locations: Vec<SarifLocation> = finding
218            .additional_locations
219            .iter()
220            .map(Self::location_to_sarif)
221            .collect();
222
223        let mut properties = serde_json::Map::new();
224        properties.insert(
225            "verification".to_string(),
226            serde_json::Value::String(format!("{:?}", finding.verification).to_lowercase()),
227        );
228        if let Some(confidence) = finding.confidence {
229            properties.insert(
230                "confidence".to_string(),
231                serde_json::Value::Number(
232                    serde_json::Number::from_f64(confidence).unwrap_or_else(|| 0.into()),
233                ),
234            );
235        }
236        // CWE / OWASP taxonomy. CWE-798 ("Use of Hard-coded Credentials") and
237        // OWASP A07:2021 ("Identification and Authentication Failures") apply
238        // to every secret-scanning finding by definition. Compliance dashboards
239        // consume `properties.cwe` + `properties.owasp` directly. Tier-B #16.
240        properties.insert(
241            "cwe".to_string(),
242            serde_json::Value::String("CWE-798".to_string()),
243        );
244        properties.insert(
245            "owasp".to_string(),
246            serde_json::Value::String("A07:2021".to_string()),
247        );
248        for (key, value) in &finding.metadata {
249            properties.insert(
250                format!("metadata.{}", key),
251                serde_json::Value::String(value.to_string()),
252            );
253        }
254
255        // Auto-fix suggestion: replace the leaked credential with a
256        // ${ENV_VAR_NAME} reference at the same physical location. We emit
257        // this only when we have a file_path (no fix possible for stdin /
258        // git-history-only findings) AND a line number.
259        let fixes = if let (Some(_), Some(line)) =
260            (finding.location.file_path.as_ref(), finding.location.line)
261        {
262            let replacement = crate::auto_fix::fix_replacement_text(&finding.service);
263            let env_name = crate::auto_fix::env_var_name_for_service(&finding.service);
264            Some(vec![SarifFix {
265                description: SarifMessage {
266                    text: format!(
267                        "Replace the leaked credential with `{replacement}` and load `{env_name}` from your secret manager."
268                    ),
269                },
270                artifact_changes: vec![SarifArtifactChange {
271                    artifact_location: SarifArtifactLocation {
272                        uri: finding
273                            .location
274                            .file_path
275                            .as_deref()
276                            .map(|s| s.to_string())
277                            .unwrap_or_default(),
278                        uri_base_id: None,
279                    },
280                    replacements: vec![SarifReplacement {
281                        deleted_region: SarifRegion {
282                            start_line: Some(line),
283                            start_column: None,
284                            end_line: None,
285                            end_column: None,
286                            snippet: None,
287                        },
288                        inserted_content: SarifSnippet {
289                            text: replacement,
290                        },
291                    }],
292                }],
293            }])
294        } else {
295            None
296        };
297
298        SarifResult {
299            rule_id: finding.detector_id.to_string(),
300            level: Self::severity_to_level(finding.severity).to_string(),
301            message: SarifMessage {
302                text: format!(
303                    "{} secret detected: {}",
304                    finding.service, finding.credential_redacted
305                ),
306            },
307            locations,
308            properties: Some(properties),
309            related_locations: if related_locations.is_empty() {
310                None
311            } else {
312                Some(related_locations)
313            },
314            fixes,
315        }
316    }
317
318    fn severity_to_level(severity: Severity) -> &'static str {
319        match severity {
320            Severity::Critical => "error",
321            Severity::High => "error",
322            Severity::Medium => "warning",
323            Severity::Low => "note",
324            Severity::Info => "note",
325        }
326    }
327
328    fn build_rule(finding: &VerifiedFinding) -> SarifRule {
329        SarifRule {
330            id: finding.detector_id.to_string(),
331            name: finding.detector_name.to_string(),
332            short_description: Some(SarifMessage {
333                text: format!("{} secret detected", finding.service),
334            }),
335            full_description: Some(SarifMessage {
336                text: format!(
337                    "A {} secret was detected by the {} detector",
338                    finding.service, finding.detector_name
339                ),
340            }),
341            help: Some(SarifMessage {
342                text: format!(
343                    "Review and rotate the exposed {} credential.",
344                    finding.service
345                ),
346            }),
347            properties: Some({
348                let mut props = serde_json::Map::new();
349                props.insert(
350                    "service".to_string(),
351                    serde_json::Value::String(finding.service.to_string()),
352                );
353                props.insert(
354                    "severity".to_string(),
355                    serde_json::Value::String(format!("{:?}", finding.severity).to_lowercase()),
356                );
357                props
358            }),
359        }
360    }
361
362    fn location_to_sarif(loc: &MatchLocation) -> SarifLocation {
363        let uri = loc
364            .file_path
365            .as_ref()
366            .map(|p| p.to_string())
367            .unwrap_or_else(|| "stdin".to_string());
368
369        let artifact_location = Some(SarifArtifactLocation {
370            uri,
371            uri_base_id: None,
372        });
373
374        let region = loc.line.map(|line| SarifRegion {
375            start_line: Some(line),
376            start_column: None,
377            end_line: None,
378            end_column: None,
379            snippet: None,
380        });
381
382        let mut logical_locations = Vec::new();
383
384        if let Some(commit) = &loc.commit {
385            logical_locations.push(SarifLogicalLocation {
386                name: commit.to_string(),
387                kind: "commit".to_string(),
388            });
389        }
390
391        if let Some(author) = &loc.author {
392            logical_locations.push(SarifLogicalLocation {
393                name: author.to_string(),
394                kind: "author".to_string(),
395            });
396        }
397
398        if let Some(date) = &loc.date {
399            logical_locations.push(SarifLogicalLocation {
400                name: date.to_string(),
401                kind: "date".to_string(),
402            });
403        }
404
405        SarifLocation {
406            physical_location: SarifPhysicalLocation {
407                artifact_location,
408                region,
409            },
410            logical_locations: if logical_locations.is_empty() {
411                None
412            } else {
413                Some(logical_locations)
414            },
415        }
416    }
417}
418
419impl<W: Write + Send> Reporter for SarifReporter<W> {
420    fn report(&mut self, finding: &VerifiedFinding) -> Result<(), ReportError> {
421        self.ensure_prefix()?;
422
423        let detector_id = finding.detector_id.as_ref();
424        if !self.rules.contains_key(detector_id) {
425            let rule = Self::build_rule(finding);
426            self.rules.insert(detector_id.to_string(), rule);
427        }
428
429        // Stream this result directly to the writer. No per-finding buffer.
430        if self.any_result {
431            self.writer.write_all(b",")?;
432        }
433        let result = Self::build_sarif_result(finding);
434        serde_json::to_writer(&mut self.writer, &result)?;
435        self.any_result = true;
436        Ok(())
437    }
438
439    fn finish(&mut self) -> Result<(), ReportError> {
440        // If `report()` was never called we still need a valid SARIF doc.
441        self.ensure_prefix()?;
442
443        // Close the results array; emit tool.driver with the accumulated
444        // rules; emit taxonomies (CWE + OWASP) so consumers can resolve
445        // `properties.cwe` references; close runs[0], runs[], and the doc.
446        write!(self.writer, "],\"tool\":")?;
447
448        let mut rules: Vec<SarifRule> = self.rules.values().cloned().collect();
449        rules.sort_by(|a, b| a.id.cmp(&b.id));
450        let tool = SarifTool {
451            driver: SarifToolDriver {
452                name: "keyhog".to_string(),
453                version: Some(env!("CARGO_PKG_VERSION").to_string()),
454                information_uri: Some("https://github.com/keyhog/keyhog".to_string()),
455                rules,
456            },
457        };
458        serde_json::to_writer(&mut self.writer, &tool)?;
459
460        // SARIF taxonomies block — each entry references a canonical entry in
461        // CWE / OWASP. Compliance dashboards (e.g. SonarQube, GitHub Code
462        // Scanning, Splunk) resolve `result.properties.cwe = "CWE-798"`
463        // against this block. Tier-B #16 from audits/legendary-2026-04-26.
464        write!(self.writer, ",\"taxonomies\":")?;
465        let taxonomies = serde_json::json!([
466            {
467                "name": "CWE",
468                "version": "4.13",
469                "informationUri": "https://cwe.mitre.org/data/definitions/798.html",
470                "shortDescription": { "text": "Common Weakness Enumeration" },
471                "taxa": [{
472                    "id": "CWE-798",
473                    "name": "Use of Hard-coded Credentials",
474                    "shortDescription": {
475                        "text": "The product contains hard-coded credentials, such as a password or cryptographic key, which it uses for its own inbound authentication, outbound communication to external components, or encryption of internal data."
476                    },
477                    "helpUri": "https://cwe.mitre.org/data/definitions/798.html"
478                }]
479            },
480            {
481                "name": "OWASP",
482                "version": "2021",
483                "informationUri": "https://owasp.org/Top10/A07_2021-Identification_and_Authentication_Failures/",
484                "shortDescription": { "text": "OWASP Top 10:2021" },
485                "taxa": [{
486                    "id": "A07:2021",
487                    "name": "Identification and Authentication Failures",
488                    "shortDescription": {
489                        "text": "Confirmation of the user's identity, authentication, and session management is critical to protect against authentication-related attacks."
490                    },
491                    "helpUri": "https://owasp.org/Top10/A07_2021-Identification_and_Authentication_Failures/"
492                }]
493            }
494        ]);
495        serde_json::to_writer(&mut self.writer, &taxonomies)?;
496
497        write!(self.writer, "}}]}}")?;
498        writeln!(self.writer)?;
499        self.flush_writer()
500    }
501}
502
503impl<W: Write + Send> WriterBackedReporter for SarifReporter<W> {
504    type Writer = W;
505
506    fn writer_mut(&mut self) -> &mut Self::Writer {
507        &mut self.writer
508    }
509}
510
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{MatchLocation, VerificationResult};
    use std::collections::HashMap;
    use std::sync::Arc;

    /// A minimal filesystem finding with a file path and a line number.
    fn synthetic_finding() -> VerifiedFinding {
        let location = MatchLocation {
            source: Arc::from("filesystem"),
            file_path: Some(Arc::from("config.env")),
            line: Some(42),
            offset: 0,
            commit: None,
            author: None,
            date: None,
        };
        VerifiedFinding {
            detector_id: Arc::from("test-detector"),
            detector_name: Arc::from("Test Detector"),
            service: Arc::from("test"),
            severity: Severity::High,
            credential_redacted: std::borrow::Cow::Borrowed("****redacted"),
            credential_hash: "abcdefabcdefabcdef".into(),
            location,
            verification: VerificationResult::Unverifiable,
            metadata: HashMap::new(),
            additional_locations: vec![],
            confidence: Some(0.9),
        }
    }

    /// Runs a reporter over `findings` and returns the raw bytes it wrote.
    fn render(findings: &[VerifiedFinding]) -> Vec<u8> {
        let mut buf: Vec<u8> = Vec::new();
        {
            let mut reporter = SarifReporter::new(&mut buf);
            for finding in findings {
                reporter.report(finding).unwrap();
            }
            reporter.finish().unwrap();
        }
        buf
    }

    /// Looks up a string by JSON pointer; `None` when absent or not a string.
    fn str_at<'a>(doc: &'a serde_json::Value, pointer: &str) -> Option<&'a str> {
        doc.pointer(pointer).and_then(serde_json::Value::as_str)
    }

    #[test]
    fn sarif_output_is_valid_json_with_cwe_owasp_taxa() {
        let bytes = render(&[synthetic_finding()]);
        let json: serde_json::Value =
            serde_json::from_slice(&bytes).expect("SARIF output must parse as JSON");

        // Each result carries the CWE and OWASP ids in its property bag.
        assert_eq!(
            str_at(&json, "/runs/0/results/0/properties/cwe"),
            Some("CWE-798")
        );
        assert_eq!(
            str_at(&json, "/runs/0/results/0/properties/owasp"),
            Some("A07:2021")
        );

        // The run-level taxonomies block defines those ids.
        assert_eq!(str_at(&json, "/runs/0/taxonomies/0/name"), Some("CWE"));
        assert_eq!(
            str_at(&json, "/runs/0/taxonomies/0/taxa/0/id"),
            Some("CWE-798")
        );
        assert_eq!(str_at(&json, "/runs/0/taxonomies/1/name"), Some("OWASP"));

        // The fix suggests an env-var reference for the leaked credential;
        // for service "test" the expected fallback is ${TEST_KEY}.
        assert_eq!(
            str_at(
                &json,
                "/runs/0/results/0/fixes/0/artifactChanges/0/replacements/0/insertedContent/text"
            ),
            Some("${TEST_KEY}")
        );
        assert_eq!(
            str_at(
                &json,
                "/runs/0/results/0/fixes/0/artifactChanges/0/artifactLocation/uri"
            ),
            Some("config.env")
        );
    }

    #[test]
    fn empty_run_still_produces_valid_sarif() {
        let bytes = render(&[]);
        let json: serde_json::Value = serde_json::from_slice(&bytes).expect("valid JSON");

        assert_eq!(json["version"].as_str(), Some("2.1.0"));
        let results = json["runs"][0]["results"]
            .as_array()
            .expect("results array");
        assert!(results.is_empty());
    }
}