taudit_report_json/
lib.rs

1use taudit_core::error::TauditError;
2use taudit_core::finding::{
3    compute_finding_group_id, compute_fingerprint, compute_suppression_key, rule_id_for, Finding,
4};
5use taudit_core::graph::{
6    is_docker_digest_pinned, is_pin_semantically_valid, AuthorityCompleteness, AuthorityGraph,
7    EdgeKind, GapKind, NodeKind, META_CONTAINER, META_OIDC, META_SERVICE_CONNECTION,
8    META_SERVICE_CONNECTION_NAME, META_VARIABLE_GROUP,
9};
10use taudit_core::ports::ReportSink;
11
12use serde::Serialize;
13
/// Schema version that `JsonReportSink::emit` stamps into
/// `JsonReport::schema_version`.
const JSON_REPORT_SCHEMA_VERSION: &str = "1.0.0";
/// Schema URI that `JsonReportSink::emit` stamps into
/// `JsonReport::schema_uri`.
const JSON_REPORT_SCHEMA_URI: &str = "https://taudit.dev/schemas/taudit-report.schema.json";

/// Schema version of the standalone authority-graph export
/// (`taudit graph --format json`). Semver-stable: 1.x.y additions are
/// non-breaking; 2.0.0 means breaking changes.
pub const AUTHORITY_GRAPH_SCHEMA_VERSION: &str = "1.0.0";

/// Canonical URI of the authority-graph JSON Schema.
pub const AUTHORITY_GRAPH_SCHEMA_URI: &str = "https://taudit.dev/schemas/authority-graph.v1.json";
24
/// JSON report containing the full authority graph and all findings.
///
/// Fields are serialized in declaration order, so the order below is the
/// key order of the emitted document.
#[derive(Serialize)]
pub struct JsonReport<'a> {
    /// Version string of the report schema.
    pub schema_version: &'static str,
    /// Canonical URI of the JSON Schema this report conforms to.
    /// Non-breaking addition (1.x consumers ignore unknown fields).
    pub schema_uri: &'static str,
    /// Borrowed authority graph, serialized in full under `graph`.
    pub graph: &'a AuthorityGraph,
    /// One enriched entry per finding (rule id, fingerprint, suppression key).
    pub findings: Vec<FindingWithFingerprint>,
    /// Aggregate counts and completeness information for the report.
    pub summary: Summary,
}
36
/// Per-finding wrapper that flattens the upstream `Finding` fields and
/// appends a stable `fingerprint`. The fingerprint matches the value
/// surfaced by SARIF `partialFingerprints[primaryLocationLineHash]` and
/// CloudEvents extension attribute `tauditfindingfingerprint`, so a SIEM
/// keying on any of the three sees the same identifier per finding.
/// See `docs/finding-fingerprint.md` for the contract.
///
/// The `rule_id` field carries the snake_case rule identifier (custom-rule
/// id when the finding came from a YAML rule with a `[id] …` message
/// prefix, otherwise the snake_case form of the category enum). This is
/// the same id surfaced in SARIF `result.ruleId` and CloudEvents
/// `taudit.rule_id`, so JSON consumers can filter/group by rule without
/// re-deriving it from the category serialization.
///
/// The wrapper owns its `Finding` so the JSON sink can populate
/// `extras.finding_group_id` from the fingerprint without mutating the
/// caller's finding list. See `docs/finding-output-enhancements.md`.
#[derive(Serialize)]
pub struct FindingWithFingerprint {
    /// Rule identifier as returned by `rule_id_for` for this finding.
    pub rule_id: String,
    /// Owned copy of the finding; its fields are flattened into this
    /// object rather than nested under a `finding` key.
    #[serde(flatten)]
    pub finding: Finding,
    /// Value returned by `compute_fingerprint` for this finding and graph.
    pub fingerprint: String,
    /// Value returned by `compute_suppression_key` for this finding and graph.
    pub suppression_key: String,
}
62
63/// Standalone authority-graph export — the document emitted by
64/// `taudit graph --format json`. Versioned independently from the scan
65/// report because downstream tools (tsign, axiom, runtime cells)
66/// consume the graph without caring about findings.
67#[derive(Serialize)]
68pub struct GraphExport<'a> {
69    /// Semver of the authority-graph schema. See `AUTHORITY_GRAPH_SCHEMA_VERSION`.
70    pub schema_version: &'static str,
71    /// Canonical URI of the schema this document conforms to.
72    pub schema_uri: &'static str,
73    /// The authority graph itself.
74    pub graph: &'a AuthorityGraph,
75}
76
77impl<'a> GraphExport<'a> {
78    /// Wrap a graph reference in a versioned export envelope.
79    pub fn new(graph: &'a AuthorityGraph) -> Self {
80        Self {
81            schema_version: AUTHORITY_GRAPH_SCHEMA_VERSION,
82            schema_uri: AUTHORITY_GRAPH_SCHEMA_URI,
83            graph,
84        }
85    }
86
87    /// Serialize to pretty-printed JSON.
88    pub fn to_json_pretty(&self) -> Result<String, TauditError> {
89        serde_json::to_string_pretty(self)
90            .map_err(|e| TauditError::Report(format!("graph JSON serialization error: {e}")))
91    }
92}
93
/// Structured representation of a single graph completeness gap as it
/// appears in `summary.completeness_gaps`. Each entry pairs the typed
/// `GapKind` (so SIEMs can filter by class of imprecision) with the
/// human-readable `reason` (so analysts can read it).
#[derive(Serialize)]
pub struct CompletenessGap {
    /// Typed class of the gap.
    pub kind: GapKind,
    /// Human-readable explanation of the gap.
    pub reason: String,
}
103
/// Aggregate section of the JSON report: finding counts per severity,
/// graph size, and graph completeness information.
#[derive(Serialize)]
pub struct Summary {
    /// Number of findings passed to the sink.
    pub total_findings: usize,
    /// Number of findings with `Severity::Critical`.
    pub critical: usize,
    /// Number of findings with `Severity::High`.
    pub high: usize,
    /// Number of findings with `Severity::Medium`.
    pub medium: usize,
    /// Number of findings with `Severity::Low`.
    pub low: usize,
    /// Number of findings with `Severity::Info`.
    pub info: usize,
    /// Number of nodes in the authority graph.
    pub total_nodes: usize,
    /// Number of edges in the authority graph.
    pub total_edges: usize,
    /// Completeness marker copied from the authority graph.
    pub completeness: AuthorityCompleteness,
    /// Structured `{kind, reason}` entries describing why the graph is
    /// partial. Built by zipping `AuthorityGraph.completeness_gap_kinds`
    /// with `AuthorityGraph.completeness_gaps`. Omitted when empty.
    #[serde(skip_serializing_if = "Vec::is_empty")]
    pub completeness_gaps: Vec<CompletenessGap>,
    /// Compact graph-density rollup for report authors and triage systems.
    /// This is not a finding; it describes how much authority-bearing surface
    /// the graph exposed so large workflows can be discussed without relying
    /// on raw finding count alone.
    #[serde(skip_serializing_if = "GraphRiskSummary::is_empty")]
    pub graph_risk_summary: GraphRiskSummary,
}
127
128/// High-signal graph-density metrics used by reports and dashboards.
129#[derive(Serialize, Default)]
130pub struct GraphRiskSummary {
131    pub authority_roots: usize,
132    pub untrusted_sinks: usize,
133    pub mutable_refs: usize,
134    pub publication_adjacent_sinks: usize,
135    pub delegation_hops: usize,
136    #[serde(skip_serializing_if = "Vec::is_empty")]
137    pub protected_resource_categories: Vec<String>,
138}
139
140impl GraphRiskSummary {
141    fn is_empty(&self) -> bool {
142        self.authority_roots == 0
143            && self.untrusted_sinks == 0
144            && self.mutable_refs == 0
145            && self.publication_adjacent_sinks == 0
146            && self.delegation_hops == 0
147            && self.protected_resource_categories.is_empty()
148    }
149}
150
151fn graph_risk_summary(graph: &AuthorityGraph) -> GraphRiskSummary {
152    let mut protected = std::collections::BTreeSet::<String>::new();
153    let mut summary = GraphRiskSummary::default();
154
155    for node in &graph.nodes {
156        match node.kind {
157            NodeKind::Secret | NodeKind::Identity => {
158                summary.authority_roots += 1;
159            }
160            _ => {}
161        }
162
163        if node.trust_zone == taudit_core::graph::TrustZone::Untrusted {
164            summary.untrusted_sinks += 1;
165        }
166
167        if node.kind == NodeKind::Image
168            && !node
169                .metadata
170                .get(META_CONTAINER)
171                .map(|v| v == "true")
172                .unwrap_or(false)
173            && !is_pin_semantically_valid(&node.name)
174            && !is_docker_digest_pinned(&node.name)
175        {
176            summary.mutable_refs += 1;
177        }
178
179        let lower = node.name.to_ascii_lowercase();
180        if node.kind == NodeKind::Step
181            && ["publish", "release", "deploy", "push", "upload"]
182                .iter()
183                .any(|needle| lower.contains(needle))
184        {
185            summary.publication_adjacent_sinks += 1;
186        }
187
188        if node.metadata.contains_key(META_VARIABLE_GROUP) {
189            protected.insert("variable_group".into());
190        }
191        if node.metadata.contains_key(META_SERVICE_CONNECTION)
192            || node.metadata.contains_key(META_SERVICE_CONNECTION_NAME)
193        {
194            protected.insert("service_connection".into());
195        }
196        if node.metadata.contains_key(META_OIDC) {
197            protected.insert("oidc_identity".into());
198        }
199        if node.kind == NodeKind::Secret {
200            protected.insert("secret".into());
201        }
202        if node.kind == NodeKind::Identity {
203            protected.insert("identity".into());
204        }
205    }
206
207    summary.delegation_hops = graph
208        .edges
209        .iter()
210        .filter(|edge| edge.kind == EdgeKind::DelegatesTo)
211        .count();
212    summary.protected_resource_categories = protected.into_iter().collect();
213    summary
214}
215
216pub struct JsonReportSink;
217
218impl<W: std::io::Write> ReportSink<W> for JsonReportSink {
219    fn emit(
220        &self,
221        w: &mut W,
222        graph: &AuthorityGraph,
223        findings: &[Finding],
224    ) -> Result<(), TauditError> {
225        use taudit_core::finding::Severity;
226
227        // For each finding compute the fingerprint and derive the
228        // group id from it. We populate `extras.finding_group_id` on a
229        // cloned `Finding` so callers' lists stay untouched. If a rule
230        // already populated the group id, we respect that value.
231        let findings_with_fp: Vec<FindingWithFingerprint> = findings
232            .iter()
233            .map(|f| {
234                let fingerprint = compute_fingerprint(f, graph);
235                let rule_id = rule_id_for(f);
236                let mut owned = f.clone();
237                if owned.extras.finding_group_id.is_none() {
238                    owned.extras.finding_group_id = Some(compute_finding_group_id(&fingerprint));
239                }
240                FindingWithFingerprint {
241                    rule_id,
242                    finding: owned,
243                    suppression_key: compute_suppression_key(f, graph),
244                    fingerprint,
245                }
246            })
247            .collect();
248
249        let report = JsonReport {
250            schema_version: JSON_REPORT_SCHEMA_VERSION,
251            schema_uri: JSON_REPORT_SCHEMA_URI,
252            graph,
253            findings: findings_with_fp,
254            summary: Summary {
255                total_findings: findings.len(),
256                critical: findings
257                    .iter()
258                    .filter(|f| f.severity == Severity::Critical)
259                    .count(),
260                high: findings
261                    .iter()
262                    .filter(|f| f.severity == Severity::High)
263                    .count(),
264                medium: findings
265                    .iter()
266                    .filter(|f| f.severity == Severity::Medium)
267                    .count(),
268                low: findings
269                    .iter()
270                    .filter(|f| f.severity == Severity::Low)
271                    .count(),
272                info: findings
273                    .iter()
274                    .filter(|f| f.severity == Severity::Info)
275                    .count(),
276                total_nodes: graph.nodes.len(),
277                total_edges: graph.edges.len(),
278                completeness: graph.completeness,
279                // Zip kinds with reasons. `zip` stops at the shorter
280                // iterator, so if `completeness_gap_kinds` is somehow
281                // shorter than `completeness_gaps` (shouldn't happen —
282                // mark_partial pushes both — but be safe), we silently
283                // drop the unkinded extras rather than emit a malformed
284                // gap.
285                completeness_gaps: graph
286                    .completeness_gap_kinds
287                    .iter()
288                    .zip(graph.completeness_gaps.iter())
289                    .map(|(kind, reason)| CompletenessGap {
290                        kind: *kind,
291                        reason: reason.clone(),
292                    })
293                    .collect(),
294                graph_risk_summary: graph_risk_summary(graph),
295            },
296        };
297
298        serde_json::to_writer_pretty(w, &report)
299            .map_err(|e| TauditError::Report(format!("JSON serialization error: {e}")))?;
300
301        Ok(())
302    }
303}
304
305#[cfg(test)]
306mod tests {
307    use crate::JsonReportSink;
308    use std::{fs, path::PathBuf};
309    use taudit_core::finding::{Finding, FindingExtras, Recommendation, Severity};
310    use taudit_core::graph::PipelineSource;
311    use taudit_core::ports::ReportSink;
312
313    fn workspace_file(relative: &str) -> PathBuf {
314        PathBuf::from(env!("CARGO_MANIFEST_DIR"))
315            .join("../..")
316            .join(relative)
317    }
318
319    fn read_json(relative: &str) -> serde_json::Value {
320        let path = workspace_file(relative);
321        let text = fs::read_to_string(&path)
322            .unwrap_or_else(|err| panic!("failed to read {}: {err}", path.display()));
323        serde_json::from_str(&text)
324            .unwrap_or_else(|err| panic!("failed to parse {}: {err}", path.display()))
325    }
326
327    fn assert_schema_validates_instance(schema_relative: &str, instance_relative: &str) {
328        let schema = read_json(schema_relative);
329        let instance = read_json(instance_relative);
330        let validator = jsonschema::validator_for(&schema)
331            .unwrap_or_else(|err| panic!("invalid schema {schema_relative}: {err}"));
332        let errors: Vec<String> = validator
333            .iter_errors(&instance)
334            .map(|err| err.to_string())
335            .collect();
336        assert!(
337            errors.is_empty(),
338            "{instance_relative} does not match {schema_relative}:\n{}",
339            errors.join("\n")
340        );
341    }
342
343    #[test]
344    fn emitted_report_includes_schema_version_and_matches_schema() {
345        let graph = taudit_core::graph::AuthorityGraph::new(PipelineSource {
346            file: ".github/workflows/ci.yml".into(),
347            repo: None,
348            git_ref: None,
349            commit_sha: None,
350        });
351        let findings = vec![Finding {
352            severity: Severity::Medium,
353            category: taudit_core::finding::FindingCategory::UnpinnedAction,
354            path: None,
355            nodes_involved: vec![],
356            message: "test finding".into(),
357            recommendation: Recommendation::Manual {
358                action: "pin the action".into(),
359            },
360            source: taudit_core::finding::FindingSource::BuiltIn,
361            extras: FindingExtras::default(),
362        }];
363
364        let mut buf = Vec::new();
365        JsonReportSink.emit(&mut buf, &graph, &findings).unwrap();
366
367        let report: serde_json::Value = serde_json::from_slice(&buf).unwrap();
368        assert_eq!(report["schema_version"], "1.0.0");
369
370        let schema = read_json("contracts/schemas/taudit-report.schema.json");
371        let validator = jsonschema::validator_for(&schema).expect("report schema should compile");
372        let errors: Vec<String> = validator
373            .iter_errors(&report)
374            .map(|err| err.to_string())
375            .collect();
376
377        assert!(
378            errors.is_empty(),
379            "emitted report does not match report schema:\n{}",
380            errors.join("\n")
381        );
382    }
383
384    #[test]
385    fn clean_report_example_matches_schema() {
386        assert_schema_validates_instance(
387            "contracts/schemas/taudit-report.schema.json",
388            "contracts/examples/clean-report.json",
389        );
390    }
391
392    #[test]
393    fn over_privileged_report_example_matches_schema() {
394        assert_schema_validates_instance(
395            "contracts/schemas/taudit-report.schema.json",
396            "contracts/examples/over-privileged-report.json",
397        );
398    }
399
400    /// End-to-end: build a graph that exercises every NodeKind, every
401    /// TrustZone, and every EdgeKind, emit it through the standalone
402    /// GraphExport envelope, then validate the JSON against the
403    /// authority-graph v1 schema. Catches drift between the Rust types
404    /// and the published schema before downstream consumers do.
405    #[test]
406    fn authority_graph_export_matches_v1_schema() {
407        use taudit_core::graph::{
408            AuthorityGraph, EdgeKind, GapKind, NodeKind, PipelineSource, TrustZone,
409        };
410
411        let mut graph = AuthorityGraph::new(PipelineSource {
412            file: "tests/fixtures/over-privileged.yml".into(),
413            repo: Some("0ryant/taudit".into()),
414            git_ref: Some("main".into()),
415            commit_sha: None,
416        });
417        graph.mark_partial(
418            GapKind::Expression,
419            "inline shell scripts not fully resolved",
420        );
421
422        let secret = graph.add_node(NodeKind::Secret, "AWS_KEY", TrustZone::FirstParty);
423        let identity = graph.add_node(NodeKind::Identity, "GITHUB_TOKEN", TrustZone::FirstParty);
424        let image = graph.add_node(NodeKind::Image, "ubuntu-latest", TrustZone::ThirdParty);
425        let step_build = graph.add_node(NodeKind::Step, "build", TrustZone::FirstParty);
426        let artifact = graph.add_node(NodeKind::Artifact, "dist.tar.gz", TrustZone::FirstParty);
427        let step_deploy = graph.add_node(NodeKind::Step, "deploy", TrustZone::Untrusted);
428
429        graph.add_edge(step_build, secret, EdgeKind::HasAccessTo);
430        graph.add_edge(step_build, identity, EdgeKind::HasAccessTo);
431        graph.add_edge(step_build, image, EdgeKind::UsesImage);
432        graph.add_edge(step_build, artifact, EdgeKind::Produces);
433        graph.add_edge(artifact, step_deploy, EdgeKind::Consumes);
434        graph.add_edge(step_build, step_deploy, EdgeKind::DelegatesTo);
435        graph.add_edge(step_build, secret, EdgeKind::PersistsTo);
436
437        graph.stamp_edge_authority_summaries();
438
439        let export = crate::GraphExport::new(&graph);
440        let json = export.to_json_pretty().expect("export serializes");
441        let value: serde_json::Value =
442            serde_json::from_str(&json).expect("export round-trips through serde_json");
443
444        assert_eq!(
445            value["schema_version"],
446            crate::AUTHORITY_GRAPH_SCHEMA_VERSION
447        );
448        assert_eq!(value["schema_uri"], crate::AUTHORITY_GRAPH_SCHEMA_URI);
449
450        // The standalone graph export keeps `completeness_gaps` and
451        // `completeness_gap_kinds` as parallel arrays (this is the
452        // authority-graph v1 schema contract). Confirm the Expression
453        // gap we marked is round-tripped under both keys.
454        assert_eq!(
455            value["graph"]["completeness_gaps"][0],
456            "inline shell scripts not fully resolved"
457        );
458        assert_eq!(value["graph"]["completeness_gap_kinds"][0], "expression");
459
460        let schema = read_json("schemas/authority-graph.v1.json");
461        let validator =
462            jsonschema::validator_for(&schema).expect("authority-graph schema should compile");
463        let errors: Vec<String> = validator
464            .iter_errors(&value)
465            .map(|err| err.to_string())
466            .collect();
467
468        assert!(
469            errors.is_empty(),
470            "graph export does not match authority-graph.v1.json:\n{}",
471            errors.join("\n")
472        );
473    }
474
475    /// Regression for the post-v0.9.1 fuzz-report B1 (HIGH): scanning the
476    /// same fixture nine times in a row must produce nine byte-identical
477    /// JSON outputs. Before the fix, HashMap iteration order leaked into
478    /// node IDs, edge `from`/`to`, and `metadata` key ordering — so each
479    /// run differed and any cache / SIEM keying on the JSON saw false
480    /// changes. The fix sorts parser HashMap iteration and serializes
481    /// graph metadata maps in sorted-key order.
482    #[test]
483    fn json_output_is_byte_deterministic_across_runs() {
484        use std::collections::HashMap;
485        use taudit_core::graph::{AuthorityGraph, EdgeKind, NodeKind, PipelineSource, TrustZone};
486
487        // Build a graph with rich metadata across multiple keys — exercises
488        // the HashMap-key-order code path that was previously the source of
489        // non-determinism. We then serialise it twice in sequence (mimics
490        // back-to-back runs of the same scan).
491        fn build_graph() -> (AuthorityGraph, Vec<Finding>) {
492            let mut graph = AuthorityGraph::new(PipelineSource {
493                file: "ci.yml".into(),
494                repo: None,
495                git_ref: None,
496                commit_sha: None,
497            });
498            let secret_a = graph.add_node(NodeKind::Secret, "AWS_KEY", TrustZone::FirstParty);
499            let secret_b = graph.add_node(NodeKind::Secret, "DEPLOY_TOKEN", TrustZone::FirstParty);
500            let step = graph.add_node(NodeKind::Step, "deploy", TrustZone::FirstParty);
501            graph.add_edge(step, secret_a, EdgeKind::HasAccessTo);
502            graph.add_edge(step, secret_b, EdgeKind::HasAccessTo);
503            // Stamp many metadata keys on the step so HashMap ordering matters.
504            if let Some(node) = graph.nodes.get_mut(step) {
505                let mut meta: HashMap<String, String> = HashMap::new();
506                meta.insert("z_field".into(), "z".into());
507                meta.insert("a_field".into(), "a".into());
508                meta.insert("m_field".into(), "m".into());
509                meta.insert("k_field".into(), "k".into());
510                meta.insert("c_field".into(), "c".into());
511                node.metadata = meta;
512            }
513            graph
514                .metadata
515                .insert("trigger".into(), "pull_request".into());
516            graph.metadata.insert("platform".into(), "github".into());
517            let findings = vec![Finding {
518                severity: Severity::High,
519                category: taudit_core::finding::FindingCategory::AuthorityPropagation,
520                path: None,
521                nodes_involved: vec![secret_a, step],
522                message: "AWS_KEY reaches deploy".into(),
523                recommendation: Recommendation::Manual {
524                    action: "scope it".into(),
525                },
526                source: taudit_core::finding::FindingSource::BuiltIn,
527                extras: taudit_core::finding::FindingExtras::default(),
528            }];
529            (graph, findings)
530        }
531
532        let mut runs: Vec<Vec<u8>> = Vec::with_capacity(9);
533        for _ in 0..9 {
534            let (g, f) = build_graph();
535            let mut buf = Vec::new();
536            JsonReportSink.emit(&mut buf, &g, &f).unwrap();
537            runs.push(buf);
538        }
539
540        let first = &runs[0];
541        for (i, run) in runs.iter().enumerate().skip(1) {
542            assert_eq!(
543                first, run,
544                "run 0 and run {i} produced byte-different JSON output (non-determinism regression)"
545            );
546        }
547    }
548
549    /// Regression for the post-v0.9.1 self-hosting-scan finding: every
550    /// `findings[].rule_id` was `null` in the JSON sink output, even though
551    /// SARIF and the text formatter surfaced rule names correctly. JSON
552    /// consumers (SIEMs, suppression DBs, dashboards) couldn't filter by
553    /// rule. Each finding must now carry a non-null `rule_id` string equal
554    /// to the snake_case form of the category — and a custom-rule message
555    /// prefix `[id]` must override the category id.
556    #[test]
557    fn each_finding_has_non_null_snake_case_rule_id() {
558        let graph = taudit_core::graph::AuthorityGraph::new(PipelineSource {
559            file: ".github/workflows/ci.yml".into(),
560            repo: None,
561            git_ref: None,
562            commit_sha: None,
563        });
564        let findings = vec![
565            Finding {
566                severity: Severity::High,
567                category: taudit_core::finding::FindingCategory::AuthorityPropagation,
568                path: None,
569                nodes_involved: vec![],
570                message: "GITHUB_TOKEN propagated".into(),
571                recommendation: Recommendation::Manual {
572                    action: "scope it".into(),
573                },
574                source: taudit_core::finding::FindingSource::BuiltIn,
575                extras: taudit_core::finding::FindingExtras::default(),
576            },
577            Finding {
578                severity: Severity::Medium,
579                category: taudit_core::finding::FindingCategory::UnpinnedAction,
580                path: None,
581                nodes_involved: vec![],
582                message: "[my_custom_rule] custom rule fired".into(),
583                recommendation: Recommendation::Manual {
584                    action: "pin it".into(),
585                },
586                source: taudit_core::finding::FindingSource::BuiltIn,
587                extras: taudit_core::finding::FindingExtras::default(),
588            },
589        ];
590
591        let mut buf = Vec::new();
592        JsonReportSink.emit(&mut buf, &graph, &findings).unwrap();
593        let report: serde_json::Value = serde_json::from_slice(&buf).unwrap();
594
595        let findings_arr = report["findings"].as_array().expect("findings is an array");
596        assert_eq!(findings_arr.len(), 2);
597
598        // Each finding has a non-null rule_id string.
599        for f in findings_arr {
600            let id = f["rule_id"].as_str();
601            assert!(
602                id.is_some(),
603                "every finding must have a string rule_id, got: {:?}",
604                f["rule_id"]
605            );
606            assert!(
607                !id.unwrap().is_empty(),
608                "rule_id must be non-empty, got: {:?}",
609                f["rule_id"]
610            );
611        }
612
613        // Category-derived id: snake_case form of FindingCategory.
614        assert_eq!(findings_arr[0]["rule_id"], "authority_propagation");
615        // Custom-rule prefix wins over the category id.
616        assert_eq!(findings_arr[1]["rule_id"], "my_custom_rule");
617    }
618
619    /// Lane 1F contract: `summary.completeness_gaps` is an array of
620    /// `{kind, reason}` objects, not plain strings. Each entry carries
621    /// the typed `GapKind` (snake_case: `expression` | `structural` |
622    /// `opaque`) so downstream consumers can filter / group gaps by
623    /// class without re-parsing the human-readable reason. Exercise
624    /// every variant in one report and assert both shape and values.
625    #[test]
626    fn summary_completeness_gaps_serialize_as_kind_reason_objects() {
627        use taudit_core::graph::GapKind;
628
629        let mut graph = taudit_core::graph::AuthorityGraph::new(PipelineSource {
630            file: ".github/workflows/ci.yml".into(),
631            repo: None,
632            git_ref: None,
633            commit_sha: None,
634        });
635        graph.mark_partial(GapKind::Structural, "composite action not found: ./action");
636        graph.mark_partial(
637            GapKind::Expression,
638            "matrix strategy hides some authority paths",
639        );
640        graph.mark_partial(GapKind::Opaque, "platform unknown; zero steps produced");
641
642        let mut buf = Vec::new();
643        JsonReportSink.emit(&mut buf, &graph, &[]).unwrap();
644        let report: serde_json::Value = serde_json::from_slice(&buf).unwrap();
645
646        let gaps = report["summary"]["completeness_gaps"]
647            .as_array()
648            .expect("summary.completeness_gaps must be an array");
649        assert_eq!(gaps.len(), 3, "all three gaps round-trip");
650
651        // Index 0: Structural
652        assert_eq!(gaps[0]["kind"], "structural");
653        assert_eq!(gaps[0]["reason"], "composite action not found: ./action");
654        // Index 1: Expression
655        assert_eq!(gaps[1]["kind"], "expression");
656        assert_eq!(
657            gaps[1]["reason"],
658            "matrix strategy hides some authority paths"
659        );
660        // Index 2: Opaque
661        assert_eq!(gaps[2]["kind"], "opaque");
662        assert_eq!(gaps[2]["reason"], "platform unknown; zero steps produced");
663
664        // Every entry must be an object with exactly the two contract
665        // keys — guards against a regression that drops the structured
666        // shape and falls back to bare strings.
667        for (i, gap) in gaps.iter().enumerate() {
668            assert!(gap.is_object(), "gap[{i}] must be an object, got: {gap:?}");
669            assert!(
670                gap.get("kind").and_then(|v| v.as_str()).is_some(),
671                "gap[{i}].kind must be a string"
672            );
673            assert!(
674                gap.get("reason").and_then(|v| v.as_str()).is_some(),
675                "gap[{i}].reason must be a string"
676            );
677        }
678
679        // The emitted report (with structured gaps) must still validate
680        // against the published JSON Schema — catches drift between the
681        // Rust types and `contracts/schemas/taudit-report.schema.json`.
682        let schema = read_json("contracts/schemas/taudit-report.schema.json");
683        let validator = jsonschema::validator_for(&schema).expect("report schema should compile");
684        let errors: Vec<String> = validator
685            .iter_errors(&report)
686            .map(|err| err.to_string())
687            .collect();
688        assert!(
689            errors.is_empty(),
690            "partial-graph report does not match report schema:\n{}",
691            errors.join("\n")
692        );
693    }
694
695    /// Regression for the P0 schema-drift class (Agent-10 Findings 2 + 3
696    /// of the v1.1.0-beta.2 deep-audit synthesis). The published
697    /// `taudit-report.schema.json` previously listed only 10 of the 63
698    /// `FindingCategory` variants in `Finding/properties/category/enum`.
699    /// With `additionalProperties: false`, a finding emitted by 53 of 63
700    /// rules was byte-valid but schema-invalid against the contract the
701    /// README publishes — strict-validating consumers rejected the output.
702    /// CI was blind because every prior schema test fired
703    /// `UnpinnedAction` or `AuthorityPropagation`, both inside the
704    /// stale 10-item subset.
705    ///
706    /// This test enumerates every variant — including the two reserved
707    /// ones, which are valid in OUTPUT — emits each through the JSON
708    /// sink, and validates the resulting report against the published
709    /// schema. Adding a new `FindingCategory` variant without updating
710    /// the schema generator (`scripts/generate-authority-invariant-schema.py`)
711    /// trips this test in addition to the CI `--check` step.
712    ///
713    /// Variants are hand-listed (no `strum`) — the workspace deliberately
714    /// avoids that dependency for one test. Removing a variant fails to
715    /// compile, which is a stronger signal than a missing-from-list bug.
716    #[test]
717    fn every_finding_category_variant_validates_against_report_schema() {
718        use taudit_core::finding::FindingCategory as C;
719
720        // Hand-listed enumeration of every FindingCategory variant. If a
721        // variant is added/removed in `crates/taudit-core/src/finding.rs`,
722        // this list MUST be updated in lock-step with the schema
723        // generator. The schema `--check` CI gate catches the schema half;
724        // this test catches the test-coverage half.
725        let all: Vec<C> = vec![
726            C::AuthorityPropagation,
727            C::OverPrivilegedIdentity,
728            C::UnpinnedAction,
729            C::UntrustedWithAuthority,
730            C::ArtifactBoundaryCrossing,
731            C::FloatingImage,
732            C::LongLivedCredential,
733            C::PersistedCredential,
734            C::TriggerContextMismatch,
735            C::CrossWorkflowAuthorityChain,
736            C::AuthorityCycle,
737            C::UpliftWithoutAttestation,
738            C::SelfMutatingPipeline,
739            C::CheckoutSelfPrExposure,
740            C::VariableGroupInPrJob,
741            C::SelfHostedPoolPrHijack,
742            C::SharedSelfHostedPoolNoIsolation,
743            C::ServiceConnectionScopeMismatch,
744            C::TemplateExtendsUnpinnedBranch,
745            C::TemplateRepoRefIsFeatureBranch,
746            C::VmRemoteExecViaPipelineSecret,
747            C::ShortLivedSasInCommandLine,
748            C::SecretToInlineScriptEnvExport,
749            C::SecretMaterialisedToWorkspaceFile,
750            C::KeyVaultSecretToPlaintext,
751            C::TerraformAutoApproveInProd,
752            C::AddSpnWithInlineScript,
753            C::ParameterInterpolationIntoShell,
754            C::RuntimeScriptFetchedFromFloatingUrl,
755            C::PrTriggerWithFloatingActionRef,
756            C::UntrustedApiResponseToEnvSink,
757            C::PrBuildPushesImageWithFloatingCredentials,
758            C::SecretViaEnvGateToUntrustedConsumer,
759            C::NoWorkflowLevelPermissionsBlock,
760            C::ProdDeployJobNoEnvironmentGate,
761            C::LongLivedSecretWithoutOidcRecommendation,
762            C::PullRequestWorkflowInconsistentForkCheck,
763            C::GitlabDeployJobMissingProtectedBranchOnly,
764            C::TerraformOutputViaSetvariableShellExpansion,
765            C::RiskyTriggerWithAuthority,
766            C::SensitiveValueInJobOutput,
767            C::ManualDispatchInputToUrlOrCommand,
768            C::SecretsInheritOverscopedPassthrough,
769            C::UnsafePrArtifactInWorkflowRunConsumer,
770            C::ScriptInjectionViaUntrustedContext,
771            C::InteractiveDebugActionInAuthorityWorkflow,
772            C::PrSpecificCacheKeyInDefaultBranchConsumer,
773            C::GhCliWithDefaultTokenEscalating,
774            C::GhaScriptInjectionToPrivilegedShell,
775            C::GhaWorkflowRunArtifactPoisoningToPrivilegedConsumer,
776            C::GhaRemoteScriptInAuthorityJob,
777            C::GhaPatRemoteUrlWrite,
778            C::GhaIssueCommentCommandToWriteToken,
779            C::GhaPrBuildPushesPublishableImage,
780            C::GhaManualDispatchRefToPrivilegedCheckout,
781            C::CiJobTokenToExternalApi,
782            C::IdTokenAudienceOverscoped,
783            C::UntrustedCiVarInShellInterpolation,
784            C::UnpinnedIncludeRemoteOrBranchRef,
785            C::DindServiceGrantsHostAuthority,
786            C::SecurityJobSilentlySkipped,
787            C::ChildPipelineTriggerInheritsAuthority,
788            C::CacheKeyCrossesTrustBoundary,
789            C::PatEmbeddedInGitRemoteUrl,
790            C::CiTokenTriggersDownstreamWithVariablePassthrough,
791            C::DotenvArtifactFlowsToPrivilegedDeployment,
792            C::SetvariableIssecretFalse,
793            C::HomoglyphInActionRef,
794            C::GhaHelperPathSensitiveArgv,
795            C::GhaHelperPathSensitiveStdin,
796            C::GhaHelperPathSensitiveEnv,
797            C::GhaPostAmbientEnvCleanupPath,
798            C::GhaActionMintedSecretToHelper,
799            C::GhaHelperUntrustedPathResolution,
800            C::GhaSecretOutputAfterHelperLogin,
801            C::LaterSecretMaterializedAfterPathMutation,
802            C::GhaSetupNodeCacheHelperPathHandoff,
803            C::GhaSetupPythonCacheHelperPathHandoff,
804            C::GhaSetupPythonPipInstallAuthorityEnv,
805            C::GhaSetupGoCacheHelperPathHandoff,
806            C::GhaDockerSetupQemuPrivilegedDockerHelper,
807            C::GhaToolInstallerThenShellHelperAuthority,
808            C::GhaWorkflowShellAuthorityConcentration,
809            C::GhaActionTokenEnvBeforeBareDownloadHelper,
810            C::GhaPostActionInputRetargetToCacheSave,
811            C::GhaTerraformWrapperSensitiveOutput,
812            C::GhaCompositeBareHelperAfterPathInstallWithSecretEnv,
813            C::GhaPulumiPathResolvedCliWithAuthority,
814            C::GhaPypiPublishOidcAfterPathMutation,
815            C::GhaChangesetsPublishCommandWithAuthority,
816            C::GhaRubygemsReleaseGitTokenAndOidcHelper,
817            C::GhaCompositeEntrypointPathShadowWithSecretEnv,
818            C::GhaDockerBuildxAuthorityPathHandoff,
819            C::GhaGoogleDeployGcloudCredentialPath,
820            C::GhaDatadogTestVisibilityInstallerAuthority,
821            C::GhaKubernetesHelperKubeconfigAuthority,
822            C::GhaAzureCompanionHelperAuthority,
823            C::GhaCreatePrGitTokenPathHandoff,
824            C::GhaImportGpgPrivateKeyHelperPath,
825            C::GhaSshAgentPrivateKeyToPathHelper,
826            C::GhaMacosCodesignCertSecurityPath,
827            C::GhaPagesDeployTokenUrlToGitHelper,
828            C::GhaToolcacheAbsolutePathDowngrade,
829            // Reserved categories — valid in OUTPUT (the Rust enum can
830            // construct them); rejected in custom-rule YAML INPUT via
831            // `#[serde(skip_deserializing)]`.
832            C::EgressBlindspot,
833            C::MissingAuditTrail,
834        ];
835
836        // Sanity guard: 93 is the wire-contract count the schema
837        // generator emits. A drift between this list and the enum is the
838        // exact failure class this test exists to catch.
839        assert_eq!(
840            all.len(),
841            105,
842            "FindingCategory enumeration is out of sync with the schema generator (expected 105, got {})",
843            all.len()
844        );
845
846        let schema = read_json("contracts/schemas/taudit-report.schema.json");
847        let validator = jsonschema::validator_for(&schema).expect("report schema should compile");
848
849        for category in all {
850            let graph = taudit_core::graph::AuthorityGraph::new(PipelineSource {
851                file: ".github/workflows/ci.yml".into(),
852                repo: None,
853                git_ref: None,
854                commit_sha: None,
855            });
856            let findings = vec![Finding {
857                severity: Severity::Medium,
858                category,
859                path: None,
860                nodes_involved: vec![],
861                message: "category coverage probe".into(),
862                recommendation: Recommendation::Manual {
863                    action: "noop".into(),
864                },
865                source: taudit_core::finding::FindingSource::BuiltIn,
866                extras: FindingExtras::default(),
867            }];
868
869            let mut buf = Vec::new();
870            JsonReportSink
871                .emit(&mut buf, &graph, &findings)
872                .expect("sink emits");
873            let report: serde_json::Value =
874                serde_json::from_slice(&buf).expect("output is valid JSON");
875            let errors: Vec<String> = validator
876                .iter_errors(&report)
877                .map(|err| err.to_string())
878                .collect();
879            assert!(
880                errors.is_empty(),
881                "category {category:?} produced a report that fails the published schema:\n{}",
882                errors.join("\n")
883            );
884        }
885    }
886}
taudit_report_json/lib.rs

taudit_report_json/
lib.rs