Skip to main content

taudit_core/
propagation.rs

1use crate::graph::{AuthorityGraph, EdgeId, NodeId, TrustZone};
2use serde::{Deserialize, Serialize};
3use std::collections::VecDeque;
4
5/// A path that authority took through the graph.
6/// The path is the product — it's what makes findings persuasive.
7#[derive(Debug, Clone, Serialize, Deserialize)]
8pub struct PropagationPath {
9    /// The authority origin (Secret or Identity).
10    pub source: NodeId,
11    /// Where authority ended up.
12    pub sink: NodeId,
13    /// The full edge path from source to sink.
14    pub edges: Vec<EdgeId>,
15    /// Did this path cross a trust zone boundary?
16    pub crossed_boundary: bool,
17    /// If crossed, from which zone to which zone.
18    #[serde(skip_serializing_if = "Option::is_none")]
19    pub boundary_crossing: Option<(TrustZone, TrustZone)>,
20}
21
22/// Default maximum BFS depth. Override via CLI --max-hops.
23pub const DEFAULT_MAX_HOPS: usize = 4;
24
25/// Node-count threshold above which the dense-graph guard activates.
26/// Graphs at or below this size are always scanned regardless of edge density.
27pub const DENSE_GRAPH_NODE_THRESHOLD: usize = 50_000;
28
29/// Edge-to-node ratio above which a graph is considered "dense" for the
30/// purposes of the safety guard. A graph with `V > DENSE_GRAPH_NODE_THRESHOLD`
31/// AND `E > V * DENSE_GRAPH_EDGE_RATIO` will be refused unless the caller
32/// passes `force_dense = true` (CLI: `--force-scan-dense`).
33pub const DENSE_GRAPH_EDGE_RATIO: usize = 5;
34
35/// Error returned when a graph is too dense to scan safely without an
36/// explicit override. The error message is the user-visible string the
37/// CLI surfaces verbatim.
38#[derive(Debug, Clone, PartialEq, Eq)]
39pub struct DenseGraphError {
40    pub nodes: usize,
41    pub edges: usize,
42}
43
44impl std::fmt::Display for DenseGraphError {
45    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
46        write!(
47            f,
48            "graph too dense to scan safely (V={}, E={}); use --force-scan-dense to override",
49            self.nodes, self.edges
50        )
51    }
52}
53
54impl std::error::Error for DenseGraphError {}
55
56/// Returns true when the graph exceeds the size+density thresholds the
57/// propagation engine considers a DoS risk. Used by both the engine and
58/// the CLI surface.
59pub fn is_dense_graph(graph: &AuthorityGraph) -> bool {
60    let v = graph.nodes.len();
61    let e = graph.edges.len();
62    v > DENSE_GRAPH_NODE_THRESHOLD && e > v.saturating_mul(DENSE_GRAPH_EDGE_RATIO)
63}
64
65/// Walk the graph from every authority-bearing source node (Secret + Identity).
66/// Flag any path that reaches a node in a lower trust zone.
67///
68/// Backward-compatible entry point — does NOT enforce the dense-graph guard
69/// (preserves existing behaviour for callers that haven't opted into the
70/// safety check). New callers should prefer `propagation_analysis_checked`
71/// which returns an error on dense graphs unless explicitly overridden.
72pub fn propagation_analysis(graph: &AuthorityGraph, max_hops: usize) -> Vec<PropagationPath> {
73    propagation_analysis_inner(graph, max_hops)
74}
75
76/// Density-gated entry point. Returns `Err(DenseGraphError)` when the graph
77/// exceeds size+density thresholds AND `force_dense` is false. Otherwise
78/// behaves like `propagation_analysis`.
79pub fn propagation_analysis_checked(
80    graph: &AuthorityGraph,
81    max_hops: usize,
82    force_dense: bool,
83) -> Result<Vec<PropagationPath>, DenseGraphError> {
84    if !force_dense && is_dense_graph(graph) {
85        return Err(DenseGraphError {
86            nodes: graph.nodes.len(),
87            edges: graph.edges.len(),
88        });
89    }
90    Ok(propagation_analysis_inner(graph, max_hops))
91}
92
93/// The actual BFS engine. Pre-builds an adjacency-list index once per call
94/// so the inner loop never linear-scans the edge vector.
95///
96/// Complexity: O(sources × (V + E)) once adjacency is built. The previous
97/// implementation was O(sources × accessor_steps × V × E) because
98/// `edges_from`/`edges_to` are O(E) linear filters on the edge vector and
99/// the BFS reseeded from every accessor step independently. On the
100/// `dense_5x` bench at V=10 000 the pathological cost was ~1.08 s; with
101/// adjacency pre-indexing and per-source visited sharing it drops to
102/// well under 100 ms.
103fn propagation_analysis_inner(graph: &AuthorityGraph, max_hops: usize) -> Vec<PropagationPath> {
104    let n = graph.nodes.len();
105    let mut results = Vec::new();
106
107    if n == 0 {
108        return results;
109    }
110
111    // Outgoing adjacency: for each node, the (edge_id, dest_node_id) pairs.
112    let mut adj_out: Vec<Vec<(EdgeId, NodeId)>> = vec![Vec::new(); n];
113    // Incoming HasAccessTo edges keyed by destination — we only need this
114    // edge kind for finding accessor steps, so filter at index time.
115    let mut accessors_for: Vec<Vec<NodeId>> = vec![Vec::new(); n];
116
117    for edge in &graph.edges {
118        if edge.from < n && edge.to < n {
119            adj_out[edge.from].push((edge.id, edge.to));
120            if edge.kind == crate::graph::EdgeKind::HasAccessTo {
121                accessors_for[edge.to].push(edge.from);
122            }
123        }
124    }
125
126    // Reused buffers across sources to avoid per-source allocator churn on
127    // large graphs. `visited` is bulk-reset to false on each iteration; on
128    // small graphs that's a couple cache lines, on large graphs it's a
129    // single linear write — still strictly cheaper than re-allocating.
130    let mut visited: Vec<bool> = vec![false; n];
131    let mut queue: VecDeque<(NodeId, Vec<EdgeId>, usize)> = VecDeque::new();
132
133    for source_node in graph.authority_sources() {
134        let accessor_steps = &accessors_for[source_node.id];
135        if accessor_steps.is_empty() {
136            continue;
137        }
138
139        // Bulk reset visited buffer for this source's BFS.
140        for v in visited.iter_mut() {
141            *v = false;
142        }
143        queue.clear();
144
145        // Seed the BFS with every accessor step at once (single visited
146        // set, single frontier) — the previous code ran an independent
147        // BFS per accessor step, which on dense graphs duplicated O(V·E)
148        // work for every additional accessor.
149        for &start_step in accessor_steps {
150            if !visited[start_step] {
151                visited[start_step] = true;
152                for &(edge_id, to) in &adj_out[start_step] {
153                    queue.push_back((to, vec![edge_id], 1));
154                }
155            }
156        }
157
158        let source_zone = source_node.trust_zone;
159
160        while let Some((current_id, path, depth)) = queue.pop_front() {
161            if visited[current_id] {
162                continue;
163            }
164            if depth > max_hops {
165                continue;
166            }
167            visited[current_id] = true;
168
169            let current_node = match graph.node(current_id) {
170                Some(n) => n,
171                None => continue,
172            };
173
174            let current_zone = current_node.trust_zone;
175            if current_zone.is_lower_than(&source_zone) {
176                results.push(PropagationPath {
177                    source: source_node.id,
178                    sink: current_id,
179                    edges: path.clone(),
180                    crossed_boundary: true,
181                    boundary_crossing: Some((source_zone, current_zone)),
182                });
183            }
184
185            if depth >= max_hops {
186                continue;
187            }
188
189            for &(edge_id, to) in &adj_out[current_id] {
190                if !visited[to] {
191                    let mut new_path = path.clone();
192                    new_path.push(edge_id);
193                    queue.push_back((to, new_path, depth + 1));
194                }
195            }
196        }
197    }
198
199    results
200}
201
202#[cfg(test)]
203mod tests {
204    use super::*;
205    use crate::graph::*;
206
207    fn make_source(file: &str) -> PipelineSource {
208        PipelineSource {
209            file: file.into(),
210            repo: None,
211            git_ref: None,
212            commit_sha: None,
213        }
214    }
215
216    #[test]
217    fn detects_secret_propagation_across_trust_boundary() {
218        let mut g = AuthorityGraph::new(make_source("test.yml"));
219
220        // Secret in first-party zone
221        let secret = g.add_node(NodeKind::Secret, "AWS_KEY", TrustZone::FirstParty);
222        // First-party build step reads the secret
223        let build = g.add_node(NodeKind::Step, "build", TrustZone::FirstParty);
224        // Build produces an artifact
225        let artifact = g.add_node(NodeKind::Artifact, "dist.tar.gz", TrustZone::FirstParty);
226        // Third-party deploy step consumes the artifact
227        let deploy = g.add_node(NodeKind::Step, "deploy", TrustZone::ThirdParty);
228
229        g.add_edge(build, secret, EdgeKind::HasAccessTo);
230        g.add_edge(build, artifact, EdgeKind::Produces);
231        g.add_edge(artifact, deploy, EdgeKind::Consumes);
232
233        let paths = propagation_analysis(&g, DEFAULT_MAX_HOPS);
234
235        assert!(!paths.is_empty(), "should detect propagation");
236        assert!(paths
237            .iter()
238            .any(|p| p.source == secret && p.crossed_boundary));
239    }
240
241    #[test]
242    fn no_finding_when_same_trust_zone() {
243        let mut g = AuthorityGraph::new(make_source("test.yml"));
244
245        let secret = g.add_node(NodeKind::Secret, "TOKEN", TrustZone::FirstParty);
246        let step_a = g.add_node(NodeKind::Step, "lint", TrustZone::FirstParty);
247        let step_b = g.add_node(NodeKind::Step, "test", TrustZone::FirstParty);
248        let artifact = g.add_node(NodeKind::Artifact, "output", TrustZone::FirstParty);
249
250        g.add_edge(step_a, secret, EdgeKind::HasAccessTo);
251        g.add_edge(step_a, artifact, EdgeKind::Produces);
252        g.add_edge(artifact, step_b, EdgeKind::Consumes);
253
254        let paths = propagation_analysis(&g, DEFAULT_MAX_HOPS);
255
256        let boundary_crossings: Vec<_> = paths.iter().filter(|p| p.crossed_boundary).collect();
257        assert!(
258            boundary_crossings.is_empty(),
259            "no boundary crossing expected"
260        );
261    }
262
263    #[test]
264    fn respects_max_hops() {
265        let mut g = AuthorityGraph::new(make_source("test.yml"));
266
267        let secret = g.add_node(NodeKind::Secret, "KEY", TrustZone::FirstParty);
268        let s1 = g.add_node(NodeKind::Step, "s1", TrustZone::FirstParty);
269        let a1 = g.add_node(NodeKind::Artifact, "a1", TrustZone::FirstParty);
270        let s2 = g.add_node(NodeKind::Step, "s2", TrustZone::FirstParty);
271        let a2 = g.add_node(NodeKind::Artifact, "a2", TrustZone::FirstParty);
272        let s3 = g.add_node(NodeKind::Step, "s3", TrustZone::Untrusted);
273
274        g.add_edge(s1, secret, EdgeKind::HasAccessTo);
275        g.add_edge(s1, a1, EdgeKind::Produces);
276        g.add_edge(a1, s2, EdgeKind::Consumes);
277        g.add_edge(s2, a2, EdgeKind::Produces);
278        g.add_edge(a2, s3, EdgeKind::Consumes);
279
280        // With max_hops=2, should NOT reach s3 (which is 4 edges away)
281        let paths_short = propagation_analysis(&g, 2);
282        let boundary_short: Vec<_> = paths_short.iter().filter(|p| p.crossed_boundary).collect();
283        assert!(
284            boundary_short.is_empty(),
285            "should not reach untrusted at depth 2"
286        );
287
288        // With max_hops=5, should reach s3
289        let paths_long = propagation_analysis(&g, 5);
290        let boundary_long: Vec<_> = paths_long.iter().filter(|p| p.crossed_boundary).collect();
291        assert!(
292            !boundary_long.is_empty(),
293            "should reach untrusted at depth 5"
294        );
295    }
296
297    #[test]
298    fn identity_is_authority_source() {
299        let mut g = AuthorityGraph::new(make_source("test.yml"));
300
301        let identity = g.add_node(NodeKind::Identity, "GITHUB_TOKEN", TrustZone::FirstParty);
302        let step = g.add_node(NodeKind::Step, "publish", TrustZone::FirstParty);
303        let action = g.add_node(
304            NodeKind::Image,
305            "third-party/deploy@main",
306            TrustZone::Untrusted,
307        );
308
309        g.add_edge(step, identity, EdgeKind::HasAccessTo);
310        g.add_edge(step, action, EdgeKind::UsesImage);
311
312        let paths = propagation_analysis(&g, DEFAULT_MAX_HOPS);
313        assert!(paths
314            .iter()
315            .any(|p| p.source == identity && p.crossed_boundary));
316    }
317
318    #[test]
319    fn dense_graph_check_rejects_oversized_input() {
320        // Construct a synthetic graph that crosses BOTH the node-count and
321        // density thresholds. We don't need real authority — only the
322        // shape matters for the gate.
323        let mut g = AuthorityGraph::new(make_source("dense.yml"));
324        let n = DENSE_GRAPH_NODE_THRESHOLD + 10;
325        let e_per_node = DENSE_GRAPH_EDGE_RATIO + 1;
326
327        for i in 0..n {
328            g.add_node(NodeKind::Step, format!("s{i}"), TrustZone::FirstParty);
329        }
330        for i in 0..n {
331            for k in 0..e_per_node {
332                let to = (i + k + 1) % n;
333                g.add_edge(i, to, EdgeKind::DelegatesTo);
334            }
335        }
336
337        assert!(is_dense_graph(&g), "fixture should trip the density gate");
338
339        let err = propagation_analysis_checked(&g, DEFAULT_MAX_HOPS, false).unwrap_err();
340        assert_eq!(err.nodes, n);
341        assert!(err.to_string().contains("--force-scan-dense"));
342        assert!(err.to_string().contains("graph too dense"));
343    }
344
345    #[test]
346    fn force_scan_dense_overrides_the_gate() {
347        // Same fixture, but pass force_dense=true — the scan must run to
348        // completion even though the gate would otherwise fire.
349        let mut g = AuthorityGraph::new(make_source("dense.yml"));
350        // Smaller fixture so the test stays fast — just past both
351        // thresholds is enough; the BFS itself must not panic or hang.
352        let n = DENSE_GRAPH_NODE_THRESHOLD + 5;
353        let e_per_node = DENSE_GRAPH_EDGE_RATIO + 1;
354
355        for i in 0..n {
356            g.add_node(NodeKind::Step, format!("s{i}"), TrustZone::FirstParty);
357        }
358        for i in 0..n {
359            for k in 0..e_per_node {
360                let to = (i + k + 1) % n;
361                g.add_edge(i, to, EdgeKind::DelegatesTo);
362            }
363        }
364
365        assert!(is_dense_graph(&g));
366        let result = propagation_analysis_checked(&g, DEFAULT_MAX_HOPS, true);
367        assert!(
368            result.is_ok(),
369            "force_dense=true must override the density gate"
370        );
371        // No authority sources in this graph — result is empty by design.
372        assert!(result.unwrap().is_empty());
373    }
374
375    #[test]
376    fn small_graph_below_threshold_passes_check() {
377        let mut g = AuthorityGraph::new(make_source("small.yml"));
378        let secret = g.add_node(NodeKind::Secret, "K", TrustZone::FirstParty);
379        let step = g.add_node(NodeKind::Step, "s", TrustZone::Untrusted);
380        g.add_edge(step, secret, EdgeKind::HasAccessTo);
381
382        assert!(!is_dense_graph(&g));
383        let result = propagation_analysis_checked(&g, DEFAULT_MAX_HOPS, false);
384        assert!(result.is_ok());
385    }
386}