Skip to main content

taudit_core/
propagation.rs

1use crate::graph::{AuthorityGraph, EdgeId, NodeId};
2use std::collections::VecDeque;
3
4// `PropagationPath` is a wire type — the BFS engine here computes them, but
5// the struct itself lives in `taudit-api` (it serialises into `Finding.path`
6// in JSON output and into the standalone `authority-propagation-summary.v1.json`
7// schema). Re-export so existing in-tree imports stay green.
8pub use taudit_api::PropagationPath;
9
10/// Default maximum BFS depth. Override via CLI --max-hops.
11pub const DEFAULT_MAX_HOPS: usize = 4;
12
13/// Node-count threshold above which the dense-graph guard activates.
14/// Graphs at or below this size are always scanned regardless of edge density.
15pub const DENSE_GRAPH_NODE_THRESHOLD: usize = 50_000;
16
17/// Edge-to-node ratio above which a graph is considered "dense" for the
18/// purposes of the safety guard. A graph with `V > DENSE_GRAPH_NODE_THRESHOLD`
19/// AND `E > V * DENSE_GRAPH_EDGE_RATIO` will be refused unless the caller
20/// passes `force_dense = true` (CLI: `--force-scan-dense`).
21pub const DENSE_GRAPH_EDGE_RATIO: usize = 5;
22
23/// Error returned when a graph is too dense to scan safely without an
24/// explicit override. The error message is the user-visible string the
25/// CLI surfaces verbatim.
26#[derive(Debug, Clone, PartialEq, Eq)]
27pub struct DenseGraphError {
28    pub nodes: usize,
29    pub edges: usize,
30}
31
32impl std::fmt::Display for DenseGraphError {
33    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
34        write!(
35            f,
36            "graph too dense to scan safely (V={}, E={}); use --force-scan-dense to override",
37            self.nodes, self.edges
38        )
39    }
40}
41
42impl std::error::Error for DenseGraphError {}
43
44/// Returns true when the graph exceeds the size+density thresholds the
45/// propagation engine considers a DoS risk. Used by both the engine and
46/// the CLI surface.
47///
48/// Note: On 32-bit systems, graphs approaching usize::MAX nodes/edges are
49/// conservatively treated as dense to avoid overflow edge cases.
50pub fn is_dense_graph(graph: &AuthorityGraph) -> bool {
51    let v = graph.nodes.len();
52    let e = graph.edges.len();
53
54    // Sanity check: if v or e are close to usize::MAX on 32-bit systems,
55    // conservatively treat as dense to avoid any overflow edge cases
56    if v > (usize::MAX / 10) || e > (usize::MAX / 10) {
57        return true;
58    }
59
60    v > DENSE_GRAPH_NODE_THRESHOLD && e > v.saturating_mul(DENSE_GRAPH_EDGE_RATIO)
61}
62
63/// Walk the graph from every authority-bearing source node (Secret + Identity).
64/// Flag any path that reaches a node in a lower trust zone.
65///
66/// Backward-compatible entry point — does NOT enforce the dense-graph guard
67/// (preserves existing behaviour for callers that haven't opted into the
68/// safety check). New callers should prefer `propagation_analysis_checked`
69/// which returns an error on dense graphs unless explicitly overridden.
70pub fn propagation_analysis(graph: &AuthorityGraph, max_hops: usize) -> Vec<PropagationPath> {
71    propagation_analysis_inner(graph, max_hops)
72}
73
74/// Density-gated entry point. Returns `Err(DenseGraphError)` when the graph
75/// exceeds size+density thresholds AND `force_dense` is false. Otherwise
76/// behaves like `propagation_analysis`.
77pub fn propagation_analysis_checked(
78    graph: &AuthorityGraph,
79    max_hops: usize,
80    force_dense: bool,
81) -> Result<Vec<PropagationPath>, DenseGraphError> {
82    if !force_dense && is_dense_graph(graph) {
83        return Err(DenseGraphError {
84            nodes: graph.nodes.len(),
85            edges: graph.edges.len(),
86        });
87    }
88    Ok(propagation_analysis_inner(graph, max_hops))
89}
90
91/// The actual BFS engine. Pre-builds an adjacency-list index once per call
92/// so the inner loop never linear-scans the edge vector.
93///
94/// Complexity: O(sources × (V + E)) once adjacency is built. The previous
95/// implementation was O(sources × accessor_steps × V × E) because
96/// `edges_from`/`edges_to` are O(E) linear filters on the edge vector and
97/// the BFS reseeded from every accessor step independently. On the
98/// `dense_5x` bench at V=10 000 the pathological cost was ~1.08 s; with
99/// adjacency pre-indexing and per-source visited sharing it drops to
100/// well under 100 ms.
101fn propagation_analysis_inner(graph: &AuthorityGraph, max_hops: usize) -> Vec<PropagationPath> {
102    let n = graph.nodes.len();
103    let mut results = Vec::new();
104
105    if n == 0 {
106        return results;
107    }
108
109    // Outgoing adjacency: for each node, the (edge_id, dest_node_id) pairs.
110    let mut adj_out: Vec<Vec<(EdgeId, NodeId)>> = vec![Vec::new(); n];
111    // Incoming HasAccessTo edges keyed by destination — we only need this
112    // edge kind for finding accessor steps, so filter at index time.
113    let mut accessors_for: Vec<Vec<NodeId>> = vec![Vec::new(); n];
114
115    for edge in &graph.edges {
116        if edge.from < n && edge.to < n {
117            adj_out[edge.from].push((edge.id, edge.to));
118            if edge.kind == crate::graph::EdgeKind::HasAccessTo {
119                accessors_for[edge.to].push(edge.from);
120            }
121        }
122    }
123
124    // Reused buffers across sources to avoid per-source allocator churn on
125    // large graphs. `visited` is bulk-reset to false on each iteration; on
126    // small graphs that's a couple cache lines, on large graphs it's a
127    // single linear write — still strictly cheaper than re-allocating.
128    let mut visited: Vec<bool> = vec![false; n];
129    let mut queue: VecDeque<(NodeId, Vec<EdgeId>, usize)> = VecDeque::new();
130
131    for source_node in graph.authority_sources() {
132        let accessor_steps = &accessors_for[source_node.id];
133        if accessor_steps.is_empty() {
134            continue;
135        }
136
137        // Bulk reset visited buffer for this source's BFS.
138        for v in visited.iter_mut() {
139            *v = false;
140        }
141        queue.clear();
142
143        // Seed the BFS with every accessor step at once (single visited
144        // set, single frontier) — the previous code ran an independent
145        // BFS per accessor step, which on dense graphs duplicated O(V·E)
146        // work for every additional accessor.
147        for &start_step in accessor_steps {
148            if !visited[start_step] {
149                visited[start_step] = true;
150                for &(edge_id, to) in &adj_out[start_step] {
151                    queue.push_back((to, vec![edge_id], 1));
152                }
153            }
154        }
155
156        let source_zone = source_node.trust_zone;
157
158        while let Some((current_id, path, depth)) = queue.pop_front() {
159            if visited[current_id] {
160                continue;
161            }
162            if depth > max_hops {
163                continue;
164            }
165            visited[current_id] = true;
166
167            let current_node = match graph.node(current_id) {
168                Some(n) => n,
169                None => continue,
170            };
171
172            let current_zone = current_node.trust_zone;
173            if current_zone.is_lower_than(&source_zone) {
174                results.push(PropagationPath {
175                    source: source_node.id,
176                    sink: current_id,
177                    edges: path.clone(),
178                    crossed_boundary: true,
179                    boundary_crossing: Some((source_zone, current_zone)),
180                });
181            }
182
183            if depth >= max_hops {
184                continue;
185            }
186
187            for &(edge_id, to) in &adj_out[current_id] {
188                if !visited[to] {
189                    let mut new_path = path.clone();
190                    new_path.push(edge_id);
191                    queue.push_back((to, new_path, depth + 1));
192                }
193            }
194        }
195    }
196
197    results
198}
199
200#[cfg(test)]
201mod tests {
202    use super::*;
203    use crate::graph::*;
204
205    fn make_source(file: &str) -> PipelineSource {
206        PipelineSource {
207            file: file.into(),
208            repo: None,
209            git_ref: None,
210            commit_sha: None,
211        }
212    }
213
214    #[test]
215    fn detects_secret_propagation_across_trust_boundary() {
216        let mut g = AuthorityGraph::new(make_source("test.yml"));
217
218        // Secret in first-party zone
219        let secret = g.add_node(NodeKind::Secret, "AWS_KEY", TrustZone::FirstParty);
220        // First-party build step reads the secret
221        let build = g.add_node(NodeKind::Step, "build", TrustZone::FirstParty);
222        // Build produces an artifact
223        let artifact = g.add_node(NodeKind::Artifact, "dist.tar.gz", TrustZone::FirstParty);
224        // Third-party deploy step consumes the artifact
225        let deploy = g.add_node(NodeKind::Step, "deploy", TrustZone::ThirdParty);
226
227        g.add_edge(build, secret, EdgeKind::HasAccessTo);
228        g.add_edge(build, artifact, EdgeKind::Produces);
229        g.add_edge(artifact, deploy, EdgeKind::Consumes);
230
231        let paths = propagation_analysis(&g, DEFAULT_MAX_HOPS);
232
233        assert!(!paths.is_empty(), "should detect propagation");
234        assert!(paths
235            .iter()
236            .any(|p| p.source == secret && p.crossed_boundary));
237    }
238
239    #[test]
240    fn no_finding_when_same_trust_zone() {
241        let mut g = AuthorityGraph::new(make_source("test.yml"));
242
243        let secret = g.add_node(NodeKind::Secret, "TOKEN", TrustZone::FirstParty);
244        let step_a = g.add_node(NodeKind::Step, "lint", TrustZone::FirstParty);
245        let step_b = g.add_node(NodeKind::Step, "test", TrustZone::FirstParty);
246        let artifact = g.add_node(NodeKind::Artifact, "output", TrustZone::FirstParty);
247
248        g.add_edge(step_a, secret, EdgeKind::HasAccessTo);
249        g.add_edge(step_a, artifact, EdgeKind::Produces);
250        g.add_edge(artifact, step_b, EdgeKind::Consumes);
251
252        let paths = propagation_analysis(&g, DEFAULT_MAX_HOPS);
253
254        let boundary_crossings: Vec<_> = paths.iter().filter(|p| p.crossed_boundary).collect();
255        assert!(
256            boundary_crossings.is_empty(),
257            "no boundary crossing expected"
258        );
259    }
260
261    #[test]
262    fn respects_max_hops() {
263        let mut g = AuthorityGraph::new(make_source("test.yml"));
264
265        let secret = g.add_node(NodeKind::Secret, "KEY", TrustZone::FirstParty);
266        let s1 = g.add_node(NodeKind::Step, "s1", TrustZone::FirstParty);
267        let a1 = g.add_node(NodeKind::Artifact, "a1", TrustZone::FirstParty);
268        let s2 = g.add_node(NodeKind::Step, "s2", TrustZone::FirstParty);
269        let a2 = g.add_node(NodeKind::Artifact, "a2", TrustZone::FirstParty);
270        let s3 = g.add_node(NodeKind::Step, "s3", TrustZone::Untrusted);
271
272        g.add_edge(s1, secret, EdgeKind::HasAccessTo);
273        g.add_edge(s1, a1, EdgeKind::Produces);
274        g.add_edge(a1, s2, EdgeKind::Consumes);
275        g.add_edge(s2, a2, EdgeKind::Produces);
276        g.add_edge(a2, s3, EdgeKind::Consumes);
277
278        // With max_hops=2, should NOT reach s3 (which is 4 edges away)
279        let paths_short = propagation_analysis(&g, 2);
280        let boundary_short: Vec<_> = paths_short.iter().filter(|p| p.crossed_boundary).collect();
281        assert!(
282            boundary_short.is_empty(),
283            "should not reach untrusted at depth 2"
284        );
285
286        // With max_hops=5, should reach s3
287        let paths_long = propagation_analysis(&g, 5);
288        let boundary_long: Vec<_> = paths_long.iter().filter(|p| p.crossed_boundary).collect();
289        assert!(
290            !boundary_long.is_empty(),
291            "should reach untrusted at depth 5"
292        );
293    }
294
295    #[test]
296    fn identity_is_authority_source() {
297        let mut g = AuthorityGraph::new(make_source("test.yml"));
298
299        let identity = g.add_node(NodeKind::Identity, "GITHUB_TOKEN", TrustZone::FirstParty);
300        let step = g.add_node(NodeKind::Step, "publish", TrustZone::FirstParty);
301        let action = g.add_node(
302            NodeKind::Image,
303            "third-party/deploy@main",
304            TrustZone::Untrusted,
305        );
306
307        g.add_edge(step, identity, EdgeKind::HasAccessTo);
308        g.add_edge(step, action, EdgeKind::UsesImage);
309
310        let paths = propagation_analysis(&g, DEFAULT_MAX_HOPS);
311        assert!(paths
312            .iter()
313            .any(|p| p.source == identity && p.crossed_boundary));
314    }
315
316    #[test]
317    fn dense_graph_check_rejects_oversized_input() {
318        // Construct a synthetic graph that crosses BOTH the node-count and
319        // density thresholds. We don't need real authority — only the
320        // shape matters for the gate.
321        let mut g = AuthorityGraph::new(make_source("dense.yml"));
322        let n = DENSE_GRAPH_NODE_THRESHOLD + 10;
323        let e_per_node = DENSE_GRAPH_EDGE_RATIO + 1;
324
325        for i in 0..n {
326            g.add_node(NodeKind::Step, format!("s{i}"), TrustZone::FirstParty);
327        }
328        for i in 0..n {
329            for k in 0..e_per_node {
330                let to = (i + k + 1) % n;
331                g.add_edge(i, to, EdgeKind::DelegatesTo);
332            }
333        }
334
335        assert!(is_dense_graph(&g), "fixture should trip the density gate");
336
337        let err = propagation_analysis_checked(&g, DEFAULT_MAX_HOPS, false).unwrap_err();
338        assert_eq!(err.nodes, n);
339        assert!(err.to_string().contains("--force-scan-dense"));
340        assert!(err.to_string().contains("graph too dense"));
341    }
342
343    #[test]
344    fn force_scan_dense_overrides_the_gate() {
345        // Same fixture, but pass force_dense=true — the scan must run to
346        // completion even though the gate would otherwise fire.
347        let mut g = AuthorityGraph::new(make_source("dense.yml"));
348        // Smaller fixture so the test stays fast — just past both
349        // thresholds is enough; the BFS itself must not panic or hang.
350        let n = DENSE_GRAPH_NODE_THRESHOLD + 5;
351        let e_per_node = DENSE_GRAPH_EDGE_RATIO + 1;
352
353        for i in 0..n {
354            g.add_node(NodeKind::Step, format!("s{i}"), TrustZone::FirstParty);
355        }
356        for i in 0..n {
357            for k in 0..e_per_node {
358                let to = (i + k + 1) % n;
359                g.add_edge(i, to, EdgeKind::DelegatesTo);
360            }
361        }
362
363        assert!(is_dense_graph(&g));
364        let result = propagation_analysis_checked(&g, DEFAULT_MAX_HOPS, true);
365        assert!(
366            result.is_ok(),
367            "force_dense=true must override the density gate"
368        );
369        // No authority sources in this graph — result is empty by design.
370        assert!(result.unwrap().is_empty());
371    }
372
373    #[test]
374    fn small_graph_below_threshold_passes_check() {
375        let mut g = AuthorityGraph::new(make_source("small.yml"));
376        let secret = g.add_node(NodeKind::Secret, "K", TrustZone::FirstParty);
377        let step = g.add_node(NodeKind::Step, "s", TrustZone::Untrusted);
378        g.add_edge(step, secret, EdgeKind::HasAccessTo);
379
380        assert!(!is_dense_graph(&g));
381        let result = propagation_analysis_checked(&g, DEFAULT_MAX_HOPS, false);
382        assert!(result.is_ok());
383    }
384}