Skip to main content

taudit_core/
propagation.rs

1use crate::graph::{AuthorityGraph, EdgeId, NodeId, TrustZone};
2use serde::{Deserialize, Serialize};
3use std::collections::VecDeque;
4
5/// A path that authority took through the graph.
6/// The path is the product — it's what makes findings persuasive.
7#[derive(Debug, Clone, Serialize, Deserialize)]
8pub struct PropagationPath {
9    /// The authority origin (Secret or Identity).
10    pub source: NodeId,
11    /// Where authority ended up.
12    pub sink: NodeId,
13    /// The full edge path from source to sink.
14    pub edges: Vec<EdgeId>,
15    /// Did this path cross a trust zone boundary?
16    pub crossed_boundary: bool,
17    /// If crossed, from which zone to which zone.
18    #[serde(skip_serializing_if = "Option::is_none")]
19    pub boundary_crossing: Option<(TrustZone, TrustZone)>,
20}
21
22/// Default maximum BFS depth. Override via CLI --max-hops.
23pub const DEFAULT_MAX_HOPS: usize = 4;
24
25/// Node-count threshold above which the dense-graph guard activates.
26/// Graphs at or below this size are always scanned regardless of edge density.
27pub const DENSE_GRAPH_NODE_THRESHOLD: usize = 50_000;
28
29/// Edge-to-node ratio above which a graph is considered "dense" for the
30/// purposes of the safety guard. A graph with `V > DENSE_GRAPH_NODE_THRESHOLD`
31/// AND `E > V * DENSE_GRAPH_EDGE_RATIO` will be refused unless the caller
32/// passes `force_dense = true` (CLI: `--force-scan-dense`).
33pub const DENSE_GRAPH_EDGE_RATIO: usize = 5;
34
35/// Error returned when a graph is too dense to scan safely without an
36/// explicit override. The error message is the user-visible string the
37/// CLI surfaces verbatim.
38#[derive(Debug, Clone, PartialEq, Eq)]
39pub struct DenseGraphError {
40    pub nodes: usize,
41    pub edges: usize,
42}
43
44impl std::fmt::Display for DenseGraphError {
45    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
46        write!(
47            f,
48            "graph too dense to scan safely (V={}, E={}); use --force-scan-dense to override",
49            self.nodes, self.edges
50        )
51    }
52}
53
54impl std::error::Error for DenseGraphError {}
55
56/// Returns true when the graph exceeds the size+density thresholds the
57/// propagation engine considers a DoS risk. Used by both the engine and
58/// the CLI surface.
59///
60/// Note: On 32-bit systems, graphs approaching usize::MAX nodes/edges are
61/// conservatively treated as dense to avoid overflow edge cases.
62pub fn is_dense_graph(graph: &AuthorityGraph) -> bool {
63    let v = graph.nodes.len();
64    let e = graph.edges.len();
65
66    // Sanity check: if v or e are close to usize::MAX on 32-bit systems,
67    // conservatively treat as dense to avoid any overflow edge cases
68    if v > (usize::MAX / 10) || e > (usize::MAX / 10) {
69        return true;
70    }
71
72    v > DENSE_GRAPH_NODE_THRESHOLD && e > v.saturating_mul(DENSE_GRAPH_EDGE_RATIO)
73}
74
75/// Walk the graph from every authority-bearing source node (Secret + Identity).
76/// Flag any path that reaches a node in a lower trust zone.
77///
78/// Backward-compatible entry point — does NOT enforce the dense-graph guard
79/// (preserves existing behaviour for callers that haven't opted into the
80/// safety check). New callers should prefer `propagation_analysis_checked`
81/// which returns an error on dense graphs unless explicitly overridden.
82pub fn propagation_analysis(graph: &AuthorityGraph, max_hops: usize) -> Vec<PropagationPath> {
83    propagation_analysis_inner(graph, max_hops)
84}
85
86/// Density-gated entry point. Returns `Err(DenseGraphError)` when the graph
87/// exceeds size+density thresholds AND `force_dense` is false. Otherwise
88/// behaves like `propagation_analysis`.
89pub fn propagation_analysis_checked(
90    graph: &AuthorityGraph,
91    max_hops: usize,
92    force_dense: bool,
93) -> Result<Vec<PropagationPath>, DenseGraphError> {
94    if !force_dense && is_dense_graph(graph) {
95        return Err(DenseGraphError {
96            nodes: graph.nodes.len(),
97            edges: graph.edges.len(),
98        });
99    }
100    Ok(propagation_analysis_inner(graph, max_hops))
101}
102
103/// The actual BFS engine. Pre-builds an adjacency-list index once per call
104/// so the inner loop never linear-scans the edge vector.
105///
106/// Complexity: O(sources × (V + E)) once adjacency is built. The previous
107/// implementation was O(sources × accessor_steps × V × E) because
108/// `edges_from`/`edges_to` are O(E) linear filters on the edge vector and
109/// the BFS reseeded from every accessor step independently. On the
110/// `dense_5x` bench at V=10 000 the pathological cost was ~1.08 s; with
111/// adjacency pre-indexing and per-source visited sharing it drops to
112/// well under 100 ms.
113fn propagation_analysis_inner(graph: &AuthorityGraph, max_hops: usize) -> Vec<PropagationPath> {
114    let n = graph.nodes.len();
115    let mut results = Vec::new();
116
117    if n == 0 {
118        return results;
119    }
120
121    // Outgoing adjacency: for each node, the (edge_id, dest_node_id) pairs.
122    let mut adj_out: Vec<Vec<(EdgeId, NodeId)>> = vec![Vec::new(); n];
123    // Incoming HasAccessTo edges keyed by destination — we only need this
124    // edge kind for finding accessor steps, so filter at index time.
125    let mut accessors_for: Vec<Vec<NodeId>> = vec![Vec::new(); n];
126
127    for edge in &graph.edges {
128        if edge.from < n && edge.to < n {
129            adj_out[edge.from].push((edge.id, edge.to));
130            if edge.kind == crate::graph::EdgeKind::HasAccessTo {
131                accessors_for[edge.to].push(edge.from);
132            }
133        }
134    }
135
136    // Reused buffers across sources to avoid per-source allocator churn on
137    // large graphs. `visited` is bulk-reset to false on each iteration; on
138    // small graphs that's a couple cache lines, on large graphs it's a
139    // single linear write — still strictly cheaper than re-allocating.
140    let mut visited: Vec<bool> = vec![false; n];
141    let mut queue: VecDeque<(NodeId, Vec<EdgeId>, usize)> = VecDeque::new();
142
143    for source_node in graph.authority_sources() {
144        let accessor_steps = &accessors_for[source_node.id];
145        if accessor_steps.is_empty() {
146            continue;
147        }
148
149        // Bulk reset visited buffer for this source's BFS.
150        for v in visited.iter_mut() {
151            *v = false;
152        }
153        queue.clear();
154
155        // Seed the BFS with every accessor step at once (single visited
156        // set, single frontier) — the previous code ran an independent
157        // BFS per accessor step, which on dense graphs duplicated O(V·E)
158        // work for every additional accessor.
159        for &start_step in accessor_steps {
160            if !visited[start_step] {
161                visited[start_step] = true;
162                for &(edge_id, to) in &adj_out[start_step] {
163                    queue.push_back((to, vec![edge_id], 1));
164                }
165            }
166        }
167
168        let source_zone = source_node.trust_zone;
169
170        while let Some((current_id, path, depth)) = queue.pop_front() {
171            if visited[current_id] {
172                continue;
173            }
174            if depth > max_hops {
175                continue;
176            }
177            visited[current_id] = true;
178
179            let current_node = match graph.node(current_id) {
180                Some(n) => n,
181                None => continue,
182            };
183
184            let current_zone = current_node.trust_zone;
185            if current_zone.is_lower_than(&source_zone) {
186                results.push(PropagationPath {
187                    source: source_node.id,
188                    sink: current_id,
189                    edges: path.clone(),
190                    crossed_boundary: true,
191                    boundary_crossing: Some((source_zone, current_zone)),
192                });
193            }
194
195            if depth >= max_hops {
196                continue;
197            }
198
199            for &(edge_id, to) in &adj_out[current_id] {
200                if !visited[to] {
201                    let mut new_path = path.clone();
202                    new_path.push(edge_id);
203                    queue.push_back((to, new_path, depth + 1));
204                }
205            }
206        }
207    }
208
209    results
210}
211
212#[cfg(test)]
213mod tests {
214    use super::*;
215    use crate::graph::*;
216
217    fn make_source(file: &str) -> PipelineSource {
218        PipelineSource {
219            file: file.into(),
220            repo: None,
221            git_ref: None,
222            commit_sha: None,
223        }
224    }
225
226    #[test]
227    fn detects_secret_propagation_across_trust_boundary() {
228        let mut g = AuthorityGraph::new(make_source("test.yml"));
229
230        // Secret in first-party zone
231        let secret = g.add_node(NodeKind::Secret, "AWS_KEY", TrustZone::FirstParty);
232        // First-party build step reads the secret
233        let build = g.add_node(NodeKind::Step, "build", TrustZone::FirstParty);
234        // Build produces an artifact
235        let artifact = g.add_node(NodeKind::Artifact, "dist.tar.gz", TrustZone::FirstParty);
236        // Third-party deploy step consumes the artifact
237        let deploy = g.add_node(NodeKind::Step, "deploy", TrustZone::ThirdParty);
238
239        g.add_edge(build, secret, EdgeKind::HasAccessTo);
240        g.add_edge(build, artifact, EdgeKind::Produces);
241        g.add_edge(artifact, deploy, EdgeKind::Consumes);
242
243        let paths = propagation_analysis(&g, DEFAULT_MAX_HOPS);
244
245        assert!(!paths.is_empty(), "should detect propagation");
246        assert!(paths
247            .iter()
248            .any(|p| p.source == secret && p.crossed_boundary));
249    }
250
251    #[test]
252    fn no_finding_when_same_trust_zone() {
253        let mut g = AuthorityGraph::new(make_source("test.yml"));
254
255        let secret = g.add_node(NodeKind::Secret, "TOKEN", TrustZone::FirstParty);
256        let step_a = g.add_node(NodeKind::Step, "lint", TrustZone::FirstParty);
257        let step_b = g.add_node(NodeKind::Step, "test", TrustZone::FirstParty);
258        let artifact = g.add_node(NodeKind::Artifact, "output", TrustZone::FirstParty);
259
260        g.add_edge(step_a, secret, EdgeKind::HasAccessTo);
261        g.add_edge(step_a, artifact, EdgeKind::Produces);
262        g.add_edge(artifact, step_b, EdgeKind::Consumes);
263
264        let paths = propagation_analysis(&g, DEFAULT_MAX_HOPS);
265
266        let boundary_crossings: Vec<_> = paths.iter().filter(|p| p.crossed_boundary).collect();
267        assert!(
268            boundary_crossings.is_empty(),
269            "no boundary crossing expected"
270        );
271    }
272
273    #[test]
274    fn respects_max_hops() {
275        let mut g = AuthorityGraph::new(make_source("test.yml"));
276
277        let secret = g.add_node(NodeKind::Secret, "KEY", TrustZone::FirstParty);
278        let s1 = g.add_node(NodeKind::Step, "s1", TrustZone::FirstParty);
279        let a1 = g.add_node(NodeKind::Artifact, "a1", TrustZone::FirstParty);
280        let s2 = g.add_node(NodeKind::Step, "s2", TrustZone::FirstParty);
281        let a2 = g.add_node(NodeKind::Artifact, "a2", TrustZone::FirstParty);
282        let s3 = g.add_node(NodeKind::Step, "s3", TrustZone::Untrusted);
283
284        g.add_edge(s1, secret, EdgeKind::HasAccessTo);
285        g.add_edge(s1, a1, EdgeKind::Produces);
286        g.add_edge(a1, s2, EdgeKind::Consumes);
287        g.add_edge(s2, a2, EdgeKind::Produces);
288        g.add_edge(a2, s3, EdgeKind::Consumes);
289
290        // With max_hops=2, should NOT reach s3 (which is 4 edges away)
291        let paths_short = propagation_analysis(&g, 2);
292        let boundary_short: Vec<_> = paths_short.iter().filter(|p| p.crossed_boundary).collect();
293        assert!(
294            boundary_short.is_empty(),
295            "should not reach untrusted at depth 2"
296        );
297
298        // With max_hops=5, should reach s3
299        let paths_long = propagation_analysis(&g, 5);
300        let boundary_long: Vec<_> = paths_long.iter().filter(|p| p.crossed_boundary).collect();
301        assert!(
302            !boundary_long.is_empty(),
303            "should reach untrusted at depth 5"
304        );
305    }
306
307    #[test]
308    fn identity_is_authority_source() {
309        let mut g = AuthorityGraph::new(make_source("test.yml"));
310
311        let identity = g.add_node(NodeKind::Identity, "GITHUB_TOKEN", TrustZone::FirstParty);
312        let step = g.add_node(NodeKind::Step, "publish", TrustZone::FirstParty);
313        let action = g.add_node(
314            NodeKind::Image,
315            "third-party/deploy@main",
316            TrustZone::Untrusted,
317        );
318
319        g.add_edge(step, identity, EdgeKind::HasAccessTo);
320        g.add_edge(step, action, EdgeKind::UsesImage);
321
322        let paths = propagation_analysis(&g, DEFAULT_MAX_HOPS);
323        assert!(paths
324            .iter()
325            .any(|p| p.source == identity && p.crossed_boundary));
326    }
327
328    #[test]
329    fn dense_graph_check_rejects_oversized_input() {
330        // Construct a synthetic graph that crosses BOTH the node-count and
331        // density thresholds. We don't need real authority — only the
332        // shape matters for the gate.
333        let mut g = AuthorityGraph::new(make_source("dense.yml"));
334        let n = DENSE_GRAPH_NODE_THRESHOLD + 10;
335        let e_per_node = DENSE_GRAPH_EDGE_RATIO + 1;
336
337        for i in 0..n {
338            g.add_node(NodeKind::Step, format!("s{i}"), TrustZone::FirstParty);
339        }
340        for i in 0..n {
341            for k in 0..e_per_node {
342                let to = (i + k + 1) % n;
343                g.add_edge(i, to, EdgeKind::DelegatesTo);
344            }
345        }
346
347        assert!(is_dense_graph(&g), "fixture should trip the density gate");
348
349        let err = propagation_analysis_checked(&g, DEFAULT_MAX_HOPS, false).unwrap_err();
350        assert_eq!(err.nodes, n);
351        assert!(err.to_string().contains("--force-scan-dense"));
352        assert!(err.to_string().contains("graph too dense"));
353    }
354
355    #[test]
356    fn force_scan_dense_overrides_the_gate() {
357        // Same fixture, but pass force_dense=true — the scan must run to
358        // completion even though the gate would otherwise fire.
359        let mut g = AuthorityGraph::new(make_source("dense.yml"));
360        // Smaller fixture so the test stays fast — just past both
361        // thresholds is enough; the BFS itself must not panic or hang.
362        let n = DENSE_GRAPH_NODE_THRESHOLD + 5;
363        let e_per_node = DENSE_GRAPH_EDGE_RATIO + 1;
364
365        for i in 0..n {
366            g.add_node(NodeKind::Step, format!("s{i}"), TrustZone::FirstParty);
367        }
368        for i in 0..n {
369            for k in 0..e_per_node {
370                let to = (i + k + 1) % n;
371                g.add_edge(i, to, EdgeKind::DelegatesTo);
372            }
373        }
374
375        assert!(is_dense_graph(&g));
376        let result = propagation_analysis_checked(&g, DEFAULT_MAX_HOPS, true);
377        assert!(
378            result.is_ok(),
379            "force_dense=true must override the density gate"
380        );
381        // No authority sources in this graph — result is empty by design.
382        assert!(result.unwrap().is_empty());
383    }
384
385    #[test]
386    fn small_graph_below_threshold_passes_check() {
387        let mut g = AuthorityGraph::new(make_source("small.yml"));
388        let secret = g.add_node(NodeKind::Secret, "K", TrustZone::FirstParty);
389        let step = g.add_node(NodeKind::Step, "s", TrustZone::Untrusted);
390        g.add_edge(step, secret, EdgeKind::HasAccessTo);
391
392        assert!(!is_dense_graph(&g));
393        let result = propagation_analysis_checked(&g, DEFAULT_MAX_HOPS, false);
394        assert!(result.is_ok());
395    }
396}