Skip to main content

mati_core/analysis/
blast_radius.rs

1//! Blast radius computation for files in the knowledge graph.
2//!
3//! Measures how many other files depend on a given file (directly or
4//! transitively) via `Imports` edges. Higher blast radius means changes
5//! to the file have wider impact and warrant extra review care.
6
7use std::collections::{HashMap, HashSet};
8
9use serde::{Deserialize, Serialize};
10
11use crate::graph::edges::EdgeKind;
12use crate::graph::graph::Graph;
13
14/// Weight applied to transitive importers when computing the blast score.
15/// Direct importers contribute 1.0, transitive importers contribute this much.
16pub const TRANSITIVE_WEIGHT: f32 = 0.3;
17
18/// Maximum depth for transitive import traversal.
19pub const TRANSITIVE_DEPTH: usize = 3;
20
21#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
22pub struct BlastRadius {
23    /// Number of files that directly import this file (1 hop via Imports edges).
24    pub direct: u32,
25
26    /// Number of files that transitively import this file within TRANSITIVE_DEPTH,
27    /// excluding direct importers. Deduplicated across all paths.
28    pub transitive: u32,
29
30    /// Weighted score: direct + (transitive * TRANSITIVE_WEIGHT).
31    /// Higher means more dangerous to modify.
32    pub score: f32,
33
34    /// Categorical tier for agent-friendly consumption.
35    pub tier: BlastTier,
36}
37
38#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
39#[serde(rename_all = "snake_case")]
40pub enum BlastTier {
41    /// No files import this file. Safe to modify in isolation.
42    Isolated,
43    /// 1-5 direct importers. Modest blast radius.
44    Low,
45    /// 6-15 direct importers. Noticeable impact on changes.
46    Moderate,
47    /// 16-40 direct importers. High impact file.
48    High,
49    /// 40+ direct importers. Critical infrastructure file.
50    Critical,
51}
52
53impl BlastTier {
54    pub fn from_direct_count(direct: u32) -> Self {
55        match direct {
56            0 => Self::Isolated,
57            1..=5 => Self::Low,
58            6..=15 => Self::Moderate,
59            16..=40 => Self::High,
60            _ => Self::Critical,
61        }
62    }
63
64    pub fn label(&self) -> &'static str {
65        match self {
66            Self::Isolated => "isolated",
67            Self::Low => "low",
68            Self::Moderate => "moderate",
69            Self::High => "high",
70            Self::Critical => "critical",
71        }
72    }
73}
74
75impl BlastRadius {
76    /// Compute blast radius for a single file given the graph.
77    ///
78    /// Uses `neighbors_incoming` with `EdgeKind::Imports` at depth 1 for direct
79    /// count, and `traverse_incoming` at `TRANSITIVE_DEPTH` for the full
80    /// transitive set. Transitive count excludes direct importers.
81    ///
82    /// Prefer [`Self::compute_all`] for batch computation — it's O(V+E) total
83    /// vs O(N*(V+E)) when calling this in a loop.
84    pub fn compute(file_key: &str, graph: &Graph) -> Self {
85        let direct_set: HashSet<String> = graph
86            .neighbors_incoming(file_key, &EdgeKind::Imports)
87            .into_iter()
88            .collect();
89
90        let all_ancestors: HashSet<String> = graph
91            .traverse_incoming(file_key, &EdgeKind::Imports, TRANSITIVE_DEPTH)
92            .into_iter()
93            .collect();
94
95        let direct = direct_set.len() as u32;
96        let transitive = all_ancestors.difference(&direct_set).count() as u32;
97
98        let score = direct as f32 + (transitive as f32 * TRANSITIVE_WEIGHT);
99        let tier = BlastTier::from_direct_count(direct);
100
101        Self {
102            direct,
103            transitive,
104            score,
105            tier,
106        }
107    }
108
109    /// Compute blast radius for every file in the graph in a single pass.
110    ///
111    /// Returns a map from `file:<path>` keys to their `BlastRadius`.
112    /// Pre-computes the reverse adjacency list once, then runs bounded BFS
113    /// per node. This avoids repeated `edges_directed` lookups in petgraph,
114    /// reducing constant factors significantly on large graphs.
115    pub fn compute_all(graph: &Graph, file_keys: &[String]) -> HashMap<String, BlastRadius> {
116        // Pre-compute reverse adjacency list: for each node, who imports it?
117        let reverse_adj = graph.reverse_adjacency(&EdgeKind::Imports);
118        let mut result = HashMap::with_capacity(file_keys.len());
119
120        for file_key in file_keys {
121            let direct_vec = reverse_adj
122                .get(file_key.as_str())
123                .cloned()
124                .unwrap_or_default();
125            let direct_set: HashSet<&str> = direct_vec.iter().map(|s| s.as_str()).collect();
126
127            // BFS for transitive ancestors at depth 2..=TRANSITIVE_DEPTH
128            let mut all_ancestors: HashSet<&str> = HashSet::new();
129            all_ancestors.extend(direct_set.iter());
130
131            let mut frontier: Vec<&str> = direct_vec.iter().map(|s| s.as_str()).collect();
132            for _depth in 1..TRANSITIVE_DEPTH {
133                let mut next_frontier = Vec::new();
134                for node in &frontier {
135                    if let Some(parents) = reverse_adj.get(*node) {
136                        for p in parents {
137                            if all_ancestors.insert(p.as_str()) {
138                                next_frontier.push(p.as_str());
139                            }
140                        }
141                    }
142                }
143                if next_frontier.is_empty() {
144                    break;
145                }
146                frontier = next_frontier;
147            }
148
149            let direct = direct_set.len() as u32;
150            let transitive = (all_ancestors.len() - direct_set.len()) as u32;
151            let score = direct as f32 + (transitive as f32 * TRANSITIVE_WEIGHT);
152            let tier = BlastTier::from_direct_count(direct);
153
154            result.insert(
155                file_key.clone(),
156                BlastRadius {
157                    direct,
158                    transitive,
159                    score,
160                    tier,
161                },
162            );
163        }
164
165        result
166    }
167}
168
169#[cfg(test)]
170mod tests {
171    use super::*;
172    use crate::graph::Graph;
173    use crate::store::Store;
174    use tempfile::TempDir;
175
176    async fn temp_graph() -> (Graph, TempDir) {
177        let dir = TempDir::new().unwrap();
178        let store = Store::open(dir.path()).await.unwrap();
179        let g = Graph::empty(store);
180        (g, dir)
181    }
182
183    // ── BlastTier::from_direct_count boundary tests ──────────────────────────
184
185    #[test]
186    fn tier_isolated_at_zero() {
187        assert_eq!(BlastTier::from_direct_count(0), BlastTier::Isolated);
188    }
189
190    #[test]
191    fn tier_low_at_one() {
192        assert_eq!(BlastTier::from_direct_count(1), BlastTier::Low);
193    }
194
195    #[test]
196    fn tier_low_at_five() {
197        assert_eq!(BlastTier::from_direct_count(5), BlastTier::Low);
198    }
199
200    #[test]
201    fn tier_moderate_at_six() {
202        assert_eq!(BlastTier::from_direct_count(6), BlastTier::Moderate);
203    }
204
205    #[test]
206    fn tier_moderate_at_fifteen() {
207        assert_eq!(BlastTier::from_direct_count(15), BlastTier::Moderate);
208    }
209
210    #[test]
211    fn tier_high_at_sixteen() {
212        assert_eq!(BlastTier::from_direct_count(16), BlastTier::High);
213    }
214
215    #[test]
216    fn tier_high_at_forty() {
217        assert_eq!(BlastTier::from_direct_count(40), BlastTier::High);
218    }
219
220    #[test]
221    fn tier_critical_at_forty_one() {
222        assert_eq!(BlastTier::from_direct_count(41), BlastTier::Critical);
223    }
224
225    // ── BlastRadius::compute tests ───────────────────────────────────────────
226
227    /// A imports B, C imports B, D imports B → B has direct=3, transitive=0.
228    #[tokio::test]
229    async fn compute_three_direct_importers() {
230        let (mut g, _dir) = temp_graph().await;
231        g.add_edge("file:a", EdgeKind::Imports, "file:b")
232            .await
233            .unwrap();
234        g.add_edge("file:c", EdgeKind::Imports, "file:b")
235            .await
236            .unwrap();
237        g.add_edge("file:d", EdgeKind::Imports, "file:b")
238            .await
239            .unwrap();
240
241        let br = BlastRadius::compute("file:b", &g);
242        assert_eq!(br.direct, 3);
243        assert_eq!(br.transitive, 0);
244        assert_eq!(br.tier, BlastTier::Low);
245        assert!((br.score - 3.0).abs() < f32::EPSILON);
246
247        g.close().await.unwrap();
248    }
249
250    /// Chain A→B→C→D: C has direct=1 (B), transitive=1 (A).
251    #[tokio::test]
252    async fn compute_chain_one_direct_one_transitive() {
253        let (mut g, _dir) = temp_graph().await;
254        // A imports B, B imports C, C imports D
255        g.add_edge("file:a", EdgeKind::Imports, "file:b")
256            .await
257            .unwrap();
258        g.add_edge("file:b", EdgeKind::Imports, "file:c")
259            .await
260            .unwrap();
261        g.add_edge("file:c", EdgeKind::Imports, "file:d")
262            .await
263            .unwrap();
264
265        // Who imports C? B directly, A transitively.
266        let br = BlastRadius::compute("file:c", &g);
267        assert_eq!(br.direct, 1); // B
268        assert_eq!(br.transitive, 1); // A
269        assert_eq!(br.tier, BlastTier::Low);
270        let expected_score = 1.0 + (1.0 * TRANSITIVE_WEIGHT);
271        assert!((br.score - expected_score).abs() < f32::EPSILON);
272
273        g.close().await.unwrap();
274    }
275
276    /// File with no incoming imports → isolated.
277    #[tokio::test]
278    async fn compute_no_importers_is_isolated() {
279        let (mut g, _dir) = temp_graph().await;
280        g.add_edge("file:a", EdgeKind::Imports, "file:b")
281            .await
282            .unwrap();
283
284        // file:a has no incoming imports
285        let br = BlastRadius::compute("file:a", &g);
286        assert_eq!(br.direct, 0);
287        assert_eq!(br.transitive, 0);
288        assert_eq!(br.tier, BlastTier::Isolated);
289        assert!((br.score - 0.0).abs() < f32::EPSILON);
290
291        g.close().await.unwrap();
292    }
293
294    /// Cycle A→B→A must terminate without double-counting.
295    #[tokio::test]
296    async fn compute_cycle_terminates() {
297        let (mut g, _dir) = temp_graph().await;
298        g.add_edge("file:a", EdgeKind::Imports, "file:b")
299            .await
300            .unwrap();
301        g.add_edge("file:b", EdgeKind::Imports, "file:a")
302            .await
303            .unwrap();
304
305        let br_a = BlastRadius::compute("file:a", &g);
306        assert_eq!(br_a.direct, 1); // B imports A
307                                    // B is direct; no transitive beyond that in a 2-node cycle
308        assert_eq!(br_a.tier, BlastTier::Low);
309
310        let br_b = BlastRadius::compute("file:b", &g);
311        assert_eq!(br_b.direct, 1); // A imports B
312
313        g.close().await.unwrap();
314    }
315
316    /// File 5 hops away must NOT appear in transitive count (depth cap = 3).
317    #[tokio::test]
318    async fn compute_depth_cap_excludes_distant_file() {
319        let (mut g, _dir) = temp_graph().await;
320        // Chain: e → d → c → b → a (reading as "e imports d", etc.)
321        g.add_edge("file:e", EdgeKind::Imports, "file:d")
322            .await
323            .unwrap();
324        g.add_edge("file:d", EdgeKind::Imports, "file:c")
325            .await
326            .unwrap();
327        g.add_edge("file:c", EdgeKind::Imports, "file:b")
328            .await
329            .unwrap();
330        g.add_edge("file:b", EdgeKind::Imports, "file:a")
331            .await
332            .unwrap();
333
334        // Who imports file:a?
335        // Direct (depth 1): b
336        // Transitive (depth 2-3): c, d
337        // Beyond depth 3: e — should NOT be counted
338        let br = BlastRadius::compute("file:a", &g);
339        assert_eq!(br.direct, 1); // b
340                                  // traverse_incoming at depth 3 returns b, c, d (3 nodes)
341                                  // minus direct (b) = 2 transitive
342        assert_eq!(br.transitive, 2); // c, d — but NOT e
343        assert_eq!(br.tier, BlastTier::Low);
344
345        g.close().await.unwrap();
346    }
347
348    /// Diamond: a→c, b→c, a→d, d→c — c reachable from a via two paths, counted once.
349    #[tokio::test]
350    async fn compute_deduplication_across_paths() {
351        let (mut g, _dir) = temp_graph().await;
352        // a imports c (direct)
353        g.add_edge("file:a", EdgeKind::Imports, "file:c")
354            .await
355            .unwrap();
356        // b imports c (direct)
357        g.add_edge("file:b", EdgeKind::Imports, "file:c")
358            .await
359            .unwrap();
360        // d imports c (direct)
361        g.add_edge("file:d", EdgeKind::Imports, "file:c")
362            .await
363            .unwrap();
364        // a also imports d (so a reaches c via two paths)
365        g.add_edge("file:a", EdgeKind::Imports, "file:d")
366            .await
367            .unwrap();
368
369        let br = BlastRadius::compute("file:c", &g);
370        // Direct importers of c: a, b, d
371        assert_eq!(br.direct, 3);
372        // a is already counted as direct — no extra transitive
373        assert_eq!(br.transitive, 0);
374        assert_eq!(br.tier, BlastTier::Low);
375
376        g.close().await.unwrap();
377    }
378
379    /// Unknown file key returns isolated (score 0).
380    #[tokio::test]
381    async fn compute_unknown_file_is_isolated() {
382        let (g, _dir) = temp_graph().await;
383
384        let br = BlastRadius::compute("file:nonexistent", &g);
385        assert_eq!(br.direct, 0);
386        assert_eq!(br.transitive, 0);
387        assert_eq!(br.tier, BlastTier::Isolated);
388        assert!((br.score - 0.0).abs() < f32::EPSILON);
389
390        g.close().await.unwrap();
391    }
392
393    /// Serde roundtrip preserves all fields.
394    #[test]
395    fn serde_roundtrip() {
396        let br = BlastRadius {
397            direct: 7,
398            transitive: 3,
399            score: 7.9,
400            tier: BlastTier::Moderate,
401        };
402        let json = serde_json::to_string(&br).unwrap();
403        let back: BlastRadius = serde_json::from_str(&json).unwrap();
404        assert_eq!(br, back);
405    }
406
407    /// All tier labels are lowercase and match serde rename.
408    #[test]
409    fn tier_labels_match_serde() {
410        let tiers = [
411            BlastTier::Isolated,
412            BlastTier::Low,
413            BlastTier::Moderate,
414            BlastTier::High,
415            BlastTier::Critical,
416        ];
417        for tier in tiers {
418            let json = serde_json::to_string(&tier).unwrap();
419            let label = tier.label();
420            assert_eq!(json, format!("\"{label}\""));
421        }
422    }
423
424    // ── compute_all tests ────────────────────────────────────────────────────
425
426    /// compute_all matches per-file compute on the same graph.
427    #[tokio::test]
428    async fn compute_all_matches_per_file_compute() {
429        let (mut g, _dir) = temp_graph().await;
430        // a→b, b→c, c→d
431        g.add_edge("file:a", EdgeKind::Imports, "file:b")
432            .await
433            .unwrap();
434        g.add_edge("file:b", EdgeKind::Imports, "file:c")
435            .await
436            .unwrap();
437        g.add_edge("file:c", EdgeKind::Imports, "file:d")
438            .await
439            .unwrap();
440
441        let keys: Vec<String> = ["file:a", "file:b", "file:c", "file:d"]
442            .iter()
443            .map(|s| s.to_string())
444            .collect();
445
446        let batch = BlastRadius::compute_all(&g, &keys);
447        for key in &keys {
448            let single = BlastRadius::compute(key, &g);
449            let from_batch = batch.get(key).expect("key missing from compute_all");
450            assert_eq!(
451                single.direct, from_batch.direct,
452                "direct mismatch for {key}"
453            );
454            assert_eq!(
455                single.transitive, from_batch.transitive,
456                "transitive mismatch for {key}"
457            );
458        }
459
460        g.close().await.unwrap();
461    }
462
463    /// compute_all handles cycles without infinite recursion.
464    #[tokio::test]
465    async fn compute_all_handles_cycle_safely() {
466        let (mut g, _dir) = temp_graph().await;
467        g.add_edge("file:a", EdgeKind::Imports, "file:b")
468            .await
469            .unwrap();
470        g.add_edge("file:b", EdgeKind::Imports, "file:a")
471            .await
472            .unwrap();
473
474        let keys = vec!["file:a".to_string(), "file:b".to_string()];
475        let batch = BlastRadius::compute_all(&g, &keys);
476
477        let br_a = batch.get("file:a").unwrap();
478        let br_b = batch.get("file:b").unwrap();
479        assert_eq!(br_a.direct, 1);
480        assert_eq!(br_b.direct, 1);
481
482        // Verify matches single-file compute
483        let single_a = BlastRadius::compute("file:a", &g);
484        let single_b = BlastRadius::compute("file:b", &g);
485        assert_eq!(br_a.direct, single_a.direct);
486        assert_eq!(br_b.direct, single_b.direct);
487
488        g.close().await.unwrap();
489    }
490
491    /// compute_all on empty graph returns empty map.
492    #[tokio::test]
493    async fn compute_all_on_empty_graph_returns_empty_map() {
494        let (g, _dir) = temp_graph().await;
495        let batch = BlastRadius::compute_all(&g, &[]);
496        assert!(batch.is_empty());
497        g.close().await.unwrap();
498    }
499
500    /// Deserialization of BlastRadius with default (missing field in parent).
501    #[test]
502    fn deserialize_optional_blast_radius() {
503        let val: Option<BlastRadius> = serde_json::from_str("null").unwrap();
504        assert!(val.is_none());
505
506        let val: BlastRadius =
507            serde_json::from_str(r#"{"direct":0,"transitive":0,"score":0.0,"tier":"isolated"}"#)
508                .unwrap();
509        assert_eq!(val.tier, BlastTier::Isolated);
510    }
511}