Skip to main content

ucm_reason/
impact.rs

1//! Impact analysis — determines what is directly/indirectly impacted by a change.
2//!
3//! This module orchestrates the graph's impact_bfs with change classification
4//! to produce a structured ImpactReport with explanations.
5//!
6//! References:
7//! - Google TAP: reverse dependency traversal in build graph
8//! - Meta PTS: MinDist (shortest path in dependency graph) is the most
9//!   predictive feature for test relevance
10//! - Test failure likelihood diminishes beyond MinDist=10
11
12use crate::explanation::{explain_impact, explain_not_impacted, ExplanationChain};
13use petgraph::visit::EdgeRef;
14use petgraph::Direction;
15use serde::{Deserialize, Serialize};
16use std::collections::{HashMap, HashSet, VecDeque};
17use ucm_graph_core::edge::ConfidenceTier;
18use ucm_graph_core::entity::EntityId;
19use ucm_graph_core::graph::{ImpactType, ImpactedEntity, NotImpactedEntity, UcmGraph};
20
21/// Full impact report for a change set — the primary output of the reasoning engine.
22#[derive(Debug, Clone, Serialize, Deserialize)]
23pub struct ImpactReport {
24    /// What entities were changed
25    pub changes: Vec<ChangeDescription>,
26    /// Entities directly impacted (1-hop dependency)
27    pub direct_impacts: Vec<ImpactEntry>,
28    /// Entities indirectly impacted (2+ hop, confidence-weighted)
29    pub indirect_impacts: Vec<ImpactEntry>,
30    /// Entities determined to NOT be impacted (with explanation)
31    pub not_impacted: Vec<NotImpactedEntry>,
32    /// Ambiguities and conflicts detected
33    pub ambiguities: Vec<AmbiguityEntry>,
34    /// Graph traversal statistics
35    pub stats: ImpactStats,
36}
37
38#[derive(Debug, Clone, Serialize, Deserialize)]
39pub struct ChangeDescription {
40    pub entity_id: String,
41    pub name: String,
42    pub change_type: String,
43    pub file_path: String,
44}
45
46#[derive(Debug, Clone, Serialize, Deserialize)]
47pub struct ImpactEntry {
48    pub entity_id: String,
49    pub name: String,
50    pub confidence: f64,
51    pub tier: String,
52    pub depth: usize,
53    pub path: Vec<String>,
54    pub reason: String,
55    pub explanation_chain: ExplanationChain,
56}
57
58#[derive(Debug, Clone, Serialize, Deserialize)]
59pub struct NotImpactedEntry {
60    pub entity_id: String,
61    pub name: String,
62    pub confidence: f64,
63    pub reason: String,
64    pub explanation_chain: ExplanationChain,
65}
66
67#[derive(Debug, Clone, Serialize, Deserialize)]
68pub struct AmbiguityEntry {
69    pub entity_id: Option<String>,
70    pub ambiguity_type: String,
71    pub description: String,
72    pub sources: Vec<String>,
73    pub recommendation: String,
74}
75
76#[derive(Debug, Clone, Serialize, Deserialize)]
77pub struct ImpactStats {
78    pub total_entities: usize,
79    pub directly_impacted: usize,
80    pub indirectly_impacted: usize,
81    pub not_impacted: usize,
82    pub max_depth_reached: usize,
83}
84
85/// Reverse BFS from a set of changed entities.
86/// Returns all transitively affected entities with confidence scores.
87///
88/// This implements the dependency-based test selection approach used by
89/// Google TAP: reverse BFS/DFS from changed nodes in the build graph.
90/// Confidence decays along the path: each hop reduces confidence multiplicatively.
91pub fn impact_bfs(
92    graph: &UcmGraph,
93    changed: &[EntityId],
94    min_confidence: f64,
95    max_depth: usize,
96) -> Vec<ImpactedEntity> {
97    let inner = graph.inner();
98    let mut visited: HashMap<petgraph::stable_graph::NodeIndex, ImpactedEntity> = HashMap::new();
99    let mut queue: VecDeque<(petgraph::stable_graph::NodeIndex, f64, usize, Vec<String>)> =
100        VecDeque::new();
101
102    // Seed with changed entities
103    for id in changed {
104        if let Some(idx) = graph.entity_node_index(id) {
105            let entity = inner.node_weight(idx).unwrap();
106            visited.insert(
107                idx,
108                ImpactedEntity {
109                    entity_id: id.clone(),
110                    name: entity.name.clone(),
111                    confidence: 1.0,
112                    depth: 0,
113                    impact_type: ImpactType::Direct,
114                    path: vec![id.as_str().to_string()],
115                    reason: "Directly changed".to_string(),
116                },
117            );
118            queue.push_back((idx, 1.0, 0, vec![id.as_str().to_string()]));
119        }
120    }
121
122    while let Some((current, current_confidence, depth, path)) = queue.pop_front() {
123        if depth >= max_depth {
124            continue;
125        }
126
127        for edge in inner.edges_directed(current, Direction::Incoming) {
128            let neighbor = edge.source();
129            let edge_weight = edge.weight();
130
131            let propagated = current_confidence * edge_weight.decayed_confidence();
132            if propagated < min_confidence {
133                continue;
134            }
135
136            let neighbor_entity = match inner.node_weight(neighbor) {
137                Some(e) => e,
138                None => continue,
139            };
140
141            let mut new_path = path.clone();
142            new_path.push(neighbor_entity.id.as_str().to_string());
143
144            let impact = ImpactedEntity {
145                entity_id: neighbor_entity.id.clone(),
146                name: neighbor_entity.name.clone(),
147                confidence: propagated,
148                depth: depth + 1,
149                impact_type: if depth == 0 {
150                    ImpactType::Direct
151                } else {
152                    ImpactType::Indirect
153                },
154                path: new_path.clone(),
155                reason: format!(
156                    "{} via {} ({})",
157                    edge_weight.relation_type_str(),
158                    path.last().unwrap_or(&"?".to_string()),
159                    ConfidenceTier::from_score(propagated).emoji()
160                ),
161            };
162
163            let should_update = match visited.get(&neighbor) {
164                Some(existing) => propagated > existing.confidence,
165                None => true,
166            };
167
168            if should_update {
169                visited.insert(neighbor, impact);
170                queue.push_back((neighbor, propagated, depth + 1, new_path));
171            }
172        }
173    }
174
175    let changed_indices: HashSet<_> = changed
176        .iter()
177        .filter_map(|id| graph.entity_node_index(id))
178        .collect();
179
180    visited
181        .into_iter()
182        .filter(|(idx, _)| !changed_indices.contains(idx))
183        .map(|(_, impact)| impact)
184        .collect()
185}
186
187/// Find entities that are NOT impacted by a change set.
188pub fn find_not_impacted(
189    graph: &UcmGraph,
190    changed: &[EntityId],
191    impacted: &[ImpactedEntity],
192) -> Vec<NotImpactedEntity> {
193    let inner = graph.inner();
194    let changed_set: HashSet<&str> = changed.iter().map(|id| id.as_str()).collect();
195    let impacted_set: HashSet<&str> = impacted.iter().map(|i| i.entity_id.as_str()).collect();
196
197    inner
198        .node_weights()
199        .filter(|entity| {
200            !changed_set.contains(entity.id.as_str()) && !impacted_set.contains(entity.id.as_str())
201        })
202        .map(|entity| {
203            let has_path = has_path_to_any(graph, &entity.id, changed);
204            let reason = if has_path {
205                "Path exists but confidence below threshold".to_string()
206            } else {
207                "No graph path exists to changed entities".to_string()
208            };
209            let confidence = if has_path { 0.60 } else { 0.90 };
210            NotImpactedEntity {
211                entity_id: entity.id.clone(),
212                name: entity.name.clone(),
213                confidence,
214                reason,
215            }
216        })
217        .collect()
218}
219
220fn has_path_to_any(graph: &UcmGraph, from: &EntityId, targets: &[EntityId]) -> bool {
221    let inner = graph.inner();
222    let from_idx = match graph.entity_node_index(from) {
223        Some(idx) => idx,
224        None => return false,
225    };
226    let target_indices: HashSet<_> = targets
227        .iter()
228        .filter_map(|id| graph.entity_node_index(id))
229        .collect();
230
231    let mut visited = HashSet::new();
232    let mut queue = VecDeque::new();
233    queue.push_back(from_idx);
234
235    while let Some(current) = queue.pop_front() {
236        if target_indices.contains(&current) {
237            return true;
238        }
239        if !visited.insert(current) {
240            continue;
241        }
242        for neighbor in inner.neighbors_directed(current, Direction::Outgoing) {
243            queue.push_back(neighbor);
244        }
245    }
246    false
247}
248
249/// Analyze the impact of a set of changes on the context graph.
250///
251/// This is the core reasoning function. It:
252/// 1. Identifies changed entities
253/// 2. Runs reverse BFS to find impacted entities (with confidence decay)
254/// 3. Identifies NOT-impacted entities (with explanations)
255/// 4. Generates explanation chains for each conclusion
256pub fn analyze_impact(
257    graph: &UcmGraph,
258    changed_entities: &[EntityId],
259    min_confidence: f64,
260    max_depth: usize,
261) -> ImpactReport {
262    // Run impact BFS
263    let impacted = impact_bfs(graph, changed_entities, min_confidence, max_depth);
264    let not_impacted_entities = find_not_impacted(graph, changed_entities, &impacted);
265
266    // Classify into direct and indirect
267    let mut direct_impacts = Vec::new();
268    let mut indirect_impacts = Vec::new();
269    let mut max_depth_reached: usize = 0;
270
271    for impact in &impacted {
272        let tier = ConfidenceTier::from_score(impact.confidence);
273        let explanation = explain_impact(&impact.name, &impact.path, impact.confidence);
274
275        let entry = ImpactEntry {
276            entity_id: impact.entity_id.as_str().to_string(),
277            name: impact.name.clone(),
278            confidence: impact.confidence,
279            tier: format!("{} {:?}", tier.emoji(), tier),
280            depth: impact.depth,
281            path: impact.path.clone(),
282            reason: impact.reason.clone(),
283            explanation_chain: explanation,
284        };
285
286        max_depth_reached = max_depth_reached.max(impact.depth);
287
288        match impact.impact_type {
289            ImpactType::Direct => direct_impacts.push(entry),
290            ImpactType::Indirect => indirect_impacts.push(entry),
291        }
292    }
293
294    // Build not-impacted entries with explanations
295    let not_impacted: Vec<NotImpactedEntry> = not_impacted_entities
296        .iter()
297        .map(|ni| {
298            let explanation = explain_not_impacted(&ni.name, &ni.reason, ni.confidence);
299            NotImpactedEntry {
300                entity_id: ni.entity_id.as_str().to_string(),
301                name: ni.name.clone(),
302                confidence: ni.confidence,
303                reason: ni.reason.clone(),
304                explanation_chain: explanation,
305            }
306        })
307        .collect();
308
309    // Build change descriptions
310    let changes: Vec<ChangeDescription> = changed_entities
311        .iter()
312        .map(|id| {
313            let entity = graph.get_entity(id);
314            ChangeDescription {
315                entity_id: id.as_str().to_string(),
316                name: entity
317                    .map(|e| e.name.clone())
318                    .unwrap_or_else(|| "Unknown".into()),
319                change_type: "Modified".into(),
320                file_path: entity.map(|e| e.file_path.clone()).unwrap_or_default(),
321            }
322        })
323        .collect();
324
325    let stats = ImpactStats {
326        total_entities: graph.stats().entity_count,
327        directly_impacted: direct_impacts.len(),
328        indirectly_impacted: indirect_impacts.len(),
329        not_impacted: not_impacted.len(),
330        max_depth_reached,
331    };
332
333    ImpactReport {
334        changes,
335        direct_impacts,
336        indirect_impacts,
337        not_impacted,
338        ambiguities: Vec::new(), // Filled by ambiguity detector
339        stats,
340    }
341}
342
343#[cfg(test)]
344mod tests {
345    use super::*;
346    use ucm_graph_core::edge::*;
347    use ucm_graph_core::entity::*;
348
349    fn build_test_graph() -> UcmGraph {
350        let mut graph = UcmGraph::new();
351
352        let entities = vec![
353            ("src/auth/service.ts", "validateToken", "validateToken"),
354            ("src/api/middleware.ts", "authMiddleware", "authMiddleware"),
355            (
356                "src/payments/checkout.ts",
357                "processPayment",
358                "processPayment",
359            ),
360            ("src/admin/reports.ts", "generateReport", "generateReport"),
361        ];
362
363        for (file, symbol, name) in &entities {
364            graph
365                .add_entity(UcmEntity::new(
366                    EntityId::local(file, symbol),
367                    EntityKind::Function {
368                        is_async: true,
369                        parameter_count: 1,
370                        return_type: None,
371                    },
372                    *name,
373                    *file,
374                    "typescript",
375                    DiscoverySource::StaticAnalysis,
376                ))
377                .unwrap();
378        }
379
380        // middleware → validateToken
381        graph
382            .add_relationship(
383                &EntityId::local("src/api/middleware.ts", "authMiddleware"),
384                &EntityId::local("src/auth/service.ts", "validateToken"),
385                UcmEdge::new(
386                    RelationType::Imports,
387                    DiscoverySource::StaticAnalysis,
388                    0.95,
389                    "imports directly",
390                ),
391            )
392            .unwrap();
393
394        // processPayment → middleware
395        graph
396            .add_relationship(
397                &EntityId::local("src/payments/checkout.ts", "processPayment"),
398                &EntityId::local("src/api/middleware.ts", "authMiddleware"),
399                UcmEdge::new(
400                    RelationType::DependsOn,
401                    DiscoverySource::StaticAnalysis,
402                    0.80,
403                    "uses auth middleware",
404                ),
405            )
406            .unwrap();
407
408        graph
409    }
410
411    #[test]
412    fn test_impact_analysis() {
413        let graph = build_test_graph();
414        let changed = vec![EntityId::local("src/auth/service.ts", "validateToken")];
415
416        let report = analyze_impact(&graph, &changed, 0.1, 10);
417
418        // Should have direct impacts
419        assert!(
420            !report.direct_impacts.is_empty(),
421            "Should have direct impacts"
422        );
423        assert!(report
424            .direct_impacts
425            .iter()
426            .any(|i| i.name == "authMiddleware"));
427
428        // Should have indirect impacts
429        assert!(
430            !report.indirect_impacts.is_empty(),
431            "Should have indirect impacts"
432        );
433        assert!(report
434            .indirect_impacts
435            .iter()
436            .any(|i| i.name == "processPayment"));
437
438        // Should have not-impacted
439        assert!(!report.not_impacted.is_empty(), "Should have not-impacted");
440        assert!(report
441            .not_impacted
442            .iter()
443            .any(|n| n.name == "generateReport"));
444
445        // All entries should have explanation chains
446        for impact in &report.direct_impacts {
447            assert!(!impact.explanation_chain.steps.is_empty());
448        }
449    }
450
451    #[test]
452    fn test_impact_report_serializable() {
453        let graph = build_test_graph();
454        let changed = vec![EntityId::local("src/auth/service.ts", "validateToken")];
455        let report = analyze_impact(&graph, &changed, 0.1, 10);
456
457        let json = serde_json::to_string_pretty(&report).unwrap();
458        assert!(json.contains("explanation_chain"));
459        assert!(json.contains("not_impacted"));
460
461        // Round-trip
462        let _: ImpactReport = serde_json::from_str(&json).unwrap();
463    }
464
465    #[test]
466    fn test_find_not_impacted() {
467        let graph = build_test_graph();
468        // validateToken is changed
469        let changed = vec![EntityId::local("src/auth/service.ts", "validateToken")];
470        // authMiddleware is impacted (directly)
471        let impacted = vec![ImpactedEntity {
472            entity_id: EntityId::local("src/api/middleware.ts", "authMiddleware"),
473            name: "authMiddleware".to_string(),
474            confidence: 0.95,
475            depth: 1,
476            impact_type: ImpactType::Direct,
477            path: vec!["validateToken".to_string(), "authMiddleware".to_string()],
478            reason: "imports directly".to_string(),
479        }];
480
481        let not_impacted = find_not_impacted(&graph, &changed, &impacted);
482
483        // generateReport should be in not_impacted because it has no path to validateToken
484        let report_ni = not_impacted.iter().find(|ni| ni.name == "generateReport");
485        assert!(report_ni.is_some());
486        assert_eq!(
487            report_ni.unwrap().reason,
488            "No graph path exists to changed entities"
489        );
490        assert_eq!(report_ni.unwrap().confidence, 0.90);
491
492        // processPayment should be in not_impacted because it WAS NOT in the impacted list passed in,
493        // even though it HAS a path to validateToken in the graph.
494        let payment_ni = not_impacted.iter().find(|ni| ni.name == "processPayment");
495        assert!(payment_ni.is_some());
496        assert_eq!(
497            payment_ni.unwrap().reason,
498            "Path exists but confidence below threshold"
499        );
500        assert_eq!(payment_ni.unwrap().confidence, 0.60);
501
502        // validateToken itself should NOT be in not_impacted
503        assert!(!not_impacted.iter().any(|ni| ni.name == "validateToken"));
504
505        // authMiddleware itself should NOT be in not_impacted
506        assert!(!not_impacted.iter().any(|ni| ni.name == "authMiddleware"));
507    }
508}