Skip to main content

contextqa_core/
graph.rs

1//! Materialized context graph with Glean-style immutable fact layering.
2//!
3//! The graph is a petgraph StableGraph (stable indices across mutations)
4//! wrapped with typed operations, ownership tracking, and query methods.
5//!
6//! Key design decisions:
7//! - StableGraph for stable NodeIndex/EdgeIndex across removals (petgraph docs)
8//! - Glean-style layers: each "fact layer" is an overlay that can add or hide
9//!   facts from layers below. When a file changes, all facts owned by that file
10//!   are hidden, and only affected files are re-indexed. O(changes) cost.
11//! - SCIP identity: entities identified by globally-unique strings, so files
12//!   can be re-indexed independently without graph-local ID coordination.
13//!
14//! References:
15//! - petgraph StableGraph: https://docs.rs/petgraph
16//! - Meta Glean fact stacking: https://glean.software/docs/angle/incrementality
17//! - Sourcegraph SCIP: https://github.com/sourcegraph/scip
18
19use petgraph::stable_graph::{EdgeIndex, NodeIndex, StableGraph};
20use petgraph::visit::EdgeRef;
21use petgraph::Direction;
22use serde::{Deserialize, Serialize};
23use std::collections::{HashMap, HashSet};
24
25use crate::edge::{RelationType, UcmEdge};
26use crate::entity::{EntityId, UcmEntity};
27
28use crate::error::{Result, UcmError};
29
/// The materialized context graph — primary queryable data structure.
///
/// Wraps petgraph's `StableGraph` with typed operations and
/// SCIP-identity → NodeIndex lookup.
///
/// The three fields are kept in step by the mutation methods: `add_entity`
/// inserts into all of them, and `invalidate_file` removes a file's nodes
/// from the graph and the lookup table together with its ownership entry.
#[derive(Debug)]
pub struct UcmGraph {
    /// The underlying petgraph stable graph. Node weights are entities,
    /// edge weights are relationships.
    graph: StableGraph<UcmEntity, UcmEdge>,
    /// Fast lookup: SCIP EntityId string → NodeIndex.
    /// Keyed by `entity.id.as_str()`; at most one node per ID.
    entity_index: HashMap<String, NodeIndex>,
    /// Ownership tracking: which source file "owns" which entities.
    /// Keyed by the entity's `file_path` at the time it was added.
    /// Used for Glean-style incremental updates — when a file changes,
    /// all entities owned by that file are invalidated.
    ownership: HashMap<String, HashSet<NodeIndex>>,
}
45
46impl UcmGraph {
47    pub fn new() -> Self {
48        Self {
49            graph: StableGraph::new(),
50            entity_index: HashMap::new(),
51            ownership: HashMap::new(),
52        }
53    }
54
55    // ─── Mutation Operations ───────────────────────────────────────
56
57    /// Add an entity to the graph. Returns the node index.
58    /// If an entity with the same SCIP ID exists, returns error.
59    pub fn add_entity(&mut self, entity: UcmEntity) -> Result<NodeIndex> {
60        let id_str = entity.id.as_str().to_string();
61        if self.entity_index.contains_key(&id_str) {
62            return Err(UcmError::DuplicateEntity(id_str));
63        }
64
65        let file_path = entity.file_path.clone();
66        let idx = self.graph.add_node(entity);
67        self.entity_index.insert(id_str, idx);
68
69        // Track ownership: this file owns this entity
70        self.ownership.entry(file_path).or_default().insert(idx);
71
72        Ok(idx)
73    }
74
75    /// Add or update an entity (upsert semantics for replay safety).
76    pub fn upsert_entity(&mut self, entity: UcmEntity) -> NodeIndex {
77        let id_str = entity.id.as_str().to_string();
78        if let Some(&idx) = self.entity_index.get(&id_str) {
79            // Update existing
80            if let Some(node) = self.graph.node_weight_mut(idx) {
81                *node = entity;
82            }
83            idx
84        } else {
85            // add_entity only fails if duplicate — we checked above, so this is safe.
86            // Use expect with a clear message rather than unwrap.
87            self.add_entity(entity)
88                .expect("add_entity: duplicate despite index miss (logic error)")
89        }
90    }
91
92    /// Add a relationship between two entities.
93    pub fn add_relationship(
94        &mut self,
95        from: &EntityId,
96        to: &EntityId,
97        edge: UcmEdge,
98    ) -> Result<EdgeIndex> {
99        let from_idx = self.resolve_entity(from)?;
100        let to_idx = self.resolve_entity(to)?;
101        Ok(self.graph.add_edge(from_idx, to_idx, edge))
102    }
103
104    /// Remove all entities and edges owned by a file path.
105    /// This is the Glean-style "hide facts" operation for incremental updates.
106    pub fn invalidate_file(&mut self, file_path: &str) -> Vec<EntityId> {
107        let mut removed = Vec::new();
108
109        if let Some(nodes) = self.ownership.remove(file_path) {
110            for idx in nodes {
111                if let Some(entity) = self.graph.remove_node(idx) {
112                    self.entity_index.remove(entity.id.as_str());
113                    removed.push(entity.id);
114                }
115            }
116        }
117
118        removed
119    }
120
121    // ─── Query Operations ──────────────────────────────────────────
122
123    /// Get an entity by its SCIP ID.
124    pub fn get_entity(&self, id: &EntityId) -> Option<&UcmEntity> {
125        let idx = self.entity_index.get(id.as_str())?;
126        self.graph.node_weight(*idx)
127    }
128
129    /// Get all entities in the graph.
130    pub fn all_entities(&self) -> Vec<&UcmEntity> {
131        self.graph.node_weights().collect()
132    }
133
134    /// Get direct dependencies of an entity (outgoing edges).
135    pub fn dependencies(&self, id: &EntityId) -> Result<Vec<(&UcmEntity, &UcmEdge)>> {
136        let idx = self.resolve_entity(id)?;
137        Ok(self
138            .graph
139            .edges_directed(idx, Direction::Outgoing)
140            .filter_map(|edge| {
141                let target = self.graph.node_weight(edge.target())?;
142                Some((target, edge.weight()))
143            })
144            .collect())
145    }
146
147    /// Get reverse dependencies of an entity (incoming edges).
148    /// "What depends on this entity?"
149    ///
150    /// This is the core query for impact analysis — when entity X changes,
151    /// which entities are affected? (Google TAP, Meta PTS)
152    pub fn reverse_deps(&self, id: &EntityId) -> Result<Vec<(&UcmEntity, &UcmEdge)>> {
153        let idx = self.resolve_entity(id)?;
154        Ok(self
155            .graph
156            .edges_directed(idx, Direction::Incoming)
157            .filter_map(|edge| {
158                let source = self.graph.node_weight(edge.source())?;
159                Some((source, edge.weight()))
160            })
161            .collect())
162    }
163
164    /// Get graph statistics.
165    pub fn stats(&self) -> GraphStats {
166        let edge_count = self.graph.edge_count();
167        let avg_confidence = if edge_count > 0 {
168            self.graph.edge_weights().map(|e| e.confidence).sum::<f64>() / edge_count as f64
169        } else {
170            0.0
171        };
172
173        GraphStats {
174            entity_count: self.graph.node_count(),
175            edge_count,
176            avg_confidence,
177            files_tracked: self.ownership.len(),
178        }
179    }
180
181    // ─── Low-Level Accessors (for external analysis modules) ──────
182
183    /// Get a read-only reference to the inner petgraph.
184    pub fn inner(&self) -> &StableGraph<UcmEntity, UcmEdge> {
185        &self.graph
186    }
187
188    /// Resolve an EntityId to a NodeIndex, if it exists.
189    pub fn entity_node_index(&self, id: &EntityId) -> Option<NodeIndex> {
190        self.entity_index.get(id.as_str()).copied()
191    }
192
193    /// Get a reference to the full entity index map.
194    pub fn entity_index_map(&self) -> &HashMap<String, NodeIndex> {
195        &self.entity_index
196    }
197
198    // ─── Serialization ─────────────────────────────────────────────
199
200    /// Serialize the graph to JSON.
201    pub fn to_json(&self) -> Result<String> {
202        let snapshot = GraphSnapshot {
203            entities: self.graph.node_weights().cloned().collect(),
204            edges: self
205                .graph
206                .edge_indices()
207                .filter_map(|idx| {
208                    let (source, target) = self.graph.edge_endpoints(idx)?;
209                    let source_entity = self.graph.node_weight(source)?;
210                    let target_entity = self.graph.node_weight(target)?;
211                    let edge = self.graph.edge_weight(idx)?;
212                    Some(EdgeSnapshot {
213                        from: source_entity.id.clone(),
214                        to: target_entity.id.clone(),
215                        edge: edge.clone(),
216                    })
217                })
218                .collect(),
219        };
220        Ok(serde_json::to_string_pretty(&snapshot)?)
221    }
222
223    // ─── Internal helpers ──────────────────────────────────────────
224
225    fn resolve_entity(&self, id: &EntityId) -> Result<NodeIndex> {
226        self.entity_index
227            .get(id.as_str())
228            .copied()
229            .ok_or_else(|| UcmError::EntityNotFound(id.as_str().to_string()))
230    }
231}
232
/// The default graph is the empty graph; this simply delegates to
/// [`UcmGraph::new`].
impl Default for UcmGraph {
    fn default() -> Self {
        Self::new()
    }
}
238
239// Helper method on UcmEdge for display
240impl UcmEdge {
241    pub fn relation_type_str(&self) -> &str {
242        match &self.relation_type {
243            RelationType::Imports => "imports",
244            RelationType::Calls => "calls",
245            RelationType::TestedBy => "tested by",
246            RelationType::Implements => "implements",
247            RelationType::DependsOn => "depends on",
248            RelationType::RequiredBy => "required by",
249            RelationType::Contains => "contains",
250            RelationType::Extends => "extends",
251            RelationType::DataFlow => "data flow",
252            RelationType::CoChanged => "co-changed with",
253        }
254    }
255}
256
257// ─── Result Types ──────────────────────────────────────────────────
258
/// An entity identified as impacted by a change.
///
/// Plain serializable result record. Nothing in this file constructs it, so
/// the field notes below marked NOTE(review) are assumptions to confirm
/// against the code that produces these values.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ImpactedEntity {
    /// ID of the impacted entity.
    pub entity_id: EntityId,
    /// NOTE(review): presumably the entity's display name — confirm.
    pub name: String,
    /// NOTE(review): presumably a score in 0.0..=1.0 like edge confidences;
    /// the range is not enforced here — confirm.
    pub confidence: f64,
    /// NOTE(review): presumably the number of hops from the changed entity — confirm.
    pub depth: usize,
    /// Whether the impact is direct or transitive (see [`ImpactType`]).
    pub impact_type: ImpactType,
    /// NOTE(review): presumably the chain of entities leading from the change
    /// to this entity; the string format is not visible here — confirm.
    pub path: Vec<String>,
    /// NOTE(review): presumably a human-readable explanation — confirm.
    pub reason: String,
}
270
271#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
272pub enum ImpactType {
273    /// Directly references the changed entity
274    Direct,
275    /// Transitively depends on the changed entity
276    Indirect,
277}
278
/// An entity determined to NOT be impacted by a change.
///
/// Plain serializable result record. Nothing in this file constructs it, so
/// the field notes below marked NOTE(review) are assumptions to confirm
/// against the code that produces these values.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NotImpactedEntity {
    /// ID of the entity judged not impacted.
    pub entity_id: EntityId,
    /// NOTE(review): presumably the entity's display name — confirm.
    pub name: String,
    /// NOTE(review): presumably how confident the analysis is that the entity
    /// is unaffected; range not enforced here — confirm.
    pub confidence: f64,
    /// NOTE(review): presumably a human-readable justification — confirm.
    pub reason: String,
}
287
/// Serializable snapshot of the full graph.
///
/// This is the JSON shape written by `UcmGraph::to_json`. It stores
/// entities and edges only; file ownership is not part of the snapshot.
#[derive(Debug, Serialize, Deserialize)]
struct GraphSnapshot {
    /// A clone of every node weight in the graph.
    entities: Vec<UcmEntity>,
    /// One record per edge, with endpoints given by entity ID.
    edges: Vec<EdgeSnapshot>,
}
294
/// One edge of a [`GraphSnapshot`], with its endpoints identified by entity
/// ID instead of graph-local node index.
#[derive(Debug, Serialize, Deserialize)]
struct EdgeSnapshot {
    /// ID of the entity the edge starts at.
    from: EntityId,
    /// ID of the entity the edge points to.
    to: EntityId,
    /// A clone of the edge weight.
    edge: UcmEdge,
}
301
/// Graph statistics summary, as produced by `UcmGraph::stats`.
#[derive(Debug, Serialize, Deserialize)]
pub struct GraphStats {
    /// Number of entities (nodes) in the graph.
    pub entity_count: usize,
    /// Number of relationships (edges) in the graph.
    pub edge_count: usize,
    /// Arithmetic mean of all edge confidences; `0.0` when there are no edges.
    pub avg_confidence: f64,
    /// Number of file paths with an entry in the ownership table.
    pub files_tracked: usize,
}
310
#[cfg(test)]
mod tests {
    use super::*;
    use crate::edge::*;
    use crate::entity::*;

    // Source files of the fixture entities.
    const AUTH_FILE: &str = "src/auth/service.ts";
    const MIDDLEWARE_FILE: &str = "src/api/middleware.ts";
    const CHECKOUT_FILE: &str = "src/payments/checkout.ts";
    const REPORTS_FILE: &str = "src/admin/reports.ts";

    /// Build a statically-discovered TypeScript entity named `name` in `file`.
    fn ts_entity(file: &str, name: &str, kind: EntityKind) -> UcmEntity {
        UcmEntity::new(
            EntityId::local(file, name),
            kind,
            name,
            file,
            "typescript",
            DiscoverySource::StaticAnalysis,
        )
    }

    /// ID of the `validateToken` fixture entity.
    fn validate_token_id() -> EntityId {
        EntityId::local(AUTH_FILE, "validateToken")
    }

    /// ID of the `authMiddleware` fixture entity.
    fn auth_middleware_id() -> EntityId {
        EntityId::local(MIDDLEWARE_FILE, "authMiddleware")
    }

    /// Fixture: four functions in four files and two edges,
    /// `processPayment → authMiddleware → validateToken`.
    /// `generateReport` is left unconnected on purpose.
    fn make_test_graph() -> UcmGraph {
        let entities = vec![
            ts_entity(
                AUTH_FILE,
                "validateToken",
                EntityKind::Function {
                    is_async: true,
                    parameter_count: 1,
                    return_type: Some("boolean".into()),
                },
            ),
            ts_entity(
                MIDDLEWARE_FILE,
                "authMiddleware",
                EntityKind::Function {
                    is_async: true,
                    parameter_count: 2,
                    return_type: None,
                },
            ),
            ts_entity(
                CHECKOUT_FILE,
                "processPayment",
                EntityKind::Function {
                    is_async: true,
                    parameter_count: 1,
                    return_type: Some("PaymentResult".into()),
                },
            ),
            ts_entity(
                REPORTS_FILE,
                "generateReport",
                EntityKind::Function {
                    is_async: false,
                    parameter_count: 0,
                    return_type: Some("Report".into()),
                },
            ),
        ];

        let mut graph = UcmGraph::new();
        for entity in entities {
            graph.add_entity(entity).unwrap();
        }

        // middleware imports validateToken
        let imports_edge = UcmEdge::new(
            RelationType::Imports,
            DiscoverySource::StaticAnalysis,
            0.95,
            "imports validateToken directly",
        );
        graph
            .add_relationship(&auth_middleware_id(), &validate_token_id(), imports_edge)
            .unwrap();

        // payment depends on middleware (protected route)
        let depends_edge = UcmEdge::new(
            RelationType::DependsOn,
            DiscoverySource::StaticAnalysis,
            0.80,
            "route uses authMiddleware",
        );
        graph
            .add_relationship(
                &EntityId::local(CHECKOUT_FILE, "processPayment"),
                &auth_middleware_id(),
                depends_edge,
            )
            .unwrap();

        // admin has NO connection to auth
        // (separate auth flow — this tests "not impacted" logic)

        graph
    }

    #[test]
    fn test_entity_lookup() {
        let graph = make_test_graph();
        let found = graph
            .get_entity(&validate_token_id())
            .expect("validateToken should be present in the fixture");
        assert_eq!(found.name, "validateToken");
    }

    #[test]
    fn test_duplicate_entity_error() {
        let mut graph = make_test_graph();
        // Same ID as an existing entity; the differing kind must not matter.
        let duplicate = ts_entity(
            AUTH_FILE,
            "validateToken",
            EntityKind::Function {
                is_async: false,
                parameter_count: 0,
                return_type: None,
            },
        );
        assert!(graph.add_entity(duplicate).is_err());
    }

    #[test]
    fn test_reverse_deps() {
        let graph = make_test_graph();
        let dependents = graph.reverse_deps(&validate_token_id()).unwrap();
        assert_eq!(dependents.len(), 1);
        assert_eq!(dependents[0].0.name, "authMiddleware");
    }

    #[test]
    fn test_file_invalidation() {
        let mut graph = make_test_graph();
        let target = validate_token_id();
        assert!(graph.get_entity(&target).is_some());

        let removed = graph.invalidate_file(AUTH_FILE);
        assert_eq!(removed.len(), 1);
        assert!(graph.get_entity(&target).is_none());
    }

    #[test]
    fn test_graph_stats() {
        let stats = make_test_graph().stats();
        assert_eq!(stats.entity_count, 4);
        assert_eq!(stats.edge_count, 2);
        assert!(stats.avg_confidence > 0.0);
    }

    #[test]
    fn test_graph_serialization() {
        let json = make_test_graph().to_json().unwrap();
        for expected in ["validateToken", "authMiddleware"] {
            assert!(json.contains(expected));
        }
    }
}