Skip to main content

codemem_engine/
graph_ops.rs

1//! Engine facade methods for graph algorithms.
2//!
3//! These wrap lock acquisition + graph algorithm calls so the MCP/API
4//! transport layers don't interact with the graph mutex directly.
5
6use crate::CodememEngine;
7use chrono::{DateTime, Utc};
8use codemem_core::{CodememError, Edge, GraphNode, NodeKind, RelationshipType};
9use std::collections::{HashMap, HashSet};
10
11// ── Result Types ─────────────────────────────────────────────────────────────
12
13/// A node with its PageRank score.
14#[derive(Debug, Clone)]
15pub struct RankedNode {
16    pub id: String,
17    pub score: f64,
18    pub kind: Option<String>,
19    pub label: Option<String>,
20}
21
22/// In-memory graph statistics snapshot.
23#[derive(Debug, Clone)]
24pub struct GraphStats {
25    pub node_count: usize,
26    pub edge_count: usize,
27    pub node_kind_counts: HashMap<String, usize>,
28    pub relationship_type_counts: HashMap<String, usize>,
29}
30
31/// A commit entry in a temporal change report.
32#[derive(Debug, Clone, serde::Serialize)]
33pub struct ChangeEntry {
34    pub commit_id: String,
35    pub hash: String,
36    pub author: String,
37    pub date: String,
38    pub subject: String,
39    pub changed_symbols: Vec<String>,
40    pub changed_files: Vec<String>,
41}
42
43/// Snapshot of the graph at a point in time.
44#[derive(Debug, Clone, serde::Serialize)]
45pub struct TemporalSnapshot {
46    pub at: String,
47    pub live_nodes: usize,
48    pub live_edges: usize,
49    pub node_kind_counts: HashMap<String, usize>,
50}
51
52/// A file that may be stale (not modified recently but still referenced).
53#[derive(Debug, Clone, serde::Serialize)]
54pub struct StaleFile {
55    pub file_path: String,
56    pub centrality: f64,
57    pub last_modified: Option<String>,
58    pub incoming_edges: usize,
59}
60
61/// Report on architectural drift between two time periods.
62#[derive(Debug, Clone, serde::Serialize)]
63pub struct DriftReport {
64    pub period: String,
65    pub new_cross_module_edges: usize,
66    pub removed_files: usize,
67    pub added_files: usize,
68    pub hotspot_files: Vec<String>,
69    pub coupling_increases: Vec<(String, String, usize)>,
70}
71
72// ── Engine Methods ───────────────────────────────────────────────────────────
73
74impl CodememEngine {
75    /// BFS or DFS traversal from a start node, with optional kind/relationship filters.
76    /// When `at_time` is provided, nodes/edges outside their validity window are excluded.
77    ///
78    /// Note: temporal filtering is post-hoc — the traversal runs on the full graph,
79    /// then expired nodes are removed. This means depth counts may include hops through
80    /// expired nodes. For precise temporal traversals, use `graph_at_time` instead.
81    pub fn graph_traverse(
82        &self,
83        start_id: &str,
84        depth: usize,
85        algorithm: &str,
86        exclude_kinds: &[NodeKind],
87        include_relationships: Option<&[RelationshipType]>,
88        at_time: Option<DateTime<Utc>>,
89    ) -> Result<Vec<GraphNode>, CodememError> {
90        let graph = self.lock_graph()?;
91        let has_filters = !exclude_kinds.is_empty() || include_relationships.is_some();
92
93        let mut nodes = if has_filters {
94            match algorithm {
95                "bfs" => graph.bfs_filtered(start_id, depth, exclude_kinds, include_relationships),
96                "dfs" => graph.dfs_filtered(start_id, depth, exclude_kinds, include_relationships),
97                _ => Err(CodememError::InvalidInput(format!(
98                    "Unknown algorithm: {algorithm}"
99                ))),
100            }
101        } else {
102            match algorithm {
103                "bfs" => graph.bfs(start_id, depth),
104                "dfs" => graph.dfs(start_id, depth),
105                _ => Err(CodememError::InvalidInput(format!(
106                    "Unknown algorithm: {algorithm}"
107                ))),
108            }
109        }?;
110
111        // Filter by temporal validity if at_time is provided
112        if let Some(at) = at_time {
113            nodes.retain(|n| {
114                n.valid_from.is_none_or(|vf| vf <= at) && n.valid_to.is_none_or(|vt| vt > at)
115            });
116        }
117
118        Ok(nodes)
119    }
120
121    /// Get in-memory graph statistics.
122    pub fn graph_stats(&self) -> Result<GraphStats, CodememError> {
123        let graph = self.lock_graph()?;
124        let stats = graph.stats();
125        Ok(GraphStats {
126            node_count: stats.node_count,
127            edge_count: stats.edge_count,
128            node_kind_counts: stats.node_kind_counts,
129            relationship_type_counts: stats.relationship_type_counts,
130        })
131    }
132
133    /// Get all edges for a node.
134    pub fn get_node_edges(&self, node_id: &str) -> Result<Vec<Edge>, CodememError> {
135        let graph = self.lock_graph()?;
136        graph.get_edges(node_id)
137    }
138
139    /// Run Louvain community detection at the given resolution.
140    pub fn louvain_communities(&self, resolution: f64) -> Result<Vec<Vec<String>>, CodememError> {
141        let graph = self.lock_graph()?;
142        Ok(graph.louvain_communities(resolution))
143    }
144
145    /// Compute PageRank and return the top-k nodes with their scores,
146    /// kinds, and labels.
147    pub fn find_important_nodes(
148        &self,
149        top_k: usize,
150        damping: f64,
151    ) -> Result<Vec<RankedNode>, CodememError> {
152        let graph = self.lock_graph()?;
153        let scores = graph.pagerank(damping, 100, 1e-6);
154
155        let mut sorted: Vec<(String, f64)> = scores.into_iter().collect();
156        sorted.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
157        sorted.truncate(top_k);
158
159        let results = sorted
160            .into_iter()
161            .map(|(id, score)| {
162                let node = graph.get_node(&id).ok().flatten();
163                RankedNode {
164                    id,
165                    score,
166                    kind: node.as_ref().map(|n| n.kind.to_string()),
167                    label: node.as_ref().map(|n| n.label.clone()),
168                }
169            })
170            .collect();
171
172        Ok(results)
173    }
174
175    // ── Temporal Queries ─────────────────────────────────────────────────
176
177    /// Return what changed between two timestamps: commits, their files, and symbols.
178    pub fn what_changed(
179        &self,
180        from: DateTime<Utc>,
181        to: DateTime<Utc>,
182        namespace: Option<&str>,
183    ) -> Result<Vec<ChangeEntry>, CodememError> {
184        let all_nodes = {
185            let graph = self.lock_graph()?;
186            graph.get_all_nodes()
187        };
188        let all_edges = self.storage.all_graph_edges()?;
189
190        // Find commit nodes in the time range
191        let commits: Vec<&GraphNode> = all_nodes
192            .iter()
193            .filter(|n| {
194                n.kind == NodeKind::Commit
195                    && !n.payload.contains_key("sentinel")
196                    && n.valid_from.is_some_and(|vf| vf >= from && vf <= to)
197                    && (namespace.is_none() || n.namespace.as_deref() == namespace)
198            })
199            .collect();
200
201        // Index ModifiedBy edges by dst (commit ID) for O(1) lookup
202        let mut edges_by_dst: HashMap<&str, Vec<&Edge>> = HashMap::new();
203        for edge in &all_edges {
204            if edge.relationship == RelationshipType::ModifiedBy {
205                edges_by_dst.entry(&edge.dst).or_default().push(edge);
206            }
207        }
208
209        let mut entries = Vec::new();
210        for commit in &commits {
211            let mut changed_files = Vec::new();
212            let mut changed_symbols = Vec::new();
213
214            if let Some(commit_edges) = edges_by_dst.get(commit.id.as_str()) {
215                for edge in commit_edges {
216                    if edge.src.starts_with("file:") {
217                        changed_files.push(
218                            edge.src
219                                .strip_prefix("file:")
220                                .unwrap_or(&edge.src)
221                                .to_string(),
222                        );
223                    } else if edge.src.starts_with("sym:") {
224                        changed_symbols.push(
225                            edge.src
226                                .strip_prefix("sym:")
227                                .unwrap_or(&edge.src)
228                                .to_string(),
229                        );
230                    }
231                }
232            }
233
234            entries.push(ChangeEntry {
235                commit_id: commit.id.clone(),
236                hash: commit
237                    .payload
238                    .get("hash")
239                    .and_then(|v| v.as_str())
240                    .unwrap_or("")
241                    .to_string(),
242                author: commit
243                    .payload
244                    .get("author")
245                    .and_then(|v| v.as_str())
246                    .unwrap_or("")
247                    .to_string(),
248                date: commit
249                    .payload
250                    .get("date")
251                    .and_then(|v| v.as_str())
252                    .unwrap_or("")
253                    .to_string(),
254                subject: commit
255                    .payload
256                    .get("subject")
257                    .and_then(|v| v.as_str())
258                    .unwrap_or("")
259                    .to_string(),
260                changed_symbols,
261                changed_files,
262            });
263        }
264
265        // Sort by date descending (newest first)
266        entries.sort_by(|a, b| b.date.cmp(&a.date));
267        Ok(entries)
268    }
269
270    /// Snapshot of the graph at a point in time.
271    ///
272    /// Counts nodes/edges that were valid at `at`: `valid_from <= at` and
273    /// (`valid_to` is None or `valid_to > at`). Nodes/edges with no temporal
274    /// fields are always counted (they predate the temporal layer).
275    pub fn graph_at_time(&self, at: DateTime<Utc>) -> Result<TemporalSnapshot, CodememError> {
276        let all_nodes = {
277            let graph = self.lock_graph()?;
278            graph.get_all_nodes()
279        };
280        let all_edges = self.storage.all_graph_edges()?;
281
282        let is_node_live = |n: &GraphNode| -> bool {
283            let after_start = n.valid_from.is_none_or(|vf| vf <= at);
284            let before_end = n.valid_to.is_none_or(|vt| vt > at);
285            after_start && before_end
286        };
287
288        let is_edge_live = |e: &Edge| -> bool {
289            let after_start = e.valid_from.is_none_or(|vf| vf <= at);
290            let before_end = e.valid_to.is_none_or(|vt| vt > at);
291            after_start && before_end
292        };
293
294        let live_nodes: Vec<&GraphNode> = all_nodes.iter().filter(|n| is_node_live(n)).collect();
295        let live_edges = all_edges.iter().filter(|e| is_edge_live(e)).count();
296
297        let mut kind_counts: HashMap<String, usize> = HashMap::new();
298        for node in &live_nodes {
299            *kind_counts.entry(node.kind.to_string()).or_default() += 1;
300        }
301
302        Ok(TemporalSnapshot {
303            at: at.to_rfc3339(),
304            live_nodes: live_nodes.len(),
305            live_edges,
306            node_kind_counts: kind_counts,
307        })
308    }
309
310    /// Find files that haven't been modified recently but have high centrality
311    /// or many incoming edges (potential stale code).
312    pub fn find_stale_files(
313        &self,
314        namespace: Option<&str>,
315        stale_days: u64,
316    ) -> Result<Vec<StaleFile>, CodememError> {
317        let all_nodes = {
318            let graph = self.lock_graph()?;
319            graph.get_all_nodes()
320        };
321        let all_edges = self.storage.all_graph_edges()?;
322        let cutoff = Utc::now() - chrono::Duration::days(stale_days as i64);
323
324        // Find the latest ModifiedBy edge date for each file
325        let mut file_last_modified: HashMap<&str, DateTime<Utc>> = HashMap::new();
326        for edge in &all_edges {
327            if edge.relationship == RelationshipType::ModifiedBy && edge.src.starts_with("file:") {
328                if let Some(vf) = edge.valid_from {
329                    let entry = file_last_modified.entry(&edge.src).or_insert(vf);
330                    if vf > *entry {
331                        *entry = vf;
332                    }
333                }
334            }
335        }
336
337        // Count incoming edges per file (how many things depend on it)
338        let mut incoming: HashMap<&str, usize> = HashMap::new();
339        for edge in &all_edges {
340            if edge.dst.starts_with("file:") {
341                *incoming.entry(&edge.dst).or_default() += 1;
342            }
343        }
344
345        let mut stale = Vec::new();
346        for node in &all_nodes {
347            if node.kind != NodeKind::File {
348                continue;
349            }
350            if node.valid_to.is_some() {
351                continue; // Already deleted
352            }
353            if namespace.is_some() && node.namespace.as_deref() != namespace {
354                continue;
355            }
356
357            let last_mod = file_last_modified.get(node.id.as_str()).copied();
358            // Only flag as stale if we have temporal data — files with no
359            // ModifiedBy edges haven't been through temporal ingestion yet,
360            // so we can't determine staleness.
361            let is_stale = last_mod.is_some_and(|lm| lm < cutoff);
362
363            if is_stale
364                && (node.centrality > 0.0
365                    || incoming.get(node.id.as_str()).copied().unwrap_or(0) > 0)
366            {
367                let file_path = node
368                    .id
369                    .strip_prefix("file:")
370                    .unwrap_or(&node.id)
371                    .to_string();
372                stale.push(StaleFile {
373                    file_path,
374                    centrality: node.centrality,
375                    last_modified: last_mod.map(|d| d.to_rfc3339()),
376                    incoming_edges: incoming.get(node.id.as_str()).copied().unwrap_or(0),
377                });
378            }
379        }
380
381        // Sort by centrality descending (most important stale files first)
382        stale.sort_by(|a, b| {
383            b.centrality
384                .partial_cmp(&a.centrality)
385                .unwrap_or(std::cmp::Ordering::Equal)
386        });
387        Ok(stale)
388    }
389
390    /// Detect architectural drift between two time periods.
391    ///
392    /// Compares the graph at `from` vs `to` to find: new cross-module edges,
393    /// files added/removed, hotspot files (most commits), and coupling increases
394    /// (module pairs with growing CoChanged counts).
395    pub fn detect_drift(
396        &self,
397        from: DateTime<Utc>,
398        to: DateTime<Utc>,
399        namespace: Option<&str>,
400    ) -> Result<DriftReport, CodememError> {
401        let all_nodes = {
402            let graph = self.lock_graph()?;
403            graph.get_all_nodes()
404        };
405        let all_edges = self.storage.all_graph_edges()?;
406
407        // Files alive at `from` vs `to`
408        let files_at = |at: DateTime<Utc>| -> HashSet<String> {
409            all_nodes
410                .iter()
411                .filter(|n| {
412                    n.kind == NodeKind::File
413                        && n.valid_from.is_none_or(|vf| vf <= at)
414                        && n.valid_to.is_none_or(|vt| vt > at)
415                        && (namespace.is_none() || n.namespace.as_deref() == namespace)
416                })
417                .map(|n| n.id.clone())
418                .collect()
419        };
420
421        let files_before = files_at(from);
422        let files_after = files_at(to);
423        let added_files = files_after.difference(&files_before).count();
424        let removed_files = files_before.difference(&files_after).count();
425
426        // Count cross-module edges added in the period
427        // A "cross-module" edge connects nodes in different top-level directories
428        let module_of = |id: &str| -> String {
429            let path = id
430                .strip_prefix("file:")
431                .or_else(|| id.strip_prefix("sym:"))
432                .unwrap_or(id);
433            path.split('/').next().unwrap_or("root").to_string()
434        };
435
436        let new_cross_module = all_edges
437            .iter()
438            .filter(|e| {
439                e.valid_from.is_some_and(|vf| vf >= from && vf <= to)
440                    && module_of(&e.src) != module_of(&e.dst)
441                    && !matches!(
442                        e.relationship,
443                        RelationshipType::ModifiedBy | RelationshipType::PartOf
444                    )
445            })
446            .count();
447
448        // Hotspot files: most ModifiedBy edges in the period
449        let mut file_commit_count: HashMap<String, usize> = HashMap::new();
450        for edge in &all_edges {
451            if edge.relationship == RelationshipType::ModifiedBy
452                && edge.src.starts_with("file:")
453                && edge.valid_from.is_some_and(|vf| vf >= from && vf <= to)
454            {
455                *file_commit_count
456                    .entry(
457                        edge.src
458                            .strip_prefix("file:")
459                            .unwrap_or(&edge.src)
460                            .to_string(),
461                    )
462                    .or_default() += 1;
463            }
464        }
465        let mut hotspots: Vec<(String, usize)> = file_commit_count.into_iter().collect();
466        hotspots.sort_by(|a, b| b.1.cmp(&a.1));
467        let hotspot_files: Vec<String> = hotspots.into_iter().take(10).map(|(f, _)| f).collect();
468
469        // Coupling increases: CoChanged pairs with growing counts.
470        // CoChanged edges from enrich_git_history use created_at (not valid_from),
471        // so check both fields.
472        let mut coupling: HashMap<(String, String), usize> = HashMap::new();
473        for edge in &all_edges {
474            if edge.relationship == RelationshipType::CoChanged
475                && (edge.valid_from.is_some_and(|vf| vf >= from && vf <= to)
476                    || (edge.valid_from.is_none()
477                        && edge.created_at >= from
478                        && edge.created_at <= to))
479            {
480                let pair = if edge.src < edge.dst {
481                    (edge.src.clone(), edge.dst.clone())
482                } else {
483                    (edge.dst.clone(), edge.src.clone())
484                };
485                let count = edge
486                    .properties
487                    .get("commit_count")
488                    .and_then(|v| v.as_u64())
489                    .unwrap_or(1) as usize;
490                *coupling.entry(pair).or_default() += count;
491            }
492        }
493        let mut coupling_vec: Vec<(String, String, usize)> =
494            coupling.into_iter().map(|((a, b), c)| (a, b, c)).collect();
495        coupling_vec.sort_by(|a, b| b.2.cmp(&a.2));
496        coupling_vec.truncate(10);
497
498        Ok(DriftReport {
499            period: format!("{} to {}", from.to_rfc3339(), to.to_rfc3339()),
500            new_cross_module_edges: new_cross_module,
501            removed_files,
502            added_files,
503            hotspot_files,
504            coupling_increases: coupling_vec,
505        })
506    }
507
508    /// Get the history of commits that modified a specific symbol or file.
509    pub fn symbol_history(&self, node_id: &str) -> Result<Vec<ChangeEntry>, CodememError> {
510        let all_edges = self.storage.all_graph_edges()?;
511
512        // Find all ModifiedBy edges from this node to commit nodes
513        let commit_ids: HashSet<&str> = all_edges
514            .iter()
515            .filter(|e| e.src == node_id && e.relationship == RelationshipType::ModifiedBy)
516            .map(|e| e.dst.as_str())
517            .collect();
518
519        if commit_ids.is_empty() {
520            return Ok(Vec::new());
521        }
522
523        // Batch-load commit nodes from graph (single lock acquisition)
524        let commit_nodes: Vec<GraphNode> = {
525            let graph = self.lock_graph()?;
526            graph
527                .get_all_nodes()
528                .into_iter()
529                .filter(|n| commit_ids.contains(n.id.as_str()))
530                .collect()
531        };
532
533        // Index ModifiedBy edges by dst to populate changed_files/symbols
534        let mut edges_by_dst: HashMap<&str, Vec<&Edge>> = HashMap::new();
535        for edge in &all_edges {
536            if edge.relationship == RelationshipType::ModifiedBy
537                && commit_ids.contains(edge.dst.as_str())
538            {
539                edges_by_dst.entry(&edge.dst).or_default().push(edge);
540            }
541        }
542
543        let mut entries = Vec::new();
544        for node in &commit_nodes {
545            let mut changed_files = Vec::new();
546            let mut changed_symbols = Vec::new();
547
548            if let Some(commit_edges) = edges_by_dst.get(node.id.as_str()) {
549                for edge in commit_edges {
550                    if edge.src.starts_with("file:") {
551                        changed_files.push(
552                            edge.src
553                                .strip_prefix("file:")
554                                .unwrap_or(&edge.src)
555                                .to_string(),
556                        );
557                    } else if edge.src.starts_with("sym:") {
558                        changed_symbols.push(
559                            edge.src
560                                .strip_prefix("sym:")
561                                .unwrap_or(&edge.src)
562                                .to_string(),
563                        );
564                    }
565                }
566            }
567
568            entries.push(ChangeEntry {
569                commit_id: node.id.clone(),
570                hash: node
571                    .payload
572                    .get("hash")
573                    .and_then(|v| v.as_str())
574                    .unwrap_or("")
575                    .to_string(),
576                author: node
577                    .payload
578                    .get("author")
579                    .and_then(|v| v.as_str())
580                    .unwrap_or("")
581                    .to_string(),
582                date: node
583                    .payload
584                    .get("date")
585                    .and_then(|v| v.as_str())
586                    .unwrap_or("")
587                    .to_string(),
588                subject: node
589                    .payload
590                    .get("subject")
591                    .and_then(|v| v.as_str())
592                    .unwrap_or("")
593                    .to_string(),
594                changed_symbols,
595                changed_files,
596            });
597        }
598
599        entries.sort_by(|a, b| b.date.cmp(&a.date));
600        Ok(entries)
601    }
602}