1use crate::CodememEngine;
7use chrono::{DateTime, Utc};
8use codemem_core::{CodememError, Edge, GraphNode, NodeKind, RelationshipType};
9use std::collections::{HashMap, HashSet, VecDeque};
10
11#[derive(Debug, Clone)]
15pub struct RankedNode {
16 pub id: String,
17 pub score: f64,
18 pub kind: Option<String>,
19 pub label: Option<String>,
20}
21
22#[derive(Debug, Clone)]
24pub struct GraphStats {
25 pub node_count: usize,
26 pub edge_count: usize,
27 pub node_kind_counts: HashMap<String, usize>,
28 pub relationship_type_counts: HashMap<String, usize>,
29}
30
31#[derive(Debug, Clone, serde::Serialize)]
33pub struct ChangeEntry {
34 pub commit_id: String,
35 pub hash: String,
36 pub author: String,
37 pub date: String,
38 pub subject: String,
39 pub changed_symbols: Vec<String>,
40 pub changed_files: Vec<String>,
41}
42
43#[derive(Debug, Clone, serde::Serialize)]
45pub struct TemporalSnapshot {
46 pub at: String,
47 pub live_nodes: usize,
48 pub live_edges: usize,
49 pub node_kind_counts: HashMap<String, usize>,
50}
51
52#[derive(Debug, Clone, serde::Serialize)]
54pub struct StaleFile {
55 pub file_path: String,
56 pub centrality: f64,
57 pub last_modified: Option<String>,
58 pub incoming_edges: usize,
59}
60
61#[derive(Debug, Clone, serde::Serialize)]
63pub struct TestImpactResult {
64 pub direct_tests: Vec<TestHit>,
65 pub transitive_tests: Vec<TestHit>,
66 pub analyzed_symbols: Vec<String>,
67}
68
69#[derive(Debug, Clone, serde::Serialize)]
71pub struct TestHit {
72 pub test_symbol: String,
73 pub file_path: String,
74 pub depth: usize,
75}
76
77#[derive(Debug, Clone, serde::Serialize)]
79pub struct CycleReport {
80 pub cycles: Vec<CycleGroup>,
81 pub total_cycles: usize,
82 pub critical_count: usize,
83}
84
85#[derive(Debug, Clone, serde::Serialize)]
87pub struct CycleGroup {
88 pub nodes: Vec<String>,
89 pub size: usize,
90 pub severity: String,
91}
92
93#[derive(Debug, Clone, serde::Serialize)]
95pub struct DriftReport {
96 pub period: String,
97 pub new_cross_module_edges: usize,
98 pub removed_files: usize,
99 pub added_files: usize,
100 pub hotspot_files: Vec<String>,
101 pub coupling_increases: Vec<(String, String, usize)>,
102}
103
104impl CodememEngine {
107 pub fn graph_traverse(
114 &self,
115 start_id: &str,
116 depth: usize,
117 algorithm: &str,
118 exclude_kinds: &[NodeKind],
119 include_relationships: Option<&[RelationshipType]>,
120 at_time: Option<DateTime<Utc>>,
121 ) -> Result<Vec<GraphNode>, CodememError> {
122 let graph = self.lock_graph()?;
123 let has_filters = !exclude_kinds.is_empty() || include_relationships.is_some();
124
125 let mut nodes = if has_filters {
126 match algorithm {
127 "bfs" => graph.bfs_filtered(start_id, depth, exclude_kinds, include_relationships),
128 "dfs" => graph.dfs_filtered(start_id, depth, exclude_kinds, include_relationships),
129 _ => Err(CodememError::InvalidInput(format!(
130 "Unknown algorithm: {algorithm}"
131 ))),
132 }
133 } else {
134 match algorithm {
135 "bfs" => graph.bfs(start_id, depth),
136 "dfs" => graph.dfs(start_id, depth),
137 _ => Err(CodememError::InvalidInput(format!(
138 "Unknown algorithm: {algorithm}"
139 ))),
140 }
141 }?;
142
143 if let Some(at) = at_time {
145 nodes.retain(|n| {
146 n.valid_from.is_none_or(|vf| vf <= at) && n.valid_to.is_none_or(|vt| vt > at)
147 });
148 }
149
150 Ok(nodes)
151 }
152
153 pub fn graph_stats(&self) -> Result<GraphStats, CodememError> {
155 let graph = self.lock_graph()?;
156 let stats = graph.stats();
157 Ok(GraphStats {
158 node_count: stats.node_count,
159 edge_count: stats.edge_count,
160 node_kind_counts: stats.node_kind_counts,
161 relationship_type_counts: stats.relationship_type_counts,
162 })
163 }
164
165 pub fn get_node_edges(&self, node_id: &str) -> Result<Vec<Edge>, CodememError> {
167 let graph = self.lock_graph()?;
168 graph.get_edges(node_id)
169 }
170
171 pub fn louvain_communities(&self, resolution: f64) -> Result<Vec<Vec<String>>, CodememError> {
173 let graph = self.lock_graph()?;
174 Ok(graph.louvain_communities(resolution))
175 }
176
177 pub fn find_important_nodes(
183 &self,
184 top_k: usize,
185 damping: f64,
186 namespace: Option<&str>,
187 ) -> Result<Vec<RankedNode>, CodememError> {
188 let graph = self.lock_graph()?;
189 let scores = if let Some(ns) = namespace {
190 graph.pagerank_for_namespace(
191 ns,
192 damping,
193 codemem_core::PAGERANK_ITERATIONS_DEFAULT,
194 codemem_core::PAGERANK_TOLERANCE_DEFAULT,
195 )
196 } else {
197 graph.pagerank(
198 damping,
199 codemem_core::PAGERANK_ITERATIONS_DEFAULT,
200 codemem_core::PAGERANK_TOLERANCE_DEFAULT,
201 )
202 };
203
204 let mut sorted: Vec<(String, f64)> = scores.into_iter().collect();
205 sorted.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
206 sorted.truncate(top_k);
207
208 let results = sorted
209 .into_iter()
210 .map(|(id, score)| {
211 let node = graph.get_node(&id).ok().flatten();
212 RankedNode {
213 id,
214 score,
215 kind: node.as_ref().map(|n| n.kind.to_string()),
216 label: node.as_ref().map(|n| n.label.clone()),
217 }
218 })
219 .collect();
220
221 Ok(results)
222 }
223
224 pub fn detect_cycles(&self) -> Result<CycleReport, CodememError> {
229 let graph = self.lock_graph()?;
230 let sccs = graph.strongly_connected_components();
231
232 let mut cycles: Vec<CycleGroup> = sccs
233 .into_iter()
234 .filter(|scc| scc.len() >= 2)
235 .map(|nodes| {
236 let size = nodes.len();
237 let severity = if size >= 5 {
238 "critical"
239 } else if size >= 3 {
240 "warning"
241 } else {
242 "info"
243 }
244 .to_string();
245 CycleGroup {
246 nodes,
247 size,
248 severity,
249 }
250 })
251 .collect();
252
253 cycles.sort_by(|a, b| b.size.cmp(&a.size));
254 let critical_count = cycles.iter().filter(|c| c.severity == "critical").count();
255 let total_cycles = cycles.len();
256
257 Ok(CycleReport {
258 cycles,
259 total_cycles,
260 critical_count,
261 })
262 }
263
264 pub fn what_changed(
268 &self,
269 from: DateTime<Utc>,
270 to: DateTime<Utc>,
271 namespace: Option<&str>,
272 ) -> Result<Vec<ChangeEntry>, CodememError> {
273 if from > to {
274 return Err(CodememError::InvalidInput(
275 "'from' must be before 'to'".into(),
276 ));
277 }
278 let all_nodes = {
279 let graph = self.lock_graph()?;
280 graph.get_all_nodes()
281 };
282 let all_edges = self.storage.all_graph_edges()?;
283
284 let commits: Vec<&GraphNode> = all_nodes
286 .iter()
287 .filter(|n| {
288 n.kind == NodeKind::Commit
289 && !n.payload.contains_key("sentinel")
290 && n.valid_from.is_some_and(|vf| vf >= from && vf <= to)
291 && (namespace.is_none() || n.namespace.as_deref() == namespace)
292 })
293 .collect();
294
295 let mut edges_by_dst: HashMap<&str, Vec<&Edge>> = HashMap::new();
297 for edge in &all_edges {
298 if edge.relationship == RelationshipType::ModifiedBy {
299 edges_by_dst.entry(&edge.dst).or_default().push(edge);
300 }
301 }
302
303 let mut entries = Vec::new();
304 for commit in &commits {
305 let mut changed_files = Vec::new();
306 let mut changed_symbols = Vec::new();
307
308 if let Some(commit_edges) = edges_by_dst.get(commit.id.as_str()) {
309 for edge in commit_edges {
310 if edge.src.starts_with("file:") {
311 changed_files.push(
312 edge.src
313 .strip_prefix("file:")
314 .unwrap_or(&edge.src)
315 .to_string(),
316 );
317 } else if edge.src.starts_with("sym:") {
318 changed_symbols.push(
319 edge.src
320 .strip_prefix("sym:")
321 .unwrap_or(&edge.src)
322 .to_string(),
323 );
324 }
325 }
326 }
327
328 entries.push(ChangeEntry {
329 commit_id: commit.id.clone(),
330 hash: commit
331 .payload
332 .get("hash")
333 .and_then(|v| v.as_str())
334 .unwrap_or("")
335 .to_string(),
336 author: commit
337 .payload
338 .get("author")
339 .and_then(|v| v.as_str())
340 .unwrap_or("")
341 .to_string(),
342 date: commit
343 .payload
344 .get("date")
345 .and_then(|v| v.as_str())
346 .unwrap_or("")
347 .to_string(),
348 subject: commit
349 .payload
350 .get("subject")
351 .and_then(|v| v.as_str())
352 .unwrap_or("")
353 .to_string(),
354 changed_symbols,
355 changed_files,
356 });
357 }
358
359 entries.sort_by(|a, b| b.date.cmp(&a.date));
361 Ok(entries)
362 }
363
364 pub fn graph_at_time(&self, at: DateTime<Utc>) -> Result<TemporalSnapshot, CodememError> {
370 let all_nodes = {
371 let graph = self.lock_graph()?;
372 graph.get_all_nodes()
373 };
374 let all_edges = self.storage.all_graph_edges()?;
375
376 let is_node_live = |n: &GraphNode| -> bool {
377 let after_start = n.valid_from.is_none_or(|vf| vf <= at);
378 let before_end = n.valid_to.is_none_or(|vt| vt > at);
379 after_start && before_end
380 };
381
382 let is_edge_live = |e: &Edge| -> bool {
383 let after_start = e.valid_from.is_none_or(|vf| vf <= at);
384 let before_end = e.valid_to.is_none_or(|vt| vt > at);
385 after_start && before_end
386 };
387
388 let live_nodes: Vec<&GraphNode> = all_nodes.iter().filter(|n| is_node_live(n)).collect();
389 let live_edges = all_edges.iter().filter(|e| is_edge_live(e)).count();
390
391 let mut kind_counts: HashMap<String, usize> = HashMap::new();
392 for node in &live_nodes {
393 *kind_counts.entry(node.kind.to_string()).or_default() += 1;
394 }
395
396 Ok(TemporalSnapshot {
397 at: at.to_rfc3339(),
398 live_nodes: live_nodes.len(),
399 live_edges,
400 node_kind_counts: kind_counts,
401 })
402 }
403
404 pub fn find_stale_files(
407 &self,
408 namespace: Option<&str>,
409 stale_days: u64,
410 ) -> Result<Vec<StaleFile>, CodememError> {
411 let all_nodes = {
412 let graph = self.lock_graph()?;
413 graph.get_all_nodes()
414 };
415 let all_edges = self.storage.all_graph_edges()?;
416 let stale_days = stale_days.min(3650); let cutoff = Utc::now() - chrono::Duration::days(stale_days as i64);
418
419 let mut file_last_modified: HashMap<&str, DateTime<Utc>> = HashMap::new();
421 for edge in &all_edges {
422 if edge.relationship == RelationshipType::ModifiedBy && edge.src.starts_with("file:") {
423 if let Some(vf) = edge.valid_from {
424 let entry = file_last_modified.entry(&edge.src).or_insert(vf);
425 if vf > *entry {
426 *entry = vf;
427 }
428 }
429 }
430 }
431
432 let mut incoming: HashMap<&str, usize> = HashMap::new();
434 for edge in &all_edges {
435 if edge.dst.starts_with("file:") {
436 *incoming.entry(&edge.dst).or_default() += 1;
437 }
438 }
439
440 let mut stale = Vec::new();
441 for node in &all_nodes {
442 if node.kind != NodeKind::File {
443 continue;
444 }
445 if node.valid_to.is_some() {
446 continue; }
448 if namespace.is_some() && node.namespace.as_deref() != namespace {
449 continue;
450 }
451
452 let last_mod = file_last_modified.get(node.id.as_str()).copied();
453 let is_stale = last_mod.is_some_and(|lm| lm < cutoff);
457
458 if is_stale
459 && (node.centrality > 0.0
460 || incoming.get(node.id.as_str()).copied().unwrap_or(0) > 0)
461 {
462 let file_path = node
463 .id
464 .strip_prefix("file:")
465 .unwrap_or(&node.id)
466 .to_string();
467 stale.push(StaleFile {
468 file_path,
469 centrality: node.centrality,
470 last_modified: last_mod.map(|d| d.to_rfc3339()),
471 incoming_edges: incoming.get(node.id.as_str()).copied().unwrap_or(0),
472 });
473 }
474 }
475
476 stale.sort_by(|a, b| {
478 b.centrality
479 .partial_cmp(&a.centrality)
480 .unwrap_or(std::cmp::Ordering::Equal)
481 });
482 Ok(stale)
483 }
484
485 pub fn detect_drift(
491 &self,
492 from: DateTime<Utc>,
493 to: DateTime<Utc>,
494 namespace: Option<&str>,
495 ) -> Result<DriftReport, CodememError> {
496 if from > to {
497 return Err(CodememError::InvalidInput(
498 "'from' must be before 'to'".into(),
499 ));
500 }
501 let all_nodes = {
502 let graph = self.lock_graph()?;
503 graph.get_all_nodes()
504 };
505 let all_edges = self.storage.all_graph_edges()?;
506
507 let files_at = |at: DateTime<Utc>| -> HashSet<String> {
509 all_nodes
510 .iter()
511 .filter(|n| {
512 n.kind == NodeKind::File
513 && n.valid_from.is_none_or(|vf| vf <= at)
514 && n.valid_to.is_none_or(|vt| vt > at)
515 && (namespace.is_none() || n.namespace.as_deref() == namespace)
516 })
517 .map(|n| n.id.clone())
518 .collect()
519 };
520
521 let files_before = files_at(from);
522 let files_after = files_at(to);
523 let added_files = files_after.difference(&files_before).count();
524 let removed_files = files_before.difference(&files_after).count();
525
526 let module_of = |id: &str| -> String {
529 let path = id
530 .strip_prefix("file:")
531 .or_else(|| id.strip_prefix("sym:"))
532 .unwrap_or(id);
533 path.split('/').next().unwrap_or("root").to_string()
534 };
535
536 let new_cross_module = all_edges
537 .iter()
538 .filter(|e| {
539 e.valid_from.is_some_and(|vf| vf >= from && vf <= to)
540 && module_of(&e.src) != module_of(&e.dst)
541 && !matches!(
542 e.relationship,
543 RelationshipType::ModifiedBy | RelationshipType::PartOf
544 )
545 })
546 .count();
547
548 let mut file_commit_count: HashMap<String, usize> = HashMap::new();
550 for edge in &all_edges {
551 if edge.relationship == RelationshipType::ModifiedBy
552 && edge.src.starts_with("file:")
553 && edge.valid_from.is_some_and(|vf| vf >= from && vf <= to)
554 {
555 *file_commit_count
556 .entry(
557 edge.src
558 .strip_prefix("file:")
559 .unwrap_or(&edge.src)
560 .to_string(),
561 )
562 .or_default() += 1;
563 }
564 }
565 let mut hotspots: Vec<(String, usize)> = file_commit_count.into_iter().collect();
566 hotspots.sort_by(|a, b| b.1.cmp(&a.1));
567 let hotspot_files: Vec<String> = hotspots.into_iter().take(10).map(|(f, _)| f).collect();
568
569 let mut coupling: HashMap<(String, String), usize> = HashMap::new();
573 for edge in &all_edges {
574 if edge.relationship == RelationshipType::CoChanged
575 && (edge.valid_from.is_some_and(|vf| vf >= from && vf <= to)
576 || (edge.valid_from.is_none()
577 && edge.created_at >= from
578 && edge.created_at <= to))
579 {
580 let pair = if edge.src < edge.dst {
581 (edge.src.clone(), edge.dst.clone())
582 } else {
583 (edge.dst.clone(), edge.src.clone())
584 };
585 let count = edge
586 .properties
587 .get("commit_count")
588 .and_then(|v| v.as_u64())
589 .unwrap_or(1) as usize;
590 *coupling.entry(pair).or_default() += count;
591 }
592 }
593 let mut coupling_vec: Vec<(String, String, usize)> =
594 coupling.into_iter().map(|((a, b), c)| (a, b, c)).collect();
595 coupling_vec.sort_by(|a, b| b.2.cmp(&a.2));
596 coupling_vec.truncate(10);
597
598 Ok(DriftReport {
599 period: format!("{} to {}", from.to_rfc3339(), to.to_rfc3339()),
600 new_cross_module_edges: new_cross_module,
601 removed_files,
602 added_files,
603 hotspot_files,
604 coupling_increases: coupling_vec,
605 })
606 }
607
608 pub fn symbol_history(&self, node_id: &str) -> Result<Vec<ChangeEntry>, CodememError> {
610 let node_edges = self.storage.get_edges_for_node(node_id)?;
612 let commit_ids: HashSet<String> = node_edges
613 .iter()
614 .filter(|e| e.src == node_id && e.relationship == RelationshipType::ModifiedBy)
615 .map(|e| e.dst.clone())
616 .collect();
617
618 if commit_ids.is_empty() {
619 return Ok(Vec::new());
620 }
621
622 let commit_nodes: Vec<GraphNode> = {
624 let graph = self.lock_graph()?;
625 graph
626 .get_all_nodes()
627 .into_iter()
628 .filter(|n| commit_ids.contains(&n.id))
629 .collect()
630 };
631
632 let mut entries = Vec::new();
634 for node in &commit_nodes {
635 let commit_edges = self.storage.get_edges_for_node(&node.id)?;
636 let mut changed_files = Vec::new();
637 let mut changed_symbols = Vec::new();
638
639 for edge in &commit_edges {
640 if edge.relationship == RelationshipType::ModifiedBy && edge.dst == node.id {
641 if edge.src.starts_with("file:") {
642 changed_files.push(
643 edge.src
644 .strip_prefix("file:")
645 .unwrap_or(&edge.src)
646 .to_string(),
647 );
648 } else if edge.src.starts_with("sym:") {
649 changed_symbols.push(
650 edge.src
651 .strip_prefix("sym:")
652 .unwrap_or(&edge.src)
653 .to_string(),
654 );
655 }
656 }
657 }
658
659 entries.push(ChangeEntry {
660 commit_id: node.id.clone(),
661 hash: node
662 .payload
663 .get("hash")
664 .and_then(|v| v.as_str())
665 .unwrap_or("")
666 .to_string(),
667 author: node
668 .payload
669 .get("author")
670 .and_then(|v| v.as_str())
671 .unwrap_or("")
672 .to_string(),
673 date: node
674 .payload
675 .get("date")
676 .and_then(|v| v.as_str())
677 .unwrap_or("")
678 .to_string(),
679 subject: node
680 .payload
681 .get("subject")
682 .and_then(|v| v.as_str())
683 .unwrap_or("")
684 .to_string(),
685 changed_symbols,
686 changed_files,
687 });
688 }
689
690 entries.sort_by(|a, b| b.date.cmp(&a.date));
691 Ok(entries)
692 }
693
694 pub fn test_impact(
703 &self,
704 changed_symbol_ids: &[&str],
705 max_depth: usize,
706 ) -> Result<TestImpactResult, CodememError> {
707 let graph = self.lock_graph()?;
708
709 let all_nodes = graph.get_all_nodes();
712 let mut callers_of: HashMap<&str, Vec<&str>> = HashMap::new();
713 let mut all_edges_vec: Vec<Edge> = Vec::new();
715 for node in &all_nodes {
716 if let Ok(edges) = graph.get_edges(&node.id) {
717 all_edges_vec.extend(edges);
718 }
719 }
720
721 for edge in &all_edges_vec {
724 if edge.relationship == RelationshipType::Calls {
725 callers_of
726 .entry(edge.dst.as_str())
727 .or_default()
728 .push(edge.src.as_str());
729 }
730 }
731
732 let node_map: HashMap<&str, &GraphNode> =
734 all_nodes.iter().map(|n| (n.id.as_str(), n)).collect();
735
736 let mut test_depths: HashMap<String, usize> = HashMap::new();
738 let mut test_files: HashMap<String, String> = HashMap::new();
739
740 let mut visited: HashSet<&str> = HashSet::new();
743
744 for &symbol_id in changed_symbol_ids {
745 let mut queue: VecDeque<(&str, usize)> = VecDeque::new();
747
748 visited.insert(symbol_id);
749 queue.push_back((symbol_id, 0));
750
751 while let Some((current_id, depth)) = queue.pop_front() {
752 if depth >= max_depth {
753 continue;
754 }
755
756 if let Some(callers) = callers_of.get(current_id) {
758 for &caller_id in callers {
759 if visited.contains(caller_id) {
760 continue;
761 }
762 visited.insert(caller_id);
763
764 let next_depth = depth + 1;
765
766 if let Some(node) = node_map.get(caller_id) {
768 if is_test_node(node) {
769 let file_path = node
770 .payload
771 .get("file_path")
772 .and_then(|v| v.as_str())
773 .unwrap_or("")
774 .to_string();
775
776 let entry = test_depths
777 .entry(caller_id.to_string())
778 .or_insert(next_depth);
779 if next_depth < *entry {
780 *entry = next_depth;
781 }
782 test_files.entry(caller_id.to_string()).or_insert(file_path);
783 }
784 }
785
786 queue.push_back((caller_id, next_depth));
788 }
789 }
790 }
791 }
792
793 let mut direct_tests = Vec::new();
795 let mut transitive_tests = Vec::new();
796
797 for (test_id, depth) in &test_depths {
798 let hit = TestHit {
799 test_symbol: test_id.clone(),
800 file_path: test_files.get(test_id).cloned().unwrap_or_default(),
801 depth: *depth,
802 };
803 if *depth <= 2 {
804 direct_tests.push(hit);
805 } else {
806 transitive_tests.push(hit);
807 }
808 }
809
810 direct_tests.sort_by_key(|h| h.depth);
812 transitive_tests.sort_by_key(|h| h.depth);
813
814 Ok(TestImpactResult {
815 direct_tests,
816 transitive_tests,
817 analyzed_symbols: changed_symbol_ids.iter().map(|s| s.to_string()).collect(),
818 })
819 }
820}
821
822fn is_test_node(node: &GraphNode) -> bool {
824 if node.kind == NodeKind::Test {
826 return true;
827 }
828 if node.payload.get("is_test") == Some(&serde_json::json!(true)) {
830 return true;
831 }
832 if node.payload.get("kind").and_then(|v| v.as_str()) == Some("test") {
833 return true;
834 }
835 if let Some(path) = node.payload.get("file_path").and_then(|v| v.as_str()) {
837 if is_test_file(path) {
838 return true;
839 }
840 }
841 if is_test_file(&node.label) {
843 return true;
844 }
845 false
846}
847
848fn is_test_file(path: &str) -> bool {
853 let p = path.to_lowercase();
854
855 if p.contains("/tests/") || p.contains("/__tests__/") || p.starts_with("tests/") {
857 return true;
858 }
859
860 let filename = p.rsplit('/').next().unwrap_or(&p);
862 filename.starts_with("test_")
863 || filename.contains("_test.")
864 || filename.contains(".test.")
865 || filename.contains(".spec.")
866}