1use crate::CodememEngine;
7use chrono::{DateTime, Utc};
8use codemem_core::{CodememError, Edge, GraphNode, NodeKind, RelationshipType};
9use std::collections::{HashMap, HashSet};
10
11#[derive(Debug, Clone)]
15pub struct RankedNode {
16 pub id: String,
17 pub score: f64,
18 pub kind: Option<String>,
19 pub label: Option<String>,
20}
21
22#[derive(Debug, Clone)]
24pub struct GraphStats {
25 pub node_count: usize,
26 pub edge_count: usize,
27 pub node_kind_counts: HashMap<String, usize>,
28 pub relationship_type_counts: HashMap<String, usize>,
29}
30
31#[derive(Debug, Clone, serde::Serialize)]
33pub struct ChangeEntry {
34 pub commit_id: String,
35 pub hash: String,
36 pub author: String,
37 pub date: String,
38 pub subject: String,
39 pub changed_symbols: Vec<String>,
40 pub changed_files: Vec<String>,
41}
42
43#[derive(Debug, Clone, serde::Serialize)]
45pub struct TemporalSnapshot {
46 pub at: String,
47 pub live_nodes: usize,
48 pub live_edges: usize,
49 pub node_kind_counts: HashMap<String, usize>,
50}
51
52#[derive(Debug, Clone, serde::Serialize)]
54pub struct StaleFile {
55 pub file_path: String,
56 pub centrality: f64,
57 pub last_modified: Option<String>,
58 pub incoming_edges: usize,
59}
60
61#[derive(Debug, Clone, serde::Serialize)]
63pub struct DriftReport {
64 pub period: String,
65 pub new_cross_module_edges: usize,
66 pub removed_files: usize,
67 pub added_files: usize,
68 pub hotspot_files: Vec<String>,
69 pub coupling_increases: Vec<(String, String, usize)>,
70}
71
72impl CodememEngine {
75 pub fn graph_traverse(
82 &self,
83 start_id: &str,
84 depth: usize,
85 algorithm: &str,
86 exclude_kinds: &[NodeKind],
87 include_relationships: Option<&[RelationshipType]>,
88 at_time: Option<DateTime<Utc>>,
89 ) -> Result<Vec<GraphNode>, CodememError> {
90 let graph = self.lock_graph()?;
91 let has_filters = !exclude_kinds.is_empty() || include_relationships.is_some();
92
93 let mut nodes = if has_filters {
94 match algorithm {
95 "bfs" => graph.bfs_filtered(start_id, depth, exclude_kinds, include_relationships),
96 "dfs" => graph.dfs_filtered(start_id, depth, exclude_kinds, include_relationships),
97 _ => Err(CodememError::InvalidInput(format!(
98 "Unknown algorithm: {algorithm}"
99 ))),
100 }
101 } else {
102 match algorithm {
103 "bfs" => graph.bfs(start_id, depth),
104 "dfs" => graph.dfs(start_id, depth),
105 _ => Err(CodememError::InvalidInput(format!(
106 "Unknown algorithm: {algorithm}"
107 ))),
108 }
109 }?;
110
111 if let Some(at) = at_time {
113 nodes.retain(|n| {
114 n.valid_from.is_none_or(|vf| vf <= at) && n.valid_to.is_none_or(|vt| vt > at)
115 });
116 }
117
118 Ok(nodes)
119 }
120
121 pub fn graph_stats(&self) -> Result<GraphStats, CodememError> {
123 let graph = self.lock_graph()?;
124 let stats = graph.stats();
125 Ok(GraphStats {
126 node_count: stats.node_count,
127 edge_count: stats.edge_count,
128 node_kind_counts: stats.node_kind_counts,
129 relationship_type_counts: stats.relationship_type_counts,
130 })
131 }
132
133 pub fn get_node_edges(&self, node_id: &str) -> Result<Vec<Edge>, CodememError> {
135 let graph = self.lock_graph()?;
136 graph.get_edges(node_id)
137 }
138
139 pub fn louvain_communities(&self, resolution: f64) -> Result<Vec<Vec<String>>, CodememError> {
141 let graph = self.lock_graph()?;
142 Ok(graph.louvain_communities(resolution))
143 }
144
145 pub fn find_important_nodes(
148 &self,
149 top_k: usize,
150 damping: f64,
151 ) -> Result<Vec<RankedNode>, CodememError> {
152 let graph = self.lock_graph()?;
153 let scores = graph.pagerank(damping, 100, 1e-6);
154
155 let mut sorted: Vec<(String, f64)> = scores.into_iter().collect();
156 sorted.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
157 sorted.truncate(top_k);
158
159 let results = sorted
160 .into_iter()
161 .map(|(id, score)| {
162 let node = graph.get_node(&id).ok().flatten();
163 RankedNode {
164 id,
165 score,
166 kind: node.as_ref().map(|n| n.kind.to_string()),
167 label: node.as_ref().map(|n| n.label.clone()),
168 }
169 })
170 .collect();
171
172 Ok(results)
173 }
174
175 pub fn what_changed(
179 &self,
180 from: DateTime<Utc>,
181 to: DateTime<Utc>,
182 namespace: Option<&str>,
183 ) -> Result<Vec<ChangeEntry>, CodememError> {
184 let all_nodes = {
185 let graph = self.lock_graph()?;
186 graph.get_all_nodes()
187 };
188 let all_edges = self.storage.all_graph_edges()?;
189
190 let commits: Vec<&GraphNode> = all_nodes
192 .iter()
193 .filter(|n| {
194 n.kind == NodeKind::Commit
195 && !n.payload.contains_key("sentinel")
196 && n.valid_from.is_some_and(|vf| vf >= from && vf <= to)
197 && (namespace.is_none() || n.namespace.as_deref() == namespace)
198 })
199 .collect();
200
201 let mut edges_by_dst: HashMap<&str, Vec<&Edge>> = HashMap::new();
203 for edge in &all_edges {
204 if edge.relationship == RelationshipType::ModifiedBy {
205 edges_by_dst.entry(&edge.dst).or_default().push(edge);
206 }
207 }
208
209 let mut entries = Vec::new();
210 for commit in &commits {
211 let mut changed_files = Vec::new();
212 let mut changed_symbols = Vec::new();
213
214 if let Some(commit_edges) = edges_by_dst.get(commit.id.as_str()) {
215 for edge in commit_edges {
216 if edge.src.starts_with("file:") {
217 changed_files.push(
218 edge.src
219 .strip_prefix("file:")
220 .unwrap_or(&edge.src)
221 .to_string(),
222 );
223 } else if edge.src.starts_with("sym:") {
224 changed_symbols.push(
225 edge.src
226 .strip_prefix("sym:")
227 .unwrap_or(&edge.src)
228 .to_string(),
229 );
230 }
231 }
232 }
233
234 entries.push(ChangeEntry {
235 commit_id: commit.id.clone(),
236 hash: commit
237 .payload
238 .get("hash")
239 .and_then(|v| v.as_str())
240 .unwrap_or("")
241 .to_string(),
242 author: commit
243 .payload
244 .get("author")
245 .and_then(|v| v.as_str())
246 .unwrap_or("")
247 .to_string(),
248 date: commit
249 .payload
250 .get("date")
251 .and_then(|v| v.as_str())
252 .unwrap_or("")
253 .to_string(),
254 subject: commit
255 .payload
256 .get("subject")
257 .and_then(|v| v.as_str())
258 .unwrap_or("")
259 .to_string(),
260 changed_symbols,
261 changed_files,
262 });
263 }
264
265 entries.sort_by(|a, b| b.date.cmp(&a.date));
267 Ok(entries)
268 }
269
270 pub fn graph_at_time(&self, at: DateTime<Utc>) -> Result<TemporalSnapshot, CodememError> {
276 let all_nodes = {
277 let graph = self.lock_graph()?;
278 graph.get_all_nodes()
279 };
280 let all_edges = self.storage.all_graph_edges()?;
281
282 let is_node_live = |n: &GraphNode| -> bool {
283 let after_start = n.valid_from.is_none_or(|vf| vf <= at);
284 let before_end = n.valid_to.is_none_or(|vt| vt > at);
285 after_start && before_end
286 };
287
288 let is_edge_live = |e: &Edge| -> bool {
289 let after_start = e.valid_from.is_none_or(|vf| vf <= at);
290 let before_end = e.valid_to.is_none_or(|vt| vt > at);
291 after_start && before_end
292 };
293
294 let live_nodes: Vec<&GraphNode> = all_nodes.iter().filter(|n| is_node_live(n)).collect();
295 let live_edges = all_edges.iter().filter(|e| is_edge_live(e)).count();
296
297 let mut kind_counts: HashMap<String, usize> = HashMap::new();
298 for node in &live_nodes {
299 *kind_counts.entry(node.kind.to_string()).or_default() += 1;
300 }
301
302 Ok(TemporalSnapshot {
303 at: at.to_rfc3339(),
304 live_nodes: live_nodes.len(),
305 live_edges,
306 node_kind_counts: kind_counts,
307 })
308 }
309
310 pub fn find_stale_files(
313 &self,
314 namespace: Option<&str>,
315 stale_days: u64,
316 ) -> Result<Vec<StaleFile>, CodememError> {
317 let all_nodes = {
318 let graph = self.lock_graph()?;
319 graph.get_all_nodes()
320 };
321 let all_edges = self.storage.all_graph_edges()?;
322 let cutoff = Utc::now() - chrono::Duration::days(stale_days as i64);
323
324 let mut file_last_modified: HashMap<&str, DateTime<Utc>> = HashMap::new();
326 for edge in &all_edges {
327 if edge.relationship == RelationshipType::ModifiedBy && edge.src.starts_with("file:") {
328 if let Some(vf) = edge.valid_from {
329 let entry = file_last_modified.entry(&edge.src).or_insert(vf);
330 if vf > *entry {
331 *entry = vf;
332 }
333 }
334 }
335 }
336
337 let mut incoming: HashMap<&str, usize> = HashMap::new();
339 for edge in &all_edges {
340 if edge.dst.starts_with("file:") {
341 *incoming.entry(&edge.dst).or_default() += 1;
342 }
343 }
344
345 let mut stale = Vec::new();
346 for node in &all_nodes {
347 if node.kind != NodeKind::File {
348 continue;
349 }
350 if node.valid_to.is_some() {
351 continue; }
353 if namespace.is_some() && node.namespace.as_deref() != namespace {
354 continue;
355 }
356
357 let last_mod = file_last_modified.get(node.id.as_str()).copied();
358 let is_stale = last_mod.is_some_and(|lm| lm < cutoff);
362
363 if is_stale
364 && (node.centrality > 0.0
365 || incoming.get(node.id.as_str()).copied().unwrap_or(0) > 0)
366 {
367 let file_path = node
368 .id
369 .strip_prefix("file:")
370 .unwrap_or(&node.id)
371 .to_string();
372 stale.push(StaleFile {
373 file_path,
374 centrality: node.centrality,
375 last_modified: last_mod.map(|d| d.to_rfc3339()),
376 incoming_edges: incoming.get(node.id.as_str()).copied().unwrap_or(0),
377 });
378 }
379 }
380
381 stale.sort_by(|a, b| {
383 b.centrality
384 .partial_cmp(&a.centrality)
385 .unwrap_or(std::cmp::Ordering::Equal)
386 });
387 Ok(stale)
388 }
389
390 pub fn detect_drift(
396 &self,
397 from: DateTime<Utc>,
398 to: DateTime<Utc>,
399 namespace: Option<&str>,
400 ) -> Result<DriftReport, CodememError> {
401 let all_nodes = {
402 let graph = self.lock_graph()?;
403 graph.get_all_nodes()
404 };
405 let all_edges = self.storage.all_graph_edges()?;
406
407 let files_at = |at: DateTime<Utc>| -> HashSet<String> {
409 all_nodes
410 .iter()
411 .filter(|n| {
412 n.kind == NodeKind::File
413 && n.valid_from.is_none_or(|vf| vf <= at)
414 && n.valid_to.is_none_or(|vt| vt > at)
415 && (namespace.is_none() || n.namespace.as_deref() == namespace)
416 })
417 .map(|n| n.id.clone())
418 .collect()
419 };
420
421 let files_before = files_at(from);
422 let files_after = files_at(to);
423 let added_files = files_after.difference(&files_before).count();
424 let removed_files = files_before.difference(&files_after).count();
425
426 let module_of = |id: &str| -> String {
429 let path = id
430 .strip_prefix("file:")
431 .or_else(|| id.strip_prefix("sym:"))
432 .unwrap_or(id);
433 path.split('/').next().unwrap_or("root").to_string()
434 };
435
436 let new_cross_module = all_edges
437 .iter()
438 .filter(|e| {
439 e.valid_from.is_some_and(|vf| vf >= from && vf <= to)
440 && module_of(&e.src) != module_of(&e.dst)
441 && !matches!(
442 e.relationship,
443 RelationshipType::ModifiedBy | RelationshipType::PartOf
444 )
445 })
446 .count();
447
448 let mut file_commit_count: HashMap<String, usize> = HashMap::new();
450 for edge in &all_edges {
451 if edge.relationship == RelationshipType::ModifiedBy
452 && edge.src.starts_with("file:")
453 && edge.valid_from.is_some_and(|vf| vf >= from && vf <= to)
454 {
455 *file_commit_count
456 .entry(
457 edge.src
458 .strip_prefix("file:")
459 .unwrap_or(&edge.src)
460 .to_string(),
461 )
462 .or_default() += 1;
463 }
464 }
465 let mut hotspots: Vec<(String, usize)> = file_commit_count.into_iter().collect();
466 hotspots.sort_by(|a, b| b.1.cmp(&a.1));
467 let hotspot_files: Vec<String> = hotspots.into_iter().take(10).map(|(f, _)| f).collect();
468
469 let mut coupling: HashMap<(String, String), usize> = HashMap::new();
473 for edge in &all_edges {
474 if edge.relationship == RelationshipType::CoChanged
475 && (edge.valid_from.is_some_and(|vf| vf >= from && vf <= to)
476 || (edge.valid_from.is_none()
477 && edge.created_at >= from
478 && edge.created_at <= to))
479 {
480 let pair = if edge.src < edge.dst {
481 (edge.src.clone(), edge.dst.clone())
482 } else {
483 (edge.dst.clone(), edge.src.clone())
484 };
485 let count = edge
486 .properties
487 .get("commit_count")
488 .and_then(|v| v.as_u64())
489 .unwrap_or(1) as usize;
490 *coupling.entry(pair).or_default() += count;
491 }
492 }
493 let mut coupling_vec: Vec<(String, String, usize)> =
494 coupling.into_iter().map(|((a, b), c)| (a, b, c)).collect();
495 coupling_vec.sort_by(|a, b| b.2.cmp(&a.2));
496 coupling_vec.truncate(10);
497
498 Ok(DriftReport {
499 period: format!("{} to {}", from.to_rfc3339(), to.to_rfc3339()),
500 new_cross_module_edges: new_cross_module,
501 removed_files,
502 added_files,
503 hotspot_files,
504 coupling_increases: coupling_vec,
505 })
506 }
507
508 pub fn symbol_history(&self, node_id: &str) -> Result<Vec<ChangeEntry>, CodememError> {
510 let all_edges = self.storage.all_graph_edges()?;
511
512 let commit_ids: HashSet<&str> = all_edges
514 .iter()
515 .filter(|e| e.src == node_id && e.relationship == RelationshipType::ModifiedBy)
516 .map(|e| e.dst.as_str())
517 .collect();
518
519 if commit_ids.is_empty() {
520 return Ok(Vec::new());
521 }
522
523 let commit_nodes: Vec<GraphNode> = {
525 let graph = self.lock_graph()?;
526 graph
527 .get_all_nodes()
528 .into_iter()
529 .filter(|n| commit_ids.contains(n.id.as_str()))
530 .collect()
531 };
532
533 let mut edges_by_dst: HashMap<&str, Vec<&Edge>> = HashMap::new();
535 for edge in &all_edges {
536 if edge.relationship == RelationshipType::ModifiedBy
537 && commit_ids.contains(edge.dst.as_str())
538 {
539 edges_by_dst.entry(&edge.dst).or_default().push(edge);
540 }
541 }
542
543 let mut entries = Vec::new();
544 for node in &commit_nodes {
545 let mut changed_files = Vec::new();
546 let mut changed_symbols = Vec::new();
547
548 if let Some(commit_edges) = edges_by_dst.get(node.id.as_str()) {
549 for edge in commit_edges {
550 if edge.src.starts_with("file:") {
551 changed_files.push(
552 edge.src
553 .strip_prefix("file:")
554 .unwrap_or(&edge.src)
555 .to_string(),
556 );
557 } else if edge.src.starts_with("sym:") {
558 changed_symbols.push(
559 edge.src
560 .strip_prefix("sym:")
561 .unwrap_or(&edge.src)
562 .to_string(),
563 );
564 }
565 }
566 }
567
568 entries.push(ChangeEntry {
569 commit_id: node.id.clone(),
570 hash: node
571 .payload
572 .get("hash")
573 .and_then(|v| v.as_str())
574 .unwrap_or("")
575 .to_string(),
576 author: node
577 .payload
578 .get("author")
579 .and_then(|v| v.as_str())
580 .unwrap_or("")
581 .to_string(),
582 date: node
583 .payload
584 .get("date")
585 .and_then(|v| v.as_str())
586 .unwrap_or("")
587 .to_string(),
588 subject: node
589 .payload
590 .get("subject")
591 .and_then(|v| v.as_str())
592 .unwrap_or("")
593 .to_string(),
594 changed_symbols,
595 changed_files,
596 });
597 }
598
599 entries.sort_by(|a, b| b.date.cmp(&a.date));
600 Ok(entries)
601 }
602}