Skip to main content

codemem_engine/
insights.rs

1//! Insight aggregation domain logic.
2//!
3//! These methods compute graph-derived insight summaries (PageRank leaders,
4//! Louvain communities, topology depth, security flags, coupling scores)
5//! so the API/transport layer only formats results.
6
7use crate::CodememEngine;
8use codemem_core::{CodememError, MemoryNode, NodeKind};
9use std::collections::HashSet;
10
11// ── Result Types ─────────────────────────────────────────────────────────────
12
/// PageRank entry for a graph node.
///
/// Produced by the engine's insight methods, which only emit entries for
/// file nodes whose PageRank score is strictly positive.
#[derive(Debug, Clone, PartialEq)]
pub struct PagerankEntry {
    /// Identifier of the graph node this entry describes.
    pub node_id: String,
    /// Label copied from the graph node.
    pub label: String,
    /// PageRank score the graph reported for `node_id`.
    pub score: f64,
}
20
/// High-coupling node with its coupling score.
///
/// The score is read from the node payload's `coupling_score` entry by
/// `CodememEngine::performance_insights`.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct CouplingNode {
    /// Identifier of the graph node.
    pub node_id: String,
    /// Label copied from the graph node.
    pub label: String,
    /// Coupling score taken from the node payload.
    pub coupling_score: usize,
}
28
/// Git annotation summary from graph node payloads.
///
/// `Default` yields the "nothing known" summary (zero files, no authors).
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct GitSummary {
    /// Number of file nodes that are the source of a `ModifiedBy` edge.
    pub total_annotated_files: usize,
    /// Author names collected from commit nodes; `activity_insights` caps
    /// this list at ten entries.
    pub top_authors: Vec<String>,
}
35
36/// Aggregated activity insights.
37#[derive(Debug, Clone)]
38pub struct ActivityInsights {
39    pub insights: Vec<MemoryNode>,
40    pub git_summary: GitSummary,
41}
42
43/// Aggregated code health insights.
44#[derive(Debug, Clone)]
45pub struct CodeHealthInsights {
46    pub insights: Vec<MemoryNode>,
47    pub file_hotspots: Vec<(String, usize, Vec<String>)>,
48    pub decision_chains: Vec<(String, usize, Vec<String>)>,
49    pub pagerank_leaders: Vec<PagerankEntry>,
50    pub community_count: usize,
51}
52
53/// Aggregated security insights.
54#[derive(Debug, Clone)]
55pub struct SecurityInsights {
56    pub insights: Vec<MemoryNode>,
57    pub sensitive_file_count: usize,
58    pub endpoint_count: usize,
59    pub security_function_count: usize,
60}
61
62/// Aggregated performance insights.
63#[derive(Debug, Clone)]
64pub struct PerformanceInsights {
65    pub insights: Vec<MemoryNode>,
66    pub high_coupling_nodes: Vec<CouplingNode>,
67    pub max_depth: usize,
68    pub critical_path: Vec<PagerankEntry>,
69}
70
71// ── Engine Methods ───────────────────────────────────────────────────────────
72
73impl CodememEngine {
74    /// Aggregate activity insights: stored track:activity memories + git annotation summary.
75    pub fn activity_insights(
76        &self,
77        namespace: Option<&str>,
78        limit: usize,
79    ) -> Result<ActivityInsights, CodememError> {
80        let insights = self
81            .storage
82            .list_memories_by_tag("track:activity", namespace, limit)
83            .unwrap_or_default();
84
85        // Count annotated files = file nodes that have MODIFIED_BY edges to commits.
86        // Extract authors from commit node payloads (hash field present).
87        let git_summary = match self.lock_graph() {
88            Ok(graph) => {
89                let all_nodes = graph.get_all_nodes();
90                let all_edges = self.storage.all_graph_edges().unwrap_or_default();
91
92                // Files with MODIFIED_BY edges (filtered by namespace if set)
93                let file_ids_with_commits: HashSet<String> = all_edges
94                    .iter()
95                    .filter(|e| e.relationship == codemem_core::RelationshipType::ModifiedBy)
96                    .filter(|e| e.src.starts_with("file:"))
97                    .map(|e| e.src.clone())
98                    .collect();
99
100                let annotated = if let Some(ns) = namespace {
101                    all_nodes
102                        .iter()
103                        .filter(|n| {
104                            n.kind == codemem_core::NodeKind::File
105                                && n.namespace.as_deref() == Some(ns)
106                                && file_ids_with_commits.contains(&n.id)
107                        })
108                        .count()
109                } else {
110                    file_ids_with_commits.len()
111                };
112
113                // Authors from commit nodes
114                let mut author_set: HashSet<String> = HashSet::new();
115                for node in &all_nodes {
116                    if node.kind != codemem_core::NodeKind::Commit {
117                        continue;
118                    }
119                    if let Some(ns) = namespace {
120                        if node.namespace.as_deref() != Some(ns) {
121                            continue;
122                        }
123                    }
124                    // Author stored in commit label (format: "author: message")
125                    // or in payload
126                    if let Some(author) = node.payload.get("author").and_then(|a| a.as_str()) {
127                        author_set.insert(author.to_string());
128                    } else if let Some(label) = node.label.split(':').next() {
129                        // Some commit labels start with author name
130                        let trimmed = label.trim();
131                        if !trimmed.is_empty() && !trimmed.starts_with("commit") {
132                            author_set.insert(trimmed.to_string());
133                        }
134                    }
135                }
136                let mut top_authors: Vec<String> = author_set.into_iter().collect();
137                top_authors.sort();
138                top_authors.truncate(10);
139                GitSummary {
140                    total_annotated_files: annotated,
141                    top_authors,
142                }
143            }
144            Err(_) => GitSummary {
145                total_annotated_files: 0,
146                top_authors: Vec::new(),
147            },
148        };
149
150        Ok(ActivityInsights {
151            insights,
152            git_summary,
153        })
154    }
155
156    /// Aggregate code health insights: stored memories, file hotspots, decision chains,
157    /// PageRank leaders, and Louvain community count.
158    pub fn code_health_insights(
159        &self,
160        namespace: Option<&str>,
161        limit: usize,
162    ) -> Result<CodeHealthInsights, CodememError> {
163        let mut insights: Vec<MemoryNode> = self
164            .storage
165            .list_memories_by_tag("track:code-health", namespace, limit)
166            .unwrap_or_default();
167
168        if insights.is_empty() {
169            insights = self
170                .storage
171                .list_memories_by_tag("track:performance", namespace, limit)
172                .unwrap_or_default();
173        }
174
175        let file_hotspots = self
176            .storage
177            .get_file_hotspots(2, namespace)
178            .unwrap_or_default();
179
180        let decision_chains = self
181            .storage
182            .get_decision_chains(2, namespace)
183            .unwrap_or_default();
184
185        let (pagerank_leaders, community_count) = match self.lock_graph() {
186            Ok(graph) => {
187                let all_nodes = graph.get_all_nodes();
188                let mut file_pr: Vec<_> = all_nodes
189                    .iter()
190                    .filter(|n| n.kind == NodeKind::File)
191                    .map(|n| PagerankEntry {
192                        node_id: n.id.clone(),
193                        label: n.label.clone(),
194                        score: graph.get_pagerank(&n.id),
195                    })
196                    .filter(|e| e.score > 0.0)
197                    .collect();
198                file_pr.sort_by(|a, b| {
199                    b.score
200                        .partial_cmp(&a.score)
201                        .unwrap_or(std::cmp::Ordering::Equal)
202                });
203                file_pr.truncate(10);
204                let communities = graph.louvain_communities(1.0).len();
205                (file_pr, communities)
206            }
207            Err(_) => (Vec::new(), 0),
208        };
209
210        Ok(CodeHealthInsights {
211            insights,
212            file_hotspots,
213            decision_chains,
214            pagerank_leaders,
215            community_count,
216        })
217    }
218
219    /// Aggregate security insights: stored memories + security flag counts from graph nodes.
220    pub fn security_insights(
221        &self,
222        namespace: Option<&str>,
223        limit: usize,
224    ) -> Result<SecurityInsights, CodememError> {
225        let insights = self
226            .storage
227            .list_memories_by_tag("track:security", namespace, limit)
228            .unwrap_or_default();
229
230        let (sensitive_file_count, endpoint_count, security_function_count) = match self
231            .lock_graph()
232        {
233            Ok(graph) => {
234                let all_nodes = graph.get_all_nodes();
235                let mut sensitive = 0;
236                let mut endpoints = 0;
237                let mut sec_fns = 0;
238                for node in &all_nodes {
239                    if let Some(flags) = node
240                        .payload
241                        .get("security_flags")
242                        .and_then(|f| f.as_array())
243                    {
244                        let flag_strs: Vec<&str> =
245                            flags.iter().filter_map(|f| f.as_str()).collect();
246                        if flag_strs.contains(&"sensitive") || flag_strs.contains(&"auth_related") {
247                            sensitive += 1;
248                        }
249                        if flag_strs.contains(&"exposed_endpoint") {
250                            endpoints += 1;
251                        }
252                        if flag_strs.contains(&"security_function") {
253                            sec_fns += 1;
254                        }
255                    }
256                }
257                (sensitive, endpoints, sec_fns)
258            }
259            Err(_) => (0, 0, 0),
260        };
261
262        Ok(SecurityInsights {
263            insights,
264            sensitive_file_count,
265            endpoint_count,
266            security_function_count,
267        })
268    }
269
270    /// Aggregate performance insights: stored memories, coupling scores,
271    /// topology depth, and PageRank critical path.
272    pub fn performance_insights(
273        &self,
274        namespace: Option<&str>,
275        limit: usize,
276    ) -> Result<PerformanceInsights, CodememError> {
277        let insights = self
278            .storage
279            .list_memories_by_tag("track:performance", namespace, limit)
280            .unwrap_or_default();
281
282        let (high_coupling_nodes, max_depth, critical_path) = match self.lock_graph() {
283            Ok(graph) => {
284                let all_nodes = graph.get_all_nodes();
285
286                // Coupling scores from annotations
287                let mut coupling_data: Vec<CouplingNode> = Vec::new();
288                for node in &all_nodes {
289                    if let Some(score) = node.payload.get("coupling_score").and_then(|v| v.as_u64())
290                    {
291                        if score > 15 {
292                            coupling_data.push(CouplingNode {
293                                node_id: node.id.clone(),
294                                label: node.label.clone(),
295                                coupling_score: score as usize,
296                            });
297                        }
298                    }
299                }
300                coupling_data.sort_by(|a, b| b.coupling_score.cmp(&a.coupling_score));
301                coupling_data.truncate(10);
302
303                // Dependency depth from topological layers
304                let depth = graph.topological_layers().len();
305
306                // Critical path from PageRank
307                let mut file_pr: Vec<_> = all_nodes
308                    .iter()
309                    .filter(|n| n.kind == NodeKind::File)
310                    .map(|n| PagerankEntry {
311                        node_id: n.id.clone(),
312                        label: n.label.clone(),
313                        score: graph.get_pagerank(&n.id),
314                    })
315                    .filter(|e| e.score > 0.0)
316                    .collect();
317                file_pr.sort_by(|a, b| {
318                    b.score
319                        .partial_cmp(&a.score)
320                        .unwrap_or(std::cmp::Ordering::Equal)
321                });
322                file_pr.truncate(10);
323
324                (coupling_data, depth, file_pr)
325            }
326            Err(_) => (Vec::new(), 0, Vec::new()),
327        };
328
329        Ok(PerformanceInsights {
330            insights,
331            high_coupling_nodes,
332            max_depth,
333            critical_path,
334        })
335    }
336}