Skip to main content

codemem_engine/
analysis.rs

1//! Analysis domain logic: impact-aware recall, decision chains, session checkpoints.
2//!
3//! These methods were extracted from the MCP transport layer to keep domain logic
4//! in the engine crate.
5
6use crate::CodememEngine;
7use codemem_core::{
8    CodememError, DetectedPattern, GraphBackend, MemoryNode, MemoryType, NodeCoverageEntry,
9    NodeKind, RelationshipType, SearchResult,
10};
11use serde_json::json;
12use std::collections::{HashMap, HashSet};
13
14// ── Result Types ─────────────────────────────────────────────────────────────
15
16/// Impact data enrichment for a single search result.
17#[derive(Debug, Clone)]
18pub struct ImpactResult {
19    /// The underlying search result (memory + score).
20    pub search_result: SearchResult,
21    /// PageRank score for this memory in the graph.
22    pub pagerank: f64,
23    /// Betweenness centrality score.
24    pub centrality: f64,
25    /// IDs of connected Decision-type memories.
26    pub connected_decisions: Vec<String>,
27    /// Labels/paths of connected File-type nodes.
28    pub dependent_files: Vec<String>,
29}
30
31/// A single decision entry in a decision chain.
32#[derive(Debug, Clone)]
33pub struct DecisionEntry {
34    pub memory: MemoryNode,
35    /// Edges connecting this decision to others in the chain.
36    pub connections: Vec<DecisionConnection>,
37}
38
/// A directed link between two decisions in a chain.
///
/// Comparable by value so callers and tests can assert on, or search for,
/// specific connections.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DecisionConnection {
    /// String form of the edge's relationship type.
    pub relationship: String,
    /// ID of the edge's source node.
    pub source: String,
    /// ID of the edge's destination node.
    pub target: String,
}
46
47/// Result of a decision chain query.
48#[derive(Debug, Clone)]
49pub struct DecisionChain {
50    /// Number of decisions in the chain.
51    pub chain_length: usize,
52    /// The filter that was used.
53    pub file_path: Option<String>,
54    /// The topic filter that was used.
55    pub topic: Option<String>,
56    /// The decisions in chronological order.
57    pub decisions: Vec<DecisionEntry>,
58}
59
60/// Result of a session checkpoint.
61#[derive(Debug, Clone)]
62pub struct SessionCheckpointReport {
63    /// Number of files read in this session.
64    pub files_read: usize,
65    /// Number of files edited in this session.
66    pub files_edited: usize,
67    /// Number of searches in this session.
68    pub searches: usize,
69    /// Total actions in this session.
70    pub total_actions: usize,
71    /// Hot directories with their action counts.
72    pub hot_dirs: Vec<(String, usize)>,
73    /// Patterns detected within this session.
74    pub session_patterns: Vec<DetectedPattern>,
75    /// Patterns detected across sessions (excluding session-scoped duplicates).
76    pub cross_patterns: Vec<DetectedPattern>,
77    /// Number of new pattern insights stored during this checkpoint.
78    pub stored_pattern_count: usize,
79    /// Pre-built markdown report.
80    pub report: String,
81}
82
83// ── Engine Methods ───────────────────────────────────────────────────────────
84
85impl CodememEngine {
86    /// Recall memories enriched with graph impact data (PageRank, centrality,
87    /// connected decisions, dependent files).
88    pub fn recall_with_impact(
89        &self,
90        query: &str,
91        k: usize,
92        namespace: Option<&str>,
93    ) -> Result<Vec<ImpactResult>, CodememError> {
94        let results = self.recall(&crate::recall::RecallQuery {
95            query,
96            k,
97            namespace_filter: namespace,
98            ..crate::recall::RecallQuery::new(query, k)
99        })?;
100
101        if results.is_empty() {
102            return Ok(vec![]);
103        }
104
105        let mut graph = self.lock_graph()?;
106        // C1: Ensure betweenness is computed before reading centrality values.
107        graph.ensure_betweenness_computed();
108
109        let output: Vec<ImpactResult> = results
110            .into_iter()
111            .map(|r| {
112                let memory_id = &r.memory.id;
113
114                let pagerank = graph.get_pagerank(memory_id);
115                let centrality = graph.get_betweenness(memory_id);
116
117                let edges = graph.get_edges(memory_id).unwrap_or_default();
118
119                let connected_decisions: Vec<String> = edges
120                    .iter()
121                    .filter_map(|e| {
122                        let other_id = if e.src == *memory_id { &e.dst } else { &e.src };
123                        self.storage
124                            .get_memory_no_touch(other_id)
125                            .ok()
126                            .flatten()
127                            .and_then(|m| {
128                                if m.memory_type == MemoryType::Decision {
129                                    Some(m.id)
130                                } else {
131                                    None
132                                }
133                            })
134                    })
135                    .collect();
136
137                let dependent_files: Vec<String> = edges
138                    .iter()
139                    .filter_map(|e| {
140                        let other_id = if e.src == *memory_id { &e.dst } else { &e.src };
141                        graph.get_node(other_id).ok().flatten().and_then(|n| {
142                            if n.kind == NodeKind::File {
143                                Some(n.label.clone())
144                            } else {
145                                n.payload
146                                    .get("file_path")
147                                    .and_then(|v| v.as_str().map(String::from))
148                            }
149                        })
150                    })
151                    .collect();
152
153                ImpactResult {
154                    search_result: r,
155                    pagerank,
156                    centrality,
157                    connected_decisions,
158                    dependent_files,
159                }
160            })
161            .collect();
162
163        Ok(output)
164    }
165
166    /// Find Decision-type memories matching a file_path or topic, then follow
167    /// EvolvedInto/LeadsTo/DerivedFrom edges via BFS to build a chronological chain.
168    pub fn get_decision_chain(
169        &self,
170        file_path: Option<&str>,
171        topic: Option<&str>,
172    ) -> Result<DecisionChain, CodememError> {
173        if file_path.is_none() && topic.is_none() {
174            return Err(CodememError::InvalidInput(
175                "Must provide either 'file_path' or 'topic' parameter".to_string(),
176            ));
177        }
178
179        let graph = self.lock_graph()?;
180
181        let decision_edge_types = [
182            RelationshipType::EvolvedInto,
183            RelationshipType::LeadsTo,
184            RelationshipType::DerivedFrom,
185        ];
186
187        // Batch-load all Decision memories in one query
188        let all_decisions = self
189            .storage
190            .list_memories_filtered(None, Some("decision"))?;
191
192        // Hoist lowercased filter values outside the loop
193        let filter_lower = file_path.map(|f| f.to_lowercase());
194        let topic_lower = topic.map(|t| t.to_lowercase());
195
196        // Collect Decision memories matching the filter
197        let mut decision_memories: Vec<MemoryNode> = Vec::new();
198        for memory in all_decisions {
199            let content_lower = memory.content.to_lowercase();
200            let tags_lower: String = memory.tags.join(" ").to_lowercase();
201
202            let matches = if let Some(ref fp) = filter_lower {
203                content_lower.contains(fp)
204                    || tags_lower.contains(fp)
205                    || memory
206                        .metadata
207                        .get("file_path")
208                        .and_then(|v| v.as_str())
209                        .map(|v| v.to_lowercase().contains(fp))
210                        .unwrap_or(false)
211            } else if let Some(ref tl) = topic_lower {
212                content_lower.contains(tl) || tags_lower.contains(tl)
213            } else {
214                false
215            };
216
217            if matches {
218                decision_memories.push(memory);
219            }
220        }
221
222        if decision_memories.is_empty() {
223            return Ok(DecisionChain {
224                chain_length: 0,
225                file_path: file_path.map(String::from),
226                topic: topic.map(String::from),
227                decisions: vec![],
228            });
229        }
230
231        // Expand through decision-related edges to find the full chain (BFS)
232        let mut chain_ids: HashSet<String> = HashSet::new();
233        let mut to_explore: Vec<String> = decision_memories.iter().map(|m| m.id.clone()).collect();
234
235        while let Some(current_id) = to_explore.pop() {
236            if !chain_ids.insert(current_id.clone()) {
237                continue;
238            }
239
240            if let Ok(edges) = graph.get_edges(&current_id) {
241                for edge in &edges {
242                    if decision_edge_types.contains(&edge.relationship) {
243                        let other_id = if edge.src == current_id {
244                            &edge.dst
245                        } else {
246                            &edge.src
247                        };
248                        if !chain_ids.contains(other_id) {
249                            // Only follow to other Decision memories
250                            if let Ok(Some(m)) = self.storage.get_memory_no_touch(other_id) {
251                                if m.memory_type == MemoryType::Decision {
252                                    to_explore.push(other_id.clone());
253                                }
254                            }
255                        }
256                    }
257                }
258            }
259        }
260
261        // Collect all chain memories and sort by created_at (temporal order)
262        let mut chain: Vec<DecisionEntry> = Vec::new();
263        for id in &chain_ids {
264            if let Ok(Some(memory)) = self.storage.get_memory_no_touch(id) {
265                let connections: Vec<DecisionConnection> = graph
266                    .get_edges(id)
267                    .unwrap_or_default()
268                    .iter()
269                    .filter(|e| {
270                        decision_edge_types.contains(&e.relationship)
271                            && (chain_ids.contains(&e.src) && chain_ids.contains(&e.dst))
272                    })
273                    .map(|e| DecisionConnection {
274                        relationship: e.relationship.to_string(),
275                        source: e.src.clone(),
276                        target: e.dst.clone(),
277                    })
278                    .collect();
279
280                chain.push(DecisionEntry {
281                    memory,
282                    connections,
283                });
284            }
285        }
286
287        // Sort chronologically
288        chain.sort_by(|a, b| a.memory.created_at.cmp(&b.memory.created_at));
289
290        let chain_length = chain.len();
291        Ok(DecisionChain {
292            chain_length,
293            file_path: file_path.map(String::from),
294            topic: topic.map(String::from),
295            decisions: chain,
296        })
297    }
298
299    /// Build a mid-session progress report: activity summary, pattern detection
300    /// (session-scoped + cross-session), stores new pattern insights, hot directories,
301    /// markdown report.
302    pub fn session_checkpoint(
303        &self,
304        session_id: &str,
305        namespace: Option<&str>,
306    ) -> Result<SessionCheckpointReport, CodememError> {
307        // 1. Get session activity summary
308        let activity = self.storage.get_session_activity_summary(session_id)?;
309
310        // 2. Run session-scoped pattern detection (lower thresholds for single session)
311        let total_sessions = self.storage.session_count(namespace).unwrap_or(1).max(1);
312
313        let session_patterns = crate::patterns::detect_patterns(
314            &*self.storage,
315            namespace,
316            2, // session-scoped: min_frequency=2
317            total_sessions,
318        )
319        .unwrap_or_default();
320
321        // Cross-session patterns with higher threshold
322        let cross_patterns = crate::patterns::detect_patterns(
323            &*self.storage,
324            namespace,
325            3, // cross-session: min_frequency=3
326            total_sessions,
327        )
328        .unwrap_or_default();
329
330        // 3. Store new session patterns as Insight memories (with dedup)
331        let mut stored_patterns = 0usize;
332        for pattern in &session_patterns {
333            let dedup_tag = format!("checkpoint:{}:{}", session_id, pattern.description);
334            let already_exists = self
335                .storage
336                .has_auto_insight(session_id, &dedup_tag)
337                .unwrap_or(true);
338            if !already_exists && pattern.confidence > 0.3 {
339                let mut metadata = HashMap::new();
340                metadata.insert("session_id".to_string(), json!(session_id));
341                metadata.insert("auto_insight_tag".to_string(), json!(dedup_tag));
342                metadata.insert("source".to_string(), json!("session_checkpoint"));
343                metadata.insert(
344                    "pattern_type".to_string(),
345                    json!(pattern.pattern_type.to_string()),
346                );
347
348                let mut mem = codemem_core::MemoryNode::new(
349                    format!("Session pattern: {}", pattern.description),
350                    MemoryType::Insight,
351                );
352                mem.importance = 0.6;
353                mem.confidence = pattern.confidence;
354                mem.tags = vec![
355                    "session-checkpoint".to_string(),
356                    format!("pattern:{}", pattern.pattern_type),
357                ];
358                mem.metadata = metadata;
359                mem.namespace = namespace.map(|s| s.to_string());
360                if self.persist_memory_no_save(&mem).is_ok() {
361                    stored_patterns += 1;
362                }
363            }
364        }
365
366        // 4. Get hot directories
367        let hot_dirs = self
368            .storage
369            .get_session_hot_directories(session_id, 5)
370            .unwrap_or_default();
371
372        // 5. Filter unique cross-session patterns
373        let unique_cross: Vec<DetectedPattern> = cross_patterns
374            .iter()
375            .filter(|p| {
376                !session_patterns
377                    .iter()
378                    .any(|sp| sp.description == p.description)
379            })
380            .take(5)
381            .cloned()
382            .collect();
383
384        // 6. Build markdown report
385        let report = Self::format_checkpoint_report(
386            &activity,
387            &hot_dirs,
388            &session_patterns,
389            &unique_cross,
390            stored_patterns,
391        );
392
393        // 7. Persist a checkpoint memory with session state metadata
394        let memory_count = self.storage.memory_count().unwrap_or(0);
395        let now = chrono::Utc::now();
396        let checkpoint_content = format!(
397            "Session checkpoint for {}: {} actions ({} reads, {} edits, {} searches), {} total memories, {} patterns detected",
398            session_id,
399            activity.total_actions,
400            activity.files_read,
401            activity.files_edited,
402            activity.searches,
403            memory_count,
404            session_patterns.len(),
405        );
406        let mut checkpoint_metadata = HashMap::new();
407        checkpoint_metadata.insert("checkpoint_type".to_string(), json!("manual"));
408        checkpoint_metadata.insert("session_id".to_string(), json!(session_id));
409        checkpoint_metadata.insert("memory_count".to_string(), json!(memory_count));
410        checkpoint_metadata.insert("timestamp".to_string(), json!(now.to_rfc3339()));
411        checkpoint_metadata.insert("files_read".to_string(), json!(activity.files_read));
412        checkpoint_metadata.insert("files_edited".to_string(), json!(activity.files_edited));
413        checkpoint_metadata.insert("searches".to_string(), json!(activity.searches));
414        checkpoint_metadata.insert("total_actions".to_string(), json!(activity.total_actions));
415        checkpoint_metadata.insert("pattern_count".to_string(), json!(session_patterns.len()));
416        checkpoint_metadata.insert("cross_pattern_count".to_string(), json!(unique_cross.len()));
417        checkpoint_metadata.insert("stored_pattern_count".to_string(), json!(stored_patterns));
418        if !hot_dirs.is_empty() {
419            let dirs: Vec<&str> = hot_dirs.iter().map(|(d, _)| d.as_str()).collect();
420            checkpoint_metadata.insert("hot_directories".to_string(), json!(dirs));
421        }
422
423        let mut checkpoint_mem =
424            codemem_core::MemoryNode::new(checkpoint_content, MemoryType::Context);
425        checkpoint_mem.tags = vec![
426            "session-checkpoint".to_string(),
427            format!("session:{session_id}"),
428        ];
429        checkpoint_mem.metadata = checkpoint_metadata;
430        checkpoint_mem.namespace = namespace.map(|s| s.to_string());
431        checkpoint_mem.session_id = Some(session_id.to_string());
432        // Best-effort persist; don't fail the checkpoint if this errors
433        let _ = self.persist_memory(&checkpoint_mem);
434
435        Ok(SessionCheckpointReport {
436            files_read: activity.files_read,
437            files_edited: activity.files_edited,
438            searches: activity.searches,
439            total_actions: activity.total_actions,
440            hot_dirs,
441            session_patterns,
442            cross_patterns: unique_cross,
443            stored_pattern_count: stored_patterns,
444            report,
445        })
446    }
447
448    /// Format the checkpoint data into a markdown report string.
449    fn format_checkpoint_report(
450        activity: &codemem_core::SessionActivitySummary,
451        hot_dirs: &[(String, usize)],
452        session_patterns: &[DetectedPattern],
453        cross_patterns: &[DetectedPattern],
454        stored_patterns: usize,
455    ) -> String {
456        let mut report = String::from("## Session Checkpoint\n\n");
457
458        // Activity summary
459        report.push_str("### Activity Summary\n\n");
460        report.push_str(&format!(
461            "| Metric | Count |\n|--------|-------|\n\
462             | Files read | {} |\n\
463             | Files edited | {} |\n\
464             | Searches | {} |\n\
465             | Total actions | {} |\n\n",
466            activity.files_read, activity.files_edited, activity.searches, activity.total_actions,
467        ));
468
469        // Focus areas
470        if !hot_dirs.is_empty() {
471            report.push_str("### Focus Areas\n\n");
472            report.push_str("Directories with most activity in this session:\n\n");
473            for (dir, count) in hot_dirs {
474                report.push_str(&format!("- `{}` ({} actions)\n", dir, count));
475            }
476            report.push('\n');
477        }
478
479        // Session-scoped patterns
480        if !session_patterns.is_empty() {
481            report.push_str("### Session Patterns\n\n");
482            for p in session_patterns.iter().take(10) {
483                report.push_str(&format!(
484                    "- [{}] {} (confidence: {:.0}%)\n",
485                    p.pattern_type,
486                    p.description,
487                    p.confidence * 100.0,
488                ));
489            }
490            report.push('\n');
491        }
492
493        // Cross-session patterns
494        if !cross_patterns.is_empty() {
495            report.push_str("### Cross-Session Patterns\n\n");
496            for p in cross_patterns {
497                report.push_str(&format!(
498                    "- [{}] {} (confidence: {:.0}%)\n",
499                    p.pattern_type,
500                    p.description,
501                    p.confidence * 100.0,
502                ));
503            }
504            report.push('\n');
505        }
506
507        // Suggestions
508        report.push_str("### Suggestions\n\n");
509        if activity.files_read > 5 && activity.files_edited == 0 {
510            report.push_str(
511                "- You've read many files but haven't edited any yet. \
512                 Consider storing a `decision` memory about what you've learned.\n",
513            );
514        }
515        if activity.searches > 3 {
516            report.push_str(
517                "- Multiple searches detected. Use `store_memory` to save \
518                 key findings so you don't need to search again.\n",
519            );
520        }
521        if stored_patterns > 0 {
522            report.push_str(&format!(
523                "- {} new pattern insight(s) stored from this checkpoint.\n",
524                stored_patterns,
525            ));
526        }
527        if activity.total_actions == 0 {
528            report.push_str("- No activity recorded yet for this session.\n");
529        }
530
531        report
532    }
533
534    /// Check which graph nodes have attached memories (depth-1 only).
535    pub fn node_coverage(&self, node_ids: &[&str]) -> Result<Vec<NodeCoverageEntry>, CodememError> {
536        let graph = self.lock_graph()?;
537        let mut results = Vec::with_capacity(node_ids.len());
538
539        for &node_id in node_ids {
540            let edges = graph.get_edges_ref(node_id);
541            let memory_count = edges
542                .iter()
543                .filter(|e| {
544                    let other_id = if e.src == node_id { &e.dst } else { &e.src };
545                    graph
546                        .get_node_ref(other_id)
547                        .map(|n| n.kind == NodeKind::Memory)
548                        .unwrap_or(false)
549                })
550                .count();
551
552            results.push(NodeCoverageEntry {
553                node_id: node_id.to_string(),
554                memory_count,
555                has_coverage: memory_count > 0,
556            });
557        }
558
559        Ok(results)
560    }
561}