Skip to main content

codemem_engine/
analysis.rs

1//! Analysis domain logic: impact-aware recall, decision chains, session checkpoints.
2//!
3//! These methods were extracted from the MCP transport layer to keep domain logic
4//! in the engine crate.
5
6use crate::CodememEngine;
7use codemem_core::{
8    CodememError, DetectedPattern, GraphBackend, MemoryNode, MemoryType, NodeCoverageEntry,
9    NodeKind, RelationshipType, SearchResult,
10};
11use serde_json::json;
12use std::collections::{HashMap, HashSet};
13
14// ── Result Types ─────────────────────────────────────────────────────────────
15
16/// Impact data enrichment for a single search result.
17#[derive(Debug, Clone)]
18pub struct ImpactResult {
19    /// The underlying search result (memory + score).
20    pub search_result: SearchResult,
21    /// PageRank score for this memory in the graph.
22    pub pagerank: f64,
23    /// Betweenness centrality score.
24    pub centrality: f64,
25    /// IDs of connected Decision-type memories.
26    pub connected_decisions: Vec<String>,
27    /// Labels/paths of connected File-type nodes.
28    pub dependent_files: Vec<String>,
29}
30
31/// A single decision entry in a decision chain.
32#[derive(Debug, Clone)]
33pub struct DecisionEntry {
34    pub memory: MemoryNode,
35    /// Edges connecting this decision to others in the chain.
36    pub connections: Vec<DecisionConnection>,
37}
38
/// A directed edge linking two decisions of the same chain.
#[derive(Clone, Debug)]
pub struct DecisionConnection {
    /// The edge's relationship type, rendered as a string.
    pub relationship: String,
    /// ID of the node the edge starts from.
    pub source: String,
    /// ID of the node the edge points to.
    pub target: String,
}
46
47/// Result of a decision chain query.
48#[derive(Debug, Clone)]
49pub struct DecisionChain {
50    /// Number of decisions in the chain.
51    pub chain_length: usize,
52    /// The filter that was used.
53    pub file_path: Option<String>,
54    /// The topic filter that was used.
55    pub topic: Option<String>,
56    /// The decisions in chronological order.
57    pub decisions: Vec<DecisionEntry>,
58}
59
60/// Result of a session checkpoint.
61#[derive(Debug, Clone)]
62pub struct SessionCheckpointReport {
63    /// Number of files read in this session.
64    pub files_read: usize,
65    /// Number of files edited in this session.
66    pub files_edited: usize,
67    /// Number of searches in this session.
68    pub searches: usize,
69    /// Total actions in this session.
70    pub total_actions: usize,
71    /// Hot directories with their action counts.
72    pub hot_dirs: Vec<(String, usize)>,
73    /// Patterns detected within this session.
74    pub session_patterns: Vec<DetectedPattern>,
75    /// Patterns detected across sessions (excluding session-scoped duplicates).
76    pub cross_patterns: Vec<DetectedPattern>,
77    /// Number of new pattern insights stored during this checkpoint.
78    pub stored_pattern_count: usize,
79    /// Pre-built markdown report.
80    pub report: String,
81}
82
83// ── Engine Methods ───────────────────────────────────────────────────────────
84
85impl CodememEngine {
86    /// Recall memories enriched with graph impact data (PageRank, centrality,
87    /// connected decisions, dependent files).
88    pub fn recall_with_impact(
89        &self,
90        query: &str,
91        k: usize,
92        namespace: Option<&str>,
93    ) -> Result<Vec<ImpactResult>, CodememError> {
94        let results = self.recall(query, k, None, namespace, &[], None, None)?;
95
96        if results.is_empty() {
97            return Ok(vec![]);
98        }
99
100        let mut graph = self.lock_graph()?;
101        // C1: Ensure betweenness is computed before reading centrality values.
102        graph.ensure_betweenness_computed();
103
104        let output: Vec<ImpactResult> = results
105            .into_iter()
106            .map(|r| {
107                let memory_id = &r.memory.id;
108
109                let pagerank = graph.get_pagerank(memory_id);
110                let centrality = graph.get_betweenness(memory_id);
111
112                let edges = graph.get_edges(memory_id).unwrap_or_default();
113
114                let connected_decisions: Vec<String> = edges
115                    .iter()
116                    .filter_map(|e| {
117                        let other_id = if e.src == *memory_id { &e.dst } else { &e.src };
118                        self.storage
119                            .get_memory_no_touch(other_id)
120                            .ok()
121                            .flatten()
122                            .and_then(|m| {
123                                if m.memory_type == MemoryType::Decision {
124                                    Some(m.id)
125                                } else {
126                                    None
127                                }
128                            })
129                    })
130                    .collect();
131
132                let dependent_files: Vec<String> = edges
133                    .iter()
134                    .filter_map(|e| {
135                        let other_id = if e.src == *memory_id { &e.dst } else { &e.src };
136                        graph.get_node(other_id).ok().flatten().and_then(|n| {
137                            if n.kind == NodeKind::File {
138                                Some(n.label.clone())
139                            } else {
140                                n.payload
141                                    .get("file_path")
142                                    .and_then(|v| v.as_str().map(String::from))
143                            }
144                        })
145                    })
146                    .collect();
147
148                ImpactResult {
149                    search_result: r,
150                    pagerank,
151                    centrality,
152                    connected_decisions,
153                    dependent_files,
154                }
155            })
156            .collect();
157
158        Ok(output)
159    }
160
161    /// Find Decision-type memories matching a file_path or topic, then follow
162    /// EvolvedInto/LeadsTo/DerivedFrom edges via BFS to build a chronological chain.
163    pub fn get_decision_chain(
164        &self,
165        file_path: Option<&str>,
166        topic: Option<&str>,
167    ) -> Result<DecisionChain, CodememError> {
168        if file_path.is_none() && topic.is_none() {
169            return Err(CodememError::InvalidInput(
170                "Must provide either 'file_path' or 'topic' parameter".to_string(),
171            ));
172        }
173
174        let graph = self.lock_graph()?;
175
176        let decision_edge_types = [
177            RelationshipType::EvolvedInto,
178            RelationshipType::LeadsTo,
179            RelationshipType::DerivedFrom,
180        ];
181
182        // Batch-load all Decision memories in one query
183        let all_decisions = self
184            .storage
185            .list_memories_filtered(None, Some("decision"))?;
186
187        // Hoist lowercased filter values outside the loop
188        let filter_lower = file_path.map(|f| f.to_lowercase());
189        let topic_lower = topic.map(|t| t.to_lowercase());
190
191        // Collect Decision memories matching the filter
192        let mut decision_memories: Vec<MemoryNode> = Vec::new();
193        for memory in all_decisions {
194            let content_lower = memory.content.to_lowercase();
195            let tags_lower: String = memory.tags.join(" ").to_lowercase();
196
197            let matches = if let Some(ref fp) = filter_lower {
198                content_lower.contains(fp)
199                    || tags_lower.contains(fp)
200                    || memory
201                        .metadata
202                        .get("file_path")
203                        .and_then(|v| v.as_str())
204                        .map(|v| v.to_lowercase().contains(fp))
205                        .unwrap_or(false)
206            } else if let Some(ref tl) = topic_lower {
207                content_lower.contains(tl) || tags_lower.contains(tl)
208            } else {
209                false
210            };
211
212            if matches {
213                decision_memories.push(memory);
214            }
215        }
216
217        if decision_memories.is_empty() {
218            return Ok(DecisionChain {
219                chain_length: 0,
220                file_path: file_path.map(String::from),
221                topic: topic.map(String::from),
222                decisions: vec![],
223            });
224        }
225
226        // Expand through decision-related edges to find the full chain (BFS)
227        let mut chain_ids: HashSet<String> = HashSet::new();
228        let mut to_explore: Vec<String> = decision_memories.iter().map(|m| m.id.clone()).collect();
229
230        while let Some(current_id) = to_explore.pop() {
231            if !chain_ids.insert(current_id.clone()) {
232                continue;
233            }
234
235            if let Ok(edges) = graph.get_edges(&current_id) {
236                for edge in &edges {
237                    if decision_edge_types.contains(&edge.relationship) {
238                        let other_id = if edge.src == current_id {
239                            &edge.dst
240                        } else {
241                            &edge.src
242                        };
243                        if !chain_ids.contains(other_id) {
244                            // Only follow to other Decision memories
245                            if let Ok(Some(m)) = self.storage.get_memory_no_touch(other_id) {
246                                if m.memory_type == MemoryType::Decision {
247                                    to_explore.push(other_id.clone());
248                                }
249                            }
250                        }
251                    }
252                }
253            }
254        }
255
256        // Collect all chain memories and sort by created_at (temporal order)
257        let mut chain: Vec<DecisionEntry> = Vec::new();
258        for id in &chain_ids {
259            if let Ok(Some(memory)) = self.storage.get_memory_no_touch(id) {
260                let connections: Vec<DecisionConnection> = graph
261                    .get_edges(id)
262                    .unwrap_or_default()
263                    .iter()
264                    .filter(|e| {
265                        decision_edge_types.contains(&e.relationship)
266                            && (chain_ids.contains(&e.src) && chain_ids.contains(&e.dst))
267                    })
268                    .map(|e| DecisionConnection {
269                        relationship: e.relationship.to_string(),
270                        source: e.src.clone(),
271                        target: e.dst.clone(),
272                    })
273                    .collect();
274
275                chain.push(DecisionEntry {
276                    memory,
277                    connections,
278                });
279            }
280        }
281
282        // Sort chronologically
283        chain.sort_by(|a, b| a.memory.created_at.cmp(&b.memory.created_at));
284
285        let chain_length = chain.len();
286        Ok(DecisionChain {
287            chain_length,
288            file_path: file_path.map(String::from),
289            topic: topic.map(String::from),
290            decisions: chain,
291        })
292    }
293
294    /// Build a mid-session progress report: activity summary, pattern detection
295    /// (session-scoped + cross-session), stores new pattern insights, hot directories,
296    /// markdown report.
297    pub fn session_checkpoint(
298        &self,
299        session_id: &str,
300        namespace: Option<&str>,
301    ) -> Result<SessionCheckpointReport, CodememError> {
302        // 1. Get session activity summary
303        let activity = self.storage.get_session_activity_summary(session_id)?;
304
305        // 2. Run session-scoped pattern detection (lower thresholds for single session)
306        let total_sessions = self.storage.session_count(namespace).unwrap_or(1).max(1);
307
308        let session_patterns = crate::patterns::detect_patterns(
309            &*self.storage,
310            namespace,
311            2, // session-scoped: min_frequency=2
312            total_sessions,
313        )
314        .unwrap_or_default();
315
316        // Cross-session patterns with higher threshold
317        let cross_patterns = crate::patterns::detect_patterns(
318            &*self.storage,
319            namespace,
320            3, // cross-session: min_frequency=3
321            total_sessions,
322        )
323        .unwrap_or_default();
324
325        // 3. Store new session patterns as Insight memories (with dedup)
326        let mut stored_patterns = 0usize;
327        for pattern in &session_patterns {
328            let dedup_tag = format!("checkpoint:{}:{}", session_id, pattern.description);
329            let already_exists = self
330                .storage
331                .has_auto_insight(session_id, &dedup_tag)
332                .unwrap_or(true);
333            if !already_exists && pattern.confidence > 0.3 {
334                let now = chrono::Utc::now();
335                let hash = codemem_storage::Storage::content_hash(&pattern.description);
336                let mut metadata = HashMap::new();
337                metadata.insert("session_id".to_string(), json!(session_id));
338                metadata.insert("auto_insight_tag".to_string(), json!(dedup_tag));
339                metadata.insert("source".to_string(), json!("session_checkpoint"));
340                metadata.insert(
341                    "pattern_type".to_string(),
342                    json!(pattern.pattern_type.to_string()),
343                );
344
345                let mem = codemem_core::MemoryNode {
346                    id: uuid::Uuid::new_v4().to_string(),
347                    content: format!("Session pattern: {}", pattern.description),
348                    memory_type: MemoryType::Insight,
349                    importance: 0.6,
350                    confidence: pattern.confidence,
351                    access_count: 0,
352                    content_hash: hash,
353                    tags: vec![
354                        "session-checkpoint".to_string(),
355                        format!("pattern:{}", pattern.pattern_type),
356                    ],
357                    metadata,
358                    namespace: namespace.map(|s| s.to_string()),
359                    session_id: None,
360                    created_at: now,
361                    updated_at: now,
362                    last_accessed_at: now,
363                };
364                if self.storage.insert_memory(&mem).is_ok() {
365                    stored_patterns += 1;
366                }
367            }
368        }
369
370        // 4. Get hot directories
371        let hot_dirs = self
372            .storage
373            .get_session_hot_directories(session_id, 5)
374            .unwrap_or_default();
375
376        // 5. Filter unique cross-session patterns
377        let unique_cross: Vec<DetectedPattern> = cross_patterns
378            .iter()
379            .filter(|p| {
380                !session_patterns
381                    .iter()
382                    .any(|sp| sp.description == p.description)
383            })
384            .take(5)
385            .cloned()
386            .collect();
387
388        // 6. Build markdown report
389        let report = Self::format_checkpoint_report(
390            &activity,
391            &hot_dirs,
392            &session_patterns,
393            &unique_cross,
394            stored_patterns,
395        );
396
397        // 7. Persist a checkpoint memory with session state metadata
398        let memory_count = self.storage.memory_count().unwrap_or(0);
399        let now = chrono::Utc::now();
400        let checkpoint_content = format!(
401            "Session checkpoint for {}: {} actions ({} reads, {} edits, {} searches), {} total memories, {} patterns detected",
402            session_id,
403            activity.total_actions,
404            activity.files_read,
405            activity.files_edited,
406            activity.searches,
407            memory_count,
408            session_patterns.len(),
409        );
410        let hash = codemem_storage::Storage::content_hash(&checkpoint_content);
411
412        let mut checkpoint_metadata = HashMap::new();
413        checkpoint_metadata.insert("checkpoint_type".to_string(), json!("manual"));
414        checkpoint_metadata.insert("session_id".to_string(), json!(session_id));
415        checkpoint_metadata.insert("memory_count".to_string(), json!(memory_count));
416        checkpoint_metadata.insert("timestamp".to_string(), json!(now.to_rfc3339()));
417        checkpoint_metadata.insert("files_read".to_string(), json!(activity.files_read));
418        checkpoint_metadata.insert("files_edited".to_string(), json!(activity.files_edited));
419        checkpoint_metadata.insert("searches".to_string(), json!(activity.searches));
420        checkpoint_metadata.insert("total_actions".to_string(), json!(activity.total_actions));
421        checkpoint_metadata.insert("pattern_count".to_string(), json!(session_patterns.len()));
422        checkpoint_metadata.insert("cross_pattern_count".to_string(), json!(unique_cross.len()));
423        checkpoint_metadata.insert("stored_pattern_count".to_string(), json!(stored_patterns));
424        if !hot_dirs.is_empty() {
425            let dirs: Vec<&str> = hot_dirs.iter().map(|(d, _)| d.as_str()).collect();
426            checkpoint_metadata.insert("hot_directories".to_string(), json!(dirs));
427        }
428
429        let checkpoint_mem = codemem_core::MemoryNode {
430            id: uuid::Uuid::new_v4().to_string(),
431            content: checkpoint_content,
432            memory_type: MemoryType::Context,
433            importance: 0.5,
434            confidence: 1.0,
435            access_count: 0,
436            content_hash: hash,
437            tags: vec![
438                "session-checkpoint".to_string(),
439                format!("session:{session_id}"),
440            ],
441            metadata: checkpoint_metadata,
442            namespace: namespace.map(|s| s.to_string()),
443            session_id: Some(session_id.to_string()),
444            created_at: now,
445            updated_at: now,
446            last_accessed_at: now,
447        };
448        // Best-effort persist; don't fail the checkpoint if this errors
449        let _ = self.persist_memory(&checkpoint_mem);
450
451        Ok(SessionCheckpointReport {
452            files_read: activity.files_read,
453            files_edited: activity.files_edited,
454            searches: activity.searches,
455            total_actions: activity.total_actions,
456            hot_dirs,
457            session_patterns,
458            cross_patterns: unique_cross,
459            stored_pattern_count: stored_patterns,
460            report,
461        })
462    }
463
464    /// Format the checkpoint data into a markdown report string.
465    fn format_checkpoint_report(
466        activity: &codemem_core::SessionActivitySummary,
467        hot_dirs: &[(String, usize)],
468        session_patterns: &[DetectedPattern],
469        cross_patterns: &[DetectedPattern],
470        stored_patterns: usize,
471    ) -> String {
472        let mut report = String::from("## Session Checkpoint\n\n");
473
474        // Activity summary
475        report.push_str("### Activity Summary\n\n");
476        report.push_str(&format!(
477            "| Metric | Count |\n|--------|-------|\n\
478             | Files read | {} |\n\
479             | Files edited | {} |\n\
480             | Searches | {} |\n\
481             | Total actions | {} |\n\n",
482            activity.files_read, activity.files_edited, activity.searches, activity.total_actions,
483        ));
484
485        // Focus areas
486        if !hot_dirs.is_empty() {
487            report.push_str("### Focus Areas\n\n");
488            report.push_str("Directories with most activity in this session:\n\n");
489            for (dir, count) in hot_dirs {
490                report.push_str(&format!("- `{}` ({} actions)\n", dir, count));
491            }
492            report.push('\n');
493        }
494
495        // Session-scoped patterns
496        if !session_patterns.is_empty() {
497            report.push_str("### Session Patterns\n\n");
498            for p in session_patterns.iter().take(10) {
499                report.push_str(&format!(
500                    "- [{}] {} (confidence: {:.0}%)\n",
501                    p.pattern_type,
502                    p.description,
503                    p.confidence * 100.0,
504                ));
505            }
506            report.push('\n');
507        }
508
509        // Cross-session patterns
510        if !cross_patterns.is_empty() {
511            report.push_str("### Cross-Session Patterns\n\n");
512            for p in cross_patterns {
513                report.push_str(&format!(
514                    "- [{}] {} (confidence: {:.0}%)\n",
515                    p.pattern_type,
516                    p.description,
517                    p.confidence * 100.0,
518                ));
519            }
520            report.push('\n');
521        }
522
523        // Suggestions
524        report.push_str("### Suggestions\n\n");
525        if activity.files_read > 5 && activity.files_edited == 0 {
526            report.push_str(
527                "- You've read many files but haven't edited any yet. \
528                 Consider storing a `decision` memory about what you've learned.\n",
529            );
530        }
531        if activity.searches > 3 {
532            report.push_str(
533                "- Multiple searches detected. Use `store_memory` to save \
534                 key findings so you don't need to search again.\n",
535            );
536        }
537        if stored_patterns > 0 {
538            report.push_str(&format!(
539                "- {} new pattern insight(s) stored from this checkpoint.\n",
540                stored_patterns,
541            ));
542        }
543        if activity.total_actions == 0 {
544            report.push_str("- No activity recorded yet for this session.\n");
545        }
546
547        report
548    }
549
550    /// Check which graph nodes have attached memories (depth-1 only).
551    pub fn node_coverage(&self, node_ids: &[&str]) -> Result<Vec<NodeCoverageEntry>, CodememError> {
552        let graph = self.lock_graph()?;
553        let mut results = Vec::with_capacity(node_ids.len());
554
555        for &node_id in node_ids {
556            let edges = graph.get_edges_ref(node_id);
557            let memory_count = edges
558                .iter()
559                .filter(|e| {
560                    let other_id = if e.src == node_id { &e.dst } else { &e.src };
561                    graph
562                        .get_node_ref(other_id)
563                        .map(|n| n.kind == NodeKind::Memory)
564                        .unwrap_or(false)
565                })
566                .count();
567
568            results.push(NodeCoverageEntry {
569                node_id: node_id.to_string(),
570                memory_count,
571                has_coverage: memory_count > 0,
572            });
573        }
574
575        Ok(results)
576    }
577}