Skip to main content

codemem_mcp/
patterns.rs

1//! Cross-session pattern detection for Codemem.
2//!
3//! Analyzes stored memories to detect recurring patterns like repeated searches,
4//! file hotspots, decision chains, and tool preferences across sessions.
5
6use codemem_core::{CodememError, DetectedPattern, PatternType, StorageBackend};
7
8/// Detect all patterns in the memory store.
9///
10/// Runs multiple detectors and returns all patterns found, sorted by confidence
11/// descending. The `min_frequency` parameter controls the threshold for how many
12/// times a pattern must appear before it is flagged.
13pub fn detect_patterns(
14    storage: &dyn StorageBackend,
15    namespace: Option<&str>,
16    min_frequency: usize,
17) -> Result<Vec<DetectedPattern>, CodememError> {
18    let mut patterns = Vec::new();
19
20    patterns.extend(detect_repeated_searches(storage, namespace, min_frequency)?);
21    patterns.extend(detect_file_hotspots(storage, namespace, min_frequency)?);
22    patterns.extend(detect_decision_chains(storage, namespace, min_frequency)?);
23    patterns.extend(detect_tool_preferences(storage, namespace)?);
24
25    // Sort by confidence descending
26    patterns.sort_by(|a, b| {
27        b.confidence
28            .partial_cmp(&a.confidence)
29            .unwrap_or(std::cmp::Ordering::Equal)
30    });
31
32    Ok(patterns)
33}
34
35/// Detect repeated search patterns (Grep/Glob queries used multiple times).
36fn detect_repeated_searches(
37    storage: &dyn StorageBackend,
38    namespace: Option<&str>,
39    min_frequency: usize,
40) -> Result<Vec<DetectedPattern>, CodememError> {
41    let results = storage.get_repeated_searches(min_frequency, namespace)?;
42
43    Ok(results
44        .into_iter()
45        .map(|(pattern, count, memory_ids)| DetectedPattern {
46            pattern_type: PatternType::RepeatedSearch,
47            description: format!(
48                "Search pattern '{}' used {} times across sessions",
49                pattern, count
50            ),
51            frequency: count,
52            related_memories: memory_ids,
53            confidence: (count as f64 / 10.0).min(1.0),
54        })
55        .collect())
56}
57
58/// Detect file hotspots (files accessed frequently via Read/Edit/Write).
59fn detect_file_hotspots(
60    storage: &dyn StorageBackend,
61    namespace: Option<&str>,
62    min_frequency: usize,
63) -> Result<Vec<DetectedPattern>, CodememError> {
64    let results = storage.get_file_hotspots(min_frequency, namespace)?;
65
66    Ok(results
67        .into_iter()
68        .map(|(file_path, count, memory_ids)| DetectedPattern {
69            pattern_type: PatternType::FileHotspot,
70            description: format!(
71                "File '{}' accessed {} times across sessions",
72                file_path, count
73            ),
74            frequency: count,
75            related_memories: memory_ids,
76            confidence: (count as f64 / 10.0).min(1.0),
77        })
78        .collect())
79}
80
81/// Detect decision chains: files modified multiple times via Edit/Write over time.
82fn detect_decision_chains(
83    storage: &dyn StorageBackend,
84    namespace: Option<&str>,
85    min_frequency: usize,
86) -> Result<Vec<DetectedPattern>, CodememError> {
87    let results = storage.get_decision_chains(min_frequency, namespace)?;
88
89    Ok(results
90        .into_iter()
91        .map(|(file_path, count, memory_ids)| DetectedPattern {
92            pattern_type: PatternType::DecisionChain,
93            description: format!(
94                "File '{}' modified {} times, forming a decision chain",
95                file_path, count
96            ),
97            frequency: count,
98            related_memories: memory_ids,
99            confidence: (count as f64 / 8.0).min(1.0),
100        })
101        .collect())
102}
103
104/// Detect tool usage preferences by analyzing the distribution of tool usage.
105fn detect_tool_preferences(
106    storage: &dyn StorageBackend,
107    namespace: Option<&str>,
108) -> Result<Vec<DetectedPattern>, CodememError> {
109    let tool_entries = storage.get_tool_usage_stats(namespace)?;
110
111    if tool_entries.len() < 2 {
112        return Ok(vec![]);
113    }
114
115    let total: usize = tool_entries.iter().map(|(_, c)| c).sum();
116    if total == 0 {
117        return Ok(vec![]);
118    }
119
120    Ok(tool_entries
121        .into_iter()
122        .map(|(tool, count)| {
123            let pct = (count as f64 / total as f64 * 100.0) as usize;
124            DetectedPattern {
125                pattern_type: PatternType::ToolPreference,
126                description: format!(
127                    "Tool '{}' used {} times ({}% of all tool usage)",
128                    tool, count, pct
129                ),
130                frequency: count,
131                related_memories: vec![],
132                confidence: count as f64 / total as f64,
133            }
134        })
135        .collect())
136}
137
138/// Generate human-readable pattern insights as markdown.
139pub fn generate_insights(patterns: &[DetectedPattern]) -> String {
140    if patterns.is_empty() {
141        return "No patterns detected yet. Keep using Codemem to build up session history."
142            .to_string();
143    }
144
145    let mut md = String::from("## Cross-Session Pattern Insights\n\n");
146
147    // File Hotspots
148    let hotspots: Vec<_> = patterns
149        .iter()
150        .filter(|p| p.pattern_type == PatternType::FileHotspot)
151        .collect();
152    if !hotspots.is_empty() {
153        md.push_str("### File Hotspots\n");
154        md.push_str("Files you keep coming back to across sessions:\n\n");
155        for p in hotspots.iter().take(10) {
156            md.push_str(&format!(
157                "- {} (confidence: {:.0}%)\n",
158                p.description,
159                p.confidence * 100.0
160            ));
161        }
162        md.push('\n');
163    }
164
165    // Repeated Searches
166    let searches: Vec<_> = patterns
167        .iter()
168        .filter(|p| p.pattern_type == PatternType::RepeatedSearch)
169        .collect();
170    if !searches.is_empty() {
171        md.push_str("### Repeated Searches\n");
172        md.push_str(
173            "Search patterns you use repeatedly (consider creating a memory for these):\n\n",
174        );
175        for p in searches.iter().take(10) {
176            md.push_str(&format!(
177                "- {} (confidence: {:.0}%)\n",
178                p.description,
179                p.confidence * 100.0
180            ));
181        }
182        md.push('\n');
183    }
184
185    // Decision Chains
186    let chains: Vec<_> = patterns
187        .iter()
188        .filter(|p| p.pattern_type == PatternType::DecisionChain)
189        .collect();
190    if !chains.is_empty() {
191        md.push_str("### Decision Chains\n");
192        md.push_str("Files modified multiple times, suggesting evolving decisions:\n\n");
193        for p in chains.iter().take(10) {
194            md.push_str(&format!(
195                "- {} (confidence: {:.0}%)\n",
196                p.description,
197                p.confidence * 100.0
198            ));
199        }
200        md.push('\n');
201    }
202
203    // Tool Preferences
204    let prefs: Vec<_> = patterns
205        .iter()
206        .filter(|p| p.pattern_type == PatternType::ToolPreference)
207        .collect();
208    if !prefs.is_empty() {
209        md.push_str("### Tool Usage Distribution\n");
210        for p in &prefs {
211            md.push_str(&format!("- {}\n", p.description));
212        }
213        md.push('\n');
214    }
215
216    // Summary
217    md.push_str(&format!(
218        "**Total patterns detected:** {}\n",
219        patterns.len()
220    ));
221
222    md
223}
224
#[cfg(test)]
mod tests {
    use super::*;
    use codemem_core::MemoryNode;
    use codemem_core::MemoryType;
    use codemem_storage::Storage;
    use std::collections::HashMap;

    /// Build a `Context` memory whose metadata records `tool` under the
    /// `"tool"` key, plus every `(key, value)` pair in `extra_metadata` stored
    /// as JSON strings. Each call gets a fresh random id and the current time.
    fn make_memory(content: &str, tool: &str, extra_metadata: Vec<(&str, &str)>) -> MemoryNode {
        let now = chrono::Utc::now();
        let mut metadata = HashMap::new();
        metadata.insert(
            "tool".to_string(),
            serde_json::Value::String(tool.to_string()),
        );
        for (k, v) in extra_metadata {
            metadata.insert(k.to_string(), serde_json::Value::String(v.to_string()));
        }
        MemoryNode {
            id: uuid::Uuid::new_v4().to_string(),
            content: content.to_string(),
            memory_type: MemoryType::Context,
            importance: 0.5,
            confidence: 1.0,
            access_count: 0,
            content_hash: Storage::content_hash(content),
            tags: vec![],
            metadata,
            namespace: None,
            created_at: now,
            updated_at: now,
            last_accessed_at: now,
        }
    }

    /// Create a memory with `make_memory` and insert it, panicking on failure.
    fn store(storage: &Storage, content: &str, tool: &str, extra_metadata: Vec<(&str, &str)>) {
        let mem = make_memory(content, tool, extra_metadata);
        storage.insert_memory(&mem).unwrap();
    }

    /// Borrow only the patterns of the requested kind, preserving their order.
    fn of_kind(patterns: &[DetectedPattern], kind: PatternType) -> Vec<&DetectedPattern> {
        patterns.iter().filter(|p| p.pattern_type == kind).collect()
    }

    #[test]
    fn detect_patterns_empty_db() {
        let storage = Storage::open_in_memory().unwrap();
        let patterns = detect_patterns(&storage, None, 2).unwrap();
        assert!(patterns.is_empty());
    }

    #[test]
    fn detect_repeated_search_patterns() {
        let storage = Storage::open_in_memory().unwrap();

        // Store 3 Grep searches for "error handling"
        for i in 0..3 {
            store(
                &storage,
                &format!("grep for error handling {i}"),
                "Grep",
                vec![("pattern", "error handling")],
            );
        }

        // Store 1 Glob search (below threshold)
        store(&storage, "glob for rs files", "Glob", vec![("pattern", "*.rs")]);

        let patterns = detect_patterns(&storage, None, 2).unwrap();
        let searches = of_kind(&patterns, PatternType::RepeatedSearch);

        assert_eq!(searches.len(), 1);
        assert_eq!(searches[0].frequency, 3);
        assert_eq!(searches[0].related_memories.len(), 3);
    }

    #[test]
    fn detect_file_hotspot_patterns() {
        let storage = Storage::open_in_memory().unwrap();

        // Access main.rs 4 times
        for i in 0..4 {
            store(
                &storage,
                &format!("read main.rs {i}"),
                "Read",
                vec![("file_path", "src/main.rs")],
            );
        }

        // Access lib.rs once (below threshold)
        store(&storage, "read lib.rs", "Read", vec![("file_path", "src/lib.rs")]);

        let patterns = detect_patterns(&storage, None, 3).unwrap();
        let hotspots = of_kind(&patterns, PatternType::FileHotspot);

        assert_eq!(hotspots.len(), 1);
        assert!(hotspots[0].description.contains("src/main.rs"));
        assert_eq!(hotspots[0].frequency, 4);
    }

    #[test]
    fn detect_decision_chain_patterns() {
        let storage = Storage::open_in_memory().unwrap();

        // Edit main.rs 3 times
        for i in 0..3 {
            store(
                &storage,
                &format!("edit main.rs {i}"),
                "Edit",
                vec![("file_path", "src/main.rs")],
            );
        }

        let patterns = detect_patterns(&storage, None, 2).unwrap();
        let chains = of_kind(&patterns, PatternType::DecisionChain);

        assert_eq!(chains.len(), 1);
        assert!(chains[0].description.contains("decision chain"));
    }

    #[test]
    fn detect_tool_preference_patterns() {
        let storage = Storage::open_in_memory().unwrap();

        // 5 reads, 2 greps
        for i in 0..5 {
            store(&storage, &format!("read file {i}"), "Read", vec![]);
        }
        for i in 0..2 {
            store(&storage, &format!("grep {i}"), "Grep", vec![]);
        }

        let patterns = detect_patterns(&storage, None, 1).unwrap();
        let prefs = of_kind(&patterns, PatternType::ToolPreference);

        assert_eq!(prefs.len(), 2);

        // detect_patterns sorts by confidence descending, so the most used
        // tool must come first. Assert the ordering itself, not just presence.
        assert!(prefs[0].confidence >= prefs[1].confidence);
        assert!(prefs[0].description.contains("Read"));

        let read_pref = prefs
            .iter()
            .find(|p| p.description.contains("Read"))
            .unwrap();
        assert_eq!(read_pref.frequency, 5);
    }

    #[test]
    fn generate_insights_empty() {
        let md = generate_insights(&[]);
        assert!(md.contains("No patterns detected"));
    }

    #[test]
    fn generate_insights_with_patterns() {
        let patterns = vec![
            DetectedPattern {
                pattern_type: PatternType::FileHotspot,
                description: "File 'src/main.rs' accessed 5 times".to_string(),
                frequency: 5,
                related_memories: vec!["a".to_string()],
                confidence: 0.5,
            },
            DetectedPattern {
                pattern_type: PatternType::RepeatedSearch,
                description: "Search pattern 'error' used 3 times".to_string(),
                frequency: 3,
                related_memories: vec!["b".to_string()],
                confidence: 0.3,
            },
        ];

        let md = generate_insights(&patterns);
        assert!(md.contains("File Hotspots"));
        assert!(md.contains("Repeated Searches"));
        assert!(md.contains("src/main.rs"));
        assert!(md.contains("**Total patterns detected:** 2"));
    }

    #[test]
    fn single_tool_no_preference_detected() {
        let storage = Storage::open_in_memory().unwrap();

        // Only 1 tool type - should return empty preferences
        store(&storage, "read file", "Read", vec![]);

        let patterns = detect_patterns(&storage, None, 1).unwrap();
        let prefs = of_kind(&patterns, PatternType::ToolPreference);

        assert!(prefs.is_empty());
    }
}