Skip to main content

codemem_engine/
patterns.rs

1//! Cross-session pattern detection for Codemem.
2//!
3//! Analyzes stored memories to detect recurring patterns like repeated searches,
4//! file hotspots, decision chains, and tool preferences across sessions.
5
6use codemem_core::{CodememError, DetectedPattern, PatternType, StorageBackend};
7
/// Compute a log-scaled confidence score from frequency, total sessions, and a
/// recency factor.
///
/// The base score is `ln(frequency) / max(ln(total_sessions), 1.0)`. Flooring the
/// denominator at 1.0 keeps the division well-defined when `total_sessions` is 1
/// (where `ln` is zero) and avoids inflating the score when it is 2 (where `ln`
/// is below one). The base is then multiplied by `recency_factor`.
///
/// Returns 0.0 when `frequency` or `total_sessions` is zero. The result is always
/// within `[0.0, 1.0]`: a negative product (from a negative `recency_factor`) and
/// a NaN product are both mapped to 0.0.
fn compute_confidence(frequency: usize, total_sessions: usize, recency_factor: f64) -> f64 {
    if frequency == 0 || total_sessions == 0 {
        return 0.0;
    }
    let denominator = (total_sessions as f64).ln().max(1.0);
    let base = (frequency as f64).ln() / denominator;
    // `f64::max` / `f64::min` return the non-NaN operand, so applying the lower
    // bound first turns a NaN product into 0.0 rather than letting `.min(1.0)`
    // silently promote it to full confidence.
    (base * recency_factor).max(0.0).min(1.0)
}
19
20/// Detect all patterns in the memory store.
21///
22/// Runs multiple detectors and returns all patterns found, sorted by confidence
23/// descending. The `min_frequency` parameter controls the threshold for how many
24/// times a pattern must appear before it is flagged. `total_sessions` is used for
25/// log-scaled confidence computation.
26pub fn detect_patterns(
27    storage: &dyn StorageBackend,
28    namespace: Option<&str>,
29    min_frequency: usize,
30    total_sessions: usize,
31) -> Result<Vec<DetectedPattern>, CodememError> {
32    let mut patterns = Vec::new();
33
34    patterns.extend(detect_repeated_searches(
35        storage,
36        namespace,
37        min_frequency,
38        total_sessions,
39    )?);
40    patterns.extend(detect_file_hotspots(
41        storage,
42        namespace,
43        min_frequency,
44        total_sessions,
45    )?);
46    patterns.extend(detect_decision_chains(
47        storage,
48        namespace,
49        min_frequency,
50        total_sessions,
51    )?);
52    patterns.extend(detect_tool_preferences(storage, namespace)?);
53
54    // Sort by confidence descending
55    patterns.sort_by(|a, b| {
56        b.confidence
57            .partial_cmp(&a.confidence)
58            .unwrap_or(std::cmp::Ordering::Equal)
59    });
60
61    Ok(patterns)
62}
63
64/// Detect repeated search patterns (Grep/Glob queries used multiple times).
65fn detect_repeated_searches(
66    storage: &dyn StorageBackend,
67    namespace: Option<&str>,
68    min_frequency: usize,
69    total_sessions: usize,
70) -> Result<Vec<DetectedPattern>, CodememError> {
71    let results = storage.get_repeated_searches(min_frequency, namespace)?;
72
73    Ok(results
74        .into_iter()
75        .map(|(pattern, count, memory_ids)| DetectedPattern {
76            pattern_type: PatternType::RepeatedSearch,
77            description: format!(
78                "Search pattern '{}' used {} times across sessions",
79                pattern, count
80            ),
81            frequency: count,
82            related_memories: memory_ids,
83            confidence: compute_confidence(count, total_sessions, 1.0),
84        })
85        .collect())
86}
87
88/// Detect file hotspots (files accessed frequently via Read/Edit/Write).
89fn detect_file_hotspots(
90    storage: &dyn StorageBackend,
91    namespace: Option<&str>,
92    min_frequency: usize,
93    total_sessions: usize,
94) -> Result<Vec<DetectedPattern>, CodememError> {
95    let results = storage.get_file_hotspots(min_frequency, namespace)?;
96
97    Ok(results
98        .into_iter()
99        .map(|(file_path, count, memory_ids)| DetectedPattern {
100            pattern_type: PatternType::FileHotspot,
101            description: format!(
102                "File '{}' accessed {} times across sessions",
103                file_path, count
104            ),
105            frequency: count,
106            related_memories: memory_ids,
107            confidence: compute_confidence(count, total_sessions, 1.0),
108        })
109        .collect())
110}
111
112/// Detect decision chains: files modified multiple times via Edit/Write over time.
113fn detect_decision_chains(
114    storage: &dyn StorageBackend,
115    namespace: Option<&str>,
116    min_frequency: usize,
117    total_sessions: usize,
118) -> Result<Vec<DetectedPattern>, CodememError> {
119    let results = storage.get_decision_chains(min_frequency, namespace)?;
120
121    Ok(results
122        .into_iter()
123        .map(|(file_path, count, memory_ids)| DetectedPattern {
124            pattern_type: PatternType::DecisionChain,
125            description: format!(
126                "File '{}' modified {} times, forming a decision chain",
127                file_path, count
128            ),
129            frequency: count,
130            related_memories: memory_ids,
131            confidence: compute_confidence(count, total_sessions, 1.0),
132        })
133        .collect())
134}
135
136/// Detect tool usage preferences by analyzing the distribution of tool usage.
137fn detect_tool_preferences(
138    storage: &dyn StorageBackend,
139    namespace: Option<&str>,
140) -> Result<Vec<DetectedPattern>, CodememError> {
141    let tool_entries = storage.get_tool_usage_stats(namespace)?;
142
143    if tool_entries.len() < 2 {
144        return Ok(vec![]);
145    }
146
147    let total: usize = tool_entries.iter().map(|(_, c)| c).sum();
148    if total == 0 {
149        return Ok(vec![]);
150    }
151
152    Ok(tool_entries
153        .into_iter()
154        .map(|(tool, count)| {
155            let pct = (count as f64 / total as f64 * 100.0).round() as usize;
156            DetectedPattern {
157                pattern_type: PatternType::ToolPreference,
158                description: format!(
159                    "Tool '{}' used {} times ({}% of all tool usage)",
160                    tool, count, pct
161                ),
162                frequency: count,
163                related_memories: vec![],
164                confidence: count as f64 / total as f64,
165            }
166        })
167        .collect())
168}
169
170/// Generate human-readable pattern insights as markdown.
171pub fn generate_insights(patterns: &[DetectedPattern]) -> String {
172    if patterns.is_empty() {
173        return "No patterns detected yet. Keep using Codemem to build up session history."
174            .to_string();
175    }
176
177    let mut md = String::from("## Cross-Session Pattern Insights\n\n");
178
179    // File Hotspots
180    let hotspots: Vec<_> = patterns
181        .iter()
182        .filter(|p| p.pattern_type == PatternType::FileHotspot)
183        .collect();
184    if !hotspots.is_empty() {
185        md.push_str("### File Hotspots\n");
186        md.push_str("Files you keep coming back to across sessions:\n\n");
187        for p in hotspots.iter().take(10) {
188            md.push_str(&format!(
189                "- {} (confidence: {:.0}%)\n",
190                p.description,
191                p.confidence * 100.0
192            ));
193        }
194        md.push('\n');
195    }
196
197    // Repeated Searches
198    let searches: Vec<_> = patterns
199        .iter()
200        .filter(|p| p.pattern_type == PatternType::RepeatedSearch)
201        .collect();
202    if !searches.is_empty() {
203        md.push_str("### Repeated Searches\n");
204        md.push_str(
205            "Search patterns you use repeatedly (consider creating a memory for these):\n\n",
206        );
207        for p in searches.iter().take(10) {
208            md.push_str(&format!(
209                "- {} (confidence: {:.0}%)\n",
210                p.description,
211                p.confidence * 100.0
212            ));
213        }
214        md.push('\n');
215    }
216
217    // Decision Chains
218    let chains: Vec<_> = patterns
219        .iter()
220        .filter(|p| p.pattern_type == PatternType::DecisionChain)
221        .collect();
222    if !chains.is_empty() {
223        md.push_str("### Decision Chains\n");
224        md.push_str("Files modified multiple times, suggesting evolving decisions:\n\n");
225        for p in chains.iter().take(10) {
226            md.push_str(&format!(
227                "- {} (confidence: {:.0}%)\n",
228                p.description,
229                p.confidence * 100.0
230            ));
231        }
232        md.push('\n');
233    }
234
235    // Tool Preferences
236    let prefs: Vec<_> = patterns
237        .iter()
238        .filter(|p| p.pattern_type == PatternType::ToolPreference)
239        .collect();
240    if !prefs.is_empty() {
241        md.push_str("### Tool Usage Distribution\n");
242        for p in &prefs {
243            md.push_str(&format!("- {}\n", p.description));
244        }
245        md.push('\n');
246    }
247
248    // Summary
249    md.push_str(&format!(
250        "**Total patterns detected:** {}\n",
251        patterns.len()
252    ));
253
254    md
255}
256
257#[cfg(test)]
258#[path = "tests/patterns_tests.rs"]
259mod tests;