Skip to main content

lean_ctx/core/
memory_lifecycle.rs

1//! Memory Lifecycle Management — consolidation, decay, compaction, archival.
2//!
3//! Runs automatically on knowledge stores to keep memory healthy:
4//! - Confidence decay over time
5//! - Semantic consolidation of similar facts
6//! - Compaction when limits are exceeded
7//! - Archival of old/unused facts
8
9use chrono::{DateTime, Duration, Utc};
10use serde::{Deserialize, Serialize};
11use std::path::PathBuf;
12
13use super::knowledge::KnowledgeFact;
14
/// Default confidence lost per whole day since a fact was last confirmed.
const DEFAULT_DECAY_RATE: f32 = 0.01;
/// Default cap on the number of live facts kept after compaction.
const DEFAULT_MAX_FACTS: usize = 1000;
/// Default confidence below which compaction archives a fact outright.
const LOW_CONFIDENCE_THRESHOLD: f32 = 0.3;
/// Default number of days without confirmation before a weak fact counts as stale.
const STALE_DAYS: i64 = 30;
/// Default word-overlap score at or above which two same-category facts are merged.
const DEFAULT_CONSOLIDATION_SIMILARITY: f32 = 0.85;

/// Tunable thresholds shared by the decay, consolidation and compaction passes.
#[derive(Debug, Clone)]
pub struct LifecycleConfig {
    /// Confidence subtracted per whole day since `last_confirmed` (before protection scaling).
    pub decay_rate_per_day: f32,
    /// Maximum number of facts left in the live set by compaction.
    pub max_facts: usize,
    /// Facts with confidence strictly below this value are archived by compaction.
    pub low_confidence_threshold: f32,
    /// Age in days (since last confirmation) used by compaction's staleness rule.
    pub stale_days: i64,
    /// Similarity threshold handed to the consolidation pass by `run_lifecycle`.
    pub consolidation_similarity: f32,
}

impl Default for LifecycleConfig {
    /// Builds a configuration from the module-level default constants.
    fn default() -> Self {
        Self {
            decay_rate_per_day: DEFAULT_DECAY_RATE,
            max_facts: DEFAULT_MAX_FACTS,
            low_confidence_threshold: LOW_CONFIDENCE_THRESHOLD,
            stale_days: STALE_DAYS,
            consolidation_similarity: DEFAULT_CONSOLIDATION_SIMILARITY,
        }
    }
}
40
/// Summary of what a single lifecycle run changed.
///
/// Derives `Clone`/`PartialEq`/`Eq` so callers and tests can copy and compare reports.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct LifecycleReport {
    /// Facts whose confidence was changed by the decay pass.
    pub decayed_count: usize,
    /// Facts removed because they were merged into a similar fact.
    pub consolidated_count: usize,
    /// Facts taken out of the live set by compaction, including overflow beyond `max_facts`.
    pub archived_count: usize,
    /// Facts selected by compaction's low-confidence / staleness rules
    /// (overflow trimming is not counted here).
    pub compacted_count: usize,
    /// Facts left in the live set when the run finished.
    pub remaining_facts: usize,
}
49
50pub fn apply_confidence_decay(facts: &mut [KnowledgeFact], config: &LifecycleConfig) -> usize {
51    let now = Utc::now();
52    let mut count = 0;
53
54    for fact in facts.iter_mut() {
55        if !fact.is_current() {
56            continue;
57        }
58
59        if let Some(valid_until) = fact.valid_until {
60            if valid_until < now && fact.confidence > 0.1 {
61                fact.confidence = 0.1;
62                count += 1;
63                continue;
64            }
65        }
66
67        let days_since_confirmed = now.signed_duration_since(fact.last_confirmed).num_days() as f32;
68        let days_since_retrieved = fact
69            .last_retrieved
70            .map_or(3650.0, |t| now.signed_duration_since(t).num_days() as f32);
71        let retrieval_count = fact.retrieval_count as f32;
72
73        if days_since_confirmed > 0.0 {
74            // FadeMem-inspired: protect frequently/recently retrieved facts.
75            // Deterministic, local-only signals; never hard-delete (archive-only elsewhere).
76            let freq_protect = 1.0 / (1.0 + retrieval_count.ln_1p()); // 1.0 .. ~0.2
77            let recency_protect = (1.0 - (days_since_retrieved / 30.0).min(1.0)).max(0.0); // 1.0 if today, 0.0 after 30d
78            let protect = (freq_protect * (1.0 - 0.5 * recency_protect)).max(0.05);
79            let decay = config.decay_rate_per_day * days_since_confirmed * protect;
80            let new_confidence = (fact.confidence - decay).max(0.05);
81            if (new_confidence - fact.confidence).abs() > 0.001 {
82                fact.confidence = new_confidence;
83                count += 1;
84            }
85        }
86    }
87
88    count
89}
90
91pub fn consolidate_similar(facts: &mut Vec<KnowledgeFact>, similarity_threshold: f32) -> usize {
92    let mut to_remove: std::collections::HashSet<usize> = std::collections::HashSet::new();
93
94    let mut category_groups: std::collections::HashMap<String, Vec<usize>> =
95        std::collections::HashMap::new();
96    for (i, f) in facts.iter().enumerate() {
97        if f.is_current() {
98            category_groups
99                .entry(f.category.clone())
100                .or_default()
101                .push(i);
102        }
103    }
104
105    for indices in category_groups.values() {
106        for (pos_a, &i) in indices.iter().enumerate() {
107            if to_remove.contains(&i) {
108                continue;
109            }
110            for &j in &indices[pos_a + 1..] {
111                if to_remove.contains(&j) {
112                    continue;
113                }
114                let sim = word_similarity(&facts[i].value, &facts[j].value);
115                if sim >= similarity_threshold {
116                    if facts[i].confidence >= facts[j].confidence {
117                        facts[i].confirmation_count += facts[j].confirmation_count;
118                        if facts[j].last_confirmed > facts[i].last_confirmed {
119                            facts[i].last_confirmed = facts[j].last_confirmed;
120                        }
121                        to_remove.insert(j);
122                    } else {
123                        facts[j].confirmation_count += facts[i].confirmation_count;
124                        if facts[i].last_confirmed > facts[j].last_confirmed {
125                            facts[j].last_confirmed = facts[i].last_confirmed;
126                        }
127                        to_remove.insert(i);
128                        break;
129                    }
130                }
131            }
132        }
133    }
134
135    let count = to_remove.len();
136    let mut sorted: Vec<usize> = to_remove.into_iter().collect();
137    sorted.sort_unstable();
138    for idx in sorted.into_iter().rev() {
139        facts.remove(idx);
140    }
141
142    count
143}
144
145pub fn compact(
146    facts: &mut Vec<KnowledgeFact>,
147    config: &LifecycleConfig,
148) -> (usize, Vec<KnowledgeFact>) {
149    let mut archived: Vec<KnowledgeFact> = Vec::new();
150    let now = Utc::now();
151    let stale_threshold = now - Duration::days(config.stale_days);
152
153    let mut to_archive: Vec<usize> = Vec::new();
154
155    for (i, fact) in facts.iter().enumerate() {
156        let recently_retrieved = fact
157            .last_retrieved
158            .is_some_and(|t| now.signed_duration_since(t).num_days() < 14);
159        let frequently_retrieved = fact.retrieval_count >= 5;
160
161        if fact.confidence < config.low_confidence_threshold {
162            to_archive.push(i);
163            continue;
164        }
165
166        if fact.last_confirmed < stale_threshold
167            && fact.confirmation_count <= 1
168            && fact.confidence < 0.5
169            && !recently_retrieved
170            && !frequently_retrieved
171        {
172            to_archive.push(i);
173        }
174    }
175
176    to_archive.sort_unstable();
177    to_archive.dedup();
178    let count = to_archive.len();
179
180    for idx in to_archive.into_iter().rev() {
181        archived.push(facts.remove(idx));
182    }
183
184    if facts.len() > config.max_facts {
185        facts.sort_by(|a, b| {
186            b.confidence
187                .partial_cmp(&a.confidence)
188                .unwrap_or(std::cmp::Ordering::Equal)
189        });
190        let excess: Vec<KnowledgeFact> = facts.drain(config.max_facts..).collect();
191        archived.extend(excess);
192    }
193
194    (count, archived)
195}
196
197pub fn run_lifecycle(facts: &mut Vec<KnowledgeFact>, config: &LifecycleConfig) -> LifecycleReport {
198    let decayed = apply_confidence_decay(facts, config);
199    let consolidated = consolidate_similar(facts, config.consolidation_similarity);
200    let (compacted, archived) = compact(facts, config);
201
202    if !archived.is_empty() {
203        let _ = archive_facts(&archived);
204    }
205
206    LifecycleReport {
207        decayed_count: decayed,
208        consolidated_count: consolidated,
209        archived_count: archived.len(),
210        compacted_count: compacted,
211        remaining_facts: facts.len(),
212    }
213}
214
215#[derive(Debug, Serialize, Deserialize)]
216struct ArchivedFacts {
217    pub archived_at: DateTime<Utc>,
218    pub facts: Vec<KnowledgeFact>,
219}
220
221fn archive_facts(facts: &[KnowledgeFact]) -> Result<(), String> {
222    let dir = crate::core::data_dir::lean_ctx_data_dir()?
223        .join("memory")
224        .join("archive");
225    std::fs::create_dir_all(&dir).map_err(|e| format!("{e}"))?;
226
227    let filename = format!("archive-{}.json", Utc::now().format("%Y%m%d-%H%M%S"));
228    let archive = ArchivedFacts {
229        archived_at: Utc::now(),
230        facts: facts.to_vec(),
231    };
232    let json = serde_json::to_string_pretty(&archive).map_err(|e| format!("{e}"))?;
233    std::fs::write(dir.join(filename), json).map_err(|e| format!("{e}"))
234}
235
236pub fn restore_archive(archive_path: &str) -> Result<Vec<KnowledgeFact>, String> {
237    let data = std::fs::read_to_string(archive_path).map_err(|e| format!("{e}"))?;
238    let archive: ArchivedFacts = serde_json::from_str(&data).map_err(|e| format!("{e}"))?;
239    Ok(archive.facts)
240}
241
242pub fn list_archives() -> Vec<PathBuf> {
243    let dir = match crate::core::data_dir::lean_ctx_data_dir() {
244        Ok(d) => d.join("memory").join("archive"),
245        Err(_) => return Vec::new(),
246    };
247
248    if !dir.exists() {
249        return Vec::new();
250    }
251
252    let mut archives: Vec<PathBuf> = std::fs::read_dir(&dir)
253        .into_iter()
254        .flatten()
255        .flatten()
256        .filter(|e| e.path().extension().is_some_and(|ext| ext == "json"))
257        .map(|e| e.path())
258        .collect();
259
260    archives.sort();
261    archives
262}
263
/// Case-insensitive word overlap between two strings, in `0.0..=1.0`.
///
/// Both inputs are lowercased and split on whitespace into sets of distinct
/// words; the score is `|shared words| / |all distinct words|`. Two inputs
/// without any words count as identical (`1.0`).
fn word_similarity(a: &str, b: &str) -> f32 {
    use std::collections::HashSet;

    let left_text = a.to_lowercase();
    let right_text = b.to_lowercase();
    let left: HashSet<&str> = left_text.split_whitespace().collect();
    let right: HashSet<&str> = right_text.split_whitespace().collect();

    let shared = left.iter().filter(|word| right.contains(*word)).count();
    // Inclusion–exclusion: size of the union of the two word sets.
    let total = left.len() + right.len() - shared;

    match total {
        // No words on either side.
        0 => 1.0,
        _ => shared as f32 / total as f32,
    }
}
283
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a fact that was created, confirmed and became valid `days_old`
    /// days ago, with a single confirmation and no retrievals.
    fn make_old_fact(
        category: &str,
        key: &str,
        value: &str,
        confidence: f32,
        days_old: i64,
    ) -> KnowledgeFact {
        let past = Utc::now() - Duration::days(days_old);
        KnowledgeFact {
            category: category.to_string(),
            key: key.to_string(),
            value: value.to_string(),
            source_session: "s1".to_string(),
            confidence,
            created_at: past,
            last_confirmed: past,
            retrieval_count: 0,
            last_retrieved: None,
            valid_from: Some(past),
            valid_until: None,
            supersedes: None,
            confirmation_count: 1,
            feedback_up: 0,
            feedback_down: 0,
            last_feedback: None,
            privacy: crate::core::memory_boundary::FactPrivacy::default(),
            imported_from: None,
        }
    }

    /// Builds a fact that was created and confirmed just now.
    fn make_fact(category: &str, key: &str, value: &str, confidence: f32) -> KnowledgeFact {
        make_old_fact(category, key, value, confidence, 0)
    }

    #[test]
    fn decay_reduces_confidence() {
        let config = LifecycleConfig::default();
        let mut facts = vec![make_old_fact("arch", "db", "PostgreSQL", 0.9, 10)];

        let count = apply_confidence_decay(&mut facts, &config);
        assert_eq!(count, 1);
        assert!(facts[0].confidence < 0.9);
        assert!(facts[0].confidence > 0.7);
    }

    #[test]
    fn decay_skips_recent_facts() {
        let config = LifecycleConfig::default();
        let mut facts = vec![make_fact("arch", "db", "PostgreSQL", 0.9)];

        let count = apply_confidence_decay(&mut facts, &config);
        assert_eq!(count, 0);
    }

    #[test]
    fn consolidate_similar_facts() {
        let mut facts = vec![
            make_fact("arch", "db", "uses PostgreSQL database", 0.8),
            make_fact("arch", "db2", "uses PostgreSQL database system", 0.6),
            make_fact("ops", "deploy", "docker compose up", 0.9),
        ];

        let count = consolidate_similar(&mut facts, 0.7);
        assert!(count > 0, "Should consolidate similar facts");
        assert!(facts.len() < 3);
    }

    #[test]
    fn consolidate_merges_confirmation_counts() {
        let mut facts = vec![
            make_fact("arch", "db", "uses PostgreSQL database", 0.8),
            make_fact("arch", "db2", "uses PostgreSQL database", 0.6),
        ];

        let count = consolidate_similar(&mut facts, 0.9);
        assert_eq!(count, 1);
        assert_eq!(facts.len(), 1);
        // The higher-confidence fact survives and inherits the other's confirmations.
        assert_eq!(facts[0].key, "db");
        assert_eq!(facts[0].confirmation_count, 2);
    }

    #[test]
    fn consolidate_keeps_different_categories() {
        let mut facts = vec![
            make_fact("arch", "db", "PostgreSQL", 0.8),
            make_fact("ops", "db", "PostgreSQL", 0.8),
        ];

        let count = consolidate_similar(&mut facts, 0.9);
        assert_eq!(count, 0, "Different categories should not consolidate");
    }

    #[test]
    fn compact_removes_low_confidence() {
        let config = LifecycleConfig::default();
        let mut facts = vec![
            make_fact("arch", "db", "PostgreSQL", 0.9),
            make_fact("arch", "cache", "Redis", 0.1),
        ];

        let (count, archived) = compact(&mut facts, &config);
        assert_eq!(count, 1);
        assert_eq!(facts.len(), 1);
        assert_eq!(archived.len(), 1);
        assert_eq!(archived[0].key, "cache");
    }

    #[test]
    fn compact_archives_stale_facts() {
        let config = LifecycleConfig::default();
        let mut facts = vec![
            make_fact("arch", "db", "PostgreSQL", 0.9),
            make_old_fact("arch", "old", "ancient thing", 0.4, 60),
        ];

        let (count, archived) = compact(&mut facts, &config);
        assert_eq!(count, 1);
        assert_eq!(archived[0].key, "old");
    }

    #[test]
    fn compact_trims_to_max_facts() {
        let config = LifecycleConfig {
            max_facts: 2,
            ..Default::default()
        };
        let mut facts = vec![
            make_fact("a", "low", "x", 0.5),
            make_fact("a", "high", "y", 0.9),
            make_fact("a", "mid", "z", 0.7),
        ];

        let (count, archived) = compact(&mut facts, &config);
        // Overflow trimming is not part of the rule-based count.
        assert_eq!(count, 0);
        assert_eq!(facts.len(), 2);
        assert_eq!(archived.len(), 1);
        assert_eq!(archived[0].key, "low");
    }

    // NOTE(review): `run_lifecycle` calls `archive_facts`, so this test appears to
    // write into the real data directory — confirm whether it should be redirected.
    #[test]
    fn full_lifecycle_run() {
        let config = LifecycleConfig {
            max_facts: 5,
            ..Default::default()
        };

        let mut facts = vec![
            make_fact("arch", "db", "PostgreSQL", 0.9),
            make_fact("arch", "cache", "Redis", 0.8),
            make_old_fact("arch", "old1", "thing1", 0.2, 50),
            make_old_fact("arch", "old2", "thing2", 0.15, 60),
            make_fact("ops", "deploy", "docker compose", 0.7),
        ];

        let report = run_lifecycle(&mut facts, &config);
        assert!(report.remaining_facts <= config.max_facts);
        assert!(report.decayed_count > 0 || report.compacted_count > 0);
    }

    #[test]
    fn word_similarity_identical() {
        assert!((word_similarity("hello world", "hello world") - 1.0).abs() < 0.01);
    }

    #[test]
    fn word_similarity_partial() {
        let sim = word_similarity("uses PostgreSQL database", "PostgreSQL database system");
        assert!(sim >= 0.5, "Expected >= 0.5 but got {sim}");
        assert!(sim < 1.0);
    }

    #[test]
    fn word_similarity_different() {
        let sim = word_similarity("Redis cache", "Docker compose");
        assert!(sim < 0.1);
    }

    #[test]
    fn word_similarity_ignores_case() {
        assert!((word_similarity("Redis Cache", "redis cache") - 1.0).abs() < 0.01);
    }

    #[test]
    fn word_similarity_empty_inputs() {
        assert!((word_similarity("", "") - 1.0).abs() < 0.01);
        assert!(word_similarity("redis", "") < 0.01);
    }
}