Skip to main content

lean_ctx/core/
memory_lifecycle.rs

//! Memory Lifecycle Management — consolidation, decay, compaction, archival.
//!
//! Runs automatically on knowledge stores to keep memory healthy:
//! - Confidence decay over time
//! - Consolidation of near-duplicate facts (word-overlap similarity)
//! - Compaction when limits are exceeded
//! - Archival of old/unused facts
8
9use chrono::{DateTime, Duration, Utc};
10use serde::{Deserialize, Serialize};
11use std::path::PathBuf;
12
13use super::knowledge::KnowledgeFact;
14
/// Default confidence subtracted per day since a fact was last confirmed.
const DEFAULT_DECAY_RATE: f32 = 0.01;
/// Default cap on the number of facts kept in a store after compaction.
const DEFAULT_MAX_FACTS: usize = 1000;
/// Default confidence below which compaction archives a fact.
const LOW_CONFIDENCE_THRESHOLD: f32 = 0.3;
/// Default number of days since last confirmation before a weak fact may be treated as stale.
const STALE_DAYS: i64 = 30;
19
/// Tunable thresholds for the decay, consolidation and compaction passes.
#[derive(Debug, Clone)]
pub struct LifecycleConfig {
    /// Confidence subtracted per whole day since a fact was last confirmed,
    /// before retrieval-based protection is applied.
    pub decay_rate_per_day: f32,
    /// Maximum number of facts left in the store after compaction; the
    /// lowest-confidence excess is moved to the archive.
    pub max_facts: usize,
    /// Facts whose confidence is strictly below this value are archived.
    pub low_confidence_threshold: f32,
    /// Days since last confirmation after which a weak, unused fact becomes
    /// a candidate for stale archival.
    pub stale_days: i64,
    /// Minimum word-overlap similarity at which two facts of the same
    /// category are merged into one.
    pub consolidation_similarity: f32,
}
28
29impl Default for LifecycleConfig {
30    fn default() -> Self {
31        Self {
32            decay_rate_per_day: DEFAULT_DECAY_RATE,
33            max_facts: DEFAULT_MAX_FACTS,
34            low_confidence_threshold: LOW_CONFIDENCE_THRESHOLD,
35            stale_days: STALE_DAYS,
36            consolidation_similarity: 0.85,
37        }
38    }
39}
40
/// Summary of what a single lifecycle run changed.
#[derive(Debug, Default)]
pub struct LifecycleReport {
    /// Facts whose confidence was lowered by the decay pass.
    pub decayed_count: usize,
    /// Facts removed because they were merged into a similar fact.
    pub consolidated_count: usize,
    /// Total facts taken out of the store by compaction, including the
    /// overflow beyond `max_facts`.
    pub archived_count: usize,
    /// Facts removed by the low-confidence / staleness rules only (the
    /// `max_facts` overflow is not included).
    pub compacted_count: usize,
    /// Facts left in the store when the run finished.
    pub remaining_facts: usize,
}
49
50pub fn apply_confidence_decay(facts: &mut [KnowledgeFact], config: &LifecycleConfig) -> usize {
51    let now = Utc::now();
52    let mut count = 0;
53
54    for fact in facts.iter_mut() {
55        if !fact.is_current() {
56            continue;
57        }
58
59        if let Some(valid_until) = fact.valid_until {
60            if valid_until < now && fact.confidence > 0.1 {
61                fact.confidence = 0.1;
62                count += 1;
63                continue;
64            }
65        }
66
67        let days_since_confirmed = now.signed_duration_since(fact.last_confirmed).num_days() as f32;
68        let days_since_retrieved = fact
69            .last_retrieved
70            .map(|t| now.signed_duration_since(t).num_days() as f32)
71            .unwrap_or(3650.0);
72        let retrieval_count = fact.retrieval_count as f32;
73
74        if days_since_confirmed > 0.0 {
75            // FadeMem-inspired: protect frequently/recently retrieved facts.
76            // Deterministic, local-only signals; never hard-delete (archive-only elsewhere).
77            let freq_protect = 1.0 / (1.0 + retrieval_count.ln_1p()); // 1.0 .. ~0.2
78            let recency_protect = (1.0 - (days_since_retrieved / 30.0).min(1.0)).max(0.0); // 1.0 if today, 0.0 after 30d
79            let protect = (freq_protect * (1.0 - 0.5 * recency_protect)).max(0.05);
80            let decay = config.decay_rate_per_day * days_since_confirmed * protect;
81            let new_confidence = (fact.confidence - decay).max(0.05);
82            if (new_confidence - fact.confidence).abs() > 0.001 {
83                fact.confidence = new_confidence;
84                count += 1;
85            }
86        }
87    }
88
89    count
90}
91
92pub fn consolidate_similar(facts: &mut Vec<KnowledgeFact>, similarity_threshold: f32) -> usize {
93    let mut to_remove: Vec<usize> = Vec::new();
94    let len = facts.len();
95
96    for i in 0..len {
97        if to_remove.contains(&i) || !facts[i].is_current() {
98            continue;
99        }
100
101        for j in (i + 1)..len {
102            if to_remove.contains(&j) || !facts[j].is_current() {
103                continue;
104            }
105
106            if facts[i].category != facts[j].category {
107                continue;
108            }
109
110            let sim = word_similarity(&facts[i].value, &facts[j].value);
111            if sim >= similarity_threshold {
112                if facts[i].confidence >= facts[j].confidence {
113                    facts[i].confirmation_count += facts[j].confirmation_count;
114                    if facts[j].last_confirmed > facts[i].last_confirmed {
115                        facts[i].last_confirmed = facts[j].last_confirmed;
116                    }
117                    to_remove.push(j);
118                } else {
119                    facts[j].confirmation_count += facts[i].confirmation_count;
120                    if facts[i].last_confirmed > facts[j].last_confirmed {
121                        facts[j].last_confirmed = facts[i].last_confirmed;
122                    }
123                    to_remove.push(i);
124                    break;
125                }
126            }
127        }
128    }
129
130    to_remove.sort_unstable();
131    to_remove.dedup();
132    let count = to_remove.len();
133
134    for idx in to_remove.into_iter().rev() {
135        facts.remove(idx);
136    }
137
138    count
139}
140
141pub fn compact(
142    facts: &mut Vec<KnowledgeFact>,
143    config: &LifecycleConfig,
144) -> (usize, Vec<KnowledgeFact>) {
145    let mut archived: Vec<KnowledgeFact> = Vec::new();
146    let now = Utc::now();
147    let stale_threshold = now - Duration::days(config.stale_days);
148
149    let mut to_archive: Vec<usize> = Vec::new();
150
151    for (i, fact) in facts.iter().enumerate() {
152        let recently_retrieved = fact
153            .last_retrieved
154            .is_some_and(|t| now.signed_duration_since(t).num_days() < 14);
155        let frequently_retrieved = fact.retrieval_count >= 5;
156
157        if fact.confidence < config.low_confidence_threshold {
158            to_archive.push(i);
159            continue;
160        }
161
162        if fact.last_confirmed < stale_threshold
163            && fact.confirmation_count <= 1
164            && fact.confidence < 0.5
165            && !recently_retrieved
166            && !frequently_retrieved
167        {
168            to_archive.push(i);
169        }
170    }
171
172    to_archive.sort_unstable();
173    to_archive.dedup();
174    let count = to_archive.len();
175
176    for idx in to_archive.into_iter().rev() {
177        archived.push(facts.remove(idx));
178    }
179
180    if facts.len() > config.max_facts {
181        facts.sort_by(|a, b| {
182            b.confidence
183                .partial_cmp(&a.confidence)
184                .unwrap_or(std::cmp::Ordering::Equal)
185        });
186        let excess: Vec<KnowledgeFact> = facts.drain(config.max_facts..).collect();
187        archived.extend(excess);
188    }
189
190    (count, archived)
191}
192
193pub fn run_lifecycle(facts: &mut Vec<KnowledgeFact>, config: &LifecycleConfig) -> LifecycleReport {
194    let decayed = apply_confidence_decay(facts, config);
195    let consolidated = consolidate_similar(facts, config.consolidation_similarity);
196    let (compacted, archived) = compact(facts, config);
197
198    if !archived.is_empty() {
199        let _ = archive_facts(&archived);
200    }
201
202    LifecycleReport {
203        decayed_count: decayed,
204        consolidated_count: consolidated,
205        archived_count: archived.len(),
206        compacted_count: compacted,
207        remaining_facts: facts.len(),
208    }
209}
210
211#[derive(Debug, Serialize, Deserialize)]
212struct ArchivedFacts {
213    pub archived_at: DateTime<Utc>,
214    pub facts: Vec<KnowledgeFact>,
215}
216
217fn archive_facts(facts: &[KnowledgeFact]) -> Result<(), String> {
218    let dir = crate::core::data_dir::lean_ctx_data_dir()?
219        .join("memory")
220        .join("archive");
221    std::fs::create_dir_all(&dir).map_err(|e| format!("{e}"))?;
222
223    let filename = format!("archive-{}.json", Utc::now().format("%Y%m%d-%H%M%S"));
224    let archive = ArchivedFacts {
225        archived_at: Utc::now(),
226        facts: facts.to_vec(),
227    };
228    let json = serde_json::to_string_pretty(&archive).map_err(|e| format!("{e}"))?;
229    std::fs::write(dir.join(filename), json).map_err(|e| format!("{e}"))
230}
231
232pub fn restore_archive(archive_path: &str) -> Result<Vec<KnowledgeFact>, String> {
233    let data = std::fs::read_to_string(archive_path).map_err(|e| format!("{e}"))?;
234    let archive: ArchivedFacts = serde_json::from_str(&data).map_err(|e| format!("{e}"))?;
235    Ok(archive.facts)
236}
237
238pub fn list_archives() -> Vec<PathBuf> {
239    let dir = match crate::core::data_dir::lean_ctx_data_dir() {
240        Ok(d) => d.join("memory").join("archive"),
241        Err(_) => return Vec::new(),
242    };
243
244    if !dir.exists() {
245        return Vec::new();
246    }
247
248    let mut archives: Vec<PathBuf> = std::fs::read_dir(&dir)
249        .into_iter()
250        .flatten()
251        .flatten()
252        .filter(|e| {
253            e.path()
254                .extension()
255                .map(|ext| ext == "json")
256                .unwrap_or(false)
257        })
258        .map(|e| e.path())
259        .collect();
260
261    archives.sort();
262    archives
263}
264
/// Case-insensitive Jaccard similarity of the whitespace-separated word sets
/// of `a` and `b`.
///
/// Returns a value in `0.0..=1.0`: the number of distinct words the two
/// strings share divided by the number of distinct words in either. Two
/// strings without any words are treated as identical (`1.0`).
fn word_similarity(a: &str, b: &str) -> f32 {
    let a_lower = a.to_lowercase();
    let b_lower = b.to_lowercase();
    let a_words: std::collections::HashSet<&str> = a_lower.split_whitespace().collect();
    let b_words: std::collections::HashSet<&str> = b_lower.split_whitespace().collect();

    if a_words.is_empty() && b_words.is_empty() {
        return 1.0;
    }

    let intersection = a_words.intersection(&b_words).count();
    // |A ∪ B| = |A| + |B| - |A ∩ B|; at least one set is non-empty here, so
    // the union is never zero.
    let union = a_words.len() + b_words.len() - intersection;

    intersection as f32 / union as f32
}
284
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a fact that was created, confirmed and became valid right now.
    fn make_fact(category: &str, key: &str, value: &str, confidence: f32) -> KnowledgeFact {
        make_old_fact(category, key, value, confidence, 0)
    }

    /// Builds a never-retrieved, once-confirmed fact whose creation,
    /// confirmation and validity start all lie `days_old` days in the past.
    fn make_old_fact(
        category: &str,
        key: &str,
        value: &str,
        confidence: f32,
        days_old: i64,
    ) -> KnowledgeFact {
        let past = Utc::now() - Duration::days(days_old);
        KnowledgeFact {
            category: category.to_string(),
            key: key.to_string(),
            value: value.to_string(),
            source_session: "s1".to_string(),
            confidence,
            created_at: past,
            last_confirmed: past,
            retrieval_count: 0,
            last_retrieved: None,
            valid_from: Some(past),
            valid_until: None,
            supersedes: None,
            confirmation_count: 1,
        }
    }

    #[test]
    fn decay_reduces_confidence() {
        let config = LifecycleConfig::default();
        let mut facts = vec![make_old_fact("arch", "db", "PostgreSQL", 0.9, 10)];

        let count = apply_confidence_decay(&mut facts, &config);
        assert_eq!(count, 1);
        assert!(facts[0].confidence < 0.9);
        assert!(facts[0].confidence > 0.7);
    }

    #[test]
    fn decay_skips_recent_facts() {
        let config = LifecycleConfig::default();
        let mut facts = vec![make_fact("arch", "db", "PostgreSQL", 0.9)];

        let count = apply_confidence_decay(&mut facts, &config);
        assert_eq!(count, 0);
    }

    #[test]
    fn consolidate_similar_facts() {
        let mut facts = vec![
            make_fact("arch", "db", "uses PostgreSQL database", 0.8),
            make_fact("arch", "db2", "uses PostgreSQL database system", 0.6),
            make_fact("ops", "deploy", "docker compose up", 0.9),
        ];

        let count = consolidate_similar(&mut facts, 0.7);
        assert!(count > 0, "Should consolidate similar facts");
        assert!(facts.len() < 3);
    }

    #[test]
    fn consolidate_keeps_different_categories() {
        let mut facts = vec![
            make_fact("arch", "db", "PostgreSQL", 0.8),
            make_fact("ops", "db", "PostgreSQL", 0.8),
        ];

        let count = consolidate_similar(&mut facts, 0.9);
        assert_eq!(count, 0, "Different categories should not consolidate");
    }

    #[test]
    fn consolidate_merges_confirmation_counts_into_the_stronger_fact() {
        let mut facts = vec![
            make_fact("arch", "weak", "uses PostgreSQL", 0.6),
            make_fact("arch", "strong", "uses PostgreSQL", 0.8),
        ];

        let count = consolidate_similar(&mut facts, 0.9);
        assert_eq!(count, 1);
        assert_eq!(facts.len(), 1);
        assert_eq!(facts[0].key, "strong");
        assert_eq!(facts[0].confirmation_count, 2);
    }

    #[test]
    fn compact_removes_low_confidence() {
        let config = LifecycleConfig::default();
        let mut facts = vec![
            make_fact("arch", "db", "PostgreSQL", 0.9),
            make_fact("arch", "cache", "Redis", 0.1),
        ];

        let (count, archived) = compact(&mut facts, &config);
        assert_eq!(count, 1);
        assert_eq!(facts.len(), 1);
        assert_eq!(archived.len(), 1);
        assert_eq!(archived[0].key, "cache");
    }

    #[test]
    fn compact_archives_stale_facts() {
        let config = LifecycleConfig::default();
        let mut facts = vec![
            make_fact("arch", "db", "PostgreSQL", 0.9),
            make_old_fact("arch", "old", "ancient thing", 0.4, 60),
        ];

        let (count, archived) = compact(&mut facts, &config);
        assert_eq!(count, 1);
        assert_eq!(archived[0].key, "old");
    }

    #[test]
    fn compact_enforces_max_facts() {
        let config = LifecycleConfig {
            max_facts: 2,
            ..Default::default()
        };
        let mut facts = vec![
            make_fact("arch", "mid", "Redis", 0.8),
            make_fact("arch", "low", "Memcached", 0.7),
            make_fact("arch", "high", "PostgreSQL", 0.9),
        ];

        let (count, archived) = compact(&mut facts, &config);
        // Overflow is archived but not counted as rule-based compaction.
        assert_eq!(count, 0);
        assert_eq!(facts.len(), 2);
        assert_eq!(archived.len(), 1);
        assert_eq!(archived[0].key, "low");
    }

    // NOTE: this goes through `archive_facts`, which writes below the
    // lean_ctx data directory whenever that directory can be resolved.
    #[test]
    fn full_lifecycle_run() {
        let config = LifecycleConfig {
            max_facts: 5,
            ..Default::default()
        };

        let mut facts = vec![
            make_fact("arch", "db", "PostgreSQL", 0.9),
            make_fact("arch", "cache", "Redis", 0.8),
            make_old_fact("arch", "old1", "thing1", 0.2, 50),
            make_old_fact("arch", "old2", "thing2", 0.15, 60),
            make_fact("ops", "deploy", "docker compose", 0.7),
        ];

        let report = run_lifecycle(&mut facts, &config);
        assert!(report.remaining_facts <= config.max_facts);
        assert!(report.decayed_count > 0 || report.compacted_count > 0);
    }

    #[test]
    fn word_similarity_identical() {
        assert!((word_similarity("hello world", "hello world") - 1.0).abs() < 0.01);
    }

    #[test]
    fn word_similarity_partial() {
        let sim = word_similarity("uses PostgreSQL database", "PostgreSQL database system");
        assert!(sim >= 0.5, "Expected >= 0.5 but got {sim}");
        assert!(sim < 1.0);
    }

    #[test]
    fn word_similarity_different() {
        let sim = word_similarity("Redis cache", "Docker compose");
        assert!(sim < 0.1);
    }

    #[test]
    fn word_similarity_empty_inputs() {
        assert!((word_similarity("", "") - 1.0).abs() < f32::EPSILON);
        assert!(word_similarity("", "PostgreSQL").abs() < f32::EPSILON);
    }
}