// lean_ctx/core/memory_lifecycle.rs

//! Memory Lifecycle Management — consolidation, decay, compaction, archival.
//!
//! Runs automatically on knowledge stores to keep memory healthy:
//! - Confidence decay over time
//! - Consolidation of similar facts (word-overlap similarity within a category)
//! - Compaction when limits are exceeded
//! - Archival of old/unused facts
8
9use chrono::{DateTime, Duration, Utc};
10use serde::{Deserialize, Serialize};
11use std::path::PathBuf;
12
13use super::knowledge::KnowledgeFact;
14
/// Default for `LifecycleConfig::decay_rate_per_day`: base confidence lost per day
/// since a fact was last confirmed.
const DEFAULT_DECAY_RATE: f32 = 0.01;
/// Default for `LifecycleConfig::max_facts`: store size above which `compact`
/// archives the overflow.
const DEFAULT_MAX_FACTS: usize = 1000;
/// Default for `LifecycleConfig::low_confidence_threshold`: facts below this
/// confidence are archived by `compact`.
const LOW_CONFIDENCE_THRESHOLD: f32 = 0.3;
/// Default for `LifecycleConfig::stale_days`: days since last confirmation after
/// which `compact` may consider a fact stale.
const STALE_DAYS: i64 = 30;
19
/// Tunable thresholds for the lifecycle passes in this module.
#[derive(Debug, Clone)]
pub struct LifecycleConfig {
    /// Base confidence subtracted per whole day since a fact was last confirmed,
    /// before `apply_confidence_decay` scales it by retrieval and feedback factors.
    pub decay_rate_per_day: f32,
    /// Maximum number of facts `compact` leaves in the store; the lowest-confidence
    /// overflow is archived.
    pub max_facts: usize,
    /// `compact` archives every fact whose confidence is below this value.
    pub low_confidence_threshold: f32,
    /// Days since last confirmation after which `compact` may treat a fact as stale.
    pub stale_days: i64,
    /// Minimum word-overlap similarity at which `run_lifecycle` lets
    /// `consolidate_similar` merge two facts of the same category.
    pub consolidation_similarity: f32,
}
28
29impl Default for LifecycleConfig {
30    fn default() -> Self {
31        Self {
32            decay_rate_per_day: DEFAULT_DECAY_RATE,
33            max_facts: DEFAULT_MAX_FACTS,
34            low_confidence_threshold: LOW_CONFIDENCE_THRESHOLD,
35            stale_days: STALE_DAYS,
36            consolidation_similarity: 0.85,
37        }
38    }
39}
40
/// Summary of one `run_lifecycle` pass.
#[derive(Debug, Default)]
pub struct LifecycleReport {
    /// Facts whose confidence was changed by `apply_confidence_decay`.
    pub decayed_count: usize,
    /// Facts removed by `consolidate_similar` because they were merged into another fact.
    pub consolidated_count: usize,
    /// Facts taken out of the store for archival: confidence/staleness matches plus
    /// any overflow beyond `max_facts`.
    pub archived_count: usize,
    /// Subset of the archived facts selected by `compact`'s confidence/staleness
    /// rules (overflow trimming is not included).
    pub compacted_count: usize,
    /// Number of facts left in the store after the pass.
    pub remaining_facts: usize,
}
49
50pub fn apply_confidence_decay(facts: &mut [KnowledgeFact], config: &LifecycleConfig) -> usize {
51    let now = Utc::now();
52    let mut count = 0;
53
54    for fact in facts.iter_mut() {
55        if !fact.is_current() {
56            continue;
57        }
58
59        if let Some(valid_until) = fact.valid_until {
60            if valid_until < now && fact.confidence > 0.1 {
61                fact.confidence = 0.1;
62                count += 1;
63                continue;
64            }
65        }
66
67        let days_since_confirmed = now.signed_duration_since(fact.last_confirmed).num_days() as f32;
68        let days_since_retrieved = fact
69            .last_retrieved
70            .map_or(3650.0, |t| now.signed_duration_since(t).num_days() as f32);
71        let retrieval_count = fact.retrieval_count as f32;
72
73        if days_since_confirmed > 0.0 {
74            // FadeMem-inspired: protect frequently/recently retrieved facts.
75            // Deterministic, local-only signals; never hard-delete (archive-only elsewhere).
76            let freq_protect = 1.0 / (1.0 + retrieval_count.ln_1p()); // 1.0 .. ~0.2
77            let recency_protect = (1.0 - (days_since_retrieved / 30.0).min(1.0)).max(0.0); // 1.0 if today, 0.0 after 30d
78            let protect = (freq_protect * (1.0 - 0.5 * recency_protect)).max(0.05);
79            // Reward bridge: explicit thumbs-up/down feedback steers retention.
80            // Net-positive feedback scales decay down (keep longer); net-negative
81            // scales it up (forget faster). Logarithmic so a few votes matter but
82            // can't run away, and the penalty is capped so one downvote never
83            // collapses an otherwise healthy fact.
84            let net_feedback = i64::from(fact.feedback_up) - i64::from(fact.feedback_down);
85            let feedback_factor = match net_feedback.cmp(&0) {
86                std::cmp::Ordering::Greater => 1.0 / (1.0 + (net_feedback as f32).ln_1p()),
87                std::cmp::Ordering::Less => {
88                    (1.0 + (net_feedback.unsigned_abs() as f32).ln_1p()).min(4.0)
89                }
90                std::cmp::Ordering::Equal => 1.0,
91            };
92            let decay =
93                config.decay_rate_per_day * days_since_confirmed * protect * feedback_factor;
94            let new_confidence = (fact.confidence - decay).max(0.05);
95            if (new_confidence - fact.confidence).abs() > 0.001 {
96                fact.confidence = new_confidence;
97                count += 1;
98            }
99        }
100    }
101
102    count
103}
104
105pub fn consolidate_similar(facts: &mut Vec<KnowledgeFact>, similarity_threshold: f32) -> usize {
106    let mut to_remove: std::collections::HashSet<usize> = std::collections::HashSet::new();
107
108    let mut category_groups: std::collections::HashMap<String, Vec<usize>> =
109        std::collections::HashMap::new();
110    for (i, f) in facts.iter().enumerate() {
111        if f.is_current() {
112            category_groups
113                .entry(f.category.clone())
114                .or_default()
115                .push(i);
116        }
117    }
118
119    for indices in category_groups.values() {
120        for (pos_a, &i) in indices.iter().enumerate() {
121            if to_remove.contains(&i) {
122                continue;
123            }
124            for &j in &indices[pos_a + 1..] {
125                if to_remove.contains(&j) {
126                    continue;
127                }
128                let sim = word_similarity(&facts[i].value, &facts[j].value);
129                if sim >= similarity_threshold {
130                    if facts[i].confidence >= facts[j].confidence {
131                        facts[i].confirmation_count += facts[j].confirmation_count;
132                        if facts[j].last_confirmed > facts[i].last_confirmed {
133                            facts[i].last_confirmed = facts[j].last_confirmed;
134                        }
135                        to_remove.insert(j);
136                    } else {
137                        facts[j].confirmation_count += facts[i].confirmation_count;
138                        if facts[i].last_confirmed > facts[j].last_confirmed {
139                            facts[j].last_confirmed = facts[i].last_confirmed;
140                        }
141                        to_remove.insert(i);
142                        break;
143                    }
144                }
145            }
146        }
147    }
148
149    let count = to_remove.len();
150    let mut sorted: Vec<usize> = to_remove.into_iter().collect();
151    sorted.sort_unstable();
152    for idx in sorted.into_iter().rev() {
153        facts.remove(idx);
154    }
155
156    count
157}
158
159pub fn compact(
160    facts: &mut Vec<KnowledgeFact>,
161    config: &LifecycleConfig,
162) -> (usize, Vec<KnowledgeFact>) {
163    let mut archived: Vec<KnowledgeFact> = Vec::new();
164    let now = Utc::now();
165    let stale_threshold = now - Duration::days(config.stale_days);
166
167    let mut to_archive: Vec<usize> = Vec::new();
168
169    for (i, fact) in facts.iter().enumerate() {
170        let recently_retrieved = fact
171            .last_retrieved
172            .is_some_and(|t| now.signed_duration_since(t).num_days() < 14);
173        let frequently_retrieved = fact.retrieval_count >= 5;
174
175        if fact.confidence < config.low_confidence_threshold {
176            to_archive.push(i);
177            continue;
178        }
179
180        if fact.last_confirmed < stale_threshold
181            && fact.confirmation_count <= 1
182            && fact.confidence < 0.5
183            && !recently_retrieved
184            && !frequently_retrieved
185        {
186            to_archive.push(i);
187        }
188    }
189
190    to_archive.sort_unstable();
191    to_archive.dedup();
192    let count = to_archive.len();
193
194    for idx in to_archive.into_iter().rev() {
195        archived.push(facts.remove(idx));
196    }
197
198    if facts.len() > config.max_facts {
199        facts.sort_by(|a, b| {
200            b.confidence
201                .partial_cmp(&a.confidence)
202                .unwrap_or(std::cmp::Ordering::Equal)
203        });
204        let excess: Vec<KnowledgeFact> = facts.drain(config.max_facts..).collect();
205        archived.extend(excess);
206    }
207
208    (count, archived)
209}
210
211pub fn run_lifecycle(facts: &mut Vec<KnowledgeFact>, config: &LifecycleConfig) -> LifecycleReport {
212    let decayed = apply_confidence_decay(facts, config);
213    let consolidated = consolidate_similar(facts, config.consolidation_similarity);
214    let (compacted, archived) = compact(facts, config);
215
216    if !archived.is_empty() {
217        let _ = archive_facts(&archived);
218    }
219
220    LifecycleReport {
221        decayed_count: decayed,
222        consolidated_count: consolidated,
223        archived_count: archived.len(),
224        compacted_count: compacted,
225        remaining_facts: facts.len(),
226    }
227}
228
/// On-disk JSON shape of one archive file written by `archive_facts` and read back
/// by `restore_archive`.
#[derive(Debug, Serialize, Deserialize)]
struct ArchivedFacts {
    /// UTC time at which the archive was created.
    pub archived_at: DateTime<Utc>,
    /// The facts that were removed from the store.
    pub facts: Vec<KnowledgeFact>,
}
234
235fn archive_facts(facts: &[KnowledgeFact]) -> Result<(), String> {
236    let dir = crate::core::data_dir::lean_ctx_data_dir()?
237        .join("memory")
238        .join("archive");
239    std::fs::create_dir_all(&dir).map_err(|e| format!("{e}"))?;
240
241    let filename = format!("archive-{}.json", Utc::now().format("%Y%m%d-%H%M%S"));
242    let archive = ArchivedFacts {
243        archived_at: Utc::now(),
244        facts: facts.to_vec(),
245    };
246    let json = serde_json::to_string_pretty(&archive).map_err(|e| format!("{e}"))?;
247    std::fs::write(dir.join(filename), json).map_err(|e| format!("{e}"))
248}
249
250pub fn restore_archive(archive_path: &str) -> Result<Vec<KnowledgeFact>, String> {
251    let data = std::fs::read_to_string(archive_path).map_err(|e| format!("{e}"))?;
252    let archive: ArchivedFacts = serde_json::from_str(&data).map_err(|e| format!("{e}"))?;
253    Ok(archive.facts)
254}
255
256pub fn list_archives() -> Vec<PathBuf> {
257    let dir = match crate::core::data_dir::lean_ctx_data_dir() {
258        Ok(d) => d.join("memory").join("archive"),
259        Err(_) => return Vec::new(),
260    };
261
262    if !dir.exists() {
263        return Vec::new();
264    }
265
266    let mut archives: Vec<PathBuf> = std::fs::read_dir(&dir)
267        .into_iter()
268        .flatten()
269        .flatten()
270        .filter(|e| e.path().extension().is_some_and(|ext| ext == "json"))
271        .map(|e| e.path())
272        .collect();
273
274    archives.sort();
275    archives
276}
277
/// Case-insensitive Jaccard similarity between the whitespace-separated word sets
/// of `a` and `b`.
///
/// Returns a value in `0.0..=1.0`: `1.0` when both strings contain the same set of
/// words (including when neither contains any word) and `0.0` when they share none.
/// Repeated words count once; punctuation is not stripped, so `"db,"` and `"db"`
/// are different words.
fn word_similarity(a: &str, b: &str) -> f32 {
    let a_lower = a.to_lowercase();
    let b_lower = b.to_lowercase();
    let a_words: std::collections::HashSet<&str> = a_lower.split_whitespace().collect();
    let b_words: std::collections::HashSet<&str> = b_lower.split_whitespace().collect();

    // Two empty word sets are treated as identical rather than dividing by zero.
    if a_words.is_empty() && b_words.is_empty() {
        return 1.0;
    }

    let shared = a_words.intersection(&b_words).count();
    // |A ∪ B| = |A| + |B| − |A ∩ B|; at least one set is non-empty here, so this
    // is never zero.
    let total = a_words.len() + b_words.len() - shared;

    shared as f32 / total as f32
}
297
#[cfg(test)]
mod tests {
    use super::*;
    use crate::core::knowledge::KnowledgeArchetype;

    /// Shared fixture: a never-retrieved, once-confirmed fact with no feedback whose
    /// creation, confirmation and validity start all equal `stamp`.
    fn fact_at(
        category: &str,
        key: &str,
        value: &str,
        confidence: f32,
        stamp: DateTime<Utc>,
    ) -> KnowledgeFact {
        KnowledgeFact {
            category: category.to_string(),
            key: key.to_string(),
            value: value.to_string(),
            source_session: "s1".to_string(),
            confidence,
            created_at: stamp,
            last_confirmed: stamp,
            retrieval_count: 0,
            last_retrieved: None,
            valid_from: Some(stamp),
            valid_until: None,
            supersedes: None,
            confirmation_count: 1,
            feedback_up: 0,
            feedback_down: 0,
            last_feedback: None,
            privacy: crate::core::memory_boundary::FactPrivacy::default(),
            imported_from: None,
            archetype: KnowledgeArchetype::default(),
            fidelity: None,
            revision_count: 0,
        }
    }

    /// A fact created and confirmed right now.
    fn make_fact(category: &str, key: &str, value: &str, confidence: f32) -> KnowledgeFact {
        fact_at(category, key, value, confidence, Utc::now())
    }

    /// A fact created and last confirmed `days_old` days ago.
    fn make_old_fact(
        category: &str,
        key: &str,
        value: &str,
        confidence: f32,
        days_old: i64,
    ) -> KnowledgeFact {
        fact_at(
            category,
            key,
            value,
            confidence,
            Utc::now() - Duration::days(days_old),
        )
    }

    #[test]
    fn decay_reduces_confidence() {
        let mut facts = vec![make_old_fact("arch", "db", "PostgreSQL", 0.9, 10)];

        let changed = apply_confidence_decay(&mut facts, &LifecycleConfig::default());

        assert_eq!(changed, 1);
        let confidence = facts[0].confidence;
        assert!(confidence < 0.9);
        assert!(confidence > 0.7);
    }

    #[test]
    fn decay_skips_recent_facts() {
        let mut facts = vec![make_fact("arch", "db", "PostgreSQL", 0.9)];

        let changed = apply_confidence_decay(&mut facts, &LifecycleConfig::default());

        assert_eq!(changed, 0);
    }

    #[test]
    fn feedback_steers_decay_keep_vs_forget() {
        let mut praised = make_old_fact("arch", "loved", "keep me", 0.9, 10);
        praised.feedback_up = 5;
        let mut panned = make_old_fact("arch", "hated", "forget me", 0.9, 10);
        panned.feedback_down = 5;
        let neutral = make_old_fact("arch", "meh", "neutral", 0.9, 10);

        let mut facts = vec![praised, panned, neutral];
        apply_confidence_decay(&mut facts, &LifecycleConfig::default());

        let praised_c = facts[0].confidence;
        let panned_c = facts[1].confidence;
        let neutral_c = facts[2].confidence;

        // Reward bridge: up-voted retains more than neutral, neutral more than down-voted.
        assert!(
            praised_c > neutral_c,
            "praised {praised_c} should outlast neutral {neutral_c}"
        );
        assert!(
            neutral_c > panned_c,
            "neutral {neutral_c} should outlast panned {panned_c}"
        );
        // Even a heavily down-voted fact only fades toward the floor — never hard-deleted.
        assert!(panned_c >= 0.05);
    }

    #[test]
    fn consolidate_similar_facts() {
        let mut facts = vec![
            make_fact("arch", "db", "uses PostgreSQL database", 0.8),
            make_fact("arch", "db2", "uses PostgreSQL database system", 0.6),
            make_fact("ops", "deploy", "docker compose up", 0.9),
        ];

        let merged = consolidate_similar(&mut facts, 0.7);

        assert!(merged > 0, "Should consolidate similar facts");
        assert!(facts.len() < 3);
    }

    #[test]
    fn consolidate_keeps_different_categories() {
        let mut facts = vec![
            make_fact("arch", "db", "PostgreSQL", 0.8),
            make_fact("ops", "db", "PostgreSQL", 0.8),
        ];

        let merged = consolidate_similar(&mut facts, 0.9);

        assert_eq!(merged, 0, "Different categories should not consolidate");
    }

    #[test]
    fn compact_removes_low_confidence() {
        let mut facts = vec![
            make_fact("arch", "db", "PostgreSQL", 0.9),
            make_fact("arch", "cache", "Redis", 0.1),
        ];

        let (count, archived) = compact(&mut facts, &LifecycleConfig::default());

        assert_eq!(count, 1);
        assert_eq!(facts.len(), 1);
        assert_eq!(archived.len(), 1);
        assert_eq!(archived[0].key, "cache");
    }

    #[test]
    fn compact_archives_stale_facts() {
        let mut facts = vec![
            make_fact("arch", "db", "PostgreSQL", 0.9),
            make_old_fact("arch", "old", "ancient thing", 0.4, 60),
        ];

        let (count, archived) = compact(&mut facts, &LifecycleConfig::default());

        assert_eq!(count, 1);
        assert_eq!(archived[0].key, "old");
    }

    #[test]
    fn full_lifecycle_run() {
        let config = LifecycleConfig {
            max_facts: 5,
            ..Default::default()
        };
        let mut facts = vec![
            make_fact("arch", "db", "PostgreSQL", 0.9),
            make_fact("arch", "cache", "Redis", 0.8),
            make_old_fact("arch", "old1", "thing1", 0.2, 50),
            make_old_fact("arch", "old2", "thing2", 0.15, 60),
            make_fact("ops", "deploy", "docker compose", 0.7),
        ];

        let report = run_lifecycle(&mut facts, &config);

        assert!(report.remaining_facts <= config.max_facts);
        assert!(report.decayed_count > 0 || report.compacted_count > 0);
    }

    #[test]
    fn word_similarity_identical() {
        let sim = word_similarity("hello world", "hello world");
        assert!((sim - 1.0).abs() < 0.01);
    }

    #[test]
    fn word_similarity_partial() {
        let sim = word_similarity("uses PostgreSQL database", "PostgreSQL database system");
        assert!(sim >= 0.5, "Expected >= 0.5 but got {sim}");
        assert!(sim < 1.0);
    }

    #[test]
    fn word_similarity_different() {
        let sim = word_similarity("Redis cache", "Docker compose");
        assert!(sim < 0.1);
    }
}