Skip to main content

lean_ctx/core/
memory_consolidation.rs

1//! Sleep-inspired consolidation for in-memory knowledge entries (NREM merge, REM prune, replay boost).
2use std::collections::HashSet;
3use std::time::{SystemTime, UNIX_EPOCH};
4
/// One stored piece of knowledge that the consolidation passes operate on.
#[derive(Debug, Clone, PartialEq)]
pub struct KnowledgeEntry {
    /// Identifier of the entry.
    pub key: String,
    /// Text body; similarity between entries is measured on its tokens.
    pub content: String,
    /// How many times the entry has been accessed.
    pub access_count: u64,
    /// Unix timestamp (seconds) of the most recent access.
    pub last_access: u64,
    /// Unix timestamp (seconds) at which the entry was created.
    pub created_at: u64,
    /// Importance score; compared against the prune threshold and raised by replay.
    pub importance: f64,
}
17
/// Case-insensitive Jaccard similarity between the token sets of two strings.
///
/// Each string is split on whitespace and every token is lower-cased; duplicate
/// tokens count once. Returns `|A ∩ B| / |A ∪ B|`, or `0.0` when neither string
/// contains any token.
pub fn token_jaccard(a: &str, b: &str) -> f64 {
    let tokenize = |text: &str| -> HashSet<String> {
        text.split_whitespace().map(str::to_lowercase).collect()
    };
    let (left, right) = (tokenize(a), tokenize(b));
    let shared = left.intersection(&right).count();
    // |A ∪ B| = |A| + |B| - |A ∩ B|
    let combined = left.len() + right.len() - shared;
    match combined {
        0 => 0.0,
        total => shared as f64 / total as f64,
    }
}
30
/// Current wall-clock time as whole seconds since the Unix epoch.
///
/// Falls back to `0` when the system clock reports a time before the epoch.
fn unix_now() -> u64 {
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_secs(),
        Err(_) => 0,
    }
}
36
/// Fractional number of days from `ts` to `now`, both given in Unix seconds.
///
/// A `ts` later than `now` yields `0.0` rather than a negative value.
fn days_since(ts: u64, now: u64) -> f64 {
    const SECONDS_PER_DAY: f64 = 86400.0;
    let elapsed_secs = now.saturating_sub(ts);
    elapsed_secs as f64 / SECONDS_PER_DAY
}
40
/// Minimum token Jaccard similarity (inclusive) at which the NREM pass merges two entries.
const NREM_SIM_THRESHOLD: f64 = 0.8;
/// Days since last access at or beyond which the REM pass considers an entry stale.
const REM_STALE_DAYS: f64 = 30.0;
/// Stale entries whose importance is at or below this value are pruned by the REM pass.
const REM_MAX_IMPORTANCE: f64 = 0.35;
/// Inclusive lower bound of the similarity band in which replay treats two entries as related.
const REPLAY_RELATED_LOW: f64 = 0.12;
/// Exclusive upper bound of the replay similarity band.
// NOTE(review): sits just below NREM_SIM_THRESHOLD, presumably so that merge candidates are
// never also boosted; pairs with similarity in [0.79, 0.8) get neither treatment — confirm.
const REPLAY_RELATED_HIGH: f64 = 0.79;
/// Multiplier applied to the access-based term when computing a replay importance bump.
const REPLAY_BOOST_SCALE: f64 = 0.02;
47
48/// NREM: merge highly similar entries (keep highest-access body).
49/// REM: drop stale + low-importance.
50/// Replay: boost importance of pairwise related entries that are often accessed together (proxy).
51pub fn consolidate(entries: &mut Vec<KnowledgeEntry>) {
52    if entries.is_empty() {
53        return;
54    }
55    nrem_merge(entries);
56    let now = unix_now();
57    rem_prune(entries, now);
58    replay_boost(entries);
59}
60
61fn merge_two(dst: &mut KnowledgeEntry, src: &KnowledgeEntry) {
62    let use_dst_body = dst.access_count > src.access_count
63        || (dst.access_count == src.access_count && dst.importance >= src.importance);
64    let total_access = dst.access_count.saturating_add(src.access_count);
65    let la = dst.last_access.max(src.last_access);
66    let ca = dst.created_at.min(src.created_at);
67    let imp = dst.importance.max(src.importance);
68    if use_dst_body {
69        dst.access_count = total_access;
70        dst.last_access = la;
71        dst.created_at = ca;
72        dst.importance = imp;
73    } else {
74        dst.key.clone_from(&src.key);
75        dst.content.clone_from(&src.content);
76        dst.access_count = total_access;
77        dst.last_access = la;
78        dst.created_at = ca;
79        dst.importance = imp;
80    }
81}
82
83fn nrem_merge(entries: &mut Vec<KnowledgeEntry>) {
84    let mut out: Vec<KnowledgeEntry> = Vec::new();
85    'outer: for e in entries.drain(..) {
86        for slot in &mut out {
87            if token_jaccard(&slot.content, &e.content) >= NREM_SIM_THRESHOLD {
88                merge_two(slot, &e);
89                continue 'outer;
90            }
91        }
92        out.push(e);
93    }
94    *entries = out;
95}
96
97fn rem_prune(entries: &mut Vec<KnowledgeEntry>, now: u64) {
98    entries.retain(|e| {
99        let stale = days_since(e.last_access, now) >= REM_STALE_DAYS;
100        !(stale && e.importance <= REM_MAX_IMPORTANCE)
101    });
102}
103
104fn replay_boost(entries: &mut [KnowledgeEntry]) {
105    let n = entries.len();
106    if n < 2 {
107        return;
108    }
109    let mut deltas = vec![0.0_f64; n];
110    for i in 0..n {
111        for j in (i + 1)..n {
112            let jac = token_jaccard(&entries[i].content, &entries[j].content);
113            if !(REPLAY_RELATED_LOW..REPLAY_RELATED_HIGH).contains(&jac) {
114                continue;
115            }
116            let co = ((entries[i].access_count as f64 + 1.0).ln()
117                * (entries[j].access_count as f64 + 1.0).ln())
118            .sqrt();
119            let bump = REPLAY_BOOST_SCALE * co;
120            deltas[i] += bump;
121            deltas[j] += bump;
122        }
123    }
124    for (e, d) in entries.iter_mut().zip(deltas) {
125        e.importance += d;
126    }
127}
128
#[cfg(test)]
mod tests {
    use super::*;

    /// Unix timestamp (seconds) lying `days` days before the current time.
    ///
    /// Saturating arithmetic keeps very large `days` values from overflowing.
    fn ts_days_ago(days: u64) -> u64 {
        unix_now().saturating_sub(days.saturating_mul(86400))
    }

    /// Two entries with similarity 4/5 = 0.8 are merged; the body of the
    /// more-accessed entry survives and the metadata is combined.
    #[test]
    fn nrem_merges_similar_keeps_most_accessed_body() {
        let mut v = vec![
            KnowledgeEntry {
                key: "a".into(),
                content: "alpha beta gamma delta".into(),
                access_count: 2,
                last_access: unix_now(),
                created_at: 1,
                importance: 0.5,
            },
            KnowledgeEntry {
                key: "b".into(),
                content: "alpha beta gamma delta epsilon".into(),
                access_count: 10,
                last_access: unix_now(),
                created_at: 2,
                importance: 0.4,
            },
        ];
        consolidate(&mut v);
        assert_eq!(v.len(), 1);
        assert_eq!(v[0].key, "b");
        assert_eq!(v[0].content, "alpha beta gamma delta epsilon");
        assert_eq!(v[0].access_count, 12);
        assert_eq!(v[0].created_at, 1);
        assert_eq!(v[0].importance, 0.5);
    }

    /// Of two equally stale entries only the low-importance one is pruned.
    #[test]
    fn rem_drops_stale_low_importance() {
        let old = ts_days_ago(40);
        let mut v = vec![
            KnowledgeEntry {
                key: "keep".into(),
                content: "unique one".into(),
                access_count: 0,
                last_access: old,
                created_at: 0,
                importance: 0.9,
            },
            KnowledgeEntry {
                key: "gone".into(),
                content: "unique two".into(),
                access_count: 0,
                last_access: old,
                created_at: 0,
                importance: 0.2,
            },
        ];
        consolidate(&mut v);
        assert_eq!(v.len(), 1);
        assert_eq!(v[0].key, "keep");
    }

    /// A related, frequently accessed pair is boosted symmetrically, while an
    /// unrelated entry keeps its importance.
    #[test]
    fn replay_raises_importance_for_related_accessed_pairs() {
        let mut v = vec![
            KnowledgeEntry {
                key: "1".into(),
                content: "foo bar baz quux widget".into(),
                access_count: 100,
                last_access: unix_now(),
                created_at: 0,
                importance: 0.5,
            },
            KnowledgeEntry {
                key: "2".into(),
                content: "foo bar baz quux wobble".into(),
                access_count: 100,
                last_access: unix_now(),
                created_at: 0,
                importance: 0.5,
            },
            KnowledgeEntry {
                key: "3".into(),
                content: "totally different xyz".into(),
                access_count: 1,
                last_access: unix_now(),
                created_at: 0,
                importance: 0.5,
            },
        ];
        let unrelated_imp = v[2].importance;
        consolidate(&mut v);
        // Both members of the related pair must be boosted, not just one of them.
        assert!(v[0].importance > 0.5 && v[1].importance > 0.5);
        assert!((v[0].importance - v[1].importance).abs() < 1e-12);
        assert!((v.iter().find(|e| e.key == "3").unwrap().importance - unrelated_imp).abs() < 1e-9);
    }
}