lean_ctx/core/
memory_consolidation.rs1use std::collections::HashSet;
3use std::time::{SystemTime, UNIX_EPOCH};
4
/// One stored piece of knowledge plus the usage statistics that the
/// consolidation passes in this file read and update.
#[derive(Debug, Clone, PartialEq)]
pub struct KnowledgeEntry {
    /// Identifier of the entry.
    pub key: String,
    /// Text payload; entry similarity is computed over its whitespace-separated tokens.
    pub content: String,
    /// Access counter; merged entries carry the (saturating) sum of both counters.
    pub access_count: u64,
    /// Time of the most recent access, in seconds since the Unix epoch.
    pub last_access: u64,
    /// Creation time; presumably Unix seconds like `last_access` — merging keeps the earlier value.
    pub created_at: u64,
    /// Importance score; pruning compares it against `REM_MAX_IMPORTANCE`
    /// and the replay pass may increase it.
    pub importance: f64,
}
17
/// Case-insensitive Jaccard similarity between the token sets of `a` and `b`.
///
/// Each input is split on whitespace and every token is lowercased; the
/// result is `|A ∩ B| / |A ∪ B|`. Returns `0.0` when neither input contains
/// a token.
pub fn token_jaccard(a: &str, b: &str) -> f64 {
    /// Distinct lowercased whitespace-separated tokens of `text`.
    fn tokens(text: &str) -> HashSet<String> {
        text.split_whitespace().map(|t| t.to_lowercase()).collect()
    }

    let left = tokens(a);
    let right = tokens(b);

    let shared = left.iter().filter(|t| right.contains(*t)).count();
    // |A ∪ B| = |A| + |B| - |A ∩ B|
    let total = left.len() + right.len() - shared;

    if total == 0 {
        return 0.0;
    }
    shared as f64 / total as f64
}
30
/// Current system time as whole seconds since the Unix epoch.
///
/// Falls back to `0` when the system clock reports a time before the epoch.
fn unix_now() -> u64 {
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_secs(),
        Err(_) => 0,
    }
}
36
/// Number of (fractional) days between `ts` and `now`, both in seconds.
///
/// A `ts` later than `now` yields `0.0` because the subtraction saturates.
fn days_since(ts: u64, now: u64) -> f64 {
    const SECONDS_PER_DAY: f64 = 86400.0;
    let elapsed_secs = now.saturating_sub(ts);
    elapsed_secs as f64 / SECONDS_PER_DAY
}
40
/// Minimum token Jaccard similarity at which `nrem_merge` folds two entries into one.
const NREM_SIM_THRESHOLD: f64 = 0.8;
/// Days since last access at (or beyond) which `rem_prune` treats an entry as stale.
const REM_STALE_DAYS: f64 = 30.0;
/// Stale entries are dropped only when their importance is at or below this value.
const REM_MAX_IMPORTANCE: f64 = 0.35;
/// Inclusive lower similarity bound for a pair to count as related in `replay_boost`.
const REPLAY_RELATED_LOW: f64 = 0.12;
/// Exclusive upper similarity bound for a pair to count as related in `replay_boost`.
const REPLAY_RELATED_HIGH: f64 = 0.79;
/// Multiplier applied to the pair's access-count term to obtain the importance bump.
const REPLAY_BOOST_SCALE: f64 = 0.02;
47
48pub fn consolidate(entries: &mut Vec<KnowledgeEntry>) {
52 if entries.is_empty() {
53 return;
54 }
55 nrem_merge(entries);
56 let now = unix_now();
57 rem_prune(entries, now);
58 replay_boost(entries);
59}
60
61fn merge_two(dst: &mut KnowledgeEntry, src: &KnowledgeEntry) {
62 let use_dst_body = dst.access_count > src.access_count
63 || (dst.access_count == src.access_count && dst.importance >= src.importance);
64 let total_access = dst.access_count.saturating_add(src.access_count);
65 let la = dst.last_access.max(src.last_access);
66 let ca = dst.created_at.min(src.created_at);
67 let imp = dst.importance.max(src.importance);
68 if use_dst_body {
69 dst.access_count = total_access;
70 dst.last_access = la;
71 dst.created_at = ca;
72 dst.importance = imp;
73 } else {
74 dst.key.clone_from(&src.key);
75 dst.content.clone_from(&src.content);
76 dst.access_count = total_access;
77 dst.last_access = la;
78 dst.created_at = ca;
79 dst.importance = imp;
80 }
81}
82
83fn nrem_merge(entries: &mut Vec<KnowledgeEntry>) {
84 let mut out: Vec<KnowledgeEntry> = Vec::new();
85 'outer: for e in entries.drain(..) {
86 for slot in &mut out {
87 if token_jaccard(&slot.content, &e.content) >= NREM_SIM_THRESHOLD {
88 merge_two(slot, &e);
89 continue 'outer;
90 }
91 }
92 out.push(e);
93 }
94 *entries = out;
95}
96
97fn rem_prune(entries: &mut Vec<KnowledgeEntry>, now: u64) {
98 entries.retain(|e| {
99 let stale = days_since(e.last_access, now) >= REM_STALE_DAYS;
100 !(stale && e.importance <= REM_MAX_IMPORTANCE)
101 });
102}
103
104fn replay_boost(entries: &mut [KnowledgeEntry]) {
105 let n = entries.len();
106 if n < 2 {
107 return;
108 }
109 let mut deltas = vec![0.0_f64; n];
110 for i in 0..n {
111 for j in (i + 1)..n {
112 let jac = token_jaccard(&entries[i].content, &entries[j].content);
113 if !(REPLAY_RELATED_LOW..REPLAY_RELATED_HIGH).contains(&jac) {
114 continue;
115 }
116 let co = ((entries[i].access_count as f64 + 1.0).ln()
117 * (entries[j].access_count as f64 + 1.0).ln())
118 .sqrt();
119 let bump = REPLAY_BOOST_SCALE * co;
120 deltas[i] += bump;
121 deltas[j] += bump;
122 }
123 }
124 for (e, d) in entries.iter_mut().zip(deltas) {
125 e.importance += d;
126 }
127}
128
#[cfg(test)]
mod tests {
    use super::*;

    /// Unix timestamp lying `days` days before the current time (saturating at 0).
    fn ts_days_ago(days: u64) -> u64 {
        let offset_secs = days * 86400;
        unix_now().saturating_sub(offset_secs)
    }

    /// Shorthand constructor so each test states its fixture on one line.
    fn entry(
        key: &str,
        content: &str,
        access_count: u64,
        last_access: u64,
        created_at: u64,
        importance: f64,
    ) -> KnowledgeEntry {
        KnowledgeEntry {
            key: key.into(),
            content: content.into(),
            access_count,
            last_access,
            created_at,
            importance,
        }
    }

    #[test]
    fn nrem_merges_similar_keeps_most_accessed_body() {
        // The contents share 4 of 5 distinct tokens, which meets the 0.8 merge threshold.
        let mut entries = vec![
            entry("a", "alpha beta gamma delta", 2, unix_now(), 1, 0.5),
            entry("b", "alpha beta gamma delta epsilon", 10, unix_now(), 2, 0.4),
        ];

        consolidate(&mut entries);

        assert_eq!(entries.len(), 1);
        let merged = &entries[0];
        assert_eq!(merged.content, "alpha beta gamma delta epsilon");
        assert_eq!(merged.access_count, 12);
    }

    #[test]
    fn rem_drops_stale_low_importance() {
        // Both entries are 40 days old; only the importance differs.
        let old = ts_days_ago(40);
        let mut entries = vec![
            entry("keep", "unique one", 0, old, 0, 0.9),
            entry("gone", "unique two", 0, old, 0, 0.2),
        ];

        consolidate(&mut entries);

        assert_eq!(entries.len(), 1);
        assert_eq!(entries[0].key, "keep");
    }

    #[test]
    fn replay_raises_importance_for_related_accessed_pairs() {
        // Entries "1" and "2" overlap on 4 of 6 distinct tokens; "3" shares none.
        let mut entries = vec![
            entry("1", "foo bar baz quux widget", 100, unix_now(), 0, 0.5),
            entry("2", "foo bar baz quux wobble", 100, unix_now(), 0, 0.5),
            entry("3", "totally different xyz", 1, unix_now(), 0, 0.5),
        ];
        let unrelated_imp = entries[2].importance;

        consolidate(&mut entries);

        assert!(entries[0].importance > 0.5 || entries[1].importance > 0.5);
        let unrelated = entries.iter().find(|e| e.key == "3").unwrap();
        assert!((unrelated.importance - unrelated_imp).abs() < 1e-9);
    }
}