1use chrono::{DateTime, Duration, Utc};
10use serde::{Deserialize, Serialize};
11use std::path::PathBuf;
12
13use super::knowledge::KnowledgeFact;
14
/// Default for [`LifecycleConfig::decay_rate_per_day`].
const DEFAULT_DECAY_RATE: f32 = 0.01;
/// Default for [`LifecycleConfig::max_facts`].
const DEFAULT_MAX_FACTS: usize = 1_000;
/// Default for [`LifecycleConfig::low_confidence_threshold`].
const LOW_CONFIDENCE_THRESHOLD: f32 = 0.3;
/// Default for [`LifecycleConfig::stale_days`].
const STALE_DAYS: i64 = 30;
19
/// Tunable thresholds used by the fact lifecycle passes in this module.
#[derive(Clone, Debug)]
pub struct LifecycleConfig {
    /// Confidence subtracted per day since a fact was last confirmed, before
    /// the retrieval-based protection factor is applied.
    pub decay_rate_per_day: f32,
    /// Upper bound on the number of facts kept after compaction.
    pub max_facts: usize,
    /// Facts whose confidence is strictly below this value are archived.
    pub low_confidence_threshold: f32,
    /// Days since last confirmation after which a fact may be archived as stale.
    pub stale_days: i64,
    /// Similarity at or above which two facts of the same category are merged.
    pub consolidation_similarity: f32,
}
28
29impl Default for LifecycleConfig {
30 fn default() -> Self {
31 Self {
32 decay_rate_per_day: DEFAULT_DECAY_RATE,
33 max_facts: DEFAULT_MAX_FACTS,
34 low_confidence_threshold: LOW_CONFIDENCE_THRESHOLD,
35 stale_days: STALE_DAYS,
36 consolidation_similarity: 0.85,
37 }
38 }
39}
40
/// Summary of what a single [`run_lifecycle`] pass did.
#[derive(Default, Debug)]
pub struct LifecycleReport {
    /// Number of facts whose confidence was changed by the decay pass.
    pub decayed_count: usize,
    /// Number of facts removed because they were merged into a similar fact.
    pub consolidated_count: usize,
    /// Total number of facts removed by compaction (rule-based removals plus
    /// any overflow beyond the configured maximum).
    pub archived_count: usize,
    /// Number of facts removed by compaction's low-confidence / stale rules
    /// only; overflow beyond the configured maximum is not included.
    pub compacted_count: usize,
    /// Number of facts left in the collection after the pass.
    pub remaining_facts: usize,
}
49
50pub fn apply_confidence_decay(facts: &mut [KnowledgeFact], config: &LifecycleConfig) -> usize {
51 let now = Utc::now();
52 let mut count = 0;
53
54 for fact in facts.iter_mut() {
55 if !fact.is_current() {
56 continue;
57 }
58
59 if let Some(valid_until) = fact.valid_until {
60 if valid_until < now && fact.confidence > 0.1 {
61 fact.confidence = 0.1;
62 count += 1;
63 continue;
64 }
65 }
66
67 let days_since_confirmed = now.signed_duration_since(fact.last_confirmed).num_days() as f32;
68 let days_since_retrieved = fact
69 .last_retrieved
70 .map_or(3650.0, |t| now.signed_duration_since(t).num_days() as f32);
71 let retrieval_count = fact.retrieval_count as f32;
72
73 if days_since_confirmed > 0.0 {
74 let freq_protect = 1.0 / (1.0 + retrieval_count.ln_1p()); let recency_protect = (1.0 - (days_since_retrieved / 30.0).min(1.0)).max(0.0); let protect = (freq_protect * (1.0 - 0.5 * recency_protect)).max(0.05);
79 let decay = config.decay_rate_per_day * days_since_confirmed * protect;
80 let new_confidence = (fact.confidence - decay).max(0.05);
81 if (new_confidence - fact.confidence).abs() > 0.001 {
82 fact.confidence = new_confidence;
83 count += 1;
84 }
85 }
86 }
87
88 count
89}
90
91pub fn consolidate_similar(facts: &mut Vec<KnowledgeFact>, similarity_threshold: f32) -> usize {
92 let mut to_remove: std::collections::HashSet<usize> = std::collections::HashSet::new();
93
94 let mut category_groups: std::collections::HashMap<String, Vec<usize>> =
95 std::collections::HashMap::new();
96 for (i, f) in facts.iter().enumerate() {
97 if f.is_current() {
98 category_groups
99 .entry(f.category.clone())
100 .or_default()
101 .push(i);
102 }
103 }
104
105 for indices in category_groups.values() {
106 for (pos_a, &i) in indices.iter().enumerate() {
107 if to_remove.contains(&i) {
108 continue;
109 }
110 for &j in &indices[pos_a + 1..] {
111 if to_remove.contains(&j) {
112 continue;
113 }
114 let sim = word_similarity(&facts[i].value, &facts[j].value);
115 if sim >= similarity_threshold {
116 if facts[i].confidence >= facts[j].confidence {
117 facts[i].confirmation_count += facts[j].confirmation_count;
118 if facts[j].last_confirmed > facts[i].last_confirmed {
119 facts[i].last_confirmed = facts[j].last_confirmed;
120 }
121 to_remove.insert(j);
122 } else {
123 facts[j].confirmation_count += facts[i].confirmation_count;
124 if facts[i].last_confirmed > facts[j].last_confirmed {
125 facts[j].last_confirmed = facts[i].last_confirmed;
126 }
127 to_remove.insert(i);
128 break;
129 }
130 }
131 }
132 }
133 }
134
135 let count = to_remove.len();
136 let mut sorted: Vec<usize> = to_remove.into_iter().collect();
137 sorted.sort_unstable();
138 for idx in sorted.into_iter().rev() {
139 facts.remove(idx);
140 }
141
142 count
143}
144
145pub fn compact(
146 facts: &mut Vec<KnowledgeFact>,
147 config: &LifecycleConfig,
148) -> (usize, Vec<KnowledgeFact>) {
149 let mut archived: Vec<KnowledgeFact> = Vec::new();
150 let now = Utc::now();
151 let stale_threshold = now - Duration::days(config.stale_days);
152
153 let mut to_archive: Vec<usize> = Vec::new();
154
155 for (i, fact) in facts.iter().enumerate() {
156 let recently_retrieved = fact
157 .last_retrieved
158 .is_some_and(|t| now.signed_duration_since(t).num_days() < 14);
159 let frequently_retrieved = fact.retrieval_count >= 5;
160
161 if fact.confidence < config.low_confidence_threshold {
162 to_archive.push(i);
163 continue;
164 }
165
166 if fact.last_confirmed < stale_threshold
167 && fact.confirmation_count <= 1
168 && fact.confidence < 0.5
169 && !recently_retrieved
170 && !frequently_retrieved
171 {
172 to_archive.push(i);
173 }
174 }
175
176 to_archive.sort_unstable();
177 to_archive.dedup();
178 let count = to_archive.len();
179
180 for idx in to_archive.into_iter().rev() {
181 archived.push(facts.remove(idx));
182 }
183
184 if facts.len() > config.max_facts {
185 facts.sort_by(|a, b| {
186 b.confidence
187 .partial_cmp(&a.confidence)
188 .unwrap_or(std::cmp::Ordering::Equal)
189 });
190 let excess: Vec<KnowledgeFact> = facts.drain(config.max_facts..).collect();
191 archived.extend(excess);
192 }
193
194 (count, archived)
195}
196
197pub fn run_lifecycle(facts: &mut Vec<KnowledgeFact>, config: &LifecycleConfig) -> LifecycleReport {
198 let decayed = apply_confidence_decay(facts, config);
199 let consolidated = consolidate_similar(facts, config.consolidation_similarity);
200 let (compacted, archived) = compact(facts, config);
201
202 if !archived.is_empty() {
203 let _ = archive_facts(&archived);
204 }
205
206 LifecycleReport {
207 decayed_count: decayed,
208 consolidated_count: consolidated,
209 archived_count: archived.len(),
210 compacted_count: compacted,
211 remaining_facts: facts.len(),
212 }
213}
214
215#[derive(Debug, Serialize, Deserialize)]
216struct ArchivedFacts {
217 pub archived_at: DateTime<Utc>,
218 pub facts: Vec<KnowledgeFact>,
219}
220
221fn archive_facts(facts: &[KnowledgeFact]) -> Result<(), String> {
222 let dir = crate::core::data_dir::lean_ctx_data_dir()?
223 .join("memory")
224 .join("archive");
225 std::fs::create_dir_all(&dir).map_err(|e| format!("{e}"))?;
226
227 let filename = format!("archive-{}.json", Utc::now().format("%Y%m%d-%H%M%S"));
228 let archive = ArchivedFacts {
229 archived_at: Utc::now(),
230 facts: facts.to_vec(),
231 };
232 let json = serde_json::to_string_pretty(&archive).map_err(|e| format!("{e}"))?;
233 std::fs::write(dir.join(filename), json).map_err(|e| format!("{e}"))
234}
235
236pub fn restore_archive(archive_path: &str) -> Result<Vec<KnowledgeFact>, String> {
237 let data = std::fs::read_to_string(archive_path).map_err(|e| format!("{e}"))?;
238 let archive: ArchivedFacts = serde_json::from_str(&data).map_err(|e| format!("{e}"))?;
239 Ok(archive.facts)
240}
241
242pub fn list_archives() -> Vec<PathBuf> {
243 let dir = match crate::core::data_dir::lean_ctx_data_dir() {
244 Ok(d) => d.join("memory").join("archive"),
245 Err(_) => return Vec::new(),
246 };
247
248 if !dir.exists() {
249 return Vec::new();
250 }
251
252 let mut archives: Vec<PathBuf> = std::fs::read_dir(&dir)
253 .into_iter()
254 .flatten()
255 .flatten()
256 .filter(|e| e.path().extension().is_some_and(|ext| ext == "json"))
257 .map(|e| e.path())
258 .collect();
259
260 archives.sort();
261 archives
262}
263
/// Case-insensitive Jaccard similarity of the whitespace-separated word sets
/// of `a` and `b`.
///
/// Returns a value in `0.0..=1.0`: the number of distinct words both texts
/// share divided by the number of distinct words in either. Two texts without
/// any words are considered identical (`1.0`).
fn word_similarity(a: &str, b: &str) -> f32 {
    use std::collections::HashSet;

    let left_text = a.to_lowercase();
    let right_text = b.to_lowercase();
    let left: HashSet<&str> = left_text.split_whitespace().collect();
    let right: HashSet<&str> = right_text.split_whitespace().collect();

    let shared = left.iter().filter(|word| right.contains(*word)).count();
    // |A ∪ B| = |A| + |B| − |A ∩ B|
    let total = left.len() + right.len() - shared;

    match total {
        // Only possible when neither text contains a word.
        0 => 1.0,
        _ => shared as f32 / total as f32,
    }
}
283
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a fact that was created and confirmed just now.
    fn make_fact(category: &str, key: &str, value: &str, confidence: f32) -> KnowledgeFact {
        make_old_fact(category, key, value, confidence, 0)
    }

    /// Builds a fact that was created and last confirmed `days_old` days ago,
    /// was never retrieved and has a single confirmation.
    fn make_old_fact(
        category: &str,
        key: &str,
        value: &str,
        confidence: f32,
        days_old: i64,
    ) -> KnowledgeFact {
        let past = Utc::now() - Duration::days(days_old);
        KnowledgeFact {
            category: category.to_string(),
            key: key.to_string(),
            value: value.to_string(),
            source_session: "s1".to_string(),
            confidence,
            created_at: past,
            last_confirmed: past,
            retrieval_count: 0,
            last_retrieved: None,
            valid_from: Some(past),
            valid_until: None,
            supersedes: None,
            confirmation_count: 1,
            feedback_up: 0,
            feedback_down: 0,
            last_feedback: None,
            privacy: crate::core::memory_boundary::FactPrivacy::default(),
        }
    }

    #[test]
    fn decay_reduces_confidence() {
        let config = LifecycleConfig::default();
        let mut facts = vec![make_old_fact("arch", "db", "PostgreSQL", 0.9, 10)];

        let count = apply_confidence_decay(&mut facts, &config);
        assert_eq!(count, 1);
        assert!(facts[0].confidence < 0.9);
        assert!(facts[0].confidence > 0.7);
    }

    #[test]
    fn decay_skips_recent_facts() {
        let config = LifecycleConfig::default();
        let mut facts = vec![make_fact("arch", "db", "PostgreSQL", 0.9)];

        let count = apply_confidence_decay(&mut facts, &config);
        assert_eq!(count, 0);
    }

    #[test]
    fn consolidate_similar_facts() {
        let mut facts = vec![
            make_fact("arch", "db", "uses PostgreSQL database", 0.8),
            make_fact("arch", "db2", "uses PostgreSQL database system", 0.6),
            make_fact("ops", "deploy", "docker compose up", 0.9),
        ];

        let count = consolidate_similar(&mut facts, 0.7);
        assert!(count > 0, "Should consolidate similar facts");
        assert!(facts.len() < 3);
    }

    #[test]
    fn consolidate_keeps_higher_confidence_and_merges_confirmations() {
        let mut facts = vec![
            make_fact("arch", "db", "uses PostgreSQL database", 0.8),
            make_fact("arch", "db2", "uses PostgreSQL database system", 0.6),
        ];

        let count = consolidate_similar(&mut facts, 0.7);
        assert_eq!(count, 1);
        assert_eq!(facts.len(), 1);
        assert_eq!(facts[0].key, "db");
        assert_eq!(facts[0].confirmation_count, 2);
    }

    #[test]
    fn consolidate_keeps_different_categories() {
        let mut facts = vec![
            make_fact("arch", "db", "PostgreSQL", 0.8),
            make_fact("ops", "db", "PostgreSQL", 0.8),
        ];

        let count = consolidate_similar(&mut facts, 0.9);
        assert_eq!(count, 0, "Different categories should not consolidate");
    }

    #[test]
    fn compact_removes_low_confidence() {
        let config = LifecycleConfig::default();
        let mut facts = vec![
            make_fact("arch", "db", "PostgreSQL", 0.9),
            make_fact("arch", "cache", "Redis", 0.1),
        ];

        let (count, archived) = compact(&mut facts, &config);
        assert_eq!(count, 1);
        assert_eq!(facts.len(), 1);
        assert_eq!(archived.len(), 1);
        assert_eq!(archived[0].key, "cache");
    }

    #[test]
    fn compact_archives_stale_facts() {
        let config = LifecycleConfig::default();
        let mut facts = vec![
            make_fact("arch", "db", "PostgreSQL", 0.9),
            make_old_fact("arch", "old", "ancient thing", 0.4, 60),
        ];

        let (count, archived) = compact(&mut facts, &config);
        assert_eq!(count, 1);
        assert_eq!(archived[0].key, "old");
    }

    #[test]
    fn compact_enforces_max_facts() {
        let config = LifecycleConfig {
            max_facts: 2,
            ..Default::default()
        };
        let mut facts = vec![
            make_fact("arch", "low", "Redis", 0.7),
            make_fact("arch", "high", "PostgreSQL", 0.9),
            make_fact("ops", "mid", "docker compose", 0.8),
        ];

        let (count, archived) = compact(&mut facts, &config);
        assert_eq!(count, 0, "Overflow is not counted as rule-based removal");
        assert_eq!(facts.len(), 2);
        assert_eq!(archived.len(), 1);
        assert_eq!(archived[0].key, "low");
    }

    #[test]
    fn full_lifecycle_run() {
        let config = LifecycleConfig {
            max_facts: 5,
            ..Default::default()
        };

        let mut facts = vec![
            make_fact("arch", "db", "PostgreSQL", 0.9),
            make_fact("arch", "cache", "Redis", 0.8),
            make_old_fact("arch", "old1", "thing1", 0.2, 50),
            make_old_fact("arch", "old2", "thing2", 0.15, 60),
            make_fact("ops", "deploy", "docker compose", 0.7),
        ];

        let report = run_lifecycle(&mut facts, &config);
        assert!(report.remaining_facts <= config.max_facts);
        assert!(report.decayed_count > 0 || report.compacted_count > 0);
    }

    #[test]
    fn word_similarity_identical() {
        assert!((word_similarity("hello world", "hello world") - 1.0).abs() < 0.01);
    }

    #[test]
    fn word_similarity_partial() {
        let sim = word_similarity("uses PostgreSQL database", "PostgreSQL database system");
        assert!(sim >= 0.5, "Expected >= 0.5 but got {sim}");
        assert!(sim < 1.0);
    }

    #[test]
    fn word_similarity_different() {
        let sim = word_similarity("Redis cache", "Docker compose");
        assert!(sim < 0.1);
    }

    #[test]
    fn word_similarity_ignores_case() {
        assert!((word_similarity("Hello WORLD", "hello world") - 1.0).abs() < 0.01);
    }

    #[test]
    fn word_similarity_empty_inputs() {
        assert!((word_similarity("", "") - 1.0).abs() < 0.01);
        assert!(word_similarity("", "something") < 0.01);
    }
}