1use chrono::{DateTime, Duration, Utc};
10use serde::{Deserialize, Serialize};
11use std::path::PathBuf;
12
13use super::knowledge::KnowledgeFact;
14
/// Default for `LifecycleConfig::decay_rate_per_day`: base confidence lost per day
/// since a fact was last confirmed, before protection and feedback scaling.
const DEFAULT_DECAY_RATE: f32 = 0.01;
/// Default for `LifecycleConfig::max_facts`: cap on the number of facts kept after compaction.
const DEFAULT_MAX_FACTS: usize = 1_000;
/// Default for `LifecycleConfig::low_confidence_threshold`: facts below this are archived.
const LOW_CONFIDENCE_THRESHOLD: f32 = 0.3;
/// Default for `LifecycleConfig::stale_days`: age in days used by the staleness rule.
const STALE_DAYS: i64 = 30;
19
/// Tunable parameters for the fact lifecycle passes (decay, consolidation, compaction).
#[derive(Debug, Clone)]
pub struct LifecycleConfig {
    /// Base confidence decay applied per whole day since a fact was last confirmed.
    pub decay_rate_per_day: f32,
    /// Maximum number of facts kept after compaction; lowest-confidence excess is archived.
    pub max_facts: usize,
    /// Facts whose confidence is strictly below this value are archived by `compact`.
    pub low_confidence_threshold: f32,
    /// Days since last confirmation after which the staleness rule in `compact` can apply.
    pub stale_days: i64,
    /// Similarity threshold that `run_lifecycle` passes to `consolidate_similar`.
    pub consolidation_similarity: f32,
}
28
29impl Default for LifecycleConfig {
30 fn default() -> Self {
31 Self {
32 decay_rate_per_day: DEFAULT_DECAY_RATE,
33 max_facts: DEFAULT_MAX_FACTS,
34 low_confidence_threshold: LOW_CONFIDENCE_THRESHOLD,
35 stale_days: STALE_DAYS,
36 consolidation_similarity: 0.85,
37 }
38 }
39}
40
/// Summary of what a single `run_lifecycle` call did.
#[derive(Debug, Default)]
pub struct LifecycleReport {
    /// Number of facts whose confidence was changed by the decay pass.
    pub decayed_count: usize,
    /// Number of facts removed because they were merged into a similar fact.
    pub consolidated_count: usize,
    /// Total number of facts moved out of the active set by compaction,
    /// including those evicted only because of the `max_facts` cap.
    pub archived_count: usize,
    /// Number of facts archived by the low-confidence / staleness rules alone.
    pub compacted_count: usize,
    /// Number of facts left in the active set when the run finished.
    pub remaining_facts: usize,
}
49
50pub fn apply_confidence_decay(facts: &mut [KnowledgeFact], config: &LifecycleConfig) -> usize {
51 let now = Utc::now();
52 let mut count = 0;
53
54 for fact in facts.iter_mut() {
55 if !fact.is_current() {
56 continue;
57 }
58
59 if let Some(valid_until) = fact.valid_until {
60 if valid_until < now && fact.confidence > 0.1 {
61 fact.confidence = 0.1;
62 count += 1;
63 continue;
64 }
65 }
66
67 let days_since_confirmed = now.signed_duration_since(fact.last_confirmed).num_days() as f32;
68 let days_since_retrieved = fact
69 .last_retrieved
70 .map_or(3650.0, |t| now.signed_duration_since(t).num_days() as f32);
71 let retrieval_count = fact.retrieval_count as f32;
72
73 if days_since_confirmed > 0.0 {
74 let freq_protect = 1.0 / (1.0 + retrieval_count.ln_1p()); let recency_protect = (1.0 - (days_since_retrieved / 30.0).min(1.0)).max(0.0); let protect = (freq_protect * (1.0 - 0.5 * recency_protect)).max(0.05);
79 let net_feedback = i64::from(fact.feedback_up) - i64::from(fact.feedback_down);
85 let feedback_factor = match net_feedback.cmp(&0) {
86 std::cmp::Ordering::Greater => 1.0 / (1.0 + (net_feedback as f32).ln_1p()),
87 std::cmp::Ordering::Less => {
88 (1.0 + (net_feedback.unsigned_abs() as f32).ln_1p()).min(4.0)
89 }
90 std::cmp::Ordering::Equal => 1.0,
91 };
92 let decay =
93 config.decay_rate_per_day * days_since_confirmed * protect * feedback_factor;
94 let new_confidence = (fact.confidence - decay).max(0.05);
95 if (new_confidence - fact.confidence).abs() > 0.001 {
96 fact.confidence = new_confidence;
97 count += 1;
98 }
99 }
100 }
101
102 count
103}
104
105pub fn consolidate_similar(facts: &mut Vec<KnowledgeFact>, similarity_threshold: f32) -> usize {
106 let mut to_remove: std::collections::HashSet<usize> = std::collections::HashSet::new();
107
108 let mut category_groups: std::collections::HashMap<String, Vec<usize>> =
109 std::collections::HashMap::new();
110 for (i, f) in facts.iter().enumerate() {
111 if f.is_current() {
112 category_groups
113 .entry(f.category.clone())
114 .or_default()
115 .push(i);
116 }
117 }
118
119 for indices in category_groups.values() {
120 for (pos_a, &i) in indices.iter().enumerate() {
121 if to_remove.contains(&i) {
122 continue;
123 }
124 for &j in &indices[pos_a + 1..] {
125 if to_remove.contains(&j) {
126 continue;
127 }
128 let sim = word_similarity(&facts[i].value, &facts[j].value);
129 if sim >= similarity_threshold {
130 if facts[i].confidence >= facts[j].confidence {
131 facts[i].confirmation_count += facts[j].confirmation_count;
132 if facts[j].last_confirmed > facts[i].last_confirmed {
133 facts[i].last_confirmed = facts[j].last_confirmed;
134 }
135 to_remove.insert(j);
136 } else {
137 facts[j].confirmation_count += facts[i].confirmation_count;
138 if facts[i].last_confirmed > facts[j].last_confirmed {
139 facts[j].last_confirmed = facts[i].last_confirmed;
140 }
141 to_remove.insert(i);
142 break;
143 }
144 }
145 }
146 }
147 }
148
149 let count = to_remove.len();
150 let mut sorted: Vec<usize> = to_remove.into_iter().collect();
151 sorted.sort_unstable();
152 for idx in sorted.into_iter().rev() {
153 facts.remove(idx);
154 }
155
156 count
157}
158
159pub fn compact(
160 facts: &mut Vec<KnowledgeFact>,
161 config: &LifecycleConfig,
162) -> (usize, Vec<KnowledgeFact>) {
163 let mut archived: Vec<KnowledgeFact> = Vec::new();
164 let now = Utc::now();
165 let stale_threshold = now - Duration::days(config.stale_days);
166
167 let mut to_archive: Vec<usize> = Vec::new();
168
169 for (i, fact) in facts.iter().enumerate() {
170 let recently_retrieved = fact
171 .last_retrieved
172 .is_some_and(|t| now.signed_duration_since(t).num_days() < 14);
173 let frequently_retrieved = fact.retrieval_count >= 5;
174
175 if fact.confidence < config.low_confidence_threshold {
176 to_archive.push(i);
177 continue;
178 }
179
180 if fact.last_confirmed < stale_threshold
181 && fact.confirmation_count <= 1
182 && fact.confidence < 0.5
183 && !recently_retrieved
184 && !frequently_retrieved
185 {
186 to_archive.push(i);
187 }
188 }
189
190 to_archive.sort_unstable();
191 to_archive.dedup();
192 let count = to_archive.len();
193
194 for idx in to_archive.into_iter().rev() {
195 archived.push(facts.remove(idx));
196 }
197
198 if facts.len() > config.max_facts {
199 facts.sort_by(|a, b| {
200 b.confidence
201 .partial_cmp(&a.confidence)
202 .unwrap_or(std::cmp::Ordering::Equal)
203 });
204 let excess: Vec<KnowledgeFact> = facts.drain(config.max_facts..).collect();
205 archived.extend(excess);
206 }
207
208 (count, archived)
209}
210
211pub fn run_lifecycle(facts: &mut Vec<KnowledgeFact>, config: &LifecycleConfig) -> LifecycleReport {
212 let decayed = apply_confidence_decay(facts, config);
213 let consolidated = consolidate_similar(facts, config.consolidation_similarity);
214 let (compacted, archived) = compact(facts, config);
215
216 if !archived.is_empty() {
217 let _ = archive_facts(&archived);
218 }
219
220 LifecycleReport {
221 decayed_count: decayed,
222 consolidated_count: consolidated,
223 archived_count: archived.len(),
224 compacted_count: compacted,
225 remaining_facts: facts.len(),
226 }
227}
228
229#[derive(Debug, Serialize, Deserialize)]
230struct ArchivedFacts {
231 pub archived_at: DateTime<Utc>,
232 pub facts: Vec<KnowledgeFact>,
233}
234
235fn archive_facts(facts: &[KnowledgeFact]) -> Result<(), String> {
236 let dir = crate::core::data_dir::lean_ctx_data_dir()?
237 .join("memory")
238 .join("archive");
239 std::fs::create_dir_all(&dir).map_err(|e| format!("{e}"))?;
240
241 let filename = format!("archive-{}.json", Utc::now().format("%Y%m%d-%H%M%S"));
242 let archive = ArchivedFacts {
243 archived_at: Utc::now(),
244 facts: facts.to_vec(),
245 };
246 let json = serde_json::to_string_pretty(&archive).map_err(|e| format!("{e}"))?;
247 std::fs::write(dir.join(filename), json).map_err(|e| format!("{e}"))
248}
249
250pub fn restore_archive(archive_path: &str) -> Result<Vec<KnowledgeFact>, String> {
251 let data = std::fs::read_to_string(archive_path).map_err(|e| format!("{e}"))?;
252 let archive: ArchivedFacts = serde_json::from_str(&data).map_err(|e| format!("{e}"))?;
253 Ok(archive.facts)
254}
255
256pub fn list_archives() -> Vec<PathBuf> {
257 let dir = match crate::core::data_dir::lean_ctx_data_dir() {
258 Ok(d) => d.join("memory").join("archive"),
259 Err(_) => return Vec::new(),
260 };
261
262 if !dir.exists() {
263 return Vec::new();
264 }
265
266 let mut archives: Vec<PathBuf> = std::fs::read_dir(&dir)
267 .into_iter()
268 .flatten()
269 .flatten()
270 .filter(|e| e.path().extension().is_some_and(|ext| ext == "json"))
271 .map(|e| e.path())
272 .collect();
273
274 archives.sort();
275 archives
276}
277
/// Case-insensitive Jaccard similarity of the whitespace-separated word sets of `a` and `b`.
///
/// Returns a value in `0.0..=1.0`: shared distinct words divided by all distinct words.
/// Two inputs that both contain no words are considered identical (`1.0`).
fn word_similarity(a: &str, b: &str) -> f32 {
    use std::collections::HashSet;

    let left_text = a.to_lowercase();
    let right_text = b.to_lowercase();
    let left: HashSet<&str> = left_text.split_whitespace().collect();
    let right: HashSet<&str> = right_text.split_whitespace().collect();

    if left.is_empty() && right.is_empty() {
        return 1.0;
    }

    let shared = left.iter().filter(|word| right.contains(*word)).count();
    // |A ∪ B| = |A| + |B| - |A ∩ B|
    let total = left.len() + right.len() - shared;

    match total {
        0 => 0.0,
        _ => shared as f32 / total as f32,
    }
}
297
#[cfg(test)]
mod tests {
    use super::*;
    use crate::core::knowledge::KnowledgeArchetype;

    /// Builds a fact that was created, last confirmed and became valid `days_old` days ago.
    /// It has never been retrieved, has one confirmation and no feedback.
    fn make_old_fact(
        category: &str,
        key: &str,
        value: &str,
        confidence: f32,
        days_old: i64,
    ) -> KnowledgeFact {
        let stamp = Utc::now() - Duration::days(days_old);
        KnowledgeFact {
            category: category.to_owned(),
            key: key.to_owned(),
            value: value.to_owned(),
            source_session: "s1".to_owned(),
            confidence,
            created_at: stamp,
            last_confirmed: stamp,
            retrieval_count: 0,
            last_retrieved: None,
            valid_from: Some(stamp),
            valid_until: None,
            supersedes: None,
            confirmation_count: 1,
            feedback_up: 0,
            feedback_down: 0,
            last_feedback: None,
            privacy: crate::core::memory_boundary::FactPrivacy::default(),
            imported_from: None,
            archetype: KnowledgeArchetype::default(),
            fidelity: None,
            revision_count: 0,
        }
    }

    /// Builds a fact whose timestamps are all "now".
    fn make_fact(category: &str, key: &str, value: &str, confidence: f32) -> KnowledgeFact {
        make_old_fact(category, key, value, confidence, 0)
    }

    #[test]
    fn decay_reduces_confidence() {
        let mut facts = vec![make_old_fact("arch", "db", "PostgreSQL", 0.9, 10)];

        let changed = apply_confidence_decay(&mut facts, &LifecycleConfig::default());

        assert_eq!(changed, 1);
        let after = facts[0].confidence;
        assert!(after < 0.9);
        assert!(after > 0.7);
    }

    #[test]
    fn decay_skips_recent_facts() {
        let mut facts = vec![make_fact("arch", "db", "PostgreSQL", 0.9)];

        let changed = apply_confidence_decay(&mut facts, &LifecycleConfig::default());

        assert_eq!(changed, 0);
    }

    #[test]
    fn feedback_steers_decay_keep_vs_forget() {
        let praised = KnowledgeFact {
            feedback_up: 5,
            ..make_old_fact("arch", "loved", "keep me", 0.9, 10)
        };
        let panned = KnowledgeFact {
            feedback_down: 5,
            ..make_old_fact("arch", "hated", "forget me", 0.9, 10)
        };
        let neutral = make_old_fact("arch", "meh", "neutral", 0.9, 10);

        let mut facts = vec![praised, panned, neutral];
        apply_confidence_decay(&mut facts, &LifecycleConfig::default());

        let praised_c = facts[0].confidence;
        let panned_c = facts[1].confidence;
        let neutral_c = facts[2].confidence;

        assert!(
            praised_c > neutral_c,
            "praised {praised_c} should outlast neutral {neutral_c}"
        );
        assert!(
            neutral_c > panned_c,
            "neutral {neutral_c} should outlast panned {panned_c}"
        );
        // Even heavily down-voted facts stop at the decay floor.
        assert!(panned_c >= 0.05);
    }

    #[test]
    fn consolidate_similar_facts() {
        let mut facts = vec![
            make_fact("arch", "db", "uses PostgreSQL database", 0.8),
            make_fact("arch", "db2", "uses PostgreSQL database system", 0.6),
            make_fact("ops", "deploy", "docker compose up", 0.9),
        ];

        let merged = consolidate_similar(&mut facts, 0.7);

        assert!(merged > 0, "Should consolidate similar facts");
        assert!(facts.len() < 3);
    }

    #[test]
    fn consolidate_keeps_different_categories() {
        let mut facts = vec![
            make_fact("arch", "db", "PostgreSQL", 0.8),
            make_fact("ops", "db", "PostgreSQL", 0.8),
        ];

        let merged = consolidate_similar(&mut facts, 0.9);

        assert_eq!(merged, 0, "Different categories should not consolidate");
    }

    #[test]
    fn compact_removes_low_confidence() {
        let mut facts = vec![
            make_fact("arch", "db", "PostgreSQL", 0.9),
            make_fact("arch", "cache", "Redis", 0.1),
        ];

        let (count, archived) = compact(&mut facts, &LifecycleConfig::default());

        assert_eq!(count, 1);
        assert_eq!(facts.len(), 1);
        assert_eq!(archived.len(), 1);
        assert_eq!(archived[0].key, "cache");
    }

    #[test]
    fn compact_archives_stale_facts() {
        let mut facts = vec![
            make_fact("arch", "db", "PostgreSQL", 0.9),
            make_old_fact("arch", "old", "ancient thing", 0.4, 60),
        ];

        let (count, archived) = compact(&mut facts, &LifecycleConfig::default());

        assert_eq!(count, 1);
        assert_eq!(archived[0].key, "old");
    }

    #[test]
    fn full_lifecycle_run() {
        let config = LifecycleConfig {
            max_facts: 5,
            ..Default::default()
        };
        let mut facts = vec![
            make_fact("arch", "db", "PostgreSQL", 0.9),
            make_fact("arch", "cache", "Redis", 0.8),
            make_old_fact("arch", "old1", "thing1", 0.2, 50),
            make_old_fact("arch", "old2", "thing2", 0.15, 60),
            make_fact("ops", "deploy", "docker compose", 0.7),
        ];

        let report = run_lifecycle(&mut facts, &config);

        assert!(report.remaining_facts <= config.max_facts);
        assert!(report.decayed_count > 0 || report.compacted_count > 0);
    }

    #[test]
    fn word_similarity_identical() {
        let sim = word_similarity("hello world", "hello world");
        assert!((sim - 1.0).abs() < 0.01);
    }

    #[test]
    fn word_similarity_partial() {
        let sim = word_similarity("uses PostgreSQL database", "PostgreSQL database system");
        assert!(sim >= 0.5, "Expected >= 0.5 but got {sim}");
        assert!(sim < 1.0);
    }

    #[test]
    fn word_similarity_different() {
        let sim = word_similarity("Redis cache", "Docker compose");
        assert!(sim < 0.1);
    }
}