1use chrono::{DateTime, Duration, Utc};
10use serde::{Deserialize, Serialize};
11use std::path::PathBuf;
12
13use super::knowledge::KnowledgeFact;
14
/// Default confidence lost per day since a fact was last confirmed.
const DEFAULT_DECAY_RATE: f32 = 0.01;
/// Default cap on the number of facts kept after compaction.
const DEFAULT_MAX_FACTS: usize = 1000;
/// Default confidence below which a fact is archived during compaction.
const LOW_CONFIDENCE_THRESHOLD: f32 = 0.3;
/// Default age (in days since last confirmation) after which a fact may be
/// treated as stale during compaction.
const STALE_DAYS: i64 = 30;
/// Default word-overlap similarity at or above which two facts of the same
/// category are merged during consolidation.
const DEFAULT_CONSOLIDATION_SIMILARITY: f32 = 0.85;

/// Tunable parameters for the knowledge-fact lifecycle (decay, consolidation
/// and compaction).
#[derive(Debug, Clone, PartialEq)]
pub struct LifecycleConfig {
    /// Confidence subtracted per day since the last confirmation, before the
    /// retrieval-based protection factor is applied.
    pub decay_rate_per_day: f32,
    /// Maximum number of facts left in the active set after compaction.
    pub max_facts: usize,
    /// Facts with a confidence strictly below this value are archived.
    pub low_confidence_threshold: f32,
    /// Facts last confirmed more than this many days ago are candidates for
    /// stale archiving.
    pub stale_days: i64,
    /// Similarity threshold handed to the consolidation step.
    pub consolidation_similarity: f32,
}

impl Default for LifecycleConfig {
    /// Builds a configuration from the module's default constants.
    fn default() -> Self {
        Self {
            decay_rate_per_day: DEFAULT_DECAY_RATE,
            max_facts: DEFAULT_MAX_FACTS,
            low_confidence_threshold: LOW_CONFIDENCE_THRESHOLD,
            stale_days: STALE_DAYS,
            consolidation_similarity: DEFAULT_CONSOLIDATION_SIMILARITY,
        }
    }
}
40
/// Summary of what a single lifecycle run changed.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct LifecycleReport {
    /// Number of facts whose confidence was changed by the decay step.
    pub decayed_count: usize,
    /// Number of facts removed because they were merged into a similar fact.
    pub consolidated_count: usize,
    /// Total number of facts moved out of the active set by compaction.
    pub archived_count: usize,
    /// The count returned by the compaction step.
    pub compacted_count: usize,
    /// Number of facts still in the active set after the run.
    pub remaining_facts: usize,
}
49
50pub fn apply_confidence_decay(facts: &mut [KnowledgeFact], config: &LifecycleConfig) -> usize {
51 let now = Utc::now();
52 let mut count = 0;
53
54 for fact in facts.iter_mut() {
55 if !fact.is_current() {
56 continue;
57 }
58
59 if let Some(valid_until) = fact.valid_until {
60 if valid_until < now && fact.confidence > 0.1 {
61 fact.confidence = 0.1;
62 count += 1;
63 continue;
64 }
65 }
66
67 let days_since_confirmed = now.signed_duration_since(fact.last_confirmed).num_days() as f32;
68 let days_since_retrieved = fact
69 .last_retrieved
70 .map(|t| now.signed_duration_since(t).num_days() as f32)
71 .unwrap_or(3650.0);
72 let retrieval_count = fact.retrieval_count as f32;
73
74 if days_since_confirmed > 0.0 {
75 let freq_protect = 1.0 / (1.0 + retrieval_count.ln_1p()); let recency_protect = (1.0 - (days_since_retrieved / 30.0).min(1.0)).max(0.0); let protect = (freq_protect * (1.0 - 0.5 * recency_protect)).max(0.05);
80 let decay = config.decay_rate_per_day * days_since_confirmed * protect;
81 let new_confidence = (fact.confidence - decay).max(0.05);
82 if (new_confidence - fact.confidence).abs() > 0.001 {
83 fact.confidence = new_confidence;
84 count += 1;
85 }
86 }
87 }
88
89 count
90}
91
92pub fn consolidate_similar(facts: &mut Vec<KnowledgeFact>, similarity_threshold: f32) -> usize {
93 let mut to_remove: Vec<usize> = Vec::new();
94 let len = facts.len();
95
96 for i in 0..len {
97 if to_remove.contains(&i) || !facts[i].is_current() {
98 continue;
99 }
100
101 for j in (i + 1)..len {
102 if to_remove.contains(&j) || !facts[j].is_current() {
103 continue;
104 }
105
106 if facts[i].category != facts[j].category {
107 continue;
108 }
109
110 let sim = word_similarity(&facts[i].value, &facts[j].value);
111 if sim >= similarity_threshold {
112 if facts[i].confidence >= facts[j].confidence {
113 facts[i].confirmation_count += facts[j].confirmation_count;
114 if facts[j].last_confirmed > facts[i].last_confirmed {
115 facts[i].last_confirmed = facts[j].last_confirmed;
116 }
117 to_remove.push(j);
118 } else {
119 facts[j].confirmation_count += facts[i].confirmation_count;
120 if facts[i].last_confirmed > facts[j].last_confirmed {
121 facts[j].last_confirmed = facts[i].last_confirmed;
122 }
123 to_remove.push(i);
124 break;
125 }
126 }
127 }
128 }
129
130 to_remove.sort_unstable();
131 to_remove.dedup();
132 let count = to_remove.len();
133
134 for idx in to_remove.into_iter().rev() {
135 facts.remove(idx);
136 }
137
138 count
139}
140
141pub fn compact(
142 facts: &mut Vec<KnowledgeFact>,
143 config: &LifecycleConfig,
144) -> (usize, Vec<KnowledgeFact>) {
145 let mut archived: Vec<KnowledgeFact> = Vec::new();
146 let now = Utc::now();
147 let stale_threshold = now - Duration::days(config.stale_days);
148
149 let mut to_archive: Vec<usize> = Vec::new();
150
151 for (i, fact) in facts.iter().enumerate() {
152 let recently_retrieved = fact
153 .last_retrieved
154 .is_some_and(|t| now.signed_duration_since(t).num_days() < 14);
155 let frequently_retrieved = fact.retrieval_count >= 5;
156
157 if fact.confidence < config.low_confidence_threshold {
158 to_archive.push(i);
159 continue;
160 }
161
162 if fact.last_confirmed < stale_threshold
163 && fact.confirmation_count <= 1
164 && fact.confidence < 0.5
165 && !recently_retrieved
166 && !frequently_retrieved
167 {
168 to_archive.push(i);
169 }
170 }
171
172 to_archive.sort_unstable();
173 to_archive.dedup();
174 let count = to_archive.len();
175
176 for idx in to_archive.into_iter().rev() {
177 archived.push(facts.remove(idx));
178 }
179
180 if facts.len() > config.max_facts {
181 facts.sort_by(|a, b| {
182 b.confidence
183 .partial_cmp(&a.confidence)
184 .unwrap_or(std::cmp::Ordering::Equal)
185 });
186 let excess: Vec<KnowledgeFact> = facts.drain(config.max_facts..).collect();
187 archived.extend(excess);
188 }
189
190 (count, archived)
191}
192
193pub fn run_lifecycle(facts: &mut Vec<KnowledgeFact>, config: &LifecycleConfig) -> LifecycleReport {
194 let decayed = apply_confidence_decay(facts, config);
195 let consolidated = consolidate_similar(facts, config.consolidation_similarity);
196 let (compacted, archived) = compact(facts, config);
197
198 if !archived.is_empty() {
199 let _ = archive_facts(&archived);
200 }
201
202 LifecycleReport {
203 decayed_count: decayed,
204 consolidated_count: consolidated,
205 archived_count: archived.len(),
206 compacted_count: compacted,
207 remaining_facts: facts.len(),
208 }
209}
210
211#[derive(Debug, Serialize, Deserialize)]
212struct ArchivedFacts {
213 pub archived_at: DateTime<Utc>,
214 pub facts: Vec<KnowledgeFact>,
215}
216
217fn archive_facts(facts: &[KnowledgeFact]) -> Result<(), String> {
218 let dir = crate::core::data_dir::lean_ctx_data_dir()?
219 .join("memory")
220 .join("archive");
221 std::fs::create_dir_all(&dir).map_err(|e| format!("{e}"))?;
222
223 let filename = format!("archive-{}.json", Utc::now().format("%Y%m%d-%H%M%S"));
224 let archive = ArchivedFacts {
225 archived_at: Utc::now(),
226 facts: facts.to_vec(),
227 };
228 let json = serde_json::to_string_pretty(&archive).map_err(|e| format!("{e}"))?;
229 std::fs::write(dir.join(filename), json).map_err(|e| format!("{e}"))
230}
231
232pub fn restore_archive(archive_path: &str) -> Result<Vec<KnowledgeFact>, String> {
233 let data = std::fs::read_to_string(archive_path).map_err(|e| format!("{e}"))?;
234 let archive: ArchivedFacts = serde_json::from_str(&data).map_err(|e| format!("{e}"))?;
235 Ok(archive.facts)
236}
237
238pub fn list_archives() -> Vec<PathBuf> {
239 let dir = match crate::core::data_dir::lean_ctx_data_dir() {
240 Ok(d) => d.join("memory").join("archive"),
241 Err(_) => return Vec::new(),
242 };
243
244 if !dir.exists() {
245 return Vec::new();
246 }
247
248 let mut archives: Vec<PathBuf> = std::fs::read_dir(&dir)
249 .into_iter()
250 .flatten()
251 .flatten()
252 .filter(|e| {
253 e.path()
254 .extension()
255 .map(|ext| ext == "json")
256 .unwrap_or(false)
257 })
258 .map(|e| e.path())
259 .collect();
260
261 archives.sort();
262 archives
263}
264
/// Case-insensitive Jaccard similarity of the whitespace-separated words in
/// `a` and `b`.
///
/// Returns a value in `0.0..=1.0`: the number of distinct words the two
/// strings share divided by the number of distinct words in either. Two
/// strings without any words are treated as identical (`1.0`).
fn word_similarity(a: &str, b: &str) -> f32 {
    use std::collections::HashSet;

    let a_lower = a.to_lowercase();
    let b_lower = b.to_lowercase();
    let a_words: HashSet<&str> = a_lower.split_whitespace().collect();
    let b_words: HashSet<&str> = b_lower.split_whitespace().collect();

    if a_words.is_empty() && b_words.is_empty() {
        return 1.0;
    }

    let shared = a_words.intersection(&b_words).count();
    // |A ∪ B| = |A| + |B| - |A ∩ B|; non-zero here because at least one set
    // has a word.
    let total = a_words.len() + b_words.len() - shared;

    shared as f32 / total as f32
}
284
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a fact that was created and confirmed just now, with no
    /// retrievals and a single confirmation.
    fn make_fact(category: &str, key: &str, value: &str, confidence: f32) -> KnowledgeFact {
        KnowledgeFact {
            category: category.to_string(),
            key: key.to_string(),
            value: value.to_string(),
            source_session: "s1".to_string(),
            confidence,
            created_at: Utc::now(),
            last_confirmed: Utc::now(),
            retrieval_count: 0,
            last_retrieved: None,
            valid_from: Some(Utc::now()),
            valid_until: None,
            supersedes: None,
            confirmation_count: 1,
        }
    }

    /// Same as `make_fact`, but with every timestamp moved `days_old` days
    /// into the past.
    fn make_old_fact(
        category: &str,
        key: &str,
        value: &str,
        confidence: f32,
        days_old: i64,
    ) -> KnowledgeFact {
        let past = Utc::now() - Duration::days(days_old);
        KnowledgeFact {
            created_at: past,
            last_confirmed: past,
            valid_from: Some(past),
            ..make_fact(category, key, value, confidence)
        }
    }

    #[test]
    fn decay_reduces_confidence() {
        let mut facts = vec![make_old_fact("arch", "db", "PostgreSQL", 0.9, 10)];

        let changed = apply_confidence_decay(&mut facts, &LifecycleConfig::default());

        assert_eq!(changed, 1);
        let confidence = facts[0].confidence;
        assert!(confidence < 0.9);
        assert!(confidence > 0.7);
    }

    #[test]
    fn decay_skips_recent_facts() {
        let mut facts = vec![make_fact("arch", "db", "PostgreSQL", 0.9)];

        let changed = apply_confidence_decay(&mut facts, &LifecycleConfig::default());

        assert_eq!(changed, 0);
    }

    #[test]
    fn consolidate_similar_facts() {
        let mut facts = vec![
            make_fact("arch", "db", "uses PostgreSQL database", 0.8),
            make_fact("arch", "db2", "uses PostgreSQL database system", 0.6),
            make_fact("ops", "deploy", "docker compose up", 0.9),
        ];

        let merged = consolidate_similar(&mut facts, 0.7);

        assert!(merged > 0, "Should consolidate similar facts");
        assert!(facts.len() < 3);
    }

    #[test]
    fn consolidate_keeps_different_categories() {
        let mut facts = vec![
            make_fact("arch", "db", "PostgreSQL", 0.8),
            make_fact("ops", "db", "PostgreSQL", 0.8),
        ];

        let merged = consolidate_similar(&mut facts, 0.9);

        assert_eq!(merged, 0, "Different categories should not consolidate");
    }

    #[test]
    fn compact_removes_low_confidence() {
        let mut facts = vec![
            make_fact("arch", "db", "PostgreSQL", 0.9),
            make_fact("arch", "cache", "Redis", 0.1),
        ];

        let (removed, archived) = compact(&mut facts, &LifecycleConfig::default());

        assert_eq!(removed, 1);
        assert_eq!(facts.len(), 1);
        assert_eq!(archived.len(), 1);
        assert_eq!(archived[0].key, "cache");
    }

    #[test]
    fn compact_archives_stale_facts() {
        let mut facts = vec![
            make_fact("arch", "db", "PostgreSQL", 0.9),
            make_old_fact("arch", "old", "ancient thing", 0.4, 60),
        ];

        let (removed, archived) = compact(&mut facts, &LifecycleConfig::default());

        assert_eq!(removed, 1);
        assert_eq!(archived[0].key, "old");
    }

    #[test]
    fn full_lifecycle_run() {
        let config = LifecycleConfig {
            max_facts: 5,
            ..Default::default()
        };
        let mut facts = vec![
            make_fact("arch", "db", "PostgreSQL", 0.9),
            make_fact("arch", "cache", "Redis", 0.8),
            make_old_fact("arch", "old1", "thing1", 0.2, 50),
            make_old_fact("arch", "old2", "thing2", 0.15, 60),
            make_fact("ops", "deploy", "docker compose", 0.7),
        ];

        let report = run_lifecycle(&mut facts, &config);

        assert!(report.remaining_facts <= config.max_facts);
        assert!(report.decayed_count > 0 || report.compacted_count > 0);
    }

    #[test]
    fn word_similarity_identical() {
        let sim = word_similarity("hello world", "hello world");
        assert!((sim - 1.0).abs() < 0.01);
    }

    #[test]
    fn word_similarity_partial() {
        let sim = word_similarity("uses PostgreSQL database", "PostgreSQL database system");
        assert!(sim >= 0.5, "Expected >= 0.5 but got {sim}");
        assert!(sim < 1.0);
    }

    #[test]
    fn word_similarity_different() {
        let sim = word_similarity("Redis cache", "Docker compose");
        assert!(sim < 0.1);
    }
}