1use std::collections::{HashMap, HashSet};
12use std::sync::Arc;
13
14use common::{Memory, MemoryType};
15use storage::VectorStorage;
16use tokio::sync::RwLock;
17use tracing;
18
/// Runtime configuration for the auto-pilot background maintenance tasks.
#[derive(Debug, Clone, PartialEq)]
pub struct AutoPilotConfig {
    /// Master switch: when `false`, the background loops only poll the
    /// configuration and run no maintenance cycle.
    pub enabled: bool,
    /// Cosine similarity at or above which two memories are treated as
    /// duplicates of each other.
    pub dedup_threshold: f32,
    /// Hours to wait between deduplication cycles.
    pub dedup_interval_hours: u64,
    /// Hours to wait between consolidation cycles.
    pub consolidation_interval_hours: u64,
}

impl Default for AutoPilotConfig {
    fn default() -> Self {
        Self {
            enabled: true,
            dedup_threshold: 0.93,
            dedup_interval_hours: 6,
            consolidation_interval_hours: 12,
        }
    }
}

impl AutoPilotConfig {
    /// Builds a configuration from `DAKERA_AUTOPILOT_*` environment variables.
    ///
    /// Every variable is optional. A variable that is unset, unparsable, or
    /// outside its safe range falls back to the corresponding
    /// [`Default`] value:
    ///
    /// * `DAKERA_AUTOPILOT_ENABLED` — `true`/`false`, `1`/`0`, `yes`/`no`,
    ///   `on`/`off` (case-insensitive).
    /// * `DAKERA_AUTOPILOT_DEDUP_THRESHOLD` — finite value in `(0.0, 1.0]`.
    ///   Zero or negative thresholds are rejected because they would classify
    ///   nearly every pair of memories as duplicates.
    /// * `DAKERA_AUTOPILOT_DEDUP_INTERVAL_HOURS` and
    ///   `DAKERA_AUTOPILOT_CONSOLIDATION_INTERVAL_HOURS` — positive integers.
    ///   Zero is rejected because it would make the background loop run
    ///   without any delay between cycles.
    pub fn from_env() -> Self {
        let defaults = Self::default();

        Self {
            enabled: Self::env_flag("DAKERA_AUTOPILOT_ENABLED").unwrap_or(defaults.enabled),
            dedup_threshold: Self::env_parsed::<f32>("DAKERA_AUTOPILOT_DEDUP_THRESHOLD")
                .filter(|t| t.is_finite() && *t > 0.0 && *t <= 1.0)
                .unwrap_or(defaults.dedup_threshold),
            dedup_interval_hours: Self::env_parsed::<u64>("DAKERA_AUTOPILOT_DEDUP_INTERVAL_HOURS")
                .filter(|hours| *hours > 0)
                .unwrap_or(defaults.dedup_interval_hours),
            consolidation_interval_hours: Self::env_parsed::<u64>(
                "DAKERA_AUTOPILOT_CONSOLIDATION_INTERVAL_HOURS",
            )
            .filter(|hours| *hours > 0)
            .unwrap_or(defaults.consolidation_interval_hours),
        }
    }

    /// Reads `key` from the environment and parses the trimmed value as `T`.
    fn env_parsed<T: std::str::FromStr>(key: &str) -> Option<T> {
        std::env::var(key).ok()?.trim().parse().ok()
    }

    /// Reads `key` from the environment as a boolean flag, accepting the
    /// common spellings operators use for on/off switches.
    fn env_flag(key: &str) -> Option<bool> {
        let raw = std::env::var(key).ok()?;
        match raw.trim().to_ascii_lowercase().as_str() {
            "true" | "1" | "yes" | "on" => Some(true),
            "false" | "0" | "no" | "off" => Some(false),
            _ => None,
        }
    }
}
75
/// Counters describing one deduplication cycle.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct DedupResult {
    /// Number of agent namespaces that were visited.
    pub namespaces_processed: usize,
    /// Number of memories that were compared.
    pub memories_scanned: usize,
    /// Number of memories reported as removed duplicates.
    pub duplicates_removed: usize,
}
83
/// Counters describing one consolidation cycle.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct ConsolidationResult {
    /// Number of agent namespaces that were visited.
    pub namespaces_processed: usize,
    /// Number of memories that were eligible for consolidation.
    pub memories_scanned: usize,
    /// Number of clusters that were replaced by a single merged memory.
    pub clusters_merged: usize,
    /// Number of original memories folded into merged memories.
    pub memories_consolidated: usize,
}
92
93pub struct AutoPilotEngine {
95 pub config: AutoPilotConfig,
96}
97
98impl AutoPilotEngine {
99 pub fn new(config: AutoPilotConfig) -> Self {
100 Self { config }
101 }
102
103 fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
105 if a.len() != b.len() || a.is_empty() {
106 return 0.0;
107 }
108 let mut dot = 0.0_f64;
109 let mut norm_a = 0.0_f64;
110 let mut norm_b = 0.0_f64;
111 for (x, y) in a.iter().zip(b.iter()) {
112 let xd = *x as f64;
113 let yd = *y as f64;
114 dot += xd * yd;
115 norm_a += xd * xd;
116 norm_b += yd * yd;
117 }
118 let denom = norm_a.sqrt() * norm_b.sqrt();
119 if denom == 0.0 {
120 0.0
121 } else {
122 (dot / denom) as f32
123 }
124 }
125
126 fn retention_score(memory: &Memory) -> f64 {
128 memory.importance as f64 + memory.access_count as f64 * 0.01
129 }
130
131 pub async fn run_dedup(&self, storage: &Arc<dyn VectorStorage>) -> DedupResult {
137 let mut result = DedupResult::default();
138
139 let namespaces = match storage.list_namespaces().await {
140 Ok(ns) => ns,
141 Err(e) => {
142 tracing::error!(error = %e, "Auto-dedup: failed to list namespaces");
143 return result;
144 }
145 };
146
147 for namespace in namespaces {
148 if !namespace.starts_with("_dakera_agent_") {
149 continue;
150 }
151 result.namespaces_processed += 1;
152
153 let vectors = match storage.get_all(&namespace).await {
154 Ok(v) => v,
155 Err(e) => {
156 tracing::warn!(
157 namespace = %namespace,
158 error = %e,
159 "Auto-dedup: failed to get vectors"
160 );
161 continue;
162 }
163 };
164
165 let items: Vec<(Memory, &[f32])> = vectors
167 .iter()
168 .filter_map(|v| {
169 let mem = Memory::from_vector(v)?;
170 if v.values.is_empty() {
171 return None;
172 }
173 Some((mem, v.values.as_slice()))
174 })
175 .collect();
176
177 result.memories_scanned += items.len();
178
179 let mut to_delete: HashSet<String> = HashSet::new();
181
182 for i in 0..items.len() {
183 if to_delete.contains(&items[i].0.id) {
184 continue;
185 }
186 for j in (i + 1)..items.len() {
187 if to_delete.contains(&items[j].0.id) {
188 continue;
189 }
190 let sim = Self::cosine_similarity(items[i].1, items[j].1);
191 if sim >= self.config.dedup_threshold {
192 if Self::retention_score(&items[i].0) >= Self::retention_score(&items[j].0)
194 {
195 to_delete.insert(items[j].0.id.clone());
196 } else {
197 to_delete.insert(items[i].0.id.clone());
198 break; }
200 }
201 }
202 }
203
204 if !to_delete.is_empty() {
205 let ids: Vec<String> = to_delete.into_iter().collect();
206 result.duplicates_removed += ids.len();
207 if let Err(e) = storage.delete(&namespace, &ids).await {
208 tracing::warn!(
209 namespace = %namespace,
210 count = ids.len(),
211 error = %e,
212 "Auto-dedup: failed to delete duplicates"
213 );
214 }
215 }
216 }
217
218 tracing::info!(
219 namespaces = result.namespaces_processed,
220 scanned = result.memories_scanned,
221 removed = result.duplicates_removed,
222 "Auto-dedup cycle completed"
223 );
224
225 result
226 }
227
228 pub async fn run_consolidation(&self, storage: &Arc<dyn VectorStorage>) -> ConsolidationResult {
234 let mut result = ConsolidationResult::default();
235
236 let namespaces = match storage.list_namespaces().await {
237 Ok(ns) => ns,
238 Err(e) => {
239 tracing::error!(error = %e, "Auto-consolidation: failed to list namespaces");
240 return result;
241 }
242 };
243
244 for namespace in namespaces {
245 if !namespace.starts_with("_dakera_agent_") {
246 continue;
247 }
248 result.namespaces_processed += 1;
249
250 let vectors = match storage.get_all(&namespace).await {
251 Ok(v) => v,
252 Err(e) => {
253 tracing::warn!(
254 namespace = %namespace,
255 error = %e,
256 "Auto-consolidation: failed to get vectors"
257 );
258 continue;
259 }
260 };
261
262 let items: Vec<(Memory, Vec<f32>)> = vectors
264 .iter()
265 .filter_map(|v| {
266 let mem = Memory::from_vector(v)?;
267 if mem.importance >= 0.3 || v.values.is_empty() || mem.tags.is_empty() {
268 return None;
269 }
270 Some((mem, v.values.clone()))
271 })
272 .collect();
273
274 result.memories_scanned += items.len();
275
276 if items.len() < 3 {
277 continue;
278 }
279
280 let mut tag_to_indices: HashMap<&str, Vec<usize>> = HashMap::new();
282 for (i, (mem, _)) in items.iter().enumerate() {
283 for tag in &mem.tags {
284 tag_to_indices.entry(tag.as_str()).or_default().push(i);
285 }
286 }
287
288 let mut pair_shared_tags: HashMap<(usize, usize), usize> = HashMap::new();
290 for indices in tag_to_indices.values() {
291 for ai in 0..indices.len() {
292 for bi in (ai + 1)..indices.len() {
293 let key = (indices[ai], indices[bi]);
294 *pair_shared_tags.entry(key).or_default() += 1;
295 }
296 }
297 }
298
299 let mut adjacency: HashMap<usize, HashSet<usize>> = HashMap::new();
301 for (&(a, b), &count) in &pair_shared_tags {
302 if count >= 2 {
303 adjacency.entry(a).or_default().insert(b);
304 adjacency.entry(b).or_default().insert(a);
305 }
306 }
307
308 let mut visited: HashSet<usize> = HashSet::new();
310 let mut clusters: Vec<Vec<usize>> = Vec::new();
311
312 for &node in adjacency.keys() {
313 if visited.contains(&node) {
314 continue;
315 }
316 let mut cluster = Vec::new();
317 let mut stack = vec![node];
318 while let Some(n) = stack.pop() {
319 if visited.insert(n) {
320 cluster.push(n);
321 if let Some(neighbors) = adjacency.get(&n) {
322 for &nb in neighbors {
323 if !visited.contains(&nb) {
324 stack.push(nb);
325 }
326 }
327 }
328 }
329 }
330 if cluster.len() >= 3 {
331 clusters.push(cluster);
332 }
333 }
334
335 for (ci, cluster) in clusters.iter().enumerate() {
337 let memories: Vec<&Memory> = cluster.iter().map(|&i| &items[i].0).collect();
338 let embeddings: Vec<&Vec<f32>> = cluster.iter().map(|&i| &items[i].1).collect();
339
340 let max_importance = memories
341 .iter()
342 .map(|m| m.importance)
343 .fold(0.0_f32, f32::max);
344
345 let mut all_tags: Vec<String> =
347 memories.iter().flat_map(|m| m.tags.clone()).collect();
348 all_tags.sort();
349 all_tags.dedup();
350
351 let combined_content: String = memories
353 .iter()
354 .map(|m| m.content.as_str())
355 .collect::<Vec<_>>()
356 .join("\n---\n");
357
358 let dim = embeddings[0].len();
360 let mut avg_embedding = vec![0.0_f32; dim];
361 for emb in &embeddings {
362 for (i, v) in emb.iter().enumerate() {
363 avg_embedding[i] += v;
364 }
365 }
366 let count = embeddings.len() as f32;
367 for v in &mut avg_embedding {
368 *v /= count;
369 }
370
371 let now = std::time::SystemTime::now()
372 .duration_since(std::time::UNIX_EPOCH)
373 .unwrap_or_default()
374 .as_nanos();
375
376 let agent_id = memories[0].agent_id.clone();
377 let merged_id = format!("mem_consolidated_{:x}_{}", now, ci);
378
379 let merged_memory = Memory {
380 id: merged_id,
381 memory_type: MemoryType::Semantic,
382 content: combined_content,
383 agent_id,
384 session_id: None,
385 importance: max_importance,
386 tags: all_tags,
387 metadata: None,
388 created_at: (now / 1_000_000_000) as u64,
389 last_accessed_at: (now / 1_000_000_000) as u64,
390 access_count: 0,
391 ttl_seconds: None,
392 expires_at: None,
393 };
394
395 let merged_vector = merged_memory.to_vector(avg_embedding);
396
397 let ids_to_delete: Vec<String> = memories.iter().map(|m| m.id.clone()).collect();
399
400 if let Err(e) = storage.delete(&namespace, &ids_to_delete).await {
401 tracing::warn!(
402 namespace = %namespace,
403 error = %e,
404 "Auto-consolidation: failed to delete originals"
405 );
406 continue;
407 }
408
409 if let Err(e) = storage.upsert(&namespace, vec![merged_vector]).await {
410 tracing::warn!(
411 namespace = %namespace,
412 error = %e,
413 "Auto-consolidation: failed to insert merged memory"
414 );
415 continue;
416 }
417
418 result.clusters_merged += 1;
419 result.memories_consolidated += ids_to_delete.len();
420 }
421 }
422
423 tracing::info!(
424 namespaces = result.namespaces_processed,
425 scanned = result.memories_scanned,
426 clusters = result.clusters_merged,
427 consolidated = result.memories_consolidated,
428 "Auto-consolidation cycle completed"
429 );
430
431 result
432 }
433
434 pub fn spawn(
441 config: Arc<RwLock<AutoPilotConfig>>,
442 storage: Arc<dyn VectorStorage>,
443 metrics: Arc<crate::decay::BackgroundMetrics>,
444 ) -> (tokio::task::JoinHandle<()>, tokio::task::JoinHandle<()>) {
445 let storage_dedup = storage.clone();
446 let metrics_dedup = metrics.clone();
447 let config_dedup = config.clone();
448
449 let dedup_handle = tokio::spawn(async move {
450 loop {
451 let (enabled, dedup_threshold, interval_hours) = {
452 let cfg = config_dedup.read().await;
453 (cfg.enabled, cfg.dedup_threshold, cfg.dedup_interval_hours)
454 };
455
456 if !enabled {
457 tokio::time::sleep(std::time::Duration::from_secs(300)).await;
459 continue;
460 }
461
462 tokio::time::sleep(std::time::Duration::from_secs(interval_hours * 3600)).await;
463
464 if !config_dedup.read().await.enabled {
466 continue;
467 }
468
469 let engine = AutoPilotEngine::new(AutoPilotConfig {
470 enabled: true,
471 dedup_threshold,
472 ..Default::default()
473 });
474 let result = engine.run_dedup(&storage_dedup).await;
475 metrics_dedup.record_dedup(
476 result.namespaces_processed,
477 result.memories_scanned,
478 result.duplicates_removed,
479 );
480 }
481 });
482
483 let consolidation_handle = tokio::spawn(async move {
484 loop {
485 let (enabled, interval_hours) = {
486 let cfg = config.read().await;
487 (cfg.enabled, cfg.consolidation_interval_hours)
488 };
489
490 if !enabled {
491 tokio::time::sleep(std::time::Duration::from_secs(300)).await;
492 continue;
493 }
494
495 tokio::time::sleep(std::time::Duration::from_secs(interval_hours * 3600)).await;
496
497 if !config.read().await.enabled {
498 continue;
499 }
500
501 let engine = AutoPilotEngine::new(AutoPilotConfig::default());
502 let result = engine.run_consolidation(&storage).await;
503 metrics.record_consolidation(
504 result.namespaces_processed,
505 result.memories_scanned,
506 result.clusters_merged,
507 result.memories_consolidated,
508 );
509 }
510 });
511
512 (dedup_handle, consolidation_handle)
513 }
514}
515
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_cosine_similarity_identical() {
        let a = vec![1.0, 0.0, 0.0];
        let b = vec![1.0, 0.0, 0.0];
        let sim = AutoPilotEngine::cosine_similarity(&a, &b);
        assert!((sim - 1.0).abs() < 0.001);
    }

    #[test]
    fn test_cosine_similarity_orthogonal() {
        let a = vec![1.0, 0.0, 0.0];
        let b = vec![0.0, 1.0, 0.0];
        let sim = AutoPilotEngine::cosine_similarity(&a, &b);
        assert!(sim.abs() < 0.001);
    }

    #[test]
    fn test_cosine_similarity_opposite() {
        let a = vec![1.0, 0.0];
        let b = vec![-1.0, 0.0];
        let sim = AutoPilotEngine::cosine_similarity(&a, &b);
        assert!((sim - (-1.0)).abs() < 0.001);
    }

    #[test]
    fn test_cosine_similarity_empty() {
        let sim = AutoPilotEngine::cosine_similarity(&[], &[]);
        assert!(sim.abs() < 0.001);
    }

    // Vectors of different dimensionality are not comparable and score 0.
    #[test]
    fn test_cosine_similarity_mismatched_lengths() {
        let a = vec![1.0, 0.0];
        let b = vec![1.0, 0.0, 0.0];
        let sim = AutoPilotEngine::cosine_similarity(&a, &b);
        assert!(sim.abs() < 0.001);
    }

    // A zero-magnitude vector must yield 0 instead of NaN from a 0/0 division.
    #[test]
    fn test_cosine_similarity_zero_vector() {
        let a = vec![0.0, 0.0, 0.0];
        let b = vec![1.0, 2.0, 3.0];
        let sim = AutoPilotEngine::cosine_similarity(&a, &b);
        assert!(sim.abs() < 0.001);
    }

    #[test]
    fn test_retention_score() {
        let mut mem = Memory {
            id: "test".to_string(),
            memory_type: MemoryType::Episodic,
            content: "test".to_string(),
            agent_id: "agent".to_string(),
            session_id: None,
            importance: 0.5,
            tags: vec![],
            metadata: None,
            created_at: 0,
            last_accessed_at: 0,
            access_count: 10,
            ttl_seconds: None,
            expires_at: None,
        };
        let score_a = AutoPilotEngine::retention_score(&mem);

        mem.importance = 0.8;
        mem.access_count = 0;
        let score_b = AutoPilotEngine::retention_score(&mem);

        // 0.5 importance + 10 accesses * 0.01
        assert!((score_a - 0.6).abs() < 0.001);
        assert!((score_b - 0.8).abs() < 0.001);
    }

    #[test]
    fn test_config_defaults() {
        let config = AutoPilotConfig::default();
        assert!(config.enabled);
        assert!((config.dedup_threshold - 0.93).abs() < 0.001);
        assert_eq!(config.dedup_interval_hours, 6);
        assert_eq!(config.consolidation_interval_hours, 12);
    }
}