1mod embeddings;
2mod index;
3mod search;
4mod watcher;
5mod workspace;
6
7#[cfg(feature = "embeddings-local")]
8pub use embeddings::FastEmbedProvider;
9#[cfg(feature = "gguf")]
10pub use embeddings::LlamaCppProvider;
11pub use embeddings::{EmbeddingProvider, OpenAIEmbeddingProvider, hash_text};
12pub use index::{MemoryIndex, ReindexStats};
13pub use search::MemoryChunk;
14pub use watcher::MemoryWatcher;
15pub use workspace::{init_state_dir, init_workspace};
16
17use anyhow::Result;
18use chrono::Local;
19use std::fs;
20use std::path::PathBuf;
21use std::sync::Arc;
22use std::time::Duration;
23use tokio::runtime::Handle;
24use tracing::{debug, info, warn};
25
26use crate::config::{Config, MemoryConfig};
27
28#[derive(Clone)]
29pub struct MemoryManager {
30 workspace: PathBuf,
31 db_path: PathBuf,
32 index: MemoryIndex,
33 config: MemoryConfig,
34 embedding_provider: Option<Arc<dyn EmbeddingProvider>>,
36 is_brand_new: bool,
38}
39
40#[derive(Debug)]
41pub struct MemoryStats {
42 pub workspace: String,
43 pub total_files: usize,
44 pub total_chunks: usize,
45 pub index_size_kb: u64,
46 pub files: Vec<FileStats>,
47}
48
/// Statistics for a single file covered by the memory index.
///
/// This is a plain data type, so it can be cloned and compared for equality.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FileStats {
    /// Display name of the file (relative to its base directory when possible).
    pub name: String,
    /// Number of chunks the index reports for this file.
    pub chunks: usize,
    /// Number of lines in the file's content.
    pub lines: usize,
}
55
/// One recently written memory log file, summarised for display.
///
/// This is a plain data type, so it can be cloned and compared for equality.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RecentEntry {
    /// File name of the log without its `.md` extension.
    pub timestamp: String,
    /// Path of the log relative to the workspace (`memory/<file name>`).
    pub file: String,
    /// Leading characters of the last non-empty line of the log.
    pub preview: String,
}
62
63impl MemoryManager {
64 pub fn new(config: &MemoryConfig) -> Result<Self> {
66 Self::new_with_agent(config, "main")
67 }
68
69 pub fn new_with_agent(config: &MemoryConfig, agent_id: &str) -> Result<Self> {
71 Self::new_with_full_config(config, None, agent_id)
72 }
73
74 pub fn new_with_full_config(
76 memory_config: &MemoryConfig,
77 app_config: Option<&Config>,
78 agent_id: &str,
79 ) -> Result<Self> {
80 let paths = if let Some(config) = app_config {
82 config.paths.clone()
83 } else {
84 crate::paths::Paths::resolve()?
85 };
86
87 let workspace = paths.workspace.clone();
88
89 let is_brand_new = init_workspace(&workspace, &paths)?;
91
92 let db_path = paths.search_index(agent_id);
94 if let Some(parent) = db_path.parent() {
95 std::fs::create_dir_all(parent)?;
96 }
97
98 let index = MemoryIndex::new_with_db_path(&workspace, &db_path)?
99 .with_chunk_config(memory_config.chunk_size, memory_config.chunk_overlap);
100
101 let embedding_provider: Option<Arc<dyn EmbeddingProvider>> = match memory_config
103 .embedding_provider
104 .as_str()
105 {
106 "local" => {
107 #[cfg(feature = "embeddings-local")]
108 {
109 let model_name = if memory_config.embedding_model.is_empty()
110 || memory_config.embedding_model == "text-embedding-3-small"
111 {
112 None } else {
114 Some(memory_config.embedding_model.as_str())
115 };
116 let cache_dir = if memory_config.embedding_cache_dir.is_empty() {
117 None
118 } else {
119 Some(memory_config.embedding_cache_dir.as_str())
120 };
121 match FastEmbedProvider::new_with_cache_dir(model_name, cache_dir) {
122 Ok(provider) => {
123 info!("Using local embedding provider: {}", provider.model());
124 Some(Arc::new(provider))
125 }
126 Err(e) => {
127 warn!(
128 "Failed to initialize local embeddings: {}. Falling back to FTS-only search.",
129 e
130 );
131 None
132 }
133 }
134 }
135 #[cfg(not(feature = "embeddings-local"))]
136 {
137 warn!(
138 "Local embeddings requested but `embeddings-local` feature is disabled. Falling back to FTS-only search."
139 );
140 None
141 }
142 }
143 "openai" => {
144 if let Some(config) = app_config {
146 if let Some(ref openai) = config.providers.openai {
147 match OpenAIEmbeddingProvider::new(
148 &openai.api_key,
149 &openai.base_url,
150 &memory_config.embedding_model,
151 ) {
152 Ok(provider) => {
153 info!("Using OpenAI embedding provider: {}", provider.model());
154 Some(Arc::new(provider))
155 }
156 Err(e) => {
157 warn!(
158 "Failed to initialize OpenAI embeddings: {}. Falling back to FTS-only search.",
159 e
160 );
161 None
162 }
163 }
164 } else {
165 warn!(
166 "OpenAI embedding provider requested but no OpenAI config found. Falling back to FTS-only search."
167 );
168 None
169 }
170 } else {
171 warn!(
172 "OpenAI embedding provider requested but no app config provided. Falling back to FTS-only search."
173 );
174 None
175 }
176 }
177 #[cfg(feature = "gguf")]
178 "gguf" => {
179 let cache_dir = if memory_config.embedding_cache_dir.is_empty() {
180 None
181 } else {
182 Some(memory_config.embedding_cache_dir.as_str())
183 };
184 match LlamaCppProvider::new(&memory_config.embedding_model, cache_dir) {
185 Ok(provider) => {
186 info!("Using GGUF embedding provider: {}", provider.model());
187 Some(Arc::new(provider))
188 }
189 Err(e) => {
190 warn!(
191 "Failed to initialize GGUF embeddings: {}. Falling back to FTS-only search.",
192 e
193 );
194 None
195 }
196 }
197 }
198 #[cfg(not(feature = "gguf"))]
199 "gguf" => {
200 warn!(
201 "GGUF embedding provider requested but 'gguf' feature is not enabled. Build with --features gguf. Falling back to FTS-only search."
202 );
203 None
204 }
205 "none" => {
206 debug!("Embeddings disabled, using FTS-only search");
207 None
208 }
209 other => {
210 warn!(
211 "Unknown embedding provider '{}'. Falling back to FTS-only search.",
212 other
213 );
214 None
215 }
216 };
217
218 Ok(Self {
219 workspace,
220 db_path,
221 index,
222 config: memory_config.clone(),
223 embedding_provider,
224 is_brand_new,
225 })
226 }
227
228 pub fn with_embedding_provider(mut self, provider: Arc<dyn EmbeddingProvider>) -> Self {
230 self.embedding_provider = Some(provider);
231 self
232 }
233
234 pub fn has_embeddings(&self) -> bool {
236 self.embedding_provider.is_some()
237 }
238
239 pub fn workspace(&self) -> &PathBuf {
240 &self.workspace
241 }
242
243 pub fn read_memory_file(&self) -> Result<String> {
245 let path = self.workspace.join("MEMORY.md");
246 if path.exists() {
247 Ok(fs::read_to_string(&path)?)
248 } else {
249 Ok(String::new())
250 }
251 }
252
253 pub fn read_heartbeat_file(&self) -> Result<String> {
255 let path = self.workspace.join("HEARTBEAT.md");
256 if path.exists() {
257 Ok(fs::read_to_string(&path)?)
258 } else {
259 Ok(String::new())
260 }
261 }
262
263 pub fn read_soul_file(&self) -> Result<String> {
265 let path = self.workspace.join("SOUL.md");
266 if path.exists() {
267 Ok(fs::read_to_string(&path)?)
268 } else {
269 Ok(String::new())
270 }
271 }
272
273 pub fn read_user_file(&self) -> Result<String> {
275 let path = self.workspace.join("USER.md");
276 if path.exists() {
277 Ok(fs::read_to_string(&path)?)
278 } else {
279 Ok(String::new())
280 }
281 }
282
283 pub fn read_identity_file(&self) -> Result<String> {
285 let path = self.workspace.join("IDENTITY.md");
286 if path.exists() {
287 Ok(fs::read_to_string(&path)?)
288 } else {
289 Ok(String::new())
290 }
291 }
292
293 pub fn read_agents_file(&self) -> Result<String> {
295 let path = self.workspace.join("AGENTS.md");
296 if path.exists() {
297 Ok(fs::read_to_string(&path)?)
298 } else {
299 Ok(String::new())
300 }
301 }
302
303 pub fn is_brand_new(&self) -> bool {
305 self.is_brand_new
306 }
307
308 pub fn read_tools_file(&self) -> Result<String> {
310 let path = self.workspace.join("TOOLS.md");
311 if path.exists() {
312 Ok(fs::read_to_string(&path)?)
313 } else {
314 Ok(String::new())
315 }
316 }
317
318 pub fn read_recent_daily_logs(&self, days: usize) -> Result<String> {
320 let memory_dir = self.workspace.join("memory");
321 if !memory_dir.exists() {
322 return Ok(String::new());
323 }
324
325 let today = Local::now().date_naive();
326 let mut content = String::new();
327
328 for i in 0..days {
329 let date = today - chrono::Duration::days(i as i64);
330 let filename = format!("{}.md", date.format("%Y-%m-%d"));
331 let path = memory_dir.join(&filename);
332
333 if path.exists()
334 && let Ok(file_content) = fs::read_to_string(&path)
335 {
336 if !content.is_empty() {
337 content.push_str("\n---\n\n");
338 }
339 content.push_str(&format!("## {}\n\n", filename));
340 content.push_str(&file_content);
341 }
342 }
343
344 Ok(content)
345 }
346
347 pub fn search(&self, query: &str, limit: usize) -> Result<Vec<MemoryChunk>> {
349 let mut results = self.search_raw(query, limit)?;
350
351 if self.config.temporal_decay_lambda > 0.0 {
353 let now = std::time::SystemTime::now()
354 .duration_since(std::time::UNIX_EPOCH)
355 .unwrap_or_default()
356 .as_secs() as i64;
357 for chunk in &mut results {
358 chunk.apply_temporal_decay(self.config.temporal_decay_lambda, now);
359 }
360 results.sort_by(|a, b| {
362 b.score
363 .partial_cmp(&a.score)
364 .unwrap_or(std::cmp::Ordering::Equal)
365 });
366 }
367
368 Ok(results)
369 }
370
371 fn search_raw(&self, query: &str, limit: usize) -> Result<Vec<MemoryChunk>> {
373 if let Some(ref provider) = self.embedding_provider {
375 if let Ok(handle) = Handle::try_current() {
377 let provider = provider.clone();
378 let query_string = query.to_string();
379 let model = provider.model().to_string();
380
381 let embedding_result = std::thread::spawn(move || {
383 handle.block_on(async { provider.embed(&query_string).await })
384 })
385 .join()
386 .map_err(|_| anyhow::anyhow!("Thread panicked"))?;
387
388 if let Ok(embedding) = embedding_result {
389 debug!("Using hybrid search with {} dimensions", embedding.len());
390 return self.index.search_hybrid(
391 query,
392 Some(&embedding),
393 &model,
394 limit,
395 0.3, 0.7, );
398 }
399 }
400 }
401
402 self.index.search(query, limit)
404 }
405
406 pub fn search_fts(&self, query: &str, limit: usize) -> Result<Vec<MemoryChunk>> {
408 self.index.search(query, limit)
409 }
410
411 pub fn chunk_count(&self) -> Result<usize> {
413 self.index.chunk_count()
414 }
415
416 pub fn reindex(&self, force: bool) -> Result<ReindexStats> {
418 let start = std::time::Instant::now();
419 let mut stats = ReindexStats {
420 files_processed: 0,
421 files_updated: 0,
422 chunks_indexed: 0,
423 duration: Duration::default(),
424 };
425
426 let files_removed = self.cleanup_deleted_files()?;
428 if files_removed > 0 {
429 info!("Removed {} deleted files from index", files_removed);
430 }
431
432 let pattern = format!("{}/**/*.md", self.workspace.display());
434 for entry in glob::glob(&pattern)
435 .into_iter()
436 .flatten()
437 .filter_map(|r| r.ok())
438 {
439 if entry.is_file() {
440 stats.files_processed += 1;
441 if self.index.index_file(&entry, force)? {
442 stats.files_updated += 1;
443 }
444 }
445 }
446
447 for index_path in &self.config.paths {
449 let base_path = if index_path.path.starts_with('~') || index_path.path.starts_with('/')
450 {
451 PathBuf::from(shellexpand::tilde(&index_path.path).to_string())
452 } else {
453 self.workspace.join(&index_path.path)
454 };
455
456 if base_path.starts_with(&self.workspace) {
458 continue;
459 }
460
461 if !base_path.exists() {
462 debug!("Skipping non-existent index path: {}", base_path.display());
463 continue;
464 }
465
466 let pattern = format!("{}/{}", base_path.display(), index_path.pattern);
467 debug!("Indexing external path with pattern: {}", pattern);
468
469 for entry in glob::glob(&pattern)
470 .into_iter()
471 .flatten()
472 .filter_map(|r| r.ok())
473 {
474 if entry.is_file() {
475 stats.files_processed += 1;
476 if self.index.index_file(&entry, force)? {
477 stats.files_updated += 1;
478 }
479 }
480 }
481 }
482
483 stats.chunks_indexed = self.index.chunk_count()?;
484 stats.duration = start.elapsed();
485
486 info!("Reindex complete: {:?}", stats);
487 Ok(stats)
488 }
489
490 fn cleanup_deleted_files(&self) -> Result<usize> {
492 let indexed_files = self.index.indexed_files()?;
493 let mut removed = 0;
494
495 for relative_path in indexed_files {
496 let full_path = self.workspace.join(&relative_path);
497 if !full_path.exists() {
498 debug!("Cleaning up deleted file: {}", relative_path);
499 self.index.remove_file(&relative_path)?;
500 removed += 1;
501 }
502 }
503
504 Ok(removed)
505 }
506
507 pub fn stats(&self) -> Result<MemoryStats> {
509 let mut files = Vec::new();
510 let mut total_chunks = 0;
511
512 let pattern = format!("{}/**/*.md", self.workspace.display());
514 for entry in glob::glob(&pattern)
515 .into_iter()
516 .flatten()
517 .filter_map(|r| r.ok())
518 {
519 if entry.is_file() {
520 let content = fs::read_to_string(&entry)?;
521 let lines = content.lines().count();
522 let chunks = self.index.file_chunk_count(&entry)?;
523 total_chunks += chunks;
524
525 let display_name = entry
526 .strip_prefix(&self.workspace)
527 .map(|rel| rel.display().to_string())
528 .unwrap_or_else(|_| entry.display().to_string());
529
530 files.push(FileStats {
531 name: display_name,
532 chunks,
533 lines,
534 });
535 }
536 }
537
538 for index_path in &self.config.paths {
540 let base_path = if index_path.path.starts_with('~') || index_path.path.starts_with('/')
541 {
542 PathBuf::from(shellexpand::tilde(&index_path.path).to_string())
543 } else {
544 self.workspace.join(&index_path.path)
545 };
546
547 if base_path.starts_with(&self.workspace) {
549 continue;
550 }
551
552 if !base_path.exists() {
553 continue;
554 }
555
556 let pattern = format!("{}/{}", base_path.display(), index_path.pattern);
557
558 for entry in glob::glob(&pattern)
559 .into_iter()
560 .flatten()
561 .filter_map(|r| r.ok())
562 {
563 if entry.is_file() {
564 let content = fs::read_to_string(&entry)?;
565 let lines = content.lines().count();
566 let chunks = self.index.file_chunk_count(&entry)?;
567 total_chunks += chunks;
568
569 let display_name = if let Ok(rel) = entry.strip_prefix(&base_path) {
570 format!("{}/{}", index_path.path, rel.display())
571 } else {
572 entry.display().to_string()
573 };
574
575 files.push(FileStats {
576 name: display_name,
577 chunks,
578 lines,
579 });
580 }
581 }
582 }
583
584 let index_size = self.index.size_bytes()? / 1024;
585
586 Ok(MemoryStats {
587 workspace: self.workspace.display().to_string(),
588 total_files: files.len(),
589 total_chunks,
590 index_size_kb: index_size,
591 files,
592 })
593 }
594
595 pub fn recent_entries(&self, count: usize) -> Result<Vec<RecentEntry>> {
597 let mut entries = Vec::new();
598
599 let memory_dir = self.workspace.join("memory");
600 if !memory_dir.exists() {
601 return Ok(entries);
602 }
603
604 let mut files: Vec<_> = fs::read_dir(&memory_dir)?
606 .filter_map(|e| e.ok())
607 .filter(|e| e.path().extension().map(|e| e == "md").unwrap_or(false))
608 .collect();
609
610 files.sort_by_key(|f| std::cmp::Reverse(f.file_name()));
611
612 for entry in files.into_iter().take(count) {
613 let path = entry.path();
614 let filename = path.file_name().unwrap().to_string_lossy().to_string();
615
616 if let Ok(content) = fs::read_to_string(&path) {
617 let preview = content
619 .lines()
620 .rev()
621 .find(|l| !l.trim().is_empty())
622 .unwrap_or("")
623 .chars()
624 .take(100)
625 .collect();
626
627 entries.push(RecentEntry {
628 timestamp: filename.replace(".md", ""),
629 file: format!("memory/{}", filename),
630 preview,
631 });
632 }
633 }
634
635 Ok(entries)
636 }
637
638 pub fn start_watcher(&self) -> Result<MemoryWatcher> {
640 MemoryWatcher::new(
641 self.workspace.clone(),
642 self.db_path.clone(),
643 self.config.clone(),
644 )
645 }
646
647 pub async fn generate_embeddings(&self, batch_size: usize) -> Result<(usize, usize)> {
651 let provider = match &self.embedding_provider {
652 Some(p) => p,
653 None => {
654 debug!("No embedding provider configured, skipping embedding generation");
655 return Ok((0, 0));
656 }
657 };
658
659 let provider_id = provider.id().to_string();
660 let model = provider.model().to_string();
661 let mut total_processed = 0;
662 let mut total_embedded = 0;
663 let mut cache_hits = 0;
664
665 loop {
666 let chunks = self.index.chunks_without_embeddings(batch_size)?;
668 if chunks.is_empty() {
669 break;
670 }
671
672 total_processed += chunks.len();
673
674 let mut to_embed: Vec<(String, String, String)> = Vec::new(); let mut from_cache: Vec<(String, Vec<f32>)> = Vec::new(); for (chunk_id, text) in &chunks {
679 let text_hash = hash_text(text);
680
681 if let Ok(Some(cached)) =
683 self.index
684 .get_cached_embedding(&provider_id, &model, &text_hash)
685 {
686 from_cache.push((chunk_id.clone(), cached));
687 cache_hits += 1;
688 } else {
689 to_embed.push((chunk_id.clone(), text.clone(), text_hash));
690 }
691 }
692
693 for (chunk_id, embedding) in from_cache {
695 if let Err(e) = self.index.store_embedding(&chunk_id, &embedding, &model) {
696 warn!(
697 "Failed to store cached embedding for chunk {}: {}",
698 chunk_id, e
699 );
700 } else {
701 total_embedded += 1;
702 }
703 }
704
705 if !to_embed.is_empty() {
707 let texts: Vec<String> = to_embed.iter().map(|(_, text, _)| text.clone()).collect();
708
709 match provider.embed_batch(&texts).await {
710 Ok(embeddings) => {
711 for ((chunk_id, _text, text_hash), embedding) in
712 to_embed.iter().zip(embeddings.iter())
713 {
714 if let Err(e) = self.index.store_embedding(chunk_id, embedding, &model)
716 {
717 warn!("Failed to store embedding for chunk {}: {}", chunk_id, e);
718 } else {
719 total_embedded += 1;
720 }
721
722 if let Err(e) = self.index.cache_embedding(
724 &provider_id,
725 &model,
726 "", text_hash,
728 embedding,
729 ) {
730 debug!("Failed to cache embedding: {}", e);
731 }
732 }
733 }
734 Err(e) => {
735 warn!("Failed to generate embeddings: {}", e);
736 break;
737 }
738 }
739 }
740
741 debug!(
742 "Generated embeddings: {}/{} chunks ({} from cache)",
743 total_embedded, total_processed, cache_hits
744 );
745
746 if chunks.len() < batch_size {
748 break;
749 }
750 }
751
752 info!(
753 "Embedding generation complete: {} chunks, {} embedded, {} cache hits",
754 total_processed, total_embedded, cache_hits
755 );
756
757 Ok((total_processed, total_embedded))
758 }
759
760 pub fn embedded_chunk_count(&self) -> Result<usize> {
762 let model = self
763 .embedding_provider
764 .as_ref()
765 .map(|p| p.model().to_string())
766 .unwrap_or_default();
767 self.index.embedded_chunk_count(&model)
768 }
769}