1pub mod active_recall;
2pub mod backend;
3mod backend_markdown;
4mod backend_none;
5mod backend_sqlite;
6pub mod dreaming;
7mod embeddings;
8mod index;
9pub mod query_expansion;
10mod search;
11pub mod session_index;
12mod watcher;
13pub mod wiki;
14mod workspace;
15
16pub use backend::{MemoryBackend, MemoryBackendKind};
17pub use backend_markdown::MarkdownBackend;
18pub use backend_none::NoneBackend;
19pub use backend_sqlite::SqliteBackend;
20#[cfg(feature = "embeddings-local")]
21pub use embeddings::FastEmbedProvider;
22#[cfg(feature = "gguf")]
23pub use embeddings::LlamaCppProvider;
24pub use embeddings::{
25 EmbeddingProvider, GeminiEmbeddingProvider, OpenAIEmbeddingProvider, hash_text,
26};
27pub use index::{MemoryIndex, ReindexStats};
28pub use query_expansion::{EXPAND_PROMPT, ExpandedQuery, expand_query_local, parse_llm_keywords};
29pub use search::MemoryChunk;
30pub use watcher::MemoryWatcher;
31pub use workspace::{init_state_dir, init_workspace};
32
33use anyhow::Result;
34use chrono::Local;
35use std::fs;
36use std::path::PathBuf;
37use std::sync::Arc;
38use std::time::Duration;
39use tokio::runtime::Handle;
40use tracing::{debug, info, warn};
41
42use crate::config::{Config, MemoryConfig};
43
44#[derive(Clone)]
45pub struct MemoryManager {
46 workspace: PathBuf,
47 db_path: PathBuf,
48 backend: Arc<dyn MemoryBackend>,
49 config: MemoryConfig,
50 embedding_provider: Option<Arc<dyn EmbeddingProvider>>,
52 is_brand_new: bool,
54}
55
56#[derive(Debug)]
58pub struct MemoryStats {
59 pub workspace: String,
61 pub total_files: usize,
63 pub total_chunks: usize,
65 pub index_size_kb: u64,
67 pub files: Vec<FileStats>,
69}
70
/// Statistics for a single file that takes part in the memory index.
#[derive(Debug)]
pub struct FileStats {
    /// Name shown to the user (a path relative to its root when one can be derived).
    pub name: String,
    /// Number of chunks the backend reports for this file.
    pub chunks: usize,
    /// Number of text lines in the file.
    pub lines: usize,
}
81
/// One entry in the "recent memory" listing.
#[derive(Debug)]
pub struct RecentEntry {
    /// Label derived from the log file's name (the name without its `.md` part).
    pub timestamp: String,
    /// Path of the log file, prefixed with `memory/`.
    pub file: String,
    /// Short excerpt of the file's content.
    pub preview: String,
}
92
93impl MemoryManager {
94 pub fn new(config: &MemoryConfig) -> Result<Self> {
96 Self::new_with_agent(config, "main")
97 }
98
99 pub fn new_with_agent(config: &MemoryConfig, agent_id: &str) -> Result<Self> {
101 Self::new_with_full_config(config, None, agent_id)
102 }
103
104 pub fn new_with_full_config(
106 memory_config: &MemoryConfig,
107 app_config: Option<&Config>,
108 agent_id: &str,
109 ) -> Result<Self> {
110 let paths = if let Some(config) = app_config {
112 config.paths.clone()
113 } else {
114 crate::paths::Paths::resolve()?
115 };
116
117 let workspace = paths.workspace.clone();
118
119 let is_brand_new = init_workspace(&workspace, &paths)?;
121
122 let db_path = paths.search_index(agent_id);
124 if let Some(parent) = db_path.parent() {
125 std::fs::create_dir_all(parent)?;
126 }
127
128 let backend: Arc<dyn MemoryBackend> = match memory_config.backend {
130 MemoryBackendKind::Sqlite => {
131 let index = MemoryIndex::new_with_db_path(&workspace, &db_path)?
132 .with_chunk_config(memory_config.chunk_size, memory_config.chunk_overlap);
133 Arc::new(SqliteBackend::new(index))
134 }
135 MemoryBackendKind::Markdown => Arc::new(MarkdownBackend::new(workspace.clone())),
136 MemoryBackendKind::None => Arc::new(NoneBackend::new()),
137 };
138
139 let embedding_provider: Option<Arc<dyn EmbeddingProvider>> = match memory_config
141 .embedding_provider
142 .as_str()
143 {
144 "local" => {
145 #[cfg(feature = "embeddings-local")]
146 {
147 let model_name = if memory_config.embedding_model.is_empty()
148 || memory_config.embedding_model == "text-embedding-3-small"
149 {
150 None } else {
152 Some(memory_config.embedding_model.as_str())
153 };
154 let cache_dir = if memory_config.embedding_cache_dir.is_empty() {
155 None
156 } else {
157 Some(memory_config.embedding_cache_dir.as_str())
158 };
159 match FastEmbedProvider::new_with_cache_dir(model_name, cache_dir) {
160 Ok(provider) => {
161 info!("Using local embedding provider: {}", provider.model());
162 Some(Arc::new(provider))
163 }
164 Err(e) => {
165 warn!(
166 "Failed to initialize local embeddings: {}. Falling back to FTS-only search.",
167 e
168 );
169 None
170 }
171 }
172 }
173 #[cfg(not(feature = "embeddings-local"))]
174 {
175 warn!(
176 "Local embeddings requested but `embeddings-local` feature is disabled. Falling back to FTS-only search."
177 );
178 None
179 }
180 }
181 "openai" => {
182 if let Some(config) = app_config {
184 if let Some(ref openai) = config.providers.openai {
185 match OpenAIEmbeddingProvider::new(
186 &openai.api_key,
187 &openai.base_url,
188 &memory_config.embedding_model,
189 ) {
190 Ok(provider) => {
191 info!("Using OpenAI embedding provider: {}", provider.model());
192 Some(Arc::new(provider))
193 }
194 Err(e) => {
195 warn!(
196 "Failed to initialize OpenAI embeddings: {}. Falling back to FTS-only search.",
197 e
198 );
199 None
200 }
201 }
202 } else {
203 warn!(
204 "OpenAI embedding provider requested but no OpenAI config found. Falling back to FTS-only search."
205 );
206 None
207 }
208 } else {
209 warn!(
210 "OpenAI embedding provider requested but no app config provided. Falling back to FTS-only search."
211 );
212 None
213 }
214 }
215 "gemini" => {
216 let api_key = memory_config.gemini_api_key.as_deref().or_else(|| {
217 app_config
218 .and_then(|c| c.providers.gemini.as_ref())
219 .map(|g| g.api_key.as_str())
220 });
221
222 if let Some(key) = api_key {
223 let model = if memory_config.embedding_model.is_empty()
224 || memory_config.embedding_model == "text-embedding-3-small"
225 {
226 None
227 } else {
228 Some(memory_config.embedding_model.as_str())
229 };
230 let provider = GeminiEmbeddingProvider::new(key, model);
231 info!("Using Gemini embedding provider: {}", provider.model());
232 Some(Arc::new(provider))
233 } else {
234 warn!(
235 "Gemini embedding provider requested but no API key configured. \
236 Set memory.gemini_api_key or providers.gemini.api_key. Falling back to FTS-only search."
237 );
238 None
239 }
240 }
241 #[cfg(feature = "gguf")]
242 "gguf" => {
243 let cache_dir = if memory_config.embedding_cache_dir.is_empty() {
244 None
245 } else {
246 Some(memory_config.embedding_cache_dir.as_str())
247 };
248 match LlamaCppProvider::new(&memory_config.embedding_model, cache_dir) {
249 Ok(provider) => {
250 info!("Using GGUF embedding provider: {}", provider.model());
251 Some(Arc::new(provider))
252 }
253 Err(e) => {
254 warn!(
255 "Failed to initialize GGUF embeddings: {}. Falling back to FTS-only search.",
256 e
257 );
258 None
259 }
260 }
261 }
262 #[cfg(not(feature = "gguf"))]
263 "gguf" => {
264 warn!(
265 "GGUF embedding provider requested but 'gguf' feature is not enabled. Build with --features gguf. Falling back to FTS-only search."
266 );
267 None
268 }
269 "none" => {
270 debug!("Embeddings disabled, using FTS-only search");
271 None
272 }
273 other => {
274 warn!(
275 "Unknown embedding provider '{}'. Falling back to FTS-only search.",
276 other
277 );
278 None
279 }
280 };
281
282 Ok(Self {
283 workspace,
284 db_path,
285 backend,
286 config: memory_config.clone(),
287 embedding_provider,
288 is_brand_new,
289 })
290 }
291
292 pub fn with_embedding_provider(mut self, provider: Arc<dyn EmbeddingProvider>) -> Self {
294 self.embedding_provider = Some(provider);
295 self
296 }
297
298 pub fn has_embeddings(&self) -> bool {
300 self.embedding_provider.is_some()
301 }
302
303 pub fn workspace(&self) -> &PathBuf {
304 &self.workspace
305 }
306
307 pub fn db_path(&self) -> &PathBuf {
309 &self.db_path
310 }
311
312 pub fn backend(&self) -> &dyn MemoryBackend {
314 self.backend.as_ref()
315 }
316
317 pub fn read_memory_file(&self) -> Result<String> {
319 let path = self.workspace.join("MEMORY.md");
320 if path.exists() {
321 Ok(fs::read_to_string(&path)?)
322 } else {
323 Ok(String::new())
324 }
325 }
326
327 pub fn read_heartbeat_file(&self) -> Result<String> {
329 let path = self.workspace.join("HEARTBEAT.md");
330 if path.exists() {
331 Ok(fs::read_to_string(&path)?)
332 } else {
333 Ok(String::new())
334 }
335 }
336
337 pub fn read_soul_file(&self) -> Result<String> {
339 let path = self.workspace.join("SOUL.md");
340 if path.exists() {
341 Ok(fs::read_to_string(&path)?)
342 } else {
343 Ok(String::new())
344 }
345 }
346
347 pub fn read_user_file(&self) -> Result<String> {
349 let path = self.workspace.join("USER.md");
350 if path.exists() {
351 Ok(fs::read_to_string(&path)?)
352 } else {
353 Ok(String::new())
354 }
355 }
356
357 pub fn read_identity_file(&self) -> Result<String> {
359 let path = self.workspace.join("IDENTITY.md");
360 if path.exists() {
361 Ok(fs::read_to_string(&path)?)
362 } else {
363 Ok(String::new())
364 }
365 }
366
367 pub fn read_agents_file(&self) -> Result<String> {
369 let path = self.workspace.join("AGENTS.md");
370 if path.exists() {
371 Ok(fs::read_to_string(&path)?)
372 } else {
373 Ok(String::new())
374 }
375 }
376
377 pub fn is_brand_new(&self) -> bool {
379 self.is_brand_new
380 }
381
382 pub fn read_tools_file(&self) -> Result<String> {
384 let path = self.workspace.join("TOOLS.md");
385 if path.exists() {
386 Ok(fs::read_to_string(&path)?)
387 } else {
388 Ok(String::new())
389 }
390 }
391
392 pub fn read_recent_daily_logs(&self, days: usize) -> Result<String> {
394 let memory_dir = self.workspace.join("memory");
395 if !memory_dir.exists() {
396 return Ok(String::new());
397 }
398
399 let today = Local::now().date_naive();
400 let mut content = String::new();
401
402 for i in 0..days {
403 let date = today - chrono::Duration::days(i as i64);
404 let filename = format!("{}.md", date.format("%Y-%m-%d"));
405 let path = memory_dir.join(&filename);
406
407 if path.exists()
408 && let Ok(file_content) = fs::read_to_string(&path)
409 {
410 if !content.is_empty() {
411 content.push_str("\n---\n\n");
412 }
413 content.push_str(&format!("## {}\n\n", filename));
414 content.push_str(&file_content);
415 }
416 }
417
418 Ok(content)
419 }
420
421 pub fn search(&self, query: &str, limit: usize) -> Result<Vec<MemoryChunk>> {
423 let mut results = self.search_raw(query, limit)?;
424
425 if self.config.temporal_decay_lambda > 0.0 {
427 let now = std::time::SystemTime::now()
428 .duration_since(std::time::UNIX_EPOCH)
429 .unwrap_or_default()
430 .as_secs() as i64;
431 for chunk in &mut results {
432 chunk.apply_temporal_decay(self.config.temporal_decay_lambda, now);
433 }
434 results.sort_by(|a, b| {
436 b.score
437 .partial_cmp(&a.score)
438 .unwrap_or(std::cmp::Ordering::Equal)
439 });
440 }
441
442 Ok(results)
443 }
444
445 fn search_raw(&self, query: &str, limit: usize) -> Result<Vec<MemoryChunk>> {
447 let expanded = query_expansion::expand_query_local(query);
449 let fts_query = &expanded.fts_query;
450 debug!(
451 "Query expanded: {:?} -> {} keywords",
452 query,
453 expanded.keywords.len()
454 );
455
456 if let Some(ref provider) = self.embedding_provider {
458 if let Ok(handle) = Handle::try_current() {
460 let provider = provider.clone();
461 let query_string = query.to_string();
462 let model = provider.model().to_string();
463
464 let embedding_result = std::thread::spawn(move || {
466 handle.block_on(async { provider.embed(&query_string).await })
467 })
468 .join()
469 .map_err(|_| anyhow::anyhow!("Thread panicked"))?;
470
471 if let Ok(embedding) = embedding_result {
472 debug!("Using hybrid search with {} dimensions", embedding.len());
473 return self.backend.search_hybrid(
475 fts_query,
476 Some(&embedding),
477 &model,
478 limit,
479 0.3, 0.7, );
482 }
483 }
484 }
485
486 self.backend.search_fts_raw(fts_query, limit)
488 }
489
490 pub fn search_fts(&self, query: &str, limit: usize) -> Result<Vec<MemoryChunk>> {
492 self.backend.search(query, limit)
493 }
494
495 pub fn chunk_count(&self) -> Result<usize> {
497 self.backend.chunk_count()
498 }
499
500 pub fn reindex(&self, force: bool) -> Result<ReindexStats> {
502 let start = std::time::Instant::now();
503 let mut stats = ReindexStats {
504 files_processed: 0,
505 files_updated: 0,
506 chunks_indexed: 0,
507 duration: Duration::default(),
508 };
509
510 let files_removed = self.cleanup_deleted_files()?;
512 if files_removed > 0 {
513 info!("Removed {} deleted files from index", files_removed);
514 }
515
516 let pattern = format!("{}/**/*.md", self.workspace.display());
518 for entry in glob::glob(&pattern)
519 .into_iter()
520 .flatten()
521 .filter_map(|r| r.ok())
522 {
523 if entry.is_file() {
524 stats.files_processed += 1;
525 if self.backend.index_file(&entry, force)? {
526 stats.files_updated += 1;
527 }
528 }
529 }
530
531 for index_path in &self.config.paths {
533 let base_path = if index_path.path.starts_with('~') || index_path.path.starts_with('/')
534 {
535 PathBuf::from(shellexpand::tilde(&index_path.path).to_string())
536 } else {
537 self.workspace.join(&index_path.path)
538 };
539
540 if base_path.starts_with(&self.workspace) {
542 continue;
543 }
544
545 if !base_path.exists() {
546 debug!("Skipping non-existent index path: {}", base_path.display());
547 continue;
548 }
549
550 let pattern = format!("{}/{}", base_path.display(), index_path.pattern);
551 debug!("Indexing external path with pattern: {}", pattern);
552
553 for entry in glob::glob(&pattern)
554 .into_iter()
555 .flatten()
556 .filter_map(|r| r.ok())
557 {
558 if entry.is_file() {
559 stats.files_processed += 1;
560 if self.backend.index_file(&entry, force)? {
561 stats.files_updated += 1;
562 }
563 }
564 }
565 }
566
567 stats.chunks_indexed = self.backend.chunk_count()?;
568 stats.duration = start.elapsed();
569
570 info!("Reindex complete: {:?}", stats);
571 Ok(stats)
572 }
573
574 fn cleanup_deleted_files(&self) -> Result<usize> {
576 let indexed_files = self.backend.indexed_files()?;
577 let mut removed = 0;
578
579 for relative_path in indexed_files {
580 let full_path = self.workspace.join(&relative_path);
581 if !full_path.exists() {
582 debug!("Cleaning up deleted file: {}", relative_path);
583 self.backend.remove_file(&relative_path)?;
584 removed += 1;
585 }
586 }
587
588 Ok(removed)
589 }
590
591 pub fn stats(&self) -> Result<MemoryStats> {
593 let mut files = Vec::new();
594 let mut total_chunks = 0;
595
596 let pattern = format!("{}/**/*.md", self.workspace.display());
598 for entry in glob::glob(&pattern)
599 .into_iter()
600 .flatten()
601 .filter_map(|r| r.ok())
602 {
603 if entry.is_file() {
604 let content = fs::read_to_string(&entry)?;
605 let lines = content.lines().count();
606 let chunks = self.backend.file_chunk_count(&entry)?;
607 total_chunks += chunks;
608
609 let display_name = entry
610 .strip_prefix(&self.workspace)
611 .map(|rel| rel.display().to_string())
612 .unwrap_or_else(|_| entry.display().to_string());
613
614 files.push(FileStats {
615 name: display_name,
616 chunks,
617 lines,
618 });
619 }
620 }
621
622 for index_path in &self.config.paths {
624 let base_path = if index_path.path.starts_with('~') || index_path.path.starts_with('/')
625 {
626 PathBuf::from(shellexpand::tilde(&index_path.path).to_string())
627 } else {
628 self.workspace.join(&index_path.path)
629 };
630
631 if base_path.starts_with(&self.workspace) {
633 continue;
634 }
635
636 if !base_path.exists() {
637 continue;
638 }
639
640 let pattern = format!("{}/{}", base_path.display(), index_path.pattern);
641
642 for entry in glob::glob(&pattern)
643 .into_iter()
644 .flatten()
645 .filter_map(|r| r.ok())
646 {
647 if entry.is_file() {
648 let content = fs::read_to_string(&entry)?;
649 let lines = content.lines().count();
650 let chunks = self.backend.file_chunk_count(&entry)?;
651 total_chunks += chunks;
652
653 let display_name = if let Ok(rel) = entry.strip_prefix(&base_path) {
654 format!("{}/{}", index_path.path, rel.display())
655 } else {
656 entry.display().to_string()
657 };
658
659 files.push(FileStats {
660 name: display_name,
661 chunks,
662 lines,
663 });
664 }
665 }
666 }
667
668 let index_size = self.backend.size_bytes()? / 1024;
669
670 Ok(MemoryStats {
671 workspace: self.workspace.display().to_string(),
672 total_files: files.len(),
673 total_chunks,
674 index_size_kb: index_size,
675 files,
676 })
677 }
678
679 pub fn recent_entries(&self, count: usize) -> Result<Vec<RecentEntry>> {
681 let mut entries = Vec::new();
682
683 let memory_dir = self.workspace.join("memory");
684 if !memory_dir.exists() {
685 return Ok(entries);
686 }
687
688 let mut files: Vec<_> = fs::read_dir(&memory_dir)?
690 .filter_map(|e| e.ok())
691 .filter(|e| e.path().extension().map(|e| e == "md").unwrap_or(false))
692 .collect();
693
694 files.sort_by_key(|f| std::cmp::Reverse(f.file_name()));
695
696 for entry in files.into_iter().take(count) {
697 let path = entry.path();
698 let filename = path.file_name().unwrap().to_string_lossy().to_string();
699
700 if let Ok(content) = fs::read_to_string(&path) {
701 let preview = content
703 .lines()
704 .rev()
705 .find(|l| !l.trim().is_empty())
706 .unwrap_or("")
707 .chars()
708 .take(100)
709 .collect();
710
711 entries.push(RecentEntry {
712 timestamp: filename.replace(".md", ""),
713 file: format!("memory/{}", filename),
714 preview,
715 });
716 }
717 }
718
719 Ok(entries)
720 }
721
722 pub fn start_watcher(&self) -> Result<MemoryWatcher> {
727 if self.backend.kind() != MemoryBackendKind::Sqlite {
728 return Err(anyhow::anyhow!(
729 "File watcher is only supported with the SQLite backend (current: {})",
730 self.backend.kind()
731 ));
732 }
733 MemoryWatcher::new(
734 self.workspace.clone(),
735 self.db_path.clone(),
736 self.config.clone(),
737 )
738 }
739
740 pub async fn generate_embeddings(&self, batch_size: usize) -> Result<(usize, usize)> {
744 let provider = match &self.embedding_provider {
745 Some(p) => p,
746 None => {
747 debug!("No embedding provider configured, skipping embedding generation");
748 return Ok((0, 0));
749 }
750 };
751
752 let provider_id = provider.id().to_string();
753 let model = provider.model().to_string();
754 let mut total_processed = 0;
755 let mut total_embedded = 0;
756 let mut cache_hits = 0;
757
758 loop {
759 let chunks = self.backend.chunks_without_embeddings(batch_size)?;
761 if chunks.is_empty() {
762 break;
763 }
764
765 total_processed += chunks.len();
766
767 let mut to_embed: Vec<(String, String, String)> = Vec::new(); let mut from_cache: Vec<(String, Vec<f32>)> = Vec::new(); for (chunk_id, text) in &chunks {
772 let text_hash = hash_text(text);
773
774 if let Ok(Some(cached)) =
776 self.backend
777 .get_cached_embedding(&provider_id, &model, &text_hash)
778 {
779 from_cache.push((chunk_id.clone(), cached));
780 cache_hits += 1;
781 } else {
782 to_embed.push((chunk_id.clone(), text.clone(), text_hash));
783 }
784 }
785
786 for (chunk_id, embedding) in from_cache {
788 if let Err(e) = self.backend.store_embedding(&chunk_id, &embedding, &model) {
789 warn!(
790 "Failed to store cached embedding for chunk {}: {}",
791 chunk_id, e
792 );
793 } else {
794 total_embedded += 1;
795 }
796 }
797
798 if !to_embed.is_empty() {
800 let texts: Vec<String> = to_embed.iter().map(|(_, text, _)| text.clone()).collect();
801
802 match provider.embed_batch(&texts).await {
803 Ok(embeddings) => {
804 for ((chunk_id, _text, text_hash), embedding) in
805 to_embed.iter().zip(embeddings.iter())
806 {
807 if let Err(e) =
809 self.backend.store_embedding(chunk_id, embedding, &model)
810 {
811 warn!("Failed to store embedding for chunk {}: {}", chunk_id, e);
812 } else {
813 total_embedded += 1;
814 }
815
816 if let Err(e) = self.backend.cache_embedding(
818 &provider_id,
819 &model,
820 "", text_hash,
822 embedding,
823 ) {
824 debug!("Failed to cache embedding: {}", e);
825 }
826 }
827 }
828 Err(e) => {
829 warn!("Failed to generate embeddings: {}", e);
830 break;
831 }
832 }
833 }
834
835 debug!(
836 "Generated embeddings: {}/{} chunks ({} from cache)",
837 total_embedded, total_processed, cache_hits
838 );
839
840 if chunks.len() < batch_size {
842 break;
843 }
844 }
845
846 info!(
847 "Embedding generation complete: {} chunks, {} embedded, {} cache hits",
848 total_processed, total_embedded, cache_hits
849 );
850
851 Ok((total_processed, total_embedded))
852 }
853
854 pub fn embedded_chunk_count(&self) -> Result<usize> {
856 let model = self
857 .embedding_provider
858 .as_ref()
859 .map(|p| p.model().to_string())
860 .unwrap_or_default();
861 self.backend.embedded_chunk_count(&model)
862 }
863}