Skip to main content

localgpt_core/memory/
mod.rs

1mod embeddings;
2mod index;
3mod search;
4mod watcher;
5mod workspace;
6
7#[cfg(feature = "embeddings-local")]
8pub use embeddings::FastEmbedProvider;
9#[cfg(feature = "gguf")]
10pub use embeddings::LlamaCppProvider;
11pub use embeddings::{EmbeddingProvider, OpenAIEmbeddingProvider, hash_text};
12pub use index::{MemoryIndex, ReindexStats};
13pub use search::MemoryChunk;
14pub use watcher::MemoryWatcher;
15pub use workspace::{init_state_dir, init_workspace};
16
17use anyhow::Result;
18use chrono::Local;
19use std::fs;
20use std::path::PathBuf;
21use std::sync::Arc;
22use std::time::Duration;
23use tokio::runtime::Handle;
24use tracing::{debug, info, warn};
25
26use crate::config::{Config, MemoryConfig};
27
/// Entry point for reading workspace memory files, searching the memory
/// index, and (re)building that index and its embeddings.
#[derive(Clone)]
pub struct MemoryManager {
    /// Root directory of the workspace whose files are read and indexed.
    workspace: PathBuf,
    /// Location of the search-index database for this agent.
    db_path: PathBuf,
    /// Index used for full-text and hybrid search.
    index: MemoryIndex,
    /// Copy of the memory configuration this manager was created with.
    config: MemoryConfig,
    /// Optional embedding provider for semantic search
    embedding_provider: Option<Arc<dyn EmbeddingProvider>>,
    /// True if this was a brand new workspace (first run)
    is_brand_new: bool,
}
39
/// Summary of the indexed memory, as produced by `MemoryManager::stats`.
#[derive(Debug)]
pub struct MemoryStats {
    /// Display form of the workspace path.
    pub workspace: String,
    /// Number of entries in `files`.
    pub total_files: usize,
    /// Sum of the chunk counts of all listed files.
    pub total_chunks: usize,
    /// Index size in kibibytes (byte size divided by 1024, rounded down).
    pub index_size_kb: u64,
    /// Per-file statistics.
    pub files: Vec<FileStats>,
}
48
/// Statistics for a single file reported by `MemoryManager::stats`.
#[derive(Debug)]
pub struct FileStats {
    /// Display name of the file (workspace-relative where possible).
    pub name: String,
    /// Number of index chunks recorded for the file.
    pub chunks: usize,
    /// Number of lines in the file's current content.
    pub lines: usize,
}
55
/// One entry returned by `MemoryManager::recent_entries`.
#[derive(Debug)]
pub struct RecentEntry {
    /// Name of the log file with its `.md` part removed.
    pub timestamp: String,
    /// Path of the log file in the form `memory/<file name>`.
    pub file: String,
    /// Up to 100 characters of the file's last non-blank line.
    pub preview: String,
}
62
63impl MemoryManager {
64    /// Create a new MemoryManager with the default agent ID ("main")
65    pub fn new(config: &MemoryConfig) -> Result<Self> {
66        Self::new_with_agent(config, "main")
67    }
68
69    /// Create a new MemoryManager for a specific agent ID (OpenClaw-compatible)
70    pub fn new_with_agent(config: &MemoryConfig, agent_id: &str) -> Result<Self> {
71        Self::new_with_full_config(config, None, agent_id)
72    }
73
74    /// Create a new MemoryManager with full config (for OpenAI embedding provider)
75    pub fn new_with_full_config(
76        memory_config: &MemoryConfig,
77        app_config: Option<&Config>,
78        agent_id: &str,
79    ) -> Result<Self> {
80        // Use paths from app_config if available, otherwise resolve fresh
81        let paths = if let Some(config) = app_config {
82            config.paths.clone()
83        } else {
84            crate::paths::Paths::resolve()?
85        };
86
87        let workspace = paths.workspace.clone();
88
89        // Initialize workspace with templates if needed, returns true if brand new
90        let is_brand_new = init_workspace(&workspace, &paths)?;
91
92        // Database goes in cache_dir/memory/{agentId}.sqlite (XDG cache — regenerable)
93        let db_path = paths.search_index(agent_id);
94        if let Some(parent) = db_path.parent() {
95            std::fs::create_dir_all(parent)?;
96        }
97
98        let index = MemoryIndex::new_with_db_path(&workspace, &db_path)?
99            .with_chunk_config(memory_config.chunk_size, memory_config.chunk_overlap);
100
101        // Create embedding provider based on config
102        let embedding_provider: Option<Arc<dyn EmbeddingProvider>> = match memory_config
103            .embedding_provider
104            .as_str()
105        {
106            "local" => {
107                #[cfg(feature = "embeddings-local")]
108                {
109                    let model_name = if memory_config.embedding_model.is_empty()
110                        || memory_config.embedding_model == "text-embedding-3-small"
111                    {
112                        None // Use default local model
113                    } else {
114                        Some(memory_config.embedding_model.as_str())
115                    };
116                    let cache_dir = if memory_config.embedding_cache_dir.is_empty() {
117                        None
118                    } else {
119                        Some(memory_config.embedding_cache_dir.as_str())
120                    };
121                    match FastEmbedProvider::new_with_cache_dir(model_name, cache_dir) {
122                        Ok(provider) => {
123                            info!("Using local embedding provider: {}", provider.model());
124                            Some(Arc::new(provider))
125                        }
126                        Err(e) => {
127                            warn!(
128                                "Failed to initialize local embeddings: {}. Falling back to FTS-only search.",
129                                e
130                            );
131                            None
132                        }
133                    }
134                }
135                #[cfg(not(feature = "embeddings-local"))]
136                {
137                    warn!(
138                        "Local embeddings requested but `embeddings-local` feature is disabled. Falling back to FTS-only search."
139                    );
140                    None
141                }
142            }
143            "openai" => {
144                // Need OpenAI config for API key
145                if let Some(config) = app_config {
146                    if let Some(ref openai) = config.providers.openai {
147                        match OpenAIEmbeddingProvider::new(
148                            &openai.api_key,
149                            &openai.base_url,
150                            &memory_config.embedding_model,
151                        ) {
152                            Ok(provider) => {
153                                info!("Using OpenAI embedding provider: {}", provider.model());
154                                Some(Arc::new(provider))
155                            }
156                            Err(e) => {
157                                warn!(
158                                    "Failed to initialize OpenAI embeddings: {}. Falling back to FTS-only search.",
159                                    e
160                                );
161                                None
162                            }
163                        }
164                    } else {
165                        warn!(
166                            "OpenAI embedding provider requested but no OpenAI config found. Falling back to FTS-only search."
167                        );
168                        None
169                    }
170                } else {
171                    warn!(
172                        "OpenAI embedding provider requested but no app config provided. Falling back to FTS-only search."
173                    );
174                    None
175                }
176            }
177            #[cfg(feature = "gguf")]
178            "gguf" => {
179                let cache_dir = if memory_config.embedding_cache_dir.is_empty() {
180                    None
181                } else {
182                    Some(memory_config.embedding_cache_dir.as_str())
183                };
184                match LlamaCppProvider::new(&memory_config.embedding_model, cache_dir) {
185                    Ok(provider) => {
186                        info!("Using GGUF embedding provider: {}", provider.model());
187                        Some(Arc::new(provider))
188                    }
189                    Err(e) => {
190                        warn!(
191                            "Failed to initialize GGUF embeddings: {}. Falling back to FTS-only search.",
192                            e
193                        );
194                        None
195                    }
196                }
197            }
198            #[cfg(not(feature = "gguf"))]
199            "gguf" => {
200                warn!(
201                    "GGUF embedding provider requested but 'gguf' feature is not enabled. Build with --features gguf. Falling back to FTS-only search."
202                );
203                None
204            }
205            "none" => {
206                debug!("Embeddings disabled, using FTS-only search");
207                None
208            }
209            other => {
210                warn!(
211                    "Unknown embedding provider '{}'. Falling back to FTS-only search.",
212                    other
213                );
214                None
215            }
216        };
217
218        Ok(Self {
219            workspace,
220            db_path,
221            index,
222            config: memory_config.clone(),
223            embedding_provider,
224            is_brand_new,
225        })
226    }
227
228    /// Set embedding provider for semantic search (requires OpenAI API key)
229    pub fn with_embedding_provider(mut self, provider: Arc<dyn EmbeddingProvider>) -> Self {
230        self.embedding_provider = Some(provider);
231        self
232    }
233
234    /// Check if semantic search is available
235    pub fn has_embeddings(&self) -> bool {
236        self.embedding_provider.is_some()
237    }
238
239    pub fn workspace(&self) -> &PathBuf {
240        &self.workspace
241    }
242
243    /// Read the main MEMORY.md file
244    pub fn read_memory_file(&self) -> Result<String> {
245        let path = self.workspace.join("MEMORY.md");
246        if path.exists() {
247            Ok(fs::read_to_string(&path)?)
248        } else {
249            Ok(String::new())
250        }
251    }
252
253    /// Read the HEARTBEAT.md file
254    pub fn read_heartbeat_file(&self) -> Result<String> {
255        let path = self.workspace.join("HEARTBEAT.md");
256        if path.exists() {
257            Ok(fs::read_to_string(&path)?)
258        } else {
259            Ok(String::new())
260        }
261    }
262
263    /// Read the SOUL.md file (persona/tone guidance)
264    pub fn read_soul_file(&self) -> Result<String> {
265        let path = self.workspace.join("SOUL.md");
266        if path.exists() {
267            Ok(fs::read_to_string(&path)?)
268        } else {
269            Ok(String::new())
270        }
271    }
272
273    /// Read the USER.md file (OpenClaw-compatible: user info)
274    pub fn read_user_file(&self) -> Result<String> {
275        let path = self.workspace.join("USER.md");
276        if path.exists() {
277            Ok(fs::read_to_string(&path)?)
278        } else {
279            Ok(String::new())
280        }
281    }
282
283    /// Read the IDENTITY.md file (OpenClaw-compatible: agent identity context)
284    pub fn read_identity_file(&self) -> Result<String> {
285        let path = self.workspace.join("IDENTITY.md");
286        if path.exists() {
287            Ok(fs::read_to_string(&path)?)
288        } else {
289            Ok(String::new())
290        }
291    }
292
293    /// Read the AGENTS.md file (OpenClaw-compatible: list of agents)
294    pub fn read_agents_file(&self) -> Result<String> {
295        let path = self.workspace.join("AGENTS.md");
296        if path.exists() {
297            Ok(fs::read_to_string(&path)?)
298        } else {
299            Ok(String::new())
300        }
301    }
302
303    /// Check if this is a brand new workspace (first run)
304    pub fn is_brand_new(&self) -> bool {
305        self.is_brand_new
306    }
307
308    /// Read the TOOLS.md file (OpenClaw-compatible: local tool notes)
309    pub fn read_tools_file(&self) -> Result<String> {
310        let path = self.workspace.join("TOOLS.md");
311        if path.exists() {
312            Ok(fs::read_to_string(&path)?)
313        } else {
314            Ok(String::new())
315        }
316    }
317
318    /// Read recent daily log files
319    pub fn read_recent_daily_logs(&self, days: usize) -> Result<String> {
320        let memory_dir = self.workspace.join("memory");
321        if !memory_dir.exists() {
322            return Ok(String::new());
323        }
324
325        let today = Local::now().date_naive();
326        let mut content = String::new();
327
328        for i in 0..days {
329            let date = today - chrono::Duration::days(i as i64);
330            let filename = format!("{}.md", date.format("%Y-%m-%d"));
331            let path = memory_dir.join(&filename);
332
333            if path.exists()
334                && let Ok(file_content) = fs::read_to_string(&path)
335            {
336                if !content.is_empty() {
337                    content.push_str("\n---\n\n");
338                }
339                content.push_str(&format!("## {}\n\n", filename));
340                content.push_str(&file_content);
341            }
342        }
343
344        Ok(content)
345    }
346
347    /// Search memory using hybrid search (FTS + semantic if available)
348    pub fn search(&self, query: &str, limit: usize) -> Result<Vec<MemoryChunk>> {
349        let mut results = self.search_raw(query, limit)?;
350
351        // Apply temporal decay if configured
352        if self.config.temporal_decay_lambda > 0.0 {
353            let now = std::time::SystemTime::now()
354                .duration_since(std::time::UNIX_EPOCH)
355                .unwrap_or_default()
356                .as_secs() as i64;
357            for chunk in &mut results {
358                chunk.apply_temporal_decay(self.config.temporal_decay_lambda, now);
359            }
360            // Re-sort after decay
361            results.sort_by(|a, b| {
362                b.score
363                    .partial_cmp(&a.score)
364                    .unwrap_or(std::cmp::Ordering::Equal)
365            });
366        }
367
368        Ok(results)
369    }
370
371    /// Search memory without temporal decay (internal use)
372    fn search_raw(&self, query: &str, limit: usize) -> Result<Vec<MemoryChunk>> {
373        // If we have an embedding provider, try hybrid search
374        if let Some(ref provider) = self.embedding_provider {
375            // Try to get query embedding (may fail if no API key, rate limited, etc.)
376            if let Ok(handle) = Handle::try_current() {
377                let provider = provider.clone();
378                let query_string = query.to_string();
379                let model = provider.model().to_string();
380
381                // Run embedding in blocking context
382                let embedding_result = std::thread::spawn(move || {
383                    handle.block_on(async { provider.embed(&query_string).await })
384                })
385                .join()
386                .map_err(|_| anyhow::anyhow!("Thread panicked"))?;
387
388                if let Ok(embedding) = embedding_result {
389                    debug!("Using hybrid search with {} dimensions", embedding.len());
390                    return self.index.search_hybrid(
391                        query,
392                        Some(&embedding),
393                        &model,
394                        limit,
395                        0.3, // FTS weight
396                        0.7, // Vector weight
397                    );
398                }
399            }
400        }
401
402        // Fallback to FTS-only search
403        self.index.search(query, limit)
404    }
405
406    /// Search memory using FTS only (faster, no API calls)
407    pub fn search_fts(&self, query: &str, limit: usize) -> Result<Vec<MemoryChunk>> {
408        self.index.search(query, limit)
409    }
410
411    /// Get total chunk count
412    pub fn chunk_count(&self) -> Result<usize> {
413        self.index.chunk_count()
414    }
415
416    /// Reindex all memory files
417    pub fn reindex(&self, force: bool) -> Result<ReindexStats> {
418        let start = std::time::Instant::now();
419        let mut stats = ReindexStats {
420            files_processed: 0,
421            files_updated: 0,
422            chunks_indexed: 0,
423            duration: Duration::default(),
424        };
425
426        // First, clean up deleted files from the index
427        let files_removed = self.cleanup_deleted_files()?;
428        if files_removed > 0 {
429            info!("Removed {} deleted files from index", files_removed);
430        }
431
432        // Index all .md files recursively under workspace
433        let pattern = format!("{}/**/*.md", self.workspace.display());
434        for entry in glob::glob(&pattern)
435            .into_iter()
436            .flatten()
437            .filter_map(|r| r.ok())
438        {
439            if entry.is_file() {
440                stats.files_processed += 1;
441                if self.index.index_file(&entry, force)? {
442                    stats.files_updated += 1;
443                }
444            }
445        }
446
447        // Index configured external paths (outside workspace)
448        for index_path in &self.config.paths {
449            let base_path = if index_path.path.starts_with('~') || index_path.path.starts_with('/')
450            {
451                PathBuf::from(shellexpand::tilde(&index_path.path).to_string())
452            } else {
453                self.workspace.join(&index_path.path)
454            };
455
456            // Skip paths inside workspace (already covered by recursive glob above)
457            if base_path.starts_with(&self.workspace) {
458                continue;
459            }
460
461            if !base_path.exists() {
462                debug!("Skipping non-existent index path: {}", base_path.display());
463                continue;
464            }
465
466            let pattern = format!("{}/{}", base_path.display(), index_path.pattern);
467            debug!("Indexing external path with pattern: {}", pattern);
468
469            for entry in glob::glob(&pattern)
470                .into_iter()
471                .flatten()
472                .filter_map(|r| r.ok())
473            {
474                if entry.is_file() {
475                    stats.files_processed += 1;
476                    if self.index.index_file(&entry, force)? {
477                        stats.files_updated += 1;
478                    }
479                }
480            }
481        }
482
483        stats.chunks_indexed = self.index.chunk_count()?;
484        stats.duration = start.elapsed();
485
486        info!("Reindex complete: {:?}", stats);
487        Ok(stats)
488    }
489
490    /// Remove files from index that no longer exist on disk
491    fn cleanup_deleted_files(&self) -> Result<usize> {
492        let indexed_files = self.index.indexed_files()?;
493        let mut removed = 0;
494
495        for relative_path in indexed_files {
496            let full_path = self.workspace.join(&relative_path);
497            if !full_path.exists() {
498                debug!("Cleaning up deleted file: {}", relative_path);
499                self.index.remove_file(&relative_path)?;
500                removed += 1;
501            }
502        }
503
504        Ok(removed)
505    }
506
507    /// Get memory statistics
508    pub fn stats(&self) -> Result<MemoryStats> {
509        let mut files = Vec::new();
510        let mut total_chunks = 0;
511
512        // Get stats for all .md files recursively under workspace
513        let pattern = format!("{}/**/*.md", self.workspace.display());
514        for entry in glob::glob(&pattern)
515            .into_iter()
516            .flatten()
517            .filter_map(|r| r.ok())
518        {
519            if entry.is_file() {
520                let content = fs::read_to_string(&entry)?;
521                let lines = content.lines().count();
522                let chunks = self.index.file_chunk_count(&entry)?;
523                total_chunks += chunks;
524
525                let display_name = entry
526                    .strip_prefix(&self.workspace)
527                    .map(|rel| rel.display().to_string())
528                    .unwrap_or_else(|_| entry.display().to_string());
529
530                files.push(FileStats {
531                    name: display_name,
532                    chunks,
533                    lines,
534                });
535            }
536        }
537
538        // Configured external paths (outside workspace)
539        for index_path in &self.config.paths {
540            let base_path = if index_path.path.starts_with('~') || index_path.path.starts_with('/')
541            {
542                PathBuf::from(shellexpand::tilde(&index_path.path).to_string())
543            } else {
544                self.workspace.join(&index_path.path)
545            };
546
547            // Skip paths inside workspace (already covered above)
548            if base_path.starts_with(&self.workspace) {
549                continue;
550            }
551
552            if !base_path.exists() {
553                continue;
554            }
555
556            let pattern = format!("{}/{}", base_path.display(), index_path.pattern);
557
558            for entry in glob::glob(&pattern)
559                .into_iter()
560                .flatten()
561                .filter_map(|r| r.ok())
562            {
563                if entry.is_file() {
564                    let content = fs::read_to_string(&entry)?;
565                    let lines = content.lines().count();
566                    let chunks = self.index.file_chunk_count(&entry)?;
567                    total_chunks += chunks;
568
569                    let display_name = if let Ok(rel) = entry.strip_prefix(&base_path) {
570                        format!("{}/{}", index_path.path, rel.display())
571                    } else {
572                        entry.display().to_string()
573                    };
574
575                    files.push(FileStats {
576                        name: display_name,
577                        chunks,
578                        lines,
579                    });
580                }
581            }
582        }
583
584        let index_size = self.index.size_bytes()? / 1024;
585
586        Ok(MemoryStats {
587            workspace: self.workspace.display().to_string(),
588            total_files: files.len(),
589            total_chunks,
590            index_size_kb: index_size,
591            files,
592        })
593    }
594
595    /// Get recent memory entries
596    pub fn recent_entries(&self, count: usize) -> Result<Vec<RecentEntry>> {
597        let mut entries = Vec::new();
598
599        let memory_dir = self.workspace.join("memory");
600        if !memory_dir.exists() {
601            return Ok(entries);
602        }
603
604        // Get all daily log files sorted by date (newest first)
605        let mut files: Vec<_> = fs::read_dir(&memory_dir)?
606            .filter_map(|e| e.ok())
607            .filter(|e| e.path().extension().map(|e| e == "md").unwrap_or(false))
608            .collect();
609
610        files.sort_by_key(|f| std::cmp::Reverse(f.file_name()));
611
612        for entry in files.into_iter().take(count) {
613            let path = entry.path();
614            let filename = path.file_name().unwrap().to_string_lossy().to_string();
615
616            if let Ok(content) = fs::read_to_string(&path) {
617                // Get last non-empty line as preview
618                let preview = content
619                    .lines()
620                    .rev()
621                    .find(|l| !l.trim().is_empty())
622                    .unwrap_or("")
623                    .chars()
624                    .take(100)
625                    .collect();
626
627                entries.push(RecentEntry {
628                    timestamp: filename.replace(".md", ""),
629                    file: format!("memory/{}", filename),
630                    preview,
631                });
632            }
633        }
634
635        Ok(entries)
636    }
637
638    /// Start file watcher for automatic reindexing
639    pub fn start_watcher(&self) -> Result<MemoryWatcher> {
640        MemoryWatcher::new(
641            self.workspace.clone(),
642            self.db_path.clone(),
643            self.config.clone(),
644        )
645    }
646
647    /// Generate embeddings for chunks that don't have them
648    /// Returns (chunks_processed, chunks_embedded)
649    /// Uses embedding cache to avoid regenerating identical content
650    pub async fn generate_embeddings(&self, batch_size: usize) -> Result<(usize, usize)> {
651        let provider = match &self.embedding_provider {
652            Some(p) => p,
653            None => {
654                debug!("No embedding provider configured, skipping embedding generation");
655                return Ok((0, 0));
656            }
657        };
658
659        let provider_id = provider.id().to_string();
660        let model = provider.model().to_string();
661        let mut total_processed = 0;
662        let mut total_embedded = 0;
663        let mut cache_hits = 0;
664
665        loop {
666            // Get chunks without embeddings
667            let chunks = self.index.chunks_without_embeddings(batch_size)?;
668            if chunks.is_empty() {
669                break;
670            }
671
672            total_processed += chunks.len();
673
674            // Separate chunks into cached and uncached
675            let mut to_embed: Vec<(String, String, String)> = Vec::new(); // (id, text, hash)
676            let mut from_cache: Vec<(String, Vec<f32>)> = Vec::new(); // (id, embedding)
677
678            for (chunk_id, text) in &chunks {
679                let text_hash = hash_text(text);
680
681                // Check cache first
682                if let Ok(Some(cached)) =
683                    self.index
684                        .get_cached_embedding(&provider_id, &model, &text_hash)
685                {
686                    from_cache.push((chunk_id.clone(), cached));
687                    cache_hits += 1;
688                } else {
689                    to_embed.push((chunk_id.clone(), text.clone(), text_hash));
690                }
691            }
692
693            // Store cached embeddings
694            for (chunk_id, embedding) in from_cache {
695                if let Err(e) = self.index.store_embedding(&chunk_id, &embedding, &model) {
696                    warn!(
697                        "Failed to store cached embedding for chunk {}: {}",
698                        chunk_id, e
699                    );
700                } else {
701                    total_embedded += 1;
702                }
703            }
704
705            // Generate new embeddings for uncached chunks
706            if !to_embed.is_empty() {
707                let texts: Vec<String> = to_embed.iter().map(|(_, text, _)| text.clone()).collect();
708
709                match provider.embed_batch(&texts).await {
710                    Ok(embeddings) => {
711                        for ((chunk_id, _text, text_hash), embedding) in
712                            to_embed.iter().zip(embeddings.iter())
713                        {
714                            // Store in chunk
715                            if let Err(e) = self.index.store_embedding(chunk_id, embedding, &model)
716                            {
717                                warn!("Failed to store embedding for chunk {}: {}", chunk_id, e);
718                            } else {
719                                total_embedded += 1;
720                            }
721
722                            // Store in cache for future reuse
723                            if let Err(e) = self.index.cache_embedding(
724                                &provider_id,
725                                &model,
726                                "", // provider_key (API key identifier, can be empty)
727                                text_hash,
728                                embedding,
729                            ) {
730                                debug!("Failed to cache embedding: {}", e);
731                            }
732                        }
733                    }
734                    Err(e) => {
735                        warn!("Failed to generate embeddings: {}", e);
736                        break;
737                    }
738                }
739            }
740
741            debug!(
742                "Generated embeddings: {}/{} chunks ({} from cache)",
743                total_embedded, total_processed, cache_hits
744            );
745
746            // Break if we processed fewer than batch_size (last batch)
747            if chunks.len() < batch_size {
748                break;
749            }
750        }
751
752        info!(
753            "Embedding generation complete: {} chunks, {} embedded, {} cache hits",
754            total_processed, total_embedded, cache_hits
755        );
756
757        Ok((total_processed, total_embedded))
758    }
759
760    /// Get count of chunks with embeddings
761    pub fn embedded_chunk_count(&self) -> Result<usize> {
762        let model = self
763            .embedding_provider
764            .as_ref()
765            .map(|p| p.model().to_string())
766            .unwrap_or_default();
767        self.index.embedded_chunk_count(&model)
768    }
769}