1use std::path::{Path, PathBuf};
2use std::str::FromStr;
3use std::sync::Arc;
4
5use chrono::Local;
6use sqlx::sqlite::{SqliteConnectOptions, SqlitePoolOptions};
7use sqlx::{Row, SqlitePool};
8use tracing::debug;
9
10use starpod_core::{Result, StarpodError};
11
12use crate::defaults;
13use crate::embedder::{self, Embedder};
14use crate::fusion;
15use crate::indexer::{self, reindex_source, CHUNK_OVERLAP, CHUNK_SIZE};
16use crate::schema;
17use crate::scoring;
18
/// Default upper bound, in bytes, on how much of `SOUL.md` is copied into
/// the bootstrap context.
const BOOTSTRAP_FILE_CAP: usize = 20_000;

/// Default half-life, in days, handed to the temporal-decay scoring helpers.
const DEFAULT_HALF_LIFE_DAYS: f64 = 30.0;
24
/// One chunk returned by a memory search.
#[derive(Debug, Clone)]
pub struct SearchResult {
    /// Index key of the file the chunk belongs to (for example `"SOUL.md"`).
    pub source: String,
    /// Text of the matched chunk.
    pub text: String,
    /// First line of the chunk, as recorded in the index.
    pub line_start: usize,
    /// Last line of the chunk, as recorded in the index.
    pub line_end: usize,
    /// Ordering score. The searches in this module sort ascending on it, so
    /// a lower value is listed first.
    pub rank: f64,
}
47
48pub struct MemoryStore {
68 agent_home: PathBuf,
70 config_dir: PathBuf,
72 pool: SqlitePool,
73 half_life_days: f64,
75 mmr_lambda: f64,
77 embedder: Option<Arc<dyn Embedder>>,
79 chunk_size: usize,
81 chunk_overlap: usize,
83 bootstrap_file_cap: usize,
85}
86
87impl MemoryStore {
88 pub async fn new(agent_home: &Path, config_dir: &Path, db_dir: &Path) -> Result<Self> {
94 std::fs::create_dir_all(agent_home).map_err(StarpodError::Io)?;
96 std::fs::create_dir_all(config_dir).map_err(StarpodError::Io)?;
97 std::fs::create_dir_all(db_dir).map_err(StarpodError::Io)?;
98
99 let db_path = db_dir.join("memory.db");
102 let opts =
103 SqliteConnectOptions::from_str(&format!("sqlite://{}?mode=rwc", db_path.display()))
104 .map_err(|e| StarpodError::Database(format!("Invalid DB path: {}", e)))?
105 .pragma("journal_mode", "WAL")
106 .pragma("busy_timeout", "5000")
107 .pragma("synchronous", "NORMAL");
108
109 let pool = SqlitePoolOptions::new()
110 .max_connections(2)
111 .connect_with(opts)
112 .await
113 .map_err(|e| StarpodError::Database(format!("Failed to open database: {}", e)))?;
114
115 schema::run_migrations(&pool).await?;
117
118 let store = Self {
119 agent_home: agent_home.to_path_buf(),
120 config_dir: config_dir.to_path_buf(),
121 pool,
122 half_life_days: DEFAULT_HALF_LIFE_DAYS,
123 mmr_lambda: 0.7,
124 embedder: None,
125 chunk_size: CHUNK_SIZE,
126 chunk_overlap: CHUNK_OVERLAP,
127 bootstrap_file_cap: BOOTSTRAP_FILE_CAP,
128 };
129
130 store.seed_defaults()?;
132
133 store.reindex().await?;
135
136 Ok(store)
137 }
138
139 pub async fn new_user(user_dir: &Path) -> Result<Self> {
150 std::fs::create_dir_all(user_dir).map_err(StarpodError::Io)?;
151
152 let db_path = user_dir.join("memory.db");
153 let opts =
154 SqliteConnectOptions::from_str(&format!("sqlite://{}?mode=rwc", db_path.display()))
155 .map_err(|e| StarpodError::Database(format!("Invalid DB path: {}", e)))?;
156
157 let pool = SqlitePoolOptions::new()
158 .max_connections(1)
159 .connect_with(opts)
160 .await
161 .map_err(|e| StarpodError::Database(format!("Failed to open user database: {}", e)))?;
162
163 schema::run_migrations(&pool).await?;
164
165 let store = Self {
166 agent_home: user_dir.to_path_buf(),
167 config_dir: user_dir.to_path_buf(),
168 pool,
169 half_life_days: DEFAULT_HALF_LIFE_DAYS,
170 mmr_lambda: 0.7,
171 embedder: None,
172 chunk_size: CHUNK_SIZE,
173 chunk_overlap: CHUNK_OVERLAP,
174 bootstrap_file_cap: BOOTSTRAP_FILE_CAP,
175 };
176
177 store.reindex().await?;
179
180 Ok(store)
181 }
182
183 fn seed_defaults(&self) -> Result<bool> {
191 let fresh = !self.config_dir.join("SOUL.md").exists();
192
193 if fresh {
195 let path = self.config_dir.join("SOUL.md");
196 debug!(file = "SOUL.md", "Seeding default SOUL.md");
197 std::fs::write(&path, defaults::DEFAULT_SOUL)?;
198 }
199
200 let lifecycle_files = [
202 ("HEARTBEAT.md", defaults::DEFAULT_HEARTBEAT),
203 ("BOOT.md", defaults::DEFAULT_BOOT),
204 ("BOOTSTRAP.md", defaults::DEFAULT_BOOTSTRAP),
205 ];
206
207 for (name, content) in &lifecycle_files {
208 let path = self.config_dir.join(name);
209 if !path.exists() {
210 debug!(file = %name, "Seeding default file");
211 std::fs::write(&path, content)?;
212 }
213 }
214
215 Ok(fresh)
216 }
217
218 pub fn agent_home(&self) -> &Path {
220 &self.agent_home
221 }
222
223 pub fn config_dir(&self) -> &Path {
225 &self.config_dir
226 }
227
/// Bare file names that are routed to `config_dir` instead of `agent_home`.
const CONFIG_FILES: &[&str] = &[
    "SOUL.md",
    "HEARTBEAT.md",
    "BOOT.md",
    "BOOTSTRAP.md",
];
230
231 fn resolve_path(&self, name: &str) -> PathBuf {
233 if !name.contains('/') && Self::CONFIG_FILES.contains(&name) {
235 self.config_dir.join(name)
236 } else {
237 self.agent_home.join(name)
238 }
239 }
240
241 pub fn has_bootstrap(&self) -> bool {
243 let path = self.config_dir.join("BOOTSTRAP.md");
244 path.is_file()
245 && std::fs::read_to_string(&path)
246 .map(|c| !c.trim().is_empty())
247 .unwrap_or(false)
248 }
249
250 pub fn clear_bootstrap(&self) -> Result<()> {
252 let path = self.config_dir.join("BOOTSTRAP.md");
253 if path.exists() {
254 std::fs::write(&path, "")?;
255 }
256 Ok(())
257 }
258
259 pub fn bootstrap_context(&self) -> Result<String> {
264 let content = self.read_file("SOUL.md")?;
265 let capped = if content.len() > self.bootstrap_file_cap {
266 let mut end = self.bootstrap_file_cap;
267 while end > 0 && !content.is_char_boundary(end) {
268 end -= 1;
269 }
270 &content[..end]
271 } else {
272 &content
273 };
274 Ok(format!("--- SOUL.md ---\n{}", capped))
275 }
276
277 pub fn set_half_life_days(&mut self, days: f64) {
279 self.half_life_days = days;
280 }
281
282 pub fn set_mmr_lambda(&mut self, lambda: f64) {
284 self.mmr_lambda = lambda;
285 }
286
287 pub fn set_chunk_size(&mut self, size: usize) {
289 self.chunk_size = size;
290 }
291
292 pub fn set_chunk_overlap(&mut self, overlap: usize) {
294 self.chunk_overlap = overlap;
295 }
296
297 pub fn set_bootstrap_file_cap(&mut self, cap: usize) {
299 self.bootstrap_file_cap = cap;
300 }
301
302 pub async fn search(&self, query: &str, limit: usize) -> Result<Vec<SearchResult>> {
308 let fetch_limit = (limit * 3).max(30);
310 let rows = sqlx::query(
311 "SELECT source, chunk_text, line_start, line_end, rank
312 FROM memory_fts
313 WHERE memory_fts MATCH ?1
314 ORDER BY rank
315 LIMIT ?2",
316 )
317 .bind(query)
318 .bind(fetch_limit as i64)
319 .fetch_all(&self.pool)
320 .await
321 .map_err(|e| StarpodError::Database(format!("Search query failed: {}", e)))?;
322
323 let mut results: Vec<SearchResult> = rows
324 .iter()
325 .map(|row| {
326 let source = row.get::<String, _>("source");
327 let raw_rank = row.get::<f64, _>("rank");
328 let adjusted_rank = scoring::apply_decay(raw_rank, &source, self.half_life_days);
329 SearchResult {
330 source,
331 text: row.get::<String, _>("chunk_text"),
332 line_start: row.get::<i64, _>("line_start") as usize,
333 line_end: row.get::<i64, _>("line_end") as usize,
334 rank: adjusted_rank,
335 }
336 })
337 .collect();
338
339 results.sort_by(|a, b| {
341 a.rank
342 .partial_cmp(&b.rank)
343 .unwrap_or(std::cmp::Ordering::Equal)
344 });
345 results.truncate(limit);
346
347 Ok(results)
348 }
349
350 pub fn set_embedder(&mut self, embedder: Arc<dyn Embedder>) {
352 self.embedder = Some(embedder);
353 }
354
355 pub async fn vector_search(&self, query: &str, limit: usize) -> Result<Vec<SearchResult>> {
359 let embedder = match &self.embedder {
360 Some(e) => e,
361 None => return Ok(Vec::new()),
362 };
363
364 let query_vecs = embedder.embed(&[query.to_string()]).await?;
366 let query_vec = match query_vecs.first() {
367 Some(v) => v,
368 None => return Ok(Vec::new()),
369 };
370
371 let rows = sqlx::query(
373 "SELECT v.source, v.embedding, v.line_start, v.line_end, f.chunk_text
374 FROM memory_vectors v
375 LEFT JOIN memory_fts f ON f.source = v.source
376 AND f.line_start = v.line_start AND f.line_end = v.line_end",
377 )
378 .fetch_all(&self.pool)
379 .await
380 .map_err(|e| StarpodError::Database(format!("Vector search failed: {}", e)))?;
381
382 let mut scored: Vec<(f32, SearchResult)> = Vec::new();
383 for row in &rows {
384 let blob: Vec<u8> = row.get("embedding");
385 let embedding = bytes_to_f32_vec(&blob);
386 let similarity = embedder::cosine_similarity(query_vec, &embedding);
387
388 let source: String = row.get("source");
389 let text: String = row.try_get("chunk_text").unwrap_or_default();
390
391 scored.push((
392 similarity,
393 SearchResult {
394 source,
395 text,
396 line_start: row.get::<i64, _>("line_start") as usize,
397 line_end: row.get::<i64, _>("line_end") as usize,
398 rank: -(similarity as f64), },
400 ));
401 }
402
403 scored.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));
405 scored.truncate(limit);
406
407 Ok(scored.into_iter().map(|(_, r)| r).collect())
408 }
409
410 pub async fn hybrid_search(&self, query: &str, limit: usize) -> Result<Vec<SearchResult>> {
415 let embedder = match &self.embedder {
416 Some(e) => e,
417 None => return self.search(query, limit).await,
418 };
419
420 let fts_limit = (limit * 3).max(30);
422 let vec_limit = (limit * 3).max(30);
423
424 let (fts_results, vec_results) = tokio::join!(
425 self.fts_search_raw(query, fts_limit),
426 self.vector_search(query, vec_limit),
427 );
428
429 let fts_results = fts_results?;
430 let vec_results = vec_results?;
431
432 let mut fused = fusion::reciprocal_rank_fusion(&fts_results, &vec_results, limit * 3);
434
435 for result in &mut fused {
437 let decay = scoring::decay_factor(&result.source, self.half_life_days);
438 if decay > 0.0 && decay < 1.0 {
439 result.rank *= decay;
440 }
441 }
442
443 fused.sort_by(|a, b| {
445 a.rank
446 .partial_cmp(&b.rank)
447 .unwrap_or(std::cmp::Ordering::Equal)
448 });
449
450 let mmr_pool_size = (limit * 2).min(fused.len());
452 if mmr_pool_size > 0 {
453 let query_vecs = embedder.embed(&[query.to_string()]).await?;
455 if let Some(query_vec) = query_vecs.first() {
456 let texts: Vec<String> = fused[..mmr_pool_size]
458 .iter()
459 .map(|r| r.text.clone())
460 .collect();
461 let embeddings = embedder.embed(&texts).await?;
462
463 let candidates: Vec<(Vec<f32>, usize)> = embeddings
464 .into_iter()
465 .enumerate()
466 .map(|(i, emb)| (emb, i))
467 .collect();
468
469 let selected_indices =
470 scoring::mmr_rerank(query_vec, &candidates, limit, self.mmr_lambda);
471
472 let pool = fused;
473 fused = selected_indices
474 .into_iter()
475 .map(|idx| pool[idx].clone())
476 .collect();
477 } else {
478 fused.truncate(limit);
479 }
480 }
481
482 Ok(fused)
483 }
484
485 async fn fts_search_raw(&self, query: &str, limit: usize) -> Result<Vec<SearchResult>> {
487 let rows = sqlx::query(
488 "SELECT source, chunk_text, line_start, line_end, rank
489 FROM memory_fts
490 WHERE memory_fts MATCH ?1
491 ORDER BY rank
492 LIMIT ?2",
493 )
494 .bind(query)
495 .bind(limit as i64)
496 .fetch_all(&self.pool)
497 .await
498 .map_err(|e| StarpodError::Database(format!("Search query failed: {}", e)))?;
499
500 Ok(rows
501 .iter()
502 .map(|row| SearchResult {
503 source: row.get::<String, _>("source"),
504 text: row.get::<String, _>("chunk_text"),
505 line_start: row.get::<i64, _>("line_start") as usize,
506 line_end: row.get::<i64, _>("line_end") as usize,
507 rank: row.get::<f64, _>("rank"),
508 })
509 .collect())
510 }
511
512 async fn embed_and_store_source(&self, source: &str, text: &str) -> Result<()> {
514 let embedder = match &self.embedder {
515 Some(e) => e,
516 None => return Ok(()),
517 };
518
519 sqlx::query("DELETE FROM memory_vectors WHERE source = ?1")
521 .bind(source)
522 .execute(&self.pool)
523 .await
524 .map_err(|e| StarpodError::Database(format!("Failed to delete old vectors: {}", e)))?;
525
526 let chunks = indexer::chunk_text(source, text, self.chunk_size, self.chunk_overlap);
528 if chunks.is_empty() {
529 return Ok(());
530 }
531
532 let texts: Vec<String> = chunks.iter().map(|c| c.text.clone()).collect();
534 let embeddings = embedder.embed(&texts).await?;
535
536 for (idx, (chunk, embedding)) in chunks.iter().zip(embeddings.iter()).enumerate() {
538 let blob = f32_vec_to_bytes(embedding);
539 sqlx::query(
540 "INSERT INTO memory_vectors (source, chunk_idx, embedding, line_start, line_end)
541 VALUES (?1, ?2, ?3, ?4, ?5)",
542 )
543 .bind(&chunk.source)
544 .bind(idx as i64)
545 .bind(&blob)
546 .bind(chunk.line_start as i64)
547 .bind(chunk.line_end as i64)
548 .execute(&self.pool)
549 .await
550 .map_err(|e| StarpodError::Database(format!("Failed to insert vector: {}", e)))?;
551 }
552
553 Ok(())
554 }
555
556 pub fn read_file(&self, name: &str) -> Result<String> {
558 scoring::validate_path(name, &self.agent_home)?;
560 let path = self.resolve_path(name);
561 if !path.exists() {
562 return Ok(String::new());
563 }
564 std::fs::read_to_string(&path).map_err(StarpodError::Io)
565 }
566
567 pub async fn write_file(&self, name: &str, content: &str) -> Result<()> {
572 scoring::validate_path(name, &self.agent_home)?;
573 scoring::validate_content_size(content)?;
574
575 let path = self.resolve_path(name);
576
577 if let Some(parent) = path.parent() {
579 std::fs::create_dir_all(parent)?;
580 }
581
582 std::fs::write(&path, content)?;
583
584 reindex_source(
586 &self.pool,
587 name,
588 content,
589 self.chunk_size,
590 self.chunk_overlap,
591 )
592 .await?;
593 self.embed_and_store_source(name, content).await?;
594
595 Ok(())
596 }
597
598 pub async fn append_daily(&self, text: &str) -> Result<()> {
600 let today = Local::now().format("%Y-%m-%d").to_string();
601 let filename = format!("memory/{}.md", today);
602 let path = self.agent_home.join(&filename);
603
604 if let Some(parent) = path.parent() {
606 std::fs::create_dir_all(parent)?;
607 }
608
609 let timestamp = Local::now().format("%H:%M:%S").to_string();
610 let entry = format!("\n## {}\n{}\n", timestamp, text);
611
612 let mut content = if path.exists() {
613 std::fs::read_to_string(&path)?
614 } else {
615 format!("# Daily Log — {}\n", today)
616 };
617
618 content.push_str(&entry);
619 std::fs::write(&path, &content)?;
620
621 reindex_source(
623 &self.pool,
624 &filename,
625 &content,
626 self.chunk_size,
627 self.chunk_overlap,
628 )
629 .await?;
630 self.embed_and_store_source(&filename, &content).await?;
631
632 Ok(())
633 }
634
635 pub async fn reindex(&self) -> Result<()> {
641 sqlx::query("DELETE FROM memory_fts")
643 .execute(&self.pool)
644 .await
645 .map_err(|e| StarpodError::Database(format!("Failed to clear FTS: {}", e)))?;
646
647 sqlx::query("DELETE FROM memory_vectors")
649 .execute(&self.pool)
650 .await
651 .map_err(|e| StarpodError::Database(format!("Failed to clear vectors: {}", e)))?;
652
653 self.index_dir(&self.config_dir.clone(), "").await?;
655
656 if let Ok(entries) = std::fs::read_dir(&self.agent_home) {
658 for entry in entries.flatten() {
659 let path = entry.path();
660 if path.is_file() && path.extension().is_some_and(|ext| ext == "md") {
661 let filename = entry.file_name().to_string_lossy().to_string();
662 if !Self::CONFIG_FILES.iter().any(|&f| f == filename) {
664 let content = std::fs::read_to_string(&path)?;
665 reindex_source(
666 &self.pool,
667 &filename,
668 &content,
669 self.chunk_size,
670 self.chunk_overlap,
671 )
672 .await?;
673 self.embed_and_store_source(&filename, &content).await?;
674 }
675 }
676 }
677 }
678
679 Ok(())
680 }
681
682 async fn index_dir(&self, dir: &Path, prefix: &str) -> Result<()> {
684 let entries = std::fs::read_dir(dir).map_err(StarpodError::Io)?;
685
686 for entry in entries {
687 let entry = entry.map_err(StarpodError::Io)?;
688 let path = entry.path();
689 if path.is_file() && path.extension().is_some_and(|ext| ext == "md") {
690 let filename = entry.file_name().to_string_lossy().to_string();
691 let source = format!("{}{}", prefix, filename);
692 let content = std::fs::read_to_string(&path)?;
693 reindex_source(
694 &self.pool,
695 &source,
696 &content,
697 self.chunk_size,
698 self.chunk_overlap,
699 )
700 .await?;
701 self.embed_and_store_source(&source, &content).await?;
702 }
703 }
704
705 Ok(())
706 }
707}
708
/// Serializes a slice of `f32` values into a flat byte buffer, four
/// little-endian bytes per value, in order.
fn f32_vec_to_bytes(vec: &[f32]) -> Vec<u8> {
    vec.iter().flat_map(|value| value.to_le_bytes()).collect()
}
717
/// Decodes a byte buffer into `f32` values, reading four little-endian bytes
/// per value. Trailing bytes that do not fill a complete group of four are
/// ignored.
fn bytes_to_f32_vec(bytes: &[u8]) -> Vec<f32> {
    let mut floats = Vec::with_capacity(bytes.len() / 4);
    for quad in bytes.chunks_exact(4) {
        let mut raw = [0u8; 4];
        raw.copy_from_slice(quad);
        floats.push(f32::from_le_bytes(raw));
    }
    floats
}
725
726#[cfg(test)]
727mod tests {
728 use super::*;
729 use tempfile::TempDir;
730
731 async fn test_store(tmp: &TempDir) -> MemoryStore {
735 let agent_home = tmp.path().join("agent_home");
736 let config_dir = tmp.path().join("agent_home").join("config");
737 let db_dir = tmp.path().join("db");
738 MemoryStore::new(&agent_home, &config_dir, &db_dir)
739 .await
740 .unwrap()
741 }
742
743 #[tokio::test]
746 async fn test_new_seeds_defaults() {
747 let tmp = TempDir::new().unwrap();
748 let store = test_store(&tmp).await;
749 let config_dir = tmp.path().join("agent_home").join("config");
750
751 assert!(config_dir.join("SOUL.md").exists());
753 assert!(config_dir.join("HEARTBEAT.md").exists());
754 assert!(config_dir.join("BOOT.md").exists());
755 assert!(config_dir.join("BOOTSTRAP.md").exists());
756
757 assert!(!config_dir.join("USER.md").exists());
759 assert!(!config_dir.join("MEMORY.md").exists());
760
761 assert!(tmp.path().join("db").join("memory.db").exists());
763
764 let soul = store.read_file("SOUL.md").unwrap();
766 assert!(soul.contains("Nova"));
767 }
768
769 #[tokio::test]
770 async fn test_write_and_search() {
771 let tmp = TempDir::new().unwrap();
772 let store = test_store(&tmp).await;
773
774 store
775 .write_file(
776 "test-content.md",
777 "Rust is a systems programming language focused on safety and performance.",
778 )
779 .await
780 .unwrap();
781
782 let results = store.search("Rust programming", 5).await.unwrap();
783 assert!(!results.is_empty());
784 assert!(results[0].text.contains("Rust"));
785 }
786
787 #[tokio::test]
788 async fn test_append_daily() {
789 let tmp = TempDir::new().unwrap();
790 let store = test_store(&tmp).await;
791 let agent_home = tmp.path().join("agent_home");
792
793 std::fs::create_dir_all(agent_home.join("memory")).unwrap();
795
796 store
797 .append_daily("Had a great conversation about Rust.")
798 .await
799 .unwrap();
800 store
801 .append_daily("Discussed memory management.")
802 .await
803 .unwrap();
804
805 let today = Local::now().format("%Y-%m-%d").to_string();
806 let content = store.read_file(&format!("memory/{}.md", today)).unwrap();
807 assert!(content.contains("great conversation"));
808 assert!(content.contains("memory management"));
809 }
810
811 #[tokio::test]
812 async fn test_bootstrap_context() {
813 let tmp = TempDir::new().unwrap();
814 let store = test_store(&tmp).await;
815
816 let ctx = store.bootstrap_context().unwrap();
817 assert!(ctx.contains("SOUL.md"));
818 assert!(ctx.contains("Nova"));
819 assert!(!ctx.contains("USER.md"));
821 assert!(!ctx.contains("MEMORY.md"));
822 }
823
824 #[tokio::test]
825 async fn test_reindex() {
826 let tmp = TempDir::new().unwrap();
827 let store = test_store(&tmp).await;
828 let agent_home = tmp.path().join("agent_home");
829
830 std::fs::write(
832 agent_home.join("test-quantum.md"),
833 "This is about quantum computing and qubits.",
834 )
835 .unwrap();
836
837 store.reindex().await.unwrap();
839
840 let results = store.search("quantum computing", 5).await.unwrap();
841 assert!(!results.is_empty());
842 }
843
844 #[tokio::test]
847 async fn write_file_rejects_traversal() {
848 let tmp = TempDir::new().unwrap();
849 let store = test_store(&tmp).await;
850 let err = store.write_file("../escape.md", "evil content").await;
851 assert!(err.is_err(), "write_file should reject path traversal");
852 }
853
854 #[tokio::test]
855 async fn write_file_rejects_non_md() {
856 let tmp = TempDir::new().unwrap();
857 let store = test_store(&tmp).await;
858 let err = store.write_file("script.sh", "#!/bin/bash").await;
859 assert!(err.is_err(), "write_file should reject non-.md files");
860 }
861
862 #[tokio::test]
863 async fn write_file_rejects_absolute_path() {
864 let tmp = TempDir::new().unwrap();
865 let store = test_store(&tmp).await;
866 let err = store.write_file("/tmp/evil.md", "content").await;
867 assert!(err.is_err(), "write_file should reject absolute paths");
868 }
869
870 #[tokio::test]
871 async fn read_file_rejects_traversal() {
872 let tmp = TempDir::new().unwrap();
873 let store = test_store(&tmp).await;
874 let err = store.read_file("../../etc/passwd.md");
875 assert!(err.is_err(), "read_file should reject path traversal");
876 }
877
878 #[tokio::test]
879 async fn read_file_rejects_non_md() {
880 let tmp = TempDir::new().unwrap();
881 let store = test_store(&tmp).await;
882 let err = store.read_file("secret.json");
883 assert!(err.is_err(), "read_file should reject non-.md files");
884 }
885
886 #[tokio::test]
889 async fn write_file_rejects_oversized_content() {
890 let tmp = TempDir::new().unwrap();
891 let store = test_store(&tmp).await;
892 let big = "x".repeat(scoring::MAX_WRITE_SIZE + 1);
893 let err = store.write_file("big.md", &big).await;
894 assert!(err.is_err(), "write_file should reject content > 1 MB");
895 }
896
897 #[tokio::test]
898 async fn write_file_accepts_content_at_limit() {
899 let tmp = TempDir::new().unwrap();
900 let store = test_store(&tmp).await;
901 let exact = "x".repeat(scoring::MAX_WRITE_SIZE);
902 let result = store.write_file("exact.md", &exact).await;
903 assert!(
904 result.is_ok(),
905 "write_file should accept content at exactly 1 MB"
906 );
907 }
908
909 #[tokio::test]
912 async fn set_half_life_days_is_applied() {
913 let tmp = TempDir::new().unwrap();
914 let mut store = test_store(&tmp).await;
915 store.set_half_life_days(7.0);
916 assert_eq!(store.half_life_days, 7.0);
917 }
918
919 #[tokio::test]
920 async fn set_mmr_lambda_is_applied() {
921 let tmp = TempDir::new().unwrap();
922 let mut store = test_store(&tmp).await;
923 store.set_mmr_lambda(0.5);
924 assert_eq!(store.mmr_lambda, 0.5);
925 }
926
927 #[tokio::test]
928 async fn set_chunk_size_is_applied() {
929 let tmp = TempDir::new().unwrap();
930 let mut store = test_store(&tmp).await;
931 store.set_chunk_size(800);
932 assert_eq!(store.chunk_size, 800);
933 }
934
935 #[tokio::test]
936 async fn set_chunk_overlap_is_applied() {
937 let tmp = TempDir::new().unwrap();
938 let mut store = test_store(&tmp).await;
939 store.set_chunk_overlap(160);
940 assert_eq!(store.chunk_overlap, 160);
941 }
942
943 #[tokio::test]
944 async fn set_bootstrap_file_cap_is_applied() {
945 let tmp = TempDir::new().unwrap();
946 let mut store = test_store(&tmp).await;
947 store.set_bootstrap_file_cap(5000);
948 assert_eq!(store.bootstrap_file_cap, 5000);
949 }
950
951 #[tokio::test]
952 async fn bootstrap_file_cap_limits_output() {
953 let tmp = TempDir::new().unwrap();
954 let mut store = test_store(&tmp).await;
955
956 let large_content = "x".repeat(10_000);
958 store.write_file("SOUL.md", &large_content).await.unwrap();
959
960 store.set_bootstrap_file_cap(500);
962
963 let ctx = store.bootstrap_context().unwrap();
964 let soul_section = ctx
967 .split("--- SOUL.md ---\n")
968 .nth(1)
969 .unwrap_or("")
970 .split("\n\n--- ")
971 .next()
972 .unwrap_or("");
973 assert!(
974 soul_section.len() <= 500,
975 "SOUL.md section should be capped at 500 chars, got {}",
976 soul_section.len(),
977 );
978 }
979
980 #[tokio::test]
983 async fn vector_search_returns_empty_without_embedder() {
984 let tmp = TempDir::new().unwrap();
985 let store = test_store(&tmp).await;
986 let results = store.vector_search("anything", 10).await.unwrap();
987 assert!(
988 results.is_empty(),
989 "vector_search should return empty without embedder"
990 );
991 }
992
993 #[tokio::test]
994 async fn hybrid_search_falls_back_to_fts_without_embedder() {
995 let tmp = TempDir::new().unwrap();
996 let store = test_store(&tmp).await;
997
998 store
999 .write_file(
1000 "test-elephants.md",
1001 "Unique test content about elephants in Africa.",
1002 )
1003 .await
1004 .unwrap();
1005
1006 let results = store.hybrid_search("elephants Africa", 5).await.unwrap();
1008 assert!(
1009 !results.is_empty(),
1010 "hybrid_search should fall back to FTS without embedder"
1011 );
1012 assert!(results[0].text.contains("elephants"));
1013 }
1014
1015 struct MockEmbedder;
1021
1022 #[async_trait::async_trait]
1023 impl Embedder for MockEmbedder {
1024 async fn embed(&self, texts: &[String]) -> Result<Vec<Vec<f32>>> {
1025 Ok(texts
1026 .iter()
1027 .map(|t| {
1028 let mut vec = vec![0.0f32; 8];
1029 for ch in t.chars() {
1030 let idx = (ch.to_ascii_lowercase() as usize).wrapping_sub('a' as usize);
1031 if idx < 8 {
1032 vec[idx] += 1.0;
1033 }
1034 }
1035 let norm: f32 = vec.iter().map(|x| x * x).sum::<f32>().sqrt();
1037 if norm > 0.0 {
1038 for v in &mut vec {
1039 *v /= norm;
1040 }
1041 }
1042 vec
1043 })
1044 .collect())
1045 }
1046
1047 fn dimensions(&self) -> usize {
1048 8
1049 }
1050 }
1051
1052 #[tokio::test]
1053 async fn set_embedder_enables_vector_storage() {
1054 let tmp = TempDir::new().unwrap();
1055 let mut store = test_store(&tmp).await;
1056 store.set_embedder(Arc::new(MockEmbedder));
1057
1058 store
1059 .write_file(
1060 "test-cats.md",
1061 "Cats are wonderful animals that love to sleep.",
1062 )
1063 .await
1064 .unwrap();
1065
1066 let count: i64 =
1068 sqlx::query_scalar("SELECT COUNT(*) FROM memory_vectors WHERE source = 'test-cats.md'")
1069 .fetch_one(&store.pool)
1070 .await
1071 .unwrap();
1072 assert!(
1073 count > 0,
1074 "Vectors should be stored after write_file with embedder"
1075 );
1076 }
1077
1078 #[tokio::test]
1079 async fn vector_search_with_mock_embedder() {
1080 let tmp = TempDir::new().unwrap();
1081 let mut store = test_store(&tmp).await;
1082 store.set_embedder(Arc::new(MockEmbedder));
1083
1084 store
1085 .write_file("test-abc.md", "aaa bbb ccc abc")
1086 .await
1087 .unwrap();
1088 store
1089 .write_file("test-def.md", "ddd eee fff def")
1090 .await
1091 .unwrap();
1092
1093 let results = store.vector_search("aaa abc", 5).await.unwrap();
1095 assert!(
1096 !results.is_empty(),
1097 "vector_search should return results with embedder"
1098 );
1099 }
1100
1101 #[tokio::test]
1102 async fn hybrid_search_with_mock_embedder() {
1103 let tmp = TempDir::new().unwrap();
1104 let mut store = test_store(&tmp).await;
1105 store.set_embedder(Arc::new(MockEmbedder));
1106
1107 store
1108 .write_file("test-alpha.md", "Alpha beta gamma delta")
1109 .await
1110 .unwrap();
1111 store
1112 .write_file("test-beta.md", "Beta epsilon zeta eta")
1113 .await
1114 .unwrap();
1115
1116 let results = store.hybrid_search("alpha beta", 5).await.unwrap();
1117 assert!(
1118 !results.is_empty(),
1119 "hybrid_search should return results with embedder"
1120 );
1121 }
1122
1123 #[tokio::test]
1124 async fn reindex_clears_and_rebuilds_vectors() {
1125 let tmp = TempDir::new().unwrap();
1126 let mut store = test_store(&tmp).await;
1127 store.set_embedder(Arc::new(MockEmbedder));
1128
1129 store
1130 .write_file("test-vectors.md", "Test content here")
1131 .await
1132 .unwrap();
1133
1134 let before: i64 = sqlx::query_scalar("SELECT COUNT(*) FROM memory_vectors")
1136 .fetch_one(&store.pool)
1137 .await
1138 .unwrap();
1139 assert!(before > 0);
1140
1141 store.reindex().await.unwrap();
1143
1144 let after: i64 = sqlx::query_scalar("SELECT COUNT(*) FROM memory_vectors")
1145 .fetch_one(&store.pool)
1146 .await
1147 .unwrap();
1148 assert!(after > 0, "Reindex should rebuild vectors");
1150 }
1151
1152 #[test]
1155 fn f32_bytes_round_trip() {
1156 let original = vec![1.0f32, -2.5, 0.0, std::f32::consts::PI, f32::MAX, f32::MIN];
1157 let bytes = f32_vec_to_bytes(&original);
1158 assert_eq!(bytes.len(), original.len() * 4);
1159 let restored = bytes_to_f32_vec(&bytes);
1160 assert_eq!(original, restored);
1161 }
1162
1163 #[test]
1164 fn f32_bytes_empty_round_trip() {
1165 let original: Vec<f32> = vec![];
1166 let bytes = f32_vec_to_bytes(&original);
1167 assert!(bytes.is_empty());
1168 let restored = bytes_to_f32_vec(&bytes);
1169 assert!(restored.is_empty());
1170 }
1171
1172 #[test]
1173 fn f32_bytes_single_value() {
1174 let original = vec![42.0f32];
1175 let bytes = f32_vec_to_bytes(&original);
1176 assert_eq!(bytes.len(), 4);
1177 let restored = bytes_to_f32_vec(&bytes);
1178 assert_eq!(original, restored);
1179 }
1180
1181 #[tokio::test]
1186 async fn search_applies_temporal_decay() {
1187 let tmp = TempDir::new().unwrap();
1188 let store = test_store(&tmp).await;
1189 let agent_home = tmp.path().join("agent_home");
1190
1191 let content = "Temporal decay test content about quantum physics and relativity.";
1193 store.write_file("test-physics.md", content).await.unwrap();
1194
1195 let old_date = Local::now().date_naive() - chrono::Duration::days(90);
1197 let old_filename = format!("memory/{}.md", old_date.format("%Y-%m-%d"));
1198 std::fs::create_dir_all(agent_home.join("memory")).unwrap();
1199 let old_path = agent_home.join(&old_filename);
1200 std::fs::write(&old_path, content).unwrap();
1201 store.reindex().await.unwrap();
1202
1203 let results = store
1204 .search("quantum physics relativity", 10)
1205 .await
1206 .unwrap();
1207 assert!(
1209 !results.is_empty(),
1210 "Should find at least the evergreen file"
1211 );
1212 }
1213
1214 #[tokio::test]
1215 async fn test_append_daily_creates_memory_dir() {
1216 let tmp = TempDir::new().unwrap();
1217 let store = test_store(&tmp).await;
1218 let agent_home = tmp.path().join("agent_home");
1219
1220 assert!(!agent_home.join("memory").exists());
1222
1223 store
1224 .append_daily("First entry without pre-existing dir.")
1225 .await
1226 .unwrap();
1227
1228 assert!(agent_home.join("memory").exists());
1229 let today = Local::now().format("%Y-%m-%d").to_string();
1230 let content = store.read_file(&format!("memory/{}.md", today)).unwrap();
1231 assert!(content.contains("First entry"));
1232 }
1233
1234 #[tokio::test]
1235 async fn test_bootstrap_context_multibyte_safe() {
1236 let tmp = TempDir::new().unwrap();
1237 let agent_home = tmp.path().join("agent_home");
1238 let config_dir = agent_home.join("config");
1239 let db_dir = tmp.path().join("db");
1240 std::fs::create_dir_all(&config_dir).unwrap();
1241
1242 let soul = "# Soul\n".to_string() + &"café 🌟 ".repeat(5000);
1245 std::fs::write(config_dir.join("SOUL.md"), &soul).unwrap();
1246
1247 let store = MemoryStore::new(&agent_home, &config_dir, &db_dir)
1248 .await
1249 .unwrap();
1250 let ctx = store.bootstrap_context().unwrap();
1252 assert!(ctx.contains("SOUL.md"));
1253 assert!(ctx.is_char_boundary(ctx.len()));
1255 }
1256
1257 #[tokio::test]
1260 async fn config_files_routed_to_config_dir() {
1261 let tmp = TempDir::new().unwrap();
1262 let store = test_store(&tmp).await;
1263 let config_dir = tmp.path().join("agent_home").join("config");
1264
1265 store
1267 .write_file("SOUL.md", "# Soul\nCustom soul.")
1268 .await
1269 .unwrap();
1270 assert!(config_dir.join("SOUL.md").is_file());
1271 let content = std::fs::read_to_string(config_dir.join("SOUL.md")).unwrap();
1272 assert!(content.contains("Custom soul"));
1273
1274 let read = store.read_file("SOUL.md").unwrap();
1276 assert!(read.contains("Custom soul"));
1277 }
1278
1279 #[tokio::test]
1280 async fn runtime_files_routed_to_agent_home() {
1281 let tmp = TempDir::new().unwrap();
1282 let store = test_store(&tmp).await;
1283 let agent_home = tmp.path().join("agent_home");
1284 let config_dir = agent_home.join("config");
1285
1286 store.write_file("notes.md", "Some notes.").await.unwrap();
1288 assert!(agent_home.join("notes.md").is_file());
1289 assert!(!config_dir.join("notes.md").exists());
1290
1291 let content = store.read_file("notes.md").unwrap();
1293 assert!(content.contains("Some notes"));
1294 }
1295
1296 #[tokio::test]
1297 async fn reindex_covers_both_config_and_agent_home() {
1298 let tmp = TempDir::new().unwrap();
1299 let store = test_store(&tmp).await;
1300
1301 store
1303 .write_file("SOUL.md", "Soul content about quantum.")
1304 .await
1305 .unwrap();
1306
1307 store
1309 .write_file("notes.md", "Notes about quantum.")
1310 .await
1311 .unwrap();
1312
1313 store.reindex().await.unwrap();
1315
1316 let results = store.search("quantum", 10).await.unwrap();
1317 let sources: Vec<&str> = results.iter().map(|r| r.source.as_str()).collect();
1318 assert!(
1319 sources.contains(&"SOUL.md"),
1320 "SOUL.md from config_dir should be indexed"
1321 );
1322 assert!(
1323 sources.contains(&"notes.md"),
1324 "notes.md from agent_home should be indexed"
1325 );
1326 }
1327
1328 #[tokio::test]
1329 async fn bootstrap_context_reads_from_config_dir() {
1330 let tmp = TempDir::new().unwrap();
1331 let store = test_store(&tmp).await;
1332
1333 store
1335 .write_file("SOUL.md", "# Soul\nI am ConfigBot.")
1336 .await
1337 .unwrap();
1338
1339 let ctx = store.bootstrap_context().unwrap();
1340 assert!(
1341 ctx.contains("ConfigBot"),
1342 "bootstrap should read from config_dir"
1343 );
1344 }
1345
1346 #[tokio::test]
1347 async fn has_bootstrap_checks_config_dir() {
1348 let tmp = TempDir::new().unwrap();
1349 let store = test_store(&tmp).await;
1350 let config_dir = tmp.path().join("agent_home").join("config");
1351
1352 assert!(
1354 !store.has_bootstrap(),
1355 "Default BOOTSTRAP.md should be empty"
1356 );
1357
1358 std::fs::write(
1360 config_dir.join("BOOTSTRAP.md"),
1361 "Do something on first run.",
1362 )
1363 .unwrap();
1364 assert!(
1365 store.has_bootstrap(),
1366 "BOOTSTRAP.md with content should be detected"
1367 );
1368
1369 store.clear_bootstrap().unwrap();
1371 assert!(
1372 !store.has_bootstrap(),
1373 "Cleared BOOTSTRAP.md should not be detected"
1374 );
1375 }
1376
1377 #[tokio::test]
1380 async fn new_user_creates_db_in_user_dir() {
1381 let tmp = TempDir::new().unwrap();
1382 let user_dir = tmp.path().join("users").join("alice");
1383
1384 let _store = MemoryStore::new_user(&user_dir).await.unwrap();
1385
1386 assert!(
1387 user_dir.join("memory.db").exists(),
1388 "memory.db should be in user_dir"
1389 );
1390 }
1391
1392 #[tokio::test]
1393 async fn new_user_does_not_seed_defaults() {
1394 let tmp = TempDir::new().unwrap();
1395 let user_dir = tmp.path().join("users").join("bob");
1396
1397 let _store = MemoryStore::new_user(&user_dir).await.unwrap();
1398
1399 assert!(
1401 !user_dir.join("SOUL.md").exists(),
1402 "new_user should not seed SOUL.md"
1403 );
1404 assert!(
1405 !user_dir.join("HEARTBEAT.md").exists(),
1406 "new_user should not seed HEARTBEAT.md"
1407 );
1408 }
1409
1410 #[tokio::test]
1411 async fn new_user_indexes_existing_files() {
1412 let tmp = TempDir::new().unwrap();
1413 let user_dir = tmp.path().join("users").join("carol");
1414 std::fs::create_dir_all(&user_dir).unwrap();
1415
1416 std::fs::write(
1418 user_dir.join("MEMORY.md"),
1419 "# Memory\n\nCarol likes functional programming.\n",
1420 )
1421 .unwrap();
1422
1423 let store = MemoryStore::new_user(&user_dir).await.unwrap();
1424
1425 let results = store.search("functional programming", 5).await.unwrap();
1427 assert!(
1428 !results.is_empty(),
1429 "Pre-existing file should be indexed on startup"
1430 );
1431 assert!(results
1432 .iter()
1433 .any(|r| r.text.contains("functional programming")));
1434 }
1435
1436 #[tokio::test]
1437 async fn new_user_write_and_search() {
1438 let tmp = TempDir::new().unwrap();
1439 let user_dir = tmp.path().join("users").join("dave");
1440
1441 let store = MemoryStore::new_user(&user_dir).await.unwrap();
1442
1443 store
1444 .write_file("MEMORY.md", "# Memory\n\nDave prefers dark theme.\n")
1445 .await
1446 .unwrap();
1447
1448 let results = store.search("dark theme", 5).await.unwrap();
1449 assert!(!results.is_empty(), "Written file should be searchable");
1450 assert!(results.iter().any(|r| r.text.contains("dark theme")));
1451 }
1452
1453 #[tokio::test]
1454 async fn new_user_append_daily_and_search() {
1455 let tmp = TempDir::new().unwrap();
1456 let user_dir = tmp.path().join("users").join("eve");
1457
1458 let store = MemoryStore::new_user(&user_dir).await.unwrap();
1459
1460 store
1461 .append_daily("Discussed API design patterns")
1462 .await
1463 .unwrap();
1464
1465 let results = store.search("API design", 5).await.unwrap();
1466 assert!(!results.is_empty(), "Daily log should be searchable");
1467 }
1468}