Skip to main content

starpod_memory/
store.rs

1use std::path::{Path, PathBuf};
2use std::str::FromStr;
3use std::sync::Arc;
4
5use chrono::Local;
6use sqlx::sqlite::{SqliteConnectOptions, SqlitePoolOptions};
7use sqlx::{Row, SqlitePool};
8use tracing::debug;
9
10use starpod_core::{Result, StarpodError};
11
12use crate::defaults;
13use crate::embedder::{self, Embedder};
14use crate::fusion;
15use crate::indexer::{self, reindex_source, CHUNK_OVERLAP, CHUNK_SIZE};
16use crate::schema;
17use crate::scoring;
18
/// Default cap on how much of a single file is included in bootstrap context.
///
/// `bootstrap_context` compares this against `String::len()`, so the unit is
/// bytes of UTF-8 rather than characters.
const BOOTSTRAP_FILE_CAP: usize = 20_000;
21
/// Default half-life, in days, for the temporal decay applied to search ranks.
///
/// Every store starts with this value; it can be replaced through
/// `MemoryStore::set_half_life_days`.
const DEFAULT_HALF_LIFE_DAYS: f64 = 30.0;
24
/// A search result from the memory index.
///
/// Represents a chunk of text from a source file that matched a query.
/// Lower `rank` values are better matches: every search method on
/// [`MemoryStore`] returns its results sorted by ascending `rank`.
///
/// By convention the rank is negative (FTS5 ranks are natively negative and
/// vector search stores the negated cosine similarity).
/// NOTE(review): a chunk whose cosine similarity is negative gets a positive
/// rank from `vector_search`, so "always negative" is a convention rather than
/// an invariant — confirm before relying on the sign.
#[derive(Debug, Clone)]
pub struct SearchResult {
    /// Source file the chunk came from (e.g. `"SOUL.md"`, `"memory/2026-03-15.md"`).
    pub source: String,
    /// The matching text chunk.
    ///
    /// May be empty for vector-search hits whose chunk has no matching row in
    /// the FTS table (the text comes from a `LEFT JOIN`).
    pub text: String,
    /// Starting line number (1-indexed) in the source file.
    pub line_start: usize,
    /// Ending line number in the source file.
    pub line_end: usize,
    /// Rank score — lower (more negative) = better match.
    ///
    /// For FTS5-only search this is the raw BM25 score adjusted for temporal decay.
    /// For vector search this is the negated cosine similarity.
    /// For hybrid search this is the fused RRF rank after temporal decay.
    pub rank: f64,
}
47
/// The main memory store — manages agent-level markdown files with a hybrid search index.
///
/// Blueprint-managed files (SOUL.md, lifecycle files) live in `config_dir`
/// (`.starpod/config/`). Runtime data files (daily logs, agent-written files)
/// live in `agent_home` (`.starpod/`).
/// The FTS5/vector database lives in `db_dir` (`.starpod/db/`); that directory
/// is only used while opening the pool and is not kept as a field.
/// User-specific files (USER.md, MEMORY.md, daily logs) are handled by
/// [`UserMemoryView`](crate::user_view::UserMemoryView), not this struct.
///
/// # Search Pipeline
///
/// - [`search`](Self::search) — FTS5 + temporal decay (always available)
/// - [`vector_search`](Self::vector_search) — cosine similarity (requires embedder)
/// - [`hybrid_search`](Self::hybrid_search) — FTS5 + vector → RRF fusion → decay → MMR
///
/// # Security
///
/// [`read_file`](Self::read_file) and [`write_file`](Self::write_file) validate
/// the caller-supplied name via [`scoring::validate_path`] to prevent directory
/// traversal, and `write_file` additionally checks the payload with
/// `scoring::validate_content_size`. `append_daily` builds its own
/// `memory/<date>.md` path and therefore does not go through path validation.
pub struct MemoryStore {
    /// Agent home directory (.starpod/) — runtime data files, general read/write.
    agent_home: PathBuf,
    /// Config directory (.starpod/config/) — blueprint-managed files (SOUL.md, lifecycle).
    /// Per-user stores created with `new_user` set this to the same path as `agent_home`.
    config_dir: PathBuf,
    /// SQLite connection pool backing the `memory_fts` and `memory_vectors` tables.
    pool: SqlitePool,
    /// Half-life in days for temporal decay on search results.
    half_life_days: f64,
    /// MMR lambda: 0.0 = max diversity, 1.0 = pure relevance.
    mmr_lambda: f64,
    /// Optional embedder for vector search; `None` until `set_embedder` is called,
    /// in which case vector search returns nothing and hybrid search falls back to FTS5.
    embedder: Option<Arc<dyn Embedder>>,
    /// Target chunk size in characters for indexing.
    chunk_size: usize,
    /// Overlap in characters between chunks.
    chunk_overlap: usize,
    /// Maximum amount of a single file to include in bootstrap context.
    /// Compared against `String::len()`, i.e. measured in bytes.
    bootstrap_file_cap: usize,
}
86
87impl MemoryStore {
88    /// Create a new MemoryStore.
89    ///
90    /// - `agent_home`: the `.starpod/` directory (runtime data, general read/write)
91    /// - `config_dir`: the `.starpod/config/` directory (SOUL.md, lifecycle files)
92    /// - `db_dir`: the `.starpod/db/` directory (contains memory.db)
93    pub async fn new(agent_home: &Path, config_dir: &Path, db_dir: &Path) -> Result<Self> {
94        // Ensure directories exist
95        std::fs::create_dir_all(agent_home).map_err(StarpodError::Io)?;
96        std::fs::create_dir_all(config_dir).map_err(StarpodError::Io)?;
97        std::fs::create_dir_all(db_dir).map_err(StarpodError::Io)?;
98
99        // Open SQLite pool — 2 connections (one writer + one reader).
100        // SQLite serialises writes, so extra connections only waste memory.
101        let db_path = db_dir.join("memory.db");
102        let opts =
103            SqliteConnectOptions::from_str(&format!("sqlite://{}?mode=rwc", db_path.display()))
104                .map_err(|e| StarpodError::Database(format!("Invalid DB path: {}", e)))?
105                .pragma("journal_mode", "WAL")
106                .pragma("busy_timeout", "5000")
107                .pragma("synchronous", "NORMAL");
108
109        let pool = SqlitePoolOptions::new()
110            .max_connections(2)
111            .connect_with(opts)
112            .await
113            .map_err(|e| StarpodError::Database(format!("Failed to open database: {}", e)))?;
114
115        // Run migrations
116        schema::run_migrations(&pool).await?;
117
118        let store = Self {
119            agent_home: agent_home.to_path_buf(),
120            config_dir: config_dir.to_path_buf(),
121            pool,
122            half_life_days: DEFAULT_HALF_LIFE_DAYS,
123            mmr_lambda: 0.7,
124            embedder: None,
125            chunk_size: CHUNK_SIZE,
126            chunk_overlap: CHUNK_OVERLAP,
127            bootstrap_file_cap: BOOTSTRAP_FILE_CAP,
128        };
129
130        // Seed default files if they don't exist
131        store.seed_defaults()?;
132
133        // Initial index
134        store.reindex().await?;
135
136        Ok(store)
137    }
138
139    /// Create a lightweight per-user memory store.
140    ///
141    /// Uses `user_dir` for both file storage and the SQLite database
142    /// (`user_dir/memory.db`). Skips seeding default lifecycle files
143    /// (SOUL.md, HEARTBEAT.md, etc.) since those belong to the agent-level store.
144    ///
145    /// The store has `max_connections(1)` to keep resource usage low when
146    /// many users each get their own store.
147    ///
148    /// Any existing `.md` files in `user_dir` are indexed on creation.
149    pub async fn new_user(user_dir: &Path) -> Result<Self> {
150        std::fs::create_dir_all(user_dir).map_err(StarpodError::Io)?;
151
152        let db_path = user_dir.join("memory.db");
153        let opts =
154            SqliteConnectOptions::from_str(&format!("sqlite://{}?mode=rwc", db_path.display()))
155                .map_err(|e| StarpodError::Database(format!("Invalid DB path: {}", e)))?;
156
157        let pool = SqlitePoolOptions::new()
158            .max_connections(1)
159            .connect_with(opts)
160            .await
161            .map_err(|e| StarpodError::Database(format!("Failed to open user database: {}", e)))?;
162
163        schema::run_migrations(&pool).await?;
164
165        let store = Self {
166            agent_home: user_dir.to_path_buf(),
167            config_dir: user_dir.to_path_buf(),
168            pool,
169            half_life_days: DEFAULT_HALF_LIFE_DAYS,
170            mmr_lambda: 0.7,
171            embedder: None,
172            chunk_size: CHUNK_SIZE,
173            chunk_overlap: CHUNK_OVERLAP,
174            bootstrap_file_cap: BOOTSTRAP_FILE_CAP,
175        };
176
177        // Index existing user files (no default seeding)
178        store.reindex().await?;
179
180        Ok(store)
181    }
182
183    /// Seed default lifecycle files on first run.
184    ///
185    /// Blueprint-managed files (SOUL.md, HEARTBEAT.md, BOOT.md, BOOTSTRAP.md)
186    /// are seeded into `config_dir`. USER.md and MEMORY.md are per-user files
187    /// managed by [`UserMemoryView`](crate::user_view::UserMemoryView).
188    ///
189    /// Returns `true` if this is a fresh config (SOUL.md didn't exist yet).
190    fn seed_defaults(&self) -> Result<bool> {
191        let fresh = !self.config_dir.join("SOUL.md").exists();
192
193        // Seed SOUL.md only if not present (first init without blueprint)
194        if fresh {
195            let path = self.config_dir.join("SOUL.md");
196            debug!(file = "SOUL.md", "Seeding default SOUL.md");
197            std::fs::write(&path, defaults::DEFAULT_SOUL)?;
198        }
199
200        // Lifecycle files in config_dir
201        let lifecycle_files = [
202            ("HEARTBEAT.md", defaults::DEFAULT_HEARTBEAT),
203            ("BOOT.md", defaults::DEFAULT_BOOT),
204            ("BOOTSTRAP.md", defaults::DEFAULT_BOOTSTRAP),
205        ];
206
207        for (name, content) in &lifecycle_files {
208            let path = self.config_dir.join(name);
209            if !path.exists() {
210                debug!(file = %name, "Seeding default file");
211                std::fs::write(&path, content)?;
212            }
213        }
214
215        Ok(fresh)
216    }
217
218    /// Get the agent home directory path (.starpod/).
219    pub fn agent_home(&self) -> &Path {
220        &self.agent_home
221    }
222
223    /// Get the config directory path (.starpod/config/).
224    pub fn config_dir(&self) -> &Path {
225        &self.config_dir
226    }
227
    /// Blueprint-managed file names that live in config_dir.
    ///
    /// `resolve_path` routes exactly these bare names to `config_dir`, and
    /// `reindex` skips them when scanning the top level of `agent_home`.
    const CONFIG_FILES: &[&str] = &["SOUL.md", "HEARTBEAT.md", "BOOT.md", "BOOTSTRAP.md"];
230
231    /// Resolve a file path: config files go to config_dir, everything else to agent_home.
232    fn resolve_path(&self, name: &str) -> PathBuf {
233        // Check if this is a known config file (top-level only, not in subdirs)
234        if !name.contains('/') && Self::CONFIG_FILES.contains(&name) {
235            self.config_dir.join(name)
236        } else {
237            self.agent_home.join(name)
238        }
239    }
240
241    /// Returns `true` if BOOTSTRAP.md exists and has non-empty content.
242    pub fn has_bootstrap(&self) -> bool {
243        let path = self.config_dir.join("BOOTSTRAP.md");
244        path.is_file()
245            && std::fs::read_to_string(&path)
246                .map(|c| !c.trim().is_empty())
247                .unwrap_or(false)
248    }
249
250    /// Delete BOOTSTRAP.md (called after successful bootstrap execution).
251    pub fn clear_bootstrap(&self) -> Result<()> {
252        let path = self.config_dir.join("BOOTSTRAP.md");
253        if path.exists() {
254            std::fs::write(&path, "")?;
255        }
256        Ok(())
257    }
258
259    /// Build agent-level bootstrap context from SOUL.md only.
260    ///
261    /// User-specific context (USER.md, MEMORY.md, daily logs) is handled by
262    /// [`UserMemoryView::bootstrap_context()`](crate::user_view::UserMemoryView::bootstrap_context).
263    pub fn bootstrap_context(&self) -> Result<String> {
264        let content = self.read_file("SOUL.md")?;
265        let capped = if content.len() > self.bootstrap_file_cap {
266            let mut end = self.bootstrap_file_cap;
267            while end > 0 && !content.is_char_boundary(end) {
268                end -= 1;
269            }
270            &content[..end]
271        } else {
272            &content
273        };
274        Ok(format!("--- SOUL.md ---\n{}", capped))
275    }
276
    /// Set the half-life for temporal decay on search results.
    ///
    /// The value is stored as given (no range check) and is handed to the
    /// `scoring` decay helpers by subsequent searches.
    pub fn set_half_life_days(&mut self, days: f64) {
        self.half_life_days = days;
    }
281
    /// Set the MMR lambda for diversity vs relevance balance.
    ///
    /// 0.0 = max diversity, 1.0 = pure relevance. The value is stored as given
    /// (no range check) and passed to `scoring::mmr_rerank` by `hybrid_search`.
    pub fn set_mmr_lambda(&mut self, lambda: f64) {
        self.mmr_lambda = lambda;
    }
286
    /// Set the target chunk size in characters for indexing.
    ///
    /// Only affects indexing performed after this call; content that is
    /// already indexed is not re-chunked until it is written or reindexed again.
    pub fn set_chunk_size(&mut self, size: usize) {
        self.chunk_size = size;
    }
291
    /// Set the overlap in characters between chunks.
    ///
    /// Only affects indexing performed after this call; content that is
    /// already indexed is not re-chunked until it is written or reindexed again.
    pub fn set_chunk_overlap(&mut self, overlap: usize) {
        self.chunk_overlap = overlap;
    }
296
    /// Set the maximum amount of a single file to include in bootstrap context.
    ///
    /// `bootstrap_context` compares the cap against `String::len()`, so the
    /// unit is bytes of UTF-8 rather than characters.
    pub fn set_bootstrap_file_cap(&mut self, cap: usize) {
        self.bootstrap_file_cap = cap;
    }
301
302    /// Full-text search across all indexed content.
303    ///
304    /// Results are re-ranked with temporal decay: recent daily logs score
305    /// higher than older ones, while evergreen files (SOUL.md, HEARTBEAT.md)
306    /// are unaffected.
307    pub async fn search(&self, query: &str, limit: usize) -> Result<Vec<SearchResult>> {
308        // Fetch more than needed so we have room after re-ranking
309        let fetch_limit = (limit * 3).max(30);
310        let rows = sqlx::query(
311            "SELECT source, chunk_text, line_start, line_end, rank
312             FROM memory_fts
313             WHERE memory_fts MATCH ?1
314             ORDER BY rank
315             LIMIT ?2",
316        )
317        .bind(query)
318        .bind(fetch_limit as i64)
319        .fetch_all(&self.pool)
320        .await
321        .map_err(|e| StarpodError::Database(format!("Search query failed: {}", e)))?;
322
323        let mut results: Vec<SearchResult> = rows
324            .iter()
325            .map(|row| {
326                let source = row.get::<String, _>("source");
327                let raw_rank = row.get::<f64, _>("rank");
328                let adjusted_rank = scoring::apply_decay(raw_rank, &source, self.half_life_days);
329                SearchResult {
330                    source,
331                    text: row.get::<String, _>("chunk_text"),
332                    line_start: row.get::<i64, _>("line_start") as usize,
333                    line_end: row.get::<i64, _>("line_end") as usize,
334                    rank: adjusted_rank,
335                }
336            })
337            .collect();
338
339        // Re-sort by adjusted rank (more negative = better)
340        results.sort_by(|a, b| {
341            a.rank
342                .partial_cmp(&b.rank)
343                .unwrap_or(std::cmp::Ordering::Equal)
344        });
345        results.truncate(limit);
346
347        Ok(results)
348    }
349
    /// Set the embedder for vector search.
    ///
    /// Replaces any previously configured embedder. Vectors are only produced
    /// for content indexed after this call; nothing is re-embedded here.
    pub fn set_embedder(&mut self, embedder: Arc<dyn Embedder>) {
        self.embedder = Some(embedder);
    }
354
355    /// Vector search: embed the query, compare against stored vectors, return top-K.
356    ///
357    /// Returns empty vec if no embedder is configured.
358    pub async fn vector_search(&self, query: &str, limit: usize) -> Result<Vec<SearchResult>> {
359        let embedder = match &self.embedder {
360            Some(e) => e,
361            None => return Ok(Vec::new()),
362        };
363
364        // Embed the query
365        let query_vecs = embedder.embed(&[query.to_string()]).await?;
366        let query_vec = match query_vecs.first() {
367            Some(v) => v,
368            None => return Ok(Vec::new()),
369        };
370
371        // Load all stored vectors
372        let rows = sqlx::query(
373            "SELECT v.source, v.embedding, v.line_start, v.line_end, f.chunk_text
374             FROM memory_vectors v
375             LEFT JOIN memory_fts f ON f.source = v.source
376                 AND f.line_start = v.line_start AND f.line_end = v.line_end",
377        )
378        .fetch_all(&self.pool)
379        .await
380        .map_err(|e| StarpodError::Database(format!("Vector search failed: {}", e)))?;
381
382        let mut scored: Vec<(f32, SearchResult)> = Vec::new();
383        for row in &rows {
384            let blob: Vec<u8> = row.get("embedding");
385            let embedding = bytes_to_f32_vec(&blob);
386            let similarity = embedder::cosine_similarity(query_vec, &embedding);
387
388            let source: String = row.get("source");
389            let text: String = row.try_get("chunk_text").unwrap_or_default();
390
391            scored.push((
392                similarity,
393                SearchResult {
394                    source,
395                    text,
396                    line_start: row.get::<i64, _>("line_start") as usize,
397                    line_end: row.get::<i64, _>("line_end") as usize,
398                    rank: -(similarity as f64), // negative similarity so more negative = better
399                },
400            ));
401        }
402
403        // Sort by similarity descending
404        scored.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));
405        scored.truncate(limit);
406
407        Ok(scored.into_iter().map(|(_, r)| r).collect())
408    }
409
410    /// Hybrid search: run FTS5 + vector search, fuse with RRF, apply MMR.
411    ///
412    /// Falls back to FTS5-only when no embedder is configured.
413    /// Pipeline: FTS5 + vector → RRF fusion → temporal decay → MMR diversity.
414    pub async fn hybrid_search(&self, query: &str, limit: usize) -> Result<Vec<SearchResult>> {
415        let embedder = match &self.embedder {
416            Some(e) => e,
417            None => return self.search(query, limit).await,
418        };
419
420        // Run both searches concurrently
421        let fts_limit = (limit * 3).max(30);
422        let vec_limit = (limit * 3).max(30);
423
424        let (fts_results, vec_results) = tokio::join!(
425            self.fts_search_raw(query, fts_limit),
426            self.vector_search(query, vec_limit),
427        );
428
429        let fts_results = fts_results?;
430        let vec_results = vec_results?;
431
432        // Fuse with RRF
433        let mut fused = fusion::reciprocal_rank_fusion(&fts_results, &vec_results, limit * 3);
434
435        // Apply temporal decay — multiply to make old results less negative (worse)
436        for result in &mut fused {
437            let decay = scoring::decay_factor(&result.source, self.half_life_days);
438            if decay > 0.0 && decay < 1.0 {
439                result.rank *= decay;
440            }
441        }
442
443        // Re-sort by decayed rank
444        fused.sort_by(|a, b| {
445            a.rank
446                .partial_cmp(&b.rank)
447                .unwrap_or(std::cmp::Ordering::Equal)
448        });
449
450        // Apply MMR for diversity: embed top candidates, then re-rank
451        let mmr_pool_size = (limit * 2).min(fused.len());
452        if mmr_pool_size > 0 {
453            // Embed the query for MMR
454            let query_vecs = embedder.embed(&[query.to_string()]).await?;
455            if let Some(query_vec) = query_vecs.first() {
456                // Embed the candidate texts
457                let texts: Vec<String> = fused[..mmr_pool_size]
458                    .iter()
459                    .map(|r| r.text.clone())
460                    .collect();
461                let embeddings = embedder.embed(&texts).await?;
462
463                let candidates: Vec<(Vec<f32>, usize)> = embeddings
464                    .into_iter()
465                    .enumerate()
466                    .map(|(i, emb)| (emb, i))
467                    .collect();
468
469                let selected_indices =
470                    scoring::mmr_rerank(query_vec, &candidates, limit, self.mmr_lambda);
471
472                let pool = fused;
473                fused = selected_indices
474                    .into_iter()
475                    .map(|idx| pool[idx].clone())
476                    .collect();
477            } else {
478                fused.truncate(limit);
479            }
480        }
481
482        Ok(fused)
483    }
484
485    /// Raw FTS5 search without decay (used internally by hybrid_search).
486    async fn fts_search_raw(&self, query: &str, limit: usize) -> Result<Vec<SearchResult>> {
487        let rows = sqlx::query(
488            "SELECT source, chunk_text, line_start, line_end, rank
489             FROM memory_fts
490             WHERE memory_fts MATCH ?1
491             ORDER BY rank
492             LIMIT ?2",
493        )
494        .bind(query)
495        .bind(limit as i64)
496        .fetch_all(&self.pool)
497        .await
498        .map_err(|e| StarpodError::Database(format!("Search query failed: {}", e)))?;
499
500        Ok(rows
501            .iter()
502            .map(|row| SearchResult {
503                source: row.get::<String, _>("source"),
504                text: row.get::<String, _>("chunk_text"),
505                line_start: row.get::<i64, _>("line_start") as usize,
506                line_end: row.get::<i64, _>("line_end") as usize,
507                rank: row.get::<f64, _>("rank"),
508            })
509            .collect())
510    }
511
512    /// Embed and store vectors for a source file's chunks.
513    async fn embed_and_store_source(&self, source: &str, text: &str) -> Result<()> {
514        let embedder = match &self.embedder {
515            Some(e) => e,
516            None => return Ok(()),
517        };
518
519        // Delete old vectors for this source
520        sqlx::query("DELETE FROM memory_vectors WHERE source = ?1")
521            .bind(source)
522            .execute(&self.pool)
523            .await
524            .map_err(|e| StarpodError::Database(format!("Failed to delete old vectors: {}", e)))?;
525
526        // Chunk the text
527        let chunks = indexer::chunk_text(source, text, self.chunk_size, self.chunk_overlap);
528        if chunks.is_empty() {
529            return Ok(());
530        }
531
532        // Embed all chunks in one batch
533        let texts: Vec<String> = chunks.iter().map(|c| c.text.clone()).collect();
534        let embeddings = embedder.embed(&texts).await?;
535
536        // Store vectors
537        for (idx, (chunk, embedding)) in chunks.iter().zip(embeddings.iter()).enumerate() {
538            let blob = f32_vec_to_bytes(embedding);
539            sqlx::query(
540                "INSERT INTO memory_vectors (source, chunk_idx, embedding, line_start, line_end)
541                 VALUES (?1, ?2, ?3, ?4, ?5)",
542            )
543            .bind(&chunk.source)
544            .bind(idx as i64)
545            .bind(&blob)
546            .bind(chunk.line_start as i64)
547            .bind(chunk.line_end as i64)
548            .execute(&self.pool)
549            .await
550            .map_err(|e| StarpodError::Database(format!("Failed to insert vector: {}", e)))?;
551        }
552
553        Ok(())
554    }
555
556    /// Read a file from the appropriate directory (config_dir for config files, agent_home otherwise).
557    pub fn read_file(&self, name: &str) -> Result<String> {
558        // Validate against agent_home (the broader sandbox)
559        scoring::validate_path(name, &self.agent_home)?;
560        let path = self.resolve_path(name);
561        if !path.exists() {
562            return Ok(String::new());
563        }
564        std::fs::read_to_string(&path).map_err(StarpodError::Io)
565    }
566
567    /// Write a file and reindex it.
568    ///
569    /// Config files (SOUL.md, lifecycle files) are written to config_dir,
570    /// everything else to agent_home.
571    pub async fn write_file(&self, name: &str, content: &str) -> Result<()> {
572        scoring::validate_path(name, &self.agent_home)?;
573        scoring::validate_content_size(content)?;
574
575        let path = self.resolve_path(name);
576
577        // Ensure parent directory exists
578        if let Some(parent) = path.parent() {
579            std::fs::create_dir_all(parent)?;
580        }
581
582        std::fs::write(&path, content)?;
583
584        // Reindex this file (FTS5 + vectors)
585        reindex_source(
586            &self.pool,
587            name,
588            content,
589            self.chunk_size,
590            self.chunk_overlap,
591        )
592        .await?;
593        self.embed_and_store_source(name, content).await?;
594
595        Ok(())
596    }
597
598    /// Append a timestamped entry to today's daily log.
599    pub async fn append_daily(&self, text: &str) -> Result<()> {
600        let today = Local::now().format("%Y-%m-%d").to_string();
601        let filename = format!("memory/{}.md", today);
602        let path = self.agent_home.join(&filename);
603
604        // Ensure parent directory exists
605        if let Some(parent) = path.parent() {
606            std::fs::create_dir_all(parent)?;
607        }
608
609        let timestamp = Local::now().format("%H:%M:%S").to_string();
610        let entry = format!("\n## {}\n{}\n", timestamp, text);
611
612        let mut content = if path.exists() {
613            std::fs::read_to_string(&path)?
614        } else {
615            format!("# Daily Log — {}\n", today)
616        };
617
618        content.push_str(&entry);
619        std::fs::write(&path, &content)?;
620
621        // Reindex the daily file (FTS5 + vectors)
622        reindex_source(
623            &self.pool,
624            &filename,
625            &content,
626            self.chunk_size,
627            self.chunk_overlap,
628        )
629        .await?;
630        self.embed_and_store_source(&filename, &content).await?;
631
632        Ok(())
633    }
634
635    /// Full reindex of agent-level markdown files.
636    ///
637    /// Indexes config files from config_dir (SOUL.md, lifecycle files) and
638    /// runtime files from agent_home (memory/ daily logs, agent-written files).
639    /// User-level files are not indexed here — they're handled per-user.
640    pub async fn reindex(&self) -> Result<()> {
641        // Clear all existing FTS entries
642        sqlx::query("DELETE FROM memory_fts")
643            .execute(&self.pool)
644            .await
645            .map_err(|e| StarpodError::Database(format!("Failed to clear FTS: {}", e)))?;
646
647        // Clear all existing vectors
648        sqlx::query("DELETE FROM memory_vectors")
649            .execute(&self.pool)
650            .await
651            .map_err(|e| StarpodError::Database(format!("Failed to clear vectors: {}", e)))?;
652
653        // Index config files (SOUL.md, HEARTBEAT.md, BOOT.md, BOOTSTRAP.md)
654        self.index_dir(&self.config_dir.clone(), "").await?;
655
656        // Index top-level .md files in agent_home (excluding config dir)
657        if let Ok(entries) = std::fs::read_dir(&self.agent_home) {
658            for entry in entries.flatten() {
659                let path = entry.path();
660                if path.is_file() && path.extension().is_some_and(|ext| ext == "md") {
661                    let filename = entry.file_name().to_string_lossy().to_string();
662                    // Skip config files (already indexed from config_dir)
663                    if !Self::CONFIG_FILES.iter().any(|&f| f == filename) {
664                        let content = std::fs::read_to_string(&path)?;
665                        reindex_source(
666                            &self.pool,
667                            &filename,
668                            &content,
669                            self.chunk_size,
670                            self.chunk_overlap,
671                        )
672                        .await?;
673                        self.embed_and_store_source(&filename, &content).await?;
674                    }
675                }
676            }
677        }
678
679        Ok(())
680    }
681
682    /// Index all .md files in a directory with a source prefix.
683    async fn index_dir(&self, dir: &Path, prefix: &str) -> Result<()> {
684        let entries = std::fs::read_dir(dir).map_err(StarpodError::Io)?;
685
686        for entry in entries {
687            let entry = entry.map_err(StarpodError::Io)?;
688            let path = entry.path();
689            if path.is_file() && path.extension().is_some_and(|ext| ext == "md") {
690                let filename = entry.file_name().to_string_lossy().to_string();
691                let source = format!("{}{}", prefix, filename);
692                let content = std::fs::read_to_string(&path)?;
693                reindex_source(
694                    &self.pool,
695                    &source,
696                    &content,
697                    self.chunk_size,
698                    self.chunk_overlap,
699                )
700                .await?;
701                self.embed_and_store_source(&source, &content).await?;
702            }
703        }
704
705        Ok(())
706    }
707}
708
/// Serialise a slice of `f32` values for BLOB storage.
///
/// Each value is written as its 4-byte little-endian representation, in
/// order, so the output is exactly `4 * vec.len()` bytes long.
fn f32_vec_to_bytes(vec: &[f32]) -> Vec<u8> {
    vec.iter().flat_map(|value| value.to_le_bytes()).collect()
}
717
/// Decode a BLOB of little-endian `f32` values.
///
/// Bytes are consumed four at a time; a trailing group of fewer than four
/// bytes is ignored.
fn bytes_to_f32_vec(bytes: &[u8]) -> Vec<f32> {
    let mut floats = Vec::with_capacity(bytes.len() / 4);
    for word in bytes.chunks_exact(4) {
        let mut raw = [0u8; 4];
        raw.copy_from_slice(word);
        floats.push(f32::from_le_bytes(raw));
    }
    floats
}
725
726#[cfg(test)]
727mod tests {
728    use super::*;
729    use tempfile::TempDir;
730
731    // ── Helper ──────────────────────────────────────────────────────────
732
733    /// Create a MemoryStore for tests with agent_home, config_dir, and db_dir as siblings.
734    async fn test_store(tmp: &TempDir) -> MemoryStore {
735        let agent_home = tmp.path().join("agent_home");
736        let config_dir = tmp.path().join("agent_home").join("config");
737        let db_dir = tmp.path().join("db");
738        MemoryStore::new(&agent_home, &config_dir, &db_dir)
739            .await
740            .unwrap()
741    }
742
743    // ── Existing tests ──────────────────────────────────────────────────
744
745    #[tokio::test]
746    async fn test_new_seeds_defaults() {
747        let tmp = TempDir::new().unwrap();
748        let store = test_store(&tmp).await;
749        let config_dir = tmp.path().join("agent_home").join("config");
750
751        // Config files should exist in config_dir
752        assert!(config_dir.join("SOUL.md").exists());
753        assert!(config_dir.join("HEARTBEAT.md").exists());
754        assert!(config_dir.join("BOOT.md").exists());
755        assert!(config_dir.join("BOOTSTRAP.md").exists());
756
757        // User-level files should NOT exist
758        assert!(!config_dir.join("USER.md").exists());
759        assert!(!config_dir.join("MEMORY.md").exists());
760
761        // DB should exist
762        assert!(tmp.path().join("db").join("memory.db").exists());
763
764        // Should be readable via read_file (routes to config_dir)
765        let soul = store.read_file("SOUL.md").unwrap();
766        assert!(soul.contains("Nova"));
767    }
768
769    #[tokio::test]
770    async fn test_write_and_search() {
771        let tmp = TempDir::new().unwrap();
772        let store = test_store(&tmp).await;
773
774        store
775            .write_file(
776                "test-content.md",
777                "Rust is a systems programming language focused on safety and performance.",
778            )
779            .await
780            .unwrap();
781
782        let results = store.search("Rust programming", 5).await.unwrap();
783        assert!(!results.is_empty());
784        assert!(results[0].text.contains("Rust"));
785    }
786
787    #[tokio::test]
788    async fn test_append_daily() {
789        let tmp = TempDir::new().unwrap();
790        let store = test_store(&tmp).await;
791        let agent_home = tmp.path().join("agent_home");
792
793        // Create memory/ dir for the test (normally done by UserMemoryView)
794        std::fs::create_dir_all(agent_home.join("memory")).unwrap();
795
796        store
797            .append_daily("Had a great conversation about Rust.")
798            .await
799            .unwrap();
800        store
801            .append_daily("Discussed memory management.")
802            .await
803            .unwrap();
804
805        let today = Local::now().format("%Y-%m-%d").to_string();
806        let content = store.read_file(&format!("memory/{}.md", today)).unwrap();
807        assert!(content.contains("great conversation"));
808        assert!(content.contains("memory management"));
809    }
810
811    #[tokio::test]
812    async fn test_bootstrap_context() {
813        let tmp = TempDir::new().unwrap();
814        let store = test_store(&tmp).await;
815
816        let ctx = store.bootstrap_context().unwrap();
817        assert!(ctx.contains("SOUL.md"));
818        assert!(ctx.contains("Nova"));
819        // User files should NOT be in agent bootstrap
820        assert!(!ctx.contains("USER.md"));
821        assert!(!ctx.contains("MEMORY.md"));
822    }
823
824    #[tokio::test]
825    async fn test_reindex() {
826        let tmp = TempDir::new().unwrap();
827        let store = test_store(&tmp).await;
828        let agent_home = tmp.path().join("agent_home");
829
830        // Write a file directly (bypassing write_file)
831        std::fs::write(
832            agent_home.join("test-quantum.md"),
833            "This is about quantum computing and qubits.",
834        )
835        .unwrap();
836
837        // Reindex should pick it up
838        store.reindex().await.unwrap();
839
840        let results = store.search("quantum computing", 5).await.unwrap();
841        assert!(!results.is_empty());
842    }
843
844    // ── Path validation integration tests ───────────────────────────────
845
846    #[tokio::test]
847    async fn write_file_rejects_traversal() {
848        let tmp = TempDir::new().unwrap();
849        let store = test_store(&tmp).await;
850        let err = store.write_file("../escape.md", "evil content").await;
851        assert!(err.is_err(), "write_file should reject path traversal");
852    }
853
854    #[tokio::test]
855    async fn write_file_rejects_non_md() {
856        let tmp = TempDir::new().unwrap();
857        let store = test_store(&tmp).await;
858        let err = store.write_file("script.sh", "#!/bin/bash").await;
859        assert!(err.is_err(), "write_file should reject non-.md files");
860    }
861
862    #[tokio::test]
863    async fn write_file_rejects_absolute_path() {
864        let tmp = TempDir::new().unwrap();
865        let store = test_store(&tmp).await;
866        let err = store.write_file("/tmp/evil.md", "content").await;
867        assert!(err.is_err(), "write_file should reject absolute paths");
868    }
869
870    #[tokio::test]
871    async fn read_file_rejects_traversal() {
872        let tmp = TempDir::new().unwrap();
873        let store = test_store(&tmp).await;
874        let err = store.read_file("../../etc/passwd.md");
875        assert!(err.is_err(), "read_file should reject path traversal");
876    }
877
878    #[tokio::test]
879    async fn read_file_rejects_non_md() {
880        let tmp = TempDir::new().unwrap();
881        let store = test_store(&tmp).await;
882        let err = store.read_file("secret.json");
883        assert!(err.is_err(), "read_file should reject non-.md files");
884    }
885
886    // ── Content size validation tests ───────────────────────────────────
887
888    #[tokio::test]
889    async fn write_file_rejects_oversized_content() {
890        let tmp = TempDir::new().unwrap();
891        let store = test_store(&tmp).await;
892        let big = "x".repeat(scoring::MAX_WRITE_SIZE + 1);
893        let err = store.write_file("big.md", &big).await;
894        assert!(err.is_err(), "write_file should reject content > 1 MB");
895    }
896
897    #[tokio::test]
898    async fn write_file_accepts_content_at_limit() {
899        let tmp = TempDir::new().unwrap();
900        let store = test_store(&tmp).await;
901        let exact = "x".repeat(scoring::MAX_WRITE_SIZE);
902        let result = store.write_file("exact.md", &exact).await;
903        assert!(
904            result.is_ok(),
905            "write_file should accept content at exactly 1 MB"
906        );
907    }
908
909    // ── Setter tests ────────────────────────────────────────────────────
910
911    #[tokio::test]
912    async fn set_half_life_days_is_applied() {
913        let tmp = TempDir::new().unwrap();
914        let mut store = test_store(&tmp).await;
915        store.set_half_life_days(7.0);
916        assert_eq!(store.half_life_days, 7.0);
917    }
918
919    #[tokio::test]
920    async fn set_mmr_lambda_is_applied() {
921        let tmp = TempDir::new().unwrap();
922        let mut store = test_store(&tmp).await;
923        store.set_mmr_lambda(0.5);
924        assert_eq!(store.mmr_lambda, 0.5);
925    }
926
927    #[tokio::test]
928    async fn set_chunk_size_is_applied() {
929        let tmp = TempDir::new().unwrap();
930        let mut store = test_store(&tmp).await;
931        store.set_chunk_size(800);
932        assert_eq!(store.chunk_size, 800);
933    }
934
935    #[tokio::test]
936    async fn set_chunk_overlap_is_applied() {
937        let tmp = TempDir::new().unwrap();
938        let mut store = test_store(&tmp).await;
939        store.set_chunk_overlap(160);
940        assert_eq!(store.chunk_overlap, 160);
941    }
942
943    #[tokio::test]
944    async fn set_bootstrap_file_cap_is_applied() {
945        let tmp = TempDir::new().unwrap();
946        let mut store = test_store(&tmp).await;
947        store.set_bootstrap_file_cap(5000);
948        assert_eq!(store.bootstrap_file_cap, 5000);
949    }
950
951    #[tokio::test]
952    async fn bootstrap_file_cap_limits_output() {
953        let tmp = TempDir::new().unwrap();
954        let mut store = test_store(&tmp).await;
955
956        // Write a large file (well above the cap we'll set)
957        let large_content = "x".repeat(10_000);
958        store.write_file("SOUL.md", &large_content).await.unwrap();
959
960        // Set a small bootstrap_file_cap
961        store.set_bootstrap_file_cap(500);
962
963        let ctx = store.bootstrap_context().unwrap();
964        // The SOUL.md section should be capped at 500 chars of content.
965        // Find the SOUL.md section and verify its content portion is truncated.
966        let soul_section = ctx
967            .split("--- SOUL.md ---\n")
968            .nth(1)
969            .unwrap_or("")
970            .split("\n\n--- ")
971            .next()
972            .unwrap_or("");
973        assert!(
974            soul_section.len() <= 500,
975            "SOUL.md section should be capped at 500 chars, got {}",
976            soul_section.len(),
977        );
978    }
979
980    // ── Vector search without embedder ──────────────────────────────────
981
982    #[tokio::test]
983    async fn vector_search_returns_empty_without_embedder() {
984        let tmp = TempDir::new().unwrap();
985        let store = test_store(&tmp).await;
986        let results = store.vector_search("anything", 10).await.unwrap();
987        assert!(
988            results.is_empty(),
989            "vector_search should return empty without embedder"
990        );
991    }
992
993    #[tokio::test]
994    async fn hybrid_search_falls_back_to_fts_without_embedder() {
995        let tmp = TempDir::new().unwrap();
996        let store = test_store(&tmp).await;
997
998        store
999            .write_file(
1000                "test-elephants.md",
1001                "Unique test content about elephants in Africa.",
1002            )
1003            .await
1004            .unwrap();
1005
1006        // hybrid_search should still work (falling back to FTS-only)
1007        let results = store.hybrid_search("elephants Africa", 5).await.unwrap();
1008        assert!(
1009            !results.is_empty(),
1010            "hybrid_search should fall back to FTS without embedder"
1011        );
1012        assert!(results[0].text.contains("elephants"));
1013    }
1014
1015    // ── Mock embedder integration tests ─────────────────────────────────
1016
1017    /// A mock embedder that returns deterministic vectors for testing.
1018    /// Each text is embedded as a vector where the i-th dimension is the
1019    /// count of the i-th character ('a'=0, 'b'=1, ...), normalized.
1020    struct MockEmbedder;
1021
1022    #[async_trait::async_trait]
1023    impl Embedder for MockEmbedder {
1024        async fn embed(&self, texts: &[String]) -> Result<Vec<Vec<f32>>> {
1025            Ok(texts
1026                .iter()
1027                .map(|t| {
1028                    let mut vec = vec![0.0f32; 8];
1029                    for ch in t.chars() {
1030                        let idx = (ch.to_ascii_lowercase() as usize).wrapping_sub('a' as usize);
1031                        if idx < 8 {
1032                            vec[idx] += 1.0;
1033                        }
1034                    }
1035                    // Normalize
1036                    let norm: f32 = vec.iter().map(|x| x * x).sum::<f32>().sqrt();
1037                    if norm > 0.0 {
1038                        for v in &mut vec {
1039                            *v /= norm;
1040                        }
1041                    }
1042                    vec
1043                })
1044                .collect())
1045        }
1046
1047        fn dimensions(&self) -> usize {
1048            8
1049        }
1050    }
1051
1052    #[tokio::test]
1053    async fn set_embedder_enables_vector_storage() {
1054        let tmp = TempDir::new().unwrap();
1055        let mut store = test_store(&tmp).await;
1056        store.set_embedder(Arc::new(MockEmbedder));
1057
1058        store
1059            .write_file(
1060                "test-cats.md",
1061                "Cats are wonderful animals that love to sleep.",
1062            )
1063            .await
1064            .unwrap();
1065
1066        // Verify vectors were stored
1067        let count: i64 =
1068            sqlx::query_scalar("SELECT COUNT(*) FROM memory_vectors WHERE source = 'test-cats.md'")
1069                .fetch_one(&store.pool)
1070                .await
1071                .unwrap();
1072        assert!(
1073            count > 0,
1074            "Vectors should be stored after write_file with embedder"
1075        );
1076    }
1077
1078    #[tokio::test]
1079    async fn vector_search_with_mock_embedder() {
1080        let tmp = TempDir::new().unwrap();
1081        let mut store = test_store(&tmp).await;
1082        store.set_embedder(Arc::new(MockEmbedder));
1083
1084        store
1085            .write_file("test-abc.md", "aaa bbb ccc abc")
1086            .await
1087            .unwrap();
1088        store
1089            .write_file("test-def.md", "ddd eee fff def")
1090            .await
1091            .unwrap();
1092
1093        // Search for something similar to "abc" content
1094        let results = store.vector_search("aaa abc", 5).await.unwrap();
1095        assert!(
1096            !results.is_empty(),
1097            "vector_search should return results with embedder"
1098        );
1099    }
1100
1101    #[tokio::test]
1102    async fn hybrid_search_with_mock_embedder() {
1103        let tmp = TempDir::new().unwrap();
1104        let mut store = test_store(&tmp).await;
1105        store.set_embedder(Arc::new(MockEmbedder));
1106
1107        store
1108            .write_file("test-alpha.md", "Alpha beta gamma delta")
1109            .await
1110            .unwrap();
1111        store
1112            .write_file("test-beta.md", "Beta epsilon zeta eta")
1113            .await
1114            .unwrap();
1115
1116        let results = store.hybrid_search("alpha beta", 5).await.unwrap();
1117        assert!(
1118            !results.is_empty(),
1119            "hybrid_search should return results with embedder"
1120        );
1121    }
1122
1123    #[tokio::test]
1124    async fn reindex_clears_and_rebuilds_vectors() {
1125        let tmp = TempDir::new().unwrap();
1126        let mut store = test_store(&tmp).await;
1127        store.set_embedder(Arc::new(MockEmbedder));
1128
1129        store
1130            .write_file("test-vectors.md", "Test content here")
1131            .await
1132            .unwrap();
1133
1134        // Count vectors before reindex
1135        let before: i64 = sqlx::query_scalar("SELECT COUNT(*) FROM memory_vectors")
1136            .fetch_one(&store.pool)
1137            .await
1138            .unwrap();
1139        assert!(before > 0);
1140
1141        // Reindex should clear and rebuild
1142        store.reindex().await.unwrap();
1143
1144        let after: i64 = sqlx::query_scalar("SELECT COUNT(*) FROM memory_vectors")
1145            .fetch_one(&store.pool)
1146            .await
1147            .unwrap();
1148        // Should have vectors for all files (defaults + test.md)
1149        assert!(after > 0, "Reindex should rebuild vectors");
1150    }
1151
1152    // ── Byte conversion round-trip test ─────────────────────────────────
1153
1154    #[test]
1155    fn f32_bytes_round_trip() {
1156        let original = vec![1.0f32, -2.5, 0.0, std::f32::consts::PI, f32::MAX, f32::MIN];
1157        let bytes = f32_vec_to_bytes(&original);
1158        assert_eq!(bytes.len(), original.len() * 4);
1159        let restored = bytes_to_f32_vec(&bytes);
1160        assert_eq!(original, restored);
1161    }
1162
1163    #[test]
1164    fn f32_bytes_empty_round_trip() {
1165        let original: Vec<f32> = vec![];
1166        let bytes = f32_vec_to_bytes(&original);
1167        assert!(bytes.is_empty());
1168        let restored = bytes_to_f32_vec(&bytes);
1169        assert!(restored.is_empty());
1170    }
1171
1172    #[test]
1173    fn f32_bytes_single_value() {
1174        let original = vec![42.0f32];
1175        let bytes = f32_vec_to_bytes(&original);
1176        assert_eq!(bytes.len(), 4);
1177        let restored = bytes_to_f32_vec(&bytes);
1178        assert_eq!(original, restored);
1179    }
1180
    // ── Search with temporal decay test ─────────────────────────────────
1184
1185    #[tokio::test]
1186    async fn search_applies_temporal_decay() {
1187        let tmp = TempDir::new().unwrap();
1188        let store = test_store(&tmp).await;
1189        let agent_home = tmp.path().join("agent_home");
1190
1191        // Write the same content to an evergreen file and a daily log
1192        let content = "Temporal decay test content about quantum physics and relativity.";
1193        store.write_file("test-physics.md", content).await.unwrap();
1194
1195        // Write to an old daily log file directly, then reindex
1196        let old_date = Local::now().date_naive() - chrono::Duration::days(90);
1197        let old_filename = format!("memory/{}.md", old_date.format("%Y-%m-%d"));
1198        std::fs::create_dir_all(agent_home.join("memory")).unwrap();
1199        let old_path = agent_home.join(&old_filename);
1200        std::fs::write(&old_path, content).unwrap();
1201        store.reindex().await.unwrap();
1202
1203        let results = store
1204            .search("quantum physics relativity", 10)
1205            .await
1206            .unwrap();
1207        // Should find the evergreen file at minimum
1208        assert!(
1209            !results.is_empty(),
1210            "Should find at least the evergreen file"
1211        );
1212    }
1213
1214    #[tokio::test]
1215    async fn test_append_daily_creates_memory_dir() {
1216        let tmp = TempDir::new().unwrap();
1217        let store = test_store(&tmp).await;
1218        let agent_home = tmp.path().join("agent_home");
1219
1220        // Do NOT create memory/ dir — append_daily should create it
1221        assert!(!agent_home.join("memory").exists());
1222
1223        store
1224            .append_daily("First entry without pre-existing dir.")
1225            .await
1226            .unwrap();
1227
1228        assert!(agent_home.join("memory").exists());
1229        let today = Local::now().format("%Y-%m-%d").to_string();
1230        let content = store.read_file(&format!("memory/{}.md", today)).unwrap();
1231        assert!(content.contains("First entry"));
1232    }
1233
1234    #[tokio::test]
1235    async fn test_bootstrap_context_multibyte_safe() {
1236        let tmp = TempDir::new().unwrap();
1237        let agent_home = tmp.path().join("agent_home");
1238        let config_dir = agent_home.join("config");
1239        let db_dir = tmp.path().join("db");
1240        std::fs::create_dir_all(&config_dir).unwrap();
1241
1242        // Write SOUL.md with multibyte chars that would cause a panic if
1243        // the cap falls on a char boundary
1244        let soul = "# Soul\n".to_string() + &"café 🌟 ".repeat(5000);
1245        std::fs::write(config_dir.join("SOUL.md"), &soul).unwrap();
1246
1247        let store = MemoryStore::new(&agent_home, &config_dir, &db_dir)
1248            .await
1249            .unwrap();
1250        // Should not panic even though the cap likely falls mid-character
1251        let ctx = store.bootstrap_context().unwrap();
1252        assert!(ctx.contains("SOUL.md"));
1253        // The content should be valid UTF-8
1254        assert!(ctx.is_char_boundary(ctx.len()));
1255    }
1256
1257    // ── Config/agent_home separation ──────────────────────────────
1258
1259    #[tokio::test]
1260    async fn config_files_routed_to_config_dir() {
1261        let tmp = TempDir::new().unwrap();
1262        let store = test_store(&tmp).await;
1263        let config_dir = tmp.path().join("agent_home").join("config");
1264
1265        // Writing SOUL.md should go to config_dir
1266        store
1267            .write_file("SOUL.md", "# Soul\nCustom soul.")
1268            .await
1269            .unwrap();
1270        assert!(config_dir.join("SOUL.md").is_file());
1271        let content = std::fs::read_to_string(config_dir.join("SOUL.md")).unwrap();
1272        assert!(content.contains("Custom soul"));
1273
1274        // Reading should return from config_dir
1275        let read = store.read_file("SOUL.md").unwrap();
1276        assert!(read.contains("Custom soul"));
1277    }
1278
1279    #[tokio::test]
1280    async fn runtime_files_routed_to_agent_home() {
1281        let tmp = TempDir::new().unwrap();
1282        let store = test_store(&tmp).await;
1283        let agent_home = tmp.path().join("agent_home");
1284        let config_dir = agent_home.join("config");
1285
1286        // Writing a non-config .md file should go to agent_home
1287        store.write_file("notes.md", "Some notes.").await.unwrap();
1288        assert!(agent_home.join("notes.md").is_file());
1289        assert!(!config_dir.join("notes.md").exists());
1290
1291        // Reading it back should work
1292        let content = store.read_file("notes.md").unwrap();
1293        assert!(content.contains("Some notes"));
1294    }
1295
1296    #[tokio::test]
1297    async fn reindex_covers_both_config_and_agent_home() {
1298        let tmp = TempDir::new().unwrap();
1299        let store = test_store(&tmp).await;
1300
1301        // Write a config file (SOUL.md → config_dir)
1302        store
1303            .write_file("SOUL.md", "Soul content about quantum.")
1304            .await
1305            .unwrap();
1306
1307        // Write a runtime file (notes.md → agent_home)
1308        store
1309            .write_file("notes.md", "Notes about quantum.")
1310            .await
1311            .unwrap();
1312
1313        // Reindex should pick up both
1314        store.reindex().await.unwrap();
1315
1316        let results = store.search("quantum", 10).await.unwrap();
1317        let sources: Vec<&str> = results.iter().map(|r| r.source.as_str()).collect();
1318        assert!(
1319            sources.contains(&"SOUL.md"),
1320            "SOUL.md from config_dir should be indexed"
1321        );
1322        assert!(
1323            sources.contains(&"notes.md"),
1324            "notes.md from agent_home should be indexed"
1325        );
1326    }
1327
1328    #[tokio::test]
1329    async fn bootstrap_context_reads_from_config_dir() {
1330        let tmp = TempDir::new().unwrap();
1331        let store = test_store(&tmp).await;
1332
1333        // Overwrite SOUL.md in config_dir
1334        store
1335            .write_file("SOUL.md", "# Soul\nI am ConfigBot.")
1336            .await
1337            .unwrap();
1338
1339        let ctx = store.bootstrap_context().unwrap();
1340        assert!(
1341            ctx.contains("ConfigBot"),
1342            "bootstrap should read from config_dir"
1343        );
1344    }
1345
1346    #[tokio::test]
1347    async fn has_bootstrap_checks_config_dir() {
1348        let tmp = TempDir::new().unwrap();
1349        let store = test_store(&tmp).await;
1350        let config_dir = tmp.path().join("agent_home").join("config");
1351
1352        // Default BOOTSTRAP.md should be empty
1353        assert!(
1354            !store.has_bootstrap(),
1355            "Default BOOTSTRAP.md should be empty"
1356        );
1357
1358        // Write content to BOOTSTRAP.md in config_dir
1359        std::fs::write(
1360            config_dir.join("BOOTSTRAP.md"),
1361            "Do something on first run.",
1362        )
1363        .unwrap();
1364        assert!(
1365            store.has_bootstrap(),
1366            "BOOTSTRAP.md with content should be detected"
1367        );
1368
1369        // Clear it
1370        store.clear_bootstrap().unwrap();
1371        assert!(
1372            !store.has_bootstrap(),
1373            "Cleared BOOTSTRAP.md should not be detected"
1374        );
1375    }
1376
1377    // ── new_user tests ─────────────────────────────────────────────
1378
1379    #[tokio::test]
1380    async fn new_user_creates_db_in_user_dir() {
1381        let tmp = TempDir::new().unwrap();
1382        let user_dir = tmp.path().join("users").join("alice");
1383
1384        let _store = MemoryStore::new_user(&user_dir).await.unwrap();
1385
1386        assert!(
1387            user_dir.join("memory.db").exists(),
1388            "memory.db should be in user_dir"
1389        );
1390    }
1391
1392    #[tokio::test]
1393    async fn new_user_does_not_seed_defaults() {
1394        let tmp = TempDir::new().unwrap();
1395        let user_dir = tmp.path().join("users").join("bob");
1396
1397        let _store = MemoryStore::new_user(&user_dir).await.unwrap();
1398
1399        // new_user should NOT create SOUL.md, HEARTBEAT.md, etc.
1400        assert!(
1401            !user_dir.join("SOUL.md").exists(),
1402            "new_user should not seed SOUL.md"
1403        );
1404        assert!(
1405            !user_dir.join("HEARTBEAT.md").exists(),
1406            "new_user should not seed HEARTBEAT.md"
1407        );
1408    }
1409
1410    #[tokio::test]
1411    async fn new_user_indexes_existing_files() {
1412        let tmp = TempDir::new().unwrap();
1413        let user_dir = tmp.path().join("users").join("carol");
1414        std::fs::create_dir_all(&user_dir).unwrap();
1415
1416        // Pre-create a file before store initialization
1417        std::fs::write(
1418            user_dir.join("MEMORY.md"),
1419            "# Memory\n\nCarol likes functional programming.\n",
1420        )
1421        .unwrap();
1422
1423        let store = MemoryStore::new_user(&user_dir).await.unwrap();
1424
1425        // The existing file should be indexed and searchable
1426        let results = store.search("functional programming", 5).await.unwrap();
1427        assert!(
1428            !results.is_empty(),
1429            "Pre-existing file should be indexed on startup"
1430        );
1431        assert!(results
1432            .iter()
1433            .any(|r| r.text.contains("functional programming")));
1434    }
1435
1436    #[tokio::test]
1437    async fn new_user_write_and_search() {
1438        let tmp = TempDir::new().unwrap();
1439        let user_dir = tmp.path().join("users").join("dave");
1440
1441        let store = MemoryStore::new_user(&user_dir).await.unwrap();
1442
1443        store
1444            .write_file("MEMORY.md", "# Memory\n\nDave prefers dark theme.\n")
1445            .await
1446            .unwrap();
1447
1448        let results = store.search("dark theme", 5).await.unwrap();
1449        assert!(!results.is_empty(), "Written file should be searchable");
1450        assert!(results.iter().any(|r| r.text.contains("dark theme")));
1451    }
1452
1453    #[tokio::test]
1454    async fn new_user_append_daily_and_search() {
1455        let tmp = TempDir::new().unwrap();
1456        let user_dir = tmp.path().join("users").join("eve");
1457
1458        let store = MemoryStore::new_user(&user_dir).await.unwrap();
1459
1460        store
1461            .append_daily("Discussed API design patterns")
1462            .await
1463            .unwrap();
1464
1465        let results = store.search("API design", 5).await.unwrap();
1466        assert!(!results.is_empty(), "Daily log should be searchable");
1467    }
1468}