Skip to main content

codesearch/vectordb/
store.rs

1use crate::embed::EmbeddedChunk;
2use crate::info_print;
3use anyhow::{anyhow, Result};
4use arroy::distances::Cosine;
5use arroy::{Database as ArroyDatabase, ItemId, Reader, Writer};
6use heed::byteorder::BigEndian;
7use heed::types::*;
8use heed::{Database, EnvFlags, EnvOpenOptions};
9use rand::rngs::StdRng;
10use rand::SeedableRng;
11use serde::{Deserialize, Serialize};
12use std::fs;
13use std::num::NonZeroUsize;
14use std::path::Path;
15
/// Chunk metadata stored in the database
///
/// This is the value type of the `chunks` LMDB database (encoded through
/// `SerdeBincode`) and is built from an `EmbeddedChunk` by
/// `ChunkMetadata::from_embedded_chunk`.
///
/// NOTE(review): bincode encodings are typically sensitive to field order and
/// count — confirm compatibility with existing databases before reordering,
/// adding or removing fields.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChunkMetadata {
    /// Text of the chunk, copied from the source chunk.
    pub content: String,
    /// Path copied from the source chunk; `stats()` counts distinct values as files.
    pub path: String,
    /// First line of the chunk (NOTE(review): 0- or 1-based is not visible here).
    pub start_line: usize,
    /// Last line of the chunk (same base as `start_line`).
    pub end_line: usize,
    /// `Debug` rendering of the chunk kind (e.g., "Function", "Struct", "Impl").
    pub kind: String,
    /// Signature of the chunk, if the source chunk has one.
    pub signature: Option<String>,
    /// Docstring of the chunk, if the source chunk has one.
    pub docstring: Option<String>,
    /// Context entries of the source chunk joined with " > "; `None` when there are none.
    pub context: Option<String>,
    /// Hash copied from the source chunk (presumably a content hash — confirm).
    pub hash: String,
    /// Lines of code immediately before this chunk (for context)
    #[serde(default)]
    pub context_prev: Option<String>,
    /// Lines of code immediately after this chunk (for context)
    #[serde(default)]
    pub context_next: Option<String>,
    /// Searchable text: signature, docstring, kind and content joined by newlines
    #[serde(default)]
    pub searchable_text: String,
}
38
39impl ChunkMetadata {
40    fn from_embedded_chunk(chunk: &EmbeddedChunk) -> Self {
41        // Build searchable text from signature, docstring, and content
42        let searchable_text = {
43            let mut parts = Vec::new();
44
45            // Add signature if available (e.g., "fn handle_file_modified(path: PathBuf)")
46            if let Some(sig) = &chunk.chunk.signature {
47                parts.push(sig.clone());
48            }
49
50            // Add docstring if available
51            if let Some(doc) = &chunk.chunk.docstring {
52                parts.push(doc.clone());
53            }
54
55            // Add kind (e.g., "Function", "Struct", "Impl")
56            parts.push(format!("{:?}", chunk.chunk.kind));
57
58            // Add content
59            parts.push(chunk.chunk.content.clone());
60
61            parts.join("\n")
62        };
63
64        Self {
65            content: chunk.chunk.content.clone(),
66            path: chunk.chunk.path.clone(),
67            start_line: chunk.chunk.start_line,
68            end_line: chunk.chunk.end_line,
69            kind: format!("{:?}", chunk.chunk.kind),
70            signature: chunk.chunk.signature.clone(),
71            docstring: chunk.chunk.docstring.clone(),
72            context: if chunk.chunk.context.is_empty() {
73                None
74            } else {
75                Some(chunk.chunk.context.join(" > "))
76            },
77            hash: chunk.chunk.hash.clone(),
78            context_prev: chunk.chunk.context_prev.clone(),
79            context_next: chunk.chunk.context_next.clone(),
80            searchable_text,
81        }
82    }
83}
84
/// Vector database using arroy + heed (LMDB)
///
/// Single-file database with:
/// - Vector search via arroy (ANN with random projections)
/// - Metadata storage via heed (LMDB)
/// - ACID transactions
/// - Memory-mapped for performance
///
/// Each chunk's vector and metadata are stored under the same `u32` ID in two
/// named databases ("vectors" and "chunks") of one LMDB environment.
pub struct VectorStore {
    /// LMDB environment that holds both named databases.
    env: heed::Env,
    /// arroy vector database using cosine distance; always accessed with index 0.
    vectors: ArroyDatabase<Cosine>,
    /// Chunk metadata keyed by big-endian `u32` chunk ID, bincode-encoded.
    chunks: Database<U32<BigEndian>, SerdeBincode<ChunkMetadata>>,
    /// ID that will be assigned to the next inserted chunk.
    next_id: u32,
    /// Expected length of every stored embedding and every query vector.
    dimensions: usize,
    /// In-memory flag: set by `build_index` (or detected on open), cleared by
    /// inserts, deletes and `clear`. `search` refuses to run while it is false.
    indexed: bool,
}
100
101impl VectorStore {
102    /// Clear stale LMDB reader slots left by crashed processes.
103    ///
104    /// LMDB has a fixed reader table (default 126 slots). Processes that crash
105    /// without closing their read transactions leave stale entries. When the table
106    /// fills up, new read transactions fail with `MDB_READERS_FULL`.
107    ///
108    /// Call this on startup to reclaim slots from dead PIDs.
109    pub fn clear_stale_readers(&self) -> anyhow::Result<usize> {
110        let cleared = self.env.clear_stale_readers()?;
111        if cleared > 0 {
112            tracing::info!("Cleared {} stale LMDB reader slots", cleared);
113        }
114        Ok(cleared)
115    }
116
117    /// Create or open a vector store
118    ///
119    /// # Arguments
120    /// * `db_path` - Path to the database directory (e.g., ".codesearch.db")
121    /// * `dimensions` - Dimensionality of embeddings (e.g., 384, 768)
122    pub fn new(db_path: &Path, dimensions: usize) -> Result<Self> {
123        info_print!("📦 Opening vector database at: {}", db_path.display());
124
125        // Create database directory (LMDB expects a directory, not a file)
126        std::fs::create_dir_all(db_path)?;
127
128        // Clean up any stale .del files from previous crashed runs
129        cleanup_stale_del_files(db_path)?;
130
131        // Open LMDB environment
132        let map_size_mb = std::env::var("CODESEARCH_LMDB_MAP_SIZE_MB")
133            .ok()
134            .and_then(|s| s.parse::<usize>().ok())
135            .unwrap_or(crate::constants::DEFAULT_LMDB_MAP_SIZE_MB);
136        let env = unsafe {
137            EnvOpenOptions::new()
138                .map_size(map_size_mb * 1024 * 1024)
139                .max_dbs(10)
140                .open(db_path)?
141        };
142
143        // Open or create databases
144        let mut wtxn = env.write_txn()?;
145
146        let vectors: ArroyDatabase<Cosine> = env.create_database(&mut wtxn, Some("vectors"))?;
147        let chunks: Database<U32<BigEndian>, SerdeBincode<ChunkMetadata>> =
148            env.create_database(&mut wtxn, Some("chunks"))?;
149
150        // Get the next ID from the maximum existing key + 1
151        // Using len() is wrong after delete+insert cycles: deleted IDs create gaps
152        // so len() < max_key + 1, causing ID collisions on re-open
153        let next_id = match chunks.last(&wtxn)? {
154            Some((max_key, _)) => max_key + 1,
155            None => 0,
156        };
157
158        wtxn.commit()?;
159
160        // Check if database is already indexed by trying to open a reader
161        let indexed = if next_id > 0 {
162            let rtxn = env.read_txn()?;
163            match Reader::open(&rtxn, 0, vectors) {
164                Ok(_) => {
165                    tracing::debug!("Index detected: Reader::open succeeded");
166                    true
167                }
168                Err(e) => {
169                    tracing::debug!("Index not detected: Reader::open failed: {:?}", e);
170                    false
171                }
172            }
173        } else {
174            false
175        };
176
177        info_print!("✅ Database opened (next_id: {})", next_id);
178
179        Ok(Self {
180            env,
181            vectors,
182            chunks,
183            next_id,
184            dimensions,
185            indexed,
186        })
187    }
188
189    /// Open a vector store in read-only mode (for searches while another process writes)
190    ///
191    /// # Arguments
192    /// * `db_path` - Path to the database directory (e.g., ".codesearch.db")
193    /// * `dimensions` - Dimensionality of embeddings (e.g., 384, 768)
194    pub fn open_readonly(db_path: &Path, dimensions: usize) -> Result<Self> {
195        tracing::debug!(
196            "📦 Opening vector database (read-only) at: {}",
197            db_path.display()
198        );
199
200        if !db_path.exists() {
201            return Err(anyhow::anyhow!(
202                "Database does not exist at: {}",
203                db_path.display()
204            ));
205        }
206
207        // Open LMDB environment in read-only mode
208        let map_size_mb = std::env::var("CODESEARCH_LMDB_MAP_SIZE_MB")
209            .ok()
210            .and_then(|s| s.parse::<usize>().ok())
211            .unwrap_or(crate::constants::DEFAULT_LMDB_MAP_SIZE_MB);
212        let env = unsafe {
213            EnvOpenOptions::new()
214                .map_size(map_size_mb * 1024 * 1024)
215                .max_dbs(10)
216                .flags(EnvFlags::READ_ONLY)
217                .open(db_path)?
218        };
219
220        // Open databases (read-only, no create)
221        let rtxn = env.read_txn()?;
222
223        let vectors: ArroyDatabase<Cosine> = env
224            .open_database(&rtxn, Some("vectors"))?
225            .ok_or_else(|| anyhow::anyhow!("vectors database not found"))?;
226        let chunks: Database<U32<BigEndian>, SerdeBincode<ChunkMetadata>> = env
227            .open_database(&rtxn, Some("chunks"))?
228            .ok_or_else(|| anyhow::anyhow!("chunks database not found"))?;
229
230        // Get the next ID from the maximum existing key + 1
231        // Using len() is wrong after delete+insert cycles: deleted IDs create gaps
232        let next_id = match chunks.last(&rtxn)? {
233            Some((max_key, _)) => max_key + 1,
234            None => 0,
235        };
236
237        // Check if database is already indexed
238        let indexed = if next_id > 0 {
239            Reader::open(&rtxn, 0, vectors).is_ok()
240        } else {
241            false
242        };
243
244        drop(rtxn);
245
246        tracing::debug!(
247            "✅ Database opened read-only (next_id: {}, indexed: {})",
248            next_id,
249            indexed
250        );
251
252        Ok(Self {
253            env,
254            vectors,
255            chunks,
256            next_id,
257            dimensions,
258            indexed,
259        })
260    }
261
262    /// Insert embedded chunks into the database
263    ///
264    /// Returns the number of chunks inserted
265    #[allow(dead_code)] // Reserved for batch insert operations
266    pub fn insert_chunks(&mut self, chunks: Vec<EmbeddedChunk>) -> Result<usize> {
267        if chunks.is_empty() {
268            return Ok(0);
269        }
270
271        eprintln!("📊 Inserting {} chunks...", chunks.len());
272
273        let mut wtxn = self.env.write_txn()?;
274        let writer = Writer::new(self.vectors, 0, self.dimensions);
275
276        for chunk in &chunks {
277            let id = self.next_id;
278
279            // Check embedding dimensions
280            if chunk.embedding.len() != self.dimensions {
281                return Err(anyhow!(
282                    "Embedding dimension mismatch: expected {}, got {}",
283                    self.dimensions,
284                    chunk.embedding.len()
285                ));
286            }
287
288            // Add vector to arroy
289            writer.add_item(&mut wtxn, id, &chunk.embedding)?;
290
291            // Store metadata
292            let metadata = ChunkMetadata::from_embedded_chunk(chunk);
293            self.chunks.put(&mut wtxn, &id, &metadata)?;
294
295            self.next_id += 1;
296        }
297
298        wtxn.commit()?;
299
300        // Mark as not indexed (need to rebuild index after inserts)
301        self.indexed = false;
302
303        eprintln!(
304            "✅ Inserted {} chunks (IDs: {}-{})",
305            chunks.len(),
306            self.next_id - chunks.len() as u32,
307            self.next_id - 1
308        );
309
310        Ok(chunks.len())
311    }
312
313    /// Build the vector index
314    ///
315    /// Must be called after inserting chunks and before searching
316    pub fn build_index(&mut self) -> Result<()> {
317        let mut wtxn = self.env.write_txn()?;
318        let writer = Writer::new(self.vectors, 0, self.dimensions);
319
320        let mut rng = StdRng::seed_from_u64(rand::random());
321        writer.builder(&mut rng).build(&mut wtxn)?;
322
323        wtxn.commit()?;
324
325        self.indexed = true;
326
327        Ok(())
328    }
329
330    /// Search for similar chunks
331    ///
332    /// # Arguments
333    /// * `query_embedding` - The query vector
334    /// * `limit` - Maximum number of results to return
335    ///
336    /// # Returns
337    /// Vector of search results with metadata and scores
338    pub fn search(&self, query_embedding: &[f32], limit: usize) -> Result<Vec<SearchResult>> {
339        if query_embedding.len() != self.dimensions {
340            return Err(anyhow!(
341                "Query embedding dimension mismatch: expected {}, got {}",
342                self.dimensions,
343                query_embedding.len()
344            ));
345        }
346
347        if !self.indexed {
348            return Err(anyhow!(
349                "Index not built. Call build_index() after inserting chunks."
350            ));
351        }
352
353        let rtxn = self.env.read_txn()?;
354        let reader = Reader::open(&rtxn, 0, self.vectors)?;
355
356        // Perform ANN search with quality boost
357        let mut query = reader.nns(limit);
358
359        // Improve search quality by exploring more candidates
360        if let Some(n_trees) = NonZeroUsize::new(reader.n_trees()) {
361            if let Some(search_k) = NonZeroUsize::new(limit * n_trees.get() * 15) {
362                query.search_k(search_k);
363            }
364        }
365
366        let results = query.by_vector(&rtxn, query_embedding)?;
367
368        // Fetch metadata for each result
369        let mut search_results = Vec::new();
370
371        for (id, distance) in results {
372            if let Some(metadata) = self.chunks.get(&rtxn, &id)? {
373                search_results.push(SearchResult {
374                    id,
375                    content: metadata.content,
376                    path: metadata.path,
377                    start_line: metadata.start_line,
378                    end_line: metadata.end_line,
379                    kind: metadata.kind,
380                    signature: metadata.signature,
381                    docstring: metadata.docstring,
382                    context: metadata.context,
383                    hash: metadata.hash,
384                    distance,
385                    score: 1.0 - distance, // Convert distance to similarity score
386                    context_prev: metadata.context_prev,
387                    context_next: metadata.context_next,
388                });
389            }
390        }
391
392        Ok(search_results)
393    }
394
395    /// Get statistics about the vector store
396    pub fn stats(&self) -> Result<StoreStats> {
397        let rtxn = self.env.read_txn()?;
398
399        let total_chunks = self.chunks.len(&rtxn)?;
400
401        // Count unique files
402        let mut unique_files = std::collections::HashSet::new();
403        for result in self.chunks.iter(&rtxn)? {
404            let (_, metadata) = result?;
405            unique_files.insert(metadata.path.clone());
406        }
407
408        // Get max chunk ID from the last key in LMDB (sorted)
409        let max_chunk_id = self.chunks.last(&rtxn)?.map(|(k, _)| k).unwrap_or(0);
410
411        Ok(StoreStats {
412            total_chunks: total_chunks as usize,
413            total_files: unique_files.len(),
414            indexed: self.indexed,
415            dimensions: self.dimensions,
416            max_chunk_id,
417        })
418    }
419
420    /// Delete chunks by their IDs
421    ///
422    /// Returns the number of chunks deleted
423    pub fn delete_chunks(&mut self, chunk_ids: &[u32]) -> Result<usize> {
424        if chunk_ids.is_empty() {
425            return Ok(0);
426        }
427
428        let mut wtxn = self.env.write_txn()?;
429        let writer = Writer::new(self.vectors, 0, self.dimensions);
430
431        let mut deleted = 0;
432        for &id in chunk_ids {
433            // Delete from vector database
434            if writer.del_item(&mut wtxn, id).is_ok() {
435                deleted += 1;
436            }
437            // Delete from metadata
438            self.chunks.delete(&mut wtxn, &id)?;
439        }
440
441        wtxn.commit()?;
442
443        // Mark as needing re-index
444        if deleted > 0 {
445            self.indexed = false;
446        }
447
448        Ok(deleted)
449    }
450
451    /// Delete all chunks from a specific file
452    ///
453    /// Returns the IDs of deleted chunks
454    /// Insert chunks and return their assigned IDs
455    ///
456    /// Useful for tracking which chunks belong to which file
457    pub fn insert_chunks_with_ids(&mut self, chunks: Vec<EmbeddedChunk>) -> Result<Vec<u32>> {
458        if chunks.is_empty() {
459            return Ok(vec![]);
460        }
461
462        let start_id = self.next_id;
463        let mut wtxn = self.env.write_txn()?;
464        let writer = Writer::new(self.vectors, 0, self.dimensions);
465
466        for chunk in &chunks {
467            let id = self.next_id;
468
469            if chunk.embedding.len() != self.dimensions {
470                return Err(anyhow!(
471                    "Embedding dimension mismatch: expected {}, got {}",
472                    self.dimensions,
473                    chunk.embedding.len()
474                ));
475            }
476
477            writer.add_item(&mut wtxn, id, &chunk.embedding)?;
478            let metadata = ChunkMetadata::from_embedded_chunk(chunk);
479            self.chunks.put(&mut wtxn, &id, &metadata)?;
480
481            self.next_id += 1;
482        }
483
484        wtxn.commit()?;
485        self.indexed = false;
486
487        let ids: Vec<u32> = (start_id..self.next_id).collect();
488        Ok(ids)
489    }
490
491    /// Clear all data from the database
492    #[allow(dead_code)] // Reserved for database reset operations
493    pub fn clear(&mut self) -> Result<()> {
494        eprintln!("🗑️  Clearing database...");
495
496        let mut wtxn = self.env.write_txn()?;
497
498        // Clear both databases
499        self.chunks.clear(&mut wtxn)?;
500        self.vectors.clear(&mut wtxn)?;
501
502        wtxn.commit()?;
503
504        self.next_id = 0;
505        self.indexed = false;
506
507        eprintln!("✅ Database cleared");
508        Ok(())
509    }
510
511    /// Get a chunk by ID
512    pub fn get_chunk(&self, id: u32) -> Result<Option<ChunkMetadata>> {
513        let rtxn = self.env.read_txn()?;
514        Ok(self.chunks.get(&rtxn, &id)?)
515    }
516
517    /// Get a chunk as SearchResult (for hybrid search)
518    pub fn get_chunk_as_result(&self, id: u32) -> Result<Option<SearchResult>> {
519        let rtxn = self.env.read_txn()?;
520        if let Some(meta) = self.chunks.get(&rtxn, &id)? {
521            Ok(Some(SearchResult {
522                id,
523                content: meta.content,
524                path: meta.path,
525                start_line: meta.start_line,
526                end_line: meta.end_line,
527                kind: meta.kind,
528                signature: meta.signature,
529                docstring: meta.docstring,
530                context: meta.context,
531                hash: meta.hash,
532                distance: 0.0,
533                score: 0.0, // Will be set by caller
534                context_prev: meta.context_prev,
535                context_next: meta.context_next,
536            }))
537        } else {
538            Ok(None)
539        }
540    }
541
542    /// Iterate all chunks in the store via LMDB cursor.
543    /// Returns (id, metadata) pairs for every chunk, regardless of ID gaps.
544    /// This is the correct way to enumerate chunks after delete+insert cycles.
545    pub fn all_chunks(&self) -> Result<Vec<(u32, ChunkMetadata)>> {
546        let rtxn = self.env.read_txn()?;
547        let mut result = Vec::new();
548        for entry in self.chunks.iter(&rtxn)? {
549            let (id, metadata) = entry?;
550            result.push((id, metadata));
551        }
552        Ok(result)
553    }
554
555    /// Get the database file size in bytes
556    #[allow(dead_code)] // Reserved for stats display
557    pub fn db_size(&self) -> Result<u64> {
558        let info = self.env.info();
559        Ok(info.map_size as u64)
560    }
561
    /// Check if the index is built
    ///
    /// Returns the in-memory `indexed` flag: it is probed when the store is
    /// opened, set by `build_index`, and cleared by inserts, deletes and
    /// `clear`. `search` returns an error while this is `false`.
    pub fn is_indexed(&self) -> bool {
        self.indexed
    }
566}
567
/// Search result with metadata
///
/// Produced by `VectorStore::search` (with real distance/score) and by
/// `VectorStore::get_chunk_as_result` (with both set to `0.0`). All fields
/// other than `id`, `distance` and `score` are copied from the chunk's
/// stored `ChunkMetadata`.
#[derive(Debug, Clone)]
#[allow(dead_code)] // Fields docstring/hash used for completeness
pub struct SearchResult {
    /// Chunk ID; the key shared by the vector and metadata databases.
    pub id: ItemId,
    /// Text of the chunk.
    pub content: String,
    /// Path stored for the chunk.
    pub path: String,
    /// First line of the chunk.
    pub start_line: usize,
    /// Last line of the chunk.
    pub end_line: usize,
    /// Chunk kind name (e.g., "Function").
    pub kind: String,
    /// Signature, if one was stored.
    pub signature: Option<String>,
    /// Docstring, if one was stored.
    pub docstring: Option<String>,
    /// Context path (entries joined with " > "), if one was stored.
    pub context: Option<String>,
    /// Hash stored for the chunk.
    pub hash: String,
    /// Distance reported by the vector search (lower is closer).
    pub distance: f32,
    pub score: f32, // 1.0 - distance (higher is better)
    /// Lines of code immediately before this chunk (for context)
    pub context_prev: Option<String>,
    /// Lines of code immediately after this chunk (for context)
    pub context_next: Option<String>,
}
589
/// Statistics about the vector store
///
/// Returned by `VectorStore::stats`.
#[derive(Debug, Clone)]
pub struct StoreStats {
    /// Number of entries in the `chunks` metadata database.
    pub total_chunks: usize,
    /// Number of distinct `path` values across all stored chunks.
    pub total_files: usize,
    /// Value of the store's in-memory `indexed` flag when the stats were taken.
    pub indexed: bool,
    /// Embedding dimensionality the store was opened with.
    pub dimensions: usize,
    /// The highest chunk ID in the store (or 0 if empty).
    /// NOTE: This may be > total_chunks when chunks have been deleted.
    /// A value of 0 is ambiguous: it is reported both for an empty store and
    /// for a store whose only chunk has ID 0.
    pub max_chunk_id: u32,
}
601
602/// Clean up stale .del files from previous crashed runs
603///
604/// LMDB creates .del files when deleting items, but if the process crashes
605/// or is interrupted, these files can be left behind and cause errors on
606/// the next run. This function removes any .del files before opening the DB.
607fn cleanup_stale_del_files(db_path: &Path) -> Result<()> {
608    if !db_path.exists() {
609        return Ok(());
610    }
611
612    let entries = fs::read_dir(db_path)?;
613    let mut cleaned = 0;
614
615    for entry in entries {
616        let entry = entry?;
617        let path = entry.path();
618
619        // Check if file ends with .del
620        if path.extension().and_then(|s| s.to_str()) == Some("del") {
621            // Remove the .del file
622            fs::remove_file(&path)?;
623            cleaned += 1;
624        }
625    }
626
627    if cleaned > 0 {
628        tracing::debug!("Cleaned up {} stale .del files", cleaned);
629    }
630
631    Ok(())
632}
633
#[cfg(test)]
mod tests {
    use super::*;
    use crate::chunker::{Chunk, ChunkKind};
    use crate::embed::EmbeddedChunk;
    use tempfile::tempdir;

    /// Build an embedded `Function` chunk from its text, line span, path and
    /// 4-dimensional embedding.
    fn function_chunk(
        content: &str,
        start_line: usize,
        end_line: usize,
        path: &str,
        embedding: [f32; 4],
    ) -> EmbeddedChunk {
        EmbeddedChunk::new(
            Chunk::new(
                content.to_string(),
                start_line,
                end_line,
                ChunkKind::Function,
                path.to_string(),
            ),
            embedding.to_vec(),
        )
    }

    /// The single chunk shared by the clear/get/persistence tests.
    fn sample_chunk() -> EmbeddedChunk {
        function_chunk("fn test() {}", 0, 1, "test.rs", [1.0, 0.0, 0.0, 0.0])
    }

    /// Open a fresh store in a new temporary directory. The directory guard is
    /// returned as well so the files outlive the store under test.
    fn fresh_store(dimensions: usize) -> (tempfile::TempDir, VectorStore) {
        let temp_dir = tempdir().unwrap();
        let db_path = temp_dir.path().join("test.db");
        let store = VectorStore::new(&db_path, dimensions).unwrap();
        (temp_dir, store)
    }

    #[test]
    fn test_vector_store_creation() {
        let temp_dir = tempdir().unwrap();
        let db_path = temp_dir.path().join("test.db");

        let opened = VectorStore::new(&db_path, 384);
        assert!(opened.is_ok());

        let store = opened.unwrap();
        assert_eq!(store.dimensions, 384);
        assert!(!store.is_indexed());
    }

    #[test]
    fn test_insert_and_search() {
        let (_temp_dir, mut store) = fresh_store(4);

        // Two chunks with orthogonal embeddings: the first is close to the
        // query below, the second is far from it.
        let chunks = vec![
            function_chunk("fn authenticate() {}", 0, 1, "auth.rs", [1.0, 0.0, 0.0, 0.0]),
            function_chunk("fn calculate() {}", 2, 3, "math.rs", [0.0, 1.0, 0.0, 0.0]),
        ];

        let inserted = store.insert_chunks(chunks).unwrap();
        assert_eq!(inserted, 2);

        store.build_index().unwrap();
        assert!(store.is_indexed());

        // Query that points almost exactly at the first chunk.
        let query = [0.9_f32, 0.1, 0.0, 0.0];
        let hits = store.search(&query, 2).unwrap();

        assert_eq!(hits.len(), 2);
        // The authenticate function is nearer to the query and must rank first.
        assert!(hits[0].content.contains("authenticate"));
        assert!(hits[0].score > hits[1].score);
    }

    #[test]
    fn test_stats() {
        let (_temp_dir, mut store) = fresh_store(4);

        let chunks = vec![
            function_chunk("fn test1() {}", 0, 1, "file1.rs", [1.0, 0.0, 0.0, 0.0]),
            function_chunk("fn test2() {}", 0, 1, "file2.rs", [0.0, 1.0, 0.0, 0.0]),
        ];

        store.insert_chunks(chunks).unwrap();
        store.build_index().unwrap();

        let stats = store.stats().unwrap();
        assert_eq!(stats.total_chunks, 2);
        assert_eq!(stats.total_files, 2);
        assert!(stats.indexed);
        assert_eq!(stats.dimensions, 4);
    }

    #[test]
    fn test_clear() {
        let (_temp_dir, mut store) = fresh_store(4);

        store.insert_chunks(vec![sample_chunk()]).unwrap();
        store.build_index().unwrap();

        let before = store.stats().unwrap();
        assert_eq!(before.total_chunks, 1);

        store.clear().unwrap();

        let after = store.stats().unwrap();
        assert_eq!(after.total_chunks, 0);
        assert!(!after.indexed);
    }

    #[test]
    fn test_get_chunk() {
        let (_temp_dir, mut store) = fresh_store(4);

        store.insert_chunks(vec![sample_chunk()]).unwrap();

        let found = store.get_chunk(0).unwrap();
        assert!(found.is_some());

        let metadata = found.unwrap();
        assert_eq!(metadata.content, "fn test() {}");
        assert_eq!(metadata.path, "test.rs");
    }

    #[test]
    fn test_persistence() {
        let temp_dir = tempdir().unwrap();
        let db_path = temp_dir.path().join("test.db");

        // First session: insert, index, then drop the store to close it.
        {
            let mut store = VectorStore::new(&db_path, 4).unwrap();

            store.insert_chunks(vec![sample_chunk()]).unwrap();
            store.build_index().unwrap();
        }

        // Second session: reopen the same path and check the data survived.
        {
            let store = VectorStore::new(&db_path, 4).unwrap();

            let stats = store.stats().unwrap();
            assert_eq!(stats.total_chunks, 1);

            let found = store.get_chunk(0).unwrap();
            assert!(found.is_some());
        }
    }
}